Introduction

This report provides a static sentiment analysis of a given set of news headlines. It processes the text, calculates sentiment scores using a chosen lexicon, and visualizes the results. All code is hidden by default but can be expanded for review.

1. Setup and User Inputs

This first section loads all necessary libraries and defines the input data for the analysis.

# Load Libraries
library(dplyr)
library(tidytext)
library(ggplot2)
library(stringr)
library(wordcloud)
library(RColorBrewer)
library(DT)
library(knitr)

# Default knitr chunk options applied to every code chunk in the report
knitr::opts_chunk$set(
  fig.align = "center", # centre-align every figure
  warning = FALSE,      # keep warnings out of the rendered output
  message = FALSE,      # keep package/startup messages out of the output
  echo = TRUE           # include each chunk's source code in the output
)

Define Your Inputs Here

Change the headline_input text and the lexicon_choice to run a new report.

# PASTE YOUR HEADLINES HERE (one per line, inside the quotes).
# analyze_sentiments() splits this string on newlines, trims the whitespace
# around each line and drops blank lines, so the line breaks right after the
# opening quote and right before the closing quote are harmless.

headline_input <- "
Treasury wary of National State Enterprises Bill – concerns raised about holding company for SOEs
Thousands of vehicle recalls in SA raise questions over standards
Miners' stocks soar along with copper prices – Anglo American, BHP and Glencore
Nedbank nabs FNB Business CEO Andiswa Bata
US treasury head to skip G20 meeting
Activity in residential property market cools
Prosus prices €750m bond
Stefanutti Stocks sells its Mozambique and Mauritius operations
Tharisa reports higher output but lowers its guidance
Merck poised to land $10bn biotech deal
Starlink lands licence to operate in India
SA central bank could lower CPI goal in July
Obsidian fund shuns SA bonds for Latin American yield
"

# CHOOSE YOUR SENTIMENT LEXICON: "bing", "afinn", or "nrc".
# The value is passed to analyze_sentiments() and is also checked by the
# plotting chunks to choose between the bing/afinn and the NRC visualisations.

lexicon_choice <- "bing"

2. Core Sentiment Analysis

The following code processes the raw text into a structured data frame, calculating a sentiment score for each headline based on the chosen lexicon.

# Score the sentiment of a block of newline-separated headlines.
#
# The text is split into one headline per line, each headline is tokenised
# into words, the words are matched against the chosen lexicon and the
# per-word values are combined into a single score per headline.
#
# Arguments:
#   headlines_text  A single string with one headline per line. Surrounding
#                   whitespace is trimmed and blank lines are ignored.
#   lexicon_choice  One of "bing", "afinn" or "nrc".
#                   - "bing":  +1 per positive word, -1 per negative word.
#                   - "afinn": sum of the lexicon's `value` column.
#                   - "nrc":   count of positive words minus count of
#                              negative words (emotion categories ignored).
#
# Returns:
#   NULL when the text contains no non-blank line; otherwise a data frame
#   with one row per headline and the columns `headline_id`, `headline`,
#   `score` (0 when no word matched the lexicon) and `label`
#   ("Positive", "Negative" or "Neutral"). For "nrc" the result also carries
#   the `positive` / `negative` word counts, which are NA for headlines
#   without any matching word.
#
# Errors:
#   Stops when `lexicon_choice` is not one of the three supported names, so a
#   typo can no longer silently select the NRC lexicon.
analyze_sentiments <- function(headlines_text, lexicon_choice = "bing") {

  valid_lexicons <- c("bing", "afinn", "nrc")
  if (!is.character(lexicon_choice) || length(lexicon_choice) != 1L ||
      !lexicon_choice %in% valid_lexicons) {
    stop(
      "`lexicon_choice` must be one of: ",
      paste0('"', valid_lexicons, '"', collapse = ", "),
      call. = FALSE
    )
  }

  headlines <- trimws(unlist(strsplit(headlines_text, "\n")))
  headlines <- headlines[headlines != ""]

  if (length(headlines) == 0) {
    return(NULL)
  }

  df <- data.frame(
    headline_id = seq_along(headlines),
    headline = headlines,
    stringsAsFactors = FALSE
  )

  # drop = FALSE keeps the original `headline` column next to each token so
  # the scores can be joined back onto `df` by id and text.
  tokens <- df %>%
    unnest_tokens(word, headline, drop = FALSE)

  if (lexicon_choice == "nrc") {
    sentiment_scored <- tokens %>%
      inner_join(get_sentiments("nrc"), by = "word") %>%
      filter(sentiment %in% c("positive", "negative")) %>%
      count(headline_id, headline, sentiment) %>%
      tidyr::pivot_wider(
        names_from = sentiment,
        values_from = n,
        values_fill = 0
      )

    # pivot_wider() only creates a column for polarities that actually occur,
    # so add whichever one is missing before computing the difference.
    for (polarity in c("positive", "negative")) {
      if (!polarity %in% names(sentiment_scored)) {
        sentiment_scored[[polarity]] <- 0
      }
    }

    sentiment_scored <- sentiment_scored %>%
      mutate(score = positive - negative)
  } else {
    lexicon <- get_sentiments(lexicon_choice)
    if (lexicon_choice == "bing") {
      # bing only labels words; turn the labels into +1 / -1 so both
      # lexicons can be summed through the same `value` column.
      lexicon <- lexicon %>%
        mutate(value = ifelse(sentiment == "positive", 1, -1))
    }

    sentiment_scored <- tokens %>%
      inner_join(lexicon, by = "word") %>%
      group_by(headline_id, headline) %>%
      summarise(score = sum(value), .groups = "drop")
  }

  # Headlines without any lexicon word are absent from `sentiment_scored`;
  # the left join brings them back and their missing score becomes 0.
  df %>%
    left_join(sentiment_scored, by = c("headline_id", "headline")) %>%
    mutate(
      score = ifelse(is.na(score), 0, score),
      label = case_when(
        score > 0 ~ "Positive",
        score < 0 ~ "Negative",
        TRUE ~ "Neutral"
      )
    )
}


# Score the headlines entered above with the selected lexicon
analysis_results <- analyze_sentiments(
  headlines_text = headline_input,
  lexicon_choice = lexicon_choice
)

3. Results and Visualizations

Analysis Summary

# Print the overall assessment: count the headlines per sentiment label and
# emit the result as raw HTML/markdown text. Falls back to a plain notice
# when there are no analysis results.
if (is.null(analysis_results) || nrow(analysis_results) == 0) {
  cat("No headlines were processed or found. Please check your inputs.")
} else {
  data <- analysis_results
  pos_news <- sum(data$label == "Positive")
  neg_news <- sum(data$label == "Negative")
  neu_news <- sum(data$label == "Neutral")

  # Simple majority between positive and negative headlines; a tie (which
  # includes "no sentiment found at all") is reported as mixed/neutral.
  summary_class <- if (pos_news > neg_news) {
    "summary-positive"
  } else if (neg_news > pos_news) {
    "summary-negative"
  } else {
    "summary-neutral"
  }

  summary_text <- switch(
    summary_class,
    "summary-positive" =
      "Overall sentiment is 'Positive', suggesting bullish signals.",
    "summary-negative" =
      "Overall sentiment is 'Negative', indicating potential bearish signals or risks.",
    "summary-neutral" =
      "Sentiment appears 'Mixed or Neutral'. Monitor closely for market-moving catalysts."
  )

  # Everything is written in one call; sep = "" keeps the pieces exactly as
  # they are built, without extra spaces between them.
  cat(
    paste0("<div class='summary-box ", summary_class, "'>"),
    paste("#### Overall Assessment: ", summary_text, "\n\n"),
    paste0("- **Total Headlines Analyzed:** ", nrow(data), "\n"),
    paste0("- **Positive Headlines:** ", pos_news, "\n"),
    paste0("- **Negative Headlines:** ", neg_news, "\n"),
    paste0("- **Neutral Headlines:** ", neu_news, "\n"),
    "</div>",
    sep = ""
  )
}

Overall Assessment: Overall sentiment is ‘Positive’, suggesting bullish signals.

Total Headlines Analyzed: 13
Positive Headlines: 2
Negative Headlines: 1
Neutral Headlines: 10

Sentiment Breakdown Plot

# Sentiment breakdown plot.
# - bing / afinn: histogram of the per-headline scores, coloured by label.
# - any other lexicon choice (NRC): bar chart of how many headline words fall
#   into each NRC emotion category (positive/negative are left out because
#   they are polarities rather than emotions).
if (!is.null(analysis_results) && nrow(analysis_results) > 0) {
  if (!lexicon_choice %in% c("bing", "afinn")) {
    analysis_results %>%
      unnest_tokens(word, headline) %>%
      inner_join(get_sentiments("nrc"), by = "word") %>%
      filter(!sentiment %in% c("positive", "negative")) %>%
      count(sentiment, sort = TRUE) %>%
      ggplot(aes(x = reorder(sentiment, n), y = n, fill = sentiment)) +
      geom_col(show.legend = FALSE) +
      coord_flip() +
      labs(
        title = "NRC Emotion Category Word Count",
        x = "Emotion",
        y = "Total Word Count"
      ) +
      theme_minimal(base_size = 15)
  } else {
    ggplot(analysis_results, aes(x = score, fill = label)) +
      geom_histogram(binwidth = 1, alpha = 0.8, position = "identity") +
      scale_fill_manual(
        name = "Sentiment",
        values = c(
          "Positive" = "#2c7bb6",
          "Negative" = "#d7191c",
          "Neutral" = "#fdae61"
        )
      ) +
      labs(
        title = "Distribution of Sentiment Scores",
        x = "Sentiment Score (Positive - Negative)",
        y = "Number of Headlines"
      ) +
      theme_minimal(base_size = 15)
  }
}

Keyword Cloud

library(tibble)

# Keyword cloud of the headline vocabulary (stop words removed).
# - bing / afinn: a comparison cloud that separates negative from positive
#   words, always classified with the bing lexicon.
# - otherwise (NRC, or no words left after stop-word removal): a plain
#   frequency word cloud, or a placeholder message when nothing can be drawn.
if (!is.null(analysis_results) && nrow(analysis_results) > 0) {
  cloud_data <- analysis_results %>%
    unnest_tokens(word, headline) %>%
    anti_join(stop_words, by = "word")

  if (lexicon_choice %in% c("bing", "afinn") && nrow(cloud_data) > 0) {

    # One row per word with its negative / positive occurrence counts
    comparison_data <- cloud_data %>%
      inner_join(get_sentiments("bing"), by = "word") %>%
      count(word, sentiment, sort = TRUE) %>%
      tidyr::pivot_wider(
        names_from = sentiment,
        values_from = n,
        values_fill = 0
      )

    # pivot_wider() only creates columns for polarities that occur
    if (!"negative" %in% names(comparison_data)) {
      comparison_data$negative <- 0
    }
    if (!"positive" %in% names(comparison_data)) {
      comparison_data$positive <- 0
    }

    # comparison.cloud() assigns `colors` by column position, while
    # pivot_wider() orders columns by first appearance in the data. Pin the
    # order so red is always "negative" and blue is always "positive",
    # matching the palette of the score histogram.
    comparison_data <- comparison_data %>%
      select(word, negative, positive)

    # NOTE(review): when only one polarity occurs, the other column is all
    # zeros; confirm comparison.cloud() renders sensibly for such input.
    if (nrow(comparison_data) > 0) {
      comparison_data %>%
        # comparison.cloud() expects the words as row names of a
        # numeric-only table
        column_to_rownames("word") %>%
        comparison.cloud(
          colors = c("#d7191c", "#2c7bb6"),
          max.words = 150,
          title.size = 1.5,
          scale = c(4, 0.8),
          random.order = FALSE
        )
    } else {
      plot.new()
      text(x = 0.5, y = 0.5, "No sentiment words found for comparison cloud.")
    }

  } else {
    # Fallback for the NRC lexicon, or when no words are available
    word_counts <- count(cloud_data, word, sort = TRUE)
    if (nrow(word_counts) > 0) {
      wordcloud(
        words = word_counts$word,
        freq = word_counts$n,
        max.words = 100,
        random.order = FALSE,
        colors = brewer.pal(8, "Dark2")
      )
    } else {
      plot.new()
      text(x = 0.5, y = 0.5, "Not enough words to generate a word cloud.")
    }
  }
}

Full Headline Data Table

# Interactive table of every headline with its sentiment score and label.
# Headlines longer than 80 characters are shortened in the display and the
# full text is offered as a hover tooltip; the score is coloured red / gray /
# blue for negative / zero / positive values.
if (!is.null(analysis_results) && nrow(analysis_results) > 0) {
  data_for_table <- analysis_results %>%
    select(Headline = headline, `Sentiment Score` = score, `Label` = label)

  datatable(
    data_for_table,
    extensions = "Responsive",
    options = list(
      pageLength = 10,
      columnDefs = list(
        list(
          targets = 0, # first column (Headline); row names are disabled
          render = JS(
            "function(data, type, row, meta) {",
            "  if (type !== 'display' || data === null || data.length <= 80) {",
            "    return data;",
            "  }",
            # The text goes into a double-quoted attribute, so a literal
            # double quote in a headline must be escaped or it would end the
            # attribute early and corrupt the cell markup.
            "  var tip = data.replace(/\"/g, '&quot;');",
            "  return '<span title=\"' + tip + '\">' +",
            "    data.substring(0, 80) + '...</span>';",
            "}"
          )
        )
      )
    ),
    rownames = FALSE
  ) %>%
    formatStyle(
      "Sentiment Score",
      # Cut points at +/-0.5 separate negative, zero and positive scores
      color = styleInterval(c(-0.5, 0.5), c("#d7191c", "gray", "#2c7bb6")),
      fontWeight = "bold"
    )
}

News Sentiment Analysis Report

Investment Research Team

2025-07-10