News Sentiment & Text Analytics: Trending Tickers

# Install only the packages that are not already available, so re-running or
# knitting this script does not reinstall everything on each execution.
required_pkgs <- c(
  "tidyverse", "tidytext", "textdata", "jsonlite",
  "wordcloud", "RColorBrewer", "lubridate",
  "scales", "knitr", "kableExtra"
)
missing_pkgs <- setdiff(required_pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

# Preview the AFINN lexicon: one row per word with a numeric sentiment value.
# NOTE(review): textdata may ask for confirmation before downloading a
# lexicon the first time it is used -- confirm in an interactive session.
library(textdata)
lexicon_afinn()

## # A tibble: 2,477 × 2
##    word       value
##    <chr>      <dbl>
##  1 abandon       -2
##  2 abandoned     -2
##  3 abandons      -2
##  4 abducted      -2
##  5 abduction     -2
##  6 abductions    -2
##  7 abhor         -3
##  8 abhorred      -3
##  9 abhorrent     -3
## 10 abhors        -3
## # ℹ 2,467 more rows

# Preview the Bing lexicon: one row per word with a categorical sentiment label.
lexicon_bing()

## # A tibble: 6,789 × 2
##    word        sentiment
##    <chr>       <chr>    
##  1 2-faced     negative 
##  2 2-faces     negative 
##  3 abnormal    negative 
##  4 abolish     negative 
##  5 abominable  negative 
##  6 abominably  negative 
##  7 abominate   negative 
##  8 abomination negative 
##  9 abort       negative 
## 10 aborted     negative 
## # ℹ 6,779 more rows

# Preview the NRC lexicon: a word may appear on several rows, one per
# sentiment/emotion category it is tagged with.
lexicon_nrc()

## # A tibble: 13,872 × 2
##    word        sentiment
##    <chr>       <chr>    
##  1 abacus      trust    
##  2 abandon     fear     
##  3 abandon     negative 
##  4 abandon     sadness  
##  5 abandoned   anger    
##  6 abandoned   fear     
##  7 abandoned   negative 
##  8 abandoned   sadness  
##  9 abandonment anger    
## 10 abandonment fear     
## # ℹ 13,862 more rows

library(tidyverse)
library(tidytext)
library(textdata)
library(jsonlite)
library(wordcloud)
library(RColorBrewer)
library(lubridate)
library(scales)
library(knitr)
library(kableExtra)

# Read the NewsAPI key from the NEWSAPI_KEY environment variable (for example
# set in ~/.Renviron) rather than hard-coding a secret in the script. Any key
# that was previously committed here should be treated as exposed and rotated.
api_key <- Sys.getenv("NEWSAPI_KEY")
if (!nzchar(api_key)) {
  warning(
    "NEWSAPI_KEY is not set; NewsAPI requests need a valid key.",
    call. = FALSE
  )
}

#' Fetch recent English-language articles from the NewsAPI `everything` endpoint
#'
#' @param query Single non-empty search string; it is URL-encoded before use.
#' @param api_key Single non-empty NewsAPI key.
#' @param page_size Number of articles to request; a whole number from 1 to
#'   100.
#' @return A tibble with one row per returned article. Dots in the flattened
#'   column names are replaced by underscores, and a `query` column records
#'   the search string that produced the row.
fetch_news <- function(query, api_key, page_size = 20) {
  is_string <- function(x) {
    is.character(x) && length(x) == 1L && !is.na(x) && nzchar(x)
  }
  if (!is_string(query)) {
    stop("`query` must be a single non-empty string.", call. = FALSE)
  }
  if (!is_string(api_key)) {
    stop("`api_key` must be a single non-empty string.", call. = FALSE)
  }
  if (!is.numeric(page_size) || length(page_size) != 1L ||
        is.na(page_size) || page_size < 1 || page_size > 100 ||
        page_size != round(page_size)) {
    stop("`page_size` must be a whole number between 1 and 100.",
         call. = FALSE)
  }

  encoded_key <- URLencode(api_key, reserved = TRUE)
  url <- paste0(
    "https://newsapi.org/v2/everything?",
    "q=",        URLencode(query, reserved = TRUE),
    "&language=en",
    "&sortBy=publishedAt",
    "&pageSize=", as.integer(page_size),
    "&apiKey=",  encoded_key
  )

  response <- tryCatch(
    fromJSON(url, flatten = TRUE),
    error = function(e) {
      # Scrub the key in case the underlying error message echoes the URL.
      detail <- conditionMessage(e)
      detail <- gsub(api_key, "<api_key>", detail, fixed = TRUE)
      detail <- gsub(encoded_key, "<api_key>", detail, fixed = TRUE)
      stop("NewsAPI request for \"", query, "\" failed: ", detail,
           call. = FALSE)
    }
  )

  # A parsed response can still carry an API-level error status.
  if (!identical(response$status, "ok")) {
    stop("NewsAPI returned an error for \"", query, "\": ",
         response$message, call. = FALSE)
  }

  as_tibble(response$articles) %>%
    rename_with(~ str_replace_all(.x, "\\.", "_")) %>%
    mutate(query = query)
}

# Query NewsAPI once per company, in order, and stack the results row-wise.
news_raw <- c(
  "Coherent Corp",
  "Lumentum Holdings",
  "Marvell Technology",
  "Corning"
) %>%
  lapply(fetch_news, api_key = api_key) %>%
  bind_rows()

glimpse(news_raw)

## Rows: 79
## Columns: 10
## $ author      <chr> "Hidehito Honda, Takatomi Kubo, Ryosuke Hisamatsu, Yoshima…
## $ title       <chr> "The occurrence of a particular state is a predictor of su…
## $ description <chr> "Dyadic interactions, such as consultations between clerks…
## $ url         <chr> "https://journals.plos.org/plosone/article?id=10.1371/jour…
## $ urlToImage  <chr> "https://journals.plos.org/plosone/article/figure/image?id…
## $ publishedAt <chr> "2026-06-22T14:00:00Z", "2026-06-19T10:22:37Z", "2026-06-1…
## $ content     <chr> "Citation: Honda H, Kubo T, Hisamatsu R, Ohmoto Y, Ikeda K…
## $ source_id   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ source_name <chr> "Plos.org", "Yahoo Entertainment", "Yahoo Entertainment", …
## $ query       <chr> "Coherent Corp", "Coherent Corp", "Coherent Corp", "Cohere…

# Clean the raw headlines: drop rows without a title, parse the publication
# timestamp, normalise the title text, and keep one row per distinct cleaned
# title.
news_clean <- news_raw %>%
  filter(!is.na(.data$title)) %>%
  mutate(
    pub_date    = ymd_hms(.data$publishedAt, quiet = TRUE),
    pub_day     = as.Date(pub_date),
    # Strip a trailing " - Publisher" suffix. Whitespace is required on both
    # sides of the dash so hyphenated words at the end of a headline
    # (e.g. "agri-food", "long-term") are not cut off.
    title_clean = str_remove(.data$title, "\\s+-\\s+[^-]+$"),
    # Replace punctuation with spaces, collapse runs of whitespace, lower-case.
    title_clean = str_squish(str_replace_all(title_clean, "[^[:alnum:][:space:]]", " ")),
    title_clean = str_to_lower(title_clean)
  ) %>%
  distinct(title_clean, .keep_all = TRUE)

cat("Total unique headlines:", nrow(news_clean), "\n")

## Total unique headlines: 69

# Render the first ten cleaned headlines as a styled table. The source column
# is selected by whichever of the candidate names is present.
news_clean %>%
  select(
    query,
    title_clean,
    any_of(c("source_name", "source", "sourceName")),
    pub_day
  ) %>%
  slice_head(n = 10) %>%
  kable(caption = "Sample Cleaned Headlines") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width        = FALSE
  )

Sample Cleaned Headlines
query	title_clean	source_name	pub_day
Coherent Corp	the occurrence of a particular state is a predictor of successful travel consultation	Plos.org	2026-06-22
Coherent Corp	is coherent cohr one of the best us stocks to buy and hold for the next 5 years	Yahoo Entertainment	2026-06-19
Coherent Corp	is coherent corp cohr among the best photonic computing stocks to buy now	Yahoo Entertainment	2026-06-19
Coherent Corp	us tells asml it s concerned china may have top chip tool	Moneycontrol	2026-06-19
Coherent Corp	developing transformative pathways for youth inclusion and empowerment in agri	Plos.org	2026-06-18
Coherent Corp	the spacex ipo is a giant unworkable con orchestrated by an overt white supremacist huckster	Karlbode.com	2026-06-18
Coherent Corp	ultrafast laser market size to worth 12 24 billion by 2035 sns insider	GlobeNewswire	2026-06-17
Coherent Corp	coherent corp pours 650m into texas photonics facility as nvidia deepens ai optics partnership	Crypto Briefing	2026-06-16
Coherent Corp	3 stocks diluting shareholders to fund big long	MarketBeat	2026-06-16
Coherent Corp	the department of commerce s chips program announces a letter of intent with coherent for up to 50 million to expand indium phosphide production	Nist.gov	2026-06-16

# One row per headline word: tokenise the cleaned titles, drop stop words,
# then discard very short tokens and tokens made only of digits.
news_tokens <- news_clean %>%
  select(query, title_clean) %>%
  unnest_tokens(output = word, input = title_clean) %>%
  anti_join(stop_words, by = "word") %>%
  filter(nchar(word) > 2) %>%
  filter(!str_detect(word, "^\\d+$"))

# The 20 most frequent words across all headlines, most frequent first.
top_words <- news_tokens %>%
  count(word) %>%
  arrange(desc(n)) %>%
  slice_head(n = 20)

top_words %>%
  kable(caption = "Top 20 Words Across All Headlines") %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width        = FALSE
  )

Top 20 Words Across All Headlines
word	n
marvell	11
lumentum	10
stock	10
technology	10
buy	9
holdings	7
lite	7
stocks	7
market	6
mrvl	6
optical	6
coherent	5
billion	4
growth	4
optics	4
worth	4
analysis	3
inside	3
networking	3
news	3

# Horizontal bar chart of the top words, ordered and shaded by frequency.
ggplot(
  data    = mutate(top_words, word = fct_reorder(word, n)),
  mapping = aes(x = n, y = word, fill = n)
) +
  geom_col(show.legend = FALSE) +
  scale_fill_gradient(low = "#a8d8ea", high = "#0077b6") +
  theme_minimal(base_size = 13) +
  labs(
    title    = "Top 20 Words in News Headlines",
    subtitle = "Coherent Corp., Lumentum Holdings, Marvell Technology, Corning",
    x        = "Count",
    y        = NULL,
    caption  = "Source: NewsAPI | Jimmy Zhenning Xu, Ph.D. | github.com/utjimmyx"
  )

# Word frequencies for the cloud; words seen only once are left out.
word_freq <- news_tokens %>%
  count(word, sort = TRUE) %>%
  filter(n >= 2)

# Fix the RNG seed so the word cloud layout is reproducible.
set.seed(42)
with(
  word_freq,
  wordcloud(
    words        = word,
    freq         = n,
    min.freq     = 1,
    max.words    = 80,
    random.order = FALSE,
    colors       = brewer.pal(8, "Dark2"),
    scale        = c(3.5, 0.5)
  )
)
title("News Headline Word Cloud — Trending Tickers")

afinn <- get_sentiments("afinn")

# Per-topic AFINN summary over the headline words found in the lexicon:
# number of matched words, mean score (3 decimals) and total score,
# ordered from most to least positive mean.
sentiment_afinn <- news_tokens %>%
  inner_join(afinn, by = "word") %>%
  group_by(query) %>%
  summarise(
    total_words    = n(),
    mean_sentiment = round(mean(value), digits = 3),
    sum_sentiment  = sum(value),
    .groups        = "drop"
  ) %>%
  arrange(desc(mean_sentiment))

sentiment_afinn %>%
  kable(caption = "AFINN Sentiment Score by Topic") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width        = FALSE
  )

AFINN Sentiment Score by Topic
query	total_words	mean_sentiment	sum_sentiment
Marvell Technology	4	1.750	7
Corning	13	1.538	20
Lumentum Holdings	8	1.250	10
Coherent Corp	13	0.923	12

# Bar chart of the mean AFINN score per topic, coloured by the sign of the
# mean, with a dashed reference line at zero.
sentiment_afinn %>%
  mutate(
    sentiment_dir = if_else(mean_sentiment >= 0, "Positive", "Negative"),
    query         = fct_reorder(query, mean_sentiment)
  ) %>%
  ggplot(mapping = aes(x = mean_sentiment, y = query, fill = sentiment_dir)) +
  geom_col(width = 0.6) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray40") +
  scale_fill_manual(values = c("Positive" = "#2ecc71", "Negative" = "#e74c3c")) +
  labs(
    title   = "Mean AFINN Sentiment Score by Topic",
    x       = "Mean Sentiment Score",
    y       = NULL,
    fill    = NULL,
    caption = "Source: NewsAPI | Jimmy Zhenning Xu, Ph.D. | github.com/utjimmyx"
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "top")

bing <- get_sentiments("bing")

# Per-topic counts of positive and negative headline words (Bing lexicon)
# and their difference.
sentiment_bing <- news_tokens %>%
  inner_join(bing, by = "word") %>%
  # Fix the factor levels so that, together with `names_expand = TRUE`, both
  # the `negative` and `positive` columns exist after pivoting even when one
  # polarity never occurs in the matched words.
  mutate(sentiment = factor(sentiment, levels = c("negative", "positive"))) %>%
  count(query, sentiment) %>%
  pivot_wider(
    names_from   = sentiment,
    values_from  = n,
    values_fill  = 0L,
    names_expand = TRUE
  ) %>%
  mutate(net_sentiment = positive - negative)

sentiment_bing %>%
  kable(caption = "Bing Sentiment Count by Topic") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)

Bing Sentiment Count by Topic
query	negative	positive	net_sentiment
Coherent Corp	8	12	4
Corning	4	9	5
Lumentum Holdings	1	1	0
Marvell Technology	0	5	5

# Faceted bar chart of the ten most frequent Bing-matched words for each
# polarity. reorder_within()/scale_y_reordered() sort the bars inside each
# facet independently.
news_tokens %>%
  inner_join(bing, by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  group_by(sentiment) %>%
  slice_head(n = 10) %>%
  ungroup() %>%
  mutate(word = reorder_within(word, n, sentiment)) %>%
  ggplot(mapping = aes(x = n, y = word, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ sentiment, scales = "free_y") +
  scale_fill_manual(values = c("positive" = "#2ecc71", "negative" = "#e74c3c")) +
  scale_y_reordered() +
  theme_minimal(base_size = 12) +
  labs(
    title   = "Top Positive & Negative Words in Headlines",
    x       = "Count", y = NULL,
    caption = "Source: NewsAPI | Jimmy Zhenning Xu, Ph.D. | github.com/utjimmyx"
  )

nrc <- get_sentiments("nrc")

# Share of each NRC emotion among a topic's emotion-tagged headline words.
# The generic "positive"/"negative" categories are excluded so only emotions
# remain.
emotion_nrc <- news_tokens %>%
  # A token can occur many times and an NRC word can carry several
  # categories, so this join is many-to-many by design; declaring it avoids
  # the dplyr warning about unexpected row multiplication.
  inner_join(nrc, by = "word", relationship = "many-to-many") %>%
  filter(!sentiment %in% c("positive", "negative")) %>%
  count(query, sentiment) %>%
  group_by(query) %>%
  mutate(prop = n / sum(n)) %>%
  # Drop the grouping so later operations on emotion_nrc are not silently
  # performed per topic.
  ungroup()

ggplot(emotion_nrc, aes(x = sentiment, y = prop, fill = query)) +
  geom_col(position = "dodge") +
  scale_y_continuous(labels = percent_format()) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title   = "NRC Emotion Proportions by Topic",
    x       = "Emotion",
    y       = "Proportion of Emotional Words",
    fill    = "Topic",
    caption = "Source: NewsAPI | Jimmy Zhenning Xu, Ph.D. | github.com/utjimmyx"
  ) +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1),
        legend.position = "top")

# Six highest TF-IDF words per topic, treating each topic's headlines as one
# document.
tfidf_words <- news_tokens %>%
  count(query, word) %>%
  bind_tf_idf(word, query, n) %>%
  group_by(query) %>%
  # slice_max() keeps ties by default, which can return far more than six
  # rows per topic when many words share the same score; cap it at exactly
  # six so every facet stays readable.
  slice_max(tf_idf, n = 6, with_ties = FALSE) %>%
  ungroup()

tfidf_words %>%
  mutate(word = reorder_within(word, tf_idf, query)) %>%
  ggplot(aes(x = tf_idf, y = word, fill = query)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ query, scales = "free_y", ncol = 2) +
  scale_y_reordered() +
  scale_fill_brewer(palette = "Set1") +
  labs(
    title    = "Top TF-IDF Terms by Topic",
    subtitle = "Words most distinctive to each news topic",
    x        = "TF-IDF Score", y = NULL,
    caption  = "Source: NewsAPI | Jimmy Zhenning Xu, Ph.D. | github.com/utjimmyx"
  ) +
  theme_minimal(base_size = 12)

# Combine the AFINN and Bing results into one presentation table, one row per
# topic, with display-friendly column names.
summary_tbl <- sentiment_afinn %>%
  left_join(
    select(sentiment_bing, query, positive, negative, net_sentiment),
    by = "query"
  ) %>%
  select(
    Topic           = query,
    `Words Matched` = total_words,
    `Mean AFINN`    = mean_sentiment,
    `AFINN Sum`     = sum_sentiment,
    Positive        = positive,
    Negative        = negative,
    `Net (Bing)`    = net_sentiment
  )

# Colour the mean AFINN column (third column) by the sign of the score.
summary_tbl %>%
  kable(caption = "Sentiment Summary: All Topics") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width        = FALSE
  ) %>%
  column_spec(
    3,
    color = if_else(summary_tbl[["Mean AFINN"]] >= 0, "darkgreen", "red")
  )

Sentiment Summary: All Topics
Topic	Words Matched	Mean AFINN	AFINN Sum	Positive	Negative	Net (Bing)
Marvell Technology	4	1.750	7	5	0	5
Corning	13	1.538	20	9	4	5
Lumentum Holdings	8	1.250	10	1	1	0
Coherent Corp	13	0.923	12	12	8	4

News Sentiment & Text Analytics: Trending Tickers

Josh Randolph

2026-06-24