# Packages this analysis depends on.
required_pkgs <- c(
  "tidyverse", "tidytext", "textdata", "jsonlite",
  "wordcloud", "RColorBrewer", "lubridate",
  "scales", "knitr", "kableExtra"
)

# Install only what is missing so re-running the script does not reinstall
# (and possibly upgrade) every package each time.
is_installed <- vapply(
  required_pkgs,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
missing_pkgs <- required_pkgs[!is_installed]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

library(textdata)
# Print the AFINN lexicon provided by textdata.
lexicon_afinn()
## # A tibble: 2,477 × 2
## word value
## <chr> <dbl>
## 1 abandon -2
## 2 abandoned -2
## 3 abandons -2
## 4 abducted -2
## 5 abduction -2
## 6 abductions -2
## 7 abhor -3
## 8 abhorred -3
## 9 abhorrent -3
## 10 abhors -3
## # ℹ 2,467 more rows
## # A tibble: 6,789 × 2
## word sentiment
## <chr> <chr>
## 1 2-faced negative
## 2 2-faces negative
## 3 abnormal negative
## 4 abolish negative
## 5 abominable negative
## 6 abominably negative
## 7 abominate negative
## 8 abomination negative
## 9 abort negative
## 10 aborted negative
## # ℹ 6,779 more rows
## # A tibble: 13,872 × 2
## word sentiment
## <chr> <chr>
## 1 abacus trust
## 2 abandon fear
## 3 abandon negative
## 4 abandon sadness
## 5 abandoned anger
## 6 abandoned fear
## 7 abandoned negative
## 8 abandoned sadness
## 9 abandonment anger
## 10 abandonment fear
## # ℹ 13,862 more rows
library(tidyverse)
library(tidytext)
library(textdata)
library(jsonlite)
library(wordcloud)
library(RColorBrewer)
library(lubridate)
library(scales)
library(knitr)
library(kableExtra)
## Supply your NewsAPI secret through the NEWSAPI_KEY environment variable
## (for example a line `NEWSAPI_KEY=<your key>` in ~/.Renviron) so the key is
## never stored in, or committed with, this script.
api_key <- Sys.getenv("NEWSAPI_KEY")
if (!nzchar(api_key)) {
  stop(
    "NEWSAPI_KEY is not set. Add `NEWSAPI_KEY=<your key>` to ~/.Renviron ",
    "and restart R.",
    call. = FALSE
  )
}
#' Fetch recent English-language articles from the NewsAPI "everything" endpoint.
#'
#' @param query Search text sent as the `q` parameter; a single non-empty string.
#' @param api_key NewsAPI key sent as the `apiKey` parameter; a single
#'   non-empty string.
#' @param page_size Value sent as the `pageSize` parameter. Defaults to 20.
#' @return A tibble built from the `articles` element of the response, with "."
#'   in column names replaced by "_" and a `query` column holding `query`.
fetch_news <- function(query, api_key, page_size = 20) {
  if (!is.character(query) || length(query) != 1L ||
        is.na(query) || !nzchar(query)) {
    stop("`query` must be a single non-empty string.", call. = FALSE)
  }
  if (!is.character(api_key) || length(api_key) != 1L ||
        is.na(api_key) || !nzchar(api_key)) {
    stop("`api_key` must be a single non-empty string.", call. = FALSE)
  }

  url <- paste0(
    "https://newsapi.org/v2/everything?",
    "q=", URLencode(query, reserved = TRUE),
    "&language=en",
    "&sortBy=publishedAt",
    "&pageSize=", page_size,
    "&apiKey=", api_key
  )

  response <- tryCatch(
    fromJSON(url, flatten = TRUE),
    error = function(e) {
      # The request URL embeds the key, so scrub it from the error message
      # before re-raising.
      detail <- gsub(api_key, "<redacted>", conditionMessage(e), fixed = TRUE)
      stop(
        "NewsAPI request for '", query, "' failed: ", detail,
        call. = FALSE
      )
    }
  )

  # Surface an error payload explicitly instead of returning an empty table.
  if (identical(response$status, "error")) {
    stop(
      "NewsAPI returned an error for '", query, "': ", response$message,
      call. = FALSE
    )
  }

  articles <- as_tibble(response$articles)
  articles %>%
    # Flattened nested fields arrive as e.g. `source.name`; make them
    # syntactic (`source_name`).
    rename_with(~ str_replace_all(.x, "\\.", "_")) %>%
    mutate(query = query)
}
# Run one NewsAPI search per company name, in the order listed, and stack the
# per-company results into a single tibble.
news_raw <- c(
  "Coherent Corp",
  "Lumentum Holdings",
  "Marvell Technology",
  "Corning"
) %>%
  lapply(fetch_news, api_key = api_key) %>%
  bind_rows()

# Quick structural check of the combined result.
glimpse(news_raw)
## Rows: 79
## Columns: 10
## $ author <chr> "Hidehito Honda, Takatomi Kubo, Ryosuke Hisamatsu, Yoshima…
## $ title <chr> "The occurrence of a particular state is a predictor of su…
## $ description <chr> "Dyadic interactions, such as consultations between clerks…
## $ url <chr> "https://journals.plos.org/plosone/article?id=10.1371/jour…
## $ urlToImage <chr> "https://journals.plos.org/plosone/article/figure/image?id…
## $ publishedAt <chr> "2026-06-22T14:00:00Z", "2026-06-19T10:22:37Z", "2026-06-1…
## $ content <chr> "Citation: Honda H, Kubo T, Hisamatsu R, Ohmoto Y, Ikeda K…
## $ source_id <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ source_name <chr> "Plos.org", "Yahoo Entertainment", "Yahoo Entertainment", …
## $ query <chr> "Coherent Corp", "Coherent Corp", "Coherent Corp", "Cohere…
# Clean the raw headlines: drop rows without a title, parse the publication
# timestamp, normalise the title text, and keep one row per distinct cleaned
# title.
news_clean <- news_raw %>%
  filter(!is.na(.data$title)) %>%
  mutate(
    pub_date = ymd_hms(.data$publishedAt, quiet = TRUE),
    pub_day = as.Date(pub_date),
    # Remove a trailing " - Publisher" suffix. Whitespace is required on both
    # sides of the hyphen so hyphenated words at the end of a title
    # (e.g. "long-term growth") are no longer cut off.
    title_clean = str_remove(.data$title, "\\s+-\\s+[^-]+$"),
    # Replace punctuation with spaces, then collapse repeated whitespace.
    title_clean = str_squish(
      str_replace_all(title_clean, "[^[:alnum:][:space:]]", " ")
    ),
    title_clean = str_to_lower(title_clean)
  ) %>%
  distinct(title_clean, .keep_all = TRUE)
cat("Total unique headlines:", nrow(news_clean), "\n")
## Total unique headlines: 69
# Show the first ten cleaned headlines as a styled table. any_of() keeps
# whichever of the listed source columns are present.
news_clean %>%
  head(n = 10) %>%
  select(
    query,
    title_clean,
    any_of(c("source_name", "source", "sourceName")),
    pub_day
  ) %>%
  kable(caption = "Sample Cleaned Headlines") %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover")
  )
Sample Cleaned Headlines
|
query
|
title_clean
|
source_name
|
pub_day
|
|
Coherent Corp
|
the occurrence of a particular state is a predictor of successful travel
consultation
|
Plos.org
|
2026-06-22
|
|
Coherent Corp
|
is coherent cohr one of the best us stocks to buy and hold for the next
5 years
|
Yahoo Entertainment
|
2026-06-19
|
|
Coherent Corp
|
is coherent corp cohr among the best photonic computing stocks to buy
now
|
Yahoo Entertainment
|
2026-06-19
|
|
Coherent Corp
|
us tells asml it s concerned china may have top chip tool
|
Moneycontrol
|
2026-06-19
|
|
Coherent Corp
|
developing transformative pathways for youth inclusion and empowerment
in agri
|
Plos.org
|
2026-06-18
|
|
Coherent Corp
|
the spacex ipo is a giant unworkable con orchestrated by an overt white
supremacist huckster
|
Karlbode.com
|
2026-06-18
|
|
Coherent Corp
|
ultrafast laser market size to worth 12 24 billion by 2035 sns insider
|
GlobeNewswire
|
2026-06-17
|
|
Coherent Corp
|
coherent corp pours 650m into texas photonics facility as nvidia deepens
ai optics partnership
|
Crypto Briefing
|
2026-06-16
|
|
Coherent Corp
|
3 stocks diluting shareholders to fund big long
|
MarketBeat
|
2026-06-16
|
|
Coherent Corp
|
the department of commerce s chips program announces a letter of intent
with coherent for up to 50 million to expand indium phosphide production
|
Nist.gov
|
2026-06-16
|
# Split each cleaned headline into one word per row, then discard stop words,
# purely numeric tokens, and tokens of two characters or fewer.
news_tokens <- news_clean %>%
  select(query, title_clean) %>%
  unnest_tokens(output = word, input = title_clean) %>%
  anti_join(stop_words, by = "word") %>%
  filter(
    str_detect(word, "^\\d+$", negate = TRUE),
    nchar(word) > 2
  )

# The twenty most frequent remaining words across all topics.
top_words <- news_tokens %>%
  count(word, sort = TRUE) %>%
  head(n = 20)

kable_styling(
  kable(top_words, caption = "Top 20 Words Across All Headlines"),
  bootstrap_options = "striped",
  full_width = FALSE
)
Top 20 Words Across All Headlines
|
word
|
n
|
|
marvell
|
11
|
|
lumentum
|
10
|
|
stock
|
10
|
|
technology
|
10
|
|
buy
|
9
|
|
holdings
|
7
|
|
lite
|
7
|
|
stocks
|
7
|
|
market
|
6
|
|
mrvl
|
6
|
|
optical
|
6
|
|
coherent
|
5
|
|
billion
|
4
|
|
growth
|
4
|
|
optics
|
4
|
|
worth
|
4
|
|
analysis
|
3
|
|
inside
|
3
|
|
networking
|
3
|
|
news
|
3
|
# Horizontal bar chart of the top words, ordered by count, with bar colour
# also encoding the count.
ggplot(
  top_words,
  aes(x = n, y = fct_reorder(word, n), fill = n)
) +
  geom_col(show.legend = FALSE) +
  scale_fill_gradient(low = "#a8d8ea", high = "#0077b6") +
  theme_minimal(base_size = 13) +
  labs(
    title = "Top 20 Words in News Headlines",
    subtitle = "Coherent Corp., Lumentum Holdings, Marvell Technology, Corning",
    caption = "Source: NewsAPI | Jimmy Zhenning Xu, Ph.D. | github.com/utjimmyx",
    x = "Count",
    y = NULL
  )

# Word frequencies for the cloud; words that occur only once are left out.
word_freq <- news_tokens %>%
  count(word, sort = TRUE) %>%
  filter(n > 1)

# Fix the RNG seed so the cloud layout is reproducible between runs.
set.seed(42)
with(
  word_freq,
  wordcloud(
    words = word,
    freq = n,
    scale = c(3.5, 0.5),
    min.freq = 1,
    max.words = 80,
    random.order = FALSE,
    colors = brewer.pal(8, "Dark2")
  )
)
title("News Headline Word Cloud — Trending Tickers")

# AFINN assigns each lexicon word a numeric `value`; only headline words found
# in the lexicon survive the inner join.
afinn <- get_sentiments("afinn")

# Per-topic number of matched words, mean score (3 decimals) and total score,
# sorted from highest to lowest mean.
sentiment_afinn <- news_tokens %>%
  inner_join(afinn, by = "word") %>%
  group_by(query) %>%
  summarise(
    total_words    = n(),
    mean_sentiment = round(mean(value), digits = 3),
    sum_sentiment  = sum(value),
    .groups = "drop"
  ) %>%
  arrange(desc(mean_sentiment))

kable_styling(
  kable(sentiment_afinn, caption = "AFINN Sentiment Score by Topic"),
  bootstrap_options = c("striped", "hover"),
  full_width = FALSE
)
AFINN Sentiment Score by Topic
|
query
|
total_words
|
mean_sentiment
|
sum_sentiment
|
|
Marvell Technology
|
4
|
1.750
|
7
|
|
Corning
|
13
|
1.538
|
20
|
|
Lumentum Holdings
|
8
|
1.250
|
10
|
|
Coherent Corp
|
13
|
0.923
|
12
|
# Bar chart of the mean AFINN score per topic, ordered by score and coloured
# by sign, with a dashed reference line at zero.
sentiment_afinn %>%
  mutate(
    sentiment_dir = ifelse(mean_sentiment < 0, "Negative", "Positive"),
    query = fct_reorder(query, mean_sentiment)
  ) %>%
  ggplot(aes(x = mean_sentiment, y = query, fill = sentiment_dir)) +
  geom_col(width = 0.6) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray40") +
  scale_fill_manual(
    values = c("Positive" = "#2ecc71", "Negative" = "#e74c3c")
  ) +
  labs(
    title = "Mean AFINN Sentiment Score by Topic",
    caption = "Source: NewsAPI | Jimmy Zhenning Xu, Ph.D. | github.com/utjimmyx",
    x = "Mean Sentiment Score",
    y = NULL,
    fill = NULL
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "top")

# Bing labels each lexicon word as "positive" or "negative".
bing <- get_sentiments("bing")

# Per-topic counts of negative and positive words and their difference.
# Both polarities are counted explicitly, so the `negative` and `positive`
# columns always exist even when no headline word of one polarity matched
# (a pivot would omit that column and the later arithmetic would fail).
sentiment_bing <- news_tokens %>%
  inner_join(bing, by = "word") %>%
  group_by(query) %>%
  summarise(
    negative = sum(sentiment == "negative"),
    positive = sum(sentiment == "positive"),
    .groups = "drop"
  ) %>%
  mutate(net_sentiment = positive - negative)

sentiment_bing %>%
  kable(caption = "Bing Sentiment Count by Topic") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)
Bing Sentiment Count by Topic
|
query
|
negative
|
positive
|
net_sentiment
|
|
Coherent Corp
|
8
|
12
|
4
|
|
Corning
|
4
|
9
|
5
|
|
Lumentum Holdings
|
1
|
1
|
0
|
|
Marvell Technology
|
0
|
5
|
5
|
# For each Bing polarity, chart the ten most frequent matching headline words.
news_tokens %>%
  inner_join(bing, by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  group_by(sentiment) %>%
  slice_head(n = 10) %>%
  ungroup() %>%
  # reorder_within() + scale_y_reordered() order the bars within each facet.
  mutate(word = reorder_within(word, by = n, within = sentiment)) %>%
  ggplot(aes(x = n, y = word, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(vars(sentiment), scales = "free_y") +
  scale_y_reordered() +
  scale_fill_manual(
    values = c("positive" = "#2ecc71", "negative" = "#e74c3c")
  ) +
  labs(
    title = "Top Positive & Negative Words in Headlines",
    caption = "Source: NewsAPI | Jimmy Zhenning Xu, Ph.D. | github.com/utjimmyx",
    x = "Count",
    y = NULL
  ) +
  theme_minimal(base_size = 12)

# NRC tags words with emotions as well as positive/negative polarity; one word
# can carry several tags.
nrc <- get_sentiments("nrc")

# Share of each emotion among a topic's emotion-tagged words.
emotion_nrc <- news_tokens %>%
  # Headline words repeat and NRC words carry several tags, so this join is
  # many-to-many by design; declaring it avoids dplyr's warning.
  inner_join(nrc, by = "word", relationship = "many-to-many") %>%
  # Keep the emotion tags only; polarity is handled by the other lexicons.
  filter(!sentiment %in% c("positive", "negative")) %>%
  count(query, sentiment) %>%
  group_by(query) %>%
  mutate(prop = n / sum(n)) %>%
  # Drop the grouping so later operations on emotion_nrc are not silently
  # performed per topic.
  ungroup()

ggplot(emotion_nrc, aes(x = sentiment, y = prop, fill = query)) +
  geom_col(position = "dodge") +
  scale_y_continuous(labels = percent_format()) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "NRC Emotion Proportions by Topic",
    x = "Emotion",
    y = "Proportion of Emotional Words",
    fill = "Topic",
    caption = "Source: NewsAPI | Jimmy Zhenning Xu, Ph.D. | github.com/utjimmyx"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.x = element_text(angle = 30, hjust = 1),
    legend.position = "top"
  )

# TF-IDF with each topic (query) treated as one document; keep the six
# highest-scoring terms per topic.
tfidf_words <- news_tokens %>%
  count(query, word) %>%
  bind_tf_idf(word, query, n) %>%
  group_by(query) %>%
  # Many terms share the same score in a small corpus; without
  # with_ties = FALSE every tied term is kept and a facet can hold far more
  # than six bars.
  slice_max(tf_idf, n = 6, with_ties = FALSE) %>%
  ungroup()

tfidf_words %>%
  # reorder_within() + scale_y_reordered() order the bars within each facet.
  mutate(word = reorder_within(word, tf_idf, query)) %>%
  ggplot(aes(x = tf_idf, y = word, fill = query)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ query, scales = "free_y", ncol = 2) +
  scale_y_reordered() +
  scale_fill_brewer(palette = "Set1") +
  labs(
    title = "Top TF-IDF Terms by Topic",
    subtitle = "Words most distinctive to each news topic",
    x = "TF-IDF Score", y = NULL,
    caption = "Source: NewsAPI | Jimmy Zhenning Xu, Ph.D. | github.com/utjimmyx"
  ) +
  theme_minimal(base_size = 12)

# Combine the AFINN and Bing per-topic results into one table with
# presentation-ready column names.
summary_tbl <- sentiment_afinn %>%
  left_join(
    select(sentiment_bing, query, positive, negative, net_sentiment),
    by = "query"
  ) %>%
  rename(
    Topic           = query,
    `Words Matched` = total_words,
    `Mean AFINN`    = mean_sentiment,
    `AFINN Sum`     = sum_sentiment,
    Positive        = positive,
    Negative        = negative,
    `Net (Bing)`    = net_sentiment
  )

# Render the table; the third column (Mean AFINN) is coloured by sign.
summary_tbl %>%
  kable(caption = "Sentiment Summary: All Topics") %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed")
  ) %>%
  column_spec(
    3,
    color = ifelse(summary_tbl$`Mean AFINN` < 0, "red", "darkgreen")
  )
Sentiment Summary: All Topics
|
Topic
|
Words Matched
|
Mean AFINN
|
AFINN Sum
|
Positive
|
Negative
|
Net (Bing)
|
|
Marvell Technology
|
4
|
1.750
|
7
|
5
|
0
|
5
|
|
Corning
|
13
|
1.538
|
20
|
9
|
4
|
5
|
|
Lumentum Holdings
|
8
|
1.250
|
10
|
1
|
1
|
0
|
|
Coherent Corp
|
13
|
0.923
|
12
|
12
|
8
|
4
|