# Install only the packages that are not already present, so re-running or
# knitting this script does not reinstall everything each time.
# Wrapped in local() so the helper variables do not leak into the workspace.
local({
  required_pkgs <- c(
    "tidyverse", "tidytext", "textdata", "jsonlite",
    "wordcloud", "RColorBrewer", "lubridate",
    "scales", "knitr", "kableExtra"
  )
  # system.file() returns "" for a package that is not installed; unlike
  # requireNamespace() it does not load the package as a side effect.
  is_installed <- vapply(
    required_pkgs,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1)
  )
  missing_pkgs <- required_pkgs[!is_installed]
  if (length(missing_pkgs) > 0) {
    install.packages(missing_pkgs)
  }
})
library(textdata)
# Print the AFINN lexicon from textdata; the output that follows shows a
# tibble with a `word` column and a numeric `value` column.
lexicon_afinn()
## # A tibble: 2,477 × 2
## word value
## <chr> <dbl>
## 1 abandon -2
## 2 abandoned -2
## 3 abandons -2
## 4 abducted -2
## 5 abduction -2
## 6 abductions -2
## 7 abhor -3
## 8 abhorred -3
## 9 abhorrent -3
## 10 abhors -3
## # ℹ 2,467 more rows
# Print the Bing lexicon from textdata; the output that follows shows a
# tibble with a `word` column and a character `sentiment` column.
lexicon_bing()
## # A tibble: 6,789 × 2
## word sentiment
## <chr> <chr>
## 1 2-faced negative
## 2 2-faces negative
## 3 abnormal negative
## 4 abolish negative
## 5 abominable negative
## 6 abominably negative
## 7 abominate negative
## 8 abomination negative
## 9 abort negative
## 10 aborted negative
## # ℹ 6,779 more rows
# Print the NRC lexicon from textdata; the output that follows shows a
# `word` / `sentiment` tibble in which one word can occupy several rows
# (e.g. "abandon" is listed under fear, negative and sadness).
lexicon_nrc()
## # A tibble: 13,872 × 2
## word sentiment
## <chr> <chr>
## 1 abacus trust
## 2 abandon fear
## 3 abandon negative
## 4 abandon sadness
## 5 abandoned anger
## 6 abandoned fear
## 7 abandoned negative
## 8 abandoned sadness
## 9 abandonment anger
## 10 abandonment fear
## # ℹ 13,862 more rows
library(tidyverse)
library(tidytext)
library(textdata)
library(jsonlite)
library(wordcloud)
library(RColorBrewer)
library(lubridate)
library(scales)
library(knitr)
library(kableExtra)
## The NewsAPI secret is read from the NEWS_API_KEY environment variable
## rather than typed into this script; set that variable before running.
## Sys.getenv() returns "" when the variable is not set.
api_key <- Sys.getenv("NEWS_API_KEY")
#' Fetch recent English-language articles from NewsAPI for one search term
#'
#' Queries the `/v2/everything` endpoint (sorted by `publishedAt`) and returns
#' the `articles` element of the JSON response as a tibble.
#'
#' @param query Search term: a single non-empty string. It is URL-encoded
#'   before being placed in the request.
#' @param api_key NewsAPI key: a single non-empty string.
#' @param page_size Number of articles to request: a single whole number >= 1
#'   (default 20).
#' @return A tibble with one row per article. Dots in the flattened column
#'   names are replaced by underscores and a `query` column holding the search
#'   term is appended.
#' @details Stops with an informative error when an argument is invalid, when
#'   the request fails, or when the response `status` is not `"ok"`. The API
#'   key is redacted from any error message raised by a failed request.
fetch_news <- function(query, api_key, page_size = 20) {
  is_string <- function(x) {
    is.character(x) && length(x) == 1L && !is.na(x) && nzchar(x)
  }
  if (!is_string(query)) {
    stop("`query` must be a single non-empty string.", call. = FALSE)
  }
  if (!is_string(api_key)) {
    stop(
      "`api_key` must be a single non-empty string ",
      "(is NEWS_API_KEY set?).",
      call. = FALSE
    )
  }
  if (!is.numeric(page_size) || length(page_size) != 1L ||
    is.na(page_size) || page_size < 1 || page_size != floor(page_size)) {
    stop("`page_size` must be a single whole number >= 1.", call. = FALSE)
  }

  encoded_key <- URLencode(api_key, reserved = TRUE)
  url <- paste0(
    "https://newsapi.org/v2/everything?",
    "q=", URLencode(query, reserved = TRUE),
    "&language=en",
    "&sortBy=publishedAt",
    # as.integer() avoids scientific notation such as "1e+05" in the URL.
    "&pageSize=", as.integer(page_size),
    "&apiKey=", encoded_key
  )

  response <- tryCatch(
    fromJSON(url, flatten = TRUE),
    error = function(e) {
      # Some connection errors echo the full URL; never leak the key.
      msg <- conditionMessage(e)
      msg <- gsub(api_key, "<redacted>", msg, fixed = TRUE)
      msg <- gsub(encoded_key, "<redacted>", msg, fixed = TRUE)
      stop(
        "NewsAPI request for '", query, "' failed: ", msg,
        call. = FALSE
      )
    }
  )

  if (!identical(response$status, "ok")) {
    stop(
      "NewsAPI returned status '", response$status, "' for query '", query,
      "': ", response$message,
      call. = FALSE
    )
  }

  articles <- as_tibble(response$articles)
  articles %>%
    # flatten = TRUE yields names like "source.name"; use underscores instead.
    rename_with(~ str_replace_all(.x, "\\.", "_")) %>%
    mutate(query = query)
}
# Download headlines for each brand in turn and stack them into one tibble.
news_raw <- c("Honda", "Toyota", "Nissan") %>%
  lapply(fetch_news, api_key = api_key) %>%
  bind_rows()

# Quick structural overview of the combined download.
glimpse(news_raw)
## Rows: 59
## Columns: 10
## $ author <chr> "Bloomberg News", "Bloomberg News", "Richard Dredge", "Gar…
## $ title <chr> "Chinese Carmakers Weighing Canada Manufacturing Deals, Jo…
## $ description <chr> "At least four Chinese carmakers are looking at how to use…
## $ url <chr> "https://financialpost.com/pmn/business-pmn/chinese-carmak…
## $ urlToImage <chr> NA, "https://smartcdn.gprod.postmedia.digital/financialpos…
## $ publishedAt <chr> "2026-06-22T16:27:25Z", "2026-06-22T16:27:25Z", "2026-06-2…
## $ content <chr> "(Bloomberg) At least four Chinese carmakers are looking a…
## $ source_id <chr> "financial-post", "financial-post", NA, NA, NA, NA, NA, NA…
## $ source_name <chr> "Financial Post", "Financial Post", "Autocar", "autosport.…
## $ query <chr> "Honda", "Honda", "Honda", "Honda", "Honda", "Honda", "Hon…
# Clean the raw headlines: drop rows without a title, parse the publication
# timestamp, normalise the title text, and keep one row per distinct
# cleaned title.
news_clean <- news_raw %>%
  filter(!is.na(.data$title)) %>%
  mutate(
    # quiet = TRUE: unparseable timestamps become NA without a warning.
    pub_date = ymd_hms(.data$publishedAt, quiet = TRUE),
    pub_day = as.Date(pub_date),
    # Strip a trailing " - Publisher" suffix. The dash must be surrounded by
    # whitespace; the previous pattern ("\\s*-\\s*") also matched the hyphen
    # inside a final hyphenated word, so a title ending in "all-new" was
    # truncated to "all".
    title_clean = str_remove(.data$title, "\\s+-\\s+[^-]+$"),
    # Replace punctuation with spaces, then collapse repeated whitespace.
    title_clean = str_squish(
      str_replace_all(title_clean, "[^[:alnum:][:space:]]", " ")
    ),
    title_clean = str_to_lower(title_clean)
  ) %>%
  # A headline returned for several queries is kept only for the first one.
  distinct(title_clean, .keep_all = TRUE)
cat("Total unique headlines:", nrow(news_clean), "\n")
## Total unique headlines: 51
# Render the first ten cleaned headlines as a styled table. The source column
# is picked by whichever of the candidate names is present.
news_clean %>%
  select(
    query,
    title_clean,
    any_of(c("source_name", "source", "sourceName")),
    pub_day
  ) %>%
  slice_head(n = 10) %>%
  kable(caption = "Sample Cleaned Headlines") %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover")
  )
Sample Cleaned Headlines
|
query
|
title_clean
|
source_name
|
pub_day
|
|
Honda
|
chinese carmakers weighing canada manufacturing deals joly says
|
Financial Post
|
2026-06-22
|
|
Honda
|
the greatest road car engines ever made
|
Autocar
|
2026-06-22
|
|
Honda
|
obituary ferrari s first monocoque maker john thompson
|
autosport.com
|
2026-06-22
|
|
Honda
|
kia s electric van is becoming so popular that supply is now being
rationed
|
Electrek
|
2026-06-22
|
|
Honda
|
joan mir lauds honda s smart move as he tries out its 2027 motogp
package at brno test
|
Motorsport.com
|
2026-06-22
|
|
Honda
|
adhesion and polarity
|
Plos.org
|
2026-06-22
|
|
Honda
|
the occurrence of a particular state is a predictor of successful travel
consultation
|
Plos.org
|
2026-06-22
|
|
Honda
|
hot ones extra heat brings the fire to netflix beginning with the home
run derby
|
Thefutoncritic.com
|
2026-06-22
|
|
Honda
|
exclusive the ai company powering public safety operations for the 2026
world cup just raised 250 million
|
Fortune
|
2026-06-22
|
|
Honda
|
get ready for prime day oedro announces exclusive deals for vehicle
owners
|
PRNewswire
|
2026-06-22
|
# One row per word per headline, minus stop words, pure-digit tokens and
# tokens of two characters or fewer.
news_tokens <- news_clean %>%
  select(query, title_clean) %>%
  unnest_tokens(output = word, input = title_clean) %>%
  anti_join(stop_words, by = "word") %>%
  filter(!str_detect(word, "^\\d+$")) %>%
  filter(nchar(word) > 2)

# The twenty most frequent words across all topics.
top_words <- news_tokens %>%
  count(word, sort = TRUE) %>%
  head(20)

top_words %>%
  kable(caption = "Top 20 Words Across All Headlines") %>%
  kable_styling(full_width = FALSE, bootstrap_options = "striped")
Top 20 Words Across All Headlines
|
word
|
n
|
|
draft
|
5
|
|
nba
|
5
|
|
nissan
|
5
|
|
car
|
4
|
|
cup
|
3
|
|
exclusive
|
3
|
|
motogp
|
3
|
|
nvidia
|
3
|
|
quality
|
3
|
|
safety
|
3
|
|
time
|
3
|
|
toyota
|
3
|
|
announces
|
2
|
|
aston
|
2
|
|
believing
|
2
|
|
busts
|
2
|
|
customers
|
2
|
|
czech
|
2
|
|
deals
|
2
|
|
electric
|
2
|
# Horizontal bar chart of the top words, most frequent at the top; bar colour
# deepens with the count.
top_words %>%
  mutate(word = fct_reorder(word, n)) %>%
  ggplot(mapping = aes(x = n, y = word, fill = n)) +
  geom_col(show.legend = FALSE) +
  scale_fill_gradient(low = "#a8d8ea", high = "#0077b6") +
  labs(
    x = "Count",
    y = NULL,
    title = "Top 20 Words in News Headlines",
    subtitle = "Honda, Toyota, Nissan",
    caption = "Source: NewsAPI"
  ) +
  theme_minimal(base_size = 13)

# Word cloud of every token that occurs at least twice across the headlines.
word_freq <- news_tokens %>%
  count(word, sort = TRUE) %>%
  filter(n >= 2)

# Fix the RNG seed so any randomness in the wordcloud() layout is repeatable.
set.seed(42)
wordcloud(
  words = word_freq$word,
  freq = word_freq$n,
  # word_freq is already restricted to n >= 2, so this threshold is a no-op.
  min.freq = 1,
  max.words = 80,
  random.order = FALSE,
  colors = brewer.pal(8, "Dark2"),
  scale = c(3.5, 0.5)
)
# The title names the brands actually queried; the previous
# "Trending Tickers" wording did not match this report's subject.
title("News Headline Word Cloud — Honda, Toyota, Nissan")

# AFINN scoring: keep only tokens found in the lexicon, then summarise the
# matched-word count, mean score and total score for each topic.
afinn <- get_sentiments("afinn")

sentiment_afinn <- news_tokens %>%
  inner_join(afinn, by = "word") %>%
  group_by(query) %>%
  summarise(
    total_words = n(),
    mean_sentiment = round(mean(value), digits = 3),
    sum_sentiment = sum(value),
    .groups = "drop"
  ) %>%
  arrange(desc(mean_sentiment))

sentiment_afinn %>%
  kable(caption = "AFINN Sentiment Score by Topic") %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover")
  )
AFINN Sentiment Score by Topic
|
query
|
total_words
|
mean_sentiment
|
sum_sentiment
|
|
Honda
|
11
|
1.364
|
15
|
|
Toyota
|
8
|
0.875
|
7
|
|
Nissan
|
6
|
-1.000
|
-6
|
# Bar chart of the mean AFINN score per topic, coloured by sign, with a dashed
# reference line at zero.
sentiment_afinn %>%
  mutate(
    query = fct_reorder(query, mean_sentiment),
    sentiment_dir = if_else(mean_sentiment >= 0, "Positive", "Negative")
  ) %>%
  ggplot(mapping = aes(x = mean_sentiment, y = query, fill = sentiment_dir)) +
  geom_col(width = 0.6) +
  scale_fill_manual(
    values = c("Positive" = "#2ecc71", "Negative" = "#e74c3c")
  ) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray40") +
  labs(
    x = "Mean Sentiment Score",
    y = NULL,
    fill = NULL,
    title = "Mean AFINN Sentiment Score by Topic",
    caption = "Source: NewsAPI"
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "top")

# Bing scoring: count positive and negative lexicon matches per topic and
# derive the net sentiment (positive minus negative).
bing <- get_sentiments("bing")

sentiment_bing <- news_tokens %>%
  inner_join(bing, by = "word") %>%
  # Fix both polarity levels up front. Together with names_expand = TRUE this
  # guarantees `negative` and `positive` columns even when one polarity never
  # occurs in the sample; previously the missing column made the later
  # arithmetic fail with "object not found".
  mutate(sentiment = factor(sentiment, levels = c("negative", "positive"))) %>%
  count(query, sentiment) %>%
  pivot_wider(
    names_from = sentiment,
    values_from = n,
    values_fill = 0L,
    names_expand = TRUE
  ) %>%
  mutate(net_sentiment = positive - negative)

sentiment_bing %>%
  kable(caption = "Bing Sentiment Count by Topic") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)
Bing Sentiment Count by Topic
|
query
|
negative
|
positive
|
net_sentiment
|
|
Honda
|
2
|
9
|
7
|
|
Nissan
|
4
|
2
|
-2
|
|
Toyota
|
5
|
2
|
-3
|
# Up to ten most frequent Bing-matched words for each polarity, drawn in
# side-by-side facets with independently ordered y axes.
news_tokens %>%
  inner_join(bing, by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  group_by(sentiment) %>%
  slice_head(n = 10) %>%
  ungroup() %>%
  mutate(word = reorder_within(word, by = n, within = sentiment)) %>%
  ggplot(mapping = aes(x = n, y = word, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ sentiment, scales = "free_y") +
  scale_y_reordered() +
  scale_fill_manual(
    values = c("positive" = "#2ecc71", "negative" = "#e74c3c")
  ) +
  labs(
    x = "Count",
    y = NULL,
    title = "Top Positive & Negative Words in Headlines",
    caption = "Source: NewsAPI"
  ) +
  theme_minimal(base_size = 12)

# NRC emotions: share of each emotion among the emotion-tagged words of each
# topic (the positive/negative polarity tags are excluded).
nrc <- get_sentiments("nrc")

emotion_nrc <- news_tokens %>%
  # A token can repeat across headlines and an NRC word can carry several
  # emotions, so this join is many-to-many by design; declaring it avoids the
  # dplyr (>= 1.1.0) warning about an unexpected many-to-many relationship.
  inner_join(nrc, by = "word", relationship = "many-to-many") %>%
  filter(!sentiment %in% c("positive", "negative")) %>%
  count(query, sentiment) %>%
  group_by(query) %>%
  mutate(prop = n / sum(n)) %>%
  # Drop the grouping so later operations on emotion_nrc are not silently
  # performed per topic.
  ungroup()

ggplot(emotion_nrc, aes(x = sentiment, y = prop, fill = query)) +
  geom_col(position = "dodge") +
  scale_y_continuous(labels = percent_format()) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "NRC Emotion Proportions by Topic",
    x = "Emotion",
    y = "Proportion of Emotional Words",
    fill = "Topic",
    caption = "Source: NewsAPI"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.x = element_text(angle = 30, hjust = 1),
    legend.position = "top"
  )

# TF-IDF with each topic treated as one document: the six terms most
# distinctive to each topic.
tfidf_words <- news_tokens %>%
  count(query, word) %>%
  bind_tf_idf(word, query, n) %>%
  group_by(query) %>%
  # with_ties = FALSE caps every topic at exactly six terms. In a corpus this
  # small many words share the same tf-idf value, and the default
  # (with_ties = TRUE) would keep all of them, flooding the facets.
  slice_max(tf_idf, n = 6, with_ties = FALSE) %>%
  ungroup()

tfidf_words %>%
  mutate(word = reorder_within(word, tf_idf, query)) %>%
  ggplot(aes(x = tf_idf, y = word, fill = query)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ query, scales = "free_y", ncol = 2) +
  scale_y_reordered() +
  scale_fill_brewer(palette = "Set1") +
  labs(
    title = "Top TF-IDF Terms by Topic",
    subtitle = "Words most distinctive to each news topic",
    x = "TF-IDF Score",
    y = NULL,
    caption = "Source: NewsAPI"
  ) +
  theme_minimal(base_size = 12)

# Combine the AFINN and Bing results into one presentation table, one row per
# topic, with reader-friendly column headings.
summary_tbl <- sentiment_afinn %>%
  left_join(
    select(sentiment_bing, query, positive, negative, net_sentiment),
    by = "query"
  ) %>%
  rename(
    Topic = query,
    `Words Matched` = total_words,
    `Mean AFINN` = mean_sentiment,
    `AFINN Sum` = sum_sentiment,
    Positive = positive,
    Negative = negative,
    `Net (Bing)` = net_sentiment
  )

# Column 3 is `Mean AFINN`: green when non-negative, red otherwise.
summary_tbl %>%
  kable(caption = "Sentiment Summary: All Topics") %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed")
  ) %>%
  column_spec(
    3,
    color = ifelse(summary_tbl$`Mean AFINN` >= 0, "green", "red")
  )
Sentiment Summary: All Topics
|
Topic
|
Words Matched
|
Mean AFINN
|
AFINN Sum
|
Positive
|
Negative
|
Net (Bing)
|
|
Honda
|
11
|
1.364
|
15
|
9
|
2
|
7
|
|
Toyota
|
8
|
0.875
|
7
|
2
|
5
|
-3
|
|
Nissan
|
6
|
-1.000
|
-6
|
2
|
4
|
-2
|
Analysis
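Honda had the highest peak in this pull of headlines: it recorded the
highest mean AFINN sentiment score (1.364) and the strongest net positive
Bing sentiment (+7), outperforming both Toyota and Nissan. Toyota's
results were mixed, with a positive mean AFINN score (0.875) but a
negative net Bing sentiment (-3), while Nissan was negative on both
measures (mean AFINN -1.000, net Bing -2). Honda's lead may be because
its headlines contained more positive terms (words like wins, win, won
are scored as positive) and fewer negative terms (9 positive versus 2
negative Bing matches), resulting in a higher overall sentiment score
than Toyota or Nissan. Because the headlines come from a live API, these
figures change each time the report is re-run.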