# Install only the packages that are not already available, so re-running or
# knitting this script does not reinstall everything on each execution.
required_pkgs <- c("tidyverse", "tidytext", "textdata", "jsonlite",
                   "wordcloud", "RColorBrewer", "lubridate",
                   "scales", "knitr", "kableExtra")

# system.file() returns "" for a package that is not installed.
is_installed <- vapply(
  required_pkgs,
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1)
)

if (!all(is_installed)) {
  install.packages(required_pkgs[!is_installed])
}

library(textdata)
# Preview the AFINN lexicon from textdata; the printed result is a two-column
# tibble pairing each `word` with a numeric `value` score.
# NOTE(review): textdata presumably has to download this lexicon on first
# use -- confirm it is cached before knitting non-interactively.
lexicon_afinn()
## # A tibble: 2,477 × 2
##    word       value
##    <chr>      <dbl>
##  1 abandon       -2
##  2 abandoned     -2
##  3 abandons      -2
##  4 abducted      -2
##  5 abduction     -2
##  6 abductions    -2
##  7 abhor         -3
##  8 abhorred      -3
##  9 abhorrent     -3
## 10 abhors        -3
## # ℹ 2,467 more rows
# Preview the Bing lexicon from textdata; the printed result is a two-column
# tibble pairing each `word` with a character `sentiment` label.
# NOTE(review): textdata presumably has to download this lexicon on first
# use -- confirm it is cached before knitting non-interactively.
lexicon_bing()
## # A tibble: 6,789 × 2
##    word        sentiment
##    <chr>       <chr>    
##  1 2-faced     negative 
##  2 2-faces     negative 
##  3 abnormal    negative 
##  4 abolish     negative 
##  5 abominable  negative 
##  6 abominably  negative 
##  7 abominate   negative 
##  8 abomination negative 
##  9 abort       negative 
## 10 aborted     negative 
## # ℹ 6,779 more rows
# Preview the NRC lexicon from textdata; the printed result is a two-column
# tibble in which one `word` can appear on several rows, one per `sentiment`
# label (e.g. "abandon" is listed under fear, negative and sadness).
# NOTE(review): textdata presumably has to download this lexicon on first
# use -- confirm it is cached before knitting non-interactively.
lexicon_nrc()
## # A tibble: 13,872 × 2
##    word        sentiment
##    <chr>       <chr>    
##  1 abacus      trust    
##  2 abandon     fear     
##  3 abandon     negative 
##  4 abandon     sadness  
##  5 abandoned   anger    
##  6 abandoned   fear     
##  7 abandoned   negative 
##  8 abandoned   sadness  
##  9 abandonment anger    
## 10 abandonment fear     
## # ℹ 13,862 more rows
library(tidyverse)
library(tidytext)
library(textdata)
library(jsonlite)
library(wordcloud)
library(RColorBrewer)
library(lubridate)
library(scales)
library(knitr)
library(kableExtra)
## NewsAPI key, read from the NEWS_API_KEY environment variable so the secret
## is never hard-coded in this file. Sys.getenv() returns "" when the
## variable is not set.
api_key <- Sys.getenv("NEWS_API_KEY")
#' Fetch recent English-language articles for a search term from NewsAPI
#'
#' Queries the `/v2/everything` endpoint, sorted by publication time, and
#' returns the `articles` array of the JSON response as a tibble.
#'
#' @param query Single non-empty string to search for.
#' @param api_key Single non-empty string holding the NewsAPI key.
#' @param page_size Number of articles to request (default 20).
#' @return A tibble with one row per article. Dots in the flattened column
#'   names are replaced by underscores, and a `query` column records the
#'   search term that produced each row.
fetch_news <- function(query, api_key, page_size = 20) {
  is_string <- function(x) {
    is.character(x) && length(x) == 1L && !is.na(x) && nzchar(x)
  }

  # Fail fast with a readable message instead of an opaque HTTP failure.
  if (!is_string(query)) {
    stop("`query` must be a single non-empty string.", call. = FALSE)
  }
  if (!is_string(api_key)) {
    stop(
      "`api_key` must be a single non-empty string; ",
      "is the NEWS_API_KEY environment variable set?",
      call. = FALSE
    )
  }

  url <- paste0(
    "https://newsapi.org/v2/everything?",
    "q=",        URLencode(query, reserved = TRUE),
    "&language=en",
    "&sortBy=publishedAt",
    "&pageSize=", page_size,
    "&apiKey=",  api_key
  )

  response <- tryCatch(
    fromJSON(url, flatten = TRUE),
    error = function(e) {
      # The failing URL (which embeds the key) may be echoed in the condition
      # message, so scrub the key before re-raising.
      detail <- gsub(api_key, "<redacted>", conditionMessage(e), fixed = TRUE)
      stop(
        "NewsAPI request for '", query, "' failed: ", detail,
        call. = FALSE
      )
    }
  )

  # NewsAPI reports request-level problems in the payload's `status` field.
  if (identical(response$status, "error")) {
    stop(
      "NewsAPI returned an error for '", query, "': ", response$message,
      call. = FALSE
    )
  }

  articles <- as_tibble(response$articles)

  articles %>%
    # flatten = TRUE yields names such as "source.name"; use underscores.
    rename_with(~ str_replace_all(.x, "\\.", "_")) %>%
    mutate(query = query)
}

# Fetch headlines for each brand in turn and stack the per-brand tibbles into
# a single raw data set.
news_raw <- bind_rows(
  lapply(c("Honda", "Toyota", "Nissan"), fetch_news, api_key = api_key)
)

# Structural overview of the combined raw data.
glimpse(news_raw)
## Rows: 59
## Columns: 10
## $ author      <chr> "Bloomberg News", "Bloomberg News", "Richard Dredge", "Gar…
## $ title       <chr> "Chinese Carmakers Weighing Canada Manufacturing Deals, Jo…
## $ description <chr> "At least four Chinese carmakers are looking at how to use…
## $ url         <chr> "https://financialpost.com/pmn/business-pmn/chinese-carmak…
## $ urlToImage  <chr> NA, "https://smartcdn.gprod.postmedia.digital/financialpos…
## $ publishedAt <chr> "2026-06-22T16:27:25Z", "2026-06-22T16:27:25Z", "2026-06-2…
## $ content     <chr> "(Bloomberg) At least four Chinese carmakers are looking a…
## $ source_id   <chr> "financial-post", "financial-post", NA, NA, NA, NA, NA, NA…
## $ source_name <chr> "Financial Post", "Financial Post", "Autocar", "autosport.…
## $ query       <chr> "Honda", "Honda", "Honda", "Honda", "Honda", "Honda", "Hon…
# Clean the raw headlines: drop rows without a title, parse the publication
# timestamp, normalise the title text, and de-duplicate on the cleaned title.
news_clean <- news_raw %>%
  filter(!is.na(.data$title)) %>%
  mutate(
    pub_date    = ymd_hms(.data$publishedAt, quiet = TRUE),
    pub_day     = as.Date(pub_date),
    # Strip a trailing " - Source" suffix. Whitespace is required on both
    # sides of the dash so hyphenated words at the end of a headline
    # (e.g. "all-new RAV4", "e-bike") are not truncated.
    title_clean = str_remove(.data$title, "\\s+-\\s+[^-]+$"),
    # Replace punctuation with spaces, then collapse repeated whitespace.
    title_clean = str_squish(
      str_replace_all(title_clean, "[^[:alnum:][:space:]]", " ")
    ),
    title_clean = str_to_lower(title_clean)
  ) %>%
  # The same story can be returned for more than one query; keep the first.
  distinct(title_clean, .keep_all = TRUE)

cat("Total unique headlines:", nrow(news_clean), "\n")
## Total unique headlines: 51
# Render the first ten cleaned headlines. any_of() tolerates whichever of the
# candidate source-name columns is actually present in the data.
kable(
  head(
    select(
      news_clean,
      query, title_clean,
      any_of(c("source_name", "source", "sourceName")),
      pub_day
    ),
    10
  ),
  caption = "Sample Cleaned Headlines"
) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width        = FALSE
  )
Sample Cleaned Headlines
query title_clean source_name pub_day
Honda chinese carmakers weighing canada manufacturing deals joly says Financial Post 2026-06-22
Honda the greatest road car engines ever made Autocar 2026-06-22
Honda obituary ferrari s first monocoque maker john thompson autosport.com 2026-06-22
Honda kia s electric van is becoming so popular that supply is now being rationed Electrek 2026-06-22
Honda joan mir lauds honda s smart move as he tries out its 2027 motogp package at brno test Motorsport.com 2026-06-22
Honda adhesion and polarity Plos.org 2026-06-22
Honda the occurrence of a particular state is a predictor of successful travel consultation Plos.org 2026-06-22
Honda hot ones extra heat brings the fire to netflix beginning with the home run derby Thefutoncritic.com 2026-06-22
Honda exclusive the ai company powering public safety operations for the 2026 world cup just raised 250 million Fortune 2026-06-22
Honda get ready for prime day oedro announces exclusive deals for vehicle owners PRNewswire 2026-06-22
# One row per (query, word): tokenise the cleaned titles, drop stop words,
# then discard very short tokens and tokens made up only of digits.
news_tokens <- news_clean %>%
  select(query, title_clean) %>%
  unnest_tokens(output = word, input = title_clean) %>%
  anti_join(stop_words, by = "word") %>%
  filter(nchar(word) > 2, !str_detect(word, "^\\d+$"))

# The twenty most frequent words across every topic combined.
top_words <- head(count(news_tokens, word, sort = TRUE), 20)

kable_styling(
  kable(top_words, caption = "Top 20 Words Across All Headlines"),
  bootstrap_options = "striped",
  full_width        = FALSE
)
Top 20 Words Across All Headlines
word n
draft 5
nba 5
nissan 5
car 4
cup 3
exclusive 3
motogp 3
nvidia 3
quality 3
safety 3
time 3
toyota 3
announces 2
aston 2
believing 2
busts 2
customers 2
czech 2
deals 2
electric 2
# Horizontal bar chart of the top words; reordering the factor by count puts
# the most frequent word at the top of the axis.
ggplot(
  data    = mutate(top_words, word = fct_reorder(word, n)),
  mapping = aes(x = n, y = word, fill = n)
) +
  geom_col(show.legend = FALSE) +
  scale_fill_gradient(low = "#a8d8ea", high = "#0077b6") +
  theme_minimal(base_size = 13) +
  labs(
    title    = "Top 20 Words in News Headlines",
    subtitle = "Honda, Toyota, Nissan",
    caption  = "Source: NewsAPI",
    x        = "Count",
    y        = NULL
  )

# Word-cloud input: keep only words that occur at least twice so one-off
# terms do not crowd the cloud.
word_freq <- news_tokens %>%
  count(word, sort = TRUE) %>%
  filter(n >= 2)

# Fixed seed so the word layout is reproducible between runs.
set.seed(42)

# Guard against an empty table (no repeated words): there would be nothing to
# draw and no plot for title() to annotate.
if (nrow(word_freq) > 0) {
  wordcloud(
    words  = word_freq$word,
    freq   = word_freq$n,
    min.freq = 1,
    max.words = 80,
    random.order = FALSE,
    colors = brewer.pal(8, "Dark2"),
    scale  = c(3.5, 0.5)
  )
  # Name the brands that were actually queried, matching the bar chart
  # subtitle; the previous "Trending Tickers" label did not describe this data.
  title("News Headline Word Cloud — Honda, Toyota, Nissan")
} else {
  message("No word appears at least twice; skipping the word cloud.")
}

# AFINN assigns each word a numeric score in a `value` column.
afinn <- get_sentiments("afinn")

# Per-topic AFINN summary: number of matched words, rounded mean score and
# total score, ordered from most to least positive mean.
sentiment_afinn <- inner_join(news_tokens, afinn, by = "word") %>%
  group_by(query) %>%
  summarise(
    total_words    = n(),
    mean_sentiment = round(mean(value), 3),
    sum_sentiment  = sum(value),
    .groups        = "drop"
  ) %>%
  arrange(desc(mean_sentiment))

kable_styling(
  kable(sentiment_afinn, caption = "AFINN Sentiment Score by Topic"),
  bootstrap_options = c("striped", "hover"),
  full_width        = FALSE
)
AFINN Sentiment Score by Topic
query total_words mean_sentiment sum_sentiment
Honda 11 1.364 15
Toyota 8 0.875 7
Nissan 6 -1.000 -6
# Bar chart of the mean AFINN score per topic, coloured by sign, with a
# dashed reference line at zero drawn over the bars.
ggplot(
  data = mutate(
    sentiment_afinn,
    query         = fct_reorder(query, mean_sentiment),
    sentiment_dir = ifelse(mean_sentiment >= 0, "Positive", "Negative")
  ),
  mapping = aes(x = mean_sentiment, y = query, fill = sentiment_dir)
) +
  geom_col(width = 0.6) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray40") +
  scale_fill_manual(
    values = c("Positive" = "#2ecc71", "Negative" = "#e74c3c")
  ) +
  labs(
    title   = "Mean AFINN Sentiment Score by Topic",
    caption = "Source: NewsAPI",
    x       = "Mean Sentiment Score",
    y       = NULL,
    fill    = NULL
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "top")

# Bing labels each word as "positive" or "negative".
bing <- get_sentiments("bing")

# Per-topic counts of positive and negative words plus their difference.
sentiment_bing <- news_tokens %>%
  inner_join(bing, by = "word") %>%
  # Fix the factor levels so that, together with names_expand = TRUE, both
  # the `negative` and `positive` columns exist after pivoting even when no
  # matched word falls into one of the classes. Without this, a missing
  # class makes the net_sentiment calculation fail on an unknown column.
  mutate(
    sentiment = factor(sentiment, levels = c("negative", "positive"))
  ) %>%
  count(query, sentiment) %>%
  pivot_wider(
    names_from   = sentiment,
    values_from  = n,
    values_fill  = 0L,
    names_expand = TRUE
  ) %>%
  mutate(net_sentiment = positive - negative)

sentiment_bing %>%
  kable(caption = "Bing Sentiment Count by Topic") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)
Bing Sentiment Count by Topic
query negative positive net_sentiment
Honda 2 9 7
Nissan 4 2 -2
Toyota 5 2 -3
# Up to ten most frequent Bing-matched words per sentiment class, drawn as
# one facet per class. reorder_within() / scale_y_reordered() let each facet
# sort its own words by count.
inner_join(news_tokens, bing, by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  group_by(sentiment) %>%
  slice_head(n = 10) %>%
  ungroup() %>%
  mutate(word = reorder_within(word, n, sentiment)) %>%
  ggplot(mapping = aes(x = n, y = word, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ sentiment, scales = "free_y") +
  scale_fill_manual(
    values = c("positive" = "#2ecc71", "negative" = "#e74c3c")
  ) +
  scale_y_reordered() +
  theme_minimal(base_size = 12) +
  labs(
    title   = "Top Positive & Negative Words in Headlines",
    caption = "Source: NewsAPI",
    x       = "Count",
    y       = NULL
  )

# NRC tags words with emotions as well as overall polarity.
nrc <- get_sentiments("nrc")

# Share of each emotion among a topic's emotion-tagged words. The
# positive/negative polarity labels are dropped so only emotions remain.
emotion_nrc <- news_tokens %>%
  inner_join(nrc, by = "word") %>%
  filter(!sentiment %in% c("positive", "negative")) %>%
  count(query, sentiment) %>%
  group_by(query) %>%
  mutate(prop = n / sum(n)) %>%
  # Drop the grouping so later operations on emotion_nrc are not silently
  # applied per topic.
  ungroup()

ggplot(emotion_nrc, aes(x = sentiment, y = prop, fill = query)) +
  geom_col(position = "dodge") +
  scale_y_continuous(labels = percent_format()) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title   = "NRC Emotion Proportions by Topic",
    x       = "Emotion",
    y       = "Proportion of Emotional Words",
    fill    = "Topic",
    caption = "Source: NewsAPI"
  ) +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1),
        legend.position = "top")

# TF-IDF with each topic treated as one document; keep the six
# highest-scoring words per topic.
tfidf_words <- news_tokens %>%
  count(query, word) %>%
  bind_tf_idf(word, query, n) %>%
  group_by(query) %>%
  # In a corpus this small many words share the same score. With the default
  # with_ties = TRUE every tied word is kept, so a facet can hold far more
  # than six bars; with_ties = FALSE caps it at exactly n rows.
  slice_max(tf_idf, n = 6, with_ties = FALSE) %>%
  ungroup()

tfidf_words %>%
  # Order words within each facet rather than globally.
  mutate(word = reorder_within(word, tf_idf, query)) %>%
  ggplot(aes(x = tf_idf, y = word, fill = query)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ query, scales = "free_y", ncol = 2) +
  scale_y_reordered() +
  scale_fill_brewer(palette = "Set1") +
  labs(
    title    = "Top TF-IDF Terms by Topic",
    subtitle = "Words most distinctive to each news topic",
    x        = "TF-IDF Score", y = NULL,
    caption  = "Source: NewsAPI"
  ) +
  theme_minimal(base_size = 12)

# Combine the AFINN and Bing per-topic summaries into one presentation table
# with reader-friendly column headings.
summary_tbl <- left_join(
  sentiment_afinn,
  select(sentiment_bing, query, positive, negative, net_sentiment),
  by = "query"
) %>%
  rename(
    Topic           = query,
    `Words Matched` = total_words,
    `Mean AFINN`    = mean_sentiment,
    `AFINN Sum`     = sum_sentiment,
    Positive        = positive,
    Negative        = negative,
    `Net (Bing)`    = net_sentiment
  )

# Column 3 is `Mean AFINN`: green for non-negative means, red otherwise.
kable(summary_tbl, caption = "Sentiment Summary: All Topics") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width        = FALSE
  ) %>%
  column_spec(
    3,
    color = ifelse(summary_tbl[["Mean AFINN"]] >= 0, "green", "red")
  )
Sentiment Summary: All Topics
Topic Words Matched Mean AFINN AFINN Sum Positive Negative Net (Bing)
Honda 11 1.364 15 9 2 7
Toyota 8 0.875 7 2 5 -3
Nissan 6 -1.000 -6 2 4 -2

Analysis

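Honda had the highest peak: it recorded the highest mean AFINN sentiment score (1.364) and the strongest net positive Bing sentiment (+7), outperforming both Toyota and Nissan. Toyota's results were mixed, with a positive mean AFINN score (0.875) but a negative net Bing sentiment (−3), while Nissan was negative on both measures (−1.000 and −2). Honda's lead is most likely because its headlines contained more positive terms and fewer negative terms (9 positive versus 2 negative Bing matches), resulting in a higher overall sentiment score than Toyota or Nissan. Because the headlines are fetched live and sorted by publication time, these figures and the words behind them change each time the analysis is re-run; an earlier draft of this analysis, for instance, had Toyota in front on the strength of terms such as wins, win, and won.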