# install.packages(c("tuber", "tidytext", "dplyr", "stringr",
# "ggplot2", "wordcloud2", "readr"))
library(tuber)
library(tidytext)
library(dplyr)
library(stringr)
library(ggplot2)
library(wordcloud2)
library(readr)
# Authenticate against the YouTube Data API using OAuth credentials read from
# the environment. Define YOUTUBE_CLIENT_ID and YOUTUBE_CLIENT_SECRET outside
# this script (for example in ~/.Renviron): that keeps real secrets out of the
# source file and avoids overwriting an already-configured value with a
# placeholder.
yt_client_id <- Sys.getenv("YOUTUBE_CLIENT_ID")
yt_client_secret <- Sys.getenv("YOUTUBE_CLIENT_SECRET")
# Sys.getenv() returns "" for an unset variable, so fail early with a clear
# message instead of letting the OAuth call fail with empty credentials.
if (!nzchar(yt_client_id) || !nzchar(yt_client_secret)) {
  stop(
    "Set YOUTUBE_CLIENT_ID and YOUTUBE_CLIENT_SECRET (e.g. in ~/.Renviron) ",
    "before running this script.",
    call. = FALSE
  )
}
yt_oauth(app_id = yt_client_id, app_secret = yt_client_secret)
# Video under analysis: its title (used to label the plot) and its YouTube ID.
target_video_title <- "Action Bronson Eats with NYC Mayor Zohran Mamdani: FTD"
target_video_id <- "JSYjv7QprJw"
# Echo the ID and the watch URL so the run log identifies the source video.
cat(paste("Video ID:", target_video_id, "\n"))
## Video ID: JSYjv7QprJw
cat(paste0("URL: https://www.youtube.com/watch?v=", target_video_id, "\n"))
## URL: https://www.youtube.com/watch?v=JSYjv7QprJw
get_all_comments() handles pagination automatically.
# Download every comment on the target video, then keep only the fields the
# analysis needs under consistent snake_case names.
comments_raw_list <- get_all_comments(video_id = target_video_id)
comments_raw_tbl <- as_tibble(comments_raw_list)
# comment_id must hold the API's comment identifier (the `id` column of the
# response), not the comment text: text is not unique across comments and is
# not an identifier. Guard against a response that lacks the column so the
# rest of the script still runs.
if (!"id" %in% names(comments_raw_tbl)) {
  warning(
    "Comment data has no `id` column; comment_id will be NA.",
    call. = FALSE
  )
  comments_raw_tbl$id <- NA_character_
}
comments_raw <- comments_raw_tbl %>%
  transmute(
    comment_id = as.character(id),
    author = authorDisplayName,
    text = textOriginal,
    like_count = as.integer(likeCount),
    published_at = publishedAt
  )
cat("\nTotal comments collected:", nrow(comments_raw), "\n")
# Keep a copy of the raw download on disk.
write_csv(comments_raw, "youtube_comments_raw.csv")
cat("Saved to youtube_comments_raw.csv\n")
# Clean the raw comments:
#   1. drop repeated (text, author) pairs, keeping the first occurrence;
#   2. drop missing texts and texts of three characters or fewer;
#   3. collapse runs of whitespace, parse the timestamp as UTC, and make sure
#      the like count is an integer.
timestamp_format <- "%Y-%m-%dT%H:%M:%SZ"
comments_clean <- comments_raw %>%
  distinct(text, author, .keep_all = TRUE) %>%
  filter(!is.na(text) & str_length(text) > 3) %>%
  mutate(text = str_squish(text)) %>%
  mutate(
    like_count = as.integer(like_count),
    published_at = as.POSIXct(
      published_at,
      tz = "UTC",
      format = timestamp_format
    )
  )
cat("Comments after cleaning:", nrow(comments_clean), "\n")
## Comments after cleaning: 1858
glimpse(comments_clean)
## Rows: 1,858
## Columns: 5
## $ comment_id <chr> "That was wholesome AF, thanks I needed them smiles and l…
## $ author <chr> "@abegending", "@antondoty5552", "@jamesripley4168", "@ju…
## $ text <chr> "That was wholesome AF, thanks I needed them smiles and l…
## $ like_count <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 1, 1, 0, 0, 0, …
## $ published_at <dttm> 2026-06-30 00:44:23, 2026-06-29 17:38:29, 2026-06-29 14:…
# Extra stop words removed on top of tidytext's `stop_words` lexicon.
custom_stops <- tibble(
  word = c("https", "t.co", "amp", "na", "ya", "video", "watch", "youtube")
)
# One row per word occurrence. Keep only tokens that consist solely of the
# letters a-z and are longer than two characters, then remove both stop-word
# lists.
tokens <- comments_clean %>%
  select(author, text) %>%
  unnest_tokens(word, text) %>%
  filter(str_detect(word, "^[a-z]+$"), str_length(word) > 2) %>%
  anti_join(custom_stops, by = "word") %>%
  anti_join(stop_words, by = "word")
# Frequency table of the remaining tokens, most frequent first.
word_freq <- count(tokens, word, sort = TRUE)
cat("Top 20 words:\n")
## Top 20 words:
word_freq %>%
  head(20) %>%
  print()
## # A tibble: 20 × 2
## word n
## <chr> <int>
## 1 action 196
## 2 mayor 142
## 3 love 116
## 4 mamdani 114
## 5 bronson 103
## 6 food 94
## 7 people 78
## 8 nyc 65
## 9 guy 61
## 10 episode 52
## 11 york 52
## 12 zohran 49
## 13 bro 48
## 14 dude 48
## 15 lol 47
## 16 time 41
## 17 lmao 35
## 18 city 34
## 19 korea 34
## 20 real 33
# Horizontal bar chart of the 25 most frequent words; bars are ordered by
# frequency and shaded from light to dark green as the count grows.
# `with_ties = FALSE` caps the chart at exactly 25 bars: by default
# slice_max() also keeps every word tied with the 25th, which would
# contradict the "Top 25" title.
word_freq %>%
  slice_max(n, n = 25, with_ties = FALSE) %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = n, y = word, fill = n)) +
  geom_col(show.legend = FALSE) +
  scale_fill_gradient(low = "#74c476", high = "#006d2c") +
  labs(
    title = "Top 25 Most Frequent Words in YouTube Comments",
    subtitle = paste("Source video:", target_video_title),
    x = "Frequency",
    y = NULL,
    caption = "Stop words and generic terms removed. Data collected via tuber/YouTube Data API v3."
  ) +
  theme_minimal(base_size = 13)
# Word-cloud input: words that occur at least twice, limited to the 150 most
# frequent (ties at the cut-off are kept).
repeated_words <- filter(word_freq, n >= 2)
wc_data <- slice_max(repeated_words, n, n = 150)
# Render the cloud in random dark colours on a white background.
wordcloud2(
  wc_data,
  backgroundColor = "white",
  color = "random-dark",
  size = 0.5
)
# Print headline statistics for the analyzed dataset. Every figure here is
# computed from comments_clean (after de-duplication and filtering), so the
# first line is labelled as the analyzed count; "Total comments collected" is
# the label already used for the raw download size right after collection.
cat("=== Dataset Summary ===\n")
## === Dataset Summary ===
cat("Comments analyzed (after cleaning): ", nrow(comments_clean), "\n")
## Comments analyzed (after cleaning): 1858
cat("Unique commenters: ", n_distinct(comments_clean$author), "\n")
## Unique commenters: 1568
# na.rm = TRUE skips timestamps that failed to parse.
cat("Date range: ",
    format(min(comments_clean$published_at, na.rm = TRUE), "%Y-%m-%d"), "to",
    format(max(comments_clean$published_at, na.rm = TRUE), "%Y-%m-%d"), "\n")
## Date range: 2026-03-30 to 2026-06-30
cat("Avg. likes per comment: ",
    round(mean(comments_clean$like_count, na.rm = TRUE), 2), "\n")
## Avg. likes per comment: 32.19
# word_freq has one row per distinct token, so its row count is the
# vocabulary size.
cat("Total unique tokens: ", nrow(word_freq), "\n")
## Total unique tokens: 3402
cat("Top 3 words: ",
    paste(head(word_freq$word, 3), collapse = ", "), "\n")
## Top 3 words: action, mayor, love
This video features Action Bronson, a rapper and cultural figure known for blending hip-hop with food culture (while repping New York City culture), as he reviews a Yemeni restaurant in New York City with the new mayor, Zohran Mamdani. The video explores themes of New York diversity, personal experience, and culinary storytelling rather than explicit political commentary. With 931 thousand views, 38 thousand likes, and 2,432 total comments, the engagement metrics suggest moderate but meaningful audience interest.
The word-frequency analysis reveals that top terms like “mayor,” “mamdani,” “zohran,” “bronson,” and “nyc” directly align with the video’s content — the names of its key figures and its geographic identity. In addition, the prevalence of words like “action” (most likely from the host’s name, Action Bronson), “food,” “love,” “people,” and “episode” suggests commenters are engaging with both the personality-driven and culinary aspects of the content. The word cloud visualization emphasizes these dominant themes, indicating that viewers are discussing the show’s format and its cultural/geographic elements rather than critiquing or debating.
One notable pattern is the relative absence of negative-sentiment words among the most frequent terms, suggesting that comments skew positive or neutral — viewers seem to be appreciating the cultural narrative and food exploration rather than expressing criticism. This is consistent with the high like-to-comment ratio (38K likes to 2.4K comments), which often indicates satisfied, engaged viewers.