Goal
This section answers: Which places are named inside the
comments themselves?
The YouTube comment data used in this report does
not include a reliable field for a viewer's U.S. state or country.
The official YouTube commentThread resource documents
fields such as thread ID, video/channel IDs, top-level comment details,
reply count, public visibility, and a limited replies object, but it
does not provide commenter geography. Therefore, this section is only a
location-reference-in-text proxy: it counts places that
commenters wrote inside comments. It should not be interpreted as where
commenters are actually from.
# ================================================================
# LOCATION-MENTION DICTIONARY
# ================================================================
# Lookup tables for a deliberately conservative proxy analysis: only place
# names that commenters literally typed are counted. Nothing here infers
# where a viewer is located.

# The 50 U.S. state names, lower-case, each tagged with its location type.
us_state_names <- tibble::tibble(
  location = c(
    "alabama", "alaska", "arizona", "arkansas", "california",
    "colorado", "connecticut", "delaware", "florida", "georgia",
    "hawaii", "idaho", "illinois", "indiana", "iowa",
    "kansas", "kentucky", "louisiana", "maine", "maryland",
    "massachusetts", "michigan", "minnesota", "mississippi", "missouri",
    "montana", "nebraska", "nevada", "new hampshire", "new jersey",
    "new mexico", "new york", "north carolina", "north dakota", "ohio",
    "oklahoma", "oregon", "pennsylvania", "rhode island", "south carolina",
    "south dakota", "tennessee", "texas", "utah", "vermont",
    "virginia", "washington", "west virginia", "wisconsin", "wyoming"
  )
) |>
  dplyr::mutate(location_type = "U.S. state name")

# A short list of country names plus common ways of referring to the
# United States ("america", "united states", "usa" are separate entries).
country_names <- tibble::tibble(
  location = c(
    "america", "united states", "usa", "canada",
    "mexico", "england", "ireland", "scotland",
    "wales", "france", "germany", "italy",
    "spain", "australia", "new zealand", "brazil",
    "india", "china", "japan", "ukraine",
    "russia", "poland", "netherlands", "sweden",
    "norway", "denmark", "finland", "south africa"
  )
) |>
  dplyr::mutate(location_type = "Country or country reference")

# Stack both lists into one dictionary; distinct() guards against the same
# (location, location_type) pair being listed twice.
location_dictionary <- us_state_names |>
  dplyr::bind_rows(country_names) |>
  dplyr::distinct(location, location_type)
# Find which comments explicitly name a dictionary location.
#
# Arguments:
#   data        Data frame with a `comment_id` column and a `text_clean`
#               column holding the comment text to search.
#   dictionary  Data frame with `location` (the name to search for) and
#               `location_type` (the category label carried to the output).
#   acronyms    Dictionary entries that should be displayed in upper case
#               instead of title case (e.g. "usa" -> "USA", not "Usa").
#
# Returns a tibble with columns `comment_id`, `location` (display label) and
# `location_type`, holding at most one row per (comment_id, location) pair.
#
# Matching is case-insensitive, whole-word, and tolerant of any run of
# whitespace inside multi-word names. Longer names are matched first and the
# matched text is masked, so "West Virginia" is not also counted as
# "Virginia" and "New Mexico" is not also counted as "Mexico". A comment that
# names both places separately is still counted for both.
extract_location_mentions <- function(data, dictionary, acronyms = c("usa")) {
  stopifnot(
    all(c("comment_id", "text_clean") %in% names(data)),
    all(c("location", "location_type") %in% names(dictionary))
  )

  comment_ids <- data[["comment_id"]]
  remaining_text <- as.character(data[["text_clean"]])
  terms <- as.character(dictionary[["location"]])
  term_types <- dictionary[["location_type"]]

  # Typed zero-row result: keeps the output columns present even when the
  # dictionary is empty or nothing matches.
  empty_result <- tibble::tibble(
    comment_id = comment_ids[0],
    location = character(),
    location_type = term_types[0]
  )

  # Results are stored at the entry's original dictionary position so the
  # output row order does not depend on the longest-first matching order.
  mention_rows <- vector("list", length(terms))

  for (idx in order(-nchar(terms))) {
    term <- terms[[idx]]
    if (is.na(term) || !nzchar(term)) {
      next
    }

    term_pattern <- stringr::regex(
      paste0("\\b", stringr::str_replace_all(term, " ", "\\\\s+"), "\\b"),
      ignore_case = TRUE
    )

    is_hit <- stringr::str_detect(remaining_text, term_pattern)
    is_hit[is.na(is_hit)] <- FALSE # missing text never counts as a mention
    if (!any(is_hit)) {
      next
    }

    label <- if (tolower(term) %in% tolower(acronyms)) {
      toupper(term)
    } else {
      stringr::str_to_title(term)
    }

    mention_rows[[idx]] <- tibble::tibble(
      comment_id = comment_ids[is_hit],
      location = label,
      location_type = term_types[idx]
    )

    # Mask the matched span with a non-word, non-space character so shorter
    # names contained in it cannot match, and so the words on either side
    # cannot fuse into a new multi-word match.
    remaining_text[is_hit] <- stringr::str_replace_all(
      remaining_text[is_hit],
      term_pattern,
      "|"
    )
  }

  dplyr::bind_rows(c(list(empty_result), mention_rows)) |>
    dplyr::distinct(comment_id, location, .keep_all = TRUE)
}
# Search the cleaned comments for dictionary locations, then tally how many
# (comment, location) mentions each location received, most frequent first.
location_mentions <- extract_location_mentions(comments_clean, location_dictionary)
location_counts <- dplyr::count(
  location_mentions,
  location_type,
  location,
  sort = TRUE
)

# Render either an explanatory one-row table (nothing matched) or the 20
# most frequently mentioned locations.
if (nrow(location_counts) == 0) {
  knitr::kable(
    tibble::tibble(
      Result = "No state or country names from the dictionary were detected in the cleaned comments.",
      Interpretation = "This does not mean viewers came from no regions; it only means commenters did not explicitly write recognizable place names in the sampled text."
    ),
    caption = "Location-mention proxy result"
  )
} else {
  location_counts |>
    dplyr::slice_head(n = 20) |>
    knitr::kable(
      caption = "Top explicit location references found inside comment text",
      align = c("l", "l", "r")
    )
}
Table: Top explicit location references found inside comment text

| location_type | location | n |
|:---|:---|---:|
| Country or country reference | America | 18 |
| U.S. state name | Ohio | 7 |
| Country or country reference | Germany | 5 |
| Country or country reference | Usa | 5 |
| Country or country reference | France | 4 |
| Country or country reference | South Africa | 4 |
| Country or country reference | Ireland | 3 |
| U.S. state name | Mississippi | 3 |
| U.S. state name | Utah | 3 |
| Country or country reference | Australia | 2 |
| Country or country reference | Canada | 2 |
| U.S. state name | California | 2 |
| Country or country reference | Brazil | 1 |
| Country or country reference | Denmark | 1 |
| Country or country reference | Mexico | 1 |
| Country or country reference | Poland | 1 |
| U.S. state name | Arkansas | 1 |
| U.S. state name | Delaware | 1 |
| U.S. state name | Missouri | 1 |
| U.S. state name | Nebraska | 1 |
# ================================================================
# VISUALIZATION 8: LOCATION-MENTION PROXY CHART
# ================================================================
# Horizontal bar chart of the most frequently mentioned locations, coloured
# by location type. When no locations were detected, a text-only placeholder
# plot is built instead. Either way the plot is saved under the same file
# name and then displayed.
if (nrow(location_counts) > 0) {
  # with_ties = FALSE enforces a hard cap of 15 bars. With the default
  # (with_ties = TRUE) every location tied with the 15th count is kept, so a
  # long tail of single-mention locations would flood the chart. Ties at the
  # cut-off are resolved by the existing row order of `location_counts`.
  top_location_counts <- location_counts |>
    dplyr::slice_max(n, n = 15, with_ties = FALSE) |>
    dplyr::arrange(n)

  plot_location_mentions <- ggplot2::ggplot(
    top_location_counts,
    ggplot2::aes(
      x = n,
      # Order the bars by mention count rather than alphabetically.
      y = forcats::fct_reorder(location, n),
      fill = location_type
    )
  ) +
    ggplot2::geom_col() +
    # Palette colours and theme_lab() are defined elsewhere in the report.
    ggplot2::scale_fill_manual(values = c(hemp_green_dark, hemp_gold, hemp_sage)) +
    ggplot2::labs(
      title = "Locations Named Inside Comment Text",
      subtitle = "This counts written location references, not where commenters are from",
      x = "Number of comment mentions",
      y = "Mentioned location",
      fill = "Location type",
      caption = "This is a text-reference proxy using a small state/country dictionary; it is not viewer geography."
    ) +
    theme_lab()
} else {
  # Placeholder: a single centred annotation on an empty canvas, with axis
  # text and grid lines removed.
  plot_location_mentions <- ggplot2::ggplot() +
    ggplot2::annotate(
      "text",
      x = 0,
      y = 0,
      label = "No explicit state or country mentions were detected.\nYouTube comments do not provide viewer geography in this dataset.",
      size = 5,
      color = hemp_green_dark
    ) +
    ggplot2::xlim(-1, 1) +
    ggplot2::ylim(-1, 1) +
    ggplot2::labs(
      title = "Location References in Comment Text",
      subtitle = "No detected state or country names in sampled comment text",
      x = NULL,
      y = NULL,
      caption = "This is not evidence of where viewers are located; it only reflects text mentions."
    ) +
    theme_lab() +
    ggplot2::theme(
      axis.text = ggplot2::element_blank(),
      panel.grid = ggplot2::element_blank()
    )
}

# Both branches write to the same file name, so save and display once.
save_report_plot(plot_location_mentions, "08_location_mentions_proxy.png")
plot_location_mentions
