A common theme in the table of topic mentions is that the two topics, Nike and the Olympics, are closely linked and are often mentioned together. Common phrases ask what will be introduced and worn during the 2024 Paris Olympics. The most common phrases mostly talk about what the women will be wearing, in addition to the men’s track kits.
**Mentions of the topic**

| Topic | Count | Percent |
|---|---|---|
| 0 | 2405 | 43.9 |
| 1 | 3074 | 56.1 |
# Load packages
if (!require("tidyverse")) install.packages("tidyverse")
if (!require("tidytext")) install.packages("tidytext")
if (!require("plotly")) install.packages("plotly")
if (!require("gtExtras")) install.packages("gtExtras")
library(tidyverse)
library(tidytext)
library(gtExtras)
library(plotly)
library(lubridate)
# Read the data ----
# The text analysed below is taken from the Full.Text column of this file.
mydata <- read.csv("https://raw.githubusercontent.com/drkblake/Data/main/NikeUniforms.csv")

# Counting words in the 2024 Olympics Nike uniforms posts ----
# Split Full.Text into one word token per row, then tally each distinct word,
# most frequent first.
tidy_text <- mydata %>%
  unnest_tokens(word, Full.Text) %>%
  count(word, sort = TRUE)

# Deleting standard stop words
# The join key is spelled out so the join does not depend on whichever column
# names the two tables happen to share (and does not print a "Joining with"
# message on every run).
data("stop_words")
tidy_text <- tidy_text %>%
  anti_join(stop_words, by = "word")

# Deleting custom stop words (URL fragments and the retweet marker)
my_stopwords <- tibble(word = c("https", "t.co", "rt"))
tidy_text <- tidy_text %>%
  anti_join(my_stopwords, by = "word")

# Show the 25 most frequent remaining words
head(tidy_text, 25)
# Flagging posts that mention the topic ----
# A post is flagged 1 when Full.Text matches either alternative of the
# pattern (case-insensitively), and 0 otherwise.
searchterms <- "nike|olympics"
mydata <- mydata %>%
  mutate(
    Topic = ifelse(grepl(searchterms, Full.Text, ignore.case = TRUE), 1, 0)
  )

# Summarising the flag: number of posts per flag value and its share of all
# posts, as a percentage rounded to one decimal place.
total_posts <- nrow(mydata)
Topic <- mydata %>%
  group_by(Topic) %>%
  summarise(
    Count = n(),
    Percent = round(Count / total_posts * 100, 1)
  )

# Rendering the summary as a left-aligned gt table with the 538 theme
TopicTable <- Topic %>%
  gt() %>%
  tab_header(title = "Mentions of the topic") %>%
  cols_align(align = "left") %>%
  gt_theme_538()
TopicTable
# Counting phrases ----
# Extract three-word phrases (trigrams) from Full.Text to a "tidy_phrases"
# data frame and tally each distinct phrase, most frequent first.
tidy_phrases <- mydata %>%
  unnest_tokens(phrase, Full.Text, token = "ngrams", n = 3) %>%
  # Texts with fewer than three words produce an NA phrase; drop those rows so
  # NA is not tallied as if it were a phrase.
  dplyr::filter(!is.na(phrase)) %>%
  count(phrase, sort = TRUE)

# Delete custom stop phrases
# The join key is spelled out so the join does not depend on whichever column
# names the two tables happen to share.
my_stopphrases <- tibble(phrase = c(
  "be worn by",
  "at the 2024",
  "field team at",
  "nike kits that",
  "olympics in paris"
))
tidy_phrases <- tidy_phrases %>%
  anti_join(my_stopphrases, by = "phrase")

# Show the 25 most frequent remaining phrases
head(tidy_phrases, n = 25)