Lab 12

A common theme in the table of mentions for the two topics, Nike and the Olympics, is that they are frequently mentioned together. Common phrases ask what will be introduced and worn during the 2024 Paris Olympics. The most common phrases mostly talk about what the women will be wearing alongside the men’s track kits.

Mentions of the topic
Topic Count Percent
0 2405 43.9
1 3074 56.1

Code:

# Load packages

if (!require("tidyverse")) install.packages("tidyverse")
if (!require("tidytext")) install.packages("tidytext")
if (!require("plotly")) install.packages("plotly")
if (!require("gtExtras")) install.packages("gtExtras")

library(tidyverse)
library(tidytext)
library(gtExtras) 
library(plotly)
library(lubridate)

# Import the NikeUniforms.csv dataset from GitHub into a data frame

mydata <- read.csv(
  file = "https://raw.githubusercontent.com/drkblake/Data/main/NikeUniforms.csv"
)

# Counting the words used in the text of each record (Full.Text column)

# Custom stop words: URL fragments ("https", "t.co") and "rt"
# (presumably the retweet marker), none of which carry topical meaning

my_stopwords <- tibble(word = c("https", "t.co", "rt"))

# Standard stop words shipped with tidytext

data("stop_words")

# Tokenize into single words, tally each word, then drop both stop-word
# lists. The join key is spelled out so the match can only ever happen on
# `word`, rather than on whichever column names the tables happen to share.

tidy_text <- mydata %>%
  unnest_tokens(word, Full.Text) %>%
  count(word, sort = TRUE) %>%
  anti_join(stop_words, by = "word") %>%
  anti_join(my_stopwords, by = "word")

# Show the 25 most frequent remaining words

head(tidy_text, 25)

# Flag each record whose text mentions either search term
# (case-insensitive regular expression; 1 = match, 0 = no match)

searchterms <- "nike|olympics"

mydata[["Topic"]] <- as.numeric(
  grepl(
    pattern = searchterms,
    x = mydata$Full.Text,
    ignore.case = TRUE
  )
)

# Tally flagged vs. unflagged records, with each group's share of all rows

Topic <- mydata %>%
  group_by(Topic) %>%
  summarise(
    Count = n(),
    Percent = round(Count / nrow(mydata) * 100, 1)
  )

# Render the tally as a left-aligned gt table with the FiveThirtyEight theme

TopicTable <- Topic %>%
  gt() %>%
  tab_header(title = "Mentions of the topic") %>%
  cols_align(align = "left") %>%
  gt_theme_538()

TopicTable

# Counting phrases

# Custom stop phrases to exclude from the ranking

my_stopphrases <- tibble(phrase = c("be worn by",
                                    "at the 2024",
                                    "field team at",
                                    "nike kits that",
                                    "olympics in paris"))

# Extract three-word phrases (trigrams) to a "tidy_phrases" data frame.
# A text with fewer than three words yields an NA phrase; those rows are
# removed before counting so a missing value can never be ranked as if it
# were a real phrase. The join key is given explicitly so the stop-phrase
# match can only happen on `phrase`.

tidy_phrases <- mydata %>%
  unnest_tokens(phrase, Full.Text, token = "ngrams", n = 3) %>%
  dplyr::filter(!is.na(phrase)) %>%
  count(phrase, sort = TRUE) %>%
  anti_join(my_stopphrases, by = "phrase")

# Show the 25 most frequent remaining phrases

head(tidy_phrases, n = 25)