The table below shows the number of tweets with and without the key words (Nike, Women, Wear). Tweets that included at least one of these three words made up almost 70% of all tweets. Comparing the words used made me realize how often they were used and the difference they made.

Mentions of the topic
Topic Count Percent
0 1661 30.3
1 3818 69.7
# Load packages

if (!require("tidyverse")) install.packages("tidyverse")
if (!require("tidytext")) install.packages("tidytext")
if (!require("plotly")) install.packages("plotly")
if (!require("gtExtras")) install.packages("gtExtras")

library(tidyverse)
library(tidytext)
library(gtExtras) 
library(plotly)
library(lubridate)

# Read the NikeUniforms CSV straight from its GitHub URL into a data frame

mydata <- read.csv(
  file = "https://raw.githubusercontent.com/drkblake/Data/main/NikeUniforms.csv"
)

# Counting word frequencies across all posts

# Split Full.Text into one token per row, then tally each distinct word,
# most frequent first. The result has two columns: word and n.
tidy_text <- mydata %>%
  unnest_tokens(word, Full.Text) %>%
  count(word, sort = TRUE)

# Deleting standard stop words

# stop_words ships with tidytext; it is matched on the shared "word" column.
# Naming the key explicitly avoids the "Joining with `by = ...`" message and
# keeps the join stable if either table gains more columns later.
data("stop_words")
tidy_text <- tidy_text %>%
  anti_join(stop_words, by = "word")

# Deleting custom stop words (link fragments and the retweet marker)

my_stopwords <- tibble(word = c("https", "t.co", "rt"))
tidy_text <- tidy_text %>%
  anti_join(my_stopwords, by = "word")

# Show the 25 most frequent remaining words
head(tidy_text, 25)

# Regular expression listing the search terms; "|" means "any of these"
searchterms <- "Nike|Women|Wear"

# Flag each post: 1 if Full.Text matches any search term (case-insensitive),
# 0 otherwise
mydata$Topic <- as.numeric(
  grepl(searchterms, mydata$Full.Text, ignore.case = TRUE)
)

# Tally flagged and unflagged posts, with each group's share of all posts
Topic <- mydata %>%
  group_by(Topic) %>%
  summarize(
    Count = n(),
    Percent = round(Count / nrow(mydata) * 100, 1)
  )

# Render the tally as a left-aligned gt table using the 538 theme
TopicTable <- Topic %>%
  gt() %>%
  tab_header("Mentions of the topic") %>%
  cols_align(align = "left") %>%
  gt_theme_538()

TopicTable

# Graphing Topic posts over time

# Step 1: Make sure the Date column holds Date values rather than text

mydata$Date <- as.Date(mydata$Date)

# Step 2: Keep only the posts flagged as mentioning the topic

topic_mentions <- filter(mydata, Topic == 1)

# Step 3: Tally topic mentions per calendar week, with weeks starting on Monday

weekly_topic_mentions <- topic_mentions %>%
  group_by(Week = cut(Date, breaks = "week", start.on.monday = TRUE)) %>%
  summarise(Mentions = n()) %>%
  # cut() yields a factor labelled with each week's start date;
  # turn those labels back into Date values for plotting
  mutate(Week = as.Date(as.character(Week)))

# Step 4: Draw an interactive bar chart of topic mentions per week

Plot <- weekly_topic_mentions %>%
  plot_ly(
    x = ~Week,
    y = ~Mentions,
    type = "bar",
    marker = list(color = "#2c7fb8")
  ) %>%
  layout(
    title = list(
      text = "Weekly Mentions of the Topic",
      font = list(size = 20)
    ),
    xaxis = list(title = "Week Starting On", tickformat = "%Y-%m-%d"),
    yaxis = list(title = "Number of Mentions"),
    bargap = 0.2
  )

Plot

# Counting phrases

# Extract three-word phrases (trigrams) from Full.Text into a "tidy_phrases"
# data frame and tally each distinct phrase, most frequent first.
# The result has two columns: phrase and n.

tidy_phrases <- mydata %>%
  unnest_tokens(phrase, Full.Text, token = "ngrams", n = 3) %>%
  count(phrase, sort = TRUE)

# Delete custom stop phrases

# Filler trigrams to exclude from the ranking
my_stopphrases <- tibble(phrase = c("this is a",
                                    "should not be",
                                    "this is the",
                                    "one of the",
                                    "in order to"))

# Naming the key explicitly avoids the "Joining with `by = ...`" message and
# keeps the join stable if either table gains more columns later.
tidy_phrases <- tidy_phrases %>%
  anti_join(my_stopphrases, by = "phrase")

# Show the 25 most frequent remaining phrases
head(tidy_phrases, n = 25)