The table shows the number of tweets with and without the key words (Nike, Women, Wear). Tweets that included at least one of these three words made up almost 70% of the total. Comparing the words used made me realize how often they were used and the difference they made.
Mentions of the topic

| Topic | Count | Percent |
|---|---|---|
| 0 | 1661 | 30.3 |
| 1 | 3818 | 69.7 |
# Load packages
if (!require("tidyverse")) install.packages("tidyverse")
if (!require("tidytext")) install.packages("tidytext")
if (!require("plotly")) install.packages("plotly")
if (!require("gtExtras")) install.packages("gtExtras")
library(tidyverse)
library(tidytext)
library(gtExtras)
library(plotly)
library(lubridate)
# Import the NikeUniforms CSV from GitHub into the `mydata` data frame
data_url <- "https://raw.githubusercontent.com/drkblake/Data/main/NikeUniforms.csv"
mydata <- read.csv(file = data_url)
# Counting how often each word appears in the posts
# Split Full.Text into one word per row, then tally each distinct word,
# most frequent first.
tidy_text <- mydata %>%
  unnest_tokens(word, Full.Text) %>%
  count(word, sort = TRUE)

# Deleting standard stop words
# `by = "word"` names the join key explicitly, so dplyr neither has to guess
# it nor prints a "Joining with ..." message.
data("stop_words")
tidy_text <- tidy_text %>%
  anti_join(stop_words, by = "word")

# Deleting custom stop words
# NOTE(review): these look like link fragments and the retweet marker that
# tokenizing social-media text leaves behind -- extend the list as needed.
my_stopwords <- tibble(word = c("https", "t.co", "rt"))
tidy_text <- tidy_text %>%
  anti_join(my_stopwords, by = "word")

# Show the 25 most frequent remaining words
head(tidy_text, 25)
# Flag posts that mention the topic
# A post gets Topic = 1 when Full.Text matches ANY of the search terms
# (the pattern is an OR of the terms, matched case-insensitively) and
# Topic = 0 otherwise.
searchterms <- "Nike|Women|Wear"
mydata$Topic <- as.numeric(
  grepl(searchterms, mydata$Full.Text, ignore.case = TRUE)
)

# Tabulate the flag: number of posts in each Topic group and that group's
# share of all posts, as a percentage rounded to one decimal place.
Topic <- mydata %>%
  group_by(Topic) %>%
  summarize(Count = n()) %>%
  mutate(Percent = round(Count / nrow(mydata) * 100, 1))

# Render the summary as a left-aligned gt table with the 538 theme
TopicTable <- Topic %>%
  gt() %>%
  tab_header("Mentions of the topic") %>%
  cols_align(align = "left") %>%
  gt_theme_538()
TopicTable
# Graphing Topic posts over time

# Step 1: Make sure the Date column is a proper Date
# (leaves the column unchanged if it already is one)
mydata <- mydata %>%
  mutate(Date = as.Date(Date))

# Step 2: Keep only the posts flagged as mentioning the topic
topic_mentions <- mydata %>%
  filter(Topic == 1)

# Step 3: Count topic mentions per week, with weeks starting on Monday
# cut() labels every date with the first day of its week; converting that
# label straight back to a Date gives a plottable week-start column.
weekly_topic_mentions <- topic_mentions %>%
  mutate(
    Week = as.Date(cut(Date, breaks = "week", start.on.monday = TRUE))
  ) %>%
  group_by(Week) %>%
  summarise(Mentions = n())

# Step 4: Bar chart of weekly mention counts
Plot <- plot_ly(
  data = weekly_topic_mentions,
  x = ~Week,
  y = ~Mentions,
  type = "bar",
  marker = list(color = "#2c7fb8")
) %>%
  layout(
    title = list(
      text = "Weekly Mentions of the Topic",
      font = list(size = 20)
    ),
    xaxis = list(
      title = "Week Starting On",
      tickformat = "%Y-%m-%d"
    ),
    yaxis = list(title = "Number of Mentions"),
    bargap = 0.2
  )
Plot
# Counting phrases
# Extract three-word phrases (trigrams) to a "tidy_phrases" data frame and
# tally each distinct phrase, most frequent first.
# Posts with fewer than three words produce an NA trigram; drop those rows so
# NA is not counted as if it were a phrase.
tidy_phrases <- mydata %>%
  unnest_tokens(phrase, Full.Text, token = "ngrams", n = 3) %>%
  filter(!is.na(phrase)) %>%
  count(phrase, sort = TRUE)

# Delete custom stop phrases
# `by = "phrase"` names the join key explicitly, so dplyr neither has to
# guess it nor prints a "Joining with ..." message.
my_stopphrases <- tibble(
  phrase = c(
    "this is a",
    "should not be",
    "this is the",
    "one of the",
    "in order to"
  )
)
tidy_phrases <- tidy_phrases %>%
  anti_join(my_stopphrases, by = "phrase")

# Show the 25 most frequent remaining phrases
head(tidy_phrases, n = 25)