# Required packages
if (!require("tidyverse"))
install.packages("tidyverse")
if (!require("gmodels"))
install.packages("gmodels")
if (!require("gtsummary"))
install.packages("gtsummary")
library(tidyverse)
library(gmodels)
library(ggplot2)
library(gtsummary)
# Read from local file
SwiftData <- read.csv("SwiftData.csv")
# Getting all column names
ColumnNames <- as.data.frame(colnames(SwiftData))
# Keeping columns of interest after making a copy
# of the full dataset for reference
FullSwiftData <- SwiftData
SwiftData <- select(SwiftData,
Date,
Url,
Domain,
Page.Type,
Account.Type,
Author,
Full.Text,
X.Replies,
X.Reposts,
X.Likes,
Reach..new.)
# Removing any duplicate rows
SwiftData <- SwiftData %>%
distinct(Url, .keep_all = TRUE)
#Formatting "Date" as POSIXct object
SwiftData$Date <- as.POSIXct(SwiftData$Date, tz = "America/Chicago")
#Sorting by Date
SwiftData <- arrange(SwiftData,Date)
#Categorizing by source type
PageType <- SwiftData %>%
group_by(Page.Type,
Account.Type) %>%
summarize(
PageTypeCount = n())
PageType <- arrange(PageType,desc(PageTypeCount))
# Re-expressing "Date" as "Day," the day from "Date."
SwiftData <- SwiftData %>%
mutate(Day = floor_date(Date,
unit = "day"))
# Counting posts per day
PostsByDay <- SwiftData %>%
group_by(Day) %>%
summarize(
PostsByDay = n())
# Easter egg variables
# Check for "thanK you aIMee" egg
SwiftData <- SwiftData %>%
mutate(aIMee = case_when(grepl("thanK you aIMee", Full.Text) ~ "aIMee egg",
grepl("Kardashian", Full.Text)~ "aIMee egg",
grepl("diss track", Full.Text)~ "aIMee egg",
grepl("bully", Full.Text)~ "aIMee egg",
grepl("lessons learned", Full.Text)~ "aIMee egg",
grepl("Kim", Full.Text)~ "aIMee egg",
TRUE ~ "Other topic"))
# Check for "So High School" egg
SwiftData <- SwiftData %>%
mutate(HighSchool = case_when(grepl("So High School", Full.Text) ~ "High School egg",
grepl("Kelce", Full.Text)~ "High School egg",
grepl("Marry Kiss or Kill me", Full.Text)~ "High School egg",
grepl("how to ball", Full.Text)~ "High School egg",
grepl("I know Aristotle", Full.Text)~ "High School egg",
grepl("love track", Full.Text)~ "High School egg",
TRUE ~ "Other topic"))
# Check for "So Long, London" egg
SwiftData <- SwiftData %>%
mutate(London = case_when(grepl("So Long, London", Full.Text) ~ "London egg",
grepl("diss track", Full.Text)~ "London egg",
grepl("heartbreak song", Full.Text)~ "London egg",
grepl("Joe Alwyn", Full.Text)~ "London egg",
grepl("London Boy", Full.Text)~ "London egg",
grepl("Had a good run", Full.Text)~ "London egg",
grepl("demise of relationship", Full.Text)~ "London egg",
grepl("Every day of a love affair", Full.Text)~ "London egg",
TRUE ~ "Other topic"))
# Check for "The Black Dog" egg
SwiftData <- SwiftData %>%
mutate(BlackDog = case_when(grepl("The Black Dog", Full.Text) ~ "Black Dog egg",
grepl("pub", Full.Text)~ "Black Dog egg",
grepl("ex", Full.Text)~ "Black Dog egg",
grepl("bar", Full.Text)~ "Black Dog egg",
grepl("forgetting his location", Full.Text)~ "Black Dog egg",
TRUE ~ "Other topic"))
# Check for "The Alchemy" egg
SwiftData <- SwiftData %>%
mutate(Alchemy = case_when(grepl("The Alchemy", Full.Text) ~ "Alchemy egg",
grepl("Kelce", Full.Text)~ "Alchemy egg",
grepl("Travis", Full.Text)~ "Alchemy egg",
grepl("Cheifs", Full.Text)~ "Alchemy egg",
grepl("love song", Full.Text)~ "Alchemy egg",
grepl("Where's the trophy", Full.Text)~ "Alchemy egg",
grepl("he comes running over to me", Full.Text)~ "Alchemy egg",
grepl("So when I touch down, call the amateurs and cut em from the team", Full.Text)~ "Alchemy egg",
TRUE ~ "Other topic"))
# Check for "Cassandra" egg
SwiftData <- SwiftData %>%
mutate(Cassandra = case_when(grepl("Cassandra", Full.Text) ~ "Cassandra egg",
grepl("KimYe", Full.Text)~ "Cassandra egg",
grepl("Greek Mythology", Full.Text)~ "Cassandra egg",
grepl("snake imagery", Full.Text)~ "Cassandra egg",
grepl("doomed to see the future", Full.Text)~ "Cassandra egg",
grepl("no one believes her", Full.Text)~ "Cassandra egg",
grepl("greed", Full.Text)~ "Cassandra egg",
grepl("family", Full.Text)~ "Cassandra egg",
grepl("Kardashian-West diss track", Full.Text)~ "Cassandra egg",
TRUE ~ "Other topic"))
# Check for "I Hate it Here" egg
SwiftData <- SwiftData %>%
mutate(HateitHere = case_when(grepl("I Hate it Here", Full.Text) ~ "Hate it Here egg",
grepl("Alwyn", Full.Text)~ "Hate it Here egg",
grepl("ex", Full.Text)~ "Hate it Here egg",
grepl("relationship was painful", Full.Text)~ "Hate it Here egg",
grepl("hiding in her relationship", Full.Text)~ "Hate it Here egg",
TRUE ~ "Other topic"))
# Check for "2am Release of second album" egg
SwiftData <- SwiftData %>%
mutate(SecondAlbum = case_when(grepl("2am Release of second album", Full.Text) ~ "Second Album Time Release egg",
grepl("Clock in Midnights room was 2am", Full.Text)~ "Second Album Time Release egg",
grepl("2 fingers during album annoucement", Full.Text)~ "Second Album Time Release egg",
grepl("past mentions of 2am in past songs", Full.Text)~ "Second Album Time Release egg",
grepl("pocket watch in Bejewled video", Full.Text)~ "Second Album Time Release egg",
TRUE ~ "Other topic"))
# Check for "Album Release Date" egg
SwiftData <- SwiftData %>%
mutate(ReleaseDate = case_when(grepl("Album Release Date", Full.Text) ~ "Release Date egg",
grepl("famous dinner", Full.Text)~ "Release Date egg",
grepl("Blake Lively", Full.Text)~ "Release Date egg",
grepl("Ryan Reynolds", Full.Text)~ "Release Date egg",
grepl("annouced breakup", Full.Text)~ "Release Date egg",
grepl("April 19th", Full.Text)~ "Release Date egg",
grepl("all unfollowed Alwyn", Full.Text)~ "Release Date egg",
TRUE ~ "Other topic"))
# How many of each egg?
EggTable <- SwiftData %>%
select(aIMee,
HighSchool,
London,
BlackDog,
Alchemy,
Cassandra,
HateitHere,
SecondAlbum,
ReleaseDate) %>%
tbl_summary()
EggTable
# Quant transformation of Easter egg variables
SwiftData <- SwiftData %>%
mutate(aIMee = case_when(grepl("thanK you aIMee", Full.Text) ~ 1,
grepl("Kardashian", Full.Text)~ 1,
grepl("diss track", Full.Text)~ 1,
grepl("bully", Full.Text)~ 1,
grepl("lessons learned", Full.Text)~ 1,
grepl("Kim", Full.Text)~ 1,
TRUE ~ 0))
# Check for "So High School" egg
SwiftData <- SwiftData %>%
mutate(HighSchool = case_when(grepl("So High School", Full.Text) ~ 1,
grepl("Kelce", Full.Text)~ 1,
grepl("Marry Kiss or Kill me", Full.Text)~ 1,
grepl("how to ball", Full.Text)~ 1,
grepl("I know Aristotle", Full.Text)~ 1,
grepl("love track", Full.Text)~ 1,
TRUE ~ 0))
# Check for "So Long, London" egg
SwiftData <- SwiftData %>%
mutate(London = case_when(grepl("So Long, London", Full.Text) ~ 1,
grepl("diss track", Full.Text)~ 1,
grepl("heartbreak song", Full.Text)~ 1,
grepl("Joe Alwyn", Full.Text)~ 1,
grepl("London Boy", Full.Text)~ 1,
grepl("Had a good run", Full.Text)~ 1,
grepl("demise of relationship", Full.Text)~ 1,
grepl("Every day of a love affair", Full.Text)~ 1,
TRUE ~ 0))
# Check for "The Black Dog" egg
SwiftData <- SwiftData %>%
mutate(BlackDog = case_when(grepl("The Black Dog", Full.Text) ~ 1,
grepl("pub", Full.Text)~ 1,
grepl("ex", Full.Text)~ 1,
grepl("bar", Full.Text)~ 1,
grepl("forgetting his location", Full.Text)~ 1,
TRUE ~ 0))
# Check for "The Alchemy" egg
SwiftData <- SwiftData %>%
mutate(Alchemy = case_when(grepl("The Alchemy", Full.Text) ~ 1,
grepl("Kelce", Full.Text)~ 1,
grepl("Travis", Full.Text)~ 1,
grepl("Cheifs", Full.Text)~ 1,
grepl("love song", Full.Text)~ 1,
grepl("Where's the trophy", Full.Text)~ 1,
grepl("he comes running over to me", Full.Text)~ 1,
grepl("So when I touch down, call the amateurs and cut em from the team", Full.Text)~ 1,
TRUE ~ 0))
# Check for "Cassandra" egg
SwiftData <- SwiftData %>%
mutate(Cassandra = case_when(grepl("Cassandra", Full.Text) ~ 1,
grepl("KimYe", Full.Text)~ 1,
grepl("Greek Mythology", Full.Text)~ 1,
grepl("snake imagery", Full.Text)~ 1,
grepl("doomed to see the future", Full.Text)~ 1,
grepl("no one believes her", Full.Text)~ 1,
grepl("greed", Full.Text)~ 1,
grepl("family", Full.Text)~ 1,
grepl("Kardashian-West diss track", Full.Text)~ 1,
TRUE ~ 0))
# Check for "I Hate it Here" egg
SwiftData <- SwiftData %>%
mutate(HateitHere = case_when(grepl("I Hate it Here", Full.Text) ~ 1,
grepl("Alwyn", Full.Text)~ 1,
grepl("ex", Full.Text)~ 1,
grepl("relationship was painful", Full.Text)~ 1,
grepl("hiding in her relationship", Full.Text)~ 1,
TRUE ~ 0))
# Check for "2am Release of second album" egg
SwiftData <- SwiftData %>%
mutate(SecondAlbum = case_when(grepl("2am Release of second album", Full.Text) ~ 1,
grepl("Clock in Midnights room was 2am", Full.Text)~ 1,
grepl("2 fingers during album annoucement", Full.Text)~ 1,
grepl("past mentions of 2am in past songs", Full.Text)~ 1,
grepl("pocket watch in Bejewled video", Full.Text)~ 1,
TRUE ~ 0))
# Check for "Album Release Date" egg
SwiftData <- SwiftData %>%
mutate(ReleaseDate = case_when(grepl("Album Release Date", Full.Text) ~ 1,
grepl("famous dinner", Full.Text)~ 1,
grepl("Blake Lively", Full.Text)~ 1,
grepl("Ryan Reynolds", Full.Text)~ 1,
grepl("annouced breakup", Full.Text)~ 1,
grepl("April 19th", Full.Text)~ 1,
grepl("all unfollowed Alwyn", Full.Text)~ 1,
TRUE ~ 0))
# Exploring media vs. individuals
SwiftData <- SwiftData %>%
mutate(EasterEggs = aIMee +
HighSchool +
London +
BlackDog +
Alchemy+
Cassandra +
HateitHere +
SecondAlbum +
ReleaseDate) %>%
mutate(AnyEgg = case_when(
EasterEggs > 0 ~ "Egg",
TRUE ~ "No egg"))
# Chi-squared test
# Collapsing source categories into media and individual
SwiftData <- SwiftData %>%
mutate(SourceType =
case_when(
Page.Type == "news" ~ "News",
Page.Type == "twitter" &
Account.Type == "Organisational" ~ "News",
TRUE ~ "Individual"))
# Specify the DV and IV
SwiftData$DV <- SwiftData$AnyEgg #Edit YOURDVNAME
SwiftData$IV <- SwiftData$SourceType #Edit YOURIVNAME
# Look at the DV and IV
ggplot(SwiftData, aes(x = IV, fill = DV)) +
geom_bar(colour = "black") +
scale_fill_brewer(palette = "Paired")
# Make the crosstab table
CrossTable(
SwiftData$DV,
SwiftData$IV,
prop.chisq = FALSE,
prop.t = FALSE,
prop.r = FALSE
)
# Run the chi-squared test
options(scipen = 999)
chitestresults <- chisq.test(SwiftData$DV, SwiftData$IV)
chitestresults
The “SecondAlbum” criteria produced no hits, and the “ReleaseDate” criteria produced very few - less than 1 percent of the posts. Might the criteria need to be expanded? Here are the counts and percentages for each egg:
Characteristic | N = 168,8281 |
---|---|
aIMee | |
aIMee egg | 4,425 (2.6%) |
Other topic | 164,403 (97%) |
HighSchool | |
High School egg | 5,919 (3.5%) |
Other topic | 162,909 (96%) |
London | |
London egg | 5,617 (3.3%) |
Other topic | 163,211 (97%) |
BlackDog | |
Black Dog egg | 26,414 (16%) |
Other topic | 142,414 (84%) |
Alchemy | |
Alchemy egg | 6,711 (4.0%) |
Other topic | 162,117 (96%) |
Cassandra | |
Cassandra egg | 1,933 (1.1%) |
Other topic | 166,895 (99%) |
HateitHere | |
Hate it Here egg | 23,459 (14%) |
Other topic | 145,369 (86%) |
SecondAlbum | |
Other topic | 168,828 (100%) |
ReleaseDate | |
Other topic | 168,288 (100%) |
Release Date egg | 540 (0.3%) |
1 n (%) |
Also: With the posts categorized as having come from a “News” source or an “Individual” source, a chi-squared test indicates that “News” sources were significantly more likely mention an Easter egg than were “Individual” sources. See:
# Specify the DV and IV
SwiftData$DV <- SwiftData$AnyEgg #Edit YOURDVNAME
SwiftData$IV <- SwiftData$SourceType #Edit YOURIVNAME
# Look at the DV and IV
ggplot(SwiftData, aes(x = IV, fill = DV)) +
geom_bar(colour = "black") +
scale_fill_brewer(palette = "Paired")
# Make the crosstab table
CrossTable(
SwiftData$DV,
SwiftData$IV,
prop.chisq = FALSE,
prop.t = FALSE,
prop.r = FALSE
)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Col Total |
## |-------------------------|
##
##
## Total Observations in Table: 168828
##
##
## | SwiftData$IV
## SwiftData$DV | Individual | News | Row Total |
## -------------|------------|------------|------------|
## Egg | 18947 | 19901 | 38848 |
## | 0.175 | 0.328 | |
## -------------|------------|------------|------------|
## No egg | 89132 | 40848 | 129980 |
## | 0.825 | 0.672 | |
## -------------|------------|------------|------------|
## Column Total | 108079 | 60749 | 168828 |
## | 0.640 | 0.360 | |
## -------------|------------|------------|------------|
##
##
# Run the chi-squared test
options(scipen = 999)
chitestresults <- chisq.test(SwiftData$DV, SwiftData$IV)
chitestresults
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: SwiftData$DV and SwiftData$IV
## X-squared = 5090.2, df = 1, p-value < 0.00000000000000022