# Loading Packages
library(tidyverse)
library(tidytext)
library(textdata)
library(openssl)
library(knitr)
library(ggthemr)
library(wordcloud)
library(wordcloud2)
library(rtweet)
# Loading Datasets
read.csv("all_twitter_data.csv") -> all_twitter_data
# Adding Color Themes
ggthemr('dust', type = 'outer')
set_swatch(c("#fc8d27", "#f28422", "#e97c1f", "#df741b", "#d66d18", "#cd6517", "#c45d10", "#ba550f", "#b14d0c", "#a74507", "#9e3d03"))
## The write.csv calls are commented out for R Markdown compatibility.
## This is an example of the one-day sweep that was repeated each day from November 6, 2022 to November 13, 2022.
# Sunday, November 6, 2022
# Jetblue
search_tweets("Jetblue", token = token, n=18000, retryonratelimit = TRUE, lang="en") -> jetblue_airlines_Nov6
search_tweets("@Jetblue", token = token, n=18000, retryonratelimit = TRUE, lang="en") -> jetblue_mentions_Nov6
subset(jetblue_airlines_Nov6, select = c(-metadata, -possibly_sensitive, -quoted_status, -coordinates, -place, -retweeted_status, -entities, -geo)) -> jetblue_airlines_Nov6_reduced
subset(jetblue_mentions_Nov6, select = c(-metadata, -possibly_sensitive, -quoted_status, -coordinates, -place, -retweeted_status, -entities, -geo)) -> jetblue_mentions_Nov6_reduced
jetblue_airlines_Nov6_reduced %>%
full_join(jetblue_mentions_Nov6_reduced) -> jetblue_Nov6
# write.csv(jetblue_Nov6, "jetblue_Nov6.csv", row.names = TRUE)
# American Airlines
search_tweets("American Airlines", token = token, n=18000, retryonratelimit = TRUE, lang="en") -> AA_airlines_Nov6
search_tweets("@americanair", token = token, n=18000, retryonratelimit = TRUE, lang="en") -> AA_mentions_Nov6
subset(AA_airlines_Nov6, select = c(-metadata, -possibly_sensitive, -quoted_status, -coordinates, -place, -retweeted_status, -entities, -geo)) -> AA_airlines_Nov6_reduced
subset(AA_mentions_Nov6, select = c(-metadata, -possibly_sensitive, -quoted_status, -coordinates, -place, -retweeted_status, -entities, -geo)) -> AA_mentions_Nov6_reduced
AA_airlines_Nov6_reduced %>%
full_join(AA_mentions_Nov6_reduced) -> AA_Nov6
# write.csv(AA_Nov6, "AA_Nov6.csv", row.names = TRUE)
# Delta Airlines
search_tweets("Delta Airlines", token = token, n=18000, retryonratelimit = TRUE, lang="en") -> delta_airlines_Nov6
search_tweets("@Delta", token = token, n=18000, retryonratelimit = TRUE, lang="en") -> delta_mentions_Nov6
subset(delta_airlines_Nov6, select = c(-metadata, -possibly_sensitive, -quoted_status, -coordinates, -place, -retweeted_status, -entities, -geo)) -> delta_airlines_Nov6_reduced
subset(delta_mentions_Nov6, select = c(-metadata, -possibly_sensitive, -quoted_status, -coordinates, -place, -retweeted_status, -entities, -geo)) -> delta_mentions_Nov6_reduced
delta_airlines_Nov6_reduced %>%
full_join(delta_mentions_Nov6_reduced) -> delta_Nov6
# write.csv(delta_Nov6, "delta_Nov6.csv", row.names = TRUE)
# Southwest Airlines
search_tweets("Southwest Airlines", token = token, n=18000, retryonratelimit = TRUE, lang="en") -> SW_airlines_Nov6
search_tweets("@southwestAir", token = token, n=18000, retryonratelimit = TRUE, lang="en") -> SW_mentions_Nov6
subset(SW_airlines_Nov6, select = c(-metadata, -possibly_sensitive, -quoted_status, -coordinates, -place, -retweeted_status, -entities, -geo, -withheld_in_countries)) -> SW_airlines_Nov6_reduced
subset(SW_mentions_Nov6, select = c(-metadata, -possibly_sensitive, -quoted_status, -coordinates, -place, -retweeted_status, -entities, -geo, -withheld_in_countries)) -> SW_mentions_Nov6_reduced
SW_airlines_Nov6_reduced %>%
full_join(SW_mentions_Nov6_reduced) -> SW_Nov6
# write.csv(SW_Nov6, "Southwest_Nov6.csv", row.names = TRUE)
# Frontier Airlines
search_tweets("Frontier Airlines", token = token, n=18000, retryonratelimit = TRUE, lang="en") -> frontier_airlines_Nov6
search_tweets("@flyfrontier", token = token, n=18000, retryonratelimit = TRUE, lang="en") -> frontier_mentions_Nov6
subset(frontier_airlines_Nov6, select = c(-metadata, -possibly_sensitive, -quoted_status, -coordinates, -place, -retweeted_status, -entities, -geo, -withheld_in_countries)) -> frontier_airlines_Nov6_reduced
subset(frontier_mentions_Nov6, select = c(-metadata, -possibly_sensitive, -quoted_status, -coordinates, -place, -retweeted_status, -entities, -geo, -withheld_in_countries)) -> frontier_mentions_Nov6_reduced
frontier_airlines_Nov6_reduced %>%
full_join(frontier_mentions_Nov6_reduced) -> frontier_Nov6
# write.csv(frontier_Nov6, "frontier_Nov6.csv", row.names = TRUE)
# Alaska Airlines
search_tweets("Alaska Airlines", token = token, n=18000, retryonratelimit = TRUE, lang="en") -> alaska_airlines_Nov6
search_tweets("@alaskaair", token = token, n=18000, retryonratelimit = TRUE, lang="en") -> alaska_mentions_Nov6
subset(alaska_airlines_Nov6, select = c(-metadata, -possibly_sensitive, -quoted_status, -coordinates, -place, -retweeted_status, -entities, -geo, -withheld_in_countries)) -> alaska_airlines_Nov6_reduced
subset(alaska_mentions_Nov6, select = c(-metadata, -possibly_sensitive, -quoted_status, -coordinates, -place, -retweeted_status, -entities, -geo, -withheld_in_countries)) -> alaska_mentions_Nov6_reduced
alaska_airlines_Nov6_reduced %>%
full_join(alaska_mentions_Nov6_reduced) -> alaska_Nov6
# write.csv(alaska_Nov6, "alaska_Nov6.csv", row.names = TRUE)
# Allegiant Airlines (the "-stadium" operator excludes tweets about Allegiant Stadium)
search_tweets("Allegiant -stadium", token = token, n=18000, retryonratelimit = TRUE, lang="en") -> allegiant_airlines_Nov6
search_tweets("@allegiant", token = token, n=18000, retryonratelimit = TRUE, lang="en") -> allegiant_mentions_Nov6
subset(allegiant_airlines_Nov6, select = c(-metadata, -possibly_sensitive, -quoted_status, -coordinates, -place, -retweeted_status, -entities, -geo, -withheld_in_countries)) -> allegiant_airlines_Nov6_reduced
subset(allegiant_mentions_Nov6, select = c(-metadata, -possibly_sensitive, -quoted_status, -coordinates, -place, -retweeted_status, -entities, -geo, -withheld_in_countries)) -> allegiant_mentions_Nov6_reduced
allegiant_airlines_Nov6_reduced %>%
full_join(allegiant_mentions_Nov6_reduced) -> allegiant_Nov6
# write.csv(allegiant_Nov6, "Allegiant_Nov6.csv", row.names = TRUE)
# Hawaiian Airlines
search_tweets("Hawaiian Airlines", token = token, n=18000, retryonratelimit = TRUE, lang="en") -> hawaiian_airlines_Nov6
search_tweets("@hawaiianair", token = token, n=18000, retryonratelimit = TRUE, lang="en") -> hawaiian_mentions_Nov6
subset(hawaiian_airlines_Nov6, select = c(-metadata, -possibly_sensitive, -quoted_status, -coordinates, -place, -retweeted_status, -entities, -geo, -withheld_in_countries)) -> hawaiian_airlines_Nov6_reduced
subset(hawaiian_mentions_Nov6, select = c(-metadata, -possibly_sensitive, -quoted_status, -coordinates, -place, -retweeted_status, -entities, -geo, -withheld_in_countries)) -> hawaiian_mentions_Nov6_reduced
hawaiian_airlines_Nov6_reduced %>%
full_join(hawaiian_mentions_Nov6_reduced) -> hawaiian_Nov6
# write.csv(hawaiian_Nov6, "Hawaiian_Nov6.csv", row.names = TRUE)
# Spirit Airlines
search_tweets("Spirit Airlines", token = token, n=18000, retryonratelimit = TRUE, lang="en") -> spirit_airlines_Nov6
search_tweets("@SpiritAirlines", token = token, n=18000, retryonratelimit = TRUE, lang="en") -> spirit_mentions_Nov6
subset(spirit_airlines_Nov6, select = c(-metadata, -possibly_sensitive, -quoted_status, -coordinates, -place, -retweeted_status, -entities, -geo, -withheld_in_countries)) -> spirit_airlines_Nov6_reduced
subset(spirit_mentions_Nov6, select = c(-metadata, -possibly_sensitive, -quoted_status, -coordinates, -place, -retweeted_status, -entities, -geo, -withheld_in_countries)) -> spirit_mentions_Nov6_reduced
spirit_airlines_Nov6_reduced %>%
full_join(spirit_mentions_Nov6_reduced) -> spirit_Nov6
# write.csv(spirit_Nov6, "Spirit_Nov6.csv", row.names = TRUE)
# United Airlines
search_tweets("United Airlines", token = token, n=18000, retryonratelimit = TRUE, lang="en") -> united_airlines_Nov6
search_tweets("@united", token = token, n=18000, retryonratelimit = TRUE, lang="en") -> united_mentions_Nov6
subset(united_airlines_Nov6, select = c(-metadata, -possibly_sensitive, -quoted_status, -coordinates, -place, -retweeted_status, -entities, -geo, -withheld_in_countries)) -> united_airlines_Nov6_reduced
subset(united_mentions_Nov6, select = c(-metadata, -possibly_sensitive, -quoted_status, -coordinates, -place, -retweeted_status, -entities, -geo, -withheld_in_countries)) -> united_mentions_Nov6_reduced
united_airlines_Nov6_reduced %>%
full_join(united_mentions_Nov6_reduced) -> united_Nov6
# write.csv(united_Nov6, "united_Nov6.csv", row.names = TRUE)
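# The two-search / drop-columns / join pattern above repeats for every airline. As a sketch
# only (not the code actually run for this project), it could be wrapped in a small helper;
# drop_cols and sweep_airline are illustrative names, and the dropped columns assume the
# rtweet 0.7.x search_tweets() output used in this chunk. any_of() simply ignores columns
# that a given result does not contain (e.g. withheld_in_countries).
drop_cols <- c("metadata", "possibly_sensitive", "quoted_status", "coordinates", "place",
               "retweeted_status", "entities", "geo", "withheld_in_countries")
sweep_airline <- function(name_query, handle_query, token) {
  # search by airline name and by @handle, drop the nested/list columns, then stack the results
  by_name    <- search_tweets(name_query,   token = token, n = 18000, retryonratelimit = TRUE, lang = "en")
  by_mention <- search_tweets(handle_query, token = token, n = 18000, retryonratelimit = TRUE, lang = "en")
  by_name %>%
    select(-any_of(drop_cols)) %>%
    full_join(select(by_mention, -any_of(drop_cols)))
}
# e.g. sweep_airline("Jetblue", "@Jetblue", token) -> jetblue_Nov6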
### ----------------------------------------------------------------------------------------------------------------------------------------------------------
### ----------------------------------------------------------------------------------------------------------------------------------------------------------
## Combining Twitter Sweep
# Jetblue
read_csv("twitter/twitter_sweep_datasets/Nov6/jetblue_Nov6.csv") %>%
mutate(date = "November 6, 2022") %>%
mutate(airline = "jetblue") -> jetblue_Nov6
read_csv("twitter/twitter_sweep_datasets/Nov7/jetblue_Nov7.csv") %>%
mutate(date = "November 7, 2022") %>%
mutate(airline = "jetblue") -> jetblue_Nov7
read_csv("twitter/twitter_sweep_datasets/Nov8/jetblue_Nov8.csv") %>%
mutate(date = "November 8, 2022") %>%
mutate(airline = "jetblue") -> jetblue_Nov8
read_csv("twitter/twitter_sweep_datasets/Nov9/jetblue_Nov9.csv") %>%
mutate(date = "November 9, 2022") %>%
mutate(airline = "jetblue") -> jetblue_Nov9
read_csv("twitter/twitter_sweep_datasets/Nov10/jetblue_Nov10.csv") %>%
mutate(date = "November 10, 2022") %>%
mutate(airline = "jetblue") -> jetblue_Nov10
read_csv("twitter/twitter_sweep_datasets/Nov11/jetblue_Nov11.csv") %>%
mutate(date = "November 11, 2022") %>%
mutate(airline = "jetblue") -> jetblue_Nov11
read_csv("twitter/twitter_sweep_datasets/Nov12/jetblue_Nov12.csv") %>%
mutate(date = "November 12, 2022") %>%
mutate(airline = "jetblue") -> jetblue_Nov12
read_csv("twitter/twitter_sweep_datasets/Nov13/jetblue_Nov13.csv") %>%
mutate(date = "November 13, 2022") %>%
mutate(airline = "jetblue") -> jetblue_Nov13
jetblue_Nov6 %>%
full_join(jetblue_Nov7) %>%
full_join(jetblue_Nov8) %>%
full_join(jetblue_Nov9) %>%
full_join(jetblue_Nov10) %>%
full_join(jetblue_Nov11) %>%
full_join(jetblue_Nov12) %>%
full_join(jetblue_Nov13) -> jetblue_twitter_joined
# American
read_csv("twitter/twitter_sweep_datasets/Nov6/aa_Nov6.csv") %>%
mutate(date = "November 6, 2022") %>%
mutate(airline = "american") -> aa_Nov6
read_csv("twitter/twitter_sweep_datasets/Nov7/aa_Nov7.csv") %>%
mutate(date = "November 7, 2022") %>%
mutate(airline = "american") -> aa_Nov7
read_csv("twitter/twitter_sweep_datasets/Nov8/aa_Nov8.csv") %>%
mutate(date = "November 8, 2022") %>%
mutate(airline = "american") -> aa_Nov8
read_csv("twitter/twitter_sweep_datasets/Nov9/aa_Nov9.csv") %>%
mutate(date = "November 9, 2022") %>%
mutate(airline = "american") -> aa_Nov9
read_csv("twitter/twitter_sweep_datasets/Nov10/aa_Nov10.csv") %>%
mutate(date = "November 10, 2022") %>%
mutate(airline = "american") -> aa_Nov10
read_csv("twitter/twitter_sweep_datasets/Nov11/aa_Nov11.csv") %>%
mutate(date = "November 11, 2022") %>%
mutate(airline = "american") -> aa_Nov11
read_csv("twitter/twitter_sweep_datasets/Nov12/aa_Nov12.csv") %>%
mutate(date = "November 12, 2022") %>%
mutate(airline = "american") -> aa_Nov12
read_csv("twitter/twitter_sweep_datasets/Nov13/aa_Nov13.csv") %>%
mutate(date = "November 13, 2022") %>%
mutate(airline = "american") -> aa_Nov13
aa_Nov6 %>%
full_join(aa_Nov7) %>%
full_join(aa_Nov8) %>%
full_join(aa_Nov9) %>%
full_join(aa_Nov10) %>%
full_join(aa_Nov11) %>%
full_join(aa_Nov12) %>%
full_join(aa_Nov13) -> aa_twitter_joined
# Delta
read_csv("twitter/twitter_sweep_datasets/Nov6/delta_Nov6.csv") %>%
mutate(date = "November 6, 2022") %>%
mutate(airline = "delta") -> delta_Nov6
read_csv("twitter/twitter_sweep_datasets/Nov7/delta_Nov7.csv") %>%
mutate(date = "November 7, 2022") %>%
mutate(airline = "delta") -> delta_Nov7
read_csv("twitter/twitter_sweep_datasets/Nov8/delta_Nov8.csv") %>%
mutate(date = "November 8, 2022") %>%
mutate(airline = "delta") -> delta_Nov8
read_csv("twitter/twitter_sweep_datasets/Nov9/delta_Nov9.csv") %>%
mutate(date = "November 9, 2022") %>%
mutate(airline = "delta") -> delta_Nov9
read_csv("twitter/twitter_sweep_datasets/Nov10/delta_Nov10.csv") %>%
mutate(date = "November 10, 2022") %>%
mutate(airline = "delta") -> delta_Nov10
read_csv("twitter/twitter_sweep_datasets/Nov11/delta_Nov11.csv") %>%
mutate(date = "November 11, 2022") %>%
mutate(airline = "delta") -> delta_Nov11
read_csv("twitter/twitter_sweep_datasets/Nov12/delta_Nov12.csv") %>%
mutate(date = "November 12, 2022") %>%
mutate(airline = "delta") -> delta_Nov12
read_csv("twitter/twitter_sweep_datasets/Nov13/delta_Nov13.csv") %>%
mutate(date = "November 13, 2022") %>%
mutate(airline = "delta") -> delta_Nov13
delta_Nov6 %>%
full_join(delta_Nov7) %>%
full_join(delta_Nov8) %>%
full_join(delta_Nov9) %>%
full_join(delta_Nov10) %>%
full_join(delta_Nov11) %>%
full_join(delta_Nov12) %>%
full_join(delta_Nov13) -> delta_twitter_joined
# Spirit
read_csv("twitter/twitter_sweep_datasets/Nov6/spirit_Nov6.csv") %>%
mutate(date = "November 6, 2022") %>%
mutate(airline = "spirit") -> spirit_Nov6
read_csv("twitter/twitter_sweep_datasets/Nov7/spirit_Nov7.csv") %>%
mutate(date = "November 7, 2022") %>%
mutate(airline = "spirit") -> spirit_Nov7
read_csv("twitter/twitter_sweep_datasets/Nov8/spirit_Nov8.csv") %>%
mutate(date = "November 8, 2022") %>%
mutate(airline = "spirit") -> spirit_Nov8
read_csv("twitter/twitter_sweep_datasets/Nov9/spirit_Nov9.csv") %>%
mutate(date = "November 9, 2022") %>%
mutate(airline = "spirit") -> spirit_Nov9
read_csv("twitter/twitter_sweep_datasets/Nov10/spirit_Nov10.csv") %>%
mutate(date = "November 10, 2022") %>%
mutate(airline = "spirit") -> spirit_Nov10
read_csv("twitter/twitter_sweep_datasets/Nov11/spirit_Nov11.csv") %>%
mutate(date = "November 11, 2022") %>%
mutate(airline = "spirit") -> spirit_Nov11
read_csv("twitter/twitter_sweep_datasets/Nov12/spirit_Nov12.csv") %>%
mutate(date = "November 12, 2022") %>%
mutate(airline = "spirit") -> spirit_Nov12
read_csv("twitter/twitter_sweep_datasets/Nov13/spirit_Nov13.csv") %>%
mutate(date = "November 13, 2022") %>%
mutate(airline = "spirit") -> spirit_Nov13
spirit_Nov6 %>%
full_join(spirit_Nov7) %>%
full_join(spirit_Nov8) %>%
full_join(spirit_Nov9) %>%
full_join(spirit_Nov10) %>%
full_join(spirit_Nov11) %>%
full_join(spirit_Nov12) %>%
full_join(spirit_Nov13) -> spirit_twitter_joined
# Frontier
read_csv("twitter/twitter_sweep_datasets/Nov6/frontier_Nov6.csv") %>%
mutate(date = "November 6, 2022") %>%
mutate(airline = "frontier") -> frontier_Nov6
read_csv("twitter/twitter_sweep_datasets/Nov7/frontier_Nov7.csv") %>%
mutate(date = "November 7, 2022") %>%
mutate(airline = "frontier") -> frontier_Nov7
read_csv("twitter/twitter_sweep_datasets/Nov8/frontier_Nov8.csv") %>%
mutate(date = "November 8, 2022") %>%
mutate(airline = "frontier") -> frontier_Nov8
read_csv("twitter/twitter_sweep_datasets/Nov9/frontier_Nov9.csv") %>%
mutate(date = "November 9, 2022") %>%
mutate(airline = "frontier") -> frontier_Nov9
read_csv("twitter/twitter_sweep_datasets/Nov10/frontier_Nov10.csv") %>%
mutate(date = "November 10, 2022") %>%
mutate(airline = "frontier") -> frontier_Nov10
read_csv("twitter/twitter_sweep_datasets/Nov11/frontier_Nov11.csv") %>%
mutate(date = "November 11, 2022") %>%
mutate(airline = "frontier") -> frontier_Nov11
read_csv("twitter/twitter_sweep_datasets/Nov12/frontier_Nov12.csv") %>%
mutate(date = "November 12, 2022") %>%
mutate(airline = "frontier") -> frontier_Nov12
read_csv("twitter/twitter_sweep_datasets/Nov13/frontier_Nov13.csv") %>%
mutate(date = "November 13, 2022") %>%
mutate(airline = "frontier") -> frontier_Nov13
frontier_Nov6 %>%
full_join(frontier_Nov7) %>%
full_join(frontier_Nov8) %>%
full_join(frontier_Nov9) %>%
full_join(frontier_Nov10) %>%
full_join(frontier_Nov11) %>%
full_join(frontier_Nov12) %>%
full_join(frontier_Nov13) -> frontier_twitter_joined
# Alaska
read_csv("twitter/twitter_sweep_datasets/Nov6/alaska_Nov6.csv") %>%
mutate(date = "November 6, 2022") %>%
mutate(airline = "alaska") -> alaska_Nov6
read_csv("twitter/twitter_sweep_datasets/Nov7/alaska_Nov7.csv") %>%
mutate(date = "November 7, 2022") %>%
mutate(airline = "alaska") -> alaska_Nov7
read_csv("twitter/twitter_sweep_datasets/Nov8/alaska_Nov8.csv") %>%
mutate(date = "November 8, 2022") %>%
mutate(airline = "alaska") -> alaska_Nov8
read_csv("twitter/twitter_sweep_datasets/Nov9/alaska_Nov9.csv") %>%
mutate(date = "November 9, 2022") %>%
mutate(airline = "alaska") -> alaska_Nov9
read_csv("twitter/twitter_sweep_datasets/Nov10/alaska_Nov10.csv") %>%
mutate(date = "November 10, 2022") %>%
mutate(airline = "alaska") -> alaska_Nov10
read_csv("twitter/twitter_sweep_datasets/Nov11/alaska_Nov11.csv") %>%
mutate(date = "November 11, 2022") %>%
mutate(airline = "alaska") -> alaska_Nov11
read_csv("twitter/twitter_sweep_datasets/Nov12/alaska_Nov12.csv") %>%
mutate(date = "November 12, 2022") %>%
mutate(airline = "alaska") -> alaska_Nov12
read_csv("twitter/twitter_sweep_datasets/Nov13/alaska_Nov13.csv") %>%
mutate(date = "November 13, 2022") %>%
mutate(airline = "alaska") -> alaska_Nov13
alaska_Nov6 %>%
full_join(alaska_Nov7) %>%
full_join(alaska_Nov8) %>%
full_join(alaska_Nov9) %>%
full_join(alaska_Nov10) %>%
full_join(alaska_Nov11) %>%
full_join(alaska_Nov12) %>%
full_join(alaska_Nov13) -> alaska_twitter_joined
# Allegiant
read_csv("twitter/twitter_sweep_datasets/Nov6/allegiant_Nov6.csv") %>%
mutate(date = "November 6, 2022") %>%
mutate(airline = "allegiant") -> allegiant_Nov6
read_csv("twitter/twitter_sweep_datasets/Nov7/allegiant_Nov7.csv") %>%
mutate(date = "November 7, 2022") %>%
mutate(airline = "allegiant") -> allegiant_Nov7
read_csv("twitter/twitter_sweep_datasets/Nov8/allegiant_Nov8.csv") %>%
mutate(date = "November 8, 2022") %>%
mutate(airline = "allegiant") -> allegiant_Nov8
read_csv("twitter/twitter_sweep_datasets/Nov9/allegiant_Nov9.csv") %>%
mutate(date = "November 9, 2022") %>%
mutate(airline = "allegiant") -> allegiant_Nov9
read_csv("twitter/twitter_sweep_datasets/Nov10/allegiant_Nov10.csv") %>%
mutate(date = "November 10, 2022") %>%
mutate(airline = "allegiant") -> allegiant_Nov10
read_csv("twitter/twitter_sweep_datasets/Nov11/allegiant_Nov11.csv") %>%
mutate(date = "November 11, 2022") %>%
mutate(airline = "allegiant") -> allegiant_Nov11
read_csv("twitter/twitter_sweep_datasets/Nov12/allegiant_Nov12.csv") %>%
mutate(date = "November 12, 2022") %>%
mutate(airline = "allegiant") -> allegiant_Nov12
read_csv("twitter/twitter_sweep_datasets/Nov13/allegiant_Nov13.csv") %>%
mutate(date = "November 13, 2022") %>%
mutate(airline = "allegiant") -> allegiant_Nov13
allegiant_Nov6 %>%
full_join(allegiant_Nov7) %>%
full_join(allegiant_Nov8) %>%
full_join(allegiant_Nov9) %>%
full_join(allegiant_Nov10) %>%
full_join(allegiant_Nov11) %>%
full_join(allegiant_Nov12) %>%
full_join(allegiant_Nov13) -> allegiant_twitter_joined
# Southwest
read_csv("twitter/twitter_sweep_datasets/Nov6/southwest_Nov6.csv") %>%
mutate(date = "November 6, 2022") %>%
mutate(airline = "southwest") -> southwest_Nov6
read_csv("twitter/twitter_sweep_datasets/Nov7/southwest_Nov7.csv") %>%
mutate(date = "November 7, 2022") %>%
mutate(airline = "southwest") -> southwest_Nov7
read_csv("twitter/twitter_sweep_datasets/Nov8/southwest_Nov8.csv") %>%
mutate(date = "November 8, 2022") %>%
mutate(airline = "southwest") -> southwest_Nov8
read_csv("twitter/twitter_sweep_datasets/Nov9/southwest_Nov9.csv") %>%
mutate(date = "November 9, 2022") %>%
mutate(airline = "southwest") -> southwest_Nov9
read_csv("twitter/twitter_sweep_datasets/Nov10/southwest_Nov10.csv") %>%
mutate(date = "November 10, 2022") %>%
mutate(airline = "southwest") -> southwest_Nov10
read_csv("twitter/twitter_sweep_datasets/Nov11/southwest_Nov11.csv") %>%
mutate(date = "November 11, 2022") %>%
mutate(airline = "southwest") -> southwest_Nov11
read_csv("twitter/twitter_sweep_datasets/Nov12/southwest_Nov12.csv") %>%
mutate(date = "November 12, 2022") %>%
mutate(airline = "southwest") -> southwest_Nov12
read_csv("twitter/twitter_sweep_datasets/Nov13/southwest_Nov13.csv") %>%
mutate(date = "November 13, 2022") %>%
mutate(airline = "southwest") -> southwest_Nov13
southwest_Nov6 %>%
full_join(southwest_Nov7) %>%
full_join(southwest_Nov8) %>%
full_join(southwest_Nov9) %>%
full_join(southwest_Nov10) %>%
full_join(southwest_Nov11) %>%
full_join(southwest_Nov12) %>%
full_join(southwest_Nov13) -> southwest_twitter_joined
# Hawaiian
read_csv("twitter/twitter_sweep_datasets/Nov6/hawaiian_Nov6.csv") %>%
mutate(date = "November 6, 2022") %>%
mutate(airline = "hawaiian") -> hawaiian_Nov6
read_csv("twitter/twitter_sweep_datasets/Nov7/hawaiian_Nov7.csv") %>%
mutate(date = "November 7, 2022") %>%
mutate(airline = "hawaiian") -> hawaiian_Nov7
read_csv("twitter/twitter_sweep_datasets/Nov8/hawaiian_Nov8.csv") %>%
mutate(date = "November 8, 2022") %>%
mutate(airline = "hawaiian") -> hawaiian_Nov8
read_csv("twitter/twitter_sweep_datasets/Nov9/hawaiian_Nov9.csv") %>%
mutate(date = "November 9, 2022") %>%
mutate(airline = "hawaiian") -> hawaiian_Nov9
read_csv("twitter/twitter_sweep_datasets/Nov10/hawaiian_Nov10.csv") %>%
mutate(date = "November 10, 2022") %>%
mutate(airline = "hawaiian") -> hawaiian_Nov10
read_csv("twitter/twitter_sweep_datasets/Nov11/hawaiian_Nov11.csv") %>%
mutate(date = "November 11, 2022") %>%
mutate(airline = "hawaiian") -> hawaiian_Nov11
read_csv("twitter/twitter_sweep_datasets/Nov12/hawaiian_Nov12.csv") %>%
mutate(date = "November 12, 2022") %>%
mutate(airline = "hawaiian") -> hawaiian_Nov12
read_csv("twitter/twitter_sweep_datasets/Nov13/hawaiian_Nov13.csv") %>%
mutate(date = "November 13, 2022") %>%
mutate(airline = "hawaiian") -> hawaiian_Nov13
hawaiian_Nov6 %>%
full_join(hawaiian_Nov7) %>%
full_join(hawaiian_Nov8) %>%
full_join(hawaiian_Nov9) %>%
full_join(hawaiian_Nov10) %>%
full_join(hawaiian_Nov11) %>%
full_join(hawaiian_Nov12) %>%
full_join(hawaiian_Nov13) -> hawaiian_twitter_joined
# United
read_csv("twitter/twitter_sweep_datasets/Nov6/united_Nov6.csv") %>%
mutate(date = "November 6, 2022") %>%
mutate(airline = "united") -> united_Nov6
read_csv("twitter/twitter_sweep_datasets/Nov7/united_Nov7.csv") %>%
mutate(date = "November 7, 2022") %>%
mutate(airline = "united") -> united_Nov7
read_csv("twitter/twitter_sweep_datasets/Nov8/united_Nov8.csv") %>%
mutate(date = "November 8, 2022") %>%
mutate(airline = "united") -> united_Nov8
read_csv("twitter/twitter_sweep_datasets/Nov9/united_Nov9.csv") %>%
mutate(date = "November 9, 2022") %>%
mutate(airline = "united") -> united_Nov9
read_csv("twitter/twitter_sweep_datasets/Nov10/united_Nov10.csv") %>%
mutate(date = "November 10, 2022") %>%
mutate(airline = "united") -> united_Nov10
read_csv("twitter/twitter_sweep_datasets/Nov11/united_Nov11.csv") %>%
mutate(date = "November 11, 2022") %>%
mutate(airline = "united") -> united_Nov11
read_csv("twitter/twitter_sweep_datasets/Nov12/united_Nov12.csv") %>%
mutate(date = "November 12, 2022") %>%
mutate(airline = "united") -> united_Nov12
read_csv("twitter/twitter_sweep_datasets/Nov13/united_Nov13.csv") %>%
mutate(date = "November 13, 2022") %>%
mutate(airline = "united") -> united_Nov13
united_Nov6 %>%
full_join(united_Nov7) %>%
full_join(united_Nov8) %>%
full_join(united_Nov9) %>%
full_join(united_Nov10) %>%
full_join(united_Nov11) %>%
full_join(united_Nov12) %>%
full_join(united_Nov13) -> united_twitter_joined
# Combining all the twitter data
jetblue_twitter_joined %>%
full_join(alaska_twitter_joined) %>%
full_join(spirit_twitter_joined) %>%
full_join(aa_twitter_joined) %>%
full_join(hawaiian_twitter_joined) %>%
full_join(frontier_twitter_joined) %>%
full_join(united_twitter_joined) %>%
full_join(southwest_twitter_joined) %>%
full_join(delta_twitter_joined) %>%
full_join(allegiant_twitter_joined) -> all_twitter_data
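# The per-day, per-airline reads above could also be generated programmatically. A commented-out
# sketch (assuming the same "twitter/twitter_sweep_datasets/Nov<day>/<file>_Nov<day>.csv" layout
# and compatible column types across files; files, day, a and all_twitter_data_alt are
# illustrative names):
# files <- c(jetblue = "jetblue", american = "aa", delta = "delta", spirit = "spirit",
#            frontier = "frontier", alaska = "alaska", allegiant = "allegiant",
#            southwest = "southwest", hawaiian = "hawaiian", united = "united")
# all_twitter_data_alt <- map_dfr(6:13, function(day) {
#   map_dfr(names(files), function(a) {
#     read_csv(str_glue("twitter/twitter_sweep_datasets/Nov{day}/{files[[a]]}_Nov{day}.csv")) %>%
#       mutate(date = as.character(str_glue("November {day}, 2022")),
#              airline = a)
#   })
# })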
# Here is where all_twitter_data is exported as "all_twitter_data.csv"
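# e.g., presumably a commented-out call like the per-day exports above:
# write.csv(all_twitter_data, "all_twitter_data.csv", row.names = TRUE)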
This analysis uses data scraped from Twitter with the rtweet package in R. Tweets were collected over an eight-day period, from November 6, 2022 to November 13, 2022. The code chunk above shows one day of collection for the ten domestic airlines covered in this project; the same sweep was repeated each day. Once all of the data had been scraped, it was combined and exported as a single Twitter data file, which can be found here.
This wordcloud shows the words that appear most often across the analyzed tweets. Interestingly, in a project centered on on-time performance, the most prevalent word is “delayed”. The wordcloud also makes it clear that negative words greatly outnumber positive ones. While there are a few outliers such as “love”, this negativity is likely because customers tend to tweet only when something goes wrong: when things go smoothly they have little motivation to react, whereas a bad experience such as a delay gives them a reason to vent.
all_twitter_data %>%
unnest_tokens(word, text) %>%
anti_join(stop_words) %>%
filter(!word %in% c("t.co", "https", "de", "el", "la", "rt", "airlines", "united", "delta", "american", "jetblue", "frontier", "spirit", "approved", "broke")) %>%
inner_join(get_sentiments('afinn')) -> twitter_wordcloud
twitter_wordcloud %>%
group_by(word) %>%
count() %>%
arrange(desc(n)) %>%
wordcloud2(backgroundColor = "#faf7f2", size = 5, color = "#9e3d03")
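# The wordcloud package loaded above could render a static (non-HTML-widget) version of the
# same cloud; a commented-out sketch, with max.words = 200 as an arbitrary illustrative cap:
# twitter_wordcloud %>%
#   count(word, sort = TRUE) %>%
#   with(wordcloud(word, n, max.words = 200, colors = "#9e3d03"))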
all_twitter_data %>%
filter(airline == "american") -> american_twitter
all_twitter_data %>%
filter(airline == "jetblue") -> jetblue_twitter
all_twitter_data %>%
filter(airline == "spirit") -> spirit_twitter
all_twitter_data %>%
filter(airline == "frontier") -> frontier_twitter
all_twitter_data %>%
filter(airline == "united") -> united_twitter
all_twitter_data %>%
filter(airline == "alaska") -> alaska_twitter
all_twitter_data %>%
filter(airline == "hawaiian") -> hawaiian_twitter
all_twitter_data %>%
filter(airline == "allegiant") -> allegiant_twitter
all_twitter_data %>%
filter(airline == "southwest") -> southwest_twitter
all_twitter_data %>%
filter(airline == "delta") -> delta_twitter
## ---------------------------------------------------------------------------------------
# Filtering the data for sentiments by airline
jetblue_twitter %>%
unnest_tokens(word, text) %>%
anti_join(stop_words) %>%
filter(!word %in% c("t.co", "https", "de", "el", "la", "rt")) %>%
inner_join(get_sentiments('afinn')) -> jetblue_filtered
american_twitter %>%
unnest_tokens(word, text) %>%
anti_join(stop_words) %>%
filter(!word %in% c("t.co", "https", "de", "el", "la", "rt")) %>%
inner_join(get_sentiments('afinn')) -> american_filtered
delta_twitter %>%
unnest_tokens(word, text) %>%
anti_join(stop_words) %>%
filter(!word %in% c("t.co", "https", "de", "el", "la", "rt")) %>%
inner_join(get_sentiments('afinn')) -> delta_filtered
united_twitter %>%
unnest_tokens(word, text) %>%
anti_join(stop_words) %>%
filter(!word %in% c("t.co", "https", "de", "el", "la", "rt")) %>%
inner_join(get_sentiments('afinn')) -> united_filtered
hawaiian_twitter %>%
unnest_tokens(word, text) %>%
anti_join(stop_words) %>%
filter(!word %in% c("t.co", "https", "de", "el", "la", "rt")) %>%
inner_join(get_sentiments('afinn')) -> hawaiian_filtered
spirit_twitter %>%
unnest_tokens(word, text) %>%
anti_join(stop_words) %>%
filter(!word %in% c("t.co", "https", "de", "el", "la", "rt")) %>%
inner_join(get_sentiments('afinn')) -> spirit_filtered
allegiant_twitter %>%
unnest_tokens(word, text) %>%
anti_join(stop_words) %>%
filter(!word %in% c("t.co", "https", "de", "el", "la", "rt")) %>%
inner_join(get_sentiments('afinn')) -> allegiant_filtered
frontier_twitter %>%
unnest_tokens(word, text) %>%
anti_join(stop_words) %>%
filter(!word %in% c("t.co", "https", "de", "el", "la", "rt")) %>%
inner_join(get_sentiments('afinn')) -> frontier_filtered
southwest_twitter %>%
unnest_tokens(word, text) %>%
anti_join(stop_words) %>%
filter(!word %in% c("t.co", "https", "de", "el", "la", "rt")) %>%
inner_join(get_sentiments('afinn')) -> southwest_filtered
alaska_twitter %>%
unnest_tokens(word, text) %>%
anti_join(stop_words) %>%
filter(!word %in% c("t.co", "https", "de", "el", "la", "rt")) %>%
inner_join(get_sentiments('afinn')) -> alaska_filtered
# -----------------------------------------------------------------------------------
# Finding Individual average sentiment of words by airline
mean(jetblue_filtered$value, na.rm = TRUE) -> jetblue_sentiment_avg
mean(alaska_filtered$value, na.rm = TRUE) -> alaska_sentiment_avg
mean(spirit_filtered$value, na.rm = TRUE) -> spirit_sentiment_avg
mean(hawaiian_filtered$value, na.rm = TRUE) -> hawaiian_sentiment_avg
mean(allegiant_filtered$value, na.rm = TRUE) -> allegiant_sentiment_avg
mean(frontier_filtered$value, na.rm = TRUE) -> frontier_sentiment_avg
mean(american_filtered$value, na.rm = TRUE) -> american_sentiment_avg
mean(united_filtered$value, na.rm = TRUE) -> united_sentiment_avg
mean(southwest_filtered$value, na.rm = TRUE) -> southwest_sentiment_avg
mean(delta_filtered$value, na.rm = TRUE) -> delta_sentiment_avg
# Creating new dataframe based on averages
sentiment_avg_numbers <- c(jetblue_sentiment_avg, alaska_sentiment_avg, spirit_sentiment_avg, hawaiian_sentiment_avg, allegiant_sentiment_avg,
frontier_sentiment_avg, american_sentiment_avg, united_sentiment_avg, southwest_sentiment_avg, delta_sentiment_avg)
names <- c('jetblue', 'alaska', 'spirit', 'hawaiian', 'allegiant', 'frontier', 'american', 'united', 'southwest', 'delta')
sentiment_avg <- data.frame(names, sentiment_avg_numbers)
sentiment_avg %>%
arrange(desc(sentiment_avg_numbers)) -> arranged_sentiment_avg
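# As a compact alternative sketch, the same per-airline averages can be computed in one grouped
# pipeline (same tokenization, stop-word removal, and AFINN join as above); sentiment_avg_grouped
# is an illustrative name.
all_twitter_data %>%
  unnest_tokens(word, text) %>%
  anti_join(stop_words) %>%
  filter(!word %in% c("t.co", "https", "de", "el", "la", "rt")) %>%
  inner_join(get_sentiments('afinn')) %>%
  group_by(airline) %>%
  summarise(sentiment_avg_numbers = mean(value, na.rm = TRUE)) %>%
  arrange(desc(sentiment_avg_numbers)) -> sentiment_avg_grouped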
This plot shows the average AFINN sentiment of the words in each airline's tweets. There is a very large range, with Frontier having a strongly negative average sentiment and Alaska a strongly positive one. Comparing this to the other parts of this project, on-time performance and market share, Twitter sentiment does not appear to correlate with either. Delta and Allegiant are a good example: both sit in the middle of the sentiment range even though they differ drastically in on-time performance and market share.
#Plotting the average sentiment by airline
ggplot(arranged_sentiment_avg, aes(reorder(names, sentiment_avg_numbers), sentiment_avg_numbers, fill = names)) + geom_col() + coord_flip() + labs(x = "Airlines", y = "Average Sentiment") + labs(title = "Average Sentiment by Airline") + theme(legend.position = "none")
# Table of average sentiment by airline
kable(arranged_sentiment_avg)
| names | sentiment_avg_numbers |
|---|---|
| alaska | 0.6922380 |
| hawaiian | 0.4797765 |
| united | 0.4084426 |
| southwest | 0.2475039 |
| delta | -0.0350613 |
| american | -0.1373984 |
| allegiant | -0.1461276 |
| spirit | -0.1571919 |
| jetblue | -0.2577367 |
| frontier | -0.5826468 |
This plot shows the number of delay words per tweet for each airline. The words counted as delay words are “delay”, “delayed”, “late”, “wait”, “long”, “hold up”, and “forever”. This metric again shows no clear correlation with the other parts of this project: Jetblue and Delta have the highest delay words per tweet, which is not representative of the on-time analysis.
# Separation of Delay Words
# Note: unnest_tokens() splits tweets into single words, so the two-word phrase "hold up" will never match a token.
delay_words <- c("late", "delay", "delayed", "wait", "long", "hold up", "forever")
# Looking at the number of delay words by airline
all_twitter_data %>%
group_by(airline) %>%
unnest_tokens(word, text) %>%
filter(word %in% delay_words) %>%
count(word, sort = TRUE) -> all_twitter_data_sorted
# Finding delay words count by airline
sum(all_twitter_data_sorted[which(all_twitter_data_sorted$airline == "jetblue"), 3]) -> jetblue_delaywords_count
sum(all_twitter_data_sorted[which(all_twitter_data_sorted$airline == "alaska"), 3]) -> alaska_delaywords_count
sum(all_twitter_data_sorted[which(all_twitter_data_sorted$airline == "spirit"), 3]) -> spirit_delaywords_count
sum(all_twitter_data_sorted[which(all_twitter_data_sorted$airline == "hawaiian"), 3]) -> hawaiian_delaywords_count
sum(all_twitter_data_sorted[which(all_twitter_data_sorted$airline == "allegiant"), 3]) -> allegiant_delaywords_count
sum(all_twitter_data_sorted[which(all_twitter_data_sorted$airline == "frontier"), 3]) -> frontier_delaywords_count
sum(all_twitter_data_sorted[which(all_twitter_data_sorted$airline == "american"), 3]) -> american_delaywords_count
sum(all_twitter_data_sorted[which(all_twitter_data_sorted$airline == "united"), 3]) -> united_delaywords_count
sum(all_twitter_data_sorted[which(all_twitter_data_sorted$airline == "southwest"), 3]) -> southwest_delaywords_count
sum(all_twitter_data_sorted[which(all_twitter_data_sorted$airline == "delta"), 3]) -> delta_delaywords_count
# Creating dataframe with delay words count and airlines
delaywords_count <- c(jetblue_delaywords_count, alaska_delaywords_count, spirit_delaywords_count, hawaiian_delaywords_count, allegiant_delaywords_count,
                      frontier_delaywords_count, american_delaywords_count, united_delaywords_count, southwest_delaywords_count, delta_delaywords_count)
airline <- c('jetblue', 'alaska', 'spirit', 'hawaiian', 'allegiant', 'frontier', 'american', 'united', 'southwest', 'delta')
delaywords_count_df <- data.frame(airline, delaywords_count)
delaywords_count_df %>%
arrange(desc(delaywords_count)) -> arranged_delaywords_count
# Finding Total Number of Tweets Per Airline
all_twitter_data %>%
group_by(airline) %>%
count(airline) %>%
arrange(desc(n)) -> tweets_count_airlines
# Combining the delay words and total count together
delaywords_count_df %>%
full_join(tweets_count_airlines, by = 'airline') -> expanded_delaywords_df
colnames(expanded_delaywords_df)[3] = "total_tweets"
# Adding the delay words per tweet column
expanded_delaywords_df$delaywords_per_tweet <- (expanded_delaywords_df$delaywords_count / expanded_delaywords_df$total_tweets)
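# As a sketch, the delay-word counts and the per-tweet rate could also be computed in a single
# pipeline (same delay_words vector as above); expanded_delaywords_check is an illustrative name.
all_twitter_data %>%
  unnest_tokens(word, text) %>%
  group_by(airline) %>%
  summarise(delaywords_count = sum(word %in% delay_words)) %>%
  left_join(count(all_twitter_data, airline, name = "total_tweets"), by = "airline") %>%
  mutate(delaywords_per_tweet = delaywords_count / total_tweets) -> expanded_delaywords_check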
#Plotting the average delay words per tweet
ggplot(expanded_delaywords_df, aes(reorder(airline, delaywords_per_tweet), delaywords_per_tweet, fill = airline)) + geom_col() + coord_flip() + labs(x = "Airlines", y = "Delay Words per Tweet") + labs(title = "Delay Words per Tweet by Airline") + theme(legend.position = "none")
#Table with all the info
kable(expanded_delaywords_df)
| airline | delaywords_count | total_tweets | delaywords_per_tweet |
|---|---|---|---|
| jetblue | 8972 | 36676 | 0.2446286 |
| alaska | 347 | 11524 | 0.0301111 |
| spirit | 1284 | 23503 | 0.0546313 |
| hawaiian | 258 | 5526 | 0.0466884 |
| allegiant | 218 | 5505 | 0.0396004 |
| frontier | 749 | 12594 | 0.0594728 |
| american | 7746 | 112043 | 0.0691342 |
| united | 1292 | 112609 | 0.0114733 |
| southwest | 1028 | 45371 | 0.0226576 |
| delta | 8972 | 72911 | 0.1230541 |
This Twitter analysis has consistently failed to match the findings of the rest of this project. There are a few possible reasons for this, including that Twitter data is noisy and can be unreliable for this kind of analysis. With that said, airlines that work hard to achieve the best on-time performance should take this analysis into consideration and adapt their social media strategy accordingly. Twitter has the power to shape consumers' perception of a company, and an airline with the best on-time performance, such as Delta, should try to get that message out and mitigate any negativity directed toward it.