# Loading Packages
library(tidyverse)
library(tidytext)
library(textdata)
library(openssl)
library(knitr)
library(ggthemr)
library(wordcloud)
library(wordcloud2)
library(rtweet)

# Loading Datasets
# Read the combined tweet export (all sweeps, all airlines) from disk.
all_twitter_data <- read.csv("all_twitter_data.csv")

# Plot styling: apply the ggthemr "dust" theme, then install an 11-colour swatch.
ggthemr("dust", type = "outer")
set_swatch(c(
  "#fc8d27", "#f28422", "#e97c1f", "#df741b", "#d66d18", "#cd6517",
  "#c45d10", "#ba550f", "#b14d0c", "#a74507", "#9e3d03"
))
## The write.csv() calls are commented out for R Markdown compatibility.
## This is an example of a single day's sweep; the same steps were repeated every day from November 6, 2022 to November 13, 2022.

# Sunday November 6, 2022
#
# For every airline two searches are run -- one on the airline's name and one
# on its @-handle -- a fixed set of columns is dropped from each result, and
# the two results are merged into a single `<airline>_Nov6` table.
# NOTE(review): `token` is not defined in the visible part of this script; it
# must already exist in the session when this section runs.

# Columns removed from every search result before the two searches are merged.
sweep_drop_cols <- c(
  "metadata", "possibly_sensitive", "quoted_status", "coordinates", "place",
  "retweeted_status", "entities", "geo"
)

# Run one English-language search for `query`, asking for up to 18000 tweets
# and retrying when the rate limit is hit.
search_sweep <- function(query) {
  search_tweets(query, token = token, n = 18000, retryonratelimit = TRUE, lang = "en")
}

# Drop the unwanted columns from one search result.
#   tweets : data frame returned by search_sweep()
#   also   : extra column names to drop in addition to `sweep_drop_cols`
# any_of() ignores names that are not present in `tweets`, whereas
# subset(select = -col) fails when a listed column is missing.
drop_sweep_cols <- function(tweets, also = character()) {
  select(tweets, -any_of(c(sweep_drop_cols, also)))
}

# Jetblue
jetblue_airlines_Nov6 <- search_sweep("Jetblue")
jetblue_mentions_Nov6 <- search_sweep("@Jetblue")

jetblue_airlines_Nov6_reduced <- drop_sweep_cols(jetblue_airlines_Nov6)
jetblue_mentions_Nov6_reduced <- drop_sweep_cols(jetblue_mentions_Nov6)

jetblue_Nov6 <- full_join(jetblue_airlines_Nov6_reduced, jetblue_mentions_Nov6_reduced)

# write.csv(jetblue_Nov6, "jetblue_Nov6.csv", row.names = TRUE)

# American Airlines
AA_airlines_Nov6 <- search_sweep("American Airlines")
AA_mentions_Nov6 <- search_sweep("@americanair")

AA_airlines_Nov6_reduced <- drop_sweep_cols(AA_airlines_Nov6)
AA_mentions_Nov6_reduced <- drop_sweep_cols(AA_mentions_Nov6)

AA_Nov6 <- full_join(AA_airlines_Nov6_reduced, AA_mentions_Nov6_reduced)

# write.csv(AA_Nov6, "AA_Nov6.csv", row.names = TRUE)

# Delta Airlines
delta_airlines_Nov6 <- search_sweep("Delta Airlines")
delta_mentions_Nov6 <- search_sweep("@Delta")

delta_airlines_Nov6_reduced <- drop_sweep_cols(delta_airlines_Nov6)
delta_mentions_Nov6_reduced <- drop_sweep_cols(delta_mentions_Nov6)

delta_Nov6 <- full_join(delta_airlines_Nov6_reduced, delta_mentions_Nov6_reduced)

# write.csv(delta_Nov6, "delta_Nov6.csv", row.names = TRUE)

# From here on `withheld_in_countries` is dropped as well.

# Southwest Airlines
SW_airlines_Nov6 <- search_sweep("Southwest Airlines")
SW_mentions_Nov6 <- search_sweep("@southwestAir")

SW_airlines_Nov6_reduced <- drop_sweep_cols(SW_airlines_Nov6, also = "withheld_in_countries")
SW_mentions_Nov6_reduced <- drop_sweep_cols(SW_mentions_Nov6, also = "withheld_in_countries")

SW_Nov6 <- full_join(SW_airlines_Nov6_reduced, SW_mentions_Nov6_reduced)

# write.csv(SW_Nov6, "Southwest_Nov6.csv", row.names = TRUE)

# Frontier Airlines
frontier_airlines_Nov6 <- search_sweep("Frontier Airlines")
frontier_mentions_Nov6 <- search_sweep("@flyfrontier")

frontier_airlines_Nov6_reduced <- drop_sweep_cols(frontier_airlines_Nov6, also = "withheld_in_countries")
frontier_mentions_Nov6_reduced <- drop_sweep_cols(frontier_mentions_Nov6, also = "withheld_in_countries")

frontier_Nov6 <- full_join(frontier_airlines_Nov6_reduced, frontier_mentions_Nov6_reduced)

# write.csv(frontier_Nov6, "frontier_Nov6.csv", row.names = TRUE)

# Alaska Airlines
alaska_airlines_Nov6 <- search_sweep("Alaska Airlines")
alaska_mentions_Nov6 <- search_sweep("@alaskaair")

alaska_airlines_Nov6_reduced <- drop_sweep_cols(alaska_airlines_Nov6, also = "withheld_in_countries")
alaska_mentions_Nov6_reduced <- drop_sweep_cols(alaska_mentions_Nov6, also = "withheld_in_countries")

alaska_Nov6 <- full_join(alaska_airlines_Nov6_reduced, alaska_mentions_Nov6_reduced)

# write.csv(alaska_Nov6, "alaska_Nov6.csv", row.names = TRUE)

# Allegiant Airlines
allegiant_airlines_Nov6 <- search_sweep("Allegiant -stadium")
allegiant_mentions_Nov6 <- search_sweep("@allegiant")

allegiant_airlines_Nov6_reduced <- drop_sweep_cols(allegiant_airlines_Nov6, also = "withheld_in_countries")
allegiant_mentions_Nov6_reduced <- drop_sweep_cols(allegiant_mentions_Nov6, also = "withheld_in_countries")

allegiant_Nov6 <- full_join(allegiant_airlines_Nov6_reduced, allegiant_mentions_Nov6_reduced)

# write.csv(allegiant_Nov6, "Allegiant_Nov6.csv", row.names = TRUE)

# Hawaiian Airlines
hawaiian_airlines_Nov6 <- search_sweep("Hawaiian Airlines")
hawaiian_mentions_Nov6 <- search_sweep("@hawaiianair")

hawaiian_airlines_Nov6_reduced <- drop_sweep_cols(hawaiian_airlines_Nov6, also = "withheld_in_countries")
hawaiian_mentions_Nov6_reduced <- drop_sweep_cols(hawaiian_mentions_Nov6, also = "withheld_in_countries")

hawaiian_Nov6 <- full_join(hawaiian_airlines_Nov6_reduced, hawaiian_mentions_Nov6_reduced)

# write.csv(hawaiian_Nov6, "Hawaiian_Nov6.csv", row.names = TRUE)

# Spirit Airlines
spirit_airlines_Nov6 <- search_sweep("Spirit Airlines")
spirit_mentions_Nov6 <- search_sweep("@SpiritAirlines")

spirit_airlines_Nov6_reduced <- drop_sweep_cols(spirit_airlines_Nov6, also = "withheld_in_countries")
spirit_mentions_Nov6_reduced <- drop_sweep_cols(spirit_mentions_Nov6, also = "withheld_in_countries")

spirit_Nov6 <- full_join(spirit_airlines_Nov6_reduced, spirit_mentions_Nov6_reduced)

# write.csv(spirit_Nov6, "Spirit_Nov6.csv", row.names = TRUE)

# United Airlines
united_airlines_Nov6 <- search_sweep("United Airlines")
united_mentions_Nov6 <- search_sweep("@united")

united_airlines_Nov6_reduced <- drop_sweep_cols(united_airlines_Nov6, also = "withheld_in_countries")
united_mentions_Nov6_reduced <- drop_sweep_cols(united_mentions_Nov6, also = "withheld_in_countries")

united_Nov6 <- full_join(united_airlines_Nov6_reduced, united_mentions_Nov6_reduced)

# write.csv(united_Nov6, "united_Nov6.csv", row.names = TRUE)



### ----------------------------------------------------------------------------------------------------------------------------------------------------------
### ----------------------------------------------------------------------------------------------------------------------------------------------------------
## Combining Twitter Sweep

# Load every daily sweep file for one airline.
#
#   file_stub     : prefix used in the file names and in the per-day object
#                   names, e.g. "jetblue" for ".../Nov6/jetblue_Nov6.csv"
#   airline_label : value written to the `airline` column
#   days          : days of November 2022 to load
#
# Returns a list of data frames named "<file_stub>_Nov<day>"; each one carries
# a `date` string ("November <day>, 2022") and the `airline` label.
#
# NOTE(review): the commented-out exports earlier in this script use names such
# as "AA_Nov6.csv" and "Southwest_Nov6.csv", while lowercase names are read
# here; on a case-sensitive filesystem the files must match these names.
load_airline_sweeps <- function(file_stub, airline_label, days = 6:13) {
  sweeps <- lapply(days, function(day) {
    sweep_path <- sprintf(
      "twitter/twitter_sweep_datasets/Nov%d/%s_Nov%d.csv", day, file_stub, day
    )
    date_label <- sprintf("November %d, 2022", day)
    # `!!` injects the values, so a same-named column in the file cannot mask them.
    read_csv(sweep_path) %>%
      mutate(date = !!date_label, airline = !!airline_label)
  })
  names(sweeps) <- sprintf("%s_Nov%d", file_stub, days)
  sweeps
}

# For each airline: load the eight daily files, keep the per-day objects
# (<stub>_Nov6 ... <stub>_Nov13) available in the current environment, and stack
# the days into one table. Rows from different days or airlines never coincide
# because `date` / `airline` differ, so bind_rows() gives the same rows and
# columns as an all-column full_join() without doing the join.

# Jetblue
jetblue_sweeps <- load_airline_sweeps("jetblue", "jetblue")
invisible(list2env(jetblue_sweeps, envir = environment()))
jetblue_twitter_joined <- bind_rows(jetblue_sweeps)

# American
aa_sweeps <- load_airline_sweeps("aa", "american")
invisible(list2env(aa_sweeps, envir = environment()))
aa_twitter_joined <- bind_rows(aa_sweeps)

# Delta
delta_sweeps <- load_airline_sweeps("delta", "delta")
invisible(list2env(delta_sweeps, envir = environment()))
delta_twitter_joined <- bind_rows(delta_sweeps)

# Spirit
spirit_sweeps <- load_airline_sweeps("spirit", "spirit")
invisible(list2env(spirit_sweeps, envir = environment()))
spirit_twitter_joined <- bind_rows(spirit_sweeps)

# Frontier
frontier_sweeps <- load_airline_sweeps("frontier", "frontier")
invisible(list2env(frontier_sweeps, envir = environment()))
frontier_twitter_joined <- bind_rows(frontier_sweeps)

# Alaska
alaska_sweeps <- load_airline_sweeps("alaska", "alaska")
invisible(list2env(alaska_sweeps, envir = environment()))
alaska_twitter_joined <- bind_rows(alaska_sweeps)

# Allegiant
allegiant_sweeps <- load_airline_sweeps("allegiant", "allegiant")
invisible(list2env(allegiant_sweeps, envir = environment()))
allegiant_twitter_joined <- bind_rows(allegiant_sweeps)

# Southwest
southwest_sweeps <- load_airline_sweeps("southwest", "southwest")
invisible(list2env(southwest_sweeps, envir = environment()))
southwest_twitter_joined <- bind_rows(southwest_sweeps)

# Hawaiian
hawaiian_sweeps <- load_airline_sweeps("hawaiian", "hawaiian")
invisible(list2env(hawaiian_sweeps, envir = environment()))
hawaiian_twitter_joined <- bind_rows(hawaiian_sweeps)

# United
united_sweeps <- load_airline_sweeps("united", "united")
invisible(list2env(united_sweeps, envir = environment()))
united_twitter_joined <- bind_rows(united_sweeps)

# Combining all the twitter data

all_twitter_data <- bind_rows(
  jetblue_twitter_joined,
  alaska_twitter_joined,
  spirit_twitter_joined,
  aa_twitter_joined,
  hawaiian_twitter_joined,
  frontier_twitter_joined,
  united_twitter_joined,
  southwest_twitter_joined,
  delta_twitter_joined,
  allegiant_twitter_joined
)

# Here is where all_twitter_data is exported as "all_twitter_data.csv" 

This analysis used data scraped from Twitter with the rtweet package in R. The data was collected over an 8-day period, from November 6th, 2022 to November 13th, 2022. The code chunk includes an example of one day of the sweep for the ten domestic airlines that I worked with on this project. Once all of the data had been scraped, it was combined and exported as one full Twitter data file, which can be found here.

Wordcloud

This wordcloud shows the words that appear most often across the analyzed tweets. Interestingly enough, in a project where I examine on-time performance, the most prevalent word is “delayed”. Looking at the wordcloud, it is easy to see that there are far more negative words than positive words. While there are a few outliers such as “love”, this negativity is likely the result of customers only tweeting about things when bad things happen. If things are going well, customers typically have little motivation to react or vent; the opposite is true when things go terribly, such as during a delay.

# Split every tweet into single words, remove stop words and a fixed list of
# excluded tokens, and keep only the words found in the AFINN lexicon.
twitter_wordcloud <- all_twitter_data %>%
  unnest_tokens(word, text) %>%
  anti_join(stop_words) %>%
  filter(!word %in% c(
    "t.co", "https", "de", "el", "la", "rt", "airlines", "united", "delta",
    "american", "jetblue", "frontier", "spirit", "approved", "broke"
  )) %>%
  inner_join(get_sentiments("afinn"))

# Count how often each remaining word occurs and draw the cloud from the
# counts, most frequent word first.
twitter_wordcloud %>%
  group_by(word) %>%
  count() %>%
  arrange(desc(n)) %>%
  wordcloud2(
    backgroundColor = "#faf7f2",
    size = 5,
    color = "#9e3d03"
  )
# One subset of the combined tweets per airline, selected on the `airline` label.
american_twitter <- filter(all_twitter_data, airline == "american")

jetblue_twitter <- filter(all_twitter_data, airline == "jetblue")

spirit_twitter <- filter(all_twitter_data, airline == "spirit")

frontier_twitter <- filter(all_twitter_data, airline == "frontier")

united_twitter <- filter(all_twitter_data, airline == "united")

alaska_twitter <- filter(all_twitter_data, airline == "alaska")

hawaiian_twitter <- filter(all_twitter_data, airline == "hawaiian")

allegiant_twitter <- filter(all_twitter_data, airline == "allegiant")

southwest_twitter <- filter(all_twitter_data, airline == "southwest")

delta_twitter <- filter(all_twitter_data, airline == "delta")


## ---------------------------------------------------------------------------------------
# Filtering the data for sentiments by airline
# Split an airline's tweets into single words, remove stop words and a few
# leftover tokens, and keep only the words found in the AFINN lexicon.
#
#   tweets : data frame with a `text` column holding the tweet text
#
# Returns one row per retained word, with the columns of `tweets` (minus
# `text`), the `word`, and the columns joined in from the AFINN lexicon.
score_tweet_words <- function(tweets) {
  tweets %>%
    unnest_tokens(word, text) %>%
    anti_join(stop_words) %>%
    filter(!word %in% c("t.co", "https", "de", "el", "la", "rt")) %>%
    inner_join(get_sentiments("afinn"))
}

jetblue_filtered <- score_tweet_words(jetblue_twitter)

american_filtered <- score_tweet_words(american_twitter)

delta_filtered <- score_tweet_words(delta_twitter)

united_filtered <- score_tweet_words(united_twitter)

hawaiian_filtered <- score_tweet_words(hawaiian_twitter)

spirit_filtered <- score_tweet_words(spirit_twitter)

allegiant_filtered <- score_tweet_words(allegiant_twitter)

frontier_filtered <- score_tweet_words(frontier_twitter)

southwest_filtered <- score_tweet_words(southwest_twitter)

alaska_filtered <- score_tweet_words(alaska_twitter)

# -----------------------------------------------------------------------------------

# Mean sentiment value per airline. Each mean is taken over the scored words
# in `<airline>_filtered` (not over whole tweets), ignoring NA values.
jetblue_sentiment_avg <- mean(jetblue_filtered$value, na.rm = TRUE)
alaska_sentiment_avg <- mean(alaska_filtered$value, na.rm = TRUE)
spirit_sentiment_avg <- mean(spirit_filtered$value, na.rm = TRUE)
hawaiian_sentiment_avg <- mean(hawaiian_filtered$value, na.rm = TRUE)
allegiant_sentiment_avg <- mean(allegiant_filtered$value, na.rm = TRUE)
frontier_sentiment_avg <- mean(frontier_filtered$value, na.rm = TRUE)
american_sentiment_avg <- mean(american_filtered$value, na.rm = TRUE)
united_sentiment_avg <- mean(united_filtered$value, na.rm = TRUE)
southwest_sentiment_avg <- mean(southwest_filtered$value, na.rm = TRUE)
delta_sentiment_avg <- mean(delta_filtered$value, na.rm = TRUE)

# Collect the ten averages, in the same order as the airline labels below.
sentiment_avg_numbers <- c(
  jetblue_sentiment_avg,
  alaska_sentiment_avg,
  spirit_sentiment_avg,
  hawaiian_sentiment_avg,
  allegiant_sentiment_avg,
  frontier_sentiment_avg,
  american_sentiment_avg,
  united_sentiment_avg,
  southwest_sentiment_avg,
  delta_sentiment_avg
)

names <- c(
  "jetblue", "alaska", "spirit", "hawaiian", "allegiant",
  "frontier", "american", "united", "southwest", "delta"
)

# Two-column table (names, sentiment_avg_numbers), then sorted from the highest
# average to the lowest.
sentiment_avg <- data.frame(names, sentiment_avg_numbers)

arranged_sentiment_avg <- arrange(sentiment_avg, desc(sentiment_avg_numbers))


Average Sentiment of Tweets

This plot shows the average sentiment of the scored words in each airline's tweets. There is a very large range, with Frontier having a very negative average sentiment and Alaska having a very positive one. Compared with the other parts of this project, on-time performance and market share, there does not seem to be any correlation with Twitter sentiment. A perfect example of this is Delta and Allegiant, which are both in the middle range of sentiment while being drastically different in on-time performance and market share.

# Bar chart of average sentiment per airline. Bars are ordered by their
# average, the axes are flipped so the bars run horizontally, and the legend
# is hidden because the fill colour only repeats the airline label.
ggplot(
  arranged_sentiment_avg,
  aes(
    x = reorder(names, sentiment_avg_numbers),
    y = sentiment_avg_numbers,
    fill = names
  )
) +
  geom_col() +
  coord_flip() +
  labs(
    x = "Airlines",
    y = "Average Sentiment",
    title = "Average Sentiment by Airline"
  ) +
  theme(legend.position = "none")

# Table of average sentiment by airline
kable(arranged_sentiment_avg)
names sentiment_avg_numbers
alaska 0.6922380
hawaiian 0.4797765
united 0.4084426
southwest 0.2475039
delta -0.0350613
american -0.1373984
allegiant -0.1461276
spirit -0.1571919
jetblue -0.2577367
frontier -0.5826468


Delay Words Analysis

This plot shows the number of delay words per tweet for each airline. The words considered delay words are “delay”, “delayed”, “late”, “wait”, “long”, “hold up” and “forever”. Again, this seems to have no correlation with the other parts of this project: JetBlue and Delta have the most delay words per tweet, which is not representative of the on-time analysis.

# Words treated as signalling a delay.
# NOTE(review): "wait" is listed twice, and "hold up" is two words, so it
# cannot equal any single-word token produced by unnest_tokens() below --
# confirm whether bigram matching was intended.
delay_words <- c(
  "late", "delay", "wait", "long", "hold up", "wait", "forever", "delayed"
)

# For each airline, split the tweet text into word tokens, keep only the
# delay words, and count the occurrences of each word (most frequent first).
# The result has the columns airline, word and n.
all_twitter_data_sorted <- all_twitter_data %>%
  group_by(airline) %>%
  unnest_tokens(word, text) %>%
  filter(word %in% delay_words) %>%
  count(word, sort = TRUE)

# Finding delay words count by airline

# Total number of delay-word occurrences for one airline: adds up the
# per-word counts (column `n`) over the rows of all_twitter_data_sorted whose
# `airline` equals `airline_name`. Returns 0 when the airline has no rows.
sum_delay_words <- function(airline_name) {
  airline_rows <- which(all_twitter_data_sorted$airline == airline_name)
  sum(all_twitter_data_sorted$n[airline_rows])
}

# The JetBlue total gets its own variable. It was previously written to
# delta_delaywords_count and then overwritten by Delta's total, so the JetBlue
# row ended up reporting Delta's count.
jetblue_delaywords_count <- sum_delay_words("jetblue")
alaska_delaywords_count <- sum_delay_words("alaska")
spirit_delaywords_count <- sum_delay_words("spirit")
hawaiian_delaywords_count <- sum_delay_words("hawaiian")
allegiant_delaywords_count <- sum_delay_words("allegiant")
frontier_delaywords_count <- sum_delay_words("frontier")
american_delaywords_count <- sum_delay_words("american")
united_delaywords_count <- sum_delay_words("united")
southwest_delaywords_count <- sum_delay_words("southwest")
delta_delaywords_count <- sum_delay_words("delta")

# Creating dataframe with delay words count and airlines.
# The order of the counts must match the order of the `airline` labels.
delaywords_count <- c(
  jetblue_delaywords_count,
  alaska_delaywords_count,
  spirit_delaywords_count,
  hawaiian_delaywords_count,
  allegiant_delaywords_count,
  frontier_delaywords_count,
  american_delaywords_count,
  united_delaywords_count,
  southwest_delaywords_count,
  delta_delaywords_count
)

airline <- c(
  "jetblue", "alaska", "spirit", "hawaiian", "allegiant",
  "frontier", "american", "united", "southwest", "delta"
)

delaywords_count_df <- data.frame(airline, delaywords_count)

# Same rows, airline with the most delay words first
arranged_delaywords_count <- delaywords_count_df %>%
  arrange(desc(delaywords_count))

# Total number of tweets per airline, largest first (count column is `n`)
tweets_count_airlines <- all_twitter_data %>%
  group_by(airline) %>%
  count(airline) %>%
  arrange(desc(n))

# Attach the tweet totals to the delay-word counts, matching rows on `airline`
expanded_delaywords_df <- full_join(
  delaywords_count_df,
  tweets_count_airlines,
  by = "airline"
)

# The third column is the joined tweet count `n`; give it a descriptive name
names(expanded_delaywords_df)[3] <- "total_tweets"

# Delay words per tweet = delay-word occurrences / total tweets
expanded_delaywords_df[["delaywords_per_tweet"]] <-
  expanded_delaywords_df[["delaywords_count"]] /
  expanded_delaywords_df[["total_tweets"]]


# Bar chart of delay words per tweet. Bars are ordered by their value, the
# axes are flipped so the bars run horizontally, and the legend is hidden
# because the fill colour only repeats the airline label.
ggplot(
  expanded_delaywords_df,
  aes(
    x = reorder(airline, delaywords_per_tweet),
    y = delaywords_per_tweet,
    fill = airline
  )
) +
  geom_col() +
  coord_flip() +
  labs(
    x = "Airlines",
    y = "Delay Words per Tweet",
    title = "Delay Words per Tweet by Airline"
  ) +
  theme(legend.position = "none")

# Table with delay-word count, tweet total and ratio for every airline
kable(expanded_delaywords_df)
airline delaywords_count total_tweets delaywords_per_tweet
jetblue 8972 36676 0.2446286
alaska 347 11524 0.0301111
spirit 1284 23503 0.0546313
hawaiian 258 5526 0.0466884
allegiant 218 5505 0.0396004
frontier 749 12594 0.0594728
american 7746 112043 0.0691342
united 1292 112609 0.0114733
southwest 1028 45371 0.0226576
delta 8972 72911 0.1230541


Conclusion of Twitter Analysis

This Twitter analysis has consistently shown results that do not match the findings of the rest of this project. This could be for a few reasons, including that Twitter data can be noisy and unreliable for analysis. With that said, airlines that work very hard to have the best on-time performance should take this analysis into consideration and adapt their social media plans accordingly. Twitter has the power to change consumers' perception of a company, and an airline with the best on-time performance, like Delta, should try to get that message out and mitigate any negativity directed towards it.

To return to the home page click here.