library(httr)
library(httpuv)
library(twitteR)
library(tidytext)
library(stringr)
library(dplyr)
library(textdata)
library(rtweet)
library(ggplot2)  # needed for the comparison plot at the end
app <-"Keenan_Hendricks"
api_key <-"nJj5STJc0NlfXoCQjynwFZkiW"
api_secret_key <- "mQHmyLzIVvsg2cTpZloewcQ3SbwFxBoYCQCG3eqMbgGMndjZL1"
access_token <-"519122175-TCmSSEl4RockwUxyQLHGTOEaKedmJ0ckui46oVnM"
access_secret <-"feSNIp0pruUGJcZGadUkqqfKpWIogiHpByMfSFCAG6pwB"
my_token <- create_token(app = app, consumer_key=api_key, consumer_secret = api_secret_key, access_token= access_token, access_secret= access_secret)

Covid-19

What better data to analyze than the current global pandemic we are all dealing with? I investigated the sentiments attached to tweets hashtagging Covid-19. As you can see below, positive sentiment surprisingly leads the way, followed by negative, trust, fear, anticipation, and sadness. The sentiments trailing positivity are ones you might expect: this is unknown territory, and everyone has the right to feel scared.
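
The search and tokenization code below relies on two objects that are never defined in the code above: num_tweets (how many tweets to pull per hashtag) and reg (the pattern handed to unnest_tokens()). A minimal sketch of those definitions, with a hypothetical tweet count and an assumed tweet-tokenization regex, would be:

num_tweets <- 1000  # hypothetical: number of tweets to request per hashtag
# assumed pattern: split on anything that is not a letter, digit, #, @, or an
# apostrophe inside a word, so #hashtags and @mentions survive tokenization
reg <- "([^A-Za-z\\d#@']|'(?![A-Za-z\\d#@]))"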

cv <- search_tweets('#covid19', n = num_tweets, include_rts = FALSE)
cv_platform <- cv %>% group_by(source) %>%
                summarize(n = n()) %>%
                mutate(percent_of_tweets = n/sum(n)) %>%
                arrange(desc(n))

covid_words <- cv %>% select(status_id, text) %>%
  filter(!str_detect(text, '^"')) %>%                # drop tweets that start with a quote (manual retweets)
  mutate(text = str_replace_all(text, "https://t.co/[A-Za-z\\d]+|&amp;", "")) %>%  # strip t.co links and "&amp;"
  unnest_tokens(word, text, token = "regex", pattern = reg) %>%  # tokenize with the reg pattern defined above
  filter(!word %in% stop_words$word,                 # drop stop words
         str_detect(word, "[a-z]"))                  # keep only tokens containing letters

nrc <- get_sentiments("nrc") %>%
  select(word, sentiment)

covid_words_sentiments <- covid_words %>% inner_join(nrc, by = "word")

covid_words_sentiments %>% group_by(sentiment) %>% summarize(n = n()) %>% arrange(desc(n))
## # A tibble: 10 x 2
##    sentiment        n
##    <chr>        <int>
##  1 positive       745
##  2 negative       539
##  3 trust          489
##  4 fear           390
##  5 anticipation   373
##  6 sadness        282
##  7 joy            233
##  8 anger          225
##  9 disgust        142
## 10 surprise       141

Trump

In connection with the Covid-19 pandemic, President Donald Trump has been flooding the media with news and statements. I wanted to investigate the sentiment attached to hashtagging Trump across Twitter. The results are shown below.

tr <- search_tweets('#Trump', n = num_tweets, include_rts = FALSE)
tr_platform <- tr %>% group_by(source) %>%
                summarize(n = n()) %>%
                mutate(percent_of_tweets = n / sum(n)) %>%
                arrange(desc(n))


tr_words <- tr %>% select(status_id, text) %>%
  filter(!str_detect(text, '^"')) %>%
  mutate(text = str_replace_all(text, "https://t.co/[A-Za-z\\d]+|&amp;", "")) %>%
  unnest_tokens(word, text, token = "regex", pattern = reg) %>%
  filter(!word %in% stop_words$word,
         str_detect(word, "[a-z]"))

tr_words_sentiments <- tr_words %>% inner_join(nrc, by = "word")
tr_words_sentiments %>% group_by(sentiment) %>% summarize(n = n()) %>% arrange(desc(n))
## # A tibble: 10 x 2
##    sentiment        n
##    <chr>        <int>
##  1 negative       751
##  2 positive       721
##  3 trust          519
##  4 fear           402
##  5 sadness        363
##  6 anger          338
##  7 anticipation   311
##  8 surprise       293
##  9 disgust        286
## 10 joy            245

China

China, the origin of Covid-19, has been receiving serious hate from all over the globe for potentially creating this madness. I investigated the sentiment attached to hashtagging China across Twitter. The results are shown below.

ch <- search_tweets('#China', n = num_tweets, include_rts = FALSE)
ch_platform <- ch %>% group_by(source) %>%
                summarize(n = n()) %>%
                mutate(percent_of_tweets = n / sum(n)) %>%
                arrange(desc(n))


ch_words <- ch %>% select(status_id, text) %>%
  filter(!str_detect(text, '^"')) %>%
  mutate(text = str_replace_all(text, "https://t.co/[A-Za-z\\d]+|&amp;", "")) %>%
  unnest_tokens(word, text, token = "regex", pattern = reg) %>%
  filter(!word %in% stop_words$word,
         str_detect(word, "[a-z]"))

ch_words_sentiments <- ch_words %>% inner_join(nrc, by = "word")

ch_words_sentiments %>% group_by(sentiment) %>% summarize(n = n()) %>% arrange(desc(n))
## # A tibble: 10 x 2
##    sentiment        n
##    <chr>        <int>
##  1 positive       792
##  2 negative       780
##  3 trust          591
##  4 fear           536
##  5 anticipation   402
##  6 anger          361
##  7 sadness        344
##  8 disgust        268
##  9 joy            211
## 10 surprise       157

Correlation Between the Three

Finally, I thought it would be great to compare and contrast each sentiment across the three hashtags: Covid-19, Trump, and China. All three have been receiving serious publicity and use all over Twitter, so it was interesting to compare how people feel emotionally when posting about each one. Oddly enough, Covid-19 receives the highest share of positive sentiment while also the lowest share of negative sentiment. On the other hand, China leads the way in negative sentiment, followed by Trump, and the two are roughly equal in their shares of positive sentiment. Another surprising finding was that more tweets expressed fear about China than about Covid-19 itself, and tweets hashtagging Covid-19 conveyed a greater sense of trust than those hashtagging China.

cv_platform$state <- "covid19"
tr_platform$state <- "Trump"
ch_platform$state <- "China"
covid_words_sentiments$state <- "covid19"
tr_words_sentiments$state <- "Trump"
ch_words_sentiments$state <- "China"
cvtrch_Bind <- rbind(cv_platform, tr_platform, ch_platform)
words_sentiments <- rbind(covid_words_sentiments, tr_words_sentiments, ch_words_sentiments)
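
The platform breakdowns bound into cvtrch_Bind above are never displayed in the output below; one quick way to inspect the top tweet sources per hashtag (a side sketch, not part of the original analysis) is:

cvtrch_Bind %>%
  group_by(state) %>%
  slice_max(order_by = n, n = 3) %>%  # top three sources per hashtag
  ungroup() %>%
  arrange(state, desc(n))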

sent_df <- words_sentiments %>%
  group_by(state, sentiment) %>%
  summarize(n = n()) %>%
  mutate(frequency = n/sum(n))

cvtrch_plot <- ggplot(sent_df, aes(x = sentiment, y = frequency, fill = state)) +
  geom_bar(stat = "identity", position = "dodge") +
  xlab("Sentiment") +
  ylab("Percent of tweets") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
cvtrch_plot
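
To check the specific comparisons described above numerically, rather than reading them off the bars, the summary table can be filtered; for example, to compare fear, trust, and the overall positive and negative shares across the three hashtags:

sent_df %>%
  filter(sentiment %in% c("fear", "trust", "positive", "negative")) %>%
  arrange(sentiment, desc(frequency))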