library(httr)
library(httpuv)
library(twitteR)
library(tidytext)
library(stringr)
library(dplyr)
library(ggplot2)
library(textdata)
library(rtweet)
# Twitter API credentials ----
# Secrets are read from environment variables (for example set in ~/.Renviron)
# instead of being written into the script, so they never end up in version
# control or in a rendered report. Keys that were ever committed in plain text
# should be treated as compromised and regenerated in the Twitter developer
# portal.
app <- "Keenan_Hendricks"
api_key <- Sys.getenv("TWITTER_API_KEY")
api_secret_key <- Sys.getenv("TWITTER_API_SECRET")
access_token <- Sys.getenv("TWITTER_ACCESS_TOKEN")
access_secret <- Sys.getenv("TWITTER_ACCESS_SECRET")

# Fail early with an actionable message rather than letting the API reject an
# empty credential later on.
twitter_credentials <- c(
  TWITTER_API_KEY = api_key,
  TWITTER_API_SECRET = api_secret_key,
  TWITTER_ACCESS_TOKEN = access_token,
  TWITTER_ACCESS_SECRET = access_secret
)
missing_credentials <- names(twitter_credentials)[!nzchar(twitter_credentials)]
if (length(missing_credentials) > 0) {
  stop(
    "Missing Twitter API credentials; set these environment variables: ",
    paste(missing_credentials, collapse = ", "),
    call. = FALSE
  )
}

# Build the token object from the app name and the four credentials.
my_token <- create_token(
  app = app,
  consumer_key = api_key,
  consumer_secret = api_secret_key,
  access_token = access_token,
  access_secret = access_secret
)
What better data to analyze than the current global pandemic we are all dealing with? I investigated the sentiments attached to hashtagging Covid-19. As you can see below, positive sentiment surprisingly leads the way; however, it is followed by negativity, trust, fear, anticipation, and sadness. The sentiments that follow positivity are to be expected, as this is unknown territory and everyone has the right to feel scared.
# Search Twitter for tweets tagged #covid19, leaving retweets out
# (include_rts = FALSE).
# NOTE(review): `num_tweets` is not defined in the visible part of this file;
# confirm it is assigned before this line runs.
cv <- search_tweets(
  q = "#covid19",
  n = num_tweets,
  include_rts = FALSE
)
##
Downloading [=======>---------------------------------] 20%
Downloading [===========>-----------------------------] 30%
Downloading [===============>-------------------------] 40%
Downloading [===================>---------------------] 50%
Downloading [========================>----------------] 60%
Downloading [============================>------------] 70%
Downloading [================================>--------] 80%
Downloading [====================================>----] 90%
Downloading [=========================================] 100%
# Number and share of #covid19 tweets per `source` value, largest first.
cv_platform <- cv %>%
  count(source, sort = TRUE) %>%
  mutate(percent_of_tweets = n / sum(n))

# One row per (tweet, word) after cleaning:
#   * tweets whose text starts with a double quote are dropped,
#   * t.co links and ampersands are removed; the HTML entity "&amp;" is
#     stripped as a whole so it does not leave a stray "amp" token behind,
#   * text is split into words with the regex held in `reg`,
#   * stop words and tokens without any lowercase letter are discarded.
# NOTE(review): `reg` is not defined in the visible part of this file; confirm
# it is assigned before this pipeline runs.
covid_words <- cv %>%
  select(status_id, text) %>%
  filter(!str_detect(text, '^"')) %>%
  mutate(
    text = str_replace_all(text, "https://t.co/[A-Za-z\\d]+|&amp;|&", "")
  ) %>%
  unnest_tokens(word, text, token = "regex", pattern = reg) %>%
  filter(
    !word %in% stop_words$word,
    str_detect(word, "[a-z]")
  )

# NRC lexicon reduced to its word/sentiment pairs; reused by the later
# hashtag analyses.
nrc <- get_sentiments("nrc") %>%
  select(word, sentiment)

# Attach every NRC sentiment to each matching word (a word can carry several
# sentiments, so rows may multiply).
covid_words_sentiments <- covid_words %>%
  inner_join(nrc, by = "word")

# Print the number of sentiment-tagged words per sentiment, most frequent
# first.
covid_words_sentiments %>%
  count(sentiment, sort = TRUE)
## # A tibble: 10 x 2
## sentiment n
## <chr> <int>
## 1 positive 745
## 2 negative 539
## 3 trust 489
## 4 fear 390
## 5 anticipation 373
## 6 sadness 282
## 7 joy 233
## 8 anger 225
## 9 disgust 142
## 10 surprise 141
In connection with the pandemic created by Covid-19, President Donald Trump has been flooding the media with news and statements. I wanted to investigate the sentiment attached to hashtagging Trump throughout the Twitter database. You can view the results below.
# Search Twitter for tweets tagged #Trump, leaving retweets out
# (include_rts = FALSE).
# NOTE(review): `num_tweets` is not defined in the visible part of this file;
# confirm it is assigned before this line runs.
tr <- search_tweets(
  q = "#Trump",
  n = num_tweets,
  include_rts = FALSE
)
##
Downloading [=======>---------------------------------] 20%
Downloading [===========>-----------------------------] 30%
Downloading [===============>-------------------------] 40%
Downloading [===================>---------------------] 50%
Downloading [========================>----------------] 60%
Downloading [============================>------------] 70%
Downloading [================================>--------] 80%
Downloading [====================================>----] 90%
Downloading [=========================================] 100%
# Number and share of #Trump tweets per `source` value, largest first.
tr_platform <- tr %>%
  count(source, sort = TRUE) %>%
  mutate(percent_of_tweets = n / sum(n))

# One row per (tweet, word) after cleaning:
#   * tweets whose text starts with a double quote are dropped,
#   * t.co links and ampersands are removed; the HTML entity "&amp;" is
#     stripped as a whole so it does not leave a stray "amp" token behind,
#   * text is split into words with the regex held in `reg`,
#   * stop words and tokens without any lowercase letter are discarded.
# NOTE(review): `reg` is not defined in the visible part of this file; confirm
# it is assigned before this pipeline runs.
tr_words <- tr %>%
  select(status_id, text) %>%
  filter(!str_detect(text, '^"')) %>%
  mutate(
    text = str_replace_all(text, "https://t.co/[A-Za-z\\d]+|&amp;|&", "")
  ) %>%
  unnest_tokens(word, text, token = "regex", pattern = reg) %>%
  filter(
    !word %in% stop_words$word,
    str_detect(word, "[a-z]")
  )

# Attach every NRC sentiment to each matching word; `nrc` is the word/sentiment
# table built earlier in the file.
tr_words_sentiments <- tr_words %>%
  inner_join(nrc, by = "word")

# Print the number of sentiment-tagged words per sentiment, most frequent
# first.
tr_words_sentiments %>%
  count(sentiment, sort = TRUE)
## # A tibble: 10 x 2
## sentiment n
## <chr> <int>
## 1 negative 751
## 2 positive 721
## 3 trust 519
## 4 fear 402
## 5 sadness 363
## 6 anger 338
## 7 anticipation 311
## 8 surprise 293
## 9 disgust 286
## 10 joy 245
China, the origin of Covid-19, has been receiving some serious hate from all over the globe for potentially creating this madness. I investigated the sentiment attached to hashtagging China throughout the Twitter database. You can view the results below.
# Search Twitter for tweets tagged #China, leaving retweets out
# (include_rts = FALSE).
# NOTE(review): `num_tweets` is not defined in the visible part of this file;
# confirm it is assigned before this line runs.
ch <- search_tweets(
  q = "#China",
  n = num_tweets,
  include_rts = FALSE
)
##
Downloading [=======>---------------------------------] 20%
Downloading [===========>-----------------------------] 30%
Downloading [===============>-------------------------] 40%
Downloading [===================>---------------------] 50%
Downloading [========================>----------------] 60%
Downloading [============================>------------] 70%
Downloading [================================>--------] 80%
Downloading [====================================>----] 90%
Downloading [=========================================] 100%
# Number and share of #China tweets per `source` value, largest first.
ch_platform <- ch %>%
  count(source, sort = TRUE) %>%
  mutate(percent_of_tweets = n / sum(n))

# One row per (tweet, word) after cleaning:
#   * tweets whose text starts with a double quote are dropped,
#   * t.co links and ampersands are removed; the HTML entity "&amp;" is
#     stripped as a whole so it does not leave a stray "amp" token behind,
#   * text is split into words with the regex held in `reg`,
#   * stop words and tokens without any lowercase letter are discarded.
# NOTE(review): `reg` is not defined in the visible part of this file; confirm
# it is assigned before this pipeline runs.
ch_words <- ch %>%
  select(status_id, text) %>%
  filter(!str_detect(text, '^"')) %>%
  mutate(
    text = str_replace_all(text, "https://t.co/[A-Za-z\\d]+|&amp;|&", "")
  ) %>%
  unnest_tokens(word, text, token = "regex", pattern = reg) %>%
  filter(
    !word %in% stop_words$word,
    str_detect(word, "[a-z]")
  )

# Attach every NRC sentiment to each matching word; `nrc` is the word/sentiment
# table built earlier in the file.
ch_words_sentiments <- ch_words %>%
  inner_join(nrc, by = "word")

# Print the number of sentiment-tagged words per sentiment, most frequent
# first.
ch_words_sentiments %>%
  count(sentiment, sort = TRUE)
## # A tibble: 10 x 2
## sentiment n
## <chr> <int>
## 1 positive 792
## 2 negative 780
## 3 trust 591
## 4 fear 536
## 5 anticipation 402
## 6 anger 361
## 7 sadness 344
## 8 disgust 268
## 9 joy 211
## 10 surprise 157
Finally, I thought it would be great to compare and contrast each segment of sentiment across the three hashtags: Covid-19, Trump, and China. These three have been receiving some serious publicity and use all over Twitter, so it was interesting to compare how people feel emotionally when posting about each of them. Oddly enough, Covid-19 is receiving the highest amount of positivity, while also receiving the lowest amount of negativity. On the other hand, China leads the way for negative sentiment, followed by Trump, and the two are about equal in their positivity percentages. Another shocking find was that more people had an emotion of fear attached to China than to Covid-19, and they also seemed to have a greater sense of trust when hashtagging Covid-19 than China.
# Label every per-hashtag table with the hashtag it came from (stored in the
# `state` column) so the tables can be stacked and compared.
cv_platform$state <- "covid19"
tr_platform$state <- "Trump"
ch_platform$state <- "China"
covid_words_sentiments$state <- "covid19"
tr_words_sentiments$state <- "Trump"
ch_words_sentiments$state <- "China"

# Stack the three platform summaries and the three word/sentiment tables.
cvtrch_Bind <- rbind(cv_platform, tr_platform, ch_platform)
words_sentiments <- rbind(
  covid_words_sentiments,
  tr_words_sentiments,
  ch_words_sentiments
)

# Count sentiment-tagged words per hashtag and sentiment, then turn the counts
# into each sentiment's share within its own hashtag.
# Fix: this used to read from `words_sentiments2`, which is never created in
# the visible file; the combined table built just above is `words_sentiments`.
sent_df <- words_sentiments %>%
  count(state, sentiment) %>%
  group_by(state) %>%
  mutate(frequency = n / sum(n)) %>%
  ungroup()

# Side-by-side bars: one cluster per sentiment, one bar per hashtag.
cvtrch_plot <- ggplot(
  sent_df,
  aes(x = sentiment, y = frequency, fill = state)
) +
  geom_bar(stat = "identity", position = "dodge") +
  xlab("Sentiment") +
  ylab("Percent of tweets") +
  # Rotate the sentiment labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

cvtrch_plot