library(RCurl)
library(tidytext)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(janeaustenr)
library(stringr)
library(textdata)
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:RCurl':
##
## complete
library(ggplot2)
library(rjson)
library(jsonlite)
##
## Attaching package: 'jsonlite'
## The following objects are masked from 'package:rjson':
##
## fromJSON, toJSON
library(httr)
library(XML)
library(rvest)
library(wordcloud)
## Loading required package: RColorBrewer
library(wordcloud2)
library(RColorBrewer)
library(rtweet)
##
## Attaching package: 'rtweet'
## The following object is masked from 'package:jsonlite':
##
## flatten
library(tm)
## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following object is masked from 'package:httr':
##
## content
## The following object is masked from 'package:ggplot2':
##
## annotate
The Jane Austen example below is taken from "Text Mining with R", chapter 2: https://www.tidytextmining.com/sentiment.html
# get_sentiments("afinn")
# get_sentiments("nrc")
# get_sentiments("bing")
# Pattern for chapter heading lines: "chapter" followed by a digit or a
# roman-numeral letter, matched case-insensitively.
chapter_heading <- regex("^chapter [\\divxlc]", ignore_case = TRUE)

# One row per word, each tagged with the line it came from and a running
# chapter counter; both are computed separately within each book.
tidy_books <- austen_books() %>%
  group_by(book) %>%
  mutate(
    linenumber = row_number(),
    # Every line that matches the heading pattern bumps the counter by one.
    chapter = cumsum(str_detect(text, chapter_heading))
  ) %>%
  ungroup() %>%
  unnest_tokens(output = word, input = text)
# NRC lexicon entries labelled "joy".
nrc_joy <- filter(get_sentiments("nrc"), sentiment == "joy")

# Joy words occurring in "Emma", most frequent first.
tidy_books %>%
  filter(book == "Emma") %>%
  inner_join(nrc_joy) %>%
  count(word, sort = TRUE)
## Joining, by = "word"
library(tidyr)
# Count Bing-positive and Bing-negative words in every 80-line section of
# each book (index = linenumber %/% 80).
section_counts <- tidy_books %>%
  inner_join(get_sentiments("bing")) %>%
  count(book, index = linenumber %/% 80, sentiment)

# Spread the counts into `positive` / `negative` columns (0 where a section
# has none) and take their difference as the net sentiment.
jane_austen_sentiment <- section_counts %>%
  pivot_wider(
    names_from = sentiment,
    values_from = n,
    values_fill = 0
  ) %>%
  mutate(sentiment = positive - negative)
## Joining, by = "word"
# Net sentiment per section, one panel per book, each panel with its own
# x-axis range.
ggplot(
  data = jane_austen_sentiment,
  mapping = aes(x = index, y = sentiment, fill = book)
) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~book, ncol = 2, scales = "free_x")
The code chunks above were adapted from https://www.tidytextmining.com/sentiment.html
# twitter_token <- create_token(app = "Data607",
# consumer_key = "",
# consumer_secret = "",
# set_renv = TRUE)

# Read the Twitter bearer token from the environment instead of embedding it
# in the script. Set BEARER_TOKEN in ~/.Renviron (or the shell) before running.
# NOTE: a token that was previously committed in this file must be treated as
# compromised and regenerated.
# Sys.setenv() returns a logical, not the value, so its result must not be
# used as the token; Sys.getenv() returns the stored string.
bearer_token <- Sys.getenv("BEARER_TOKEN")
if (!nzchar(bearer_token)) {
  stop(
    "Environment variable BEARER_TOKEN is not set; ",
    "add it to ~/.Renviron and restart R.",
    call. = FALSE
  )
}
headers <- c(Authorization = sprintf("Bearer %s", bearer_token))

# Up to 5000 English-language tweets mentioning "49ers", retweets excluded.
tweets <- search_tweets("49ers", n = 5000, include_rts = FALSE, lang = "en")
# Clean the raw tweet text before tokenising.
# Drop URLs.
tweets$text <- gsub("https\\S*", "", tweets$text)
# Drop @mentions.
tweets$text <- gsub("@\\S*", "", tweets$text)
# Drop the HTML-escaped ampersand. Match the whole "&amp;" entity literally:
# removing the bare substring "amp" would also damage ordinary words
# (e.g. "champion" -> "chion"). This has to run before punctuation is
# stripped, while the "&" and ";" are still present.
tweets$text <- gsub("&amp;", " ", tweets$text, fixed = TRUE)
# Replace line breaks with a space so the words on either side of the break
# are not fused into a single token.
tweets$text <- gsub("[\r\n]", " ", tweets$text)
# Drop all remaining punctuation.
tweets$text <- gsub("[[:punct:]]", "", tweets$text)
# Split the cleaned tweet text into one word per row; only the text column
# is carried over.
tweets_words <- unnest_tokens(
  select(tweets, text),
  output = word,
  input = text
)
# Optional: extend the stop-word list with custom terms (not enabled).
# custom_stop <- bind_rows(
#   tibble(word = c("49ers", "deebo", "kevin", "jimmy", "samuel"),
#          lexicon = "custom"),
#   stop_words
# )

# Discard every token that appears in the `stop_words` list.
tweets_words <- anti_join(tweets_words, stop_words)
## Joining, by = "word"
# Horizontal bar chart of the 20 most frequent words in the tweets.
tweets_words %>%
  count(word, sort = TRUE) %>%
  # slice_max() replaces the superseded top_n() and names the ranking column
  # explicitly; ties at the cut-off are kept, as top_n() did.
  slice_max(order_by = n, n = 20) %>%
  # Order the factor levels by count so the bars are sorted in the plot.
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n)) +
  geom_col() +
  coord_flip() +
  labs(
    y = "Count",
    x = "Unique words",
    title = "Most frequent words found in the tweets of San Francisco 49ers",
    subtitle = "Stop words removed from the list"
  )
# NRC lexicon entries labelled "joy".
nrc_joy <- filter(get_sentiments("nrc"), sentiment == "joy")

# Joy words occurring in the tweets, most frequent first.
tweets_words %>%
  inner_join(nrc_joy) %>%
  count(word, sort = TRUE)
## Joining, by = "word"
# Total number of Bing-positive and Bing-negative words across all tweets.
polarity_counts <- tweets_words %>%
  inner_join(get_sentiments("bing")) %>%
  count(sentiment)

# One row with `positive` and `negative` columns plus their difference as the
# net sentiment.
fortyniners_sentiment_total <- polarity_counts %>%
  pivot_wider(
    names_from = sentiment,
    values_from = n,
    values_fill = 0
  ) %>%
  mutate(sentiment = positive - negative)
## Joining, by = "word"
fortyniners_sentiment_total
# Bing-positive and Bing-negative occurrence counts for each individual word.
word_polarity_counts <- tweets_words %>%
  inner_join(get_sentiments("bing")) %>%
  count(word, sentiment)

# One row per word with `positive` / `negative` columns (0 where absent) and
# their difference as the word's net sentiment.
fortyniners_sentiment_byword <- word_polarity_counts %>%
  pivot_wider(
    names_from = sentiment,
    values_from = n,
    values_fill = 0
  ) %>%
  mutate(sentiment = positive - negative)
## Joining, by = "word"
head(fortyniners_sentiment_byword, n = 20)
# ggplot(fortyniners_sentiment_byword, aes(word, sentiment)) +
# geom_col(show.legend = FALSE)
library(syuzhet)
##
## Attaching package: 'syuzhet'
## The following object is masked from 'package:rtweet':
##
## get_tokens
# Convert the tweet text to ASCII to tackle strange characters.
# Only the text column is passed on: handing the whole `tweets` data frame to
# iconv() coerces it with as.character(), which produces one deparsed string
# per column instead of one string per tweet, so the NRC scores below would be
# computed over column dumps rather than over individual tweets.
# From here on `tweets` is a character vector with one element per tweet.
tweets <- iconv(tweets$text, from = "UTF-8", to = "ASCII", sub = "")
# Remove retweet markers, in case any are present.
tweets <- gsub("(RT|via)((?:\\b\\w*@\\w+)+)", "", tweets)
# Remove mentions, in case any are present.
tweets <- gsub("@\\w+", "", tweets)
# NRC emotion/sentiment scores: one row per tweet.
ew_sentiment <- get_nrc_sentiment(tweets)
# Sum every NRC column over all tweets, then lay the totals out as a
# two-column data frame (`sentiment` label, `Score` total) with default
# row names.
sentiment_totals <- colSums(ew_sentiment)
sentimentscores <- data.frame(
  sentiment = names(sentiment_totals),
  Score = unname(sentiment_totals)
)
# Bar chart of the total score per NRC category, highest score first.
ggplot(
  data = sentimentscores,
  aes(x = reorder(sentiment, -Score), y = Score)
) +
  geom_col(aes(fill = sentiment)) +
  xlab("Sentiments") +
  ylab("Scores") +
  ggtitle("Total sentiment based on scores") +
  # theme_minimal() is a complete theme and resets any theme() settings added
  # before it, so the legend has to be switched off afterwards.
  theme_minimal() +
  theme(legend.position = "none")
Overall, it appears that sentiment around the 49ers is trending slightly negative. This coincides with the recent news that Deebo Samuel, the 49ers' Pro Bowl wide receiver, has unfollowed the team and removed team-related posts from his social media after contract-extension negotiations broke down. One limitation is that this word-level sentiment analysis does not recognize phrases such as “not bad”, which should not be rated negatively; instead it splits them into “not” and “bad” and treats them as two negative words. A next step would be to refine the analysis to handle such negations, for example by scoring pairs of words (bigrams) rather than single words.