################# Harvest Tweets about Universal Basic Income ###############
# load packages
library(twitteR)
library(ROAuth)
library(ggplot2)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(scales)
library(tm)
## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
##
## annotate
library(stringr)
library(wordcloud)
## Loading required package: RColorBrewer
library(syuzhet)
##
## Attaching package: 'syuzhet'
## The following object is masked from 'package:scales':
##
## rescale
library(reshape2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:lubridate':
##
## intersect, setdiff, union
## The following objects are masked from 'package:twitteR':
##
## id, location
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(RColorBrewer)
# Twitter API authorization: replace the placeholders below with your own
# credentials (real keys should never be committed to a script)
setup_twitter_oauth('YOUR_CONSUMER_KEY',
                    'YOUR_CONSUMER_SECRET',
                    'YOUR_ACCESS_TOKEN',
                    'YOUR_ACCESS_TOKEN_SECRET')
## [1] "Using direct authentication"
basicincome_tweets = searchTwitter("#basicincome", n=3200, lang="en") # Tweets including "#basicincome"
basicincome <- twListToDF(basicincome_tweets)
write.csv(basicincome, file="#basicincome_tweets1030.csv")
UBI_tweets = searchTwitter("#UBI", n=3200, lang="en") # Tweets including "#UBI"
## Warning in doRppAPICall("search/tweets", n, params = params,
## retryOnRateLimit = retryOnRateLimit, : 3200 tweets were requested but the
## API can only return 2100
UBI <- twListToDF(UBI_tweets)
write.csv(UBI, file="#UBI_tweets1030.csv")
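# The two searches can overlap, since a tweet may carry both hashtags. A
# minimal sketch for merging the harvests and dropping duplicate status ids;
# the name "all_tweets" is illustrative, not part of the original pipeline:
all_tweets <- rbind(basicincome, UBI)
all_tweets <- all_tweets[!duplicated(all_tweets$id), ]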
#################################################################################################
#################################################################################################
# Convert the timestamps of the tweets to the same time zone
basicincome$created <- ymd_hms(basicincome$created)
basicincome$created <- with_tz(basicincome$created, "America/New_York")
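# Note that with_tz() changes only the zone in which an instant is printed,
# not the instant itself. An illustrative check (12:00 UTC on this date
# prints as 08:00 EDT, i.e. UTC-4):
with_tz(ymd_hms("2017-10-30 12:00:00"), "America/New_York")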
# Show tweet volume by month
ggplot(data = basicincome, aes(x = month(created, label = TRUE))) +
  geom_bar(aes(fill = ..count..)) +
  theme(legend.position = "none") +
  xlab("Month") + ylab("Number of tweets") +
  scale_fill_gradient(low = "midnightblue", high = "aquamarine4")

# Show tweet volume by weekday
ggplot(data = basicincome, aes(x = wday(created, label = TRUE))) +
  geom_bar(aes(fill = ..count..)) +
  theme(legend.position = "none") +
  xlab("Day of the Week") + ylab("Number of tweets") +
  scale_fill_gradient(low = "midnightblue", high = "aquamarine4")

# Show tweet volume by hour
# extract the time of day as seconds since midnight (units made explicit so
# the difftime is not auto-scaled to hours), then coerce to POSIXct for plotting
basicincome$timeonly <- as.numeric(difftime(basicincome$created,
                                            trunc(basicincome$created, "days"),
                                            units = "secs"))
class(basicincome$timeonly) <- "POSIXct"
ggplot(data = basicincome, aes(x = timeonly)) +
  geom_histogram(aes(fill = ..count..)) +
  theme(legend.position = "none") +
  xlab("Time") + ylab("Number of tweets") +
  scale_x_datetime(breaks = date_breaks("2 hours"),
                   labels = date_format("%H:00")) +
  scale_fill_gradient(low = "midnightblue", high = "aquamarine4")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
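# A daily-volume view complements the month/weekday/hour breakdowns. A
# minimal sketch reusing the same fill gradient (assumes "created" has
# already been converted above):
ggplot(data = basicincome, aes(x = as.Date(created))) +
  geom_bar(aes(fill = ..count..)) +
  theme(legend.position = "none") +
  xlab("Date") + ylab("Number of tweets") +
  scale_fill_gradient(low = "midnightblue", high = "aquamarine4")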

#################################################################################################
#################################################################################################
############################## Word cloud of frequent terms ##############################
nohandles <- str_replace_all(basicincome$text, "@\\w+", "") # remove Twitter handles (@username)
wordCorpus <- Corpus(VectorSource(nohandles))
wordCorpus <- tm_map(wordCorpus, removePunctuation) # remove punctuation
wordCorpus <- tm_map(wordCorpus, removeNumbers) # remove numbers
wordCorpus <- tm_map(wordCorpus, content_transformer(tolower)) # convert to lower case
wordCorpus <- tm_map(wordCorpus, removeWords, stopwords("english")) # remove common English stop words
wordCorpus <- tm_map(wordCorpus, removeWords, c("https", "http", "4yo")) # specify additional stop words as a character vector
wordCorpus <- tm_map(wordCorpus, stripWhitespace) # eliminate extra white space
pal <- brewer.pal(9,"YlGnBu")
pal <- pal[-(1:4)]
set.seed(123)
wordcloud(words = wordCorpus, scale=c(10,0.3), max.words=100, random.order=FALSE,
          rot.per=0.35, use.r.layout=FALSE, colors=pal)
## Warning in wordcloud(words = wordCorpus, scale = c(10, 0.3), max.words =
## 100, : basicincome could not be fit on page. It will not be plotted.
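# The warning above means the most frequent word was too large to place at
# scale = c(10, 0.3). A quick frequency check (a sketch using tm's
# TermDocumentMatrix) helps pick a smaller first scale value, e.g. c(4, 0.3):
tdm <- TermDocumentMatrix(wordCorpus)
term_freq <- sort(rowSums(as.matrix(tdm)), decreasing = TRUE)
head(term_freq, 10)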

# Produce a word cloud of frequent twitter mentions
friends <- str_extract_all(basicincome$text, "@\\w+") # a list of handle vectors, one per tweet
namesCorpus <- Corpus(VectorSource(unlist(friends))) # flatten the list first, or each element is deparsed into one garbled document
set.seed(146)
wordcloud(words = namesCorpus, scale=c(3,0.5), max.words=50, random.order=FALSE,
          rot.per=0.10, use.r.layout=FALSE, colors=pal)

#################################################################################################
#################################################################################################
############################ sentiment analysis #################################################
# Generate sentiment score for each tweet
basicincome$clean_text <- str_replace_all(basicincome$text, "@\\w+", "")
Sentiment <- get_nrc_sentiment(basicincome$clean_text)
basicincome_senti <- cbind(basicincome, Sentiment)
# sum the eight NRC emotion columns across all tweets (selecting by name is
# more robust than the positional basicincome_senti[, c(19:26)])
sentimentTotals <- data.frame(colSums(basicincome_senti[, c("anger", "anticipation", "disgust", "fear",
                                                            "joy", "sadness", "surprise", "trust")]))
names(sentimentTotals) <- "count"
sentimentTotals <- cbind("sentiment" = rownames(sentimentTotals), sentimentTotals)
rownames(sentimentTotals) <- NULL
# Tweet volume in each sentiment category
ggplot(data = sentimentTotals, aes(x = sentiment, y = count)) +
  geom_bar(aes(fill = sentiment), stat = "identity") +
  theme(legend.position = "none") +
  xlab("Sentiment") + ylab("Total Count") + ggtitle("Total Sentiment Score for All Tweets")

# Positive and negative sentiment over time
posnegtime <- basicincome_senti %>%
  group_by(created = cut(created, breaks="2 days")) %>%
  summarise(negative = mean(negative),
            positive = mean(positive)) %>% melt
## Using created as id variables
names(posnegtime) <- c("timestamp", "sentiment", "meanvalue")
posnegtime$sentiment = factor(posnegtime$sentiment, levels(posnegtime$sentiment)[c(2, 1)]) # put "positive" first so it maps to the green line colour below
ggplot(data = posnegtime, aes(x = as.Date(timestamp), y = meanvalue, group = sentiment)) +
  geom_line(size = 1.5, alpha = 0.7, aes(color = sentiment)) +
  geom_point(size = 0.3) +
  ylim(0, NA) +
  scale_colour_manual(values = c("springgreen4", "firebrick3")) +
  theme(legend.title=element_blank(), axis.title.x = element_blank()) +
  scale_x_date(breaks = date_breaks("3 months"),
               labels = date_format("%Y-%b")) +
  ylab("Average sentiment score") +
  ggtitle("Sentiment Over Time")

# Various sentiment types over time
basicincome_senti$month <- month(basicincome_senti$created, label = TRUE)
monthlysentiment <- basicincome_senti %>% group_by(month) %>%
  summarise(anger = mean(anger),
            anticipation = mean(anticipation),
            disgust = mean(disgust),
            fear = mean(fear),
            joy = mean(joy),
            sadness = mean(sadness),
            surprise = mean(surprise),
            trust = mean(trust)) %>% melt
## Using month as id variables
names(monthlysentiment) <- c("month", "sentiment", "meanvalue")
ggplot(data = monthlysentiment, aes(x = month, y = meanvalue, group = sentiment)) +
  geom_line(size = 2.5, alpha = 0.7, aes(color = sentiment)) +
  geom_point(size = 0.5) +
  ylim(0, NA) +
  theme(legend.title=element_blank(), axis.title.x = element_blank()) +
  ylab("Average sentiment score") +
  ggtitle("Sentiment During the Year")
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?
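# The geom_path warning above is expected here: the standard search API only
# returns roughly the last week of tweets, so all of them fall into a single
# month and each sentiment "line" is a single point. A sketch that groups by
# day instead, so the lines have points to connect (same pipeline as above,
# shown for four of the eight emotions):
dailysentiment <- basicincome_senti %>%
  group_by(day = as.Date(created)) %>%
  summarise(anger = mean(anger), fear = mean(fear),
            joy = mean(joy), trust = mean(trust)) %>% melt
names(dailysentiment) <- c("day", "sentiment", "meanvalue")
ggplot(data = dailysentiment, aes(x = day, y = meanvalue, group = sentiment)) +
  geom_line(size = 1, alpha = 0.7, aes(color = sentiment)) +
  ylab("Average sentiment score")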
