# Plotting Sentiments on Maps
# Install any missing dependency. Each package is tested under its own name,
# so a package is installed exactly when that package is the one missing.
# requireNamespace() only checks availability; attaching the packages is left
# to the library() calls that follow.
required_pkgs <- c("rtweet", "tidytext", "maps", "plotly",
                   "ggmap", "rgeos", "maptools", "plyr")
for (pkg in required_pkgs) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, repos = "https://cran.cnr.berkeley.edu/",
                     dependencies = TRUE)
  }
}
library(rtweet) # Twitter search
library(tidytext) # sentiment analysis
library(maps) # draw geo-maps
library(plotly) # ploting
library(ggmap) # geocoding with Google API
library(rgeos) # for maptools
library(maptools) # handle spatial objects
library(plyr) # data wrangling
# STEP 1: Data collection -----------------------------------------------------
# Obtain the longitude and latitude of a place name. The result is only
# printed, not stored; the lines below it show the output of an earlier run.
# NOTE(review): geocode() presumably comes from ggmap and queries the Google
# geocoding service, which may require API credentials - confirm.
geocode('Atlanta, GA') # Google API
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Atlanta,%20GA&sensor=false
## lon lat
## 1 -84.38798 33.749
# Search Twitter by cities
# Parameters shared by every query
N <- 500  # number of tweets to request from each query
S <- 200  # search radius in miles

# cityNames, lons and lats are parallel vectors: entry k of each one
# describes the same city.
cityNames <- c(
  "Atlanta",
  "Boston",
  "Chicago",
  "Columbus",
  "Houston",
  "New York City",
  "Phoenix",
  "Salt Lake City",
  "San Francisco",
  "Washington DC"
)
lons <- c(
  -84.3,   # Atlanta
  -71.1,   # Boston
  -87.6,   # Chicago
  -82.9,   # Columbus
  -95.3,   # Houston
  -73.9,   # New York City
  -112,    # Phoenix
  -111.9,  # Salt Lake City
  -122.4,  # San Francisco
  -77      # Washington DC
)
lats <- c(
  33.7,  # Atlanta
  42.3,  # Boston
  41.8,  # Chicago
  39.9,  # Columbus
  29.7,  # Houston
  40.7,  # New York City
  33.5,  # Phoenix
  40.7,  # Salt Lake City
  37.7,  # San Francisco
  38.9   # Washington DC
)
# The same search is repeated for every city, so it is wrapped in a function.
#
# Search Twitter for English, non-retweet tweets around one city.
#   i  : position of the city in the global lats/lons vectors
#   st : search term handed to search_tweets()
# Reads the globals N (tweets to request) and S (radius in miles).
# Returns the value of search_tweets() for that query.
searchFunction <- function(i, st){
  # fail early instead of sending an "NA,NA,..." geocode to the API
  stopifnot(length(i) == 1, i >= 1, i <= length(lats), i <= length(lons))
  # geocode string has the form "<lat>,<lon>,<radius>mi"
  area <- paste(lats[i], lons[i], paste0(S, "mi"), sep = ",")
  search_tweets(st, n = N, lang = 'en', include_rts = FALSE, geocode = area)
}
# Search every city for two terms and collect the results in two data frames:
# DAT1 holds the 'monday' tweets, DAT2 the 'friday' tweets.

# Tag a batch of tweets with its city index and normalise the text: every
# non-alphanumeric character becomes a space, then everything is lower-cased.
prepTweets <- function(dat, i){
  dat$city <- i
  dat$text <- gsub('[^[:alnum:]]', ' ', dat$text)
  dat$text <- tolower(dat$text)
  dat
}

# Row-bind the collected batches in one go; cities that returned nothing left
# a NULL slot and are skipped. Yields an empty data frame if all were empty.
bindTweets <- function(batches){
  batches <- Filter(Negate(is.null), batches)
  if (length(batches) == 0) data.frame() else do.call(rbind, batches)
}

# one slot per city, so nothing is grown with rbind() inside the loop
batches1 <- vector("list", length(cityNames))
batches2 <- vector("list", length(cityNames))
for (i in seq_along(cityNames)){
  dat1 <- searchFunction(i, st='monday')
  print(nrow(dat1))
  if (nrow(dat1) != 0){
    batches1[[i]] <- prepTweets(dat1, i)
  }
  dat2 <- searchFunction(i, st='friday')
  print(nrow(dat2))
  if (nrow(dat2) != 0){
    batches2[[i]] <- prepTweets(dat2, i)
  }
  print(i)
  Sys.sleep(10)  # pause between cities
}
DAT1 <- bindTweets(batches1)
DAT2 <- bindTweets(batches2)
# Write each result set to "<object name>.Rdata" in the working directory
# (replace the placeholder path first).
setwd('YOUR WORKING DIRECTORY')
for (objName in c('DAT1', 'DAT2')) {
  save(list = objName, file = paste0(objName, '.Rdata'))
}
# STEP2: Sentiment Analysis ---------------------------------------------------
# Load the 'bing' sentiment lexicon
sentiment_term <- get_sentiments('bing')
# Extra (word, sentiment) pairs appended to the lexicon as additional rows
extra_terms <- list(
  c("epicfail", "negative"),
  c("wtf", "negative")
)
sentiment_term <- do.call(rbind, c(list(sentiment_term), extra_terms))
# Calculate the "sentiment score" of one tweet: the number of positive
# lexicon entries found in the tweet minus the number of negative ones.
#   t       : tweet text; words are split on single spaces
#   lexicon : data frame with columns `word` and `sentiment`
#             ("positive" / "negative"); defaults to the global sentiment_term
# Returns one integer. A lexicon entry counts at most once, however often the
# word occurs in the tweet. Empty or NA text scores 0.
score.sent <- function(t, lexicon = sentiment_term){
  # as.character() lets factor input through; strsplit() rejects non-character
  words <- unlist(strsplit(as.character(t), ' ', fixed = TRUE))
  is.pos <- lexicon$sentiment == "positive"
  is.neg <- lexicon$sentiment == "negative"
  pos.hits <- lexicon$word[is.pos] %in% words
  neg.hits <- lexicon$word[is.neg] %in% words
  sum(pos.hits) - sum(neg.hits)
}
# Score every tweet. vapply() always yields a plain integer vector (also for
# zero tweets), and USE.NAMES = FALSE keeps the tweet texts from being
# attached to the scores as names.
DAT1$s.score <- vapply(DAT1$text, score.sent, integer(1), USE.NAMES = FALSE)
DAT2$s.score <- vapply(DAT2$text, score.sent, integer(1), USE.NAMES = FALSE)
# STEP3: Mapping --------------------------------------------------------------
# --- scatter points on maps --- #
# Plot tweets as points on map
# lat_lng() supplies the lat / lng columns used from here on
DAT1 <- lat_lng(DAT1)
with(DAT1, plot(lng, lat))

# count the tweets that have a longitude (not every tweet came with geo info)
sum(!is.na(DAT1$lng))
## [1] 552
# Tweets without their own coordinates fall back to the coordinates of the
# city whose search returned them; DAT1$city is the index into lons / lats.
# Longitude and latitude are filled independently, in one vectorised lookup
# each, without assuming a fixed number of cities.
noLng <- is.na(DAT1$lng)
noLat <- is.na(DAT1$lat)
DAT1$lng[noLng] <- lons[DAT1$city[noLng]]
DAT1$lat[noLat] <- lats[DAT1$city[noLat]]
# 1) create the map with "maps" package
# TRUE is spelled out: the shorthand T is an ordinary variable that can be
# reassigned.
map('state', bg='lightblue', fill=TRUE, col='wheat',
    mar = c(1, 1, par("mar")[3], 0.1), resolution = 0)
# every tweet as a small black circle
points(DAT1$lng, DAT1$lat, col = 'black', cex = .6, pch=1)
# overlay a red "P" on tweets scoring above zero, a blue "N" on those below
isPos <- DAT1$s.score > 0
isNeg <- DAT1$s.score < 0
points(DAT1$lng[isPos], DAT1$lat[isPos], col = 'red', cex = .6, pch='P')
points(DAT1$lng[isNeg], DAT1$lat[isNeg], col = 'blue', cex = .6, pch='N')

# 2) do it again with Plotly
# Plotly setting
# https://plot.ly/r/getting-started/
# Store the Plotly credentials in environment variables
# (both values are placeholders to be replaced).
Sys.setenv(plotly_username = "YOUR PLOTLY USERNAME")
Sys.setenv(plotly_api_key = "YOUR API KEY")
# geo scope and projection
g <- list(scope = 'usa', projection = list(type = 'albers usa'))
# build the map one step at a time: base geo plot, hover markers, layout
p1 <- plot_geo(DAT1, lat = ~lat, lon = ~lng)
p1 <- add_markers(p1,
                  text = ~paste(screen_name, created_at, sep = "<br />"),
                  hoverinfo = "text")
p1 <- layout(p1, geo = g,
             title = "Tweets about Monday<br />(Hover for more info)")
p1
# send it to Plotly website
api_create(p1, filename = "p1") # interactive maps on the web
# Plot points by category
# Label every tweet by its score: only scores above zero are "positive";
# zero and below are both labelled "negative".
DAT1$pn <- factor(ifelse(DAT1$s.score > 0, "positive", "negative"),
                  levels = c("positive", "negative"))
# view first few rows; the columns are picked by name because their positions
# depend on how many columns the search result happens to have
print.data.frame(head(DAT1[, c("s.score", "pn")], 6))
## s.score pn
## 1 0 negative
## 2 0 negative
## 3 -1 negative
## 4 0 negative
## 5 -1 negative
## 6 1 positive
# More geo styling: same scope and projection as before, plus colours for
# land, subunit borders and lakes
g <- list(scope = 'usa', projection = list(type = 'albers usa'))
g$showland <- TRUE
g$landcolor <- toRGB("gray95")
g$subunitcolor <- toRGB("gray85")
g$subunitwidth <- 0.5
g$showlakes <- TRUE
g$lakecolor <- toRGB('white')
# plot the map: marker symbol encodes the pn label, hover text shows the
# screen name and the favorite count
p2 <- plot_geo(DAT1, lat = ~lat, lon = ~lng) %>%
  add_markers(symbol = ~pn, symbols = c("o", "x"), color = I('black'),
              text = ~paste(screen_name, paste("Favorites: ", favorite_count), sep = "<br />"),
              hoverinfo = "text") %>%
  layout(geo = g, title = "Tweets about Monday: emotions")
api_create(p2, filename = "p2") # send it to Plotly
# --- Choropleth maps --- #
# Plot tweets by state
# Get the state from a longitude/latitude pair
# 1) easy, but relies on the Google API
# The point is passed as c(longitude, latitude); the comment lines below show
# the output of an earlier run.
# NOTE(review): output = "more" is presumably what makes the locality and
# administrative_area_level_1 fields available - confirm.
geoinfo <- revgeocode(c(-78.47, 38.03), output = "more")
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?latlng=38.03,-78.47&sensor=false
geoinfo$locality
## [1] Charlottesville
## Levels: Charlottesville
geoinfo$administrative_area_level_1
## [1] Virginia
## Levels: Virginia
# 2) define a function "latlong2state" to convert
# The single argument to this function, pointsDF, is a data.frame in which:
# - column 1 contains the longitude in degrees
# - column 2 contains the latitude in degrees
# Returns a character vector with one lower-case state name per row of
# pointsDF. Rows with a missing coordinate, and points that fall in no state
# polygon, yield NA; an input without usable rows yields all-NA (or empty)
# output instead of an error.
latlong2state <- function(pointsDF) {
  coords <- as.data.frame(pointsDF)
  found <- rep(NA_character_, nrow(coords))
  # SpatialPoints() cannot take missing coordinates, so only complete rows
  # are converted; the others keep their NA
  usable <- complete.cases(coords)
  if (!any(usable)) {
    return(found)
  }
  lonlat <- CRS("+proj=longlat +datum=WGS84")
  # Prepare SpatialPolygons object with one SpatialPolygon per state
  # (plus DC, minus HI & AK)
  states <- map('state', fill=TRUE, col="transparent", plot=FALSE)
  # polygon names look like "state:part"; keep the state portion as the ID
  IDs <- vapply(strsplit(states$names, ":"), function(x) x[1], character(1))
  states_sp <- map2SpatialPolygons(states, IDs=IDs, proj4string=lonlat)
  # Convert the usable rows to a SpatialPoints object
  pointsSP <- SpatialPoints(coords[usable, , drop = FALSE], proj4string=lonlat)
  # Use sp::over() to get indices of the Polygons object containing each point
  indices <- over(pointsSP, states_sp)
  # Map the indices back to the state names of the containing polygons
  stateNames <- vapply(states_sp@polygons, function(x) x@ID, character(1))
  found[usable] <- stateNames[indices]
  found
}
# Try the converter on two known points
testPoints <- data.frame(
  x = c(-122.4, -71.1),
  y = c(37.7, 42.3)
)
latlong2state(testPoints)
## [1] "california" "massachusetts"
# look up the state for each tweet's lng/lat pair
DAT1[["state"]] <- latlong2state(DAT1[c("lng", "lat")])
# keep only the tweets whose state could be determined
DAT1 <- DAT1[which(!is.na(DAT1[["state"]])), ]
# calculate % of negative tweets by state
# One row per state: count = negative tweets, total = all tweets. Counting
# inside each state keeps states with no negative tweet at all; they get
# negativity 0 instead of disappearing from the table.
dfbystate <- ddply(DAT1, .(state), summarise,
                   count = sum(pn == 'negative', na.rm = TRUE),
                   total = length(pn))
# keep the pn column and column order of a per-state "negative" table
dfbystate$pn <- factor(rep('negative', nrow(dfbystate)),
                       levels = c('positive', 'negative'))
dfbystate <- dfbystate[, c('state', 'pn', 'count', 'total')]
dfbystate$negativity <- dfbystate$count/dfbystate$total
# get state abbreviation (state names in DAT1 are lower-case, hence tolower)
state <- list(
  name = c(state.name, "District Of Columbia"),
  abb = c(state.abb, "DC")
)
dfbystate$abb <- state[['abb']][match(dfbystate$state, tolower(state[['name']]))]
# draw the map: states are located by abbreviation and shaded by their
# share of negative tweets
p3 <- plot_geo(dfbystate, locationmode = 'USA-states') %>%
  add_trace(
    z = ~negativity, locations = ~abb,
    color = ~negativity, colors = 'Blues'
  ) %>%
  colorbar(title = "Negativity") %>%
  layout(geo = g,
         title = "How's your Monday?"
  )
api_create(p3, filename = "p3")
# --- Bubble maps --- #
# City lookup table: index, name and coordinates of each searched city. The
# index is derived from cityNames so the table follows the city list instead
# of assuming exactly ten cities.
cdf <- data.frame(city=seq_along(cityNames), cityN=cityNames, lon=lons, lat=lats)
# get pos/neg tweets count by city, then attach the city details
dfbycity <- ddply(DAT1, .(city, pn), summarise, count=length(user_id))
dfbycity <- merge(cdf, dfbycity, by='city')
# mapping: one marker per city and label, sized by tweet count
p4 <- plot_geo(dfbycity, locationmode = 'USA-states') %>%
  add_markers(x = ~lon, y = ~lat, size = ~count, color = ~pn, opacity = 0.5) %>%
  layout(geo=g, title = "Positive vs. Negative Tweets about Monday")
api_create(p4, filename = "p4")
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
# deal with overlapping "bubbles": shift the positive marker 0.25 degrees
# one way and every other marker 0.25 degrees the other way
lonShift <- ifelse(dfbycity$pn=='positive', -0.25, 0.25)
dfbycity$lon2 <- dfbycity$lon + lonShift
# same bubble map on the shifted longitudes, with hover text per city
p4.1 <- plot_geo(dfbycity, locationmode = 'USA-states')
p4.1 <- add_markers(p4.1,
                    x = ~lon2, y = ~lat, size = ~count, color = ~pn,
                    colors = 'Set1', hoverinfo = "text",
                    text = ~paste(cityN,"<br />", paste(pn," tweets:",count)))
p4.1 <- layout(p4.1, geo = g,
               title = 'Positive vs. Negative Tweets about Monday <br /> (hover for city info)')
api_create(p4.1, filename = "p4.1")
# grouping plots
# bar colours for the two pn groups
barStyles <- list(
  list(target = "positive", value = list(marker = list(color = 'red'))),
  list(target = "negative", value = list(marker = list(color = 'black')))
)
# group the bars by pn and apply the styles above
byPn <- list(type = 'groupby', groups = ~pn, styles = barStyles)
# make a bar chart of tweet counts per city
p5 <- plot_ly(dfbycity, x = ~cityN, y = ~count, type = 'bar',
              transforms = list(byPn))
p5 <- layout(p5, title = "Positive vs. Negative Tweets by City")
# stack it under the bubble map created earlier
p6 <- subplot(p4.1, p5, nrows = 2)
api_create(p6, filename = "p6")