# JMSC 6116 Lecture 7: Mapping
#
# Plotting Sentiments on Maps

# Install any package this script needs that is not already available.
# Each package is checked under its own name, so a machine that already has
# rtweet still gets the remaining packages installed. requireNamespace() only
# tests availability; attaching is left to the library() calls below.
# NOTE(review): rgeos and maptools appear to have been archived on CRAN since
# this lecture was written -- confirm they can still be installed.
required_pkgs <- c("rtweet", "tidytext", "maps", "plotly",
                   "ggmap", "rgeos", "maptools", "plyr")
is_installed <- vapply(required_pkgs, requireNamespace, logical(1),
                       quietly = TRUE)
missing_pkgs <- required_pkgs[!is_installed]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs, repos = "https://cran.cnr.berkeley.edu/",
                   dependencies = TRUE)
}

library(rtweet)    # Twitter search
library(tidytext)  # sentiment analysis
library(maps)      # draw geo-maps
library(plotly)    # plotting
library(ggmap)     # geocoding with Google API
library(rgeos)     # for maptools
library(maptools)  # handle spatial objects
library(plyr)      # data wrangling

# STEP 1: Data collection -----------------------------------------------------

# Look up the longitude and latitude of a place name (Google API)
geocode("Atlanta, GA")

## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Atlanta,%20GA&sensor=false

##         lon    lat
## 1 -84.38798 33.749

# Search Twitter by cities
N <- 500  # number of tweets to request from each query
S <- 200  # radius in miles

# The three vectors below are parallel: element k of each describes city k.
cityNames <- c(
  "Atlanta", "Boston", "Chicago", "Columbus", "Houston",
  "New York City", "Phoenix", "Salt Lake City", "San Francisco",
  "Washington DC"
)
lons <- c(
  -84.3, -71.1, -87.6, -82.9, -95.3,
  -73.9, -112, -111.9, -122.4, -77
)
lats <- c(
  33.7, 42.3, 41.8, 39.9, 29.7,
  40.7, 33.5, 40.7, 37.7, 38.9
)

# The same query is issued once per city, so it is wrapped in a function.
#   i  : position of the city in lats / lons
#   st : search term handed to search_tweets()
# Reads N, S, lats and lons from the enclosing environment and returns the
# result of search_tweets() invisibly.
searchFunction <- function(i, st) {
  radius <- paste0(S, "mi")
  area <- paste(lats[i], lons[i], radius, sep = ",")
  invisible(
    search_tweets(st, n = N, lang = "en", include_rts = FALSE, geocode = area)
  )
}

# search with two terms and save them in two data frames

# Run one search for city i and tidy the result.
#   i  : index of the city in cityNames / lats / lons
#   st : search term
# Prints the number of tweets found. Returns the tweets with a `city` column
# (the city index) and with `text` lower-cased and every non-alphanumeric
# character replaced by a space, or NULL when the search returned no rows so
# the caller can skip it.
collectTweets <- function(i, st) {
  found <- searchFunction(i, st = st)
  print(nrow(found))
  if (nrow(found) == 0) {
    return(NULL)
  }
  found$city <- i
  found$text <- tolower(gsub("[^[:alnum:]]", " ", found$text))
  found
}

# One slot per city. Filling preallocated lists and binding once at the end
# avoids re-copying the accumulated data frame on every iteration, and
# seq_along() keeps the loop in step with the city list.
monday_batches <- vector("list", length(cityNames))
friday_batches <- vector("list", length(cityNames))
for (i in seq_along(cityNames)) {
  found <- collectTweets(i, st = "monday")
  # assigning NULL with [[<- would delete the slot, so only store real results
  if (!is.null(found)) {
    monday_batches[[i]] <- found
  }

  found <- collectTweets(i, st = "friday")
  if (!is.null(found)) {
    friday_batches[[i]] <- found
  }

  print(i)
  Sys.sleep(10) # pause between cities (presumably for API rate limits)
}

# rbind() drops the NULL slots left by empty searches; starting from an empty
# data frame keeps DAT1 / DAT2 defined even if every search came back empty.
DAT1 <- do.call(rbind, c(list(data.frame()), monday_batches))
DAT2 <- do.call(rbind, c(list(data.frame()), friday_batches))

# Write each collected data frame to <name>.Rdata in the working directory
setwd("YOUR WORKING DIRECTORY")
for (nm in c("DAT1", "DAT2")) {
  save(list = nm, file = paste0(nm, ".Rdata"))
}

# STEP2: Sentiment Analysis ---------------------------------------------------

# load the "bing" sentiment lexicon
sentiment_term <- get_sentiments("bing")

# append extra words to the lexicon, each tagged as negative
for (w in c("epicfail", "wtf")) {
  sentiment_term <- rbind(sentiment_term, c(w, "negative"))
}

# "Sentiment score" of one tweet.
#   t : tweet text; it is split on single spaces into tokens
# Returns (number of positive lexicon words found among the tokens) minus
# (number of negative lexicon words found). A lexicon word counts once no
# matter how often it occurs in the tweet. Reads the global `sentiment_term`.
score.sent <- function(t) {
  tokens <- unlist(strsplit(t, " "))
  is_pos <- sentiment_term$sentiment == "positive"
  is_neg <- sentiment_term$sentiment == "negative"
  n_pos <- sum(sentiment_term$word[is_pos] %in% tokens)
  n_neg <- sum(sentiment_term$word[is_neg] %in% tokens)
  n_pos - n_neg
}

# Score every tweet. vapply() always returns a numeric vector (sapply() would
# return an empty list for zero tweets), and USE.NAMES = FALSE stops each
# score from being named with the full text of its tweet.
DAT1$s.score <- vapply(DAT1$text, score.sent, numeric(1), USE.NAMES = FALSE)
DAT2$s.score <- vapply(DAT2$text, score.sent, numeric(1), USE.NAMES = FALSE)

# STEP3: Mapping --------------------------------------------------------------

# --- scatter points on maps --- #
# Plot tweets as points on map

# get latitude and longitude variables with lat_lng()
DAT1 <- lat_lng(DAT1)
with(DAT1, plot(lng, lat))

sum(complete.cases(DAT1$lng)) # not every tweet came with geo info

## [1] 552

# fill in lons/lats with cities' lons/lats
# DAT1$city holds each tweet's index into lons / lats, so the missing
# coordinates are looked up directly rather than by looping over a
# hard-coded number of cities.
no_lng <- is.na(DAT1$lng)
no_lat <- is.na(DAT1$lat)
DAT1$lng[no_lng] <- lons[DAT1$city[no_lng]]
DAT1$lat[no_lat] <- lats[DAT1$city[no_lat]]

# 1) create the map with "maps" package
map("state", bg = "lightblue", fill = TRUE, col = "wheat",
    mar = c(1, 1, par("mar")[3], 0.1), resolution = 0)
# every tweet as a black circle, then "P" over tweets scoring above zero and
# "N" over tweets scoring below zero
with(DAT1, {
  points(lng, lat, col = "black", cex = 0.6, pch = 1)
  points(lng[s.score > 0], lat[s.score > 0],
         col = "red", cex = 0.6, pch = "P")
  points(lng[s.score < 0], lat[s.score < 0],
         col = "blue", cex = 0.6, pch = "N")
})

# 2) do it again with Plotly

# Plotly credentials, see
# https://plot.ly/r/getting-started/
Sys.setenv(
  plotly_username = "YOUR PLOTLY USERNAME",
  plotly_api_key = "YOUR API KEY"
)

# geo scope and projection
g <- list(scope = "usa", projection = list(type = "albers usa"))

# build the map step by step: base geo plot, one marker per tweet (hover shows
# screen name and creation time), then the geo layout and title
p1 <- plot_geo(DAT1, lat = ~lat, lon = ~lng)
p1 <- add_markers(
  p1,
  text = ~paste(screen_name, created_at, sep = "<br />"),
  hoverinfo = "text"
)
p1 <- layout(p1, geo = g,
             title = "Tweets about Monday<br />(Hover for more info)")
p1

# send it to Plotly website
api_create(p1, filename = "p1") # interactive maps on the web

# Plot points by category

# create a var. indicating pos/neg of each tweet
# NOTE: only scores above zero are labelled "positive"; a score of exactly
# zero is labelled "negative" together with the scores below zero.
DAT1$pn <- ifelse(DAT1$s.score > 0, 1, 0)
DAT1$pn <- factor(DAT1$pn, levels = c(1, 0), labels = c("positive", "negative"))
# Pick the two columns by name: their positions depend on how many columns
# the tweet table happens to have.
print.data.frame(head(DAT1[, c("s.score", "pn")], 6)) # view first few rows

##   s.score       pn
## 1       0 negative
## 2       0 negative
## 3      -1 negative
## 4       0 negative
## 5      -1 negative
## 6       1 positive

# More geo styling
g <- list(
  scope = "usa", projection = list(type = "albers usa"),
  showland = TRUE, landcolor = toRGB("gray95"),
  subunitcolor = toRGB("gray85"), subunitwidth = 0.5,
  showlakes = TRUE, lakecolor = toRGB("white")
)

# plot the map: marker symbol follows the positive/negative label, and the
# hover text shows the screen name and the favorite count
p2 <- plot_geo(DAT1, lat = ~lat, lon = ~lng) %>%
  add_markers(
    symbol = ~pn, symbols = c("o", "x"), color = I("black"),
    text = ~paste(screen_name, paste("Favorites: ", favorite_count),
                  sep = "<br />"),
    hoverinfo = "text"
  ) %>%
  layout(geo = g, title = "Tweets about Monday: emotions")

api_create(p2, filename = "p2") # send it to Plotly

# --- Choropleth maps --- #
# Plot tweets by states 

# get states from lons/lats
# 1) easy, but depends on the Google API
geoinfo <- revgeocode(location = c(-78.47, 38.03), output = "more")

## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?latlng=38.03,-78.47&sensor=false

geoinfo[["locality"]]

## [1] Charlottesville
## Levels: Charlottesville

geoinfo[["administrative_area_level_1"]]

## [1] Virginia
## Levels: Virginia

# 2) define a function "latlong2state" to convert
# pointsDF is a data.frame in which
#   - column 1 holds the longitude in degrees
#   - column 2 holds the latitude in degrees
# Returns one state name per row of pointsDF, taken from the polygon that
# contains the point (NA where over() reports no containing polygon).

latlong2state <- function(pointsDF) {
  # Both the polygons and the points use the same long/lat WGS84 reference
  lonlat <- CRS("+proj=longlat +datum=WGS84")

  # State outlines from the "maps" package; names such as "state:part" are
  # cut at the colon so every piece of a state shares one ID
  state_map <- map("state", fill = TRUE, col = "transparent", plot = FALSE)
  state_ids <- vapply(strsplit(state_map$names, ":"),
                      function(parts) parts[1], character(1))
  state_polys <- map2SpatialPolygons(state_map, IDs = state_ids,
                                     proj4string = lonlat)

  # Input rows as spatial points
  pts <- SpatialPoints(pointsDF, proj4string = lonlat)

  # Index of the polygon containing each point
  hit <- over(pts, state_polys)

  # Translate polygon indices into their IDs (the state names)
  poly_names <- vapply(state_polys@polygons, function(p) p@ID, character(1))
  poly_names[hit]
}

# Try the function on two of the cities searched above
# (San Francisco and Boston, as longitude / latitude)
testPoints <- data.frame(
  x = c(-122.4, -71.1),
  y = c(37.7, 42.3)
)
latlong2state(testPoints)

## [1] "california"    "massachusetts"

# get state names for each pair of lon/lat
DAT1$state <- latlong2state(DAT1[, c("lng", "lat")])
DAT1 <- DAT1[!is.na(DAT1$state), ] # drop tweets that could not be converted

# calculate % of negative tweets by state:
# tweets per (state, pn), the per-state total, then the share of each group
dfbystate <- ddply(DAT1, .(state, pn), summarise, count = length(user_id))
dfbystate$total <- ave(dfbystate$count, dfbystate$state, FUN = sum)
dfbystate$negativity <- dfbystate$count / dfbystate$total
# keep only the "negative" rows, so negativity is the negative share
dfbystate <- dfbystate[dfbystate$pn %in% "negative", ]

# get state abbreviation (built-in state tables plus DC)
state <- list(
  name = c(state.name, "District Of Columbia"),
  abb = c(state.abb, "DC")
)
dfbystate$abb <- state$abb[match(dfbystate$state, tolower(state$name))]

# draw the map: one shaded state per row of dfbystate, located by its
# two-letter abbreviation and coloured by its share of negative tweets
p3 <- plot_geo(dfbystate, locationmode = "USA-states") %>%
  add_trace(
    z = ~negativity, locations = ~abb,
    color = ~negativity, colors = "Blues"
  ) %>%
  colorbar(title = "Negativity") %>%
  layout(
    geo = g,
    title = "How's your Monday?"
  )

api_create(p3, filename = "p3")

# --- Bubble maps --- # 

# get pos/neg tweets count by city
# The city index is derived from cityNames so this lookup table cannot drift
# out of step with the list of cities searched.
cdf <- data.frame(city = seq_along(cityNames), cityN = cityNames,
                  lon = lons, lat = lats)
dfbycity <- ddply(DAT1, .(city, pn), summarise, count = length(user_id))
dfbycity <- merge(cdf, dfbycity, by = "city")

# mapping: one bubble per city and label, sized by tweet count
p4 <- plot_geo(dfbycity, locationmode = "USA-states") %>%
  add_markers(x = ~lon, y = ~lat, size = ~count, color = ~pn, opacity = 0.5) %>%
  layout(geo = g, title = "Positive vs. Negative Tweets about Monday")

api_create(p4, filename = "p4")

## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels

# deal with overlapping "bubbles": shift positive bubbles 0.25 degrees west
# and all others 0.25 degrees east of the city's longitude
dfbycity$lon2 <- dfbycity$lon + ifelse(dfbycity$pn == "positive", -0.25, 0.25)

# same bubble map on the shifted longitudes, with a hover label per bubble
p4.1 <- plot_geo(dfbycity, locationmode = "USA-states")
p4.1 <- add_markers(
  p4.1,
  x = ~lon2, y = ~lat, size = ~count, color = ~pn, colors = "Set1",
  hoverinfo = "text",
  text = ~paste(cityN, "<br />", paste(pn, " tweets:", count))
)
p4.1 <- layout(
  p4.1,
  geo = g,
  title = "Positive vs. Negative Tweets about Monday <br /> (hover for city info)"
)

api_create(p4.1, filename = "p4.1")

# grouping plots

# bar chart of tweet counts per city, split into positive / negative groups
# with a fixed colour for each group
p5 <- plot_ly(
  dfbycity, x = ~cityN, y = ~count, type = "bar",
  transforms = list(
    list(
      type = "groupby",
      groups = ~pn,
      styles = list(
        list(target = "positive",
             value = list(marker = list(color = "red"))),
        list(target = "negative",
             value = list(marker = list(color = "black")))
      )
    )
  )
)
p5 <- layout(p5, title = "Positive vs. Negative Tweets by City")

# stack the bubble map above the bar chart
p6 <- subplot(p4.1, p5, nrows = 2)

api_create(p6, filename = "p6")

# JMSC 6116 Lecture 7: Mapping
#
# Yun Tai
#
# March 12, 2018
#
# Plotting Sentiments on Maps