This document was made for the Week 2 course project of Developing Data Products. It has to contain at least these two things: – A published web map made with leaflet. – A date of production.

SUMMARY

Here we are going to create a map of tweets in which the word “stupid” or “estupid@” was used.
These are the main steps:

  1. Download data from the Twitter API.
  2. Keep only those tweets that have a geolocation.
  3. Configure a map with leaflet where:
    • Points are the tweet geolocations.
    • Icons are the profile photo of the person who tweeted.
    • Popup is @profile –> “text of the tweet”.
    • The popup contains the link to the tweet.

PROCESS

Go to the R file with the Twitter token and use it to connect to the Twitter API. Then load the libraries.

# load libraries
library(devtools)
library(ROAuth)
library(rtweet)
library(ggplot2)
library(dplyr)
library(tidytext)
library(rlist)
library(leaflet)

Search for tweets with the word “stupid” or “estupid”

# Build a timestamped file name for this run's download. The stamp
# (YYYYMMDD_hhmmss) makes the file names sort chronologically, which the
# cache check further down uses to find the newest download and to avoid
# querying Twitter again when a request was made within the last hour.
# format() with an explicit pattern replaces slicing the default printed form
# of Sys.time(), and file.path() supplies the path separators.
stamp <- format(Sys.time(), "%Y%m%d_%H%M%S")
file.Rda <- file.path(
  getwd(), "Twitter_data", paste0("Twitter_data_", stamp, ".Rda")
)

## Look for earlier downloads in the data folder. File names embed a
## YYYYMMDD_hhmmss stamp, so dir()'s alphabetical order is chronological and
## the last path is the newest download.
data_dir <- file.path(getwd(), "Twitter_data")
paths <- dir(data_dir, pattern = "\\.Rda$", full.names = TRUE)
last_tweet_download <- tail(file.info(paths)$ctime, 1)

# Age of the newest download, in hours.
# Careful: this relies on local file timestamps, so it only works when the
# script is always run on the same computer.
dif <- difftime(Sys.time(), last_tweet_download, units = "hours")

# `dif` is empty when the folder is missing or holds no download yet.
# isTRUE() turns that case (and NA) into "no usable cache" instead of an
# error inside if ().
if (isTRUE(dif < 1)) {
  # A download less than one hour old exists: reuse it (restores `df`).
  load(paths[length(paths)])
} else {
  q <- "stupid OR estupid" # query words to look for
  n <- 25000 # number of tweets to be returned
  tweets <- search_tweets(
    q = q,
    n = n,
    # lang = "en",
    include_rts = FALSE # exclude retweets
  )
  df <- data.frame(tweets)
  # save() cannot write into a folder that does not exist, so create it first.
  dir.create(dirname(file.Rda), showWarnings = FALSE, recursive = TRUE)
  save(df, file = file.Rda)
}

Subset tweets keeping just those with coordinates

# Keep only the tweets that carry a coordinate pair. The coordinate columns
# hold one vector per tweet; a tweet counts as located when the first two
# values of that vector are present and not NA. Testing the values directly
# replaces comparing each element against the deparsed string "c(NA, NA)",
# which only recognised one particular spelling of a missing pair.
has_coords <- function(col) {
  vapply(
    col,
    function(p) length(p) >= 2L && !anyNA(p[1:2]),
    logical(1),
    USE.NAMES = FALSE
  )
}
tgeo <- df[has_coords(df$geo_coords), ] # tweets with geo_coords
tcoords <- df[has_coords(df$coords_coords), ] # tweets with coords_coords

## % of tweets with geo_coords
Nt <- nrow(df)
Ngeo <- nrow(tgeo)
Pgeo <- (Ngeo / Nt) * 100

Create a map of geotagged tweets related in some way to ‘stupidity’.

## Points to be mapped: split each tweet's geo_coords pair into its first
## value (used as latitude) and its second value (used as longitude).
## vapply() over the column replaces the index loop, which grew the vectors
## one element at a time and iterated over c(1, 0) when `tgeo` had no rows.
nth_coord <- function(pairs, k) {
  vapply(pairs, function(p) as.numeric(p[k]), numeric(1), USE.NAMES = FALSE)
}
y <- nth_coord(tgeo$geo_coords, 1L)
x <- nth_coord(tgeo$geo_coords, 2L)
tweet_coords <- data.frame(
  lat = y,
  lng = x
)

# Icons to be mapped: one marker icon per tweet, using the author's profile
# thumbnail whose URL is stored in $profile_image_url.
# The column is taken as a whole vector; the previous index loop copied it
# element by element and iterated over c(1, 0) when `tgeo` had no rows.
jpg_profile_list <- as.character(tgeo$profile_image_url)
stupidIcon <- makeIcon(
  iconUrl = jpg_profile_list,
  iconWidth = 31 * 215 / 230, iconHeight = 31,
  iconAnchorX = 31 * 215 / 230 / 2, iconAnchorY = 16
)

# Popup text to be mapped: each popup shows "@profile --> tweet text", and the
# whole popup is a link to the tweet itself.
# Tweet links follow this schema:
#   "https://twitter.com/$screen_name/status/$status_id"
#   e.g. "https://twitter.com/thecardiffcwtch/status/1052632880655060992"
# sprintf() is vectorised over the columns and returns an empty vector when
# `tgeo` has no rows, where the previous loop iterated over c(1, 0).
# NOTE(review): the tweet text is inserted into the popup as raw HTML.
# Confirm it is already entity-escaped before trusting it here.
tweet_text_list <- sprintf(
  "<a href='https://twitter.com/%s/status/%s'>@%s --> %s</a>",
  tgeo$screen_name, tgeo$status_id, tgeo$screen_name, tgeo$text
)

# MAP
# Leaflet widget built from `tweet_coords`: four selectable base layers plus
# a single "Tweets" overlay holding one marker per tweet.
leaflet(data = tweet_coords, width = "100%") %>%
  # Base layers (default tiles first, then the provider tiles)
  addTiles(group = "OSM") %>%
  addProviderTiles(
    provider = providers[["Stamen.Toner"]],
    group = "Toner"
  ) %>%
  addProviderTiles(
    provider = providers[["Stamen.TonerLite"]],
    group = "Toner Lite"
  ) %>%
  addProviderTiles(provider = "Esri.WorldImagery", group = "ESRI") %>%
  # Overlay: profile-picture markers whose popups link to the tweet
  addMarkers(
    popup = tweet_text_list,
    icon = stupidIcon,
    group = "Tweets"
  ) %>%
  # Layers control, shown expanded
  addLayersControl(
    overlayGroups = "Tweets",
    baseGroups = c("OSM", "Toner", "Toner Lite", "ESRI"),
    options = layersControlOptions(collapsed = FALSE)
  )

This map was produced from 23905 tweets downloaded from the Twitter API on 18/10/2018, of which just 12 were geolocated (0.0501987% of the total number of tweets).