This document was made for the week 2 Course Project of Developing Data Products. It has to contain at least these two things: – A published web map made with Leaflet. – A date of production.
Here we are going to create a map of tweets where the word “stupid” or “estupid@” was used.
These are the main steps:
Go to the R file with the Twitter token and use it to connect to the Twitter API, then load the libraries (a sketch of the authentication step follows the library block).
# load libraries
library(devtools)
library(ROAuth)
library(rtweet)
library(ggplot2)
library(dplyr)
library(tidytext)
library(rlist)
library(leaflet)
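With the libraries loaded, the token stored in that R file can be registered so rtweet can authenticate against the Twitter API. A minimal sketch, assuming the keys are defined in a local Twitter_token.R script; the file name, app name, and key variable names are placeholders, not part of the original project, and the access_token/access_secret arguments require a recent rtweet version:
source("Twitter_token.R") #defines the four key strings below; keep this file out of version control
token <- create_token(
app = "stupid_tweets_map", #placeholder app name
consumer_key = consumer_key,
consumer_secret = consumer_secret,
access_token = access_token,
access_secret = access_secret)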
Search for tweets with the word “stupid” or “estupid”
#get the current system time in order to name the .Rda file where tweets will be saved, and also to avoid
#repeating requests to Twitter if one was already made within the last hour
t <- toString(Sys.time())
Y <- substr(t,1,4); M <- substr(t,6,7); D <- substr(t,9,10)
h <- substr(t,12,13); m <- substr(t,15,16); s <- substr(t, 18,19)
file.Rda <- file.path(getwd(),"Twitter_data",paste0("Twitter_data_",Y,M,D,"_",h,m,s,".Rda"))
## check the files already saved in the folder
paths <- dir(file.path(getwd(),"Twitter_data"), full.names=TRUE)
last_tweet_download <- tail(file.info(paths)$ctime,1) #ctime of the last file (names sort chronologically)
#if the last tweet request was made within the last hour, don't download again and use the last .Rda file saved
#time difference from the last tweet download
#careful: this check only works when running on the same computer
dif <- difftime(Sys.time(),last_tweet_download,units='hours')
#now conditions
if (length(dif)==1 && dif<1) { #reuse the cached file; on a first run the folder is empty and dif has length 0
load(paths[length(paths)])
} else {
q <- "stupid OR estupid" #query word that you want to look for
n <- 25000 #is the number of tweets to be returned
tweets <- search_tweets(q=q, #query word that you want to look for
n=n,
#lang = "en",
include_rts = F #exclude retweets
)
df <- data.frame(tweets)
save(df,file=file.Rda)
}
Subset tweets keeping just those with coordinates
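rtweet returns tweet locations in list columns, and rows without a location hold c(NA, NA). When R coerces a list column to character for a comparison, each element is deparsed, so the empty entries become the literal string "c(NA, NA)", which is exactly what the comparisons below test against. A quick way to see this (a sketch, output depends on your data):
head(as.character(df$geo_coords))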
tgeo <- df["c(NA, NA)" != df$geo_coords,] #pick tweets with geo_coords
tcoords <- df["c(NA, NA)" != df$coords_coords,] #same check for the coords_coords column
## % of tweets with geo_coords
Nt <- dim(df)[1]
Ngeo <- dim(tgeo)[1]
Pgeo <- (Ngeo/Nt)*100
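These three values can be used to generate the report sentence at the end of this document programmatically instead of typing the numbers by hand; a small sketch (the exact wording is an assumption):
#reuse the date parts extracted from Sys.time() above
summary_line <- sprintf(
"This map was produced from %d tweets downloaded from the Twitter API on %s/%s/%s, where just %d were geolocated (%.2f%% of the total number of tweets).",
Nt, D, M, Y, Ngeo, Pgeo)
summary_line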
Create a map of geotagged tweets related in any way to ‘stupidity’.
## points to be mapped
y <- c()
x <- c()
for (i in 1:nrow(tgeo)) {
y[[i]] <- tgeo$geo_coords[[i]][1] #first element: latitude
x[[i]] <- tgeo$geo_coords[[i]][2] #second element: longitude
}
tweet_coords <- data.frame(
lat = y,
lng = x)
#icons to be mapped
#the URL of the profile thumbnail is in $profile_image_url
jpg_profile_list <- character()
for (i in 1:nrow(tgeo)){
jpg_URL <- tgeo[i,]$profile_image_url
jpg_profile_list[[i]] <- jpg_URL
}
stupidIcon <- makeIcon(
iconUrl = jpg_profile_list,
iconWidth = 31*215/230, iconHeight = 31,
iconAnchorX = 31*215/230/2, iconAnchorY = 16
)
#tweets text to be mapped
#each popup shows the tweet text and its link takes you to the tweet on the author's page
#a link to a tweet follows this schema: "https://twitter.com/thecardiffcwtch/status/1052632880655060992"
#i.e. "https://twitter.com/$screen_name/status/$status_id"
tweet_text_list <- character()
for (i in 1:nrow(tgeo)){
tweet_URL <- paste0("https://twitter.com/",tgeo[i,]$screen_name,"/status/",tgeo[i,]$status_id)
tweet_profile <- paste0("@",tgeo[i,]$screen_name)
tweet_text <- tgeo[i,]$text
html_URL_text <- paste0("<a href='",tweet_URL,"'>",paste0(tweet_profile," --> ", tweet_text),"</a>")
tweet_text_list[[i]] <- html_URL_text
}
#MAP
tweet_coords %>%
leaflet(width="100%") %>%
addTiles(group="OSM") %>%
# Base groups
addProviderTiles(providers$Stamen.Toner, group = "Toner") %>%
addProviderTiles(providers$Stamen.TonerLite, group = "Toner Lite") %>%
addProviderTiles('Esri.WorldImagery', group = "ESRI") %>%
# add markers
addMarkers(icon = stupidIcon,
popup = tweet_text_list,
group = "Tweets") %>%
# Layers control
addLayersControl(
baseGroups = c("OSM", "Toner", "Toner Lite", "ESRI"),
overlayGroups = c("Tweets"),
options = layersControlOptions(collapsed = FALSE))
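Finally, to obtain the published web map the assignment asks for, the widget can be saved to a standalone HTML file and uploaded (for example to RPubs or GitHub Pages). A minimal sketch, assuming the leaflet pipeline above is first assigned to a variable such as tweet_map; the variable and file names are placeholders:
library(htmlwidgets)
#write the map to a self-contained HTML file ready to publish
saveWidget(tweet_map, file = "stupid_tweets_map.html", selfcontained = TRUE)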
This map was produced from 23905 tweets downloaded from the Twitter API on 18/10/2018, where just 12 were geolocated (0.05% of the total number of tweets).