10/05/2020

TRABAJO PRACTICO N°3

CIENCIA DE DATOS PARA CIUDADES II: Descargando y analizando datos de redes sociales

Snyders, Federico / Vargas, Juan

library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0     ✓ purrr   0.3.4
## ✓ tibble  3.0.1     ✓ dplyr   0.8.5
## ✓ tidyr   1.0.3     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ─────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(osmdata)
## Data (c) OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright
library(ggmap)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library(leaflet)
library(rtweet)
## 
## Attaching package: 'rtweet'
## The following object is masked from 'package:purrr':
## 
##     flatten
# Build the Twitter API token from credentials kept OUTSIDE the source file.
# API keys and secrets must never be written into a script or report: anyone
# who can read the file can act on behalf of the app. The four values are read
# from environment variables (for example defined in ~/.Renviron).
# NOTE(review): the credentials previously hard-coded here were exposed and
# should be revoked and regenerated in the Twitter developer portal.
twitter_env <- c(
  "TWITTER_CONSUMER_KEY",
  "TWITTER_CONSUMER_SECRET",
  "TWITTER_ACCESS_TOKEN",
  "TWITTER_ACCESS_SECRET"
)
twitter_keys <- Sys.getenv(twitter_env, unset = NA)

# Fail early with a clear message instead of sending empty credentials.
twitter_missing <- twitter_env[is.na(twitter_keys) | !nzchar(twitter_keys)]
if (length(twitter_missing) > 0) {
  stop(
    "Missing Twitter credentials; set these environment variables: ",
    paste(twitter_missing, collapse = ", "),
    call. = FALSE
  )
}

twitter_token <- create_token(
  app = "UrbanaTweets",
  consumer_key = twitter_keys[["TWITTER_CONSUMER_KEY"]],
  consumer_secret = twitter_keys[["TWITTER_CONSUMER_SECRET"]],
  access_token = twitter_keys[["TWITTER_ACCESS_TOKEN"]],
  access_secret = twitter_keys[["TWITTER_ACCESS_SECRET"]]
)

#Buscamos los tweets en un radio de 20 millas del estadio Stinky Socks Hockey of BOSTON

# Query the Twitter search API for tweets containing "hockey", restricted by
# the geocode string to a 20-mile radius around (42.338795, -71.093804).
# Retweets are excluded and at most 10000 tweets are requested;
# retryonratelimit = TRUE lets rtweet keep going after a rate limit is hit.
hockey_BOSTON <- search_tweets(
  q = "hockey",
  n = 10000,
  include_rts = FALSE,
  geocode = "42.338795,-71.093804,20mi",
  retryonratelimit = TRUE
)

# Preview the user-level data of the first rows.
hockey_BOSTON %>%
  users_data() %>%
  head()
## # A tibble: 6 x 20
##   user_id screen_name name  location description url   protected followers_count
##   <chr>   <chr>       <chr> <chr>    <chr>       <chr> <lgl>               <int>
## 1 322598… msheehan01… Mich… Boston,… ""          <NA>  FALSE                 183
## 2 387253… IceHockeyD… Mont… Boston,… ""          <NA>  FALSE                 222
## 3 387253… IceHockeyD… Mont… Boston,… ""          <NA>  FALSE                 222
## 4 387253… IceHockeyD… Mont… Boston,… ""          <NA>  FALSE                 222
## 5 387253… IceHockeyD… Mont… Boston,… ""          <NA>  FALSE                 222
## 6 352509… skg_18      Sarah Boston,… "Co-host @… http… FALSE                8200
## # … with 12 more variables: friends_count <int>, listed_count <int>,
## #   statuses_count <int>, favourites_count <int>, account_created_at <dttm>,
## #   verified <lgl>, profile_url <chr>, profile_expanded_url <chr>,
## #   account_lang <lgl>, profile_banner_url <chr>, profile_background_url <chr>,
## #   profile_image_url <chr>

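#Los tweets de las cuentas con más seguidores (top_n conserva los empates y hay una fila por tweet, por eso aparecen 6 filas y cuentas repetidas).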

# Rows with the five highest follower counts, highest first.
# The data has one row per tweet and top_n() keeps ties, so an account can
# appear several times and more than five rows can come back.
hockey_BOSTON %>%
  select(screen_name, followers_count, location, text) %>%
  top_n(5, followers_count) %>%
  arrange(desc(followers_count))
## # A tibble: 6 x 4
##   screen_name followers_count location  text                                    
##   <chr>                 <int> <chr>     <chr>                                   
## 1 NHLBruins           1522583 Boston, … "If you missed #WhenHockeyRuledTheHub, …
## 2 NHLBruins           1522583 Boston, … "\"Ladies and gentlemen, you are having…
## 3 NHLBruins           1522583 Boston, … "“It got to the point where they put st…
## 4 BostonGlobe          776160 Boston, … "Hockey bloodlines intersected on that …
## 5 BostonGlobe          776160 Boston, … "Inside the 'NASA of hockey.’ A look be…
## 6 BostonGlobe          776160 Boston, … "A first-timer’s first impressions of c…
# Histogram of retweet counts, restricted to rows that are not retweets.
hockey_BOSTON %>%
  filter(!is_retweet) %>%
  ggplot(aes(x = retweet_count)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Tweet(s) with the highest retweet count among non-retweets.
# Retweets are dropped first, as a separate step, so that max() is computed
# only over the remaining rows.
tweets_originales <- filter(hockey_BOSTON, !is_retweet)
tweets_originales %>%
  filter(retweet_count == max(retweet_count)) %>%
  select(screen_name, retweet_count, followers_count, location, text)
## # A tibble: 1 x 5
##   screen_name retweet_count followers_count location  text                      
##   <chr>               <int>           <int> <chr>     <chr>                     
## 1 NHLBruins              40         1522583 Boston, … "\"Ladies and gentlemen, …

#Frecuencia de los tweets por hora

# Time series of tweet frequency aggregated by hour, with Spanish labels.
ts_plot(hockey_BOSTON, by = "hours") +
  labs(
    title = "Frecuencia de tweets por hora",
    subtitle = "Menciones de la palabra ´HOCKEY´ en Boston",
    x = "horas",
    y = "cantidad",
    caption = "Fuente: twitter"
  ) +
  theme_minimal()

#Procedencia de los usuarios de Boston y cercanias

# Horizontal bar chart of the 20 most frequent self-reported user locations.
# Empty and missing locations are discarded before counting; bars are ordered
# by their count.
hockey_BOSTON %>%
  filter(!is.na(location), location != "") %>%
  count(location) %>%
  top_n(20, n) %>%
  ggplot(aes(x = reorder(location, n), y = n)) +
  geom_col() +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Procedencia de los usuarios",
    subtitle = "Boston y cercanías",
    caption = "Fuente: twitter",
    x = "ubicación",
    y = "cantidad"
  )

Para trabajar con un csv eliminamos las columnas de coordenadas que genera Twitter (geo_coords, coords_coords y bbox_coords) y agregamos dos columnas al dataset (lat y lng) con información para georreferenciar.

# Add the lat/lng columns with lat_lng() and drop the three raw coordinate
# columns in a single pipeline.
hockey_BOSTON2 <- hockey_BOSTON %>%
  lat_lng() %>%
  select(-c(geo_coords, coords_coords, bbox_coords))

# Keep only the tweets that have both a latitude and a longitude.
hockey_BOSTON_GEO <- filter(hockey_BOSTON2, !(is.na(lat) | is.na(lng)))

# Number of georeferenced tweets.
nrow(hockey_BOSTON_GEO)
## [1] 49

#Creamos un bounding box con las longitudes y latitudes de los tweets

# Bounding box computed from the coordinates of the georeferenced tweets.
bbox_tweets <- with(hockey_BOSTON_GEO, make_bbox(lon = lng, lat = lat))

# Show the box limits (left, bottom, right, top).
print(bbox_tweets)
##      left    bottom     right       top 
## -71.46959  42.10538 -70.77248  42.61592

#Descargamos el mapa base usando un zoom de 11 y después lo graficamos

# Download the Stamen "terrain" basemap tiles (the default map type, spelled
# out here) for the tweets' bounding box at zoom level 11.
mapa_tweets <- get_stamenmap(
  bbox = bbox_tweets,
  maptype = "terrain",
  zoom = 11
)
## Source : http://tile.stamen.com/terrain/11/617/755.png
## Source : http://tile.stamen.com/terrain/11/618/755.png
## Source : http://tile.stamen.com/terrain/11/619/755.png
## Source : http://tile.stamen.com/terrain/11/620/755.png
## Source : http://tile.stamen.com/terrain/11/621/755.png
## Source : http://tile.stamen.com/terrain/11/617/756.png
## Source : http://tile.stamen.com/terrain/11/618/756.png
## Source : http://tile.stamen.com/terrain/11/619/756.png
## Source : http://tile.stamen.com/terrain/11/620/756.png
## Source : http://tile.stamen.com/terrain/11/621/756.png
## Source : http://tile.stamen.com/terrain/11/617/757.png
## Source : http://tile.stamen.com/terrain/11/618/757.png
## Source : http://tile.stamen.com/terrain/11/619/757.png
## Source : http://tile.stamen.com/terrain/11/620/757.png
## Source : http://tile.stamen.com/terrain/11/621/757.png
## Source : http://tile.stamen.com/terrain/11/617/758.png
## Source : http://tile.stamen.com/terrain/11/618/758.png
## Source : http://tile.stamen.com/terrain/11/619/758.png
## Source : http://tile.stamen.com/terrain/11/620/758.png
## Source : http://tile.stamen.com/terrain/11/621/758.png
## Source : http://tile.stamen.com/terrain/11/617/759.png
## Source : http://tile.stamen.com/terrain/11/618/759.png
## Source : http://tile.stamen.com/terrain/11/619/759.png
## Source : http://tile.stamen.com/terrain/11/620/759.png
## Source : http://tile.stamen.com/terrain/11/621/759.png
# Draw the downloaded terrain basemap.
mapa_tweets %>%
  ggmap()

#Ahora probamos otros estilos de Stamen Maps: primero Terrain Lines y después Toner Lite

# Same bounding box and zoom level, using the "terrain-lines" Stamen style.
hockey_BOSTON_line <- get_stamenmap(
  bbox = bbox_tweets,
  zoom = 11,
  maptype = "terrain-lines"
)
## Source : http://tile.stamen.com/terrain-lines/11/617/755.png
## Source : http://tile.stamen.com/terrain-lines/11/618/755.png
## Source : http://tile.stamen.com/terrain-lines/11/619/755.png
## Source : http://tile.stamen.com/terrain-lines/11/620/755.png
## Source : http://tile.stamen.com/terrain-lines/11/621/755.png
## Source : http://tile.stamen.com/terrain-lines/11/617/756.png
## Source : http://tile.stamen.com/terrain-lines/11/618/756.png
## Source : http://tile.stamen.com/terrain-lines/11/619/756.png
## Source : http://tile.stamen.com/terrain-lines/11/620/756.png
## Source : http://tile.stamen.com/terrain-lines/11/621/756.png
## Source : http://tile.stamen.com/terrain-lines/11/617/757.png
## Source : http://tile.stamen.com/terrain-lines/11/618/757.png
## Source : http://tile.stamen.com/terrain-lines/11/619/757.png
## Source : http://tile.stamen.com/terrain-lines/11/620/757.png
## Source : http://tile.stamen.com/terrain-lines/11/621/757.png
## Source : http://tile.stamen.com/terrain-lines/11/617/758.png
## Source : http://tile.stamen.com/terrain-lines/11/618/758.png
## Source : http://tile.stamen.com/terrain-lines/11/619/758.png
## Source : http://tile.stamen.com/terrain-lines/11/620/758.png
## Source : http://tile.stamen.com/terrain-lines/11/621/758.png
## Source : http://tile.stamen.com/terrain-lines/11/617/759.png
## Source : http://tile.stamen.com/terrain-lines/11/618/759.png
## Source : http://tile.stamen.com/terrain-lines/11/619/759.png
## Source : http://tile.stamen.com/terrain-lines/11/620/759.png
## Source : http://tile.stamen.com/terrain-lines/11/621/759.png
# Draw the terrain-lines basemap.
hockey_BOSTON_line %>%
  ggmap()

# Same bounding box and zoom level, using the "toner-lite" Stamen style.
hockey_BOSTON_TL <- get_stamenmap(
  bbox = bbox_tweets,
  zoom = 11,
  maptype = "toner-lite"
)
## Source : http://tile.stamen.com/toner-lite/11/617/755.png
## Source : http://tile.stamen.com/toner-lite/11/618/755.png
## Source : http://tile.stamen.com/toner-lite/11/619/755.png
## Source : http://tile.stamen.com/toner-lite/11/620/755.png
## Source : http://tile.stamen.com/toner-lite/11/621/755.png
## Source : http://tile.stamen.com/toner-lite/11/617/756.png
## Source : http://tile.stamen.com/toner-lite/11/618/756.png
## Source : http://tile.stamen.com/toner-lite/11/619/756.png
## Source : http://tile.stamen.com/toner-lite/11/620/756.png
## Source : http://tile.stamen.com/toner-lite/11/621/756.png
## Source : http://tile.stamen.com/toner-lite/11/617/757.png
## Source : http://tile.stamen.com/toner-lite/11/618/757.png
## Source : http://tile.stamen.com/toner-lite/11/619/757.png
## Source : http://tile.stamen.com/toner-lite/11/620/757.png
## Source : http://tile.stamen.com/toner-lite/11/621/757.png
## Source : http://tile.stamen.com/toner-lite/11/617/758.png
## Source : http://tile.stamen.com/toner-lite/11/618/758.png
## Source : http://tile.stamen.com/toner-lite/11/619/758.png
## Source : http://tile.stamen.com/toner-lite/11/620/758.png
## Source : http://tile.stamen.com/toner-lite/11/621/758.png
## Source : http://tile.stamen.com/toner-lite/11/617/759.png
## Source : http://tile.stamen.com/toner-lite/11/618/759.png
## Source : http://tile.stamen.com/toner-lite/11/619/759.png
## Source : http://tile.stamen.com/toner-lite/11/620/759.png
## Source : http://tile.stamen.com/toner-lite/11/621/759.png
# Draw the toner-lite basemap on its own.
hockey_BOSTON_TL %>%
  ggmap()

# Same basemap with one point per georeferenced tweet and Spanish labels.
hockey_BOSTON_TL %>%
  ggmap() +
  geom_point(aes(x = lng, y = lat), data = hockey_BOSTON_GEO) +
  labs(
    title = "Tweets que mencionan la palabra HOCKEY",
    subtitle = "Boston y cercanías",
    caption = "Fuente: twitter",
    x = "longitud",
    y = "latitud"
  )

# Georeferenced tweets over the terrain-lines basemap, coloured by the
# author's follower count with the "Spectral" palette.
hockey_BOSTON_line %>%
  ggmap() +
  geom_point(
    aes(x = lng, y = lat, color = followers_count),
    data = hockey_BOSTON_GEO
  ) +
  scale_color_distiller(palette = "Spectral")

#Mostramos con mayor claridad los usuarios con mas seguidores

# Sort by ascending follower count so that points for accounts with more
# followers are drawn last, i.e. on top of the others.
hockey_BOSTON_Ag <- hockey_BOSTON_GEO %>%
  arrange(followers_count)

# Toner-lite basemap with the sorted points, coloured by follower count.
hockey_BOSTON_TL %>%
  ggmap() +
  geom_point(
    aes(x = lng, y = lat, color = followers_count),
    data = hockey_BOSTON_Ag
  ) +
  scale_color_distiller(palette = "Spectral")

#Tambien observamos tweets más retwitteados

# Same map, additionally mapping point size to the retweet count; points are
# semi-transparent (alpha = .5).
hockey_BOSTON_TL %>%
  ggmap() +
  geom_point(
    aes(x = lng, y = lat, color = followers_count, size = retweet_count),
    data = hockey_BOSTON_Ag,
    alpha = .5
  ) +
  scale_color_distiller(palette = "Spectral")

#Creamos un mapa interactivo para mostrar la información de Twitter

# Continuous "viridis" colour scale whose domain is the follower counts.
paleta <- colorNumeric(
  domain = hockey_BOSTON_Ag$followers_count,
  palette = "viridis"
)

# Interactive map: default base tiles, one circle marker per tweet coloured by
# follower count (the popup shows the tweet text), plus a legend for the scale.
# No lng/lat are passed, so leaflet picks the coordinate columns itself.
leaflet(hockey_BOSTON_Ag) %>%
  addTiles() %>%
  addCircleMarkers(
    color = ~paleta(followers_count),
    popup = ~text
  ) %>%
  addLegend(
    pal = paleta,
    values = ~followers_count,
    title = "seguidores"
  )
## Assuming "lng" and "lat" are longitude and latitude, respectively