# 10/05/2020
# TRABAJO PRÁCTICO N°3
# CIENCIA DE DATOS PARA CIUDADES II: Descargando y analizando datos de redes sociales
# Snyders, Federico / Vargas, Juan
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0 ✓ purrr 0.3.4
## ✓ tibble 3.0.1 ✓ dplyr 0.8.5
## ✓ tidyr 1.0.3 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ─────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(osmdata)
## Data (c) OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright
library(ggmap)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library(leaflet)
library(rtweet)
##
## Attaching package: 'rtweet'
## The following object is masked from 'package:purrr':
##
## flatten
# Authenticate against the Twitter API.
#
# SECURITY: the API key/secret and access token/secret used to be written
# literally in this file. Anything that was ever committed or published must
# be treated as compromised and regenerated in the Twitter developer portal.
# The credentials are now read from environment variables (for example set in
# ~/.Renviron), so they never appear in the source or in the rendered report.
twitter_credentials <- Sys.getenv(
  c(
    "TWITTER_CONSUMER_KEY",
    "TWITTER_CONSUMER_SECRET",
    "TWITTER_ACCESS_TOKEN",
    "TWITTER_ACCESS_SECRET"
  ),
  unset = NA_character_,
  names = TRUE
)

# Fail early with a clear message instead of sending empty credentials.
missing_credentials <- names(twitter_credentials)[
  is.na(twitter_credentials) | twitter_credentials == ""
]
if (length(missing_credentials) > 0) {
  stop(
    "Missing Twitter credentials in environment variables: ",
    paste(missing_credentials, collapse = ", "),
    call. = FALSE
  )
}

twitter_token <- create_token(
  app = "UrbanaTweets",
  consumer_key = twitter_credentials[["TWITTER_CONSUMER_KEY"]],
  consumer_secret = twitter_credentials[["TWITTER_CONSUMER_SECRET"]],
  access_token = twitter_credentials[["TWITTER_ACCESS_TOKEN"]],
  access_secret = twitter_credentials[["TWITTER_ACCESS_SECRET"]]
)
# Search for tweets mentioning "hockey" within a 20-mile radius of the
# Stinky Socks Hockey venue in Boston; the centre and radius are passed as a
# single "latitude,longitude,radius" string.
hockey_BOSTON <- search_tweets(
  q = "hockey",
  n = 10000,
  geocode = "42.338795,-71.093804,20mi",
  include_rts = FALSE,
  retryonratelimit = TRUE
)

# Preview the first rows of the user-level data attached to the results.
head(users_data(hockey_BOSTON))
## # A tibble: 6 x 20
## user_id screen_name name location description url protected followers_count
## <chr> <chr> <chr> <chr> <chr> <chr> <lgl> <int>
## 1 322598… msheehan01… Mich… Boston,… "" <NA> FALSE 183
## 2 387253… IceHockeyD… Mont… Boston,… "" <NA> FALSE 222
## 3 387253… IceHockeyD… Mont… Boston,… "" <NA> FALSE 222
## 4 387253… IceHockeyD… Mont… Boston,… "" <NA> FALSE 222
## 5 387253… IceHockeyD… Mont… Boston,… "" <NA> FALSE 222
## 6 352509… skg_18 Sarah Boston,… "Co-host @… http… FALSE 8200
## # … with 12 more variables: friends_count <int>, listed_count <int>,
## # statuses_count <int>, favourites_count <int>, account_created_at <dttm>,
## # verified <lgl>, profile_url <chr>, profile_expanded_url <chr>,
## # account_lang <lgl>, profile_banner_url <chr>, profile_background_url <chr>,
## # profile_image_url <chr>
# The five users with the most followers.
#
# `hockey_BOSTON` holds one row per tweet, so ranking it directly returns the
# same account several times (the previously recorded output lists only
# NHLBruins and BostonGlobe, three rows each). Keep a single row per account
# before ranking so the result really is a list of users.
# Note: the recorded console output that follows this chunk was produced
# before this de-duplication was added.
hockey_BOSTON %>%
  # `.keep_all = TRUE` keeps the other columns, taken from the first tweet
  # found for each account; `text` is therefore just a sample tweet.
  distinct(screen_name, .keep_all = TRUE) %>%
  top_n(5, followers_count) %>%
  arrange(desc(followers_count)) %>%
  select(screen_name, followers_count, location, text)
## # A tibble: 6 x 4
## screen_name followers_count location text
## <chr> <int> <chr> <chr>
## 1 NHLBruins 1522583 Boston, … "If you missed #WhenHockeyRuledTheHub, …
## 2 NHLBruins 1522583 Boston, … "\"Ladies and gentlemen, you are having…
## 3 NHLBruins 1522583 Boston, … "“It got to the point where they put st…
## 4 BostonGlobe 776160 Boston, … "Hockey bloodlines intersected on that …
## 5 BostonGlobe 776160 Boston, … "Inside the 'NASA of hockey.’ A look be…
## 6 BostonGlobe 776160 Boston, … "A first-timer’s first impressions of c…
# Histogram of how many times each tweet was retweeted, restricted to rows
# that are not themselves retweets.
hockey_BOSTON %>%
  filter(!is_retweet) %>%
  ggplot(aes(x = retweet_count)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Show the tweet(s) with the highest retweet count among non-retweets.
# The retweet filter runs first so that the maximum is computed only over
# the remaining rows.
hockey_BOSTON %>%
  filter(!is_retweet) %>%
  slice(which(retweet_count == max(retweet_count))) %>%
  select(screen_name, retweet_count, followers_count, location, text)
## # A tibble: 1 x 5
## screen_name retweet_count followers_count location text
## <chr> <int> <int> <chr> <chr>
## 1 NHLBruins 40 1522583 Boston, … "\"Ladies and gentlemen, …
# Tweet frequency aggregated by hour.
hockey_BOSTON %>%
  ts_plot(by = "hours") +
  labs(
    title = "Frecuencia de tweets por hora",
    subtitle = "Menciones de la palabra ´HOCKEY´ en Boston",
    x = "horas",
    y = "cantidad",
    caption = "Fuente: twitter"
  ) +
  theme_minimal()
# Self-reported location of the users tweeting from Boston and surroundings:
# horizontal bar chart of the 20 most frequent non-empty locations.
hockey_BOSTON %>%
  filter(!is.na(location), location != "") %>%
  count(location) %>%
  top_n(20, n) %>%
  # Order the bars by their count rather than alphabetically.
  ggplot(aes(x = reorder(location, n), y = n)) +
  geom_col() +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Procedencia de los usuarios",
    subtitle = "Boston y cercanías",
    x = "ubicación",
    y = "cantidad",
    caption = "Fuente: twitter"
  )
# Derive `lat`/`lng` columns for the tweets and drop the raw coordinate
# columns, which are not used after this point.
hockey_BOSTON2 <- hockey_BOSTON %>%
  lat_lng() %>%
  select(-c(geo_coords, coords_coords, bbox_coords))

# Keep only the tweets that have both coordinates.
hockey_BOSTON_GEO <- filter(hockey_BOSTON2, !is.na(lat) & !is.na(lng))

# Number of geolocated tweets.
nrow(hockey_BOSTON_GEO)
## [1] 49
# Build a bounding box from the longitudes and latitudes of the geolocated
# tweets, then print it.
bbox_tweets <- make_bbox(
  lon = hockey_BOSTON_GEO[["lng"]],
  lat = hockey_BOSTON_GEO[["lat"]]
)
bbox_tweets
## left bottom right top
## -71.46959 42.10538 -70.77248 42.61592
# Download the basemap tiles for the bounding box at zoom level 11 (the
# default "terrain" style, spelled out here); the map is plotted afterwards.
mapa_tweets <- get_stamenmap(
  bbox = bbox_tweets,
  zoom = 11,
  maptype = "terrain"
)
## Source : http://tile.stamen.com/terrain/11/617/755.png
## Source : http://tile.stamen.com/terrain/11/618/755.png
## Source : http://tile.stamen.com/terrain/11/619/755.png
## Source : http://tile.stamen.com/terrain/11/620/755.png
## Source : http://tile.stamen.com/terrain/11/621/755.png
## Source : http://tile.stamen.com/terrain/11/617/756.png
## Source : http://tile.stamen.com/terrain/11/618/756.png
## Source : http://tile.stamen.com/terrain/11/619/756.png
## Source : http://tile.stamen.com/terrain/11/620/756.png
## Source : http://tile.stamen.com/terrain/11/621/756.png
## Source : http://tile.stamen.com/terrain/11/617/757.png
## Source : http://tile.stamen.com/terrain/11/618/757.png
## Source : http://tile.stamen.com/terrain/11/619/757.png
## Source : http://tile.stamen.com/terrain/11/620/757.png
## Source : http://tile.stamen.com/terrain/11/621/757.png
## Source : http://tile.stamen.com/terrain/11/617/758.png
## Source : http://tile.stamen.com/terrain/11/618/758.png
## Source : http://tile.stamen.com/terrain/11/619/758.png
## Source : http://tile.stamen.com/terrain/11/620/758.png
## Source : http://tile.stamen.com/terrain/11/621/758.png
## Source : http://tile.stamen.com/terrain/11/617/759.png
## Source : http://tile.stamen.com/terrain/11/618/759.png
## Source : http://tile.stamen.com/terrain/11/619/759.png
## Source : http://tile.stamen.com/terrain/11/620/759.png
## Source : http://tile.stamen.com/terrain/11/621/759.png
# Plot the terrain basemap.
ggmap(mapa_tweets)

# Download the same area again using the "terrain-lines" style.
hockey_BOSTON_line <- get_stamenmap(
  bbox = bbox_tweets,
  zoom = 11,
  maptype = "terrain-lines"
)
## Source : http://tile.stamen.com/terrain-lines/11/617/755.png
## Source : http://tile.stamen.com/terrain-lines/11/618/755.png
## Source : http://tile.stamen.com/terrain-lines/11/619/755.png
## Source : http://tile.stamen.com/terrain-lines/11/620/755.png
## Source : http://tile.stamen.com/terrain-lines/11/621/755.png
## Source : http://tile.stamen.com/terrain-lines/11/617/756.png
## Source : http://tile.stamen.com/terrain-lines/11/618/756.png
## Source : http://tile.stamen.com/terrain-lines/11/619/756.png
## Source : http://tile.stamen.com/terrain-lines/11/620/756.png
## Source : http://tile.stamen.com/terrain-lines/11/621/756.png
## Source : http://tile.stamen.com/terrain-lines/11/617/757.png
## Source : http://tile.stamen.com/terrain-lines/11/618/757.png
## Source : http://tile.stamen.com/terrain-lines/11/619/757.png
## Source : http://tile.stamen.com/terrain-lines/11/620/757.png
## Source : http://tile.stamen.com/terrain-lines/11/621/757.png
## Source : http://tile.stamen.com/terrain-lines/11/617/758.png
## Source : http://tile.stamen.com/terrain-lines/11/618/758.png
## Source : http://tile.stamen.com/terrain-lines/11/619/758.png
## Source : http://tile.stamen.com/terrain-lines/11/620/758.png
## Source : http://tile.stamen.com/terrain-lines/11/621/758.png
## Source : http://tile.stamen.com/terrain-lines/11/617/759.png
## Source : http://tile.stamen.com/terrain-lines/11/618/759.png
## Source : http://tile.stamen.com/terrain-lines/11/619/759.png
## Source : http://tile.stamen.com/terrain-lines/11/620/759.png
## Source : http://tile.stamen.com/terrain-lines/11/621/759.png
# Plot the "terrain-lines" basemap.
ggmap(hockey_BOSTON_line)

# Download the same area using the "toner-lite" style.
hockey_BOSTON_TL <- get_stamenmap(
  bbox = bbox_tweets,
  zoom = 11,
  maptype = "toner-lite"
)
## Source : http://tile.stamen.com/toner-lite/11/617/755.png
## Source : http://tile.stamen.com/toner-lite/11/618/755.png
## Source : http://tile.stamen.com/toner-lite/11/619/755.png
## Source : http://tile.stamen.com/toner-lite/11/620/755.png
## Source : http://tile.stamen.com/toner-lite/11/621/755.png
## Source : http://tile.stamen.com/toner-lite/11/617/756.png
## Source : http://tile.stamen.com/toner-lite/11/618/756.png
## Source : http://tile.stamen.com/toner-lite/11/619/756.png
## Source : http://tile.stamen.com/toner-lite/11/620/756.png
## Source : http://tile.stamen.com/toner-lite/11/621/756.png
## Source : http://tile.stamen.com/toner-lite/11/617/757.png
## Source : http://tile.stamen.com/toner-lite/11/618/757.png
## Source : http://tile.stamen.com/toner-lite/11/619/757.png
## Source : http://tile.stamen.com/toner-lite/11/620/757.png
## Source : http://tile.stamen.com/toner-lite/11/621/757.png
## Source : http://tile.stamen.com/toner-lite/11/617/758.png
## Source : http://tile.stamen.com/toner-lite/11/618/758.png
## Source : http://tile.stamen.com/toner-lite/11/619/758.png
## Source : http://tile.stamen.com/toner-lite/11/620/758.png
## Source : http://tile.stamen.com/toner-lite/11/621/758.png
## Source : http://tile.stamen.com/toner-lite/11/617/759.png
## Source : http://tile.stamen.com/toner-lite/11/618/759.png
## Source : http://tile.stamen.com/toner-lite/11/619/759.png
## Source : http://tile.stamen.com/toner-lite/11/620/759.png
## Source : http://tile.stamen.com/toner-lite/11/621/759.png
# Plot the "toner-lite" basemap on its own.
ggmap(hockey_BOSTON_TL)

# Same basemap with one point per geolocated tweet.
ggmap(hockey_BOSTON_TL) +
  geom_point(
    mapping = aes(x = lng, y = lat),
    data = hockey_BOSTON_GEO
  ) +
  labs(
    title = "Tweets que mencionan la palabra HOCKEY",
    subtitle = "Boston y cercanías",
    caption = "Fuente: twitter",
    x = "longitud",
    y = "latitud"
  )
# Geolocated tweets over the "terrain-lines" basemap, coloured by the
# author's follower count.
ggmap(hockey_BOSTON_line) +
  geom_point(
    mapping = aes(x = lng, y = lat, color = followers_count),
    data = hockey_BOSTON_GEO
  ) +
  scale_color_distiller(palette = "Spectral")
# Make the users with the most followers easier to see: sort ascending by
# follower count so those rows come last in the data (presumably so their
# points are drawn last, on top of the others).
hockey_BOSTON_Ag <- hockey_BOSTON_GEO %>%
  arrange(followers_count)

ggmap(hockey_BOSTON_TL) +
  geom_point(
    mapping = aes(x = lng, y = lat, color = followers_count),
    data = hockey_BOSTON_Ag
  ) +
  scale_color_distiller(palette = "Spectral")
# Also highlight the most retweeted tweets: point size follows the retweet
# count, colour follows the follower count, and points are half-transparent.
ggmap(hockey_BOSTON_TL) +
  geom_point(
    mapping = aes(
      x = lng,
      y = lat,
      size = retweet_count,
      color = followers_count
    ),
    data = hockey_BOSTON_Ag,
    alpha = 0.5
  ) +
  scale_color_distiller(palette = "Spectral")
# Interactive map of the geolocated tweets.
# Colour scale over the follower counts, shared by the markers and the legend.
paleta <- colorNumeric(
  domain = hockey_BOSTON_Ag[["followers_count"]],
  palette = "viridis"
)

# NOTE(review): the popup shows the raw tweet text; if leaflet renders popup
# content as HTML, consider escaping it before display. Confirm against the
# leaflet documentation.
hockey_BOSTON_Ag %>%
  leaflet() %>%
  addTiles() %>%
  addCircleMarkers(
    color = ~paleta(followers_count),
    popup = ~text
  ) %>%
  addLegend(
    pal = paleta,
    values = ~followers_count,
    title = "seguidores"
  )
## Assuming "lng" and "lat" are longitude and latitude, respectively