#TP3 Capturando y explorando datos de Twitter
Antes de comenzar el TP3 procedemos a instalar los paquetes necesarios para su análisis:
#install.packages("rtweet")
library(rtweet)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0 ✓ purrr 0.3.3
## ✓ tibble 3.0.1 ✓ dplyr 0.8.5
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x purrr::flatten() masks rtweet::flatten()
## x dplyr::lag() masks stats::lag()
Luego creamos el token de Twitter a partir de las claves provistas por la cuenta de desarrollador de Twitter, tramitada en la página de la aplicación:
# Build the rtweet OAuth token for the "adanage" app.
#
# The four credentials are read from environment variables (for example set
# in ~/.Renviron) instead of being written in the document, so that secrets
# never end up in version control or in the rendered report. Any key that was
# previously published in this file should be revoked and regenerated.
twitter_credentials <- Sys.getenv(
  c(
    "TWITTER_CONSUMER_KEY",
    "TWITTER_CONSUMER_SECRET",
    "TWITTER_ACCESS_TOKEN",
    "TWITTER_ACCESS_SECRET"
  )
)

# Sys.getenv() returns "" for unset variables; stop early with a clear
# message rather than sending empty credentials to the API.
missing_credentials <- names(twitter_credentials)[!nzchar(twitter_credentials)]
if (length(missing_credentials) > 0) {
  stop(
    "Missing Twitter credentials in environment: ",
    paste(missing_credentials, collapse = ", "),
    call. = FALSE
  )
}

twitter_token <- create_token(
  app = "adanage",
  consumer_key = twitter_credentials[["TWITTER_CONSUMER_KEY"]],
  consumer_secret = twitter_credentials[["TWITTER_CONSUMER_SECRET"]],
  access_token = twitter_credentials[["TWITTER_ACCESS_TOKEN"]],
  access_secret = twitter_credentials[["TWITTER_ACCESS_SECRET"]]
)
# Search Twitter for up to 3000 tweets matching "independentzia" and preview
# the first rows of the user-level columns of the result.
tweets <- search_tweets("independentzia", n = 3000)
head(users_data(tweets))
## # A tibble: 6 x 20
## user_id screen_name name location description url protected followers_count
## <chr> <chr> <chr> <chr> <chr> <chr> <lgl> <int>
## 1 154794… bixotzlurr… carx… "Mundua" "Indudable… <NA> FALSE 231
## 2 122751… Iker_Bagli… Iker "" "" <NA> FALSE 74
## 3 337763… melkorober… Mike… "Euskal… "Aita,athl… <NA> FALSE 880
## 4 888490… mariaamorv… Marí… "Mexico… "Yo tambié… <NA> FALSE 672
## 5 796463… SortuEibar Sort… "Eibar,… "" http… FALSE 102
## 6 947632… Mystic_Riv… Pabl… "Tabarn… "...navarr… <NA> FALSE 215
## # … with 12 more variables: friends_count <int>, listed_count <int>,
## # statuses_count <int>, favourites_count <int>, account_created_at <dttm>,
## # verified <lgl>, profile_url <chr>, profile_expanded_url <chr>,
## # account_lang <lgl>, profile_banner_url <chr>, profile_background_url <chr>,
## # profile_image_url <chr>
# List every column available in the search result.
colnames(tweets)
## [1] "user_id" "status_id"
## [3] "created_at" "screen_name"
## [5] "text" "source"
## [7] "display_text_width" "reply_to_status_id"
## [9] "reply_to_user_id" "reply_to_screen_name"
## [11] "is_quote" "is_retweet"
## [13] "favorite_count" "retweet_count"
## [15] "quote_count" "reply_count"
## [17] "hashtags" "symbols"
## [19] "urls_url" "urls_t.co"
## [21] "urls_expanded_url" "media_url"
## [23] "media_t.co" "media_expanded_url"
## [25] "media_type" "ext_media_url"
## [27] "ext_media_t.co" "ext_media_expanded_url"
## [29] "ext_media_type" "mentions_user_id"
## [31] "mentions_screen_name" "lang"
## [33] "quoted_status_id" "quoted_text"
## [35] "quoted_created_at" "quoted_source"
## [37] "quoted_favorite_count" "quoted_retweet_count"
## [39] "quoted_user_id" "quoted_screen_name"
## [41] "quoted_name" "quoted_followers_count"
## [43] "quoted_friends_count" "quoted_statuses_count"
## [45] "quoted_location" "quoted_description"
## [47] "quoted_verified" "retweet_status_id"
## [49] "retweet_text" "retweet_created_at"
## [51] "retweet_source" "retweet_favorite_count"
## [53] "retweet_retweet_count" "retweet_user_id"
## [55] "retweet_screen_name" "retweet_name"
## [57] "retweet_followers_count" "retweet_friends_count"
## [59] "retweet_statuses_count" "retweet_location"
## [61] "retweet_description" "retweet_verified"
## [63] "place_url" "place_name"
## [65] "place_full_name" "place_type"
## [67] "country" "country_code"
## [69] "geo_coords" "coords_coords"
## [71] "bbox_coords" "status_url"
## [73] "name" "location"
## [75] "description" "url"
## [77] "protected" "followers_count"
## [79] "friends_count" "listed_count"
## [81] "statuses_count" "favourites_count"
## [83] "account_created_at" "verified"
## [85] "profile_url" "profile_expanded_url"
## [87] "account_lang" "profile_banner_url"
## [89] "profile_background_url" "profile_image_url"
# Tweets written by the accounts with the highest follower counts.
# top_n() keeps ties, and each row is a tweet rather than a user, so an
# account with several tweets can appear more than once and more than five
# rows can be returned.
tweets %>%
  select(screen_name, followers_count, location, text) %>%
  top_n(5, followers_count) %>%
  arrange(desc(followers_count))
## # A tibble: 6 x 4
## screen_name followers_count location text
## <chr> <int> <chr> <chr>
## 1 jpermach 20468 Euskal Herria "41 urte betetzen dira JARRAI so…
## 2 MartxeloDiaz 12442 Iruñea-Donost… "41 urte betetzen dira JARRAI so…
## 3 matalaz 7011 Basque Country "@Herripublika Independentzia es…
## 4 General_RE_L… 6068 Huarte - Uhar… "Kaixo, ongi etorriak EAJ-PNV al…
## 5 bakunin69 4977 Madrid, Españ… "Frantziar eta espainiar estatu …
## 6 bakunin69 4977 Madrid, Españ… "Espainiar eta Frantziar estatue…
# Favour fixed over scientific notation when printing large numbers.
options(scipen = 20)

# Histogram of follower counts (one observation per tweet) on a log10 x axis.
ggplot(tweets, aes(x = followers_count)) +
  geom_histogram() +
  scale_x_log10() +
  labs(
    title = "Distribución de los usuarios mas populares en Twitter",
    subtitle = "Ciudad de Bilbao, España",
    caption = "Fuente: Database Desarrollo de Twitter"
  ) +
  theme_light()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Histograma de los tweets más populares:
# Histogram of retweet counts, restricted to tweets that are not retweets.
tweets %>%
  filter(!is_retweet) %>%
  ggplot(aes(x = retweet_count)) +
  geom_histogram() +
  labs(
    title = "Distribución de los tweets mas populares",
    subtitle = "Ciudad de Bilbao, España",
    caption = "Fuente: Database Desarrollo de Twitter"
  ) +
  theme_light()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Tweet más popular:
# Non-retweet tweet(s) with the highest retweet count.
# Retweets are dropped first so that max() is computed over the remaining
# rows only; every row tied at the maximum is kept.
tweets %>%
  filter(!is_retweet) %>%
  slice(which(retweet_count == max(retweet_count))) %>%
  select(screen_name, retweet_count, followers_count, location, text)
## # A tibble: 1 x 5
## screen_name retweet_count followers_count location text
## <chr> <int> <int> <chr> <chr>
## 1 Herriexisten… 25 311 "" "Espainiar eta Frantziar…
Traducido al español desde el euskera: “Los estados español y francés deciden quién, cuándo, cómo y dónde podemos ir en nuestro país. ¿DÓNDE ESTÁN LAS ESTRUCTURAS ESTATALES AHORA QUE LAS ESTRUCTURAS ESTATALES SE VENDIERON? ¿Dónde está nuestro autogobierno? Es suficiente”
# Time series of the number of tweets, aggregated by hour.
ts_plot(tweets, by = "hours") +
  theme_light() +
  labs(
    title = "Distribución diaria en la que se realiza mayor cantidad de Tweets",
    subtitle = "Ciudad de Bilbao, España",
    caption = "Fuente: Database Desarrollo de Twitter"
  )
Procedencia de los usuarios:
# Horizontal bar chart of the ten most frequent self-reported locations.
# Rows with an empty or missing location are discarded before counting, and
# bars are ordered by their count.
tweets %>%
  filter(!is.na(location) & location != "") %>%
  count(location) %>%
  top_n(10, n) %>%
  mutate(location = reorder(location, n)) %>%
  ggplot(aes(x = location, y = n)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Procedencia de los usuarios",
    subtitle = "Ciudad de Bilbao, España",
    x = "ubicación",
    y = "cantidad",
    caption = "Fuente: Database Desarrollo de Twitter"
  ) +
  theme_light()
Primero instalamos y cargamos los paquetes necesarios para trabajar los datos y luego mapearlos:
#install.packages("osmdata")
#install.packages("leaflet")
library(osmdata)
## Data (c) OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright
library(leaflet)
library(tidyverse)
library(sf)
## Linking to GEOS 3.7.2, GDAL 2.4.2, PROJ 5.2.0
Tomamos las coordenadas del Museo Guggenheim Bilbao como centro de la búsqueda.
# Search for tweets mentioning "athletic" within 20 miles of the point
# 43.268270, -2.933573. Retweets are excluded, and the search waits and
# retries when the API rate limit is hit, up to 100000 tweets.
tweets_fanaticos <- search_tweets(
  "athletic",
  n = 100000,
  include_rts = FALSE,
  geocode = "43.268270,-2.933573,20mi",
  retryonratelimit = TRUE
)
Extraemos las coordenadas:
# Add lat/lng columns to the search result, then drop the three raw
# coordinate columns.
tweets_fanaticos <- tweets_fanaticos %>%
  lat_lng() %>%
  select(-c(geo_coords, coords_coords, bbox_coords))

# Keep only the tweets that have both a latitude and a longitude.
tweets_fanaticos_geo <- filter(
  tweets_fanaticos,
  !is.na(lat) & !is.na(lng)
)

# Number of geolocated tweets.
nrow(tweets_fanaticos_geo)
## [1] 61
Cargamos el paquete necesario:
library(ggmap)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
# Bounding box enclosing every geolocated tweet, used to request map tiles.
bbox <- with(tweets_fanaticos_geo, make_bbox(lon = lng, lat = lat))
bbox
## left bottom right top
## -3.037672 42.999960 -2.709831 43.368245
# Download the Stamen "terrain" tiles (the default map type) for the
# bounding box at zoom level 11.
mapa_bilbao <- get_stamenmap(bbox = bbox, zoom = 11, maptype = "terrain")
## Source : http://tile.stamen.com/terrain/11/1006/749.png
## Source : http://tile.stamen.com/terrain/11/1007/749.png
## Source : http://tile.stamen.com/terrain/11/1008/749.png
## Source : http://tile.stamen.com/terrain/11/1006/750.png
## Source : http://tile.stamen.com/terrain/11/1007/750.png
## Source : http://tile.stamen.com/terrain/11/1008/750.png
## Source : http://tile.stamen.com/terrain/11/1006/751.png
## Source : http://tile.stamen.com/terrain/11/1007/751.png
## Source : http://tile.stamen.com/terrain/11/1008/751.png
## Source : http://tile.stamen.com/terrain/11/1006/752.png
## Source : http://tile.stamen.com/terrain/11/1007/752.png
## Source : http://tile.stamen.com/terrain/11/1008/752.png
# Draw the terrain base map.
ggmap(mapa_bilbao)

# Download a lighter "toner-lite" base map for the same bounding box.
# It is stored as `mapa_Bilbao` (capital B), a different object from
# `mapa_bilbao`.
mapa_Bilbao <- get_stamenmap(bbox = bbox, zoom = 11, maptype = "toner-lite")
## Source : http://tile.stamen.com/toner-lite/11/1006/749.png
## Source : http://tile.stamen.com/toner-lite/11/1007/749.png
## Source : http://tile.stamen.com/toner-lite/11/1008/749.png
## Source : http://tile.stamen.com/toner-lite/11/1006/750.png
## Source : http://tile.stamen.com/toner-lite/11/1007/750.png
## Source : http://tile.stamen.com/toner-lite/11/1008/750.png
## Source : http://tile.stamen.com/toner-lite/11/1006/751.png
## Source : http://tile.stamen.com/toner-lite/11/1007/751.png
## Source : http://tile.stamen.com/toner-lite/11/1008/751.png
## Source : http://tile.stamen.com/toner-lite/11/1006/752.png
## Source : http://tile.stamen.com/toner-lite/11/1007/752.png
## Source : http://tile.stamen.com/toner-lite/11/1008/752.png
# Draw the toner-lite base map on its own.
ggmap(mapa_Bilbao)

# Same base map with one point per geolocated tweet.
ggmap(mapa_Bilbao) +
  geom_point(aes(x = lng, y = lat), data = tweets_fanaticos_geo)
# Sort by ascending follower count so that the rows with the most followers
# come last in the data passed to geom_point().
tweets_fanaticos_geo <- tweets_fanaticos_geo %>%
  arrange(followers_count)

# Geolocated tweets coloured by the author's follower count.
ggmap(mapa_Bilbao) +
  geom_point(
    aes(x = lng, y = lat, color = followers_count),
    data = tweets_fanaticos_geo
  ) +
  scale_color_distiller(palette = "Spectral") +
  labs(
    title = "Tweet según popularidad del usuario",
    subtitle = "Ciudad de Bilbao, España",
    x = "Longitud",
    y = "Latitud",
    caption = "Fuente: Database Desarrollo de Twitter"
  )
# Geolocated tweets coloured by follower count and sized by retweet count;
# points are semi-transparent so overlapping ones stay visible.
ggmap(mapa_Bilbao) +
  geom_point(
    aes(x = lng, y = lat, color = followers_count, size = retweet_count),
    data = tweets_fanaticos_geo,
    alpha = .5
  ) +
  scale_color_distiller(palette = "Spectral") +
  labs(
    title = "Tweet según popularidad del usuario y cantidad de retweets",
    subtitle = "Ciudad de Bilbao, España",
    x = "Longitud",
    y = "Latitud",
    caption = "Fuente: Database Desarrollo de Twitter"
  )
Realizamos mapas interactivos con los tweets descargados
# Continuous viridis colour scale spanning the observed follower counts.
paleta <- colorNumeric(
  palette = "viridis",
  domain = tweets_fanaticos_geo$followers_count
)

# Interactive map of the geolocated tweets: marker colour encodes the
# author's follower count, marker radius the retweet count, and the popup
# shows the tweet text.
#
# Only the CartoDB Positron base map is added. The earlier version also
# called addTiles(), which stacked the default OpenStreetMap layer underneath
# an opaque base map, so those tiles were downloaded but never visible.
#
# NOTE(review): the radius is the raw retweet count, so tweets with zero
# retweets get a zero-radius marker. Confirm this is intended.
leaflet(tweets_fanaticos_geo) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addCircleMarkers(
    radius = ~retweet_count,
    popup = ~text,
    color = ~paleta(followers_count)
  ) %>%
  addLegend(title = "seguidores", pal = paleta, values = ~followers_count)
## Assuming "lng" and "lat" are longitude and latitude, respectively