#TP3 Capturando y explorando datos de Twitter
Antes de comenzar el TP3 procedemos a instalar los paquetes necesarios para su análisis:
#install.packages("rtweet")
library(rtweet)
## Warning: package 'rtweet' was built under R version 3.6.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.3
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.0 v purrr 0.3.3
## v tibble 2.1.3 v dplyr 0.8.4
## v tidyr 1.0.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## Warning: package 'tidyr' was built under R version 3.6.3
## Warning: package 'readr' was built under R version 3.6.3
## Warning: package 'purrr' was built under R version 3.6.3
## Warning: package 'dplyr' was built under R version 3.6.3
## Warning: package 'stringr' was built under R version 3.6.3
## Warning: package 'forcats' was built under R version 3.6.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x purrr::flatten() masks rtweet::flatten()
## x dplyr::lag() masks stats::lag()
Luego creamos el Twitter Token, obtenido a través de las distintas claves provistas por la cuenta de desarrollador de Twitter tramitada en la página de la aplicación:
# Build the Twitter API token from credentials stored in environment
# variables (for example in ~/.Renviron) instead of hard-coding them here.
# SECURITY: the keys that were previously written in this file were published
# together with it and must be treated as compromised; revoke and regenerate
# them in the Twitter developer portal before running this again.
#
# Expected environment variables:
#   TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET,
#   TWITTER_ACCESS_TOKEN, TWITTER_ACCESS_SECRET
#
# local() keeps the raw secrets out of the global environment; only the
# resulting token object is bound to `twitter_token`.
twitter_token <- local({
  credentials <- Sys.getenv(
    c(
      "TWITTER_CONSUMER_KEY",
      "TWITTER_CONSUMER_SECRET",
      "TWITTER_ACCESS_TOKEN",
      "TWITTER_ACCESS_SECRET"
    ),
    unset = NA,
    names = TRUE
  )

  # Fail early with a clear message rather than sending empty keys to the API.
  missing_vars <- names(credentials)[
    is.na(credentials) | !nzchar(credentials)
  ]
  if (length(missing_vars) > 0) {
    stop(
      "Missing Twitter API credentials in environment variables: ",
      paste(missing_vars, collapse = ", "),
      call. = FALSE
    )
  }

  create_token(
    app = "adanage",
    consumer_key = credentials[["TWITTER_CONSUMER_KEY"]],
    consumer_secret = credentials[["TWITTER_CONSUMER_SECRET"]],
    access_token = credentials[["TWITTER_ACCESS_TOKEN"]],
    access_secret = credentials[["TWITTER_ACCESS_SECRET"]]
  )
})
# Download up to 3000 tweets matching the query "independentzia" and preview
# the user-level columns for the first rows.
tweets <- search_tweets("independentzia", n = 3000)
head(users_data(tweets))
## # A tibble: 6 x 20
## user_id screen_name name location description url protected followers_count
## <chr> <chr> <chr> <chr> <chr> <chr> <lgl> <int>
## 1 125232~ TomasBeraza "Sab~ "Baraka~ "Beldur be~ <NA> FALSE 33
## 2 144190~ Igor_XIV "Igo~ "Bizkai~ "Taking a ~ <NA> FALSE 316
## 3 266300~ desdemireb~ "Des~ "PLENTZ~ "Aquí a ve~ http~ FALSE 327
## 4 768128~ arkait_z "Ark~ "Oreret~ "\U0001f59~ <NA> FALSE 289
## 5 869180~ mikel100779 "Mik~ "" "Maialen e~ <NA> FALSE 251
## 6 769896~ nusky2011 "Ore~ "Gipuzk~ "Abertzale~ <NA> FALSE 143
## # ... with 12 more variables: friends_count <int>, listed_count <int>,
## # statuses_count <int>, favourites_count <int>, account_created_at <dttm>,
## # verified <lgl>, profile_url <chr>, profile_expanded_url <chr>,
## # account_lang <lgl>, profile_banner_url <chr>, profile_background_url <chr>,
## # profile_image_url <chr>
# List every column available in the downloaded tweets data frame.
colnames(tweets)
## [1] "user_id" "status_id"
## [3] "created_at" "screen_name"
## [5] "text" "source"
## [7] "display_text_width" "reply_to_status_id"
## [9] "reply_to_user_id" "reply_to_screen_name"
## [11] "is_quote" "is_retweet"
## [13] "favorite_count" "retweet_count"
## [15] "quote_count" "reply_count"
## [17] "hashtags" "symbols"
## [19] "urls_url" "urls_t.co"
## [21] "urls_expanded_url" "media_url"
## [23] "media_t.co" "media_expanded_url"
## [25] "media_type" "ext_media_url"
## [27] "ext_media_t.co" "ext_media_expanded_url"
## [29] "ext_media_type" "mentions_user_id"
## [31] "mentions_screen_name" "lang"
## [33] "quoted_status_id" "quoted_text"
## [35] "quoted_created_at" "quoted_source"
## [37] "quoted_favorite_count" "quoted_retweet_count"
## [39] "quoted_user_id" "quoted_screen_name"
## [41] "quoted_name" "quoted_followers_count"
## [43] "quoted_friends_count" "quoted_statuses_count"
## [45] "quoted_location" "quoted_description"
## [47] "quoted_verified" "retweet_status_id"
## [49] "retweet_text" "retweet_created_at"
## [51] "retweet_source" "retweet_favorite_count"
## [53] "retweet_retweet_count" "retweet_user_id"
## [55] "retweet_screen_name" "retweet_name"
## [57] "retweet_followers_count" "retweet_friends_count"
## [59] "retweet_statuses_count" "retweet_location"
## [61] "retweet_description" "retweet_verified"
## [63] "place_url" "place_name"
## [65] "place_full_name" "place_type"
## [67] "country" "country_code"
## [69] "geo_coords" "coords_coords"
## [71] "bbox_coords" "status_url"
## [73] "name" "location"
## [75] "description" "url"
## [77] "protected" "followers_count"
## [79] "friends_count" "listed_count"
## [81] "statuses_count" "favourites_count"
## [83] "account_created_at" "verified"
## [85] "profile_url" "profile_expanded_url"
## [87] "account_lang" "profile_banner_url"
## [89] "profile_background_url" "profile_image_url"
# Show the five rows with the highest follower counts, largest first.
# Rows are tweets, not users, so one account can appear more than once.
select(
  arrange(
    top_n(tweets, 5, followers_count),
    desc(followers_count)
  ),
  screen_name, followers_count, location, text
)
## # A tibble: 5 x 4
## screen_name followers_count location text
## <chr> <int> <chr> <chr>
## 1 jpermach 20472 Euskal Herr~ "41 urte betetzen dira JARRAI sortu~
## 2 MartxeloDiaz 12443 Iruñea-Dono~ "41 urte betetzen dira JARRAI sortu~
## 3 matalaz 7012 Basque Coun~ "@Herripublika Independentzia esate~
## 4 bakunin69 4974 Madrid, Esp~ "Frantziar eta espainiar estatu inp~
## 5 bakunin69 4974 Madrid, Esp~ "Espainiar eta Frantziar estatuek e~
# Penalise scientific notation so large counts are printed in fixed notation.
options(scipen = 20)

# Histogram of follower counts on a log10 x axis.
ggplot(tweets, aes(x = followers_count)) +
  geom_histogram() +
  scale_x_log10() +
  theme_light() +
  labs(
    title = "Distribución de los usuarios mas populares en Twitter",
    subtitle = "Ciudad de Bilbao, España",
    caption = "Fuente: Database Desarrollo de Twitter"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Histograma de los tweets más populares:
# Histogram of retweet counts, restricted to original tweets (retweets are
# dropped before plotting).
tweets %>%
  filter(!is_retweet) %>%
  ggplot(aes(x = retweet_count)) +
  geom_histogram() +
  theme_light() +
  labs(
    title = "Distribución de los tweets mas populares",
    subtitle = "Ciudad de Bilbao, España",
    caption = "Fuente: Database Desarrollo de Twitter"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Tweet más popular:
# Pick the original (non-retweet) tweet(s) with the highest retweet count.
# The two filters are kept as separate steps so that max() is computed only
# over the rows that survive the first filter.
select(
  tweets %>%
    filter(!is_retweet) %>%
    filter(retweet_count == max(retweet_count)),
  screen_name, retweet_count, followers_count, location, text
)
## # A tibble: 1 x 5
## screen_name retweet_count followers_count location text
## <chr> <int> <int> <chr> <chr>
## 1 Herriexisten~ 25 311 "" "Espainiar eta Frantziar~
Traducido al español desde el euskera: “Los estados español y francés deciden quién, cuándo, cómo y dónde podemos ir en nuestro país. ¿DÓNDE ESTÁN LAS ESTRUCTURAS ESTATALES AHORA QUE LAS ESTRUCTURAS ESTATALES SE VENDIERON? ¿Dónde está nuestro autogobierno? Es suficiente”
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.6.3
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
# Parse the creation timestamp with lubridate, then plot the number of tweets
# aggregated by hour.
tweets <- mutate(tweets, created_at = ymd_hms(created_at))

ts_plot(tweets, by = "hours") +
  theme_light() +
  labs(
    title = "Distribución diaria en la que se realiza mayor cantidad de Tweets",
    subtitle = "Ciudad de Bilbao, España",
    caption = "Fuente: Database Desarrollo de Twitter"
  )
Procedencia de los usuarios:
# Horizontal bar chart of the ten most frequent self-reported locations,
# ignoring empty and missing values. Bars are ordered by frequency.
tweets %>%
  filter(location != "", !is.na(location)) %>%
  count(location) %>%
  top_n(10, n) %>%
  ggplot(aes(x = reorder(location, n), y = n)) +
  geom_col() +
  coord_flip() +
  theme_light() +
  labs(
    title = "Procedencia de los usuarios",
    subtitle = "Ciudad de Bilbao, España",
    caption = "Fuente: Database Desarrollo de Twitter",
    x = "ubicación",
    y = "cantidad"
  )
Primero instalamos los paquetes necesarios para trabajar los datos y luego mapearlos:
#install.packages("osmdata")
#install.packages("leaflet")
library(osmdata)
## Warning: package 'osmdata' was built under R version 3.6.3
## Data (c) OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright
library(leaflet)
## Warning: package 'leaflet' was built under R version 3.6.3
library(tidyverse)
library(sf)
## Warning: package 'sf' was built under R version 3.6.3
## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
Tomamos las coordenadas del Museo Guggenheim Bilbao como centro de la búsqueda. La palabra buscada en los tweets es "athletic", en relación al club de fútbol local.
# Search for tweets mentioning "athletic" within 30 miles of the given
# latitude/longitude, excluding retweets. retryonratelimit = TRUE lets the
# request continue after hitting the API rate limit.
tweets_fanaticos <- search_tweets(
  "athletic",
  n = 100000,
  include_rts = FALSE,
  retryonratelimit = TRUE,
  geocode = "43.268270,-2.933573,30mi"
)
Extraemos las coordenadas:
# Add lat/lng columns, then drop the three raw coordinate columns.
tweets_fanaticos <- tweets_fanaticos %>%
  lat_lng() %>%
  select(-geo_coords, -coords_coords, -bbox_coords)

# Keep only the tweets that have both coordinates and report how many remain.
tweets_fanaticos_geo <- filter(tweets_fanaticos, !is.na(lat), !is.na(lng))
nrow(tweets_fanaticos_geo)
## [1] 56
Cargamos el paquete necesario:
library(ggmap)
## Warning: package 'ggmap' was built under R version 3.6.3
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
# Bounding box enclosing every geolocated tweet; printed for inspection.
bbox <- make_bbox(
  lon = tweets_fanaticos_geo[["lng"]],
  lat = tweets_fanaticos_geo[["lat"]]
)
bbox
## left bottom right top
## -3.054023 42.999960 -2.366474 43.368245
# Download the base map tiles for the bounding box (default map type).
# NOTE(review): Stamen-hosted tiles appear to have moved to Stadia Maps and
# newer ggmap releases offer get_stadiamap() -- confirm before re-running.
mapa_bilbao <- get_stamenmap(bbox = bbox, zoom = 11)
## Source : http://tile.stamen.com/terrain/11/1006/749.png
## Source : http://tile.stamen.com/terrain/11/1007/749.png
## Source : http://tile.stamen.com/terrain/11/1008/749.png
## Source : http://tile.stamen.com/terrain/11/1009/749.png
## Source : http://tile.stamen.com/terrain/11/1010/749.png
## Source : http://tile.stamen.com/terrain/11/1006/750.png
## Source : http://tile.stamen.com/terrain/11/1007/750.png
## Source : http://tile.stamen.com/terrain/11/1008/750.png
## Source : http://tile.stamen.com/terrain/11/1009/750.png
## Source : http://tile.stamen.com/terrain/11/1010/750.png
## Source : http://tile.stamen.com/terrain/11/1006/751.png
## Source : http://tile.stamen.com/terrain/11/1007/751.png
## Source : http://tile.stamen.com/terrain/11/1008/751.png
## Source : http://tile.stamen.com/terrain/11/1009/751.png
## Source : http://tile.stamen.com/terrain/11/1010/751.png
## Source : http://tile.stamen.com/terrain/11/1006/752.png
## Source : http://tile.stamen.com/terrain/11/1007/752.png
## Source : http://tile.stamen.com/terrain/11/1008/752.png
## Source : http://tile.stamen.com/terrain/11/1009/752.png
## Source : http://tile.stamen.com/terrain/11/1010/752.png
# Draw the downloaded base map on its own.
ggmap(ggmap = mapa_bilbao)
# Download a second base map in the "toner-lite" style. Its name differs from
# `mapa_bilbao` only by the capital B, and later plots use this one.
mapa_Bilbao <- get_stamenmap(bbox = bbox, zoom = 11, maptype = "toner-lite")
## Source : http://tile.stamen.com/toner-lite/11/1006/749.png
## Source : http://tile.stamen.com/toner-lite/11/1007/749.png
## Source : http://tile.stamen.com/toner-lite/11/1008/749.png
## Source : http://tile.stamen.com/toner-lite/11/1009/749.png
## Source : http://tile.stamen.com/toner-lite/11/1010/749.png
## Source : http://tile.stamen.com/toner-lite/11/1006/750.png
## Source : http://tile.stamen.com/toner-lite/11/1007/750.png
## Source : http://tile.stamen.com/toner-lite/11/1008/750.png
## Source : http://tile.stamen.com/toner-lite/11/1009/750.png
## Source : http://tile.stamen.com/toner-lite/11/1010/750.png
## Source : http://tile.stamen.com/toner-lite/11/1006/751.png
## Source : http://tile.stamen.com/toner-lite/11/1007/751.png
## Source : http://tile.stamen.com/toner-lite/11/1008/751.png
## Source : http://tile.stamen.com/toner-lite/11/1009/751.png
## Source : http://tile.stamen.com/toner-lite/11/1010/751.png
## Source : http://tile.stamen.com/toner-lite/11/1006/752.png
## Source : http://tile.stamen.com/toner-lite/11/1007/752.png
## Source : http://tile.stamen.com/toner-lite/11/1008/752.png
## Source : http://tile.stamen.com/toner-lite/11/1009/752.png
## Source : http://tile.stamen.com/toner-lite/11/1010/752.png
# Show the toner-lite base map alone, then again with one semi-transparent
# point per geolocated tweet.
ggmap(ggmap = mapa_Bilbao)

ggmap(ggmap = mapa_Bilbao) +
  geom_point(
    data = tweets_fanaticos_geo,
    mapping = aes(x = lng, y = lat),
    color = "salmon",
    alpha = 0.5,
    size = 4
  )
# Sort tweets by ascending follower count (presumably so that points for the
# most-followed users are drawn last, on top of the others), then map them
# coloured by follower count.
tweets_fanaticos_geo <- tweets_fanaticos_geo %>%
  arrange(followers_count)

ggmap(ggmap = mapa_Bilbao) +
  geom_point(
    data = tweets_fanaticos_geo,
    mapping = aes(x = lng, y = lat, color = followers_count),
    alpha = 0.5,
    size = 5
  ) +
  scale_color_distiller(palette = "Spectral") +
  labs(
    title = "Tweet según popularidad del usuario",
    subtitle = "Ciudad de Bilbao, España",
    caption = "Fuente: Database Desarrollo de Twitter",
    x = "Longitud",
    y = "Latitud"
  )
# Same map as before, with point colour encoding follower count and point
# size encoding the number of retweets.
ggmap(ggmap = mapa_Bilbao) +
  geom_point(
    data = tweets_fanaticos_geo,
    mapping = aes(
      x = lng,
      y = lat,
      size = retweet_count,
      color = followers_count
    ),
    alpha = 0.5
  ) +
  scale_color_distiller(palette = "Spectral") +
  labs(
    title = "Tweet según popularidad del usuario y cantidad de retweets",
    subtitle = "Ciudad de Bilbao, España",
    caption = "Fuente: Database Desarrollo de Twitter",
    x = "Longitud",
    y = "Latitud"
  )
Realizamos mapas interactivos con los tweets descargados
# Continuous colour scale over the observed range of follower counts.
paleta <- colorNumeric(
  domain = tweets_fanaticos_geo$followers_count,
  palette = "viridis"
)

# Interactive map: one circle per tweet, radius taken from the retweet count,
# colour from the follower count, and the tweet text shown as a popup.
leaflet(data = tweets_fanaticos_geo) %>%
  addTiles() %>%
  addCircleMarkers(
    color = ~paleta(followers_count),
    radius = ~retweet_count,
    popup = ~text
  ) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addLegend(
    pal = paleta,
    values = ~followers_count,
    title = "seguidores"
  )
## Assuming "lng" and "lat" are longitude and latitude, respectively
## damos por finalizado el TP3.-