# Install rtweet only if it is not already available. The call uses plain
# ASCII quotes (typographic quotes are a syntax error in R) and is guarded so
# that re-running the document does not reinstall the package every time.
if (!requireNamespace("rtweet", quietly = TRUE)) {
  install.packages("rtweet")
}
library(rtweet)
## Warning: package 'rtweet' was built under R version 3.6.1
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.0 v purrr 0.3.2
## v tibble 2.1.1 v dplyr 0.8.1
## v tidyr 0.8.3 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x purrr::flatten() masks rtweet::flatten()
## x dplyr::lag() masks stats::lag()
(borro el chunk porque no corre bien)
# Twitter API credentials used to build the OAuth token.
# The consumer key and secret are read from environment variables (e.g. set in
# ~/.Renviron) instead of being written in the document, so they are not
# published along with the analysis. Any key/secret pair that was previously
# pasted in plain text should be regenerated in the Twitter developer console.
appname <- "JDucos"
consumer_key <- Sys.getenv("TWITTER_CONSUMER_KEY")
consumer_secret <- Sys.getenv("TWITTER_CONSUMER_SECRET")

# Fail early with a clear error when the credentials are not configured.
stopifnot(nzchar(consumer_key), nzchar(consumer_secret))

twitter_token <- create_token(
  app = appname,
  consumer_key = consumer_key,
  consumer_secret = consumer_secret
)
# Download tweets matching "PASO" (retweets excluded) restricted by the
# geocode string below ("lat,long,radius"). Up to 100000 tweets are requested,
# waiting and retrying whenever the API rate limit is hit.
tweets2 <- search_tweets(
  q = "PASO",
  n = 100000,
  include_rts = FALSE,
  retryonratelimit = TRUE,
  geocode = "-32.958759,-60.693922,20mi"
)
## Registered S3 method overwritten by 'openssl':
## method from
## print.bytes Rcpp
# Preview the first rows of the user-level data attached to the tweets.
head(users_data(tweets2))
## # A tibble: 6 x 20
## user_id screen_name name location description url protected
## <chr> <chr> <chr> <chr> <chr> <chr> <lgl>
## 1 116401~ MatiasHern~ Mati~ Rosario~ Soy de aqu~ <NA> FALSE
## 2 199407~ CaroLepros~ "\U0~ "" "<U+27A1> Fanátic~ <NA> FALSE
## 3 199407~ CaroLepros~ "\U0~ "" "<U+27A1> Fanátic~ <NA> FALSE
## 4 831485~ priicentra~ Pris~ Rosario~ "Y cuando ~ <NA> FALSE
## 5 104116~ MackyArico Maca~ "Capitá~ Quiero ser~ <NA> FALSE
## 6 937776~ Florcentra~ "Flo~ Rosario~ "Rosario C~ <NA> FALSE
## # ... with 13 more variables: followers_count <int>, friends_count <int>,
## # listed_count <int>, statuses_count <int>, favourites_count <int>,
## # account_created_at <dttm>, verified <lgl>, profile_url <chr>,
## # profile_expanded_url <chr>, account_lang <lgl>,
## # profile_banner_url <chr>, profile_background_url <chr>,
## # profile_image_url <chr>
# List every column available in the downloaded tweets.
colnames(tweets2)
## [1] "user_id" "status_id"
## [3] "created_at" "screen_name"
## [5] "text" "source"
## [7] "display_text_width" "reply_to_status_id"
## [9] "reply_to_user_id" "reply_to_screen_name"
## [11] "is_quote" "is_retweet"
## [13] "favorite_count" "retweet_count"
## [15] "quote_count" "reply_count"
## [17] "hashtags" "symbols"
## [19] "urls_url" "urls_t.co"
## [21] "urls_expanded_url" "media_url"
## [23] "media_t.co" "media_expanded_url"
## [25] "media_type" "ext_media_url"
## [27] "ext_media_t.co" "ext_media_expanded_url"
## [29] "ext_media_type" "mentions_user_id"
## [31] "mentions_screen_name" "lang"
## [33] "quoted_status_id" "quoted_text"
## [35] "quoted_created_at" "quoted_source"
## [37] "quoted_favorite_count" "quoted_retweet_count"
## [39] "quoted_user_id" "quoted_screen_name"
## [41] "quoted_name" "quoted_followers_count"
## [43] "quoted_friends_count" "quoted_statuses_count"
## [45] "quoted_location" "quoted_description"
## [47] "quoted_verified" "retweet_status_id"
## [49] "retweet_text" "retweet_created_at"
## [51] "retweet_source" "retweet_favorite_count"
## [53] "retweet_retweet_count" "retweet_user_id"
## [55] "retweet_screen_name" "retweet_name"
## [57] "retweet_followers_count" "retweet_friends_count"
## [59] "retweet_statuses_count" "retweet_location"
## [61] "retweet_description" "retweet_verified"
## [63] "place_url" "place_name"
## [65] "place_full_name" "place_type"
## [67] "country" "country_code"
## [69] "geo_coords" "coords_coords"
## [71] "bbox_coords" "status_url"
## [73] "name" "location"
## [75] "description" "url"
## [77] "protected" "followers_count"
## [79] "friends_count" "listed_count"
## [81] "statuses_count" "favourites_count"
## [83] "account_created_at" "verified"
## [85] "profile_url" "profile_expanded_url"
## [87] "account_lang" "profile_banner_url"
## [89] "profile_background_url" "profile_image_url"
Usuarios más populares según la cantidad de seguidores:
# Strongly prefer fixed over scientific notation when printing numbers
# (this also affects the axis labels of the plots below).
options(scipen = 20)

# Distribution of the follower count of each tweet's author.
tweets2 %>%
  ggplot(aes(x = followers_count)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Top 5 de los usuarios más populares
# Top 5 most followed users.
# tweets2 has one row per tweet, so an account that tweeted many times would
# occupy many rows of the ranking (every one of its tweets ties on
# followers_count). Keep a single row per user before ranking; the `text`
# shown is the one from the first tweet found for that user.
# NOTE: the console output printed after this chunk in the document was
# produced before this de-duplication and should be regenerated.
tweets2 %>%
  distinct(screen_name, .keep_all = TRUE) %>%
  top_n(5, followers_count) %>%
  arrange(desc(followers_count)) %>%
  select(screen_name, followers_count, location, text)
## # A tibble: 20 x 4
## screen_name followers_count location text
## <chr> <int> <chr> <chr>
## 1 MuniRosario 319950 Rosario, Ar~ "Hoy celebramos el paso a la i~
## 2 lacapital 280306 Rosario, Ar~ "\U0001f5f3 #Opinión En el gob~
## 3 lacapital 280306 Rosario, Ar~ "<U+26BD>\U0001f536\U0001f537 #Centra~
## 4 lacapital 280306 Rosario, Ar~ "\U0001f4e6\U0001f5e3 #Eleccio~
## 5 lacapital 280306 Rosario, Ar~ "\U0001f62f\U0001f5f3 Dady Bri~
## 6 lacapital 280306 Rosario, Ar~ "\U0001f534\U0001f5e3\U0001f4b~
## 7 lacapital 280306 Rosario, Ar~ "\U0001f4e3\U0001f4b0 #economi~
## 8 lacapital 280306 Rosario, Ar~ "\U0001f5f3\U0001f4fa #VIDEO P~
## 9 lacapital 280306 Rosario, Ar~ "\U0001f534\U0001f4a5\U0001f69~
## 10 lacapital 280306 Rosario, Ar~ "\U0001f534\U0001f1e6\U0001f1f~
## 11 lacapital 280306 Rosario, Ar~ "<U+26BD><U+26AB>\U0001f534 Otra clarísima d~
## 12 lacapital 280306 Rosario, Ar~ "\U0001f534\U0001f4da\U0001f91~
## 13 lacapital 280306 Rosario, Ar~ "\U0001f62f\U0001f933 #SupleMá~
## 14 lacapital 280306 Rosario, Ar~ "\U0001f5de<U+FE0F>\U0001f3cd<U+FE0F>\U0001f~
## 15 lacapital 280306 Rosario, Ar~ "\U0001f534\U0001f1e6\U0001f1f~
## 16 lacapital 280306 Rosario, Ar~ "\U0001f534\U0001f4da\U0001f91~
## 17 lacapital 280306 Rosario, Ar~ "\U0001f534 La madre de Marcos~
## 18 lacapital 280306 Rosario, Ar~ "\U0001f534\U0001f5e3 Carrió d~
## 19 lacapital 280306 Rosario, Ar~ "<U+26AB><U+FE0F> <U+26BD><U+FE0F> A los 59 años falleció ~
## 20 lacapital 280306 Rosario, Ar~ "\U0001f534\U0001f4b0 #economí~
Descartar retweets
# Distribution of retweet counts, considering original tweets only.
tweets2 %>%
  filter(!is_retweet) %>%
  ggplot(aes(x = retweet_count)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Identifiquemos el tweet original que sumó más retweets:
# Find the original (non-retweet) tweet(s) with the highest retweet count.
# The two filters are intentionally separate steps: max(retweet_count) in the
# second filter is evaluated on the rows that survive the first one, i.e. the
# maximum is taken among original tweets only. Ties return more than one row.
tweets2 %>%
filter(!is_retweet) %>%
filter(retweet_count == max(retweet_count)) %>%
# Keep only the columns needed to identify the tweet and its author.
select(screen_name, retweet_count, followers_count, location, text)
## # A tibble: 1 x 5
## screen_name retweet_count followers_count location text
## <chr> <int> <int> <chr> <chr>
## 1 capcentral 1618 21501 "Rosario, \U000~ "El Ministro ~
Horarios con más cantidad de tweets
# Time series of tweet frequency aggregated by minute.
ts_plot(tweets2, by = "minutes")
Día con más cantidad de tweets
# Time series of tweet frequency aggregated by hour.
ts_plot(tweets2, by = "hours")
Los días con más tweets fueron el domingo 11 de agosto por la noche y el lunes 12 por la mañana, coincidiendo con las PASO.
Procedencia
# Number of tweets per self-reported user location (all locations).
# `location` is mapped to the x aesthetic and the bar height (count) to y;
# coord_flip() only swaps where the axes are drawn, while labs() keeps
# referring to the aesthetics. Hence x must be labelled "ubicación" and
# y "cantidad".
tweets2 %>%
  ggplot() +
  geom_bar(aes(location)) +
  coord_flip() +
  labs(title = "Procedencia de los usuarios",
       x = "ubicación",
       y = "cantidad")
# Ten most frequent user locations, ignoring missing and empty values,
# drawn as horizontal bars ordered by number of tweets.
tweets2 %>%
  filter(!is.na(location), location != "") %>%
  count(location) %>%
  top_n(10, n) %>%
  ggplot(aes(x = reorder(location, n), y = n)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Procedencia de los usuarios",
    x = "ubicación",
    y = "cantidad"
  )
library(ggmap)
## Warning: package 'ggmap' was built under R version 3.6.1
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
Extraer las coordenadas para ver los tweets georreferenciados
# Turn a list-column of coordinate pairs into a two-column table.
#
# campo_coordenadas: a list whose elements are vectors holding the longitude
#   in position 1 and the latitude in position 2 (e.g. tweets2$coords_coords).
# Returns a tibble with columns `lon` and `lat`, one row per list element,
# in the same order as the input.
coordenadas <- function(campo_coordenadas) {
  # Build a one-row tibble from a single coordinate pair.
  # tibble() replaces data_frame(), which is deprecated and warns on use.
  extraer_coordenadas <- function(lista_coords) {
    tibble(
      lon = lista_coords[1],
      lat = lista_coords[2]
    )
  }

  # Apply to every element and row-bind the results.
  map_df(campo_coordenadas, extraer_coordenadas)
}
# Append the extracted lon/lat columns to the tweets and drop the original
# list-columns that held coordinates.
tweets2_geor <- select(
  cbind(tweets2, coordenadas(tweets2$coords_coords)),
  -geo_coords, -coords_coords, -bbox_coords
)
## Warning: `data_frame()` is deprecated, use `tibble()`.
## This warning is displayed once per session.
# Keep only the tweets that have both coordinates, then count them.
tweets2_geor <- filter(tweets2_geor, !(is.na(lat) | is.na(lon)))
nrow(tweets2_geor)
## [1] 12
# Bounding box enclosing every georeferenced tweet, in the order
# (min lon, min lat, max lon, max lat), i.e. west, south, east, north.
bbox <- with(
  tweets2_geor,
  c(min(lon), min(lat), max(lon), max(lat))
)

# Download the base map tiles covering that bounding box.
mapabbox <- get_stamenmap(bbox = bbox)
## Source : http://tile.stamen.com/terrain/10/338/610.png
## Source : http://tile.stamen.com/terrain/10/339/610.png
## Source : http://tile.stamen.com/terrain/10/338/611.png
## Source : http://tile.stamen.com/terrain/10/339/611.png
## Source : http://tile.stamen.com/terrain/10/338/612.png
## Source : http://tile.stamen.com/terrain/10/339/612.png
# Draw the default base map.
ggmap(ggmap = mapabbox)

# Fetch the same area again using the "toner-lite" map type.
mapabbox_toner_lite <- get_stamenmap(
  bbox = bbox,
  maptype = "toner-lite"
)
## Source : http://tile.stamen.com/toner-lite/10/338/610.png
## Source : http://tile.stamen.com/toner-lite/10/339/610.png
## Source : http://tile.stamen.com/toner-lite/10/338/611.png
## Source : http://tile.stamen.com/toner-lite/10/339/611.png
## Source : http://tile.stamen.com/toner-lite/10/338/612.png
## Source : http://tile.stamen.com/toner-lite/10/339/612.png
# Draw the "toner-lite" base map on its own.
ggmap(ggmap = mapabbox_toner_lite)
Posición de los Tweets
# Plot each georeferenced tweet as a point over the base map.
ggmap(mapabbox_toner_lite) +
  geom_point(
    mapping = aes(x = lon, y = lat),
    data = tweets2_geor
  )
Popularidad del usuario de cada Tweet
# Same map, colouring each point by the author's follower count with the
# "Spectral" palette.
ggmap(mapabbox_toner_lite) +
  geom_point(
    mapping = aes(x = lon, y = lat, color = followers_count),
    data = tweets2_geor
  ) +
  scale_color_distiller(palette = "Spectral")