###TP4_“ANALIZANDO DATOS ESPACIOTEMPORALES”
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------------------------------------------ tidyverse 1.3.0 --
## v ggplot2 3.3.0 v purrr 0.3.4
## v tibble 3.0.1 v dplyr 0.8.5
## v tidyr 1.0.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts --------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x lubridate::as.difftime() masks base::as.difftime()
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x lubridate::intersect() masks base::intersect()
## x dplyr::lag() masks stats::lag()
## x lubridate::setdiff() masks base::setdiff()
## x lubridate::union() masks base::union()
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
## The following objects are masked from 'package:lubridate':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
library(rtweet)
##
## Attaching package: 'rtweet'
## The following object is masked from 'package:purrr':
##
## flatten
library(sf)
## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
library(osmdata)
## Data (c) OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright
library(leaflet)
# Install ggsflabel from GitHub only when it is not already available, so that
# re-running the script does not attempt a network install every time.
if (!requireNamespace("ggsflabel", quietly = TRUE)) {
  devtools::install_github("yutannihilation/ggsflabel")
}
## WARNING: Rtools is required to build R packages, but is not currently installed.
##
## Please download and install Rtools custom from http://cran.r-project.org/bin/windows/Rtools/.
## Skipping install of 'ggsflabel' from a github remote, the SHA1 (a489481b) has not changed since last install.
## Use `force = TRUE` to force installation
library(ggsflabel)
##
## Attaching package: 'ggsflabel'
## The following objects are masked from 'package:ggplot2':
##
## geom_sf_label, geom_sf_text, StatSfCoordinates
# Read the tweets CSV as a plain data.frame (data.table = FALSE).
tweets_rmba <- fread("tweets_rmba.csv",
                     encoding = "UTF-8",
                     header = TRUE,
                     sep = ",",
                     showProgress = TRUE,
                     data.table = FALSE)
# Work on a 5% random sample. The seed is fixed so the same rows are drawn on
# every run and the tables and plots below can be reproduced.
set.seed(1234)
tweets_rmba <- sample_frac(tweets_rmba, 0.05)
nrow(tweets_rmba)
## [1] 43187
summary(tweets_rmba)
## X Y id in_reply_to_status_id
## Min. :-59.30 Min. :-35.19 Min. :9.182e+17 Min. :3.617e+17
## 1st Qu.:-58.52 1st Qu.:-34.65 1st Qu.:9.337e+17 1st Qu.:9.273e+17
## Median :-58.41 Median :-34.61 Median :9.527e+17 Median :9.431e+17
## Mean :-58.45 Mean :-34.62 Mean :9.548e+17 Mean :9.487e+17
## 3rd Qu.:-58.37 3rd Qu.:-34.58 3rd Qu.:9.755e+17 3rd Qu.:9.687e+17
## Max. :-57.84 Max. :-33.97 Max. :1.003e+18 Max. :1.003e+18
## NA's :41588
## in_reply_to_user_id text created lang
## Min. :8.071e+05 Length:43187 Length:43187 Length:43187
## 1st Qu.:5.065e+08 Class :character Class :character Class :character
## Median :1.180e+09 Mode :character Mode :character Mode :character
## Mean :7.648e+16
## 3rd Qu.:1.180e+09
## Max. :9.979e+17
## NA's :41204
## source user_name user_id user_created
## Length:43187 Length:43187 Min. :1.059e+03 Length:43187
## Class :character Class :character 1st Qu.:1.765e+08 Class :character
## Mode :character Mode :character Median :4.950e+08 Mode :character
## Mean :5.295e+16
## 3rd Qu.:1.383e+09
## Max. :9.960e+17
##
## user_description user_location user_followers user_followed
## Length:43187 Length:43187 Min. : 0 Min. : 0
## Class :character Class :character 1st Qu.: 195 1st Qu.: 83
## Mode :character Mode :character Median : 492 Median : 1213
## Mean : 8380 Mean : 5571
## 3rd Qu.: 1416 3rd Qu.: 5958
## Max. :4924516 Max. :439346
##
# Parse the tweet date column `created` into date-times with ymd_hms().
tweets_rmba <- mutate(tweets_rmba, created = ymd_hms(created))
head(tweets_rmba)
## X Y id in_reply_to_status_id in_reply_to_user_id
## 1 -58.46670 -34.60000 9.337071e+17 NA NA
## 2 -58.39022 -34.60438 9.596084e+17 NA NA
## 3 -58.44256 -34.59396 9.889600e+17 NA NA
## 4 -58.38583 -34.60056 9.902515e+17 NA NA
## 5 -58.38170 -34.60330 9.711877e+17 NA NA
## 6 -58.50743 -34.55959 9.361971e+17 NA NA
## text
## 1 Mi #tbt de hoy es viajando a la eterna primavera noteseme la felicidad de iniciar unas buenas… https://t.co/cvBoH22cRD
## 2 <U+0001F519><U+0001F483><U+0001F525> @JFrankoQG growupdance @jbalvin #reggaeton #jbalvin #dance #dancers #show #growup en Paseo… https://t.co/mck7Ixghdu
## 3 Google de Google, Inc. https://t.co/KuVbieQ6Pk
## 4 Acaba de publicar una foto en Asociación del Fútbol Argentino https://t.co/YjtHwaS0ab
## 5 // @AbiNicolosi @ Buenos Aires, Argentina https://t.co/3QkaylVtWy
## 6 Hola #FullStackTech2017 (@ Tecnópolis - @tecnopolisarg in Villa Martelli, Buenos Aires) https://t.co/zMOE5KsFbx
## created lang source user_name user_id
## 1 2017-11-23 14:41:42 es Instagram MauroRamzz 1.194116e+08
## 2 2018-02-03 02:04:16 en Instagram ggiannina_ 1.720857e+08
## 3 2018-04-25 01:56:51 en iOS PatriciPonce 2.356248e+09
## 4 2018-04-28 15:29:00 es Instagram LACASADED10S 7.916509e+17
## 5 2018-03-07 00:55:59 es Instagram idlet0wn 1.011428e+09
## 6 2017-11-30 11:35:45 es Foursquare luks_ 1.542621e+07
## user_created
## 1 2010/03/03 15:27:29+00
## 2 2010/07/28 21:49:32+00
## 3 2014/02/22 12:00:43+00
## 4 2016/10/27 14:40:58+00
## 5 2012/12/14 16:00:51+00
## 6 2008/07/14 13:17:13+00
## user_description
## 1 Presentador en #InfiltradosTV #ClaroTV #VisionTV magazine web #ContactosTV Estudiante #UQuindío, Bailarin y Modelo apasionado, sencillo y humilde <U+0001F466><U+0001F4AA>
## 2 <U+264C><U+0001F525> . 23.05.2016 <U+2661><U+0001F48F> \n<U+0001F380> http://instagram.com/shivideco <U+0001F380> • Juro solemnemente que mis intenciones no son buenas <U+0001F52E><U+2607>•
## 3 Abogada, ya de mayor, nunca es tarde cuando la dicha es buena. Muy peroncha y peleadora de tacheros pro x eso mejor a pata
## 4 Seguinos también en Facebook/ Follow us too Facebook: La casa de D10S and Instagram: lacasaded10s Dirección/Address: Lascano 2257 La Paternal
## 5
## 6 Love your hair! hope you win!! Archienemigo de la cebolla y el ajo. Creative Director. Views are my own
## user_location user_followers user_followed
## 1 Armenia, Quindío, Colombia 791 2533
## 2 Buenos Aires, Argentina 270 2694
## 3 130 1144
## 4 Buenos Aires Argentina 568 532
## 5 1684 25689
## 6 Buenos Aires, Argentina 1043 3632
# Step II: favour fixed over scientific notation when printing numbers, then
# draw a bar chart of the number of tweets per calendar year.
options(scipen = 20)
tweets_rmba %>%
  ggplot(aes(x = year(created))) +
  geom_bar() +
  labs(
    title = "Cantidad de tweets anualmente",
    subtitle = "Region Metropolitana de Buenos Aires ",
    caption = "Fuente: Información de Twitter"
  )
# Bar chart of tweets per month, restricted to tweets created in 2017.
ggplot(
  filter(tweets_rmba, year(created) == 2017),
  aes(x = month(created))
) +
  geom_bar() +
  labs(
    title = "Cantidad de tweets por mes",
    subtitle = "Region Metropolitana de Buenos Aires, octubre a diciembre 2017",
    caption = "Fuente: Información de Twitter"
  )
# Bar chart of tweets per month, restricted to tweets created in 2018.
ggplot(
  filter(tweets_rmba, year(created) == 2018),
  aes(x = month(created))
) +
  geom_bar() +
  labs(
    title = "Cantidad de tweets por mes",
    subtitle = "Region Metropolitana de Buenos Aires, enero a mayo 2018",
    caption = "Fuente: Información de Twitter"
  )
# Bar chart of 2017 tweets by day of the week (numeric wday() value on x).
ggplot(
  filter(tweets_rmba, year(created) == 2017),
  aes(x = wday(created))
) +
  geom_bar() +
  labs(
    title = "Distribucion de tweets semanalmente",
    subtitle = "Region Metropolitana de Buenos Aires, octubre a diciembre 2017",
    caption = "Fuente: Información de Twitter"
  )
# Five most frequent user-declared locations among 2017 tweets, printed from
# most to least common (count() sorts by n, top_n() keeps that order).
tweets_rmba %>%
  filter(year(created) == 2017) %>%
  count(user_location, sort = TRUE) %>%
  top_n(5)
## Selecting by n
## # A tibble: 5 x 2
## user_location n
## <chr> <int>
## 1 "" 3033
## 2 "Argentina" 2864
## 3 "Buenos Aires, Argentina" 2082
## 4 "Buenos Aires" 723
## 5 "Ciudad Autónoma de Buenos Aire" 710
# Character vector with the five most frequent `user_location` values in 2017.
# NOTE(review): this vector is not referenced again in the visible script.
ubicacion_frecuente <-
  filter(tweets_rmba, year(created) == 2017) %>%
  count(user_location) %>%
  top_n(5) %>%
  pull("user_location")
## Selecting by n
# Five most frequent tweet sources (client applications) among 2017 tweets,
# printed from most to least common.
tweets_rmba %>%
  filter(year(created) == 2017) %>%
  count(source, sort = TRUE) %>%
  top_n(5)
## Selecting by n
## # A tibble: 5 x 2
## source n
## <chr> <int>
## 1 Instagram 14327
## 2 Es Tendencia en Argentina 1580
## 3 Twitter for Android 910
## 4 Foursquare 640
## 5 Sandaysoft Cumulus 547
# Show the first six rows of the sample once more.
head(tweets_rmba, n = 6)
## X Y id in_reply_to_status_id
## 1 -58.46670 -34.60000 933707131898883968 NA
## 2 -58.39022 -34.60438 959608445757545984 NA
## 3 -58.44256 -34.59396 988959995424322048 NA
## 4 -58.38583 -34.60056 990251542354841984 NA
## 5 -58.38170 -34.60330 971187671438712960 NA
## 6 -58.50743 -34.55959 936197053704786048 NA
## in_reply_to_user_id
## 1 NA
## 2 NA
## 3 NA
## 4 NA
## 5 NA
## 6 NA
## text
## 1 Mi #tbt de hoy es viajando a la eterna primavera noteseme la felicidad de iniciar unas buenas… https://t.co/cvBoH22cRD
## 2 <U+0001F519><U+0001F483><U+0001F525> @JFrankoQG growupdance @jbalvin #reggaeton #jbalvin #dance #dancers #show #growup en Paseo… https://t.co/mck7Ixghdu
## 3 Google de Google, Inc. https://t.co/KuVbieQ6Pk
## 4 Acaba de publicar una foto en Asociación del Fútbol Argentino https://t.co/YjtHwaS0ab
## 5 // @AbiNicolosi @ Buenos Aires, Argentina https://t.co/3QkaylVtWy
## 6 Hola #FullStackTech2017 (@ Tecnópolis - @tecnopolisarg in Villa Martelli, Buenos Aires) https://t.co/zMOE5KsFbx
## created lang source user_name user_id
## 1 2017-11-23 14:41:42 es Instagram MauroRamzz 119411584
## 2 2018-02-03 02:04:16 en Instagram ggiannina_ 172085747
## 3 2018-04-25 01:56:51 en iOS PatriciPonce 2356248204
## 4 2018-04-28 15:29:00 es Instagram LACASADED10S 791650904210599936
## 5 2018-03-07 00:55:59 es Instagram idlet0wn 1011428192
## 6 2017-11-30 11:35:45 es Foursquare luks_ 15426207
## user_created
## 1 2010/03/03 15:27:29+00
## 2 2010/07/28 21:49:32+00
## 3 2014/02/22 12:00:43+00
## 4 2016/10/27 14:40:58+00
## 5 2012/12/14 16:00:51+00
## 6 2008/07/14 13:17:13+00
## user_description
## 1 Presentador en #InfiltradosTV #ClaroTV #VisionTV magazine web #ContactosTV Estudiante #UQuindío, Bailarin y Modelo apasionado, sencillo y humilde <U+0001F466><U+0001F4AA>
## 2 <U+264C><U+0001F525> . 23.05.2016 <U+2661><U+0001F48F> \n<U+0001F380> http://instagram.com/shivideco <U+0001F380> • Juro solemnemente que mis intenciones no son buenas <U+0001F52E><U+2607>•
## 3 Abogada, ya de mayor, nunca es tarde cuando la dicha es buena. Muy peroncha y peleadora de tacheros pro x eso mejor a pata
## 4 Seguinos también en Facebook/ Follow us too Facebook: La casa de D10S and Instagram: lacasaded10s Dirección/Address: Lascano 2257 La Paternal
## 5
## 6 Love your hair! hope you win!! Archienemigo de la cebolla y el ajo. Creative Director. Views are my own
## user_location user_followers user_followed
## 1 Armenia, Quindío, Colombia 791 2533
## 2 Buenos Aires, Argentina 270 2694
## 3 130 1144
## 4 Buenos Aires Argentina 568 532
## 5 1684 25689
## 6 Buenos Aires, Argentina 1043 3632
# Character vector with the five most frequent `source` values in 2017; the
# later plots use it to restrict the data to those sources.
Origen_frecuente <-
  filter(tweets_rmba, year(created) == 2017) %>%
  count(source) %>%
  top_n(5) %>%
  pull("source")
## Selecting by n
# Stacked bar chart of 2017 tweets per month, coloured by source and limited
# to the sources listed in Origen_frecuente.
ggplot(
  filter(tweets_rmba, year(created) == 2017 & source %in% Origen_frecuente),
  aes(x = month(created), fill = source)
) +
  geom_bar() +
  labs(
    title = "Distribucion de tweets de acuerdo a su origen",
    subtitle = "Region Metropolitana de Buenos Aires, 2017",
    caption = "Fuente: Información de Twitter"
  )
# Count 2017 tweets per (source, day of week) for the most frequent sources.
conteo <- count(
  filter(tweets_rmba, year(created) == 2017 & source %in% Origen_frecuente),
  source,
  diasemana = wday(created)
)
# One line per source showing how its tweet count varies across the week.
conteo %>%
  ggplot(aes(x = diasemana, y = n, group = source, color = source)) +
  geom_line() +
  labs(
    title = "Distribucion de tweets a lo largo de la semana, de acuerdo a su origen",
    subtitle = "Region Metropolitana de Buenos Aires, octubre a diciembre 2017",
    caption = "Fuente: Información de Twitter"
  )
library(ggmap)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
# Keep only tweets whose X (longitude) and Y (latitude) are both negative.
tweets_rmba <- filter(tweets_rmba, Y < 0 & X < 0)
# Bounding box of the remaining points, ordered (min X, min Y, max X, max Y).
bbox <- with(
  tweets_rmba,
  c(min(X, na.rm = TRUE), min(Y, na.rm = TRUE),
    max(X, na.rm = TRUE), max(Y, na.rm = TRUE))
)
# Download the "toner-lite" Stamen basemap covering that bounding box.
RMBA <- get_stamenmap(maptype = "toner-lite", bbox = bbox)
## Source : http://tile.stamen.com/toner-lite/10/343/614.png
## Source : http://tile.stamen.com/toner-lite/10/344/614.png
## Source : http://tile.stamen.com/toner-lite/10/345/614.png
## Source : http://tile.stamen.com/toner-lite/10/346/614.png
## Source : http://tile.stamen.com/toner-lite/10/347/614.png
## Source : http://tile.stamen.com/toner-lite/10/343/615.png
## Source : http://tile.stamen.com/toner-lite/10/344/615.png
## Source : http://tile.stamen.com/toner-lite/10/345/615.png
## Source : http://tile.stamen.com/toner-lite/10/346/615.png
## Source : http://tile.stamen.com/toner-lite/10/347/615.png
## Source : http://tile.stamen.com/toner-lite/10/343/616.png
## Source : http://tile.stamen.com/toner-lite/10/344/616.png
## Source : http://tile.stamen.com/toner-lite/10/345/616.png
## Source : http://tile.stamen.com/toner-lite/10/346/616.png
## Source : http://tile.stamen.com/toner-lite/10/347/616.png
## Source : http://tile.stamen.com/toner-lite/10/343/617.png
## Source : http://tile.stamen.com/toner-lite/10/344/617.png
## Source : http://tile.stamen.com/toner-lite/10/345/617.png
## Source : http://tile.stamen.com/toner-lite/10/346/617.png
## Source : http://tile.stamen.com/toner-lite/10/347/617.png
## Source : http://tile.stamen.com/toner-lite/10/343/618.png
## Source : http://tile.stamen.com/toner-lite/10/344/618.png
## Source : http://tile.stamen.com/toner-lite/10/345/618.png
## Source : http://tile.stamen.com/toner-lite/10/346/618.png
## Source : http://tile.stamen.com/toner-lite/10/347/618.png
## Source : http://tile.stamen.com/toner-lite/10/343/619.png
## Source : http://tile.stamen.com/toner-lite/10/344/619.png
## Source : http://tile.stamen.com/toner-lite/10/345/619.png
## Source : http://tile.stamen.com/toner-lite/10/346/619.png
## Source : http://tile.stamen.com/toner-lite/10/347/619.png
# Draw the bare basemap first.
ggmap(RMBA)
# Then overlay a 100-bin 2D histogram of tweet positions on the basemap.
ggmap(RMBA) +
  geom_bin2d(aes(x = X, y = Y), data = tweets_rmba, bins = 100) +
  scale_fill_viridis_c() +
  labs(
    title = "Concentracion de tweets",
    subtitle = "Region Metropolitana de Buenos Aires",
    caption = "Fuente: Información de Twitter"
  )
## Warning: Removed 3 rows containing missing values (geom_tile).
# Density contours of tweet positions, one panel per source, limited to the
# sources in Origen_frecuente (all years, not only 2017).
# NOTE(review): the recorded run warns "bandwidths must be strictly positive"
# for this plot, so at least one panel's density could not be computed.
ggmap(RMBA) +
  geom_density2d(
    aes(x = X, y = Y, color = stat(level)),
    data = filter(tweets_rmba, source %in% Origen_frecuente)
  ) +
  scale_color_viridis_c() +
  facet_wrap(~source, nrow = 1) +
  labs(
    title = "Concentracion de tweets en relacion a su origen",
    subtitle = "Region Metropolitana de Buenos Aires",
    caption = "Fuente: Información de Twitter"
  )
## Warning: Computation failed in `stat_density2d()`:
## bandwidths must be strictly positive
# Store the numeric day of the week of each tweet in `dia_semana`.
tweets_rmba <- mutate(tweets_rmba, dia_semana = wday(created))
# Point map comparing Instagram and Twitter for Android tweets, one panel per
# day of the week; small, nearly transparent points reduce overplotting.
ggmap(RMBA) +
  geom_point(
    aes(x = X, y = Y, color = source),
    data = filter(tweets_rmba, source %in% c("Instagram", "Twitter for Android")),
    size = .2,
    alpha = .1
  ) +
  facet_wrap(~dia_semana, nrow = 2) +
  labs(
    title = "Comparacion de tweets segun su origen",
    subtitle = "Por día, en RMBA",
    caption = "Fuente: Información de Twitter",
    x = "longitud",
    y = "latitud"
  )
# Store the hour of day of each tweet in `hora_base`.
# `created` was already converted to a date-time by the earlier ymd_hms()
# call, so the hour is read from it directly; parsing it a second time would
# round-trip the values through text for no benefit.
tweets_rmba <- tweets_rmba %>%
  mutate(hora_base = hour(created))
head(tweets_rmba)
## X Y id in_reply_to_status_id
## 1 -58.46670 -34.60000 933707131898883968 NA
## 2 -58.39022 -34.60438 959608445757545984 NA
## 3 -58.44256 -34.59396 988959995424322048 NA
## 4 -58.38583 -34.60056 990251542354841984 NA
## 5 -58.38170 -34.60330 971187671438712960 NA
## 6 -58.50743 -34.55959 936197053704786048 NA
## in_reply_to_user_id
## 1 NA
## 2 NA
## 3 NA
## 4 NA
## 5 NA
## 6 NA
## text
## 1 Mi #tbt de hoy es viajando a la eterna primavera noteseme la felicidad de iniciar unas buenas… https://t.co/cvBoH22cRD
## 2 <U+0001F519><U+0001F483><U+0001F525> @JFrankoQG growupdance @jbalvin #reggaeton #jbalvin #dance #dancers #show #growup en Paseo… https://t.co/mck7Ixghdu
## 3 Google de Google, Inc. https://t.co/KuVbieQ6Pk
## 4 Acaba de publicar una foto en Asociación del Fútbol Argentino https://t.co/YjtHwaS0ab
## 5 // @AbiNicolosi @ Buenos Aires, Argentina https://t.co/3QkaylVtWy
## 6 Hola #FullStackTech2017 (@ Tecnópolis - @tecnopolisarg in Villa Martelli, Buenos Aires) https://t.co/zMOE5KsFbx
## created lang source user_name user_id
## 1 2017-11-23 14:41:42 es Instagram MauroRamzz 119411584
## 2 2018-02-03 02:04:16 en Instagram ggiannina_ 172085747
## 3 2018-04-25 01:56:51 en iOS PatriciPonce 2356248204
## 4 2018-04-28 15:29:00 es Instagram LACASADED10S 791650904210599936
## 5 2018-03-07 00:55:59 es Instagram idlet0wn 1011428192
## 6 2017-11-30 11:35:45 es Foursquare luks_ 15426207
## user_created
## 1 2010/03/03 15:27:29+00
## 2 2010/07/28 21:49:32+00
## 3 2014/02/22 12:00:43+00
## 4 2016/10/27 14:40:58+00
## 5 2012/12/14 16:00:51+00
## 6 2008/07/14 13:17:13+00
## user_description
## 1 Presentador en #InfiltradosTV #ClaroTV #VisionTV magazine web #ContactosTV Estudiante #UQuindío, Bailarin y Modelo apasionado, sencillo y humilde <U+0001F466><U+0001F4AA>
## 2 <U+264C><U+0001F525> . 23.05.2016 <U+2661><U+0001F48F> \n<U+0001F380> http://instagram.com/shivideco <U+0001F380> • Juro solemnemente que mis intenciones no son buenas <U+0001F52E><U+2607>•
## 3 Abogada, ya de mayor, nunca es tarde cuando la dicha es buena. Muy peroncha y peleadora de tacheros pro x eso mejor a pata
## 4 Seguinos también en Facebook/ Follow us too Facebook: La casa de D10S and Instagram: lacasaded10s Dirección/Address: Lascano 2257 La Paternal
## 5
## 6 Love your hair! hope you win!! Archienemigo de la cebolla y el ajo. Creative Director. Views are my own
## user_location user_followers user_followed dia_semana hora_base
## 1 Armenia, Quindío, Colombia 791 2533 5 14
## 2 Buenos Aires, Argentina 270 2694 7 2
## 3 130 1144 4 1
## 4 Buenos Aires Argentina 568 532 7 15
## 5 1684 25689 4 0
## 6 Buenos Aires, Argentina 1043 3632 5 11
# Density contours of Instagram tweets on weekends, one panel per hour of day.
# wday() numbers days 1 (Sunday) to 7 (Saturday), so the weekend is c(1, 7).
# The previous filter, !(wday(created) %in% 6:1), dropped days 1 through 6 and
# therefore kept Saturday only, which contradicted the "fin de semana" subtitle.
ggmap(RMBA) +
  geom_density2d(data = filter(tweets_rmba,
                               source == "Instagram",
                               wday(created) %in% c(1, 7)),
                 aes(x = X,
                     y = Y,
                     color = stat(level))) +
  scale_color_viridis_c() +
  facet_wrap(~hora_base, nrow = 4) +
  labs(title = "Generacion de tweets desde Instagram",
       subtitle = "según hora del día, durante el fin de semana en RMBA",
       caption = "Fuente: Información de Twitter",
       x = "longitud",
       y = "latitud")
####Se observa cómo a lo largo del día varía la concentración de tweets, existiendo un máximo de concentración de los mismos a las 6:00, 10:00, 12:00 y 13:00 horas pero con poca dispersión, mientras que a las 8:00 y 9:00 horas éstos alcanzan su máxima dispersión. #### Luego, a lo largo del resto del día, se observa una distribución relativamente estándar.