En primer lugar cargamos las librerias con las que trabajamos habitualmente, asimismo intalamos el paquete osrm, ya que la utilizaremos para la presente tarea.
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ dplyr 0.8.5
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(sf)
## Linking to GEOS 3.7.2, GDAL 2.4.2, PROJ 5.2.0
library(osmdata)
## Data (c) OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright
library(leaflet)
library(rtweet)
##
## Attaching package: 'rtweet'
## The following object is masked from 'package:purrr':
##
## flatten
library(ggmap)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
## The following objects are masked from 'package:dplyr':
##
## intersect, setdiff, union
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(osrm)
## Data: (c) OpenStreetMap contributors, ODbL 1.0 - http://www.openstreetmap.org/copyright
## Routing: OSRM - http://project-osrm.org/
Paso I: Seleccionamos 2 datasets que contienen origen-destino: Subtes y Estaciones de la Ciudad de Buenos Aires.
Cargamos el dataset Viajes
Viajes <- read_csv("CABA-formaciones-despachadas-mensual.csv")
## Parsed with column specification:
## cols(
## fr1_fecha = col_date(format = ""),
## fr1_linea = col_character(),
## fr1_tipo = col_character(),
## fr1_regist = col_double(),
## fr1_orden = col_double(),
## fr1_tren = col_double(),
## fr1_cauc1 = col_character(),
## fr1_cauc2 = col_character(),
## fr1_cocc1 = col_double(),
## fr1_cocc2 = col_double(),
## fr1_km = col_double(),
## fr1_kmv = col_double(),
## fr1_viac1 = col_character(),
## fr1_viac2 = col_character(),
## fr1_salc1 = col_time(format = ""),
## fr1_salc2 = col_time(format = ""),
## fr1_scauc2 = col_character(),
## fr1_scauc1 = col_character()
## )
Viajes
## # A tibble: 37,620 x 18
## fr1_fecha fr1_linea fr1_tipo fr1_regist fr1_orden fr1_tren fr1_cauc1
## <date> <chr> <chr> <dbl> <dbl> <dbl> <chr>
## 1 2020-01-01 A F 1 1 4 <NA>
## 2 2020-01-01 A F 2 2 5 <NA>
## 3 2020-01-01 A F 3 3 6 <NA>
## 4 2020-01-01 A F 4 4 7 <NA>
## 5 2020-01-01 A F 5 5 1 <NA>
## 6 2020-01-01 A F 6 6 2 <NA>
## 7 2020-01-01 A F 7 7 3 <NA>
## 8 2020-01-01 A F 8 8 4 <NA>
## 9 2020-01-01 A F 9 9 5 <NA>
## 10 2020-01-01 A F 10 10 6 <NA>
## # … with 37,610 more rows, and 11 more variables: fr1_cauc2 <chr>,
## # fr1_cocc1 <dbl>, fr1_cocc2 <dbl>, fr1_km <dbl>, fr1_kmv <dbl>,
## # fr1_viac1 <chr>, fr1_viac2 <chr>, fr1_salc1 <time>, fr1_salc2 <time>,
## # fr1_scauc2 <chr>, fr1_scauc1 <chr>
names(Viajes)
## [1] "fr1_fecha" "fr1_linea" "fr1_tipo" "fr1_regist" "fr1_orden"
## [6] "fr1_tren" "fr1_cauc1" "fr1_cauc2" "fr1_cocc1" "fr1_cocc2"
## [11] "fr1_km" "fr1_kmv" "fr1_viac1" "fr1_viac2" "fr1_salc1"
## [16] "fr1_salc2" "fr1_scauc2" "fr1_scauc1"
Cargamos el dataset Estaciones
Estaciones <- read_csv("CABA-estaciones-de-subte.csv")
## Parsed with column specification:
## cols(
## long = col_double(),
## lat = col_double(),
## id = col_double(),
## estacion = col_character(),
## linea = col_character()
## )
Estaciones
## # A tibble: 90 x 5
## long lat id estacion linea
## <dbl> <dbl> <dbl> <chr> <chr>
## 1 -58.4 -34.6 1 CASEROS H
## 2 -58.4 -34.6 2 INCLAN - MEZQUITA AL AHMAD H
## 3 -58.4 -34.6 3 HUMBERTO 1° H
## 4 -58.4 -34.6 4 VENEZUELA H
## 5 -58.4 -34.6 5 ONCE - 30 DE DICIEMBRE H
## 6 -58.4 -34.6 6 9 DE JULIO D
## 7 -58.4 -34.6 7 FACULTAD DE MEDICINA D
## 8 -58.4 -34.6 8 TRIBUNALES - TEATRO COLÓN D
## 9 -58.4 -34.6 9 AGÜERO D
## 10 -58.4 -34.6 10 R.SCALABRINI ORTIZ D
## # … with 80 more rows
names(Estaciones)
## [1] "long" "lat" "id" "estacion" "linea"
Graficamos la información para verificar como es y que vemos:
bbox <- make_bbox(Estaciones$long, Estaciones$lat)
bbox
## left bottom right top
## -58.49221 -34.64770 -58.36411 -34.55126
mapa_base <- get_stamenmap(bbox, color = "bw", zoom = 14)
## 42 tiles needed, this may take a while (try a smaller zoom).
## Source : http://tile.stamen.com/terrain/14/5529/9869.png
## Source : http://tile.stamen.com/terrain/14/5530/9869.png
## Source : http://tile.stamen.com/terrain/14/5531/9869.png
## Source : http://tile.stamen.com/terrain/14/5532/9869.png
## Source : http://tile.stamen.com/terrain/14/5533/9869.png
## Source : http://tile.stamen.com/terrain/14/5534/9869.png
## Source : http://tile.stamen.com/terrain/14/5535/9869.png
## Source : http://tile.stamen.com/terrain/14/5529/9870.png
## Source : http://tile.stamen.com/terrain/14/5530/9870.png
## Source : http://tile.stamen.com/terrain/14/5531/9870.png
## Source : http://tile.stamen.com/terrain/14/5532/9870.png
## Source : http://tile.stamen.com/terrain/14/5533/9870.png
## Source : http://tile.stamen.com/terrain/14/5534/9870.png
## Source : http://tile.stamen.com/terrain/14/5535/9870.png
## Source : http://tile.stamen.com/terrain/14/5529/9871.png
## Source : http://tile.stamen.com/terrain/14/5530/9871.png
## Source : http://tile.stamen.com/terrain/14/5531/9871.png
## Source : http://tile.stamen.com/terrain/14/5532/9871.png
## Source : http://tile.stamen.com/terrain/14/5533/9871.png
## Source : http://tile.stamen.com/terrain/14/5534/9871.png
## Source : http://tile.stamen.com/terrain/14/5535/9871.png
## Source : http://tile.stamen.com/terrain/14/5529/9872.png
## Source : http://tile.stamen.com/terrain/14/5530/9872.png
## Source : http://tile.stamen.com/terrain/14/5531/9872.png
## Source : http://tile.stamen.com/terrain/14/5532/9872.png
## Source : http://tile.stamen.com/terrain/14/5533/9872.png
## Source : http://tile.stamen.com/terrain/14/5534/9872.png
## Source : http://tile.stamen.com/terrain/14/5535/9872.png
## Source : http://tile.stamen.com/terrain/14/5529/9873.png
## Source : http://tile.stamen.com/terrain/14/5530/9873.png
## Source : http://tile.stamen.com/terrain/14/5531/9873.png
## Source : http://tile.stamen.com/terrain/14/5532/9873.png
## Source : http://tile.stamen.com/terrain/14/5533/9873.png
## Source : http://tile.stamen.com/terrain/14/5534/9873.png
## Source : http://tile.stamen.com/terrain/14/5535/9873.png
## Source : http://tile.stamen.com/terrain/14/5529/9874.png
## Source : http://tile.stamen.com/terrain/14/5530/9874.png
## Source : http://tile.stamen.com/terrain/14/5531/9874.png
## Source : http://tile.stamen.com/terrain/14/5532/9874.png
## Source : http://tile.stamen.com/terrain/14/5533/9874.png
## Source : http://tile.stamen.com/terrain/14/5534/9874.png
## Source : http://tile.stamen.com/terrain/14/5535/9874.png
ggmap(mapa_base) +
geom_point(data = Estaciones, aes(x = long, y = lat), color = "orange")
Verificamos que se ve correcto, continuamos.
Paso 2-A: Analizaremos la cantidad de viajes entre los puntos.
conteo <- viajes %>% group_by(ORIGEN_ESTACION, DESTINO_ESTACION) %>% summarise(total = sum(TOTAL))
ggplot() + geom_tile(data = conteo, aes(x = ORIGEN_ESTACION, y = DESTINO_ESTACION, fill = total)) + scale_fill_distiller(palette = “Spectral”)
Ahora realizaremos un mapa de calor, utilizando el comando heatmap() y calcularemos los 10 viajes más realizados.