Análisis espacial aplicado al transporte

Análisis espacial aplicado al transporte. En este trabajo se analizan datos de cantidad de pasajeros detallados por fecha para cada estación de subterráneo. También se cruzan los datos de estaciones de subte y de líneas de colectivos para obtener información de las líneas cercanas a cada estación. El tema se eligió como práctica y evaluación de las funciones de R (en RStudio) para el análisis de datos públicos de transporte, planteando la posibilidad de aplicar el mismo código al análisis de años anteriores (en el caso de los datos de pasajeros por molinetes) o al análisis de otros datasets. Se usan bases de acceso público descargadas de: 1) Data Buenos Aires: pasajeros por molinetes, estaciones de subte y líneas de subte. El acceso web es: https://data.buenosaires.gob.ar/dataset?q=subte 2) Datos abiertos del Ministerio de Transporte de la Nación: recorridos de las líneas de colectivos. El acceso web es: https://datos.transporte.gob.ar/dataset?groups=automotor

1. CARGO LIBRERÍAS:

## LIBRERIAS
library(tidyverse)
library(sp)
library(stringr)
library(dplyr)
library(base)
library(leaflet)
library(rgdal)
library(osmdata)
library(sf)

2. LECTURA Y NORMALIZACIÓN DE LAS BASES DE PASAJEROS POR MOLINETES

Como introducción del análisis, se leen las bases originales y se normalizan los datos. Algunos procedimientos son especificar la codificación y el separador de texto, pasar nombres a mayúsculas y reemplazar textos, siendo los casos más complicados los nombres de “AGÜERO” y “SÁENZ PEÑA”. Se generan datos mensuales para cada estación con el objetivo de mapearlos posteriormente.

2.1 LECTURA Y NORMALIZACIÓN DE BASES PARA EL AÑO 2019

## Read the raw 2019 turnstile export. Text columns stay as character so the
## station names can be edited below.
molinetes2019 <- read.csv(
  "C:/molinetes/datahistorica062019.csv",
  stringsAsFactors = FALSE,
  encoding = "UTF-8"
)

## Upper-case every column. toupper() returns character, so numeric columns
## become text here; the CSV round trip below lets read.csv() detect the column
## types again (presumably the reason for it -- confirm).
mayumolinetes2019 <- molinetes2019
mayumolinetes2019[] <- lapply(mayumolinetes2019, toupper)

## Collapse the spelling variants of two station names into a single label.
## The second mask is computed after the first replacement, as the two steps
## run in sequence.
es_saenz <- startsWith(mayumolinetes2019$estacion, "SAENZ")
mayumolinetes2019$estacion <- replace(
  mayumolinetes2019$estacion, es_saenz, "SAENZ PEÑA"
)
es_aguero <- startsWith(mayumolinetes2019$estacion, "AG")
mayumolinetes2019$estacion <- replace(
  mayumolinetes2019$estacion, es_aguero, "AGÜERO"
)

## Save the normalised table ...
write.csv(mayumolinetes2019, "C:/molinetes/2019.csv", row.names = FALSE)

## ... and read it back; text columns are loaded as factors this time.
datos2019 <- read.csv(
  "C:/molinetes/2019.csv",
  stringsAsFactors = TRUE,
  encoding = "system"
)

## Split `periodo` into year (characters 1-4) and month (characters 5-6).
datos2019$año <- substr(datos2019$periodo, 1, 4)
datos2019$mes <- substr(datos2019$periodo, 5, 6)

2.2 GENERO LOS DATOS PARA EL AÑO 2019

## Passenger totals per station for one month.
##
## datos      data frame with columns `mes` (two-character month code),
##            `estacion` and `total`.
## codigo_mes month code to keep, e.g. "01".
## etiqueta   name given to the second (total) column of the result.
##
## Returns one row per station, sorted from the largest total to the smallest,
## with columns `estacion` and <etiqueta>.
total_mensual_por_estacion <- function(datos, codigo_mes, etiqueta) {
  resumen <- datos %>%
    filter(mes == codigo_mes) %>%
    group_by(estacion) %>%
    summarise(total = sum(total)) %>%
    arrange(desc(total))
  ## Rename only after sorting, so the pipeline can refer to `total`.
  names(resumen)[2] <- etiqueta
  resumen
}

## One table per month, January to June 2019.
mes_01_2019 <- total_mensual_por_estacion(datos2019, "01", "Total Enero 2019")
mes_02_2019 <- total_mensual_por_estacion(datos2019, "02", "Total Febrero 2019")
mes_03_2019 <- total_mensual_por_estacion(datos2019, "03", "Total Marzo 2019")
mes_04_2019 <- total_mensual_por_estacion(datos2019, "04", "Total Abril 2019")
mes_05_2019 <- total_mensual_por_estacion(datos2019, "05", "Total Mayo 2019")
mes_06_2019 <- total_mensual_por_estacion(datos2019, "06", "Total Junio 2019")

3. PREPARANDO PARA MAPEAR

En este apartado se normalizan los datos de las estaciones de subte ya que, por ejemplo, los nombres de estaciones no coincidían entre los datos georreferenciados y los datos en formato tabla de cantidad de pasajeros por molinetes. Se unieron (join) los datos generados en el apartado anterior y se calculó un valor para el tamaño de los círculos a mapear.

3.1 EDITANDO BASES DE ESTACIONES

## Read the subway station table and add a `nombre` column that starts as a
## copy of `estacion`; `nombre` is then rewritten to the spelling used in the
## turnstile data.
esteiyon <- read.csv(
  "C:/molinetes/estaciones-de-subte-zip/estaciones-de-subte.csv",
  stringsAsFactors = FALSE,
  encoding = "UTF-8"
)
editestaciones <- esteiyon
editestaciones$nombre <- editestaciones$estacion

## Station-table name -> turnstile-table name. No replacement value is itself a
## key, so applying the table in one pass gives the same result as applying
## the replacements one after another.
renombres <- c(
  "C. PELLEGRINI" = "CARLOS PELLEGRINI",
  "AV. DE MAYO" = "AVENIDA DE MAYO",
  "AV. LA PLATA" = "AVENIDA LA PLATA",
  "BELGRANO" = "GENERAL BELGRANO",
  "CÓRDOBA" = "CORDOBA",
  "DE LOS INCAS -PQUE. CHAS" = "LOS INCAS",
  "ECHEVERRÍA" = "ECHEVERRIA",
  "ENTRE RIOS - RODOLFO WALSH" = "ENTRE RIOS",
  "FACULTAD DE DERECHO - JULIETA LANTIERI" = "FACULTAD DE DERECHO",
  "SAN JOSÉ DE FLORES" = "FLORES",
  "SAN MARTIN" = "GENERAL SAN MARTIN",
  "HUMBERTO 1º" = "HUMBERTO I",
  "JUAN MANUEL DE ROSAS - VILLA URQUIZA" = "ROSAS",
  "MALABIA - OSVALDO PUGLIESE" = "MALABIA",
  "MORENO" = "MARIANO MORENO",
  "ONCE - 30 DE DICIEMBRE" = "ONCE",
  "PARQUE PATRICIOS" = "PATRICIOS",
  "PASTEUR - AMIA" = "PASTEUR",
  "PLAZA DE LOS VIRREYES - EVA PERON" = "PZA. DE LOS VIRREYES",
  "PLAZA DE MISERERE" = "PLAZA MISERERE",
  "SANTA FE - CARLOS JAUREGUI" = "SANTA FE",
  "R.SCALABRINI ORTIZ" = "SCALABRINI ORTIZ",
  "TRIBUNALES - TEATRO COLÓN" = "TRIBUNALES",
  "TRONADOR - VILLA ORTÚZAR" = "TRONADOR"
)
posicion <- match(editestaciones$nombre, names(renombres))
a_renombrar <- !is.na(posicion)
editestaciones$nombre[a_renombrar] <- unname(renombres[posicion[a_renombrar]])

## Names shared by two lines get a line-specific label.
## NOTE(review): three labels use ".<line>" and one uses " E"; presumably this
## mirrors the turnstile data -- confirm against that file.
editestaciones$nombre[
  with(editestaciones, nombre == "CALLAO" & linea == "B")
] <- "CALLAO.B"
editestaciones$nombre[
  with(editestaciones, nombre == "INDEPENDENCIA" & linea == "E")
] <- "INDEPENDENCIA.E"
editestaciones$nombre[
  with(editestaciones, nombre == "PUEYRREDON" & linea == "D")
] <- "PUEYRREDON.D"
editestaciones$nombre[
  with(editestaciones, nombre == "RETIRO" & linea == "E")
] <- "RETIRO E"

## Save the edited table ...
write.csv(editestaciones, "C:/molinetes/estacionesgeo.csv", row.names = FALSE)

## ... read it back with text columns as factors, and drop the original
## `estacion` column so only the normalised `nombre` remains.
estacionesgeo <- read.csv(
  "C:/molinetes/estacionesgeo.csv",
  stringsAsFactors = TRUE,
  encoding = "system"
)
estacionesgeo$estacion <- NULL

## Attach the six monthly totals to the station table, one left join per
## month, matching the station table's `nombre` with the monthly `estacion`.
## Stations without a match keep NA totals. Every intermediate table is kept
## under its original name.
enero2019 <- estacionesgeo %>%
  left_join(mes_01_2019, by = c("nombre" = "estacion"))
addfebrero2019 <- enero2019 %>%
  left_join(mes_02_2019, by = c("nombre" = "estacion"))
addmarzo2019 <- addfebrero2019 %>%
  left_join(mes_03_2019, by = c("nombre" = "estacion"))
addabril2019 <- addmarzo2019 %>%
  left_join(mes_04_2019, by = c("nombre" = "estacion"))
addmayo2019 <- addabril2019 %>%
  left_join(mes_05_2019, by = c("nombre" = "estacion"))
estacionesgeo_x_mes2019 <- addmayo2019 %>%
  left_join(mes_06_2019, by = c("nombre" = "estacion"))

## NOTE: this changes a global option, so it affects how numbers are printed
## for the rest of the session, not just here.
options(digits = 3)

## Divisor that turns a monthly passenger total into a circle radius for the
## map. The value comes from the original analysis; how it was derived is not
## shown in this file.
pasajeros_por_unidad_radio <- 35962

## Month suffix of each Circle_* column -> column holding that month's total.
columnas_total <- c(
  enero = "Total Enero 2019",
  febrero = "Total Febrero 2019",
  marzo = "Total Marzo 2019",
  abril = "Total Abril 2019",
  mayo = "Total Mayo 2019",
  junio = "Total Junio 2019"
)
for (sufijo_mes in names(columnas_total)) {
  estacionesgeo_x_mes2019[[paste0("Circle_", sufijo_mes)]] <-
    estacionesgeo_x_mes2019[[columnas_total[[sufijo_mes]]]] /
    pasajeros_por_unidad_radio
}

3.2 AGREGANDO LAS LÍNEAS DE SUBTE

## Read the subway line layer, then keep one object per line.
lineas <- st_read("C:/lineas-de-subte-zip")

## Console output recorded when the layer was read:
## Reading layer `lineas-subte' from data source `C:\lineas-de-subte-zip' using driver `ESRI Shapefile'
## Simple feature collection with 82 features and 2 fields
## geometry type:  LINESTRING
## dimension:      XY
## bbox:           xmin: -58.5 ymin: -34.6 xmax: -58.4 ymax: -34.6
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs

## Features of `lineas` whose LINEASUB field equals `nombre_linea`.
solo_linea <- function(nombre_linea) {
  filter(lineas, LINEASUB == nombre_linea)
}

lineaA <- solo_linea("LINEA A")
lineaB <- solo_linea("LINEA B")
lineaC <- solo_linea("LINEA C")
lineaD <- solo_linea("LINEA D")
lineaE <- solo_linea("LINEA E")
lineaH <- solo_linea("LINEA H")

4. CRUZANDO DATOS ESPACIALES

4.1 LÍNEAS DE COLECTIVOS CERCANAS A LAS ESTACIONES DE SUBTE

## Read the subway station point layer.
estaciones_georef <- st_read("C:/molinetes/estaciones-de-subte-zip")

## Console output recorded when the layer was read:
## Reading layer `estaciones-subte' from data source `C:\molinetes\estaciones-de-subte-zip' using driver `ESRI Shapefile'
## Simple feature collection with 90 features and 5 fields
## geometry type:  POINT
## dimension:      XY
## bbox:           xmin: -58.5 ymin: -34.6 xmax: -58.4 ymax: -34.6
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs

## Reproject to EPSG:5348 so the buffer distance can be given in metres, then
## draw a 400 m buffer around every station.
estaciones_georefmetros <- estaciones_georef %>%
  st_transform(5348)
buffer400m <- estaciones_georefmetros %>%
  st_buffer(dist = 400)

## Back to lat/long WGS84 (EPSG:4326), the CRS of the other layers.
buffer_wgs84 <- buffer400m %>%
  st_transform(4326)

## Read the bus route layer straight from the open-data portal (needs network
## access).
recorridos <- st_read(
  "https://datos.transporte.gob.ar/dataset/f87b93d4-ade2-44fc-a409-d3736ba9f3ba/resource/84947471-9c1e-4a23-8a2e-03a8c87c056f/download/lineasbusrmbajurisdiccionnacional.geojson",
  stringsAsFactors = FALSE
)

## Console output recorded when the layer was read:
## Reading layer `lineasbusrmbajurisdiccionnacional' from data source `https://datos.transporte.gob.ar/dataset/f87b93d4-ade2-44fc-a409-d3736ba9f3ba/resource/84947471-9c1e-4a23-8a2e-03a8c87c056f/download/lineasbusrmbajurisdiccionnacional.geojson' using driver `GeoJSON'
## Simple feature collection with 1172 features and 5 fields
## geometry type:  MULTILINESTRING
## dimension:      XYZ
## bbox:           xmin: -59.4 ymin: -35.2 xmax: -57.9 ymax: -34
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs

## Spatial join: attach to every station buffer the bus routes that st_join()
## matches with its default predicate.
joinespacial <- buffer_wgs84 %>%
  st_join(recorridos)

## Summarise which bus lines pass within 400 m of each subway station.
##
## joinespacial can hold the same line several times for one station, so the
## (id, line) pairs are de-duplicated. The geometry is dropped first: only the
## attribute columns are needed, and the buffer polygons do not have to be
## carried through the aggregation.
lineas_x_estacion <- joinespacial %>%
  st_set_geometry(NULL) %>%
  as_tibble() %>%
  ## LINEA values that are not plain numbers become NA (with a coercion
  ## warning).
  mutate(bondi = as.numeric(LINEA)) %>%
  distinct(id, bondi) %>%
  arrange(id, bondi)

## One row per station id (sorted by id) with its lines collapsed into a
## single " - " separated string for the map popup. Missing line numbers are
## left out instead of being printed as the literal text "NA"; a station with
## no usable line gets an empty string.
sintesis <- lineas_x_estacion %>%
  group_by(id) %>%
  summarise(lineas = paste(bondi[!is.na(bondi)], collapse = " - "))

5. MAPEANDO

5.1 MAPEO LOS DATOS MENSUALES POR ESTACIÓN

Se destacan, en primer lugar, los casos de las estaciones de Constitución y Retiro y, luego, aquellas otras estaciones que tienen combinación con líneas ferroviarias o son cabeceras de las líneas de subte. Entre el resto de las estaciones se observa una relativa homogeneidad en la cantidad de pasajeros para las líneas B y D. También resultan llamativos los valores bajos de la línea E.

## Interactive map of monthly passenger totals per subway station
## (January to June 2019), with the subway lines drawn on top.
##
## Circle radii and popup totals come from estacionesgeo_x_mes2019 and are
## paired with the points of estaciones_georef purely by row position. Only
## the row count is checked here; both tables are assumed to list the stations
## in the same order -- confirm.
stopifnot(nrow(estacionesgeo_x_mes2019) == nrow(estaciones_georef))

## Hover-label style shared by the monthly layers.
estilo_etiqueta <- list("color" = "steelblue", "font-size" = "10px")

## Add the circle-marker layer of one month to `mapa`.
##
## mapa   leaflet map built on estaciones_georef.
## mes    capitalised Spanish month name, e.g. "Enero"; selects the columns
##        "Total <mes> 2019" and "Circle_<mes in lower case>" and names the
##        layer group "<mes> 2019".
## estilo CSS style list for the hover label.
agregar_capa_mes <- function(mapa, mes, estilo = estilo_etiqueta) {
  totales <- estacionesgeo_x_mes2019[[paste0("Total ", mes, " 2019")]]
  radios <- estacionesgeo_x_mes2019[[paste0("Circle_", tolower(mes))]]
  addCircleMarkers(
    mapa,
    radius = radios,
    stroke = FALSE,
    fillColor = "navy",
    fillOpacity = 0.6,
    label = ~estaciones_georef$estacion,
    group = paste(mes, "2019"),
    popup = paste0(
      "<b>Total de pasajeros ", mes, " 2019:</b><br/>",
      ## format = "d" keeps large totals out of scientific notation.
      formatC(totales, format = "d", big.mark = ".")
    ),
    labelOptions = labelOptions(noHide = FALSE, style = estilo)
  )
}

## Base tiles. The group names must be exactly the ones listed in baseGroups
## below, otherwise the layer control cannot switch between them.
mapa_pasajeros <- leaflet(estaciones_georef) %>%
  addTiles(group = "Open Street Map") %>%
  addProviderTiles(providers$CartoDB.Positron, group = "CartoDB")

## Station layers, one per month.
for (nombre_mes in c("Enero", "Febrero", "Marzo", "Abril", "Mayo")) {
  mapa_pasajeros <- agregar_capa_mes(mapa_pasajeros, nombre_mes)
}
## NOTE(review): only the June layer uses a bold label, as in the original
## code; this looks like a copy-paste difference -- confirm whether intended.
mapa_pasajeros <- agregar_capa_mes(
  mapa_pasajeros,
  "Junio",
  estilo = c(estilo_etiqueta, list("font-weight" = "bold"))
)

## Subway lines and layer control. The expression is left unassigned so the
## map is displayed.
mapa_pasajeros %>%
  addPolylines(data = lineaA, weight = 5, color = "dodgerblue",
               label = "Línea A", smoothFactor = 3, group = "Línea A") %>%
  addPolylines(data = lineaB, weight = 5, color = "red",
               label = "Línea B", smoothFactor = 3, group = "Línea B") %>%
  addPolylines(data = lineaC, weight = 5, color = "blue",
               label = "Línea C", smoothFactor = 3, group = "Línea C") %>%
  addPolylines(data = lineaD, weight = 5, color = "green",
               label = "Línea D", smoothFactor = 3, group = "Línea D") %>%
  addPolylines(data = lineaE, weight = 5, color = "purple",
               label = "Línea E", smoothFactor = 3, group = "Línea E") %>%
  addPolylines(data = lineaH, weight = 5, color = "yellow",
               label = "Línea H", smoothFactor = 3, group = "Línea H") %>%
  addLayersControl(
    baseGroups = c("CartoDB", "Open Street Map"),
    overlayGroups = c(
      "Enero 2019", "Febrero 2019", "Marzo 2019",
      "Abril 2019", "Mayo 2019", "Junio 2019",
      "Línea A", "Línea B", "Línea C", "Línea D", "Línea E", "Línea H"
    ),
    options = layersControlOptions(collapsed = FALSE)
  )

5.2 LÍNEAS DE COLECTIVOS CERCANAS A CADA ESTACIÓN DE SUBTE

## Interactive map of the bus lines that pass within 400 m of each subway
## station: one marker per station, with the lines listed in its popup.

## Marker icon for the stations.
circulito <- makeIcon(
  iconUrl = "http://maps.google.com/mapfiles/kml/shapes/subway.png",
  iconWidth = 25
)

## Popup text per station, looked up by station id. `sintesis` is ordered by
## id, which need not be the row order of estaciones_georef, so taking
## sintesis$lineas by position could attach lines to the wrong station.
lineas_popup <- sintesis$lineas[match(estaciones_georef$id, sintesis$id)]

leaflet(estaciones_georef) %>%
  ## Base tiles. The group names must be exactly the ones listed in baseGroups
  ## below, otherwise the layer control cannot switch between them.
  addTiles(group = "Open Street Map") %>%
  addProviderTiles(providers$CartoDB.Positron, group = "CartoDB") %>%

  ## Station markers with the nearby bus lines in the popup.
  addMarkers(
    lng = ~long,
    lat = ~lat,
    icon = circulito,
    label = ~estaciones_georef$estacion,
    group = "Colectivos a 400m (Click para info)",
    popup = paste0("<b>Líneas a 400m:</b><br/>", lineas_popup),
    labelOptions = labelOptions(
      noHide = TRUE,
      style = list("color" = "steelblue", "font-size" = "12px")
    )
  ) %>%

  ## Subway lines.
  addPolylines(data = lineaA, weight = 5, color = "dodgerblue",
               label = "Línea A", smoothFactor = 3, group = "Línea A") %>%
  addPolylines(data = lineaB, weight = 5, color = "red",
               label = "Línea B", smoothFactor = 3, group = "Línea B") %>%
  addPolylines(data = lineaC, weight = 5, color = "blue",
               label = "Línea C", smoothFactor = 3, group = "Línea C") %>%
  addPolylines(data = lineaD, weight = 5, color = "green",
               label = "Línea D", smoothFactor = 3, group = "Línea D") %>%
  addPolylines(data = lineaE, weight = 5, color = "purple",
               label = "Línea E", smoothFactor = 3, group = "Línea E") %>%
  addPolylines(data = lineaH, weight = 5, color = "yellow",
               label = "Línea H", smoothFactor = 3, group = "Línea H") %>%

  ## Layer control.
  addLayersControl(
    baseGroups = c("CartoDB", "Open Street Map"),
    overlayGroups = c(
      "Colectivos a 400m (Click para info)",
      "Línea A", "Línea B", "Línea C", "Línea D", "Línea E", "Línea H"
    ),
    options = layersControlOptions(collapsed = FALSE)
  )