Mapeando y analizando open data urbana + Open Street Map

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.1
## -- Attaching packages ------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.0     v purrr   0.3.2
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   0.8.3     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## Warning: package 'tibble' was built under R version 3.6.1
## Warning: package 'tidyr' was built under R version 3.6.1
## Warning: package 'readr' was built under R version 3.6.1
## Warning: package 'purrr' was built under R version 3.6.1
## Warning: package 'dplyr' was built under R version 3.6.1
## Warning: package 'stringr' was built under R version 3.6.1
## Warning: package 'forcats' was built under R version 3.6.1
## -- Conflicts ---------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(sf)
## Warning: package 'sf' was built under R version 3.6.1
## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
library(ggplot2)
library(osmdata)
## Warning: package 'osmdata' was built under R version 3.6.1
## Data (c) OpenStreetMap contributors, ODbL 1.0. http://www.openstreetmap.org/copyright
library(leaflet)
## Warning: package 'leaflet' was built under R version 3.6.1
mapa_NYC <- st_read("E:/Documentos/Ciencia de Datos II/NYC base/nynta.shp")
## Reading layer `nynta' from data source `E:\Documentos\Ciencia de Datos II\NYC base\nynta.shp' using driver `ESRI Shapefile'
## Simple feature collection with 195 features and 7 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 913175.1 ymin: 120121.9 xmax: 1067383 ymax: 272844.3
## epsg (SRID):    NA
## proj4string:    +proj=lcc +lat_1=40.66666666666666 +lat_2=41.03333333333333 +lat_0=40.16666666666666 +lon_0=-74 +x_0=300000 +y_0=0 +datum=NAD83 +units=us-ft +no_defs

Agregando data con OpenStreetMap

bbox <- getbb("Manhattan New York")
bbox
##         min       max
## x -74.04722 -73.90616
## y  40.68394  40.88045
bbox_poly_manhattan <- st_read("https://data.cityofnewyork.us/api/geospatial/tqmj-j8zm?method=export&format=GeoJSON") %>% 
filter(boro_name == "Manhattan") 
## Reading layer `OGRGeoJSON' from data source `https://data.cityofnewyork.us/api/geospatial/tqmj-j8zm?method=export&format=GeoJSON' using driver `GeoJSON'
## Simple feature collection with 5 features and 4 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs
bbox_manhattan <- st_bbox(bbox_poly_manhattan)
leaflet() %>% 
addTiles() %>% 
addPolygons(data = bbox_poly_manhattan)
callesMan <- opq(bbox_manhattan) %>% 
    add_osm_feature(key = "highway")

callesMan <- callesMan %>% 
    osmdata_sf()

callesMan
## Object of class 'osmdata' with:
##                  $bbox : 40.6829169454451,-74.0477296269704,40.8790380473072,-73.9066509953948
##         $overpass_call : The call submitted to the overpass API
##                  $meta : metadata including timestamp and version numbers
##            $osm_points : 'sf' Simple Features Collection with 165743 points
##             $osm_lines : 'sf' Simple Features Collection with 38685 linestrings
##          $osm_polygons : 'sf' Simple Features Collection with 1988 polygons
##        $osm_multilines : NULL
##     $osm_multipolygons : 'sf' Simple Features Collection with 72 multipolygons
callesMan <- callesMan$osm_lines

callesMan <- st_intersection(callesMan, bbox_poly_manhattan)
## although coordinates are longitude/latitude, st_intersection assumes that they are planar
## Warning: attribute variables are assumed to be spatially constant
## throughout all geometries
ggplot() +
  geom_sf(data = callesMan)

callesManla <- callesMan %>%
  mutate(lanes = as.numeric(lanes),
        lanes = ifelse(is.na(lanes), 1, as.numeric(lanes)))
         

callesManla <-callesMan %>% 
  mutate(color=case_when(lanes=="1" ~ "midnightblue",
    lanes=="2" ~ "darkorange1",
    lanes=="3" ~ "red4",
    lanes=="4" ~ "brown4",
    lanes=="5" ~ "darkmagenta",
    lanes=="6" ~ "yellow3",
    lanes=="7" ~ "green4", 
    lanes== "NA" ~ "aliceblue")) 

ggplot() +
    geom_sf(data = bbox_poly_manhattan, aes(), color="grey80", size = 0.4) +
    geom_sf(data = callesManla, aes(fill = lanes)) +
    geom_sf(data = callesManla, aes(color = lanes), size = 0.5) +
    scale_color_identity() +
    theme_void() +
    labs(title = "Manhattan - NYC",
         subtitle = "Vías de circulación",
         fill = "Cantidad de carriles",
         caption = "Fuente: OpenStreetMap")

cafeMan <- opq(bbox_manhattan) %>% 
    add_osm_feature(key = "amenity", value = "cafe")

cafeMan <- cafeMan %>% 
    osmdata_sf()

cafeMan <- cafeMan$osm_points

cafeMan <- st_intersection(cafeMan, bbox_poly_manhattan)
## although coordinates are longitude/latitude, st_intersection assumes that they are planar
## Warning: attribute variables are assumed to be spatially constant
## throughout all geometries
ggplot() +
  geom_sf(data = cafeMan)

ggplot() +
    geom_sf(data = callesMan,
             color = "gray60", alpha = 0.3) +
    geom_sf(data = cafeMan, color = "steelblue4") + 
    theme_void() +
      labs(title = "NYC",
         subtitle = "Sidewalks café",
         caption = "fuente: OpenStreetMap")

mapa_Man <- mapa_NYC

mapa_Man <- st_transform(mapa_Man, st_crs(cafeMan))

cafeMan_mapaNYC <- st_join(cafeMan, mapa_Man)
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
conteo_cafeopm <- cafeMan_mapaNYC %>% 
    group_by(NTAName, BoroName) %>% 
    summarise(cantidad = n())

CafeMan_mapa <- filter(mapa_Man, BoroName %in%
                     c("Manhattan"))

CafeMan_mapa <- st_join(CafeMan_mapa, conteo_cafeopm)
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
ggplot() +
  geom_sf(data = CafeMan_mapa, aes(fill = cantidad), color = NA) +
    labs(title = "Sidewalks Café - Manhattan",
         fill = "Cantidad",
         caption = "Fuente: Open Steet Map")+
  theme(panel.background = element_rect(fill = "gray100", colour = "gray100", size = 2, linetype = "solid"),
        panel.grid.major = element_line(size = 0.5, linetype = "solid", colour = "grey"),
        panel.grid.minor = element_line(size = 0.25, linetype = "solid", colour = "white"),
        title=element_text(size=12, face = "bold"),
        legend.key.size = unit(0.4, "cm"), 
        legend.key.width = unit(0.4,"cm"),
        legend.position="right",
        legend.direction = "vertical", 
        legend.title=element_text(size=10, face = "bold"), 
        legend.text=element_text(size=7), 
        plot.caption=element_text(face = "italic", colour = "gray35",size=6), 
        axis.text = element_blank(), 
        axis.ticks = element_blank())

Bajamos dataset del sitio de open data de NYC para comparar la información de los datasets descargardos de los diferentes sitios.

SW_NYC <- st_read("E:/Documentos/Ciencia de Datos II/sidewalk cafe/nysidewalkcafe.shp") 
## Reading layer `nysidewalkcafe' from data source `E:\Documentos\Ciencia de Datos II\sidewalk cafe\nysidewalkcafe.shp' using driver `ESRI Shapefile'
## Simple feature collection with 20063 features and 2 fields
## geometry type:  MULTILINESTRING
## dimension:      XY
## bbox:           xmin: 914625.1 ymin: 123984 xmax: 1067336 ymax: 271427.1
## epsg (SRID):    NA
## proj4string:    +proj=lcc +lat_1=40.66666666666666 +lat_2=41.03333333333333 +lat_0=40.16666666666666 +lon_0=-74 +x_0=300000 +y_0=0 +datum=NAD83 +units=us-ft +no_defs
SW_mapaNYC <- st_join(SW_NYC, mapa_NYC)
conteo_SW <- SW_mapaNYC %>% 
    group_by(NTAName, BoroName) %>% 
    summarise(cantidad = n())
## Warning: Factor `NTAName` contains implicit NA, consider using
## `forcats::fct_explicit_na`
## Warning: Factor `BoroName` contains implicit NA, consider using
## `forcats::fct_explicit_na`
SW_mapaMan <- filter(mapa_NYC, BoroName %in%
                     c("Manhattan"))

SW_mapaMan <- st_join(SW_mapaMan, conteo_SW)
ggplot() +
  geom_sf(data = SW_mapaMan, aes(fill = cantidad), color = NA) +
    labs(title = "Sidewalks Café - Manhattan",
         fill = "Cantidad",
         caption = "Fuente: Open Data NYC")+
  theme(panel.background = element_rect(fill = "gray100", colour = "gray100", size = 2, linetype = "solid"),
        panel.grid.major = element_line(size = 0.5, linetype = "solid", colour = "grey"),
        panel.grid.minor = element_line(size = 0.25, linetype = "solid", colour = "white"),
        title=element_text(size=12, face = "bold"),
        legend.key.size = unit(0.4, "cm"), 
        legend.key.width = unit(0.4,"cm"),
        legend.position="right",
        legend.direction = "vertical", 
        legend.title=element_text(size=10, face = "bold"), 
        legend.text=element_text(size=7), 
        plot.caption=element_text(face = "italic", colour = "gray35",size=6), 
        axis.text = element_blank(), 
        axis.ticks = element_blank())

De los gráficos resultantes podemos ver diferencia en los conteos totales entre los datos cargados por el Municipio de NYC y los cargados en Open street map, siendo superior el recuento realizado con la data del Municipio. La misma puede deberse por las distintas fuentes de información de cada uno, pudiendo deberse en el segundo caso que los datos cargados por las personas no son tan estrictos como los del Municipio, ya que este último debe tomar la data de los permisos de habilitación y registros para funcionar.