# TRABAJO PRÁCTICO N°1 - CIENCIA DE DATOS PARA CIUDADES II - Snyders, Federico / Vargas, Juan
library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.0 v purrr 0.3.3
## v tibble 2.1.3 v dplyr 0.8.5
## v tidyr 1.0.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(sf)
## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
# Load the Boston neighbourhood polygons from the GeoJSON file
# (source: https://data.boston.gov/dataset). The public schools table
# read further below is attributed to the same portal.
BOSTON <- st_read(
  dsn = "~/JUAN/POSGRADOS/MAESTRIA EN ECONOMIA URBANA/2020/CIENCIA DE DATOS PARA CIUDADES II/CLASE 1/TP_1/datos BOSTON/Boston_Neighborhoods.geojson"
)
## Reading layer `Boston_Neighborhoods' from data source `C:\Users\usuario\Documents\JUAN\POSGRADOS\MAESTRIA EN ECONOMIA URBANA\2020\CIENCIA DE DATOS PARA CIUDADES II\CLASE 1\TP_1\datos BOSTON\Boston_Neighborhoods.geojson' using driver `GeoJSON'
## Simple feature collection with 26 features and 7 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 739715.3 ymin: 2908294 xmax: 812255.1 ymax: 2970086
## CRS: 4326
# Read the public schools table, keeping text columns as character
# vectors, and list its column names.
# NOTE(review): the first column prints as "ï..X" in the output below,
# which looks like a byte-order-mark artefact of the CSV header; later
# steps refer to that exact name, so it is left untouched here.
ESCUELAS_BOSTON <- read.csv(
  file = "~/JUAN/POSGRADOS/MAESTRIA EN ECONOMIA URBANA/2020/CIENCIA DE DATOS PARA CIUDADES II/CLASE 1/TP_1/datos BOSTON/Public_Schools.csv",
  stringsAsFactors = FALSE
)
colnames(ESCUELAS_BOSTON)
## [1] "ï..X" "Y" "OBJECTID_1" "OBJECTID" "BLDG_ID"
## [6] "BLDG_NAME" "ADDRESS" "CITY" "ZIPCODE" "CSP_SCH_ID"
## [11] "SCH_ID" "SCH_NAME" "SCH_LABEL" "SCH_TYPE" "SHARED"
## [16] "COMPLEX" "POINT_X" "POINT_Y"
# Drop the duplicated coordinate columns (POINT_X and POINT_Y) to avoid
# errors in the following steps, then check which columns remain.
ESCUELAS_BOSTON2 <- ESCUELAS_BOSTON %>%
  select(-POINT_X, -POINT_Y)
colnames(ESCUELAS_BOSTON2)
## [1] "ï..X" "Y" "OBJECTID_1" "OBJECTID" "BLDG_ID"
## [6] "BLDG_NAME" "ADDRESS" "CITY" "ZIPCODE" "CSP_SCH_ID"
## [11] "SCH_ID" "SCH_NAME" "SCH_LABEL" "SCH_TYPE" "SHARED"
## [16] "COMPLEX"
# Map of geometries and points: the neighbourhood polygons with every
# public school drawn on top from its raw X/Y columns.
ggplot(data = BOSTON) +
  geom_sf() +
  geom_point(
    data = ESCUELAS_BOSTON2,
    mapping = aes(x = ï..X, y = Y),
    colour = "red",
    alpha = 0.8
  )
## Warning in st_is_longlat(x): bounding box has potentially an invalid value range
## for longlat data
# Convert the schools table into a spatial (sf) point layer.
#
# Both the neighbourhood polygons and the school X/Y columns hold planar
# coordinates in the hundreds of thousands / millions (e.g. 790128.2,
# 2967094), not degrees, so tagging them as EPSG:4326 is what triggers
# the "invalid value range for longlat data" warnings. The layers are
# therefore labelled with a projected CRS instead.
# NOTE(review): the coordinate ranges and the area/acre ratio of the
# polygons are consistent with EPSG:2249 (NAD83 / Massachusetts
# Mainland, US survey feet) - confirm against the portal metadata.
#
# The CRS is first cleared (NA) and then set, because this is a
# re-labelling of coordinates that are already projected, not a
# reprojection; going through NA avoids the "replacing crs does not
# reproject data" warning.
BOSTON <- BOSTON %>%
  st_set_crs(NA) %>%
  st_set_crs(2249)

# Build the points with exactly the CRS of the polygons so that the
# later spatial join and plots see matching reference systems.
ESCUELAS_BOSTON2 <- ESCUELAS_BOSTON2 %>%
  st_as_sf(coords = c("ï..X", "Y"), crs = st_crs(BOSTON))
# Plot both spatial layers together: neighbourhood polygons underneath,
# school points in red on top.
ggplot(data = BOSTON) +
  geom_sf() +
  geom_sf(data = ESCUELAS_BOSTON2, colour = "red")
## Warning in st_is_longlat(x): bounding box has potentially an invalid value range
## for longlat data
# Spatial join: every school point receives the attributes of the
# neighbourhood polygon it intersects. The per-neighbourhood count is
# computed from this result further below.
BOSTON_con_ESCUELAS <- ESCUELAS_BOSTON2 %>%
  st_join(BOSTON)
## Warning in st_is_longlat(x): bounding box has potentially an invalid value range
## for longlat data
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
## Warning in st_is_longlat(x): bounding box has potentially an invalid value range
## for longlat data
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
# Preview the first six rows of the joined schools layer.
head(BOSTON_con_ESCUELAS, n = 6L)
## Simple feature collection with 6 features and 21 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: 780367 ymin: 2962122 xmax: 790128.2 ymax: 2967094
## CRS: EPSG:4326
## OBJECTID_1 OBJECTID.x BLDG_ID BLDG_NAME ADDRESS
## 1 1 1 1 Guild Bldg 195 Leyden Street
## 2 2 2 3 Kennedy, P Bldg 343 Saratoga Street
## 3 3 3 4 Otis Bldg 218 Marion Street
## 4 4 4 6 Odonnell Bldg 33 Trenton Street
## 5 5 5 7 East Boston High Bldg 86 White Street
## 6 6 6 8 Umana / Barnes Bldg 312 Border Street
## CITY ZIPCODE CSP_SCH_ID SCH_ID SCH_NAME SCH_LABEL
## 1 East Boston 2128 4061 4061 Guild Elementary Guild
## 2 East Boston 2128 4541 4541 Kennedy Patrick Elem PJ Kennedy
## 3 East Boston 2128 4322 4322 Otis Elementary Otis
## 4 East Boston 2128 4543 4543 O'Donnell Elementary O'Donnell
## 5 East Boston 2128 1070 1070 East Boston High East Boston HS
## 6 East Boston 2128 4323 4323 Umana Academy Umana Academy
## SCH_TYPE SHARED COMPLEX OBJECTID.y Name Acres Neighborhood_ID
## 1 ES 38 East Boston 3012.06 8
## 2 ES 38 East Boston 3012.06 8
## 3 ES 38 East Boston 3012.06 8
## 4 ES 38 East Boston 3012.06 8
## 5 HS 38 East Boston 3012.06 8
## 6 K-8 38 East Boston 3012.06 8
## SqMiles ShapeSTArea ShapeSTLength geometry
## 1 4.71 131384487 121089.1 POINT (790128.2 2967094)
## 2 4.71 131384487 121089.1 POINT (783027.7 2963318)
## 3 4.71 131384487 121089.1 POINT (782112.8 2962122)
## 4 4.71 131384487 121089.1 POINT (780994 2963140)
## 5 4.71 131384487 121089.1 POINT (781823 2964190)
## 6 4.71 131384487 121089.1 POINT (780367 2963210)
# Count the schools in each neighbourhood. Because the input is an sf
# object, the points of each group are kept together as one geometry per
# neighbourhood (POINT / MULTIPOINT in the printed preview).
GRUPO_BOSTON_con_ESCUELAS <- summarise(
  group_by(BOSTON_con_ESCUELAS, Name),
  cantidad = n()
)
head(GRUPO_BOSTON_con_ESCUELAS, n = 6L)
## Simple feature collection with 6 features and 2 fields
## geometry type: GEOMETRY
## dimension: XY
## bbox: xmin: 747666.9 ymin: 2949869 xmax: 775733 ymax: 2963847
## CRS: EPSG:4326
## Warning in st_is_longlat(x): bounding box has potentially an invalid value range
## for longlat data
## # A tibble: 6 x 3
## Name cantidad geometry
## <fct> <int> <GEOMETRY [°]>
## 1 Allston 4 MULTIPOINT ((751868.4 2952925), (754098.8 2953524), (75505~
## 2 Back Bay 1 POINT (770246.7 2953059)
## 3 Bay Vill~ 2 MULTIPOINT ((772662 2952280), (772904.8 2952413))
## 4 Brighton 6 MULTIPOINT ((747666.9 2953140), (747865.4 2953524), (74802~
## 5 Charlest~ 4 MULTIPOINT ((773097 2963460), (773855.7 2963070), (774759.~
## 6 Chinatown 1 POINT (773779.1 2952129)
# Point map again, this time colouring the schools by the neighbourhood
# they belong to.
ggplot(data = BOSTON) +
  geom_sf() +
  geom_sf(
    data = GRUPO_BOSTON_con_ESCUELAS,
    mapping = aes(colour = Name)
  )
## Warning in st_is_longlat(x): bounding box has potentially an invalid value range
## for longlat data
# Bar chart with the number of schools in each neighbourhood.
ggplot(
  data = GRUPO_BOSTON_con_ESCUELAS,
  mapping = aes(x = Name, y = cantidad)
) +
  geom_col()
# Prepare the data for the choropleth of public school supply in Boston.
# The per-neighbourhood counts are reduced to a plain table (geometry
# dropped) so they can be joined by name onto the neighbourhood polygons.
GRUPO_BOSTON_con_ESCUELAS2 <- GRUPO_BOSTON_con_ESCUELAS %>%
  st_set_geometry(NULL)

# A left join keeps every neighbourhood, but those without any school
# have no matching row and would get NA. Their true count is zero, so
# the gap is filled with 0L (integer, like the output of n()) to keep
# them on the colour scale instead of being drawn as missing data.
BOSTON <- BOSTON %>%
  left_join(GRUPO_BOSTON_con_ESCUELAS2, by = "Name") %>%
  mutate(cantidad = replace_na(cantidad, 0L))
names(BOSTON)
## [1] "OBJECTID" "Name" "Acres" "Neighborhood_ID"
## [5] "SqMiles" "ShapeSTArea" "ShapeSTLength" "cantidad"
## [9] "geometry"
# Choropleth: neighbourhoods filled by their number of schools and
# labelled with their names, on a gray-to-light-blue gradient.
ggplot(data = BOSTON) +
  geom_sf(mapping = aes(fill = cantidad)) +
  geom_sf_text(mapping = aes(label = Name), colour = "black", size = 2) +
  scale_fill_gradient(low = "gray", high = "light blue") +
  labs(
    title = "Escuelas en BOSTON",
    subtitle = "Escuelas por barrio",
    caption = "Fuente: https://data.boston.gov/dataset",
    fill = "Cantidad",
    x = "",
    y = ""
  )
## Warning in st_is_longlat(x): bounding box has potentially an invalid value range
## for longlat data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in st_is_longlat(x): bounding box has potentially an invalid value range
## for longlat data