TP1

PasoS I y II

Cargamos las librerias que necesitamos para trabajar.

library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------------------------------------------ tidyverse 1.3.0 --
## v ggplot2 3.3.0     v purrr   0.3.4
## v tibble  3.0.1     v dplyr   0.8.5
## v tidyr   1.0.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts --------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(sf)
## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3

Seleccionamos una ciudad Boston con un dataset con registros geo-referenciados con sus coordenadas: Bibliotecas Públicas.

Leemos el dataset que llamaeremos Bibliotecas_Publicas_OK; utilizando el comando st_read que lo lee directamente desde la web.

Bibliotecas_Publicas_OK <- st_read("http://bostonopendata-boston.opendata.arcgis.com/datasets/cb00f9248aa6404ab741071ca3806c0e_6.geojson?outSR={%22latestWkid%22:2249,%22wkid%22:102686}")
## Reading layer `f097a810-8fa1-4582-916b-2cc487926c42202044-1-1a050k9.mlbk' from data source `http://bostonopendata-boston.opendata.arcgis.com/datasets/cb00f9248aa6404ab741071ca3806c0e_6.geojson?outSR={%22latestWkid%22:2249,%22wkid%22:102686}' using driver `GeoJSON'
## Simple feature collection with 26 features and 7 fields
## geometry type:  POINT
## dimension:      XY
## bbox:           xmin: -71.16788 ymin: 42.2571 xmax: -71.02858 ymax: 42.37777
## CRS:            4326

Filtramos los datos que nos interesan de nuestro dataset y que quede más ordenado:

Bibliotecas_Publicas_Ordenadas <- Bibliotecas_Publicas_OK %>% 
  select(LIBRARIES_, DISTRICT, ST_ADDRESS, BRANCH, geometry)

head(Bibliotecas_Publicas_Ordenadas)
## Simple feature collection with 6 features and 4 fields
## geometry type:  POINT
## dimension:      XY
## bbox:           xmin: -71.12214 ymin: 42.2571 xmax: -71.06501 ymax: 42.36145
## CRS:            4326
##   LIBRARIES_      DISTRICT        ST_ADDRESS        BRANCH
## 1          5                151 Cambridge St      West End
## 2         11               690 Washington St Codman Square
## 3         17     HYDE PARK     35 Harvard Av     Hyde Park
## 4         18 JAMAICA PLAIN     433 Centre St      Connolly
## 5         22       ROXBURY     149 Dudley St        Dudley
## 6          1 COPLEY SQUARE   700 Boylston St       Central
##                     geometry
## 1 POINT (-71.06501 42.36145)
## 2 POINT (-71.07097 42.28752)
## 3  POINT (-71.12214 42.2571)
## 4  POINT (-71.1111 42.32065)
## 5 POINT (-71.08385 42.32849)
## 6 POINT (-71.07887 42.34944)

Ahora leemos el dataset de Barrios de Boston, utilizando el comando st_read ya que es un elemento geográfico (un shapefile).

Barrios_Boston_OK <- st_read("Boston_Neighborhoods.shp")
## Reading layer `Boston_Neighborhoods' from data source `C:\Users\ELEONORA\Documents\utdt\CIENCIA DE DATOS PARA CIUDADES\DATOS 2\CLASE1_DATOS 2\TP1_DATOS2\Boston_Neighborhoods.shp' using driver `ESRI Shapefile'
## Simple feature collection with 26 features and 7 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 739715.3 ymin: 2908294 xmax: 812255.1 ymax: 2970086
## proj4string:    +proj=lcc +lat_1=41.71666666666667 +lat_2=42.68333333333333 +lat_0=41 +lon_0=-71.5 +x_0=200000 +y_0=750000.0000000001 +datum=NAD83 +units=us-ft +no_defs
head (Barrios_Boston_OK)
## Simple feature collection with 6 features and 7 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 751472.1 ymin: 2922750 xmax: 776215.4 ymax: 2953742
## proj4string:    +proj=lcc +lat_1=41.71666666666667 +lat_2=42.68333333333333 +lat_0=41 +lon_0=-71.5 +x_0=200000 +y_0=750000.0000000001 +datum=NAD83 +units=us-ft +no_defs 
##   OBJECTID             Name      Acres Neighborho SqMiles  ShapeSTAre
## 1       27       Roslindale 1605.56824         15    2.51  69938272.9
## 2       28    Jamaica Plain 2519.24539         11    3.94 109737890.8
## 3       29     Mission Hill  350.85356         13    0.55  15283120.0
## 4       30         Longwood  188.61195         28    0.29   8215903.5
## 5       31      Bay Village   26.53984         33    0.04   1156070.8
## 6       32 Leather District   15.63991         27    0.02    681271.7
##   ShapeSTLen                       geometry
## 1  53563.913 MULTIPOLYGON (((757409.1 29...
## 2  56349.937 MULTIPOLYGON (((762983.8 29...
## 3  17918.724 MULTIPOLYGON (((766903.6 29...
## 4  11908.757 MULTIPOLYGON (((764826.9 29...
## 5   4650.635 MULTIPOLYGON (((773315.7 29...
## 6   3237.141 MULTIPOLYGON (((775544.1 29...
names(Barrios_Boston_OK)
## [1] "OBJECTID"   "Name"       "Acres"      "Neighborho" "SqMiles"   
## [6] "ShapeSTAre" "ShapeSTLen" "geometry"

Vamos a explorar el dataset Barrios_Boston, utilizando el comando plot, para tener una rápida observación del mismo.

plot(Barrios_Boston_OK)

Observamos las 7 variables de nuestro dataset de forma correcta, algunas mas relevantes que otras.

Ahora cruzamos nuestros datos de Barrios y Bibliotecas Públicas en un mapa a ver que observamos:

ggplot() +
  geom_sf(data = Barrios_Boston_OK) +
  geom_sf(data = Bibliotecas_Publicas_Ordenadas,
          color = "red")

#### Observamos bibliotecas asignadas a los distintos barrios.

Verificamos los sistemas de coordenadas de ambos datasets, y si no coinciden los transformamos a EPSG:4326.

st_crs(Barrios_Boston_OK)
## Coordinate Reference System:
##   No user input
##   wkt:
## PROJCS["NAD_1983_StatePlane_Massachusetts_Mainland_FIPS_2001_Feet",
##     GEOGCS["GCS_North_American_1983",
##         DATUM["North_American_Datum_1983",
##             SPHEROID["GRS_1980",6378137.0,298.257222101]],
##         PRIMEM["Greenwich",0.0],
##         UNIT["Degree",0.017453292519943295],
##         AUTHORITY["EPSG","4269"]],
##     PROJECTION["Lambert_Conformal_Conic_2SP"],
##     PARAMETER["False_Easting",656166.6666666665],
##     PARAMETER["False_Northing",2460625.0],
##     PARAMETER["Central_Meridian",-71.5],
##     PARAMETER["Standard_Parallel_1",41.71666666666667],
##     PARAMETER["Standard_Parallel_2",42.68333333333333],
##     PARAMETER["Latitude_Of_Origin",41.0],
##     UNIT["Foot_US",0.30480060960121924]]
st_crs(Bibliotecas_Publicas_Ordenadas)
## Coordinate Reference System:
##   User input: 4326 
##   wkt:
## GEOGCS["WGS 84",
##     DATUM["WGS_1984",
##         SPHEROID["WGS 84",6378137,298.257223563,
##             AUTHORITY["EPSG","7030"]],
##         AUTHORITY["EPSG","6326"]],
##     PRIMEM["Greenwich",0,
##         AUTHORITY["EPSG","8901"]],
##     UNIT["degree",0.0174532925199433,
##         AUTHORITY["EPSG","9122"]],
##     AUTHORITY["EPSG","4326"]]
Barrios_Boston_OK <- st_transform(Barrios_Boston_OK, 4326)

Verificamos si el datset de Barrios se transformo correctamente al sistema de coordenadas de Proyeccion Mercator (4326).

st_crs(Barrios_Boston_OK)
## Coordinate Reference System:
##   User input: EPSG:4326 
##   wkt:
## GEOGCS["WGS 84",
##     DATUM["WGS_1984",
##         SPHEROID["WGS 84",6378137,298.257223563,
##             AUTHORITY["EPSG","7030"]],
##         AUTHORITY["EPSG","6326"]],
##     PRIMEM["Greenwich",0,
##         AUTHORITY["EPSG","8901"]],
##     UNIT["degree",0.0174532925199433,
##         AUTHORITY["EPSG","9122"]],
##     AUTHORITY["EPSG","4326"]]
head(Bibliotecas_Publicas_Ordenadas)
## Simple feature collection with 6 features and 4 fields
## geometry type:  POINT
## dimension:      XY
## bbox:           xmin: -71.12214 ymin: 42.2571 xmax: -71.06501 ymax: 42.36145
## CRS:            4326
##   LIBRARIES_      DISTRICT        ST_ADDRESS        BRANCH
## 1          5                151 Cambridge St      West End
## 2         11               690 Washington St Codman Square
## 3         17     HYDE PARK     35 Harvard Av     Hyde Park
## 4         18 JAMAICA PLAIN     433 Centre St      Connolly
## 5         22       ROXBURY     149 Dudley St        Dudley
## 6          1 COPLEY SQUARE   700 Boylston St       Central
##                     geometry
## 1 POINT (-71.06501 42.36145)
## 2 POINT (-71.07097 42.28752)
## 3  POINT (-71.12214 42.2571)
## 4  POINT (-71.1111 42.32065)
## 5 POINT (-71.08385 42.32849)
## 6 POINT (-71.07887 42.34944)
head(Barrios_Boston_OK)
## Simple feature collection with 6 features and 7 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -71.14779 ymin: 42.26761 xmax: -71.05588 ymax: 42.35237
## CRS:            EPSG:4326
##   OBJECTID             Name      Acres Neighborho SqMiles  ShapeSTAre
## 1       27       Roslindale 1605.56824         15    2.51  69938272.9
## 2       28    Jamaica Plain 2519.24539         11    3.94 109737890.8
## 3       29     Mission Hill  350.85356         13    0.55  15283120.0
## 4       30         Longwood  188.61195         28    0.29   8215903.5
## 5       31      Bay Village   26.53984         33    0.04   1156070.8
## 6       32 Leather District   15.63991         27    0.02    681271.7
##   ShapeSTLen                       geometry
## 1  53563.913 MULTIPOLYGON (((-71.12593 4...
## 2  56349.937 MULTIPOLYGON (((-71.10499 4...
## 3  17918.724 MULTIPOLYGON (((-71.09043 4...
## 4  11908.757 MULTIPOLYGON (((-71.09811 4...
## 5   4650.635 MULTIPOLYGON (((-71.06663 4...
## 6   3237.141 MULTIPOLYGON (((-71.05838 4...

Paso III

Ahora mediante un join espacial, utilizando el comando st_join(). A cada biblioteca le asignaremos el barrio que le corresponda.

Bibliotecas_Publicas_Ordenadas_con_Barrios <- st_join(Bibliotecas_Publicas_Ordenadas, Barrios_Boston_OK)
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
head(Bibliotecas_Publicas_Ordenadas_con_Barrios)
## Simple feature collection with 6 features and 11 fields
## geometry type:  POINT
## dimension:      XY
## bbox:           xmin: -71.12214 ymin: 42.2571 xmax: -71.06501 ymax: 42.36145
## CRS:            4326
##   LIBRARIES_      DISTRICT        ST_ADDRESS        BRANCH OBJECTID
## 1          5                151 Cambridge St      West End       40
## 2         11               690 Washington St Codman Square       48
## 3         17     HYDE PARK     35 Harvard Av     Hyde Park       46
## 4         18 JAMAICA PLAIN     433 Centre St      Connolly       28
## 5         22       ROXBURY     149 Dudley St        Dudley       35
## 6          1 COPLEY SQUARE   700 Boylston St       Central       37
##            Name     Acres Neighborho SqMiles ShapeSTAre ShapeSTLen
## 1      West End  190.4907         31    0.30    8297743   17728.59
## 2    Dorchester 4662.8795          6    7.29  203114217  104344.03
## 3     Hyde Park 2927.2212         10    4.57  127509243   66861.24
## 4 Jamaica Plain 2519.2454         11    3.94  109737891   56349.94
## 5       Roxbury 2108.4691         16    3.29   91844546   49488.80
## 6      Back Bay  399.3144          2    0.62   17394066   19455.67
##                     geometry
## 1 POINT (-71.06501 42.36145)
## 2 POINT (-71.07097 42.28752)
## 3  POINT (-71.12214 42.2571)
## 4  POINT (-71.1111 42.32065)
## 5 POINT (-71.08385 42.32849)
## 6 POINT (-71.07887 42.34944)
names(Bibliotecas_Publicas_Ordenadas_con_Barrios)
##  [1] "LIBRARIES_" "DISTRICT"   "ST_ADDRESS" "BRANCH"     "OBJECTID"  
##  [6] "Name"       "Acres"      "Neighborho" "SqMiles"    "ShapeSTAre"
## [11] "ShapeSTLen" "geometry"

Con este nuevo dataset, estamos en condiciones de saber, cuantas Bibliotecas Públias hay por Barrio = Neighborho. Asimismo, podremos graficar mapas de valores máximos, mínimos, promedios, entre otros, de la cantidad de Bibliotecas que haya por Barrio.

Paso IV

Vamos a graficar Un gráfico de puntos para visualizar la cantidad de bilioteas por barrio. Primero debemos agrupar los bibliotecas por barrio = Neighborho.

Total_Bibliotecas_Publicas_Ordenadas_con_Barrios <- Bibliotecas_Publicas_Ordenadas_con_Barrios %>% 
  group_by(Neighborho) %>% 
  summarise(total=n())

Total_Bibliotecas_Publicas_Ordenadas_con_Barrios
## Simple feature collection with 18 features and 2 fields
## geometry type:  GEOMETRY
## dimension:      XY
## bbox:           xmin: -71.16788 ymin: 42.2571 xmax: -71.02858 ymax: 42.37777
## CRS:            4326
## # A tibble: 18 x 3
##    Neighborho total                                                     geometry
##    <fct>      <int>                                               <GEOMETRY [°]>
##  1 10             1                                    POINT (-71.12214 42.2571)
##  2 11             2       MULTIPOINT ((-71.11488 42.30863), (-71.1111 42.32065))
##  3 12             2      MULTIPOINT ((-71.09314 42.27752), (-71.06851 42.27355))
##  4 13             1                                   POINT (-71.09908 42.33246)
##  5 14             1                                   POINT (-71.05502 42.36408)
##  6 15             1                                   POINT (-71.12865 42.28565)
##  7 16             2      MULTIPOINT ((-71.09563 42.31406), (-71.08385 42.32849))
##  8 17             1                                   POINT (-71.03872 42.33584)
##  9 19             1                                    POINT (-71.15745 42.2834)
## 10 2              1                                   POINT (-71.07887 42.34944)
## 11 24             1                                   POINT (-71.12811 42.36011)
## 12 25             2      MULTIPOINT ((-71.16788 42.35132), (-71.15291 42.34769))
## 13 31             1                                   POINT (-71.06501 42.36145)
## 14 32             1                                   POINT (-71.07703 42.34133)
## 15 4              1                                   POINT (-71.06444 42.37582)
## 16 6              5 MULTIPOINT ((-71.08119 42.30799), (-71.07097 42.28752), (-7~
## 17 7              1                                   POINT (-71.06306 42.35214)
## 18 8              1                                   POINT (-71.02858 42.37777)

Ahora realizamos el grafico de puntos:

ggplot(Total_Bibliotecas_Publicas_Ordenadas_con_Barrios) +
  geom_point(aes(x=Neighborho, y=total), color="red") + 
  labs(title="Bibliotecas por Barrio",
       subtitle="Boston",
       caption="Fuente:https://data.boston.gov/dataset/public-libraries/resource/ed710c4a-689a-47af-9dd0-6e4215003c24",
       x="Barrio ID",
       y="Total")

Mapeamos las bibliotecas por barrio, distinguiendo por color:

ggplot() +
    geom_sf(data = Barrios_Boston_OK) +
    geom_sf(data = Total_Bibliotecas_Publicas_Ordenadas_con_Barrios, aes(color = Neighborho), size=1) +
  theme(legend.position = "bottom")

Observamos que en los barrios prevalecen 1 Biblioteca pública por Barrio aunque en 4 Barrios se destacan 2 Bibliotecas. Es llamativo en el Barrio ID: 6 la concentración de 5 Bibliotecas públicas, habrá que estudiar a que se debe esto.

Vamos a pintar cada barrio = Neighborho con la cantidad máxima de Bibliotecas Públicas, utilizando un gráfico de coropletas.

Para ello utilizamos al dataset Total_Bibliotecas_Publicas_Ordenadas_con_Barrios y le quitamos la geometría. Luego lo unimos al dataset Barrios_Boston:

Total_Bibliotecas_Publicas_Ordenadas_con_Barrios <- Total_Bibliotecas_Publicas_Ordenadas_con_Barrios %>% 
  st_set_geometry(NULL)

head(Total_Bibliotecas_Publicas_Ordenadas_con_Barrios)
## # A tibble: 6 x 2
##   Neighborho total
##   <fct>      <int>
## 1 10             1
## 2 11             2
## 3 12             2
## 4 13             1
## 5 14             1
## 6 15             1

Modificamos el nombre de la columa Total = Total Bibliotecas Publicas

colnames(Total_Bibliotecas_Publicas_Ordenadas_con_Barrios)[colnames(Total_Bibliotecas_Publicas_Ordenadas_con_Barrios) %in% c('total')] <- paste0(c('Total_Bibliotecas_Publicas'))

head(Total_Bibliotecas_Publicas_Ordenadas_con_Barrios)
## # A tibble: 6 x 2
##   Neighborho Total_Bibliotecas_Publicas
##   <fct>                           <int>
## 1 10                                  1
## 2 11                                  2
## 3 12                                  2
## 4 13                                  1
## 5 14                                  1
## 6 15                                  1

Mediante el comando left_join() unificamos el datasat Total_Bibliotecas_Publicas_Ordenadas_con_Barrios con Barrios_Boston:

Bibliotecas_Barrios_Total <- Barrios_Boston_OK %>% 
  left_join(Total_Bibliotecas_Publicas_Ordenadas_con_Barrios, by="Neighborho")

head(Bibliotecas_Barrios_Total)
## Simple feature collection with 6 features and 8 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -71.14779 ymin: 42.26761 xmax: -71.05588 ymax: 42.35237
## CRS:            EPSG:4326
##   OBJECTID             Name      Acres Neighborho SqMiles  ShapeSTAre
## 1       27       Roslindale 1605.56824         15    2.51  69938272.9
## 2       28    Jamaica Plain 2519.24539         11    3.94 109737890.8
## 3       29     Mission Hill  350.85356         13    0.55  15283120.0
## 4       30         Longwood  188.61195         28    0.29   8215903.5
## 5       31      Bay Village   26.53984         33    0.04   1156070.8
## 6       32 Leather District   15.63991         27    0.02    681271.7
##   ShapeSTLen Total_Bibliotecas_Publicas                       geometry
## 1  53563.913                          1 MULTIPOLYGON (((-71.12593 4...
## 2  56349.937                          2 MULTIPOLYGON (((-71.10499 4...
## 3  17918.724                          1 MULTIPOLYGON (((-71.09043 4...
## 4  11908.757                         NA MULTIPOLYGON (((-71.09811 4...
## 5   4650.635                         NA MULTIPOLYGON (((-71.06663 4...
## 6   3237.141                         NA MULTIPOLYGON (((-71.05838 4...
names(Bibliotecas_Barrios_Total)
## [1] "OBJECTID"                   "Name"                      
## [3] "Acres"                      "Neighborho"                
## [5] "SqMiles"                    "ShapeSTAre"                
## [7] "ShapeSTLen"                 "Total_Bibliotecas_Publicas"
## [9] "geometry"

Finalmente mapeamos las bibliotecas por barrio :

ggplot() +
  geom_sf(data = Bibliotecas_Barrios_Total, aes(fill = Total_Bibliotecas_Publicas)) + 
  geom_sf_text(data = Bibliotecas_Barrios_Total, aes(label = Name), size=2, colour="black") +
  labs(title = "Total Bibliotecas por Barrio",
         subtitle = "Boston",
         fill = " Total Bibliotecas",
         caption= "Fuente: https://data.boston.gov/dataset/boston-neighborhoods y https://data.boston.gov/dataset/public-libraries/resource/ed710c4a-689a-47af-9dd0-6e4215003c24",
         y="",
         x="") +
  scale_fill_gradient(low="yellow", high="red")
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data

Se observa como cambia el gradiente por barrio según la cantidad de bibliotecas que hay en cada uno de ellos. Los barrios en color gris refiere que no hay presencia de biblioteas públicas.