R Spatial Lab Assignment # 2
Task 1 (Prerequisite): Set up a R project for the R-Spatial
section.
Task 2: Join the COVID-19 data to the NYC zip code area data (sf or
sp polygons).
# read nyc zip code sf
nyc_zip_sf <- st_read(dsn = "./data/nys/nys.gpkg", layer = "nyc_zipcode") %>% st_transform(2263)
## Reading layer `nyc_zipcode' from data source
## `C:\Users\steve\Downloads\G385_R_Data_Viz\R_Spatial\data\nys\nys.gpkg'
## using driver `GPKG'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
# read covid zcta
covid2021_04_23 <- read_csv("C:/Users/steve/Downloads/G385_R_Data_Viz/R-Spatial_II_Lab/R-Spatial_II_Lab/tests-by-zcta_2021_04_23.csv")
## Rows: 177 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): NEIGHBORHOOD_NAME, BOROUGH_GROUP, label
## dbl (10): MODIFIED_ZCTA, lat, lon, COVID_CASE_COUNT, COVID_CASE_RATE, POP_DE...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
covid_zip_sf <- merge(nyc_zip_sf, covid2021_04_23, by.x='ZIPCODE', by.y='MODIFIED_ZCTA')
# check view
covid_zip_sf %>% mapview::mapview()
Task 3: Aggregate the NYC food retails store data (points) to the
zip code data, so that we know how many retail stores in each zip code
area. Note that not all locations are for food retail. And we need to
choose the specific types according to the data.
# read retail food store sf
rf_sf <- st_read("C:/Users/steve/Downloads/G385_R_Data_Viz/R-Spatial_II_Lab/R-Spatial_II_Lab/nycFoodStore.shp")
## Reading layer `nycFoodStore' from data source
## `C:\Users\steve\Downloads\G385_R_Data_Viz\R-Spatial_II_Lab\R-Spatial_II_Lab\nycFoodStore.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 11300 features and 16 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -74.2484 ymin: 40.50782 xmax: -73.67061 ymax: 40.91008
## Geodetic CRS: WGS 84
#Aggregate the NYC food retails store data (points) to the zip code data, so that we know how many retail stores in each zip code area. Note that not all locations are for food retail. And we need to choose the specific types according to the data.
# filter to see if Establishment type column contains characters A, J, or D
# join with nyc_zip based on contains property
# group by zip code
# count number per zipcode
rf_sf <- rf_sf %>% dplyr::filter(stringr::str_detect(Estbl_T, '[AJD]')) %>%
st_transform(2263) %>%
sf::st_join(nyc_zip_sf, ., join= st_contains) %>%
group_by(ZIPCODE) %>%
summarise(FoodStoreNum = n())
# check view
rf_sf %>% mapview::mapview()
Task 4: Aggregate the NYC health facilities (points) to the zip code
data. Similarly, choose appropriate subtypes such as nursing homes from
the facilities.
# filter NH: Nursing homes
NYS_Health_sf <- st_read(dsn = "./data/nys/nys.gpkg", layer = "NYS_Health_Facilities")
## Reading layer `NYS_Health_Facilities' from data source
## `C:\Users\steve\Downloads\G385_R_Data_Viz\R_Spatial\data\nys\nys.gpkg'
## using driver `GPKG'
## Simple feature collection with 3844 features and 34 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -79.6299 ymin: 40.51677 xmax: -72.17 ymax: 44.97849
## Geodetic CRS: WGS 84
# filter for only nursing home health facility type
# transform to match zip crs
nycNursingHome <- NYS_Health_sf %>%
dplyr::filter(Short.Description == 'NH') %>%
st_transform(st_crs(nyc_zip_sf))
nycNursingHome <- sf::st_join(nyc_zip_sf, nycNursingHome, join=st_contains) %>%
group_by(ZIPCODE) %>%
summarise(NH_num = n())
nycNursingHome %>% mapview::mapview()
Task 5: Join the Census ACS population, race, and age data to the
NYC Planning Census Tract Data.
# read acs
# select only necessary columns
# remove metadata row
# to join:
# convert leading fips codes to match census codes
# select last 8 digits of GEO_ID
# convert first two letters to fips codes
acsData <- read.csv("C:/Users/steve/Downloads/G385_R_Data_Viz/R-Spatial_II_Lab/R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
dplyr::select(
GEO_ID,
pop_total = DP05_0001E,
elder65_pop = DP05_0024E,
male_pop = DP05_0002E,
female_pop = DP05_0003E,
white_pop = DP05_0037E,
black_pop = DP05_0038E,
asian_pop = DP05_0067E,
hispanic_pop = DP05_0071E,
adult_pop = DP05_0021E,
citizen_adult = DP05_0087E
) %>%
dplyr::slice(-1) %>%
dplyr::mutate(
boro_part = substr(GEO_ID, 13,20),
boro_ct = case_when(
substr(boro_part, 1,2) == '61' ~ paste0('1', substr(boro_part,3,8)),
substr(boro_part, 1,2) == '47' ~ paste0('3', substr(boro_part,3,8)),
substr(boro_part, 1,2) == '05' ~ paste0('2', substr(boro_part,3,8)),
substr(boro_part, 1,2) == '81' ~ paste0('4', substr(boro_part,3,8)),
substr(boro_part, 1,2) == '85' ~ paste0('59', substr(boro_part,3,8))
)
)
str(acsData)
## 'data.frame': 2167 obs. of 13 variables:
## $ GEO_ID : chr "1400000US36005000100" "1400000US36005000200" "1400000US36005000400" "1400000US36005001600" ...
## $ pop_total : chr "7080" "4542" "5634" "5917" ...
## $ elder65_pop : chr "51" "950" "710" "989" ...
## $ male_pop : chr "6503" "2264" "2807" "2365" ...
## $ female_pop : chr "577" "2278" "2827" "3552" ...
## $ white_pop : chr "1773" "2165" "2623" "2406" ...
## $ black_pop : chr "4239" "1279" "1699" "2434" ...
## $ asian_pop : chr "130" "119" "226" "68" ...
## $ hispanic_pop : chr "2329" "3367" "3873" "3603" ...
## $ adult_pop : chr "6909" "3582" "4507" "4416" ...
## $ citizen_adult: chr "6100" "2952" "4214" "3851" ...
## $ boro_part : chr "05000100" "05000200" "05000400" "05001600" ...
## $ boro_ct : chr "2000100" "2000200" "2000400" "2001600" ...
# read census tracts as sf
# transform to match zip
census_tract_2010 <- st_read("C:/Users/steve/Downloads/G385_R_Data_Viz/R-Spatial_II_Lab/R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp")%>%
st_transform(st_crs(nyc_zip_sf))
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `C:\Users\steve\Downloads\G385_R_Data_Viz\R-Spatial_II_Lab\R-Spatial_II_Lab\2010 Census Tracts\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS: WGS84(DD)
# merge census tract shp sf with acs table
census_tract_2010 <- merge(census_tract_2010, acsData, by.x='boro_ct201', by.y='boro_ct')
str(census_tract_2010)
## Classes 'sf' and 'data.frame': 2055 obs. of 24 variables:
## $ boro_ct201 : chr "1000100" "1000201" "1000202" "1000500" ...
## $ boro_code : chr "1" "1" "1" "1" ...
## $ boro_name : chr "Manhattan" "Manhattan" "Manhattan" "Manhattan" ...
## $ cdeligibil : chr "I" "E" "E" "I" ...
## $ ct2010 : chr "000100" "000201" "000202" "000500" ...
## $ ctlabel : chr "1" "2.01" "2.02" "5" ...
## $ ntacode : chr "MN99" "MN28" "MN28" "MN99" ...
## $ ntaname : chr "park-cemetery-etc-Manhattan" "Lower East Side" "Lower East Side" "park-cemetery-etc-Manhattan" ...
## $ puma : chr "3810" "3809" "3809" "3810" ...
## $ shape_area : num 1844421 971599 3315114 9083215 2583418 ...
## $ shape_leng : num 11023 4748 8568 32642 6971 ...
## $ GEO_ID : chr "1400000US36061000100" "1400000US36061000201" "1400000US36061000202" "1400000US36061000500" ...
## $ pop_total : chr "0" "2835" "7764" "0" ...
## $ elder65_pop : chr "0" "521" "1524" "0" ...
## $ male_pop : chr "0" "1218" "3364" "0" ...
## $ female_pop : chr "0" "1617" "4400" "0" ...
## $ white_pop : chr "0" "455" "2794" "0" ...
## $ black_pop : chr "0" "342" "1713" "0" ...
## $ asian_pop : chr "0" "1317" "1914" "0" ...
## $ hispanic_pop : chr "0" "1265" "2915" "0" ...
## $ adult_pop : chr "0" "2200" "6393" "0" ...
## $ citizen_adult: chr "0" "1682" "5899" "0" ...
## $ boro_part : chr "61000100" "61000201" "61000202" "61000500" ...
## $ geometry :sfc_MULTIPOLYGON of length 2055; first list element: List of 2
## ..$ :List of 1
## .. ..$ : num [1:48, 1:2] 972082 972185 972399 972385 972407 ...
## ..$ :List of 1
## .. ..$ : num [1:30, 1:2] 973173 973311 973330 973793 973686 ...
## ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:23] "boro_ct201" "boro_code" "boro_name" "cdeligibil" ...
census_tract_2010 %>% mapview::mapview()
Task 6: Aggregate the ACS census data to zip code area data.
# aggregate to zip
census_zip <- sf::st_join(nyc_zip_sf, census_tract_2010 %>% sf::st_centroid(),, join=st_contains) %>%
group_by(ZIPCODE) %>%
summarize(
pop_total = sum(as.numeric(pop_total)),
elder65_pop = sum(as.numeric(elder65_pop)),
male_pop = sum(as.numeric(male_pop)),
malePctg = sum(as.numeric(male_pop))/pop_total*100,
female_pop = sum(as.numeric(female_pop)),
white_pop = sum(as.numeric(white_pop)),
black_pop = sum(as.numeric(black_pop)),
hispanic_pop = sum(as.numeric(hispanic_pop)),
adult_pop = sum(as.numeric(adult_pop)),
citizen_adult = sum(as.numeric(citizen_adult))
)
## Warning: st_centroid assumes attributes are constant over geometries
str(census_zip)
## sf [248 × 12] (S3: sf/tbl_df/tbl/data.frame)
## $ ZIPCODE : chr [1:248] "00083" "10001" "10002" "10003" ...
## $ pop_total : num [1:248] 3 19146 74310 53487 NA ...
## $ elder65_pop : num [1:248] 0 2500 15815 6296 NA ...
## $ male_pop : num [1:248] 0 9799 35957 26930 NA ...
## $ malePctg : num [1:248] 0 51.2 48.4 50.3 NA ...
## $ female_pop : num [1:248] 3 9347 38353 26557 NA ...
## $ white_pop : num [1:248] 1 12485 24033 40503 NA ...
## $ black_pop : num [1:248] 2 1092 5969 3130 NA ...
## $ hispanic_pop : num [1:248] 1 2435 20065 4375 NA ...
## $ adult_pop : num [1:248] 2 17658 64499 49620 NA ...
## $ citizen_adult: num [1:248] 2 13800 52528 42971 NA ...
## $ geom :sfc_GEOMETRY of length 248; first list element: List of 1
## ..$ : num [1:216, 1:2] 998310 998283 998251 998178 998050 ...
## ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
## - attr(*, "sf_column")= chr "geom"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:11] "ZIPCODE" "pop_total" "elder65_pop" "male_pop" ...
census_zip[0:10] %>% mapview::mapview()
Join them all and View
- I’m not sure how to disable zipcode column repeating
mega <- sf::st_join(rf_sf, nycNursingHome, join=st_equals)
mega <- sf::st_join(mega, covid_zip_sf, join=st_equals)
mega <- sf::st_join(mega, census_zip, join=st_equals)
## New names:
## • `ZIPCODE.x` -> `ZIPCODE.x...1`
## • `ZIPCODE.y` -> `ZIPCODE.y...4`
## • `ZIPCODE.x` -> `ZIPCODE.x...6`
## • `ZIPCODE.y` -> `ZIPCODE.y...30`
mega[0:20] %>% mapview::mapview()