R Spaital Lab Assignment # 2

task 1:

Join the COVID-19 data to the NYC zip code area data (sf polygons).

#read zip code layer from geopackage
zipcodes <- st_read("ny_data.gpkg", layer = 'nyc_zip_codes')
## Reading layer `nyc_zip_codes' from data source 
##   `C:\Users\Andy\Desktop\R_Course\assignments\Session_8\R-Spatial_II_Lab\ny_data.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
zipcodes_4326 <- st_transform(zipcodes, 4326)
coviddata <- read_csv("tests-by-zcta_2021_04_23.csv",show_col_types = FALSE)
covid_sf <- st_as_sf(coviddata,
                     coords = c('lon','lat'),
                     crs = 4326)
#join and group covid numbers by zip code
zipcovid <-covid_sf %>% 
  st_join(zipcodes_4326, ., join = st_contains) %>%
  group_by(ZIPCODE) %>%
  select(ZIPCODE, POPULATION, PO_NAME, AREA, COUNTY, ST_FIPS, CTY_FIPS, COVID_CASE_COUNT, COVID_CASE_RATE,
         POP_DENOMINATOR, COVID_DEATH_COUNT, COVID_DEATH_RATE, PERCENT_POSITIVE, TOTAL_COVID_TESTS, geom)

kable(head(zipcovid, n=5))
ZIPCODE POPULATION PO_NAME AREA COUNTY ST_FIPS CTY_FIPS COVID_CASE_COUNT COVID_CASE_RATE POP_DENOMINATOR COVID_DEATH_COUNT COVID_DEATH_RATE PERCENT_POSITIVE TOTAL_COVID_TESTS geom
11436 18681 Jamaica 22699295 Queens 36 081 1888 9419.96 20042.54 64 319.32 17.57 11082 POLYGON ((-73.80585 40.6829…
11213 62426 Brooklyn 29631004 Kings 36 047 5166 7996.75 64601.26 203 314.24 13.72 38560 POLYGON ((-73.9374 40.67973…
11212 83866 Brooklyn 41972104 Kings 36 047 7182 9709.74 73966.99 330 446.14 15.64 47319 POLYGON ((-73.90294 40.6708…
11225 56527 Brooklyn 23698630 Kings 36 047 3833 6664.50 57513.69 177 307.75 11.62 33709 POLYGON ((-73.95797 40.6706…
11218 72280 Brooklyn 36868799 Kings 36 047 6199 8377.49 73995.92 218 294.61 13.93 45884 POLYGON ((-73.97208 40.6506…

task 2

food stores to zip code aggregation for food retail

foodstores <- st_read('ny_data.gpkg', layer= 'food_stores')
## Reading layer `food_stores' from data source 
##   `C:\Users\Andy\Desktop\R_Course\assignments\Session_8\R-Spatial_II_Lab\ny_data.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 11300 features and 16 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -74.2484 ymin: 40.50782 xmax: -73.67061 ymax: 40.91008
## Geodetic CRS:  WGS 84
#multipolygons were creating NA values when spatial joining so cast all polygons to single
#removed st_cast() to polygon, because it was causing errors in R environment
#filter for retail by article 20-c liscence stores
zipcodes_t2 <- foodstores %>%
  filter(str_detect(Establishment.Type, "(B|C|E|G|H)")) %>%
  st_join(zipcovid, ., join = st_contains) %>%
  group_by(ZIPCODE, PO_NAME) %>%
  summarize(
    food_stores = n(),
    case_count = if_else(all(is.na(COVID_CASE_COUNT)), 0, max(COVID_CASE_COUNT, na.rm = TRUE)),
    case_rate = if_else(all(is.na(COVID_CASE_RATE)), 0, max(COVID_CASE_RATE, na.rm = TRUE)),
    pop_denominator = if_else(all(is.na(POP_DENOMINATOR)), 0, max(POP_DENOMINATOR, na.rm = TRUE)),
    death_count = if_else(all(is.na(COVID_DEATH_COUNT)), 0, max(COVID_DEATH_COUNT, na.rm = TRUE)),
    death_rate = if_else(all(is.na(COVID_DEATH_RATE)), 0, max(COVID_DEATH_RATE, na.rm = TRUE)),
    percent_positive = if_else(all(is.na(PERCENT_POSITIVE)), 0, max(PERCENT_POSITIVE, na.rm = TRUE)),
    tests = if_else(all(is.na(TOTAL_COVID_TESTS)), 0, max(TOTAL_COVID_TESTS, na.rm = TRUE))
  )
## `summarise()` has grouped output by 'ZIPCODE'. You can override using the
## `.groups` argument.
#replace NAs with zero before pulling max count
  
#view first 5 rows
kable(head(zipcodes_t2, n=5))
ZIPCODE PO_NAME food_stores case_count case_rate pop_denominator death_count death_rate percent_positive tests geom
00083 Central Park 1 0 0.00 0.00 0 0.00 0.00 0 POLYGON ((-73.94969 40.7970…
10001 New York 38 1542 5584.31 27613.09 35 126.75 7.86 20158 POLYGON ((-74.00771 40.7524…
10002 New York 146 5902 7835.62 75322.71 264 350.49 12.63 48197 POLYGON ((-73.9747 40.73653…
10003 New York 67 2803 5192.87 53977.81 48 88.93 6.93 41076 POLYGON ((-73.98011 40.7351…
10004 New York 7 247 8310.58 2972.12 2 67.29 6.92 3599 MULTIPOLYGON (((-74.01119 4…
zipcodes_t2 %>% mapview(zcol='food_stores')

task 3

Aggregate the NYC health facilities (points) to the zip code data. Similarly, Nursing Homes only.

health_fac <- st_read("ny_data.gpkg", layer='health_facilities')
## Reading layer `health_facilities' from data source 
##   `C:\Users\Andy\Desktop\R_Course\assignments\Session_8\R-Spatial_II_Lab\ny_data.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 3843 features and 34 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -79.6299 ymin: 40.51677 xmax: -72.17 ymax: 44.97849
## Geodetic CRS:  WGS 84
NursingHomes <- health_fac %>% filter(Short.Description== 'NH') %>%
  filter(str_detect(Facility.County, '(Bronx|Kings|New York|Queens|Richmond)'))

nh_zip <- NursingHomes %>% 
  st_join(zipcodes_t2,., join=st_contains) %>%
  group_by(ZIPCODE, PO_NAME) %>%
  summarize(
    nursing_homes= n()
  )
## `summarise()` has grouped output by 'ZIPCODE'. You can override using the
## `.groups` argument.
#convert sf to df to join nursing home count into zip code
nhzip1 <-as.data.frame(nh_zip)
zipcodes_t3 <- left_join(zipcodes_t2, nhzip1, by = 'ZIPCODE')
## Warning in sf_column %in% names(g): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 210 of `x` matches multiple rows in `y`.
## ℹ Row 210 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
kable(head(zipcodes_t3, n=5))
ZIPCODE PO_NAME.x food_stores case_count case_rate pop_denominator death_count death_rate percent_positive tests geom.x PO_NAME.y nursing_homes geom.y
00083 Central Park 1 0 0.00 0.00 0 0.00 0.00 0 POLYGON ((-73.94969 40.7970… Central Park 1 POLYGON ((-73.94969 40.7970…
10001 New York 38 1542 5584.31 27613.09 35 126.75 7.86 20158 POLYGON ((-74.00771 40.7524… New York 1 POLYGON ((-74.00771 40.7524…
10002 New York 146 5902 7835.62 75322.71 264 350.49 12.63 48197 POLYGON ((-73.9747 40.73653… New York 2 POLYGON ((-73.9747 40.73653…
10003 New York 67 2803 5192.87 53977.81 48 88.93 6.93 41076 POLYGON ((-73.98011 40.7351… New York 1 POLYGON ((-73.98011 40.7351…
10004 New York 7 247 8310.58 2972.12 2 67.29 6.92 3599 MULTIPOLYGON (((-74.01119 4… New York 1 MULTIPOLYGON (((-74.01119 4…
zipcodes_t3 %>% mapview(zcol='nursing_homes')

task 4

Join the Census ACS population, race, and age data to the NYC Planning Census Tract Data.

acsData <- readLines("ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
  magrittr::extract(-2) %>% 
  textConnection() %>%
  read.csv(header=TRUE, quote= "\"") %>%
  dplyr::select(GEO_ID, 
                totPop = DP05_0001E, elderlyPop = DP05_0024E, # >= 65
                malePop = DP05_0002E, femalePop = DP05_0003E,  
                whitePop = DP05_0037E, blackPop = DP05_0038E,
                asianPop = DP05_0067E, hispanicPop = DP05_0071E,
                adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
  dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1))
tracts <- st_read("geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp", stringsAsFactors = FALSE)
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `C:\Users\Andy\Desktop\R_Course\assignments\Session_8\R-Spatial_II_Lab\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)
tracts <-tracts %<>% dplyr::mutate(cntyFIPS = case_when(
  boro_name == 'Bronx' ~ '005',
  boro_name == 'Brooklyn' ~ '047',
  boro_name == 'Manhattan' ~ '061',
  boro_name == 'Queens' ~ '081',
  boro_name == 'Staten Island' ~ '085'),
  tractFIPS = paste(cntyFIPS, ct2010, sep='')
)

tractpop <- merge(tracts, acsData, by.x = "tractFIPS", by.y="censusCode") 
tractpop <- st_set_crs(tractpop, 4326)

kable(head(tractpop, n=5))
tractFIPS boro_code boro_ct201 boro_name cdeligibil ct2010 ctlabel ntacode ntaname puma shape_area shape_leng cntyFIPS GEO_ID totPop elderlyPop malePop femalePop whitePop blackPop asianPop hispanicPop adultPop citizenAdult geometry
005000100 2 2000100 Bronx I 000100 1 BX98 Rikers Island 3710 18163828 18898.116 005 1400000US36005000100 7080 51 6503 577 1773 4239 130 2329 6909 6100 MULTIPOLYGON (((-73.87287 4…
005000200 2 2000200 Bronx I 000200 2 BX09 Soundview-Castle Hill-Clason Point-Harding Park 3709 5006558 15610.702 005 1400000US36005000200 4542 950 2264 2278 2165 1279 119 3367 3582 2952 MULTIPOLYGON (((-73.85652 4…
005000400 2 2000400 Bronx I 000400 4 BX09 Soundview-Castle Hill-Clason Point-Harding Park 3709 8561175 24725.472 005 1400000US36005000400 5634 710 2807 2827 2623 1699 226 3873 4507 4214 MULTIPOLYGON (((-73.84611 4…
005001600 2 2001600 Bronx E 001600 16 BX09 Soundview-Castle Hill-Clason Point-Harding Park 3709 5221330 9671.306 005 1400000US36005001600 5917 989 2365 3552 2406 2434 68 3603 4416 3851 MULTIPOLYGON (((-73.85514 4…
005001900 2 2001900 Bronx I 001900 19 BX39 Mott Haven-Port Morris 3710 17961359 29999.575 005 1400000US36005001900 2765 76 1363 1402 585 1041 130 1413 2008 1787 MULTIPOLYGON (((-73.89681 4…

task 5

aggregate tract population data to zip code

zipcodes_t5<- zipcodes_t3 %>%
  st_join(tractpop %>% st_centroid(), join = st_contains) %>%
  group_by(ZIPCODE, PO_NAME.x, boro_name) %>%
  summarize(
    case_count = max(case_count, na.rm = TRUE),
    tests = max(tests, na.rm = TRUE),
    food_retail = max(food_stores),
    nursing_homes = max(nursing_homes),
    totPop = sum(totPop),
    elderlyPop = sum(elderlyPop),
    malePop = sum(malePop),
    femalePop = sum(femalePop),
    asianPop = sum(asianPop),
    blackPop = sum(blackPop),
    hispanicPop = sum(hispanicPop),
    whitePop = sum(whitePop)
  ) 
## `summarise()` has grouped output by 'ZIPCODE', 'PO_NAME.x'. You can override
## using the `.groups` argument.
kable(head(zipcodes_t5, n=5))
ZIPCODE PO_NAME.x boro_name case_count tests food_retail nursing_homes totPop elderlyPop malePop femalePop asianPop blackPop hispanicPop whitePop geom.x
00083 Central Park Manhattan 0 0 1 1 3 0 0 3 0 2 1 1 POLYGON ((-73.94969 40.7970…
10001 New York Manhattan 1542 20158 38 1 19146 2500 9799 9347 4837 1092 2435 12485 POLYGON ((-74.00771 40.7524…
10002 New York Manhattan 5902 48197 146 2 74310 15815 35957 38353 32149 5969 20065 24033 POLYGON ((-73.9747 40.73653…
10003 New York Manhattan 2803 41076 67 1 53487 6296 26930 26557 8027 3130 4375 40503 POLYGON ((-73.98011 40.7351…
10004 New York Manhattan 247 3599 7 1 1731 52 948 783 358 71 150 1207 MULTIPOLYGON (((-74.01119 4…
zipcodes_t5 %>% mapview(zcol='totPop')