Join the COVID-19 data to the NYC zip code area data (sf polygons).
#read zip code layer from geopackage
zipcodes <- st_read("ny_data.gpkg", layer = 'nyc_zip_codes')
## Reading layer `nyc_zip_codes' from data source
## `C:\Users\Andy\Desktop\R_Course\assignments\Session_8\R-Spatial_II_Lab\ny_data.gpkg'
## using driver `GPKG'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
zipcodes_4326 <- st_transform(zipcodes, 4326)
coviddata <- read_csv("tests-by-zcta_2021_04_23.csv",show_col_types = FALSE)
covid_sf <- st_as_sf(coviddata,
coords = c('lon','lat'),
crs = 4326)
#join and group covid numbers by zip code
zipcovid <-covid_sf %>%
st_join(zipcodes_4326, ., join = st_contains) %>%
group_by(ZIPCODE) %>%
select(ZIPCODE, POPULATION, PO_NAME, AREA, COUNTY, ST_FIPS, CTY_FIPS, COVID_CASE_COUNT, COVID_CASE_RATE,
POP_DENOMINATOR, COVID_DEATH_COUNT, COVID_DEATH_RATE, PERCENT_POSITIVE, TOTAL_COVID_TESTS, geom)
kable(head(zipcovid, n=5))
| ZIPCODE | POPULATION | PO_NAME | AREA | COUNTY | ST_FIPS | CTY_FIPS | COVID_CASE_COUNT | COVID_CASE_RATE | POP_DENOMINATOR | COVID_DEATH_COUNT | COVID_DEATH_RATE | PERCENT_POSITIVE | TOTAL_COVID_TESTS | geom |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 11436 | 18681 | Jamaica | 22699295 | Queens | 36 | 081 | 1888 | 9419.96 | 20042.54 | 64 | 319.32 | 17.57 | 11082 | POLYGON ((-73.80585 40.6829… |
| 11213 | 62426 | Brooklyn | 29631004 | Kings | 36 | 047 | 5166 | 7996.75 | 64601.26 | 203 | 314.24 | 13.72 | 38560 | POLYGON ((-73.9374 40.67973… |
| 11212 | 83866 | Brooklyn | 41972104 | Kings | 36 | 047 | 7182 | 9709.74 | 73966.99 | 330 | 446.14 | 15.64 | 47319 | POLYGON ((-73.90294 40.6708… |
| 11225 | 56527 | Brooklyn | 23698630 | Kings | 36 | 047 | 3833 | 6664.50 | 57513.69 | 177 | 307.75 | 11.62 | 33709 | POLYGON ((-73.95797 40.6706… |
| 11218 | 72280 | Brooklyn | 36868799 | Kings | 36 | 047 | 6199 | 8377.49 | 73995.92 | 218 | 294.61 | 13.93 | 45884 | POLYGON ((-73.97208 40.6506… |
food stores to zip code aggregation for food retail
foodstores <- st_read('ny_data.gpkg', layer= 'food_stores')
## Reading layer `food_stores' from data source
## `C:\Users\Andy\Desktop\R_Course\assignments\Session_8\R-Spatial_II_Lab\ny_data.gpkg'
## using driver `GPKG'
## Simple feature collection with 11300 features and 16 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -74.2484 ymin: 40.50782 xmax: -73.67061 ymax: 40.91008
## Geodetic CRS: WGS 84
#multipolygons were creating NA values when spatial joining so cast all polygons to single
#removed st_cast() to polygon, because it was causing errors in R environment
#filter for retail by article 20-c liscence stores
zipcodes_t2 <- foodstores %>%
filter(str_detect(Establishment.Type, "(B|C|E|G|H)")) %>%
st_join(zipcovid, ., join = st_contains) %>%
group_by(ZIPCODE, PO_NAME) %>%
summarize(
food_stores = n(),
case_count = if_else(all(is.na(COVID_CASE_COUNT)), 0, max(COVID_CASE_COUNT, na.rm = TRUE)),
case_rate = if_else(all(is.na(COVID_CASE_RATE)), 0, max(COVID_CASE_RATE, na.rm = TRUE)),
pop_denominator = if_else(all(is.na(POP_DENOMINATOR)), 0, max(POP_DENOMINATOR, na.rm = TRUE)),
death_count = if_else(all(is.na(COVID_DEATH_COUNT)), 0, max(COVID_DEATH_COUNT, na.rm = TRUE)),
death_rate = if_else(all(is.na(COVID_DEATH_RATE)), 0, max(COVID_DEATH_RATE, na.rm = TRUE)),
percent_positive = if_else(all(is.na(PERCENT_POSITIVE)), 0, max(PERCENT_POSITIVE, na.rm = TRUE)),
tests = if_else(all(is.na(TOTAL_COVID_TESTS)), 0, max(TOTAL_COVID_TESTS, na.rm = TRUE))
)
## `summarise()` has grouped output by 'ZIPCODE'. You can override using the
## `.groups` argument.
#replace NAs with zero before pulling max count
#view first 5 rows
kable(head(zipcodes_t2, n=5))
| ZIPCODE | PO_NAME | food_stores | case_count | case_rate | pop_denominator | death_count | death_rate | percent_positive | tests | geom |
|---|---|---|---|---|---|---|---|---|---|---|
| 00083 | Central Park | 1 | 0 | 0.00 | 0.00 | 0 | 0.00 | 0.00 | 0 | POLYGON ((-73.94969 40.7970… |
| 10001 | New York | 38 | 1542 | 5584.31 | 27613.09 | 35 | 126.75 | 7.86 | 20158 | POLYGON ((-74.00771 40.7524… |
| 10002 | New York | 146 | 5902 | 7835.62 | 75322.71 | 264 | 350.49 | 12.63 | 48197 | POLYGON ((-73.9747 40.73653… |
| 10003 | New York | 67 | 2803 | 5192.87 | 53977.81 | 48 | 88.93 | 6.93 | 41076 | POLYGON ((-73.98011 40.7351… |
| 10004 | New York | 7 | 247 | 8310.58 | 2972.12 | 2 | 67.29 | 6.92 | 3599 | MULTIPOLYGON (((-74.01119 4… |
zipcodes_t2 %>% mapview(zcol='food_stores')
Aggregate the NYC health facilities (points) to the zip code data. Similarly, Nursing Homes only.
health_fac <- st_read("ny_data.gpkg", layer='health_facilities')
## Reading layer `health_facilities' from data source
## `C:\Users\Andy\Desktop\R_Course\assignments\Session_8\R-Spatial_II_Lab\ny_data.gpkg'
## using driver `GPKG'
## Simple feature collection with 3843 features and 34 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -79.6299 ymin: 40.51677 xmax: -72.17 ymax: 44.97849
## Geodetic CRS: WGS 84
NursingHomes <- health_fac %>% filter(Short.Description== 'NH') %>%
filter(str_detect(Facility.County, '(Bronx|Kings|New York|Queens|Richmond)'))
nh_zip <- NursingHomes %>%
st_join(zipcodes_t2,., join=st_contains) %>%
group_by(ZIPCODE, PO_NAME) %>%
summarize(
nursing_homes= n()
)
## `summarise()` has grouped output by 'ZIPCODE'. You can override using the
## `.groups` argument.
#convert sf to df to join nursing home count into zip code
nhzip1 <-as.data.frame(nh_zip)
zipcodes_t3 <- left_join(zipcodes_t2, nhzip1, by = 'ZIPCODE')
## Warning in sf_column %in% names(g): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 210 of `x` matches multiple rows in `y`.
## ℹ Row 210 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
kable(head(zipcodes_t3, n=5))
| ZIPCODE | PO_NAME.x | food_stores | case_count | case_rate | pop_denominator | death_count | death_rate | percent_positive | tests | geom.x | PO_NAME.y | nursing_homes | geom.y |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 00083 | Central Park | 1 | 0 | 0.00 | 0.00 | 0 | 0.00 | 0.00 | 0 | POLYGON ((-73.94969 40.7970… | Central Park | 1 | POLYGON ((-73.94969 40.7970… |
| 10001 | New York | 38 | 1542 | 5584.31 | 27613.09 | 35 | 126.75 | 7.86 | 20158 | POLYGON ((-74.00771 40.7524… | New York | 1 | POLYGON ((-74.00771 40.7524… |
| 10002 | New York | 146 | 5902 | 7835.62 | 75322.71 | 264 | 350.49 | 12.63 | 48197 | POLYGON ((-73.9747 40.73653… | New York | 2 | POLYGON ((-73.9747 40.73653… |
| 10003 | New York | 67 | 2803 | 5192.87 | 53977.81 | 48 | 88.93 | 6.93 | 41076 | POLYGON ((-73.98011 40.7351… | New York | 1 | POLYGON ((-73.98011 40.7351… |
| 10004 | New York | 7 | 247 | 8310.58 | 2972.12 | 2 | 67.29 | 6.92 | 3599 | MULTIPOLYGON (((-74.01119 4… | New York | 1 | MULTIPOLYGON (((-74.01119 4… |
zipcodes_t3 %>% mapview(zcol='nursing_homes')
Join the Census ACS population, race, and age data to the NYC Planning Census Tract Data.
acsData <- readLines("ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
magrittr::extract(-2) %>%
textConnection() %>%
read.csv(header=TRUE, quote= "\"") %>%
dplyr::select(GEO_ID,
totPop = DP05_0001E, elderlyPop = DP05_0024E, # >= 65
malePop = DP05_0002E, femalePop = DP05_0003E,
whitePop = DP05_0037E, blackPop = DP05_0038E,
asianPop = DP05_0067E, hispanicPop = DP05_0071E,
adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1))
tracts <- st_read("geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp", stringsAsFactors = FALSE)
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `C:\Users\Andy\Desktop\R_Course\assignments\Session_8\R-Spatial_II_Lab\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS: WGS84(DD)
tracts <-tracts %<>% dplyr::mutate(cntyFIPS = case_when(
boro_name == 'Bronx' ~ '005',
boro_name == 'Brooklyn' ~ '047',
boro_name == 'Manhattan' ~ '061',
boro_name == 'Queens' ~ '081',
boro_name == 'Staten Island' ~ '085'),
tractFIPS = paste(cntyFIPS, ct2010, sep='')
)
tractpop <- merge(tracts, acsData, by.x = "tractFIPS", by.y="censusCode")
tractpop <- st_set_crs(tractpop, 4326)
kable(head(tractpop, n=5))
| tractFIPS | boro_code | boro_ct201 | boro_name | cdeligibil | ct2010 | ctlabel | ntacode | ntaname | puma | shape_area | shape_leng | cntyFIPS | GEO_ID | totPop | elderlyPop | malePop | femalePop | whitePop | blackPop | asianPop | hispanicPop | adultPop | citizenAdult | geometry |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 005000100 | 2 | 2000100 | Bronx | I | 000100 | 1 | BX98 | Rikers Island | 3710 | 18163828 | 18898.116 | 005 | 1400000US36005000100 | 7080 | 51 | 6503 | 577 | 1773 | 4239 | 130 | 2329 | 6909 | 6100 | MULTIPOLYGON (((-73.87287 4… |
| 005000200 | 2 | 2000200 | Bronx | I | 000200 | 2 | BX09 | Soundview-Castle Hill-Clason Point-Harding Park | 3709 | 5006558 | 15610.702 | 005 | 1400000US36005000200 | 4542 | 950 | 2264 | 2278 | 2165 | 1279 | 119 | 3367 | 3582 | 2952 | MULTIPOLYGON (((-73.85652 4… |
| 005000400 | 2 | 2000400 | Bronx | I | 000400 | 4 | BX09 | Soundview-Castle Hill-Clason Point-Harding Park | 3709 | 8561175 | 24725.472 | 005 | 1400000US36005000400 | 5634 | 710 | 2807 | 2827 | 2623 | 1699 | 226 | 3873 | 4507 | 4214 | MULTIPOLYGON (((-73.84611 4… |
| 005001600 | 2 | 2001600 | Bronx | E | 001600 | 16 | BX09 | Soundview-Castle Hill-Clason Point-Harding Park | 3709 | 5221330 | 9671.306 | 005 | 1400000US36005001600 | 5917 | 989 | 2365 | 3552 | 2406 | 2434 | 68 | 3603 | 4416 | 3851 | MULTIPOLYGON (((-73.85514 4… |
| 005001900 | 2 | 2001900 | Bronx | I | 001900 | 19 | BX39 | Mott Haven-Port Morris | 3710 | 17961359 | 29999.575 | 005 | 1400000US36005001900 | 2765 | 76 | 1363 | 1402 | 585 | 1041 | 130 | 1413 | 2008 | 1787 | MULTIPOLYGON (((-73.89681 4… |
aggregate tract population data to zip code
zipcodes_t5<- zipcodes_t3 %>%
st_join(tractpop %>% st_centroid(), join = st_contains) %>%
group_by(ZIPCODE, PO_NAME.x, boro_name) %>%
summarize(
case_count = max(case_count, na.rm = TRUE),
tests = max(tests, na.rm = TRUE),
food_retail = max(food_stores),
nursing_homes = max(nursing_homes),
totPop = sum(totPop),
elderlyPop = sum(elderlyPop),
malePop = sum(malePop),
femalePop = sum(femalePop),
asianPop = sum(asianPop),
blackPop = sum(blackPop),
hispanicPop = sum(hispanicPop),
whitePop = sum(whitePop)
)
## `summarise()` has grouped output by 'ZIPCODE', 'PO_NAME.x'. You can override
## using the `.groups` argument.
kable(head(zipcodes_t5, n=5))
| ZIPCODE | PO_NAME.x | boro_name | case_count | tests | food_retail | nursing_homes | totPop | elderlyPop | malePop | femalePop | asianPop | blackPop | hispanicPop | whitePop | geom.x |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 00083 | Central Park | Manhattan | 0 | 0 | 1 | 1 | 3 | 0 | 0 | 3 | 0 | 2 | 1 | 1 | POLYGON ((-73.94969 40.7970… |
| 10001 | New York | Manhattan | 1542 | 20158 | 38 | 1 | 19146 | 2500 | 9799 | 9347 | 4837 | 1092 | 2435 | 12485 | POLYGON ((-74.00771 40.7524… |
| 10002 | New York | Manhattan | 5902 | 48197 | 146 | 2 | 74310 | 15815 | 35957 | 38353 | 32149 | 5969 | 20065 | 24033 | POLYGON ((-73.9747 40.73653… |
| 10003 | New York | Manhattan | 2803 | 41076 | 67 | 1 | 53487 | 6296 | 26930 | 26557 | 8027 | 3130 | 4375 | 40503 | POLYGON ((-73.98011 40.7351… |
| 10004 | New York | Manhattan | 247 | 3599 | 7 | 1 | 1731 | 52 | 948 | 783 | 358 | 71 | 150 | 1207 | MULTIPOLYGON (((-74.01119 4… |
zipcodes_t5 %>% mapview(zcol='totPop')