Hunter_Rspatial_24S_GTECH39300

R Spaital Lab Assignment # 2

task 1:

Join the COVID-19 data to the NYC zip code area data (sf polygons).

#read zip code layer from geopackage
zipcodes <- st_read("ny_data.gpkg", layer = 'nyc_zip_codes')

## Reading layer `nyc_zip_codes' from data source 
##   `C:\Users\Andy\Desktop\R_Course\assignments\Session_8\R-Spatial_II_Lab\ny_data.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)

zipcodes_4326 <- st_transform(zipcodes, 4326)
coviddata <- read_csv("tests-by-zcta_2021_04_23.csv",show_col_types = FALSE)
covid_sf <- st_as_sf(coviddata,
                     coords = c('lon','lat'),
                     crs = 4326)
#join and group covid numbers by zip code
zipcovid <-covid_sf %>% 
  st_join(zipcodes_4326, ., join = st_contains) %>%
  group_by(ZIPCODE) %>%
  select(ZIPCODE, POPULATION, PO_NAME, AREA, COUNTY, ST_FIPS, CTY_FIPS, COVID_CASE_COUNT, COVID_CASE_RATE,
         POP_DENOMINATOR, COVID_DEATH_COUNT, COVID_DEATH_RATE, PERCENT_POSITIVE, TOTAL_COVID_TESTS, geom)

kable(head(zipcovid, n=5))

ZIPCODE	POPULATION	PO_NAME	AREA	COUNTY	ST_FIPS	CTY_FIPS	COVID_CASE_COUNT	COVID_CASE_RATE	POP_DENOMINATOR	COVID_DEATH_COUNT	COVID_DEATH_RATE	PERCENT_POSITIVE	TOTAL_COVID_TESTS	geom
11436	18681	Jamaica	22699295	Queens	36	081	1888	9419.96	20042.54	64	319.32	17.57	11082	POLYGON ((-73.80585 40.6829…
11213	62426	Brooklyn	29631004	Kings	36	047	5166	7996.75	64601.26	203	314.24	13.72	38560	POLYGON ((-73.9374 40.67973…
11212	83866	Brooklyn	41972104	Kings	36	047	7182	9709.74	73966.99	330	446.14	15.64	47319	POLYGON ((-73.90294 40.6708…
11225	56527	Brooklyn	23698630	Kings	36	047	3833	6664.50	57513.69	177	307.75	11.62	33709	POLYGON ((-73.95797 40.6706…
11218	72280	Brooklyn	36868799	Kings	36	047	6199	8377.49	73995.92	218	294.61	13.93	45884	POLYGON ((-73.97208 40.6506…

task 2

food stores to zip code aggregation for food retail

foodstores <- st_read('ny_data.gpkg', layer= 'food_stores')

## Reading layer `food_stores' from data source 
##   `C:\Users\Andy\Desktop\R_Course\assignments\Session_8\R-Spatial_II_Lab\ny_data.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 11300 features and 16 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -74.2484 ymin: 40.50782 xmax: -73.67061 ymax: 40.91008
## Geodetic CRS:  WGS 84

#multipolygons were creating NA values when spatial joining so cast all polygons to single
#removed st_cast() to polygon, because it was causing errors in R environment
#filter for retail by article 20-c liscence stores
zipcodes_t2 <- foodstores %>%
  filter(str_detect(Establishment.Type, "(B|C|E|G|H)")) %>%
  st_join(zipcovid, ., join = st_contains) %>%
  group_by(ZIPCODE, PO_NAME) %>%
  summarize(
    food_stores = n(),
    case_count = if_else(all(is.na(COVID_CASE_COUNT)), 0, max(COVID_CASE_COUNT, na.rm = TRUE)),
    case_rate = if_else(all(is.na(COVID_CASE_RATE)), 0, max(COVID_CASE_RATE, na.rm = TRUE)),
    pop_denominator = if_else(all(is.na(POP_DENOMINATOR)), 0, max(POP_DENOMINATOR, na.rm = TRUE)),
    death_count = if_else(all(is.na(COVID_DEATH_COUNT)), 0, max(COVID_DEATH_COUNT, na.rm = TRUE)),
    death_rate = if_else(all(is.na(COVID_DEATH_RATE)), 0, max(COVID_DEATH_RATE, na.rm = TRUE)),
    percent_positive = if_else(all(is.na(PERCENT_POSITIVE)), 0, max(PERCENT_POSITIVE, na.rm = TRUE)),
    tests = if_else(all(is.na(TOTAL_COVID_TESTS)), 0, max(TOTAL_COVID_TESTS, na.rm = TRUE))
  )

## `summarise()` has grouped output by 'ZIPCODE'. You can override using the
## `.groups` argument.

#replace NAs with zero before pulling max count
  
#view first 5 rows
kable(head(zipcodes_t2, n=5))

ZIPCODE	PO_NAME	food_stores	case_count	case_rate	pop_denominator	death_count	death_rate	percent_positive	tests	geom
00083	Central Park	1	0	0.00	0.00	0	0.00	0.00	0	POLYGON ((-73.94969 40.7970…
10001	New York	38	1542	5584.31	27613.09	35	126.75	7.86	20158	POLYGON ((-74.00771 40.7524…
10002	New York	146	5902	7835.62	75322.71	264	350.49	12.63	48197	POLYGON ((-73.9747 40.73653…
10003	New York	67	2803	5192.87	53977.81	48	88.93	6.93	41076	POLYGON ((-73.98011 40.7351…
10004	New York	7	247	8310.58	2972.12	2	67.29	6.92	3599	MULTIPOLYGON (((-74.01119 4…

zipcodes_t2 %>% mapview(zcol='food_stores')

task 3

Aggregate the NYC health facilities (points) to the zip code data. Similarly, Nursing Homes only.

health_fac <- st_read("ny_data.gpkg", layer='health_facilities')

## Reading layer `health_facilities' from data source 
##   `C:\Users\Andy\Desktop\R_Course\assignments\Session_8\R-Spatial_II_Lab\ny_data.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 3843 features and 34 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -79.6299 ymin: 40.51677 xmax: -72.17 ymax: 44.97849
## Geodetic CRS:  WGS 84

NursingHomes <- health_fac %>% filter(Short.Description== 'NH') %>%
  filter(str_detect(Facility.County, '(Bronx|Kings|New York|Queens|Richmond)'))

nh_zip <- NursingHomes %>% 
  st_join(zipcodes_t2,., join=st_contains) %>%
  group_by(ZIPCODE, PO_NAME) %>%
  summarize(
    nursing_homes= n()
  )

## `summarise()` has grouped output by 'ZIPCODE'. You can override using the
## `.groups` argument.

#convert sf to df to join nursing home count into zip code
nhzip1 <-as.data.frame(nh_zip)
zipcodes_t3 <- left_join(zipcodes_t2, nhzip1, by = 'ZIPCODE')

## Warning in sf_column %in% names(g): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 210 of `x` matches multiple rows in `y`.
## ℹ Row 210 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

kable(head(zipcodes_t3, n=5))

ZIPCODE	PO_NAME.x	food_stores	case_count	case_rate	pop_denominator	death_count	death_rate	percent_positive	tests	geom.x	PO_NAME.y	nursing_homes	geom.y
00083	Central Park	1	0	0.00	0.00	0	0.00	0.00	0	POLYGON ((-73.94969 40.7970…	Central Park	1	POLYGON ((-73.94969 40.7970…
10001	New York	38	1542	5584.31	27613.09	35	126.75	7.86	20158	POLYGON ((-74.00771 40.7524…	New York	1	POLYGON ((-74.00771 40.7524…
10002	New York	146	5902	7835.62	75322.71	264	350.49	12.63	48197	POLYGON ((-73.9747 40.73653…	New York	2	POLYGON ((-73.9747 40.73653…
10003	New York	67	2803	5192.87	53977.81	48	88.93	6.93	41076	POLYGON ((-73.98011 40.7351…	New York	1	POLYGON ((-73.98011 40.7351…
10004	New York	7	247	8310.58	2972.12	2	67.29	6.92	3599	MULTIPOLYGON (((-74.01119 4…	New York	1	MULTIPOLYGON (((-74.01119 4…

zipcodes_t3 %>% mapview(zcol='nursing_homes')

task 4

Join the Census ACS population, race, and age data to the NYC Planning Census Tract Data.

acsData <- readLines("ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
  magrittr::extract(-2) %>% 
  textConnection() %>%
  read.csv(header=TRUE, quote= "\"") %>%
  dplyr::select(GEO_ID, 
                totPop = DP05_0001E, elderlyPop = DP05_0024E, # >= 65
                malePop = DP05_0002E, femalePop = DP05_0003E,  
                whitePop = DP05_0037E, blackPop = DP05_0038E,
                asianPop = DP05_0067E, hispanicPop = DP05_0071E,
                adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
  dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1))
tracts <- st_read("geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp", stringsAsFactors = FALSE)

## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `C:\Users\Andy\Desktop\R_Course\assignments\Session_8\R-Spatial_II_Lab\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)

tracts <-tracts %<>% dplyr::mutate(cntyFIPS = case_when(
  boro_name == 'Bronx' ~ '005',
  boro_name == 'Brooklyn' ~ '047',
  boro_name == 'Manhattan' ~ '061',
  boro_name == 'Queens' ~ '081',
  boro_name == 'Staten Island' ~ '085'),
  tractFIPS = paste(cntyFIPS, ct2010, sep='')
)

tractpop <- merge(tracts, acsData, by.x = "tractFIPS", by.y="censusCode") 
tractpop <- st_set_crs(tractpop, 4326)

kable(head(tractpop, n=5))

tractFIPS	boro_code	boro_ct201	boro_name	cdeligibil	ct2010	ctlabel	ntacode	ntaname	puma	shape_area	shape_leng	cntyFIPS	GEO_ID	totPop	elderlyPop	malePop	femalePop	whitePop	blackPop	asianPop	hispanicPop	adultPop	citizenAdult	geometry
005000100	2	2000100	Bronx	I	000100	1	BX98	Rikers Island	3710	18163828	18898.116	005	1400000US36005000100	7080	51	6503	577	1773	4239	130	2329	6909	6100	MULTIPOLYGON (((-73.87287 4…
005000200	2	2000200	Bronx	I	000200	2	BX09	Soundview-Castle Hill-Clason Point-Harding Park	3709	5006558	15610.702	005	1400000US36005000200	4542	950	2264	2278	2165	1279	119	3367	3582	2952	MULTIPOLYGON (((-73.85652 4…
005000400	2	2000400	Bronx	I	000400	4	BX09	Soundview-Castle Hill-Clason Point-Harding Park	3709	8561175	24725.472	005	1400000US36005000400	5634	710	2807	2827	2623	1699	226	3873	4507	4214	MULTIPOLYGON (((-73.84611 4…
005001600	2	2001600	Bronx	E	001600	16	BX09	Soundview-Castle Hill-Clason Point-Harding Park	3709	5221330	9671.306	005	1400000US36005001600	5917	989	2365	3552	2406	2434	68	3603	4416	3851	MULTIPOLYGON (((-73.85514 4…
005001900	2	2001900	Bronx	I	001900	19	BX39	Mott Haven-Port Morris	3710	17961359	29999.575	005	1400000US36005001900	2765	76	1363	1402	585	1041	130	1413	2008	1787	MULTIPOLYGON (((-73.89681 4…

task 5

aggregate tract population data to zip code

zipcodes_t5<- zipcodes_t3 %>%
  st_join(tractpop %>% st_centroid(), join = st_contains) %>%
  group_by(ZIPCODE, PO_NAME.x, boro_name) %>%
  summarize(
    case_count = max(case_count, na.rm = TRUE),
    tests = max(tests, na.rm = TRUE),
    food_retail = max(food_stores),
    nursing_homes = max(nursing_homes),
    totPop = sum(totPop),
    elderlyPop = sum(elderlyPop),
    malePop = sum(malePop),
    femalePop = sum(femalePop),
    asianPop = sum(asianPop),
    blackPop = sum(blackPop),
    hispanicPop = sum(hispanicPop),
    whitePop = sum(whitePop)
  )

## `summarise()` has grouped output by 'ZIPCODE', 'PO_NAME.x'. You can override
## using the `.groups` argument.

kable(head(zipcodes_t5, n=5))

ZIPCODE	PO_NAME.x	boro_name	case_count	tests	food_retail	nursing_homes	totPop	elderlyPop	malePop	femalePop	asianPop	blackPop	hispanicPop	whitePop	geom.x
00083	Central Park	Manhattan	0	0	1	1	3	0	0	3	0	2	1	1	POLYGON ((-73.94969 40.7970…
10001	New York	Manhattan	1542	20158	38	1	19146	2500	9799	9347	4837	1092	2435	12485	POLYGON ((-74.00771 40.7524…
10002	New York	Manhattan	5902	48197	146	2	74310	15815	35957	38353	32149	5969	20065	24033	POLYGON ((-73.9747 40.73653…
10003	New York	Manhattan	2803	41076	67	1	53487	6296	26930	26557	8027	3130	4375	40503	POLYGON ((-73.98011 40.7351…
10004	New York	Manhattan	247	3599	7	1	1731	52	948	783	358	71	150	1207	MULTIPOLYGON (((-74.01119 4…

zipcodes_t5 %>% mapview(zcol='totPop')

Hunter_Rspatial_24S_GTECH39300_24084343

Andy Xie

2024-03-19