Join the COVID-19 data to the NYC zip code area data.
# Read data
zpNYC <- sf::st_read("C:/Users/linwa/Downloads/GTECH785_2025/R_Spatial/Data/R-Spatial_I_Lab/ZIP_CODE_040114/ZIP_CODE_040114.shp") %>%
st_transform(crs = 4326)
## Reading layer `ZIP_CODE_040114' from data source
## `C:\Users\linwa\Downloads\GTECH785_2025\R_Spatial\Data\R-Spatial_I_Lab\ZIP_CODE_040114\ZIP_CODE_040114.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
covid19<- read.csv("tests-by-zcta_2021_04_23.csv") %>%
st_as_sf(coords = c("lon", "lat"), crs = 4326)
# Filter data
zpNYC_filter<- zpNYC[, c("ZIPCODE", "COUNTY", "PO_NAME")]
covid19_filter <- covid19[,c("MODIFIED_ZCTA", "COVID_CASE_COUNT", "COVID_DEATH_COUNT", "TOTAL_COVID_TESTS")]
# Not sure why there are duplicates in zip codes
sum(!duplicated(zpNYC_filter$ZIPCODE))
## [1] 248
sum(duplicated(zpNYC_filter$ZIPCODE))
## [1] 15
zpNYC_filter[duplicated(zpNYC_filter$ZIPCODE),]
## Simple feature collection with 15 features and 3 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -74.0473 ymin: 40.58123 xmax: -73.74691 ymax: 40.87921
## Geodetic CRS: WGS 84
## First 10 features:
## ZIPCODE COUNTY PO_NAME geometry
## 20 10463 New York Bronx POLYGON ((-73.91544 40.8755...
## 28 10464 Bronx Bronx POLYGON ((-73.772 40.85712,...
## 29 10464 Bronx Bronx POLYGON ((-73.79235 40.8560...
## 110 10004 New York New York POLYGON ((-74.04166 40.6964...
## 114 10004 New York New York POLYGON ((-74.02418 40.6839...
## 115 10004 New York New York POLYGON ((-74.04699 40.6901...
## 124 11231 Kings Brooklyn POLYGON ((-74.01658 40.6647...
## 145 11096 Queens Inwood POLYGON ((-73.74906 40.6118...
## 147 11693 Queens Far Rockaway POLYGON ((-73.83263 40.6084...
## 150 11693 Queens Far Rockaway POLYGON ((-73.81336 40.5909...
zpNYC_filter<- zpNYC_filter[!duplicated(zpNYC_filter$ZIPCODE),]
# Join the COVID-19 data to the NYC zip code area data.
zip_covid <- sf::st_join(zpNYC_filter,covid19_filter, join = st_contains) %>%
group_by(ZIPCODE) %>%
summarise(
covid_case_count = sum(COVID_CASE_COUNT, na.rm = TRUE),
total_covid_test = sum(TOTAL_COVID_TESTS, na.rm = TRUE),
COVID_DEATH_COUNT = sum(COVID_DEATH_COUNT, na.rm = TRUE))
Aggregate the NYC food retails store data (points) to the zip code data, so that we know how many retail stores in each zip code area.Note that not all locations are for food retail. And we need to choose the specific types according to the data.
# Read and filter data
nycFoodStoreSF<- sf::st_read("nycFoodStore.shp")
## Reading layer `nycFoodStore' from data source
## `C:\Users\linwa\Downloads\GTECH785_2025\R_Spatial\Data\R-Spatial_II_Lab\nycFoodStore.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 11300 features and 16 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -74.2484 ymin: 40.50782 xmax: -73.67061 ymax: 40.91008
## Geodetic CRS: WGS 84
nycFoodStoreSF_filter<- nycFoodStoreSF[, "Estbl_T"]
# Get certain types of food stores and count the number of stores in each zip code area
zip_food<- nycFoodStoreSF %>% dplyr::filter(stringr::str_detect(Estbl_T, '[AJD]')) %>%
sf::st_join(zpNYC, ., join= st_contains) %>%
group_by(ZIPCODE) %>%
summarise(FoodStoreNum = n())
Aggregate the NYC health facilities (points) to the zip code data. Choose appropriate subtypes such as nursing homes from the facilities.
# Read and filter data
nysHealthFacilities<- read.csv("NYS_Health_Facility.csv") %>%
filter(!is.na(Facility.Longitude)) %>% # remove coordinates with na
st_as_sf(coords = c("Facility.Longitude", "Facility.Latitude"), crs = 4326)
nycHealthFacilities<-
nysHealthFacilities[nysHealthFacilities$Facility.County %in% c("Bronx", "Kings", "New York", "Queens", "Richmond"),] %>%
# replace empty string with na
mutate(Short.Description = ifelse(Short.Description == "" | is.na(Short.Description), "Unknown", Short.Description))
# Aggregate the NYC health facilities (points) to the zip code data
nycHealthFacilities_type <- nycHealthFacilities %>%
group_by(Facility.Zip.Code, Short.Description) %>%
summarise(count = n(), .groups = 'drop')%>%
pivot_wider(names_from = Short.Description, values_from = count, values_fill = NA) %>%
group_by(Facility.Zip.Code)%>%
summarise(across(starts_with("CHHA") | starts_with("DTC") | starts_with("HOSP") | starts_with("ADHCP"),
~sum(.x, na.rm = TRUE)))
zip_health <- left_join(zpNYC_filter, st_drop_geometry(nycHealthFacilities_type), by = c("ZIPCODE" = "Facility.Zip.Code"))
mapview(zip_health)
Join the Census ACS population, race, and age data to the NYC Planning Census Tract Data.
# Read and filter data
nycCensus <- sf::st_read('./2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp', stringsAsFactors = FALSE) %>%
st_transform(crs = 4326)
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `C:\Users\linwa\Downloads\GTECH785_2025\R_Spatial\Data\R-Spatial_II_Lab\2010 Census Tracts\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS: WGS84(DD)
nycCensus %<>% dplyr::mutate(cntyFIPS = case_when(
boro_name == 'Bronx' ~ '005',
boro_name == 'Brooklyn' ~ '047',
boro_name == 'Manhattan' ~ '061',
boro_name == 'Queens' ~ '081',
boro_name == 'Staten Island' ~ '085'),
tractFIPS = paste(cntyFIPS, ct2010, sep=''))
acsData <- readLines("ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
magrittr::extract(-2) %>%
textConnection() %>%
read.csv(header=TRUE, quote= "\"") %>%
dplyr::select(GEO_ID,
totPop = DP05_0001E, elderlyPop = DP05_0024E, # >= 65
malePop = DP05_0002E, femalePop = DP05_0003E,
whitePop = DP05_0037E, blackPop = DP05_0038E,
asianPop = DP05_0067E, hispanicPop = DP05_0071E,
adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1))
# Join the Census ACS population, race, and age data to the NYC Planning Census Tract Data
popData <- merge(nycCensus, acsData, by.x ='tractFIPS', by.y = 'censusCode')
Aggregate the ACS census data to zip code area data.
zip_popData<- sf::st_join(zpNYC_filter,
popData %>% sf::st_centroid(), # this essentially converts census tracts to points
join = st_contains) %>%
group_by(ZIPCODE) %>% # use names(zpNYC) and names(popNYC) to see what we have
summarise(totPop = sum(totPop),
malePctg = sum(malePop)/totPop*100, # note the totPop is the newly calculated
asianPop = sum(asianPop),
blackPop = sum(blackPop),
hispanicPop = sum(hispanicPop),
whitePop = sum(whitePop))
## Warning: st_centroid assumes attributes are constant over geometries
# Check and verify the data again
sum(zip_popData$totPop, na.rm = T)
## [1] 8415507
# combine zip_covid, zip_health, zip_food, and zip_popData into a single sf
zipData<- zip_covid %>%
left_join(st_drop_geometry(zip_health), by = "ZIPCODE") %>%
left_join(st_drop_geometry(zip_food), by = "ZIPCODE") %>%
left_join(st_drop_geometry(zip_popData), by = "ZIPCODE")
mapview(zipData)
st_write(zipData,
dsn = "HW08_zip_data.gpkg",
layer="HW08_zip_data",
delete_layer = TRUE)
## Deleting layer `HW08_zip_data' using driver `GPKG'
## Writing layer `HW08_zip_data' to data source `HW08_zip_data.gpkg' using driver `GPKG'
## Writing 248 features with 20 fields and geometry type Polygon.