Join the COVID-19 data to the NYC zip code area data (sf or sp polygons).

# Read COVID-19 case data
covidData <- read.csv("R-Spatial_II_Lab/tests-by-zcta_2021_04_23.csv")

# Rename column for consistency
covidData <- covidData %>% rename(ZIPCODE = MODIFIED_ZCTA)

# Convert ZIPCODE in covidData to character
covidData <- covidData %>% mutate(ZIPCODE = as.character(ZIPCODE))

# Join COVID data to ZIP code shapefile
zpNYC <- zpNYC %>% 
  dplyr::left_join(covidData, by = "ZIPCODE")

# Replace NA values in COVID_CASE_COUNT with 0
zpNYC <- zpNYC %>% 
  mutate(COVID_CASE_COUNT = replace_na(COVID_CASE_COUNT, 0))

Aggregate the NYC food retails store data (points) to the zip code data, so that we know how many retail stores in each zip code area. Note that not all locations are for food retail. And we need to choose the specific types according to the data.

# Ensure CRS matches before spatial join
nycFoodStoreSF <- sf::st_transform(nycFoodStoreSF, st_crs(zpNYC))

# Filter specific types of food stores
nycFoodStoreSF <- nycFoodStoreSF %>% 
  dplyr::filter(stringr::str_detect(Estbl_T, '[AJD]'))  # Adjust regex based on categories

# Perform spatial join directly with st_join()
foodStoreZip <- sf::st_join(zpNYC, nycFoodStoreSF, join = st_contains) %>%
  group_by(ZIPCODE) %>%
  summarise(FoodStoreNum = n())

# Ensure foodStoreZip is an sf object before merging back
st_geometry(foodStoreZip) <- NULL 

# Join back the aggregated data to the ZIP code shapefile
zpNYC <- zpNYC %>% 
  dplyr::left_join(foodStoreZip, by = "ZIPCODE")

Aggregate the NYC health facilities (points) to the zip code data. Similarly, choose appropriate subtypes such as nursing homes from the facilities.

# Read the NYC health facility data (point data)
nycHealthFacilities <- read.csv("R-Spatial_II_Lab/NYS_Health_Facility.csv")

# Remove rows with missing coordinates
nycHealthFacilities <- nycHealthFacilities %>%
  dplyr::filter(!is.na(Facility.Longitude) & !is.na(Facility.Latitude))

# Convert to an sf object using correct latitude/longitude columns
nycHealthFacilities <- sf::st_as_sf(nycHealthFacilities, coords = c("Facility.Longitude", "Facility.Latitude"), crs = 4326)

# Ensure CRS matches before spatial join
nycHealthFacilities <- sf::st_transform(nycHealthFacilities, st_crs(zpNYC))

# Filter nursing homes (NH) or other relevant facility types
nycNursingHome <- nycHealthFacilities %>% 
  dplyr::filter(Short.Description == 'NH')

# Aggregate nursing homes to ZIP codes
nursingHomeZip <- sf::st_join(zpNYC, nycNursingHome, join = st_contains) %>%
  group_by(ZIPCODE) %>%
  summarise(NursingHomeNum = n())

# Remove geometry before joining
st_geometry(nursingHomeZip) <- NULL

# Join the aggregated health facility data back to ZIP code shapefile
zpNYC <- zpNYC %>% 
  dplyr::left_join(nursingHomeZip, by = "ZIPCODE")

Join the Census ACS population, race, and age data to the NYC Planning Census Tract Data.

# Convert borough names to FIPS codes and create a unique tract identifier
nycCensus <- nycCensus %>% 
  dplyr::mutate(cntyFIPS = case_when(
    boro_name == 'Bronx' ~ '005',
    boro_name == 'Brooklyn' ~ '047',
    boro_name == 'Manhattan' ~ '061',
    boro_name == 'Queens' ~ '081',
    boro_name == 'Staten Island' ~ '085'),
    tractFIPS = paste0(cntyFIPS, ct2010)
  )

# Read ACS demographic data
acsData <- readLines("R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
  magrittr::extract(-2) %>% 
  textConnection() %>%
  read.csv(header=TRUE, quote= "\"") %>%
  dplyr::select(GEO_ID, 
                totPop = DP05_0001E, elderlyPop = DP05_0024E,
                malePop = DP05_0002E, femalePop = DP05_0003E,  
                whitePop = DP05_0037E, blackPop = DP05_0038E,
                asianPop = DP05_0067E, hispanicPop = DP05_0071E,
                adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
  dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1))

# Merge ACS data with census tracts
popData <- merge(nycCensus, acsData, by.x ='tractFIPS', by.y = 'censusCode')

Aggregate the ACS census data to zip code area data.

# Ensure CRS match before joining
popNYC <- sf::st_transform(popData, st_crs(zpNYC))

# Perform spatial join ensuring attributes are retained
covidPopZipNYC <- sf::st_join(zpNYC, popNYC, join = st_intersects)

# Summarise population data at ZIP code level
covidPopZipNYC <- covidPopZipNYC %>%
  group_by(ZIPCODE) %>%
  summarise(
    totPop = sum(totPop, na.rm=TRUE),
    malePctg = sum(malePop, na.rm=TRUE)/sum(totPop, na.rm=TRUE) * 100,
    asianPop = sum(asianPop, na.rm=TRUE),
    blackPop = sum(blackPop, na.rm=TRUE),
    hispanicPop = sum(hispanicPop, na.rm=TRUE),
    whitePop = sum(whitePop, na.rm=TRUE)
  )

# Remove geometry before joining
st_geometry(covidPopZipNYC) <- NULL

# Join aggregated demographic data to ZIP code shapefile
zpNYC <- zpNYC %>% 
  dplyr::left_join(covidPopZipNYC, by = "ZIPCODE")

Visualize spatial distributions

# Replace NA values in COVID case count
zpNYC <- zpNYC %>% 
  mutate(COVID_CASE_COUNT = replace_na(COVID_CASE_COUNT, 0))

# Visualize spatial distributions
plot(zpNYC["COVID_CASE_COUNT"], breaks='jenks', main="COVID-19 Cases by ZIP Code")

plot(zpNYC["FoodStoreNum"], breaks='jenks', main="Food Stores by ZIP Code")

plot(zpNYC["NursingHomeNum"], breaks='jenks', main="Nursing Homes by ZIP Code")

plot(zpNYC["asianPop"], breaks='jenks', main="Asian Population by ZIP Code")