Step 1: Joining COVID-19 Data to Zip Code Areas

# Load zip code boundaries
zpNYC <- sf::st_read("NYC_zip_code_shapefile.shp") 

# Load COVID-19 test data
covidData <- read.csv("tests-by-zcta_2021_04_23.csv") 

# Join COVID data to zip code polygons
zpNYC <- merge(zpNYC, covidData, by.x = "ZIPCODE", by.y = "ZCTA")

# Examine the first few records
head(zpNYC) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)

Step 2: Aggregating Food Retail Stores to Zip Codes

# Filter food stores (types A, J, or D) and aggregate to zip codes
nycFoodStoresByZip <- nycFoodStoreSF %>% 
  dplyr::filter(stringr::str_detect(Estbl_T, '[AJD]')) %>%
  sf::st_join(zpNYC, ., join = st_contains) %>%
  group_by(ZIPCODE) %>%
  summarise(FoodStoreNum = n())

# Map food store counts by zip code
ggplot(nycFoodStoresByZip) +
  geom_sf(aes(fill = FoodStoreNum)) +
  scale_fill_viridis_c(option = "plasma") +
  theme_minimal() +
  labs(title = "Number of Food Stores by NYC Zip Code",
       fill = "Food Store Count") +
  theme(legend.position = "bottom")

Step 3: Aggregating Health Facilities to Zip Codes

We filter for nursing homes and then count them by zip code.

# Filter for nursing homes
nycNursingHome <- nycHealthFacilities %>% 
  dplyr::filter(Short.Description == 'NH')

# Aggregate nursing homes to zip codes
nursingHomesByZip <- zpNYC %>%
  sf::st_join(nycNursingHome, join = st_contains) %>%
  group_by(ZIPCODE) %>%
  summarise(NursingHomeNum = n())

# Map nursing home counts
ggplot(nursingHomesByZip) +
  geom_sf(aes(fill = NursingHomeNum)) +
  scale_fill_viridis_c(option = "magma") +
  theme_minimal() +
  labs(title = "Number of Nursing Homes by NYC Zip Code",
       fill = "Nursing Home Count") +
  theme(legend.position = "bottom")

Step 4: Joining Census ACS Data to Census Tracts

# Load NYC census tract boundaries
nycCensus <- sf::st_read('./R-Spatial_II_Lab/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp', 
                         stringsAsFactors = FALSE)

# Add county FIPS codes based on borough names
nycCensus %<>% dplyr::mutate(cntyFIPS = case_when(
  boro_name == 'Bronx' ~ '005',
  boro_name == 'Brooklyn' ~ '047',
  boro_name == 'Manhattan' ~ '061',
  boro_name == 'Queens' ~ '081',
  boro_name == 'Staten Island' ~ '085'),
  tractFIPS = paste(cntyFIPS, ct2010, sep='')
)

# Load ACS demographic data, bypassing problematic second line
acsData <- readLines("./R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
  magrittr::extract(-2) %>% 
  textConnection() %>%
  read.csv(header = TRUE, quote = "\"") %>%
  dplyr::select(GEO_ID, 
                totPop = DP05_0001E, 
                elderlyPop = DP05_0024E, # >= 65
                malePop = DP05_0002E, 
                femalePop = DP05_0003E,  
                whitePop = DP05_0037E, 
                blackPop = DP05_0038E,
                asianPop = DP05_0067E, 
                hispanicPop = DP05_0071E,
                adultPop = DP05_0021E, 
                citizenAdult = DP05_0087E) %>%
  dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9, -1))

# Merge ACS data with census tract boundaries
popData <- merge(nycCensus, acsData, by.x = 'tractFIPS', by.y = 'censusCode')

# Verify total population matches expectations
sum(popData$totPop)

# Transform to match coordinate system of zip code data
popNYC <- sf::st_transform(popData, st_crs(zpNYC))

Step 5: Aggregating Census Data to Zip Code Level

# Aggregate census data to zip codes using centroids
covidPopZipNYC <- sf::st_join(zpNYC, 
                             popNYC %>% sf::st_centroid(), 
                             join = st_contains) %>% 
  group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT, TOTAL_COVID_TESTS) %>%
  summarise(totPop = sum(totPop),
            malePctg = sum(malePop)/sum(totPop)*100,
            asianPop = sum(asianPop),
            blackPop = sum(blackPop),
            hispanicPop = sum(hispanicPop),
            whitePop = sum(whitePop)) 

# Check for ZIP codes with missing population data
missingPop <- covidPopZipNYC %>% 
  dplyr::filter(is.na(totPop)) %>%
  st_drop_geometry() %>%
  select(ZIPCODE, PO_NAME, COUNTY)

kable(missingPop, caption = "ZIP Codes with Missing Population Data") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = F)

#Final Results

# Map COVID case counts
ggplot(covidPopZipNYC) +
  geom_sf(aes(fill = COVID_CASE_COUNT)) +
  scale_fill_viridis_c(option = "inferno") +
  theme_minimal() +
  labs(title = "COVID-19 Cases by NYC ZIP Code",
       fill = "Case Count") +
  theme(legend.position = "bottom")

# Map Asian population distribution
ggplot(covidPopZipNYC) +
  geom_sf(aes(fill = asianPop)) +
  scale_fill_viridis_c() +
  theme_minimal() +
  labs(title = "Asian Population by NYC ZIP Code",
       fill = "Population") +
  theme(legend.position = "bottom")

Correlation Analysis

# Extract numeric columns and calculate correlations
cor_data <- covidPopZipNYC %>%
  st_drop_geometry() %>%
  select(COVID_CASE_COUNT, TOTAL_COVID_TESTS, totPop, FoodStoreNum, 
         NursingHomeNum, asianPop, blackPop, hispanicPop, whitePop)

# Calculate correlation matrix
cor_matrix <- cor(cor_data, use = "complete.obs")

# Visualize correlation matrix
corrplot::corrplot(cor_matrix, method = "circle", type = "upper", 
                   tl.col = "black", tl.srt = 45)

R Week 08 Assignment

Lianne Persaud

May 2025

Introduction

Workflow Overview

Step 1: Joining COVID-19 Data to Zip Code Areas

Step 2: Aggregating Food Retail Stores to Zip Codes

Step 3: Aggregating Health Facilities to Zip Codes

Step 4: Joining Census ACS Data to Census Tracts

Step 5: Aggregating Census Data to Zip Code Level

Correlation Analysis

Conclusion