R Week 08 Assignment

Task 1

Join the COVID-19 data to the NYC zip code area data.

# Read data
zpNYC <- sf::st_read("C:/Users/linwa/Downloads/GTECH785_2025/R_Spatial/Data/R-Spatial_I_Lab/ZIP_CODE_040114/ZIP_CODE_040114.shp") %>% 
  st_transform(crs = 4326)

## Reading layer `ZIP_CODE_040114' from data source 
##   `C:\Users\linwa\Downloads\GTECH785_2025\R_Spatial\Data\R-Spatial_I_Lab\ZIP_CODE_040114\ZIP_CODE_040114.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)

covid19<- read.csv("tests-by-zcta_2021_04_23.csv") %>%
  st_as_sf(coords = c("lon", "lat"), crs = 4326)

# Filter data
zpNYC_filter<- zpNYC[, c("ZIPCODE", "COUNTY", "PO_NAME")]
covid19_filter <- covid19[,c("MODIFIED_ZCTA", "COVID_CASE_COUNT", "COVID_DEATH_COUNT", "TOTAL_COVID_TESTS")]

# Not sure why there are duplicates in zip codes
sum(!duplicated(zpNYC_filter$ZIPCODE))

## [1] 248

sum(duplicated(zpNYC_filter$ZIPCODE))

## [1] 15

zpNYC_filter[duplicated(zpNYC_filter$ZIPCODE),]

## Simple feature collection with 15 features and 3 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.0473 ymin: 40.58123 xmax: -73.74691 ymax: 40.87921
## Geodetic CRS:  WGS 84
## First 10 features:
##     ZIPCODE   COUNTY      PO_NAME                       geometry
## 20    10463 New York        Bronx POLYGON ((-73.91544 40.8755...
## 28    10464    Bronx        Bronx POLYGON ((-73.772 40.85712,...
## 29    10464    Bronx        Bronx POLYGON ((-73.79235 40.8560...
## 110   10004 New York     New York POLYGON ((-74.04166 40.6964...
## 114   10004 New York     New York POLYGON ((-74.02418 40.6839...
## 115   10004 New York     New York POLYGON ((-74.04699 40.6901...
## 124   11231    Kings     Brooklyn POLYGON ((-74.01658 40.6647...
## 145   11096   Queens       Inwood POLYGON ((-73.74906 40.6118...
## 147   11693   Queens Far Rockaway POLYGON ((-73.83263 40.6084...
## 150   11693   Queens Far Rockaway POLYGON ((-73.81336 40.5909...

zpNYC_filter<- zpNYC_filter[!duplicated(zpNYC_filter$ZIPCODE),]

# Join the COVID-19 data to the NYC zip code area data.
zip_covid <- sf::st_join(zpNYC_filter,covid19_filter, join = st_contains) %>%
  group_by(ZIPCODE) %>%
  summarise(
    covid_case_count = sum(COVID_CASE_COUNT, na.rm = TRUE),
    total_covid_test = sum(TOTAL_COVID_TESTS, na.rm = TRUE),
    COVID_DEATH_COUNT = sum(COVID_DEATH_COUNT, na.rm = TRUE))

Task 2

Aggregate the NYC food retails store data (points) to the zip code data, so that we know how many retail stores in each zip code area.Note that not all locations are for food retail. And we need to choose the specific types according to the data.

# Read and filter data
nycFoodStoreSF<- sf::st_read("nycFoodStore.shp")

## Reading layer `nycFoodStore' from data source 
##   `C:\Users\linwa\Downloads\GTECH785_2025\R_Spatial\Data\R-Spatial_II_Lab\nycFoodStore.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 11300 features and 16 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -74.2484 ymin: 40.50782 xmax: -73.67061 ymax: 40.91008
## Geodetic CRS:  WGS 84

nycFoodStoreSF_filter<- nycFoodStoreSF[, "Estbl_T"]

# Get certain types of food stores and count the number of stores in each zip code area
zip_food<- nycFoodStoreSF %>% dplyr::filter(stringr::str_detect(Estbl_T, '[AJD]')) %>%
  sf::st_join(zpNYC, ., join= st_contains) %>%
  group_by(ZIPCODE) %>%
  summarise(FoodStoreNum = n())

Task 3

Aggregate the NYC health facilities (points) to the zip code data. Choose appropriate subtypes such as nursing homes from the facilities.

# Read and filter data
nysHealthFacilities<- read.csv("NYS_Health_Facility.csv") %>%
  filter(!is.na(Facility.Longitude)) %>% # remove coordinates with na
  st_as_sf(coords = c("Facility.Longitude", "Facility.Latitude"), crs = 4326)

nycHealthFacilities<- 
  nysHealthFacilities[nysHealthFacilities$Facility.County %in% c("Bronx", "Kings", "New York", "Queens", "Richmond"),] %>%
  # replace empty string with na
  mutate(Short.Description = ifelse(Short.Description == "" | is.na(Short.Description), "Unknown", Short.Description))

# Aggregate the NYC health facilities (points) to the zip code data
nycHealthFacilities_type <- nycHealthFacilities %>%
  group_by(Facility.Zip.Code, Short.Description) %>%
  summarise(count = n(), .groups = 'drop')%>%
  pivot_wider(names_from = Short.Description, values_from = count, values_fill = NA) %>%
  group_by(Facility.Zip.Code)%>%
    summarise(across(starts_with("CHHA") | starts_with("DTC") | starts_with("HOSP") | starts_with("ADHCP"),
                   ~sum(.x, na.rm = TRUE)))

zip_health <- left_join(zpNYC_filter, st_drop_geometry(nycHealthFacilities_type), by = c("ZIPCODE" = "Facility.Zip.Code"))

mapview(zip_health)

Task 4

Join the Census ACS population, race, and age data to the NYC Planning Census Tract Data.

# Read and filter data
nycCensus <- sf::st_read('./2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp', stringsAsFactors = FALSE) %>%
  st_transform(crs = 4326)

## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `C:\Users\linwa\Downloads\GTECH785_2025\R_Spatial\Data\R-Spatial_II_Lab\2010 Census Tracts\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)

nycCensus %<>% dplyr::mutate(cntyFIPS = case_when(
  boro_name == 'Bronx' ~ '005',
  boro_name == 'Brooklyn' ~ '047',
  boro_name == 'Manhattan' ~ '061',
  boro_name == 'Queens' ~ '081',
  boro_name == 'Staten Island' ~ '085'),
  tractFIPS = paste(cntyFIPS, ct2010, sep='')) 

acsData <- readLines("ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
  magrittr::extract(-2) %>% 
  textConnection() %>%
  read.csv(header=TRUE, quote= "\"") %>%
  dplyr::select(GEO_ID, 
                totPop = DP05_0001E, elderlyPop = DP05_0024E, # >= 65
                malePop = DP05_0002E, femalePop = DP05_0003E,  
                whitePop = DP05_0037E, blackPop = DP05_0038E,
                asianPop = DP05_0067E, hispanicPop = DP05_0071E,
                adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
  dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1))

# Join the Census ACS population, race, and age data to the NYC Planning Census Tract Data
popData <- merge(nycCensus, acsData, by.x ='tractFIPS', by.y = 'censusCode')

Task 5

Aggregate the ACS census data to zip code area data.

zip_popData<- sf::st_join(zpNYC_filter, 
                          popData %>% sf::st_centroid(), # this essentially converts census tracts to points
                          join = st_contains) %>% 
  group_by(ZIPCODE) %>% # use names(zpNYC) and names(popNYC) to see what we have
  summarise(totPop = sum(totPop),
            malePctg = sum(malePop)/totPop*100, # note the totPop is the newly calculated
            asianPop = sum(asianPop),
            blackPop = sum(blackPop),
            hispanicPop = sum(hispanicPop),
            whitePop = sum(whitePop))

## Warning: st_centroid assumes attributes are constant over geometries

#  Check and verify the data again
sum(zip_popData$totPop, na.rm = T)

## [1] 8415507

Task 6. Results

# combine zip_covid, zip_health, zip_food, and zip_popData into a single sf
zipData<- zip_covid %>%
  left_join(st_drop_geometry(zip_health), by = "ZIPCODE") %>%
  left_join(st_drop_geometry(zip_food), by = "ZIPCODE") %>%
  left_join(st_drop_geometry(zip_popData), by = "ZIPCODE")

mapview(zipData)

st_write(zipData,          
         dsn = "HW08_zip_data.gpkg", 
         layer="HW08_zip_data",
         delete_layer = TRUE)

## Deleting layer `HW08_zip_data' using driver `GPKG'
## Writing layer `HW08_zip_data' to data source `HW08_zip_data.gpkg' using driver `GPKG'
## Writing 248 features with 20 fields and geometry type Polygon.