R Week 7 Assignment

Load packages

Join the COVID-19 data to the NYC zip code area data (sf or sp polygons).

#Going to assume "Zip Code Area Data" is from lab 1. No zip code area data was provided in week 8 folder

#Get your working directory
wd <- getwd()

#Create a path to the "ZIP_CODE_040114" folder 
data_dir <- file.path(wd, "R-Spatial_I_Lab", "ZIP_CODE_040114")

nycZipCode <- st_read(file.path(data_dir,"ZIP_CODE_040114.shp"))

## Reading layer `ZIP_CODE_040114' from data source 
##   `C:\Users\odell\Desktop\GTECH R\R-Spatial_I_Lab\ZIP_CODE_040114\ZIP_CODE_040114.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)

#Path to covid file
data_dir <- file.path(wd, "R-Spatial_II_Lab","R-Spatial_II_Lab") 

#Read data
covid <- read_csv(file.path(data_dir,"tests-by-zcta_2021_04_23.csv"),show_col_types=F)

#Convert ZIPCODE to character for consistency
nycZipCode$ZIPCODE <- as.character(nycZipCode$ZIPCODE)
covid$MODIFIED_ZCTA <- as.character(covid$MODIFIED_ZCTA)

#Join the COVID data to the zip code geometry data based on ZIPCODE
zpNYC <- left_join(nycZipCode, covid, by = c("ZIPCODE" = "MODIFIED_ZCTA"))

Aggregate the NYC food retails store data (points) to the zip code data, so that we know how many retail stores in each zip code area. Note that not all locations are for food retail. And we need to choose the specific types according to the data.

#Read data
df <- st_read(file.path(data_dir,"nycFoodStore.shp"))

## Reading layer `nycFoodStore' from data source 
##   `C:\Users\odell\Desktop\GTECH R\R-Spatial_II_Lab\R-Spatial_II_Lab\nycFoodStore.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 11300 features and 16 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -74.2484 ymin: 40.50782 xmax: -73.67061 ymax: 40.91008
## Geodetic CRS:  WGS 84

#Filter based on Estbl_T Name JAC,JABCHK,JABC,JAZ,JACD
food_retail <- df %>%
  filter(Estbl_T %in% c("JAC", "JABCHK", "JABC", "JAZ", "JACD"))

#Split the "Coords" column into "lat" and "lon"
food_retail <- food_retail %>%
  separate(Coords, into = c("lat", "lon"), sep = ", ") %>%
  mutate(across(c(lat, lon), as.numeric))

foodRetailStores_sf <- st_transform(food_retail, st_crs(zpNYC))

#Perform a spatial join between food retail stores and zip code polygons
foodStoresInZip <- st_join(zpNYC, foodRetailStores_sf, join = st_contains)

#Aggregate the data by ZIP code
aggregatedStores <- foodStoresInZip %>%
  group_by(ZIPCODE, PO_NAME) %>%
  summarise(FoodStoreNum = n(), .groups = "drop")%>% 
  magrittr::extract('FoodStoreNum') %>% 
  plot(breaks = "jenks", main="Number of Food Stores")

Aggregate the NYC health facilities (points) to the zip code data. Similarly, choose appropriate subtypes such as nursing homes from the facilities.

#Read data
df <- read_csv(file.path(data_dir,"NYS_Health_Facility.csv"),show_col_types=F)

#Define relevant facility types NH,HOSP-EC,HSPC,DTC,CHHA,HOSP,DTC-EC    
valid_types <- c("NH", "HOSP-EC", "HSPC", "DTC", "CHHA", "HOSP", "DTC-EC")

#Filter data based on Short Description
health_facilities <- df %>%
  filter(`Short Description` %in% valid_types)

#Remove rows with missing longitude or latitude
health_facilities <- health_facilities %>%
  drop_na(`Facility Longitude`, `Facility Latitude`)

#Convert to sf object
health_facilities_sf <- st_as_sf(health_facilities, coords = c("Facility Longitude", "Facility Latitude"), crs = 4269)

#Check and transform CRS
health_facilities_sf <- st_transform(health_facilities_sf, st_crs(zpNYC))

#Spatial join to assign ZIPCODE to each point
health_zip_join <- st_join(health_facilities_sf, zpNYC, join = st_within)

#Count the number of health facilities per zip code
zip_health_count <- health_zip_join %>%
  group_by(ZIPCODE) %>%
  summarize(num_facilities = n(), .groups = "drop")%>% 
  magrittr::extract('num_facilities') %>% 
  plot(breaks = "quantile", main = "Number of Health Facilities")

Join the Census ACS population, race, and age data to the NYC Planning Census Tract Data.

#Planning census data
nycCensus <- st_read(file.path(data_dir,"2010 Census Tracts" ,"geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp"))

## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `C:\Users\odell\Desktop\GTECH R\R-Spatial_II_Lab\R-Spatial_II_Lab\2010 Census Tracts\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)

nycCensus %<>% dplyr::mutate(cntyFIPS = case_when(
  boro_name == 'Bronx' ~ '005',
  boro_name == 'Brooklyn' ~ '047',
  boro_name == 'Manhattan' ~ '061',
  boro_name == 'Queens' ~ '081',
  boro_name == 'Staten Island' ~ '085'),
  tractFIPS = paste(cntyFIPS, ct2010, sep='')
)

#Read data
acsData <- read.csv(file.path(data_dir, "ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv"),
                    header = TRUE, 
                    quote = "\"")

#Remove the second row manually
acsData <- acsData[-1, ]

acsData <- acsData %>%
  dplyr::select(GEO_ID, 
                totPop = DP05_0001E, elderlyPop = DP05_0024E, # >= 65
                malePop = DP05_0002E, femalePop = DP05_0003E,  
                whitePop = DP05_0037E, blackPop = DP05_0038E,
                asianPop = DP05_0067E, hispanicPop = DP05_0071E,
                adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
  dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1));

#Join census acs and nyc planning census data
popData <- merge(nycCensus, acsData, by.x ='tractFIPS', by.y = 'censusCode')

Aggregate the ACS census data to zip code area data.

st_crs(popData)

## Coordinate Reference System:
##   User input: WGS84(DD) 
##   wkt:
## GEOGCRS["WGS84(DD)",
##     DATUM["WGS84",
##         ELLIPSOID["WGS84",6378137,298.257223563,
##             LENGTHUNIT["metre",1,
##                 ID["EPSG",9001]]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["longitude",east,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["latitude",north,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]]]

popNYC <- sf::st_transform(popData, st_crs(zpNYC)) 

#Convert population-related columns to numeric
popNYC <- popNYC %>%
  mutate(
    totPop = as.numeric(totPop),
    malePop = as.numeric(malePop),
    asianPop = as.numeric(asianPop),
    blackPop = as.numeric(blackPop),
    hispanicPop = as.numeric(hispanicPop),
    whitePop = as.numeric(whitePop)
  )

#Now aggregate to the zip code level
covidPopZipNYC <- sf::st_join(zpNYC, 
                              popNYC %>% sf::st_centroid(), 
                              join = st_contains) %>% 
  group_by(ZIPCODE, PO_NAME) %>% 
  summarise(totPop = sum(totPop),
            malePctg = sum(malePop)/totPop*100,
            asianPop = sum(asianPop),
            blackPop = sum(blackPop),
            hispanicPop = sum(hispanicPop),
            whitePop = sum(whitePop))

## Warning: st_centroid assumes attributes are constant over geometries

## `summarise()` has grouped output by 'ZIPCODE'. You can override using the
## `.groups` argument.

R Week 7 Assignment

Deondrae Francis

3/15/2025

Load packages

Join the COVID-19 data to the NYC zip code area data (sf or sp polygons).

Aggregate the NYC food retails store data (points) to the zip code data, so that we know how many retail stores in each zip code area. Note that not all locations are for food retail. And we need to choose the specific types according to the data.

Aggregate the NYC health facilities (points) to the zip code data. Similarly, choose appropriate subtypes such as nursing homes from the facilities.

Join the Census ACS population, race, and age data to the NYC Planning Census Tract Data.

Aggregate the ACS census data to zip code area data.