R Spatial Lab Assignment # 2.7

Task 1:

Join the COVID-19 data to the NYC zip code area data (sf or sp polygons).

zipcodes_nyc_sf <- sf::st_read("data/nyc/nyc_data.gpkg", layer = "census_tracts")
## Reading layer `census_tracts' from data source 
##   `C:\Users\moniq\Desktop\SPRING_26\DATA_ANALYSIS_R\Week_8\R-Spatial\data\nyc\nyc_data.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
names(zipcodes_nyc_sf)
##  [1] "ZIPCODE"    "BLDGZIP"    "PO_NAME"    "POPULATION" "AREA"      
##  [6] "STATE"      "COUNTY"     "ST_FIPS"    "CTY_FIPS"   "URL"       
## [11] "SHAPE_AREA" "SHAPE_LEN"  "geom"
# ZIPCODE was a string and I had to convert it to a number, made sure to use as.numeric() with as.character() to preserve the zipcode format 
zipcodes_nyc_sf$ZIPCODE <- as.numeric(as.character(zipcodes_nyc_sf$ZIPCODE))

# Verify the change
sapply(zipcodes_nyc_sf, class) # "numeric"
## $ZIPCODE
## [1] "numeric"
## 
## $BLDGZIP
## [1] "character"
## 
## $PO_NAME
## [1] "character"
## 
## $POPULATION
## [1] "numeric"
## 
## $AREA
## [1] "numeric"
## 
## $STATE
## [1] "character"
## 
## $COUNTY
## [1] "character"
## 
## $ST_FIPS
## [1] "character"
## 
## $CTY_FIPS
## [1] "character"
## 
## $URL
## [1] "character"
## 
## $SHAPE_AREA
## [1] "numeric"
## 
## $SHAPE_LEN
## [1] "numeric"
## 
## $geom
## [1] "sfc_POLYGON" "sfc"
# Loading the covid data
covid_df <- read_csv("R-Spatial_II_Lab/R-Spatial_II_Lab/tests-by-zcta_2020_04_19.csv",
  show_col_types = FALSE)

names(covid_df) # finding the col that has zipcode to left join with
## [1] "MODZCTA"           "Positive"          "Total"            
## [4] "zcta_cum.perc_pos"
zipcodes_covid_sf <- dplyr::left_join(zipcodes_nyc_sf, covid_df,
                          by = c("ZIPCODE" = "MODZCTA"))  

names(zipcodes_covid_sf)
##  [1] "ZIPCODE"           "BLDGZIP"           "PO_NAME"          
##  [4] "POPULATION"        "AREA"              "STATE"            
##  [7] "COUNTY"            "ST_FIPS"           "CTY_FIPS"         
## [10] "URL"               "SHAPE_AREA"        "SHAPE_LEN"        
## [13] "Positive"          "Total"             "zcta_cum.perc_pos"
## [16] "geom"
mapview(zipcodes_covid_sf, zcol = "Total", layer.name = "COVID Cases by ZIP")

Task 2:

Aggregate the NYC food retails store data (points) to the zip code data, so that we know how many retail stores in each zip code area. Note that not all locations are for food retail and we need to choose the specific types according to the data.

  1. Read food stores from GeoPackage
  2. Filter to relevant food retail types from lab_2_key_tasks.R
  3. Join polygon left, tHEN points right, one row per store matched to its containing zip
food_sf_nyc <- st_read("data/nyc/nyc_data.gpkg", layer = "retail_food_stores")
## Reading layer `retail_food_stores' from data source 
##   `C:\Users\moniq\Desktop\SPRING_26\DATA_ANALYSIS_R\Week_8\R-Spatial\data\nyc\nyc_data.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 11306 features and 16 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 915179.3 ymin: 124371.4 xmax: 1067194 ymax: 270861.1
## Projected CRS: NAD83 / New York Long Island (ftUS)
food_retail <- food_sf_nyc %>%
  filter(str_detect(trimws(Establishment.Type), '[AJD]'))

food_counts_sf <- st_join(zipcodes_covid_sf, food_retail, join = st_contains) %>%
  group_by(ZIPCODE) %>%
  summarise(
    FoodStoreNum        = n(),
    Total               = first(Total),
    Positive            = first(Positive)
  )
  1. Plot FoodStoreNum with mapview
mapview(food_counts_sf, zcol = "FoodStoreNum", layer.name = "Food Stores by ZIP")

Task 3:

Aggregate the NYC health facilities (points) to the zip code data. Similarly, choose appropriate subtypes such as nursing homes from the facilities.

  1. Get health_facilities layer as health_sf_nyc
  2. Filter data for exact matches of nursing homes NH
  3. Same pattern as food store task join, using food_counts_sf as the base so COVID and food columns are preserved
  4. Mapview plot for NursingHomeNum
health_sf_nyc <- st_read("data/nyc/nyc_data.gpkg", layer = "health_facilities")
## Reading layer `health_facilities' from data source 
##   `C:\Users\moniq\Desktop\SPRING_26\DATA_ANALYSIS_R\Week_8\R-Spatial\data\nyc\nyc_data.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 1293 features and 34 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 929529 ymin: 127612.4 xmax: 1066447 ymax: 271059.5
## Projected CRS: NAD83 / New York Long Island (ftUS)
nursing_homes <- health_sf_nyc %>%
  filter(Short.Description == 'NH')

zipcodes_final_sf <- st_join(food_counts_sf, nursing_homes, join = st_contains) %>%
  group_by(ZIPCODE) %>%
  summarise(
    FoodStoreNum      = first(FoodStoreNum),   
    Total             = first(Total),           
    Positive          = first(Positive),
    NursingHomeNum    = n()                     
  )

mapview(zipcodes_final_sf, zcol = "NursingHomeNum", layer.name = "Nursing Homes by ZIP")

Task 4:

Join the Census ACS population, race, and age data to the NYC Planning Census Tract Data.

  1. Read the NYC Planning census tract shapefile
  2. The shapefile uses boro codes not standard FIPs, need to build tractFIPS manually
  3. Read ACS CSV, readLines() skips the 2nd header row that breaks read.csv extract(-2)
  • Extract last 9 chars as key, last 9 characters to match the tractFIPS built in nycCensus
  1. Attribute joining match tractFIPS to censusCode
  • popData is a spatial sf object that keeps the nycCensus geometry ith the ACS demographic columns attached
  1. Total population by census tract mapview plot
nycCensus <- st_read("R-Spatial_II_Lab/R-Spatial_II_Lab/2010_Census_Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp",
                     stringsAsFactors = FALSE)
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `C:\Users\moniq\Desktop\SPRING_26\DATA_ANALYSIS_R\Week_8\R-Spatial\R-Spatial_II_Lab\R-Spatial_II_Lab\2010_Census_Tracts\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)
nycCensus <- nycCensus %>%
  mutate(
    cntyFIPS = case_when(
      boro_name == 'Bronx'         ~ '005',
      boro_name == 'Brooklyn'      ~ '047',
      boro_name == 'Manhattan'     ~ '061',
      boro_name == 'Queens'        ~ '081',
      boro_name == 'Staten Island' ~ '085'
    ),
    tractFIPS = paste(cntyFIPS, ct2010, sep = '')
  )

acsData <- readLines("R-Spatial_II_Lab/R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
  magrittr::extract(-2) %>%        
  textConnection() %>%
  read.csv(header = TRUE, quote = "\"") %>%
  dplyr::select(
    GEO_ID,
    totPop      = DP05_0001E,
    elderlyPop  = DP05_0024E,
    malePop     = DP05_0002E,
    femalePop   = DP05_0003E,
    whitePop    = DP05_0037E,
    blackPop    = DP05_0038E,
    asianPop    = DP05_0067E,
    hispanicPop = DP05_0071E
  ) %>%
  mutate(censusCode = stringr::str_sub(GEO_ID, -9, -1))  

popData <- merge(nycCensus, acsData, by.x = 'tractFIPS', by.y = 'censusCode')

mapview(popData, zcol = "totPop", layer.name = "Total Population by Census Tract")

Task 5:

Aggregate the ACS census data to zip code area data. 1. Reproject popData to match zipcodes_final_sf CRS 2. Spatial join using centroids, converting census tract polygons to points, so each tract falls into exactly one zip code with no doubles

popNYC <- st_transform(popData, st_crs(zipcodes_final_sf))

nyc_final <- st_join(zipcodes_final_sf,
                     popNYC %>% st_centroid(),
                     join = st_contains) %>%
  group_by(ZIPCODE, FoodStoreNum, NursingHomeNum, Total, Positive) %>%
  summarise(
    totPop      = sum(totPop,      na.rm = TRUE),
    elderlyPop  = sum(elderlyPop,  na.rm = TRUE),
    whitePop    = sum(whitePop,    na.rm = TRUE),
    blackPop    = sum(blackPop,    na.rm = TRUE),
    asianPop    = sum(asianPop,    na.rm = TRUE),
    hispanicPop = sum(hispanicPop, na.rm = TRUE),
    malePctg    = sum(malePop) / sum(totPop) * 100,
    .groups     = "drop"
  )
## Warning: st_centroid assumes attributes are constant over geometries
  1. Final plots for population data with mapview
  2. Saving final object as a single sf object
# Final plots
mapview(nyc_final, zcol = "totPop",      layer.name = "Total Population by ZIP")
mapview(nyc_final, zcol = "elderlyPop",  layer.name = "Elderly Population by ZIP")
mapview(nyc_final, zcol = "blackPop",    layer.name = "Black Population by ZIP")
mapview(nyc_final, zcol = "hispanicPop", layer.name = "Hispanic Population by ZIP")
# Save final sf object
st_write(nyc_final,
         dsn = "./data/nyc/nyc_hw2.gpkg",
         layer = "nyc_final",
         delete_layer = TRUE)
## Deleting layer `nyc_final' using driver `GPKG'
## Writing layer `nyc_final' to data source `./data/nyc/nyc_hw2.gpkg' using driver `GPKG'
## Writing 248 features with 12 fields and geometry type Unknown (any).