R Spatial Lab Assignment # 2

task 1: Join the COVID-19 data to the NYC zip code area data (sf or sp polygons).

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(sf)
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(readr)
load("~/Desktop/SPRING 2026 HUNTER/Programming with R/Week 8/Section_08/nyc_spatial_data.RData")

tests_raw <- read.csv("R-Spatial_II_Lab/tests-by-zcta_2020_04_12.csv")

tests_clean <- tests_raw %>%
  mutate(MODZCTA = as.character(MODZCTA)) %>%
  filter(!is.na(MODZCTA))

zip_covid <- nyc_zipcode %>%
 left_join(tests_clean, by = c("ZIPCODE" = "MODZCTA"))

task 2:Aggregate the NYC food retails store data (points) to the zip code data, so that we know how many retail stores in each zip code area. Note that not all locations are for food retail. And we need to choose the specific types according to the data.

#The first step of filtering the data was not working, so trimming was required:
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.1     ✔ tibble    3.3.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
NYS_FOOD_sf <- NYS_FOOD_sf %>%
  mutate(Establishment.Type = str_trim(Establishment.Type))

#After learning that grocery stores and restaurants are labelled by establishment type "A":
food_filtered <- NYS_FOOD_sf %>%
  filter(Establishment.Type == "A")

library(sf)

#It was found that "zip_covid" has a different CRS to that of "food_filtered", so the following correction was made:

food_filtered <- st_transform(food_filtered, st_crs(zip_covid))

food_join <- st_join(food_filtered, zip_covid)

food_counts <- food_join %>%
  st_drop_geometry() %>%
  group_by(ZIPCODE) %>%
  summarise(food_count = n())

zip_food <- zip_covid %>%
  left_join(food_counts, by = "ZIPCODE")

task 3:Aggregate the NYC health facilities (points) to the zip code data. Similarly, choose appropriate subtypes such as nursing homes from the facilities.

hf_filtered <- NYS_HF_sf %>%
  filter(Short.Description %in% c("NH", "HSPC"))

hf_filtered <- st_transform(hf_filtered, st_crs(zip_food))

hf_join <- st_join(hf_filtered, zip_food)

hf_counts <- hf_join %>%
  st_drop_geometry() %>%       
  group_by(ZIPCODE) %>%
  summarise(hf_count = n())

zip_food_hf <- zip_food %>%
  left_join(hf_counts, by = "ZIPCODE")

###task 4:Join the Census ACS population, race, and age data to the NYC Planning Census Tract Data.

ACS_data <- read_csv("ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv")
## Rows: 2168 Columns: 358
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (358): GEO_ID, NAME, DP05_0031PM, DP05_0032E, DP05_0032M, DP05_0032PE, D...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
acs_df <- as.data.frame(ACS_data)

#GEOID column had to be renamed:
acs_df <- ACS_data %>%
  rename(GEOID = GEO_ID) %>%        
  mutate(GEOID = as.character(GEOID))

nyc_tracts_sf <- st_read("2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp")
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `/Users/emilycoty/Desktop/SPRING 2026 HUNTER/Programming with R/Week 8/Section_08/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)
#A new GEOID had to be created for nyc_tracts_sf:

county_fips <- c("1" = "061",  # Manhattan
                 "2" = "005",  # Bronx
                 "3" = "047",  # Brooklyn
                 "4" = "081",  # Queens
                 "5" = "085")  # Staten Island


nyc_tracts_sf <- nyc_tracts_sf %>%
  mutate(
    county_fips = county_fips[as.character(boro_code)],  
    tract_code = sprintf("%06s", boro_ct201),             
    GEOID = paste0("36", county_fips, tract_code)        
  )

tracts_with_acs <- nyc_tracts_sf %>%
  left_join(acs_df, by = c("ct2010" = "GEOID"))

###task 5:Aggregate the ACS census data to zip code area data.

#I was having Issues with coordinates again:
library(mapview)
zip_covid <- st_transform(zip_covid, 4326)
nyc_tracts_sf <- st_transform(nyc_tracts_sf, 4326)


zip_clean <- zip_food_hf[st_is_valid(zip_food_hf), ]

tracts_with_acs <- st_transform(tracts_with_acs, st_crs(zip_clean))

zip_final <- st_join(zip_clean, tracts_with_acs, join = st_contains, s2 = FALSE)

#Checking basic visual representation of object for confirmation:
mapview(zip_final)