GTECH_38520_2025S_Week7_23939929

Author

Ibrahim Moftah

Published

March 15, 2025

Read the NYC postal areas in Shapefiles into sf objects. As NYC DOH publishes COVID-19 data by zip code, we will utilize the postal area data later.

Show the code
# Transform CRS to WGS84 (EPSG:4326) for compatibility
nyc_zip_sf <- st_transform(nyc_zip_sf, 4326)

# Quick plot
plot(st_geometry(nyc_zip_sf), main="NYC ZIP Code Boundaries")

Read and process the NYS health facilities spreadsheet data. Create sf objects from geographic coordinates.

Show the code
# Remove rows with missing lat/lon
health_facilities_clean <- health_facilities %>%
  drop_na(`Facility Latitude`, `Facility Longitude`)

# Filter Facilities Within New York State
nys_bbox <- list(
  min_lat = 40.4, max_lat = 45.1,  # Approximate NYS latitude range
  min_lon = -79.8, max_lon = -71.8 # Approximate NYS longitude range
)

health_facilities_clean <- health_facilities_clean %>%
  filter(
    `Facility Latitude` >= nys_bbox$min_lat & `Facility Latitude` <= nys_bbox$max_lat,
    `Facility Longitude` >= nys_bbox$min_lon & `Facility Longitude` <= nys_bbox$max_lon
  )

# Convert to sf object
health_facilities_sf <- st_as_sf(health_facilities_clean, 
                                 coords = c("Facility Longitude", "Facility Latitude"), 
                                 crs = 4326)

# Quick visualization
plot(st_geometry(health_facilities_sf), col = "red", main = "Health Facilities in NYS")

Read and process the NYS retail food stores data. Create sf objects from geographic coordinates for NYC.

Show the code
# Extract Latitude & Longitude from "Location" column
nysFoodStoreDat <- nysFoodStoreDat %>%
  tidyr::extract(Location, into = c('Lat', 'Long'), regex = "(\\d+\\.\\d+),[ ]*(-\\d+\\.\\d+)") %>%
  mutate(Lat = as.numeric(Lat), Long = as.numeric(Long)) %>%
  drop_na(Lat, Long)

# Filter for NYC-only Data
nyc_bbox <- list(
  min_lat = 40.4, max_lat = 41.0,  # NYC Latitude range
  min_lon = -74.3, max_lon = -73.7 # NYC Longitude range
)

nycFoodStores <- nysFoodStoreDat %>%
  filter(
    Lat >= nyc_bbox$min_lat & Lat <= nyc_bbox$max_lat,
    Long >= nyc_bbox$min_lon & Long <= nyc_bbox$max_lon
  )

# Convert to sf object
nycFoodStores_sf <- st_as_sf(nycFoodStores, coords = c('Long', 'Lat'), crs = 4326)
Show the code
nyc_zip_sf <- st_transform(nyc_zip_sf, 4326)

# Overlay NYC ZIP Codes and Food Stores
plot(st_geometry(nyc_zip_sf), main="NYC ZIP Code Boundaries")
plot(st_geometry(nyc_zip_sf), main="NYC ZIP Code Boundaries")
plot(st_geometry(nycFoodStores_sf), col="blue", add=TRUE)

Use simple mapping method, either based on ggmap+ggplot or mapview, with a basemap to verify the above datasets in terms of their geometry locations.

Show the code
# Reduce the number of food stores and health facilities (random sampling)
nycFoodStores_sf_sample <- nycFoodStores_sf %>% slice_sample(n = 3000)  # Adjust sample size
health_facilities_sf_sample <- health_facilities_sf %>% slice_sample(n = 2000)

# Simplify ZIP code polygons (reduce rendering complexity)
nyc_zip_sf_simplified <- st_simplify(nyc_zip_sf, dTolerance = 50)  # Adjust tolerance for balance

# Enable FlatGeobuf for faster rendering
mapviewOptions(fgb = TRUE)

# Render the optimized map
mapview(nycFoodStores_sf_sample, col.regions = "blue", layer.name = "Retail Food Stores") + 
mapview(health_facilities_sf_sample, col.regions = "red", layer.name = "Health Facilities") +
mapview(nyc_zip_sf_simplified, col.regions = "black", alpha.region = 0.3, layer.name = "NYC ZIP Codes",
        basemaps = FALSE)  # Disabling basemaps speeds up rendering

Save the three sf objects in a RData file or in a single GeoPackage file/database.

Show the code
# Save as GeoPackage for future use
st_write(nyc_zip_sf, "data/nyc_spatial_data.gpkg", layer="NYC_Postal_Areas", delete_layer=TRUE)
Deleting layer `NYC_Postal_Areas' using driver `GPKG'
Writing layer `NYC_Postal_Areas' to data source 
  `data/nyc_spatial_data.gpkg' using driver `GPKG'
Writing 263 features with 12 fields and geometry type Polygon.
Show the code
st_write(health_facilities_sf, "data/nyc_spatial_data.gpkg", layer="Health_Facilities", delete_layer=TRUE)
Deleting layer `Health_Facilities' using driver `GPKG'
Writing layer `Health_Facilities' to data source 
  `data/nyc_spatial_data.gpkg' using driver `GPKG'
Writing 3843 features with 34 fields and geometry type Point.
Show the code
st_write(nycFoodStores_sf, "data/nyc_spatial_data.gpkg", layer="Retail_Food_Stores", delete_layer=TRUE)
Deleting layer `Retail_Food_Stores' using driver `GPKG'
Writing layer `Retail_Food_Stores' to data source 
  `data/nyc_spatial_data.gpkg' using driver `GPKG'
Writing 12140 features with 14 fields and geometry type Point.