```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE, fig.width = 10, fig.height = 7, out.width = “100%”, dpi = 300)

Install packages if not already installed

if (!require(“sf”)) install.packages(“sf”) if (!require(“mapview”)) install.packages(“mapview”) if (!require(“dplyr”)) install.packages(“dplyr”) if (!require(“ggplot2”)) install.packages(“ggplot2”) if (!require(“readr”)) install.packages(“readr”) if (!require(“knitr”)) install.packages(“knitr”) if (!require(“kableExtra”)) install.packages(“kableExtra”) if (!require(“leaflet”)) install.packages(“leaflet”) if (!require(“htmltools”)) install.packages(“htmltools”) if (!require(“RColorBrewer”)) install.packages(“RColorBrewer”)


## Introduction

This analysis explores the spatial relationship between COVID-19 cases in New York City and demographic variables along with essential services such as retail food stores and public health facilities. This document covers the first lab in a series, which focuses on setting up the spatial data framework for further analysis.

## 1. Loading Required Libraries

```{r load-libraries}
# Load required packages
library(sf)          # Simple Features for spatial data
library(mapview)     # Interactive map viewing
library(dplyr)       # Data manipulation
library(ggplot2)     # Plotting
library(readr)       # Reading CSV files
library(lubridate)   # Date manipulation
library(knitr)       # For nice tables
library(kableExtra)  # Enhanced tables
library(leaflet)     # Interactive maps
library(htmltools)   # HTML tools for R
library(RColorBrewer) # Color palettes

2. Reading NYC Postal Areas Shapefile

NYC Department of Health and Mental Hygiene (DOHMH) publishes COVID-19 data by ZIP code tabulation area (ZCTA). We’ll use this postal area dataset as the foundation for our spatial analysis.

```{r read-postal-areas} # Read the NYC postal areas shapefile nyc_postal_sf <- st_read(“data/nyc/nyc_postal_areas.shp”, quiet = TRUE)

Display the structure of the data

glimpse(nyc_postal_sf)

Show a sample of the data

head(nyc_postal_sf, 5) %>% kable() %>% kable_styling(bootstrap_options = c(“striped”, “hover”, “condensed”), font_size = 12)


Let's visualize the postal areas to get a sense of the geography:

```{r plot-postal-areas}
# Static visualization of the postal areas
ggplot() +
  geom_sf(data = nyc_postal_sf, fill = "lightblue", color = "white", alpha = 0.7) +
  theme_minimal() +
  labs(title = "NYC Postal Areas")

# Interactive map of the postal areas
mapview(nyc_postal_sf, 
        zcol = "zipcode",  # Adjust to match your actual column name
        layer.name = "NYC ZIP Codes",
        col.regions = brewer.pal(9, "Blues"),
        alpha.regions = 0.6)

3. Processing Health Facilities Data

Now we’ll read in and process the New York State health facilities data, focusing on NYC.

```{r read-health-facilities} # Reading NYS Health Facility CSV file nys_health_df <- read_csv(“data/NYS_Health_Facility.csv”, show_col_types = FALSE)

Clean column names

names(nys_health_df) <- tolower(names(nys_health_df))

Display the structure

glimpse(nys_health_df)

Sample of the data

head(nys_health_df, 5) %>% kable() %>% kable_styling(bootstrap_options = c(“striped”, “hover”, “condensed”), font_size = 12)


Next, we'll filter to only include NYC facilities and convert to an `sf` object:

```{r process-health-facilities}
# Filter to only NYC health facilities
# Adjust the filter based on your actual data columns
nyc_health_df <- nys_health_df %>%
  filter(grepl("new york|brooklyn|queens|bronx|staten island", 
               tolower(as.character(county)), 
               ignore.case = TRUE))

# Display count by county or borough
nyc_health_df %>%
  count(county) %>%
  arrange(desc(n)) %>%
  kable(col.names = c("County/Borough", "Number of Facilities")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)

# Convert to sf object
# Adjust column names for coordinates as needed
nyc_health_sf <- st_as_sf(nyc_health_df, 
                        coords = c("longitude", "latitude"),
                        crs = 4326)  # WGS84 projection

# Verify the sf object
nyc_health_sf %>%
  head(5) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), font_size = 12)

4. Processing Retail Food Stores Data

Similarly, we’ll process the retail food stores data for NYC.

```{r read-food-stores} # Reading NYS Retail Food Stores CSV file nys_food_df <- read_csv(“data/NYS_Retail_Food_Stores.csv”, show_col_types = FALSE)

Clean column names

names(nys_food_df) <- tolower(names(nys_food_df))

Display the structure

glimpse(nys_food_df)

Sample of the data

head(nys_food_df, 5) %>% kable() %>% kable_styling(bootstrap_options = c(“striped”, “hover”, “condensed”), font_size = 12)


Filter for NYC food stores and convert to spatial format:

```{r process-food-stores}
# Filter to only NYC food stores
nyc_food_df <- nys_food_df %>%
  filter(grepl("new york|brooklyn|queens|bronx|staten island", 
               tolower(as.character(county)), 
               ignore.case = TRUE))

# Display count by county or borough
nyc_food_df %>%
  count(county) %>%
  arrange(desc(n)) %>%
  kable(col.names = c("County/Borough", "Number of Stores")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)

# Convert to sf object
nyc_food_sf <- st_as_sf(nyc_food_df, 
                      coords = c("longitude", "latitude"),
                      crs = 4326)  # WGS84 projection

# Verify the sf object
nyc_food_sf %>%
  head(5) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), font_size = 12)

5.1 Health Facilities Map

{r map-health-facilities} # Create an interactive map of health facilities health_map <- mapview(nyc_health_sf, zcol = "facility_type", # Adjust to match your column name layer.name = "NYC Health Facilities", col.regions = "blue", cex = 4, alpha = 0.8) health_map

5.2 Retail Food Stores Map

{r map-food-stores} # Create an interactive map of food stores food_map <- mapview(nyc_food_sf, zcol = "store_type", # Adjust to match your column name layer.name = "NYC Retail Food Stores", col.regions = "red", cex = 3, alpha = 0.7) food_map

5.3 Combined Maps

Let’s visualize the combined spatial distributions:

```{r combined-maps} # Postal areas with health facilities postal_health_map <- mapview(nyc_postal_sf, zcol = “zipcode”, layer.name = “NYC ZIP Codes”, alpha.regions = 0.3) + mapview(nyc_health_sf, zcol = “facility_type”, layer.name = “Health Facilities”, col.regions = “blue”, cex = 3) postal_health_map

Postal areas with food stores

postal_food_map <- mapview(nyc_postal_sf, zcol = “zipcode”, layer.name = “NYC ZIP Codes”, alpha.regions = 0.3) + mapview(nyc_food_sf, zcol = “store_type”, layer.name = “Food Stores”, col.regions = “red”, cex = 2) postal_food_map


## 6. Saving the Spatial Data Objects

```{r save-data}
# Option 1: Save to RData file
save(nyc_postal_sf, nyc_health_sf, nyc_food_sf, 
     file = 'data/nyc/nyc_spatial_data.RData')

# Option 2: Save to GeoPackage (more interoperable format)
st_write(nyc_postal_sf, 
         dsn = 'data/nyc/nyc_covid_data.gpkg', 
         layer = 'nyc_postal_areas',
         delete_layer = TRUE)

st_write(nyc_health_sf, 
         dsn = 'data/nyc/nyc_covid_data.gpkg', 
         layer = 'nyc_health_facilities',
         delete_layer = TRUE)

st_write(nyc_food_sf, 
         dsn = 'data/nyc/nyc_covid_data.gpkg', 
         layer = 'nyc_retail_food_stores',
         delete_layer = TRUE)

cat("Data processing complete. Files saved in both RData and GeoPackage formats.\n")