```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE, fig.width = 10, fig.height = 7, out.width = “100%”, dpi = 300)
if (!require(“sf”)) install.packages(“sf”) if (!require(“mapview”)) install.packages(“mapview”) if (!require(“dplyr”)) install.packages(“dplyr”) if (!require(“ggplot2”)) install.packages(“ggplot2”) if (!require(“readr”)) install.packages(“readr”) if (!require(“knitr”)) install.packages(“knitr”) if (!require(“kableExtra”)) install.packages(“kableExtra”) if (!require(“leaflet”)) install.packages(“leaflet”) if (!require(“htmltools”)) install.packages(“htmltools”) if (!require(“RColorBrewer”)) install.packages(“RColorBrewer”)
## Introduction
This analysis explores the spatial relationship between COVID-19 cases in New York City and demographic variables along with essential services such as retail food stores and public health facilities. This document covers the first lab in a series, which focuses on setting up the spatial data framework for further analysis.
## 1. Loading Required Libraries
```{r load-libraries}
# Load required packages
library(sf) # Simple Features for spatial data
library(mapview) # Interactive map viewing
library(dplyr) # Data manipulation
library(ggplot2) # Plotting
library(readr) # Reading CSV files
library(lubridate) # Date manipulation
library(knitr) # For nice tables
library(kableExtra) # Enhanced tables
library(leaflet) # Interactive maps
library(htmltools) # HTML tools for R
library(RColorBrewer) # Color palettes
NYC Department of Health and Mental Hygiene (DOHMH) publishes COVID-19 data by ZIP code tabulation area (ZCTA). We’ll use this postal area dataset as the foundation for our spatial analysis.
```{r read-postal-areas} # Read the NYC postal areas shapefile nyc_postal_sf <- st_read(“data/nyc/nyc_postal_areas.shp”, quiet = TRUE)
glimpse(nyc_postal_sf)
head(nyc_postal_sf, 5) %>% kable() %>% kable_styling(bootstrap_options = c(“striped”, “hover”, “condensed”), font_size = 12)
Let's visualize the postal areas to get a sense of the geography:
```{r plot-postal-areas}
# Static visualization of the postal areas
ggplot() +
geom_sf(data = nyc_postal_sf, fill = "lightblue", color = "white", alpha = 0.7) +
theme_minimal() +
labs(title = "NYC Postal Areas")
# Interactive map of the postal areas
mapview(nyc_postal_sf,
zcol = "zipcode", # Adjust to match your actual column name
layer.name = "NYC ZIP Codes",
col.regions = brewer.pal(9, "Blues"),
alpha.regions = 0.6)
Now we’ll read in and process the New York State health facilities data, focusing on NYC.
```{r read-health-facilities} # Reading NYS Health Facility CSV file nys_health_df <- read_csv(“data/NYS_Health_Facility.csv”, show_col_types = FALSE)
names(nys_health_df) <- tolower(names(nys_health_df))
glimpse(nys_health_df)
head(nys_health_df, 5) %>% kable() %>% kable_styling(bootstrap_options = c(“striped”, “hover”, “condensed”), font_size = 12)
Next, we'll filter to only include NYC facilities and convert to an `sf` object:
```{r process-health-facilities}
# Filter to only NYC health facilities
# Adjust the filter based on your actual data columns
nyc_health_df <- nys_health_df %>%
filter(grepl("new york|brooklyn|queens|bronx|staten island",
tolower(as.character(county)),
ignore.case = TRUE))
# Display count by county or borough
nyc_health_df %>%
count(county) %>%
arrange(desc(n)) %>%
kable(col.names = c("County/Borough", "Number of Facilities")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
# Convert to sf object
# Adjust column names for coordinates as needed
nyc_health_sf <- st_as_sf(nyc_health_df,
coords = c("longitude", "latitude"),
crs = 4326) # WGS84 projection
# Verify the sf object
nyc_health_sf %>%
head(5) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), font_size = 12)
Similarly, we’ll process the retail food stores data for NYC.
```{r read-food-stores} # Reading NYS Retail Food Stores CSV file nys_food_df <- read_csv(“data/NYS_Retail_Food_Stores.csv”, show_col_types = FALSE)
names(nys_food_df) <- tolower(names(nys_food_df))
glimpse(nys_food_df)
head(nys_food_df, 5) %>% kable() %>% kable_styling(bootstrap_options = c(“striped”, “hover”, “condensed”), font_size = 12)
Filter for NYC food stores and convert to spatial format:
```{r process-food-stores}
# Filter to only NYC food stores
nyc_food_df <- nys_food_df %>%
filter(grepl("new york|brooklyn|queens|bronx|staten island",
tolower(as.character(county)),
ignore.case = TRUE))
# Display count by county or borough
nyc_food_df %>%
count(county) %>%
arrange(desc(n)) %>%
kable(col.names = c("County/Borough", "Number of Stores")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
# Convert to sf object
nyc_food_sf <- st_as_sf(nyc_food_df,
coords = c("longitude", "latitude"),
crs = 4326) # WGS84 projection
# Verify the sf object
nyc_food_sf %>%
head(5) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), font_size = 12)
{r map-health-facilities} # Create an interactive map of health facilities health_map <- mapview(nyc_health_sf, zcol = "facility_type", # Adjust to match your column name layer.name = "NYC Health Facilities", col.regions = "blue", cex = 4, alpha = 0.8) health_map
{r map-food-stores} # Create an interactive map of food stores food_map <- mapview(nyc_food_sf, zcol = "store_type", # Adjust to match your column name layer.name = "NYC Retail Food Stores", col.regions = "red", cex = 3, alpha = 0.7) food_map
Let’s visualize the combined spatial distributions:
```{r combined-maps} # Postal areas with health facilities postal_health_map <- mapview(nyc_postal_sf, zcol = “zipcode”, layer.name = “NYC ZIP Codes”, alpha.regions = 0.3) + mapview(nyc_health_sf, zcol = “facility_type”, layer.name = “Health Facilities”, col.regions = “blue”, cex = 3) postal_health_map
postal_food_map <- mapview(nyc_postal_sf, zcol = “zipcode”, layer.name = “NYC ZIP Codes”, alpha.regions = 0.3) + mapview(nyc_food_sf, zcol = “store_type”, layer.name = “Food Stores”, col.regions = “red”, cex = 2) postal_food_map
## 6. Saving the Spatial Data Objects
```{r save-data}
# Option 1: Save to RData file
save(nyc_postal_sf, nyc_health_sf, nyc_food_sf,
file = 'data/nyc/nyc_spatial_data.RData')
# Option 2: Save to GeoPackage (more interoperable format)
st_write(nyc_postal_sf,
dsn = 'data/nyc/nyc_covid_data.gpkg',
layer = 'nyc_postal_areas',
delete_layer = TRUE)
st_write(nyc_health_sf,
dsn = 'data/nyc/nyc_covid_data.gpkg',
layer = 'nyc_health_facilities',
delete_layer = TRUE)
st_write(nyc_food_sf,
dsn = 'data/nyc/nyc_covid_data.gpkg',
layer = 'nyc_retail_food_stores',
delete_layer = TRUE)
cat("Data processing complete. Files saved in both RData and GeoPackage formats.\n")