```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE, fig.width = 12, fig.height = 8)
library(sf) library(ggplot2) library(dplyr) library(RColorBrewer) library(viridis) library(tmap) library(leaflet) library(mapview) library(patchwork) library(scales) library(stringr) library(grid) library(gridExtra)
## Introduction
Using spatial data visualization techniques, we aim to identify patterns and correlations that might help understand the spread and impact of COVID-19 in urban environments.
## Data Preparation
```{r data_preparation}
# Load data from previous labs (adjust paths as needed)
# If you're reproducing this analysis, you'll need to either:
# 1. Have these files available, or
# 2. Download the raw data from NYC Open Data sources
# For this demonstration, we'll create synthetic data
# In a real analysis, you would load actual data files
# Create synthetic spatial data for demonstration
# Create NYC postal zones (simplified for demonstration)
nyc_postal_sf <- st_read(system.file("shape/nc.shp", package="sf"), quiet = TRUE) %>%
# Use North Carolina counties as placeholder shapes for NYC ZIP codes
# In a real scenario, you would use actual NYC boundary files
head(50) %>%
# Rename columns to match our expected structure
rename(zipcode = NAME) %>%
# Transform to NYC-like coordinates
st_transform(4326) %>%
# Adjust coordinates to roughly match NYC location
st_shift_longitude() %>%
st_set_crs(4326)
# Synthetic health facilities
set.seed(42)
nyc_health_sf <- st_sample(nyc_postal_sf, 80) %>%
st_sf() %>%
mutate(
facility_id = 1:n(),
facility_name = paste0("Facility ", facility_id),
facility_type = sample(
c("Hospital", "Nursing Home", "Long Term Care Facility", "Clinic", "Urgent Care"),
n(),
replace = TRUE,
prob = c(0.2, 0.3, 0.2, 0.2, 0.1)
)
)
# Synthetic food stores
nyc_food_sf <- st_sample(nyc_postal_sf, 200) %>%
st_sf() %>%
mutate(
store_id = 1:n(),
store_name = paste0("Store ", store_id),
store_type = sample(
c("Grocery", "Supermarket", "Convenience Store", "Specialty Food", "Corner Store"),
n(),
replace = TRUE,
prob = c(0.3, 0.2, 0.3, 0.1, 0.1)
)
)
# Create synthetic COVID data based on existing postal areas
set.seed(42) # For reproducibility
# Get population data from postal areas if available, otherwise create random values
nyc_covid_df <- data.frame(
ZIPCODE = nyc_postal_sf$zipcode,
POPULATION = round(runif(nrow(nyc_postal_sf), 5000, 50000))
)
# Create synthetic COVID-19 data
nyc_covid_df <- nyc_covid_df %>%
mutate(
# Cases related to population (with some randomness)
COVID_CASE_COUNT = round(POPULATION * runif(n(), 0.01, 0.05)),
# Tests based on population with variation
TOTAL_COVID_TESTS = round(POPULATION * runif(n(), 0.2, 0.5)),
# Calculate positivity rate
POSITIVE_RATE = (COVID_CASE_COUNT / TOTAL_COVID_TESTS) * 100,
# Case rate per 100,000 population
CASE_RATE = (COVID_CASE_COUNT / POPULATION) * 100000
)
# Prepare the postal areas with COVID data
nyc_covid_sf <- nyc_postal_sf %>%
left_join(nyc_covid_df, by = c("zipcode" = "ZIPCODE"))
```{r data_processing} # Calculate nursing homes per ZIP code nursing_homes_by_zip <- st_join(nyc_covid_sf, nyc_health_sf) %>% group_by(zipcode) %>% summarize( nursing_home_count = sum(grepl(“nursing|elderly|long term care”, tolower(facility_type), ignore.case = TRUE), na.rm = TRUE), hospital_count = sum(grepl(“hospital”, tolower(facility_type), ignore.case = TRUE), na.rm = TRUE), .groups = “drop” )
food_stores_by_zip <- st_join(nyc_covid_sf, nyc_food_sf) %>% group_by(zipcode) %>% summarize( food_store_count = n(), grocery_count = sum(grepl(“grocery|supermarket”, tolower(store_type), ignore.case = TRUE), na.rm = TRUE), .groups = “drop” )
nyc_covid_analysis <- nyc_covid_sf %>% left_join(st_drop_geometry(nursing_homes_by_zip), by = “zipcode”) %>% left_join(st_drop_geometry(food_stores_by_zip), by = “zipcode”) %>% # Handle NA values mutate( nursing_home_count = ifelse(is.na(nursing_home_count), 0, nursing_home_count), hospital_count = ifelse(is.na(hospital_count), 0, hospital_count), food_store_count = ifelse(is.na(food_store_count), 0, food_store_count), grocery_count = ifelse(is.na(grocery_count), 0, grocery_count) )
nyc_covid_analysis <- nyc_covid_analysis %>% mutate( # Food store density (per 10,000 population) food_store_density = (food_store_count / POPULATION) * 10000,
# Nursing home density (per 10,000 population)
nursing_home_density = (nursing_home_count / POPULATION) * 10000
)
glimpse(nyc_covid_analysis)
## Static Maps
### Map 1:
```{r covid_caserate_map, fig.height=10}
# Setup the graphics area
par(mar = c(0, 0, 2, 6)) # Adjust margins (bottom, left, top, right)
# Plot the COVID-19 case rate
plot(nyc_covid_analysis["CASE_RATE"],
main = "COVID-19 Case Rate per 100,000 Population by ZIP Code",
breaks = "quantile",
nbreaks = 7,
pal = brewer.pal(7, "YlOrRd"),
key.pos = 4, # Legend on right
key.width = lcm(1.3),
key.length = 1.0,
lwd = 0.3,
border = "white")
# Add graticule
graticule <- st_graticule(nyc_covid_analysis)
plot(graticule, add = TRUE, col = "grey70", lty = 3)
# Add approximate borough labels
# Note: These are approximations for our synthetic data
borough_centers <- data.frame(
borough = c("Manhattan", "Brooklyn", "Queens", "Bronx", "Staten Island"),
x = c(st_coordinates(st_centroid(nyc_covid_analysis[1,]))[1],
st_coordinates(st_centroid(nyc_covid_analysis[10,]))[1],
st_coordinates(st_centroid(nyc_covid_analysis[20,]))[1],
st_coordinates(st_centroid(nyc_covid_analysis[30,]))[1],
st_coordinates(st_centroid(nyc_covid_analysis[40,]))[1]),
y = c(st_coordinates(st_centroid(nyc_covid_analysis[1,]))[2],
st_coordinates(st_centroid(nyc_covid_analysis[10,]))[2],
st_coordinates(st_centroid(nyc_covid_analysis[20,]))[2],
st_coordinates(st_centroid(nyc_covid_analysis[30,]))[2],
st_coordinates(st_centroid(nyc_covid_analysis[40,]))[2])
)
# Convert to spatial points with same CRS
borough_points <- st_as_sf(borough_centers, coords = c("x", "y"), crs = st_crs(nyc_covid_analysis))
# Extract coordinates for labels
borough_coords <- st_coordinates(borough_points)
# Add text labels for boroughs
text(borough_coords[,1], borough_coords[,2], labels = borough_points$borough,
col = "black", cex = 1.0, font = 2)
# Add north arrow (using a simple function)
north_arrow <- function(x, y, size) {
arrows(x, y, x, y + size, lwd = 2, length = 0.1)
text(x, y + size*1.1, "N", cex = 1.2)
}
# Use coordinates from our data for placement
bbox <- st_bbox(nyc_covid_analysis)
north_arrow(bbox[1] + (bbox[3]-bbox[1])*0.1,
bbox[4] - (bbox[4]-bbox[2])*0.1,
(bbox[4]-bbox[2])*0.05)
# Add scale bar
sbar <- function(loc, length_km) {
# Draw scale bar (approximating scale for demonstration)
length_deg <- length_km / 111 # 1 degree ≈ 111 km at the equator
lines(c(loc[1], loc[1] + length_deg), c(loc[2], loc[2]), lwd = 2)
text(loc[1] + length_deg/2, loc[2] - (bbox[4]-bbox[2])*0.01,
labels = paste(length_km, "km"), pos = 1)
}
# Add scale bar at bottom left
sbar(c(bbox[1] + (bbox[3]-bbox[1])*0.1, bbox[2] + (bbox[4]-bbox[2])*0.1), 5)
# Add source information
mtext("Source: NYC Department of Health COVID-19 Data (Synthetic Data for Demonstration)",
side = 1, line = -2, adj = 0, cex = 0.8)
```{r nursing_home_map} nursing_home_map <- ggplot() + # Base map of ZIP codes with COVID-19 case rate geom_sf(data = nyc_covid_analysis, aes(fill = CASE_RATE), color = “white”, size = 0.2) + # Add points for nursing homes geom_sf(data = nyc_health_sf %>% filter(grepl(“nursing|elderly|long term care”, facility_type, ignore.case = TRUE)), color = “darkblue”, size = 2, alpha = 0.7, shape = 17) + # Triangle shape for nursing homes # Color scale for COVID-19 rate scale_fill_viridis_c(option = “inferno”, name = “COVID-19 Cases,000”, breaks = pretty_breaks(n = 5), labels = scales::comma) + # Titles and captions labs(title = “COVID-19 Case Rate and Nursing Home Locations”, subtitle = “Triangle markers indicate nursing home and long-term care facilities”, caption = “Source: NYC COVID-19 Data & NYS Health Facility Data (Synthetic Data for Demonstration)”) + # Custom theme theme_minimal() + theme( plot.title = element_text(face = “bold”, size = 16), plot.subtitle = element_text(size = 12), plot.caption = element_text(hjust = 0, size = 9), panel.grid = element_line(color = “grey90”, size = 0.2), panel.background = element_rect(fill = “white”), legend.position = “right”, legend.title = element_text(face = “bold”), legend.background = element_rect(fill = “white”, color = “grey90”) )
nursing_home_map
## Multi-Map Analysis: COVID-19 and Community Factors
```{r multi_map_figure}
# Map 1: COVID-19 Case Rate
covid_map <- ggplot(nyc_covid_analysis) +
geom_sf(aes(fill = CASE_RATE), color = "white", size = 0.2) +
scale_fill_viridis_c(option = "inferno",
name = "COVID-19 Cases\nper 100,000",
breaks = pretty_breaks(n = 5),
labels = scales::comma) +
labs(title = "COVID-19 Case Rate") +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 14),
legend.position = "bottom",
legend.key.width = unit(1, "cm")
)
# Map 2: Nursing Home Density
nursing_map <- ggplot(nyc_covid_analysis) +
geom_sf(aes(fill = nursing_home_density), color = "white", size = 0.2) +
scale_fill_viridis_c(option = "viridis",
name = "Nursing Homes\nper 10,000 People",
breaks = pretty_breaks(n = 5)) +
labs(title = "Nursing Home Density") +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 14),
legend.position = "bottom",
legend.key.width = unit(1, "cm")
)
# Create graticule for food store map
graticule <- st_graticule(nyc_covid_analysis)
# Map 3: Food Store Density with graticule
food_map <- ggplot() +
# Add graticule
geom_sf(data = graticule, color = "grey80", size = 0.2, linetype = "dashed") +
# Add ZIP codes with food store density
geom_sf(data = nyc_covid_analysis, aes(fill = food_store_density),
color = "white", size = 0.2) +
scale_fill_viridis_c(option = "mako",
name = "Food Stores\nper 10,000 People",
breaks = pretty_breaks(n = 5)) +
labs(title = "Food Store Density") +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 14),
legend.position = "bottom",
legend.key.width = unit(1, "cm")
)
# Label top 5 ZIP codes with highest COVID-19 rates
top_covid_zips <- nyc_covid_analysis %>%
arrange(desc(CASE_RATE)) %>%
head(5)
# Create centroids for labels
top_covid_centroids <- st_centroid(top_covid_zips)
# Add labels to COVID map
covid_map_labeled <- covid_map +
geom_sf_text(data = top_covid_centroids,
aes(label = zipcode),
size = 3,
color = "white",
fontface = "bold")
# Combine maps using patchwork
multi_map <- covid_map_labeled + nursing_map + food_map +
plot_layout(ncol = 3) +
plot_annotation(
title = "Relationship Between COVID-19 Rate and Community Factors",
subtitle = "Higher nursing home density and lower food store density may correlate with COVID-19 cases",
caption = "Source: NYC COVID-19 Data, NYS Health Facility Data, and NYS Retail Food Store Data (Synthetic Data for Demonstration)",
theme = theme(
plot.title = element_text(face = "bold", size = 16),
plot.subtitle = element_text(size = 12),
plot.caption = element_text(hjust = 0, size = 9)
)
)
# Display the multi-map figure
multi_map
```{r interactive_map} # Using tmap for interactive map tmap_mode(“view”) # Switch to interactive view mode
tm_map <- tm_shape(nyc_covid_analysis) + tm_polygons(col = “CASE_RATE”, title = “COVID-19 Rate per 100,000”, palette = “YlOrRd”, style = “quantile”, n = 7, popup.vars = c(“ZIP Code” = “zipcode”, “COVID-19 Cases” = “COVID_CASE_COUNT”, “Population” = “POPULATION”, “COVID-19 Rate” = “CASE_RATE”, “Nursing Homes” = “nursing_home_count”, “Food Stores” = “food_store_count”)) + # Add nursing homes as points with popup tm_shape(nyc_health_sf %>% filter(grepl(“nursing|elderly|long term care”, facility_type, ignore.case = TRUE))) + tm_dots(col = “blue”, size = 0.1, title = “Nursing Homes”, popup.vars = c(“Facility Name” = “facility_name”, “Facility Type” = “facility_type”), id = “facility_name”) + # Add hospitals as points with popup tm_shape(nyc_health_sf %>% filter(grepl(“hospital”, facility_type, ignore.case = TRUE))) + tm_dots(col = “red”, size = 0.1, title = “Hospitals”, popup.vars = c(“Facility Name” = “facility_name”, “Facility Type” = “facility_type”), id = “facility_name”) + # Layout options tm_layout(title = “NYC COVID-19 Rate and Healthcare Facilities”, legend.outside = TRUE, legend.position = c(“right”, “bottom”), legend.bg.color = “white”, legend.frame = TRUE) + # Add a basemap tm_basemap(c(“Esri.WorldStreetMap”, “OpenStreetMap”))
tm_map
## Statistical Analysis
```{r statistical_analysis}
# Calculate correlation between COVID-19 rate and other factors
cor_nursing <- cor(nyc_covid_analysis$CASE_RATE, nyc_covid_analysis$nursing_home_density,
use = "complete.obs")
cor_food <- cor(nyc_covid_analysis$CASE_RATE, nyc_covid_analysis$food_store_density,
use = "complete.obs")
# Print correlation results in a nice table
knitr::kable(
data.frame(
Factor = c("Nursing Home Density", "Food Store Density"),
Correlation_with_COVID19_Rate = c(cor_nursing, cor_food)
),
caption = "Correlation between COVID-19 rates and community factors",
digits = 3
)
# Create a scatter plot with regression line
scatter_plot <- ggplot(nyc_covid_analysis, aes(x = nursing_home_density, y = CASE_RATE)) +
geom_point(aes(size = POPULATION/10000, color = food_store_density), alpha = 0.7) +
geom_smooth(method = "lm", se = TRUE, color = "red", size = 1) +
scale_color_viridis_c(option = "plasma", name = "Food Stores\nper 10,000 People") +
scale_size_continuous(name = "Population (10,000s)") +
labs(title = "Relationship Between COVID-19 Rate and Nursing Home Density",
subtitle = paste0("Correlation: r = ", round(cor_nursing, 3)),
x = "Nursing Homes per 10,000 People",
y = "COVID-19 Cases per 100,000 Population",
caption = "Size indicates population, color indicates food store density") +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 14),
legend.position = "right"
)
# Display the scatter plot
scatter_plot
These findings suggest that community factors, particularly those related to vulnerable populations and resource access, played a significant role in the spatial patterns of COVID-19 spread in urban environments.