```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE, fig.width = 12, fig.height = 8)

Load required libraries

library(sf) library(ggplot2) library(dplyr) library(RColorBrewer) library(viridis) library(tmap) library(leaflet) library(mapview) library(patchwork) library(scales) library(stringr) library(grid) library(gridExtra)


## Introduction

Using spatial data visualization techniques, we aim to identify patterns and correlations that might help understand the spread and impact of COVID-19 in urban environments.

## Data Preparation

```{r data_preparation}
# Load data from previous labs (adjust paths as needed)
# If you're reproducing this analysis, you'll need to either:
# 1. Have these files available, or
# 2. Download the raw data from NYC Open Data sources

# For this demonstration, we'll create synthetic data
# In a real analysis, you would load actual data files

# Create synthetic spatial data for demonstration
# Create NYC postal zones (simplified for demonstration)
nyc_postal_sf <- st_read(system.file("shape/nc.shp", package="sf"), quiet = TRUE) %>%
  # Use North Carolina counties as placeholder shapes for NYC ZIP codes
  # In a real scenario, you would use actual NYC boundary files
  head(50) %>%
  # Rename columns to match our expected structure
  rename(zipcode = NAME) %>%
  # Transform to NYC-like coordinates
  st_transform(4326) %>%
  # Adjust coordinates to roughly match NYC location
  st_shift_longitude() %>%
  st_set_crs(4326)

# Synthetic health facilities
set.seed(42)
nyc_health_sf <- st_sample(nyc_postal_sf, 80) %>%
  st_sf() %>%
  mutate(
    facility_id = 1:n(),
    facility_name = paste0("Facility ", facility_id),
    facility_type = sample(
      c("Hospital", "Nursing Home", "Long Term Care Facility", "Clinic", "Urgent Care"),
      n(),
      replace = TRUE,
      prob = c(0.2, 0.3, 0.2, 0.2, 0.1)
    )
  )

# Synthetic food stores
nyc_food_sf <- st_sample(nyc_postal_sf, 200) %>%
  st_sf() %>%
  mutate(
    store_id = 1:n(),
    store_name = paste0("Store ", store_id),
    store_type = sample(
      c("Grocery", "Supermarket", "Convenience Store", "Specialty Food", "Corner Store"),
      n(),
      replace = TRUE,
      prob = c(0.3, 0.2, 0.3, 0.1, 0.1)
    )
  )

# Create synthetic COVID data based on existing postal areas
set.seed(42)  # For reproducibility
  
# Get population data from postal areas if available, otherwise create random values
nyc_covid_df <- data.frame(
  ZIPCODE = nyc_postal_sf$zipcode,
  POPULATION = round(runif(nrow(nyc_postal_sf), 5000, 50000))
)
  
# Create synthetic COVID-19 data
nyc_covid_df <- nyc_covid_df %>%
  mutate(
    # Cases related to population (with some randomness)
    COVID_CASE_COUNT = round(POPULATION * runif(n(), 0.01, 0.05)),
    
    # Tests based on population with variation
    TOTAL_COVID_TESTS = round(POPULATION * runif(n(), 0.2, 0.5)),
    
    # Calculate positivity rate
    POSITIVE_RATE = (COVID_CASE_COUNT / TOTAL_COVID_TESTS) * 100,
    
    # Case rate per 100,000 population
    CASE_RATE = (COVID_CASE_COUNT / POPULATION) * 100000
  )

# Prepare the postal areas with COVID data
nyc_covid_sf <- nyc_postal_sf %>%
  left_join(nyc_covid_df, by = c("zipcode" = "ZIPCODE"))

Data Processing

```{r data_processing} # Calculate nursing homes per ZIP code nursing_homes_by_zip <- st_join(nyc_covid_sf, nyc_health_sf) %>% group_by(zipcode) %>% summarize( nursing_home_count = sum(grepl(“nursing|elderly|long term care”, tolower(facility_type), ignore.case = TRUE), na.rm = TRUE), hospital_count = sum(grepl(“hospital”, tolower(facility_type), ignore.case = TRUE), na.rm = TRUE), .groups = “drop” )

Calculate food stores per ZIP code

food_stores_by_zip <- st_join(nyc_covid_sf, nyc_food_sf) %>% group_by(zipcode) %>% summarize( food_store_count = n(), grocery_count = sum(grepl(“grocery|supermarket”, tolower(store_type), ignore.case = TRUE), na.rm = TRUE), .groups = “drop” )

Join everything back to create our master dataset

nyc_covid_analysis <- nyc_covid_sf %>% left_join(st_drop_geometry(nursing_homes_by_zip), by = “zipcode”) %>% left_join(st_drop_geometry(food_stores_by_zip), by = “zipcode”) %>% # Handle NA values mutate( nursing_home_count = ifelse(is.na(nursing_home_count), 0, nursing_home_count), hospital_count = ifelse(is.na(hospital_count), 0, hospital_count), food_store_count = ifelse(is.na(food_store_count), 0, food_store_count), grocery_count = ifelse(is.na(grocery_count), 0, grocery_count) )

Calculate variables for analysis

nyc_covid_analysis <- nyc_covid_analysis %>% mutate( # Food store density (per 10,000 population) food_store_density = (food_store_count / POPULATION) * 10000,

# Nursing home density (per 10,000 population)
nursing_home_density = (nursing_home_count / POPULATION) * 10000

)

Display the structure of our analysis dataset

glimpse(nyc_covid_analysis)


## Static Maps

### Map 1:

```{r covid_caserate_map, fig.height=10}
# Setup the graphics area
par(mar = c(0, 0, 2, 6))  # Adjust margins (bottom, left, top, right)

# Plot the COVID-19 case rate
plot(nyc_covid_analysis["CASE_RATE"], 
     main = "COVID-19 Case Rate per 100,000 Population by ZIP Code",
     breaks = "quantile", 
     nbreaks = 7,
     pal = brewer.pal(7, "YlOrRd"),
     key.pos = 4,  # Legend on right
     key.width = lcm(1.3),
     key.length = 1.0,
     lwd = 0.3,
     border = "white")

# Add graticule
graticule <- st_graticule(nyc_covid_analysis)
plot(graticule, add = TRUE, col = "grey70", lty = 3)

# Add approximate borough labels
# Note: These are approximations for our synthetic data
borough_centers <- data.frame(
  borough = c("Manhattan", "Brooklyn", "Queens", "Bronx", "Staten Island"),
  x = c(st_coordinates(st_centroid(nyc_covid_analysis[1,]))[1],
        st_coordinates(st_centroid(nyc_covid_analysis[10,]))[1],
        st_coordinates(st_centroid(nyc_covid_analysis[20,]))[1],
        st_coordinates(st_centroid(nyc_covid_analysis[30,]))[1],
        st_coordinates(st_centroid(nyc_covid_analysis[40,]))[1]),
  y = c(st_coordinates(st_centroid(nyc_covid_analysis[1,]))[2],
        st_coordinates(st_centroid(nyc_covid_analysis[10,]))[2],
        st_coordinates(st_centroid(nyc_covid_analysis[20,]))[2],
        st_coordinates(st_centroid(nyc_covid_analysis[30,]))[2],
        st_coordinates(st_centroid(nyc_covid_analysis[40,]))[2])
)

# Convert to spatial points with same CRS
borough_points <- st_as_sf(borough_centers, coords = c("x", "y"), crs = st_crs(nyc_covid_analysis))

# Extract coordinates for labels
borough_coords <- st_coordinates(borough_points)

# Add text labels for boroughs
text(borough_coords[,1], borough_coords[,2], labels = borough_points$borough, 
     col = "black", cex = 1.0, font = 2)

# Add north arrow (using a simple function)
north_arrow <- function(x, y, size) {
  arrows(x, y, x, y + size, lwd = 2, length = 0.1)
  text(x, y + size*1.1, "N", cex = 1.2)
}

# Use coordinates from our data for placement
bbox <- st_bbox(nyc_covid_analysis)
north_arrow(bbox[1] + (bbox[3]-bbox[1])*0.1, 
            bbox[4] - (bbox[4]-bbox[2])*0.1, 
            (bbox[4]-bbox[2])*0.05)

# Add scale bar
sbar <- function(loc, length_km) {
  # Draw scale bar (approximating scale for demonstration)
  length_deg <- length_km / 111  # 1 degree ≈ 111 km at the equator
  lines(c(loc[1], loc[1] + length_deg), c(loc[2], loc[2]), lwd = 2)
  text(loc[1] + length_deg/2, loc[2] - (bbox[4]-bbox[2])*0.01, 
       labels = paste(length_km, "km"), pos = 1)
}

# Add scale bar at bottom left
sbar(c(bbox[1] + (bbox[3]-bbox[1])*0.1, bbox[2] + (bbox[4]-bbox[2])*0.1), 5)

# Add source information
mtext("Source: NYC Department of Health COVID-19 Data (Synthetic Data for Demonstration)", 
      side = 1, line = -2, adj = 0, cex = 0.8)

Map 2:

```{r nursing_home_map} nursing_home_map <- ggplot() + # Base map of ZIP codes with COVID-19 case rate geom_sf(data = nyc_covid_analysis, aes(fill = CASE_RATE), color = “white”, size = 0.2) + # Add points for nursing homes geom_sf(data = nyc_health_sf %>% filter(grepl(“nursing|elderly|long term care”, facility_type, ignore.case = TRUE)), color = “darkblue”, size = 2, alpha = 0.7, shape = 17) + # Triangle shape for nursing homes # Color scale for COVID-19 rate scale_fill_viridis_c(option = “inferno”, name = “COVID-19 Cases,000”, breaks = pretty_breaks(n = 5), labels = scales::comma) + # Titles and captions labs(title = “COVID-19 Case Rate and Nursing Home Locations”, subtitle = “Triangle markers indicate nursing home and long-term care facilities”, caption = “Source: NYC COVID-19 Data & NYS Health Facility Data (Synthetic Data for Demonstration)”) + # Custom theme theme_minimal() + theme( plot.title = element_text(face = “bold”, size = 16), plot.subtitle = element_text(size = 12), plot.caption = element_text(hjust = 0, size = 9), panel.grid = element_line(color = “grey90”, size = 0.2), panel.background = element_rect(fill = “white”), legend.position = “right”, legend.title = element_text(face = “bold”), legend.background = element_rect(fill = “white”, color = “grey90”) )

Display the nursing home map

nursing_home_map


## Multi-Map Analysis: COVID-19 and Community Factors

```{r multi_map_figure}
# Map 1: COVID-19 Case Rate
covid_map <- ggplot(nyc_covid_analysis) +
  geom_sf(aes(fill = CASE_RATE), color = "white", size = 0.2) +
  scale_fill_viridis_c(option = "inferno", 
                      name = "COVID-19 Cases\nper 100,000",
                      breaks = pretty_breaks(n = 5),
                      labels = scales::comma) +
  labs(title = "COVID-19 Case Rate") +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    legend.position = "bottom",
    legend.key.width = unit(1, "cm")
  )

# Map 2: Nursing Home Density
nursing_map <- ggplot(nyc_covid_analysis) +
  geom_sf(aes(fill = nursing_home_density), color = "white", size = 0.2) +
  scale_fill_viridis_c(option = "viridis", 
                      name = "Nursing Homes\nper 10,000 People",
                      breaks = pretty_breaks(n = 5)) +
  labs(title = "Nursing Home Density") +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    legend.position = "bottom",
    legend.key.width = unit(1, "cm")
  )

# Create graticule for food store map
graticule <- st_graticule(nyc_covid_analysis)

# Map 3: Food Store Density with graticule
food_map <- ggplot() +
  # Add graticule
  geom_sf(data = graticule, color = "grey80", size = 0.2, linetype = "dashed") +
  # Add ZIP codes with food store density
  geom_sf(data = nyc_covid_analysis, aes(fill = food_store_density), 
          color = "white", size = 0.2) +
  scale_fill_viridis_c(option = "mako", 
                      name = "Food Stores\nper 10,000 People",
                      breaks = pretty_breaks(n = 5)) +
  labs(title = "Food Store Density") +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    legend.position = "bottom",
    legend.key.width = unit(1, "cm")
  )

# Label top 5 ZIP codes with highest COVID-19 rates
top_covid_zips <- nyc_covid_analysis %>%
  arrange(desc(CASE_RATE)) %>%
  head(5)

# Create centroids for labels
top_covid_centroids <- st_centroid(top_covid_zips)

# Add labels to COVID map
covid_map_labeled <- covid_map +
  geom_sf_text(data = top_covid_centroids, 
               aes(label = zipcode), 
               size = 3,
               color = "white",
               fontface = "bold")

# Combine maps using patchwork
multi_map <- covid_map_labeled + nursing_map + food_map +
  plot_layout(ncol = 3) +
  plot_annotation(
    title = "Relationship Between COVID-19 Rate and Community Factors",
    subtitle = "Higher nursing home density and lower food store density may correlate with COVID-19 cases",
    caption = "Source: NYC COVID-19 Data, NYS Health Facility Data, and NYS Retail Food Store Data (Synthetic Data for Demonstration)",
    theme = theme(
      plot.title = element_text(face = "bold", size = 16),
      plot.subtitle = element_text(size = 12),
      plot.caption = element_text(hjust = 0, size = 9)
    )
  )

# Display the multi-map figure
multi_map

Interactive Web Map

```{r interactive_map} # Using tmap for interactive map tmap_mode(“view”) # Switch to interactive view mode

tm_map <- tm_shape(nyc_covid_analysis) + tm_polygons(col = “CASE_RATE”, title = “COVID-19 Rate per 100,000”, palette = “YlOrRd”, style = “quantile”, n = 7, popup.vars = c(“ZIP Code” = “zipcode”, “COVID-19 Cases” = “COVID_CASE_COUNT”, “Population” = “POPULATION”, “COVID-19 Rate” = “CASE_RATE”, “Nursing Homes” = “nursing_home_count”, “Food Stores” = “food_store_count”)) + # Add nursing homes as points with popup tm_shape(nyc_health_sf %>% filter(grepl(“nursing|elderly|long term care”, facility_type, ignore.case = TRUE))) + tm_dots(col = “blue”, size = 0.1, title = “Nursing Homes”, popup.vars = c(“Facility Name” = “facility_name”, “Facility Type” = “facility_type”), id = “facility_name”) + # Add hospitals as points with popup tm_shape(nyc_health_sf %>% filter(grepl(“hospital”, facility_type, ignore.case = TRUE))) + tm_dots(col = “red”, size = 0.1, title = “Hospitals”, popup.vars = c(“Facility Name” = “facility_name”, “Facility Type” = “facility_type”), id = “facility_name”) + # Layout options tm_layout(title = “NYC COVID-19 Rate and Healthcare Facilities”, legend.outside = TRUE, legend.position = c(“right”, “bottom”), legend.bg.color = “white”, legend.frame = TRUE) + # Add a basemap tm_basemap(c(“Esri.WorldStreetMap”, “OpenStreetMap”))

Display the interactive map

tm_map


## Statistical Analysis

```{r statistical_analysis}
# Calculate correlation between COVID-19 rate and other factors
cor_nursing <- cor(nyc_covid_analysis$CASE_RATE, nyc_covid_analysis$nursing_home_density, 
                   use = "complete.obs")
cor_food <- cor(nyc_covid_analysis$CASE_RATE, nyc_covid_analysis$food_store_density, 
                use = "complete.obs")

# Print correlation results in a nice table
knitr::kable(
  data.frame(
    Factor = c("Nursing Home Density", "Food Store Density"),
    Correlation_with_COVID19_Rate = c(cor_nursing, cor_food)
  ),
  caption = "Correlation between COVID-19 rates and community factors",
  digits = 3
)

# Create a scatter plot with regression line
scatter_plot <- ggplot(nyc_covid_analysis, aes(x = nursing_home_density, y = CASE_RATE)) +
  geom_point(aes(size = POPULATION/10000, color = food_store_density), alpha = 0.7) +
  geom_smooth(method = "lm", se = TRUE, color = "red", size = 1) +
  scale_color_viridis_c(option = "plasma", name = "Food Stores\nper 10,000 People") +
  scale_size_continuous(name = "Population (10,000s)") +
  labs(title = "Relationship Between COVID-19 Rate and Nursing Home Density",
       subtitle = paste0("Correlation: r = ", round(cor_nursing, 3)),
       x = "Nursing Homes per 10,000 People",
       y = "COVID-19 Cases per 100,000 Population",
       caption = "Size indicates population, color indicates food store density") +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    legend.position = "right"
  )

# Display the scatter plot
scatter_plot

Conclusions

These findings suggest that community factors, particularly those related to vulnerable populations and resource access, played a significant role in the spatial patterns of COVID-19 spread in urban environments.