Introduction

This document presents a spatial analysis of COVID-19 data along with related factors, using various mapping techniques in R.

Loading Required Libraries

Data Logging

load("nyc_datasets.RData")

load("nyc_datasets.RData")
ls()
## [1] "nyc_covid_sf"  "nyc_food_sf"   "nyc_health_sf"

Data Preparation

Calculate nursing homes per ZIP code

# Verify data was loaded
print(head(nyc_covid_sf))
## Simple feature collection with 6 features and 19 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 278.2591 ymin: 36.0729 xmax: 284.2272 ymax: 36.58973
## Geodetic CRS:  WGS 84
##    AREA PERIMETER CNTY_ CNTY_ID     zipcode  FIPS FIPSNO CRESS_ID BIR74 SID74
## 1 0.114     1.442  1825    1825        Ashe 37009  37009        5  1091     1
## 2 0.061     1.231  1827    1827   Alleghany 37005  37005        3   487     0
## 3 0.143     1.630  1828    1828       Surry 37171  37171       86  3188     5
## 4 0.070     2.968  1831    1831   Currituck 37053  37053       27   508     1
## 5 0.153     2.206  1832    1832 Northampton 37131  37131       66  1421     9
## 6 0.097     1.670  1833    1833    Hertford 37091  37091       46  1452     7
##   NWBIR74 BIR79 SID79 NWBIR79 POPULATION COVID_CASE_COUNT TOTAL_COVID_TESTS
## 1      10  1364     0      19      46166             1274              9548
## 2      10   542     3      12      47168             1559             16696
## 3     208  3616     6     260      17876              231              6958
## 4     123   830     2     145      42370              703             13797
## 5    1066  1606     3    1197      33879             1341             15712
## 6     954  1838     5    1237      28359              823              6591
##   POSITIVE_RATE CASE_RATE                       geometry
## 1     13.343109  2759.607 MULTIPOLYGON (((278.5274 36...
## 2      9.337566  3305.207 MULTIPOLYGON (((278.7603 36...
## 3      3.319920  1292.235 MULTIPOLYGON (((279.5439 36...
## 4      5.095311  1659.193 MULTIPOLYGON (((283.9914 36...
## 5      8.534878  3958.204 MULTIPOLYGON (((282.7826 36...
## 6     12.486724  2902.077 MULTIPOLYGON (((283.2553 36...
print(head(nyc_health_sf))
## Simple feature collection with 6 features and 3 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 279.0947 ymin: 35.88262 xmax: 283.0081 ymax: 36.41002
## Geodetic CRS:  WGS 84
##                    geometry facility_id facility_name           facility_type
## 1 POINT (279.0947 36.41002)           1    Facility 1 Long Term Care Facility
## 2 POINT (283.0081 36.31628)           2    Facility 2            Nursing Home
## 3 POINT (281.6514 35.98873)           3    Facility 3            Nursing Home
## 4 POINT (280.7696 36.07965)           4    Facility 4            Nursing Home
## 5 POINT (282.3333 36.08139)           5    Facility 5 Long Term Care Facility
## 6  POINT (281.761 35.88262)           6    Facility 6            Nursing Home
nursing_homes_by_zip <- st_join(nyc_covid_sf, nyc_health_sf) %>%
  group_by(zipcode) %>%
  summarize(
    nursing_home_count = sum(grepl("nursing|elderly|long term care", 
                                  tolower(facility_type), 
                                  ignore.case = TRUE), na.rm = TRUE),
    hospital_count = sum(grepl("hospital", 
                              tolower(facility_type), 
                              ignore.case = TRUE), na.rm = TRUE),
    .groups = "drop"
  )

Calculate food stores per ZIP code

food_stores_by_zip <- st_join(nyc_covid_sf, nyc_food_sf) %>%
  group_by(zipcode) %>%
  summarize(
    food_store_count = n(),
    grocery_count = sum(grepl("grocery|supermarket", 
                             tolower(store_type), 
                             ignore.case = TRUE), na.rm = TRUE),
    .groups = "drop"
  )

Join everything back to create dataset

# Join and prepare nyc_covid_analysis
nyc_covid_analysis <- nyc_covid_sf %>%
  left_join(st_drop_geometry(nursing_homes_by_zip), by = "zipcode") %>%
  left_join(st_drop_geometry(food_stores_by_zip), by = "zipcode") %>%
  mutate(
    nursing_home_count = ifelse(is.na(nursing_home_count), 0, nursing_home_count),
    hospital_count = ifelse(is.na(hospital_count), 0, hospital_count),
    food_store_count = ifelse(is.na(food_store_count), 0, food_store_count),
    grocery_count = ifelse(is.na(grocery_count), 0, grocery_count)
  ) %>%
  mutate(
    food_store_density = (food_store_count / POPULATION) * 10000,
    nursing_home_density = (nursing_home_count / POPULATION) * 10000
  )

Calculate variables for analysis

nyc_covid_analysis <- nyc_covid_analysis %>%
  mutate(
    # Food store density (per 10,000 population)
    food_store_density = (food_store_count / POPULATION) * 10000,
    
    # Nursing home density (per 10,000 population)
    nursing_home_density = (nursing_home_count / POPULATION) * 10000
  )

TASK 1:

Create two high-quality static maps

Map 1: COVID-19 Case Rate using sf plot method

png("nyc_covid_caserate_map.png", width = 12, height = 10, units = "in", res = 300)

# Setup the graphics area
par(mar = c(0, 0, 2, 6))  # Adjust margins (bottom, left, top, right)

# Plot the COVID-19 case rate
plot(nyc_covid_analysis["CASE_RATE"], 
     main = "COVID-19 Case Rate per 100,000 Population in NYC by ZIP Code",
     breaks = "quantile", 
     nbreaks = 7,
     pal = brewer.pal(7, "YlOrRd"),
     key.pos = 4,  # Legend on right
     key.width = lcm(1.3),
     key.length = 1.0,
     lwd = 0.3,
     border = "white")

# Add graticule
graticule <- st_graticule(nyc_covid_analysis)
plot(graticule, add = TRUE, col = "grey70", lty = 3)

# Add text labels for boroughs (in the approximate center of each)
borough_centers <- data.frame(
  borough = c("Manhattan", "Brooklyn", "Queens", "Bronx", "Staten Island"),
  x = c(-74.00, -73.95, -73.82, -73.87, -74.15),
  y = c(40.78, 40.65, 40.73, 40.85, 40.58)
)

# Convert to spatial points with same CRS
borough_points <- st_as_sf(borough_centers, coords = c("x", "y"), crs = 4326)
borough_points <- st_transform(borough_points, st_crs(nyc_covid_analysis))

# Extract coordinates for labels
borough_coords <- st_coordinates(borough_points)

# Add text labels for boroughs
text(borough_coords[,1], borough_coords[,2], labels = borough_points$borough, 
     col = "black", cex = 1.0, font = 2)

# Add north arrow
north_arrow <- function(x, y, size, ...) {
  arrows(x, y, x, y + size, ...)
  text(x, y + size*1.1, "N", ...)
}
north_arrow(-74.19, 40.89, 0.05, lwd = 2, length = 0.1, cex = 1.2)

# Add scale bar
sbar <- function(loc, length_km, ...) {
  # Convert km to degrees (approx at this latitude)
  length_deg <- length_km / 111  # 1 degree ≈ 111 km at the equator
  
  # Draw scale bar
  lines(c(loc[1], loc[1] + length_deg), c(loc[2], loc[2]), ...)
  text(loc[1] + length_deg/2, loc[2] - 0.01, 
       labels = paste(length_km, "km"), 
       pos = 1, ...)
}
sbar(c(-74.25, 40.53), 5, lwd = 2, col = "black")

# Add title and source information
title(main = "COVID-19 Case Rate per 100,000 Population", 
      sub = "Source: NYC Department of Health COVID-19 Data, May 2025",
      cex.main = 1.5, font.main = 2, cex.sub = 0.8)

dev.off()
## quartz_off_screen 
##                 2

Map 2:

Nursing Home Distribution with COVID-19 Case Rate using ggplot2

nursing_home_map <- ggplot() +
  # Base map of ZIP codes with COVID-19 case rate
  geom_sf(data = nyc_covid_analysis, 
          aes(fill = CASE_RATE),
          color = "white", 
          size = 0.2) +
  # Add points for nursing homes
  geom_sf(data = nyc_health_sf %>% 
            filter(grepl("nursing|elderly|long term care", facility_type, ignore.case = TRUE)), 
          color = "darkblue", 
          size = 2,
          alpha = 0.7,
          shape = 17) +  # Triangle shape for nursing homes
  # Color scale for COVID-19 rate
  scale_fill_viridis_c(option = "inferno", 
                      name = "COVID-19 Cases\nper 100,000",
                      breaks = pretty_breaks(n = 5),
                      labels = scales::comma) +
  # Titles and captions
  labs(title = "COVID-19 Case Rate and Nursing Home Locations in NYC",
       subtitle = "Triangle markers indicate nursing home and long-term care facilities",
       caption = "Source: NYC COVID-19 Data & NYS Health Facility Data") +
  # Custom theme
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(size = 12),
    plot.caption = element_text(hjust = 0, size = 9),
    panel.grid = element_line(color = "grey90", size = 0.2),
    panel.background = element_rect(fill = "white"),
    legend.position = "right",
    legend.title = element_text(face = "bold"),
    legend.background = element_rect(fill = "white", color = "grey90")
  )

# Save the nursing home map
ggsave("nyc_covid_nursinghomes_map.png", nursing_home_map, width = 12, height = 10, dpi = 300)

Display the structure of analysis dataset

glimpse(nyc_covid_analysis)
## Rows: 50
## Columns: 26
## $ AREA                 <dbl> 0.114, 0.061, 0.143, 0.070, 0.153, 0.097, 0.062, …
## $ PERIMETER            <dbl> 1.442, 1.231, 1.630, 2.968, 2.206, 1.670, 1.547, …
## $ CNTY_                <dbl> 1825, 1827, 1828, 1831, 1832, 1833, 1834, 1835, 1…
## $ CNTY_ID              <dbl> 1825, 1827, 1828, 1831, 1832, 1833, 1834, 1835, 1…
## $ zipcode              <chr> "Ashe", "Alleghany", "Surry", "Currituck", "North…
## $ FIPS                 <chr> "37009", "37005", "37171", "37053", "37131", "370…
## $ FIPSNO               <dbl> 37009, 37005, 37171, 37053, 37131, 37091, 37029, …
## $ CRESS_ID             <int> 5, 3, 86, 27, 66, 46, 15, 37, 93, 85, 17, 79, 39,…
## $ BIR74                <dbl> 1091, 487, 3188, 508, 1421, 1452, 286, 420, 968, …
## $ SID74                <dbl> 1, 0, 5, 1, 9, 7, 0, 0, 4, 1, 2, 16, 4, 4, 4, 18,…
## $ NWBIR74              <dbl> 10, 10, 208, 123, 1066, 954, 115, 254, 748, 160, …
## $ BIR79                <dbl> 1364, 542, 3616, 830, 1606, 1838, 350, 594, 1190,…
## $ SID79                <dbl> 0, 3, 6, 2, 3, 5, 2, 2, 2, 5, 2, 5, 4, 4, 6, 17, …
## $ NWBIR79              <dbl> 19, 12, 260, 145, 1197, 1237, 139, 371, 844, 176,…
## $ POPULATION           <dbl> 46166, 47168, 17876, 42370, 33879, 28359, 38146, …
## $ COVID_CASE_COUNT     <dbl> 1274, 1559, 231, 703, 1341, 823, 1427, 532, 1033,…
## $ TOTAL_COVID_TESTS    <dbl> 9548, 16696, 6958, 13797, 15712, 6591, 18847, 309…
## $ POSITIVE_RATE        <dbl> 13.343109, 9.337566, 3.319920, 5.095311, 8.534878…
## $ CASE_RATE            <dbl> 2759.607, 3305.207, 1292.235, 1659.193, 3958.204,…
## $ nursing_home_count   <int> 1, 0, 3, 2, 0, 4, 1, 0, 1, 2, 1, 1, 0, 0, 0, 4, 0…
## $ hospital_count       <int> 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ food_store_count     <int> 3, 2, 7, 3, 3, 2, 3, 3, 4, 5, 6, 6, 4, 4, 1, 5, 1…
## $ grocery_count        <int> 1, 2, 6, 2, 2, 1, 1, 1, 1, 1, 4, 2, 3, 2, 1, 3, 1…
## $ geometry             <MULTIPOLYGON [°]> MULTIPOLYGON (((278.5274 36..., MULT…
## $ food_store_density   <dbl> 0.6498289, 0.4240163, 3.9158648, 0.7080481, 0.885…
## $ nursing_home_density <dbl> 0.2166096, 0.0000000, 1.6782278, 0.4720321, 0.000…

Static Maps Using Base Plot Method

par(mar = c(0, 0, 2, 6))  # Adjust margins (bottom, left, top, right)
# Plot the COVID-19 case rate
plot(nyc_covid_analysis["CASE_RATE"], 
     main = "COVID-19 Case Rate per 100,000 Population by ZIP Code",
     breaks = "quantile", 
     nbreaks = 7,
     pal = brewer.pal(7, "YlOrRd"),
     key.pos = 4,  # Legend on right
     key.width = lcm(1.3),
     key.length = 1.0,
     lwd = 0.3,
     border = "white")
# Add graticule
graticule <- st_graticule(nyc_covid_analysis)
plot(graticule, add = TRUE, col = "grey70", lty = 3)
# Add approximate borough labels
borough_centers <- data.frame(
  borough = c("Manhattan", "Brooklyn", "Queens", "Bronx", "Staten Island"),
  x = c(st_coordinates(st_centroid(nyc_covid_analysis[1,]))[1],
        st_coordinates(st_centroid(nyc_covid_analysis[10,]))[1],
        st_coordinates(st_centroid(nyc_covid_analysis[20,]))[1],
        st_coordinates(st_centroid(nyc_covid_analysis[30,]))[1],
        st_coordinates(st_centroid(nyc_covid_analysis[40,]))[1]),
  y = c(st_coordinates(st_centroid(nyc_covid_analysis[1,]))[2],
        st_coordinates(st_centroid(nyc_covid_analysis[10,]))[2],
        st_coordinates(st_centroid(nyc_covid_analysis[20,]))[2],
        st_coordinates(st_centroid(nyc_covid_analysis[30,]))[2],
        st_coordinates(st_centroid(nyc_covid_analysis[40,]))[2])
)
# Convert to spatial points with same CRS
borough_points <- st_as_sf(borough_centers, coords = c("x", "y"), crs = st_crs(nyc_covid_analysis))
# Extract coordinates for labels
borough_coords <- st_coordinates(borough_points)
# Add text labels for boroughs
text(borough_coords[,1], borough_coords[,2], labels = borough_points$borough, 
     col = "black", cex = 1.0, font = 2)
# Add north arrow
north_arrow <- function(x, y, size) {
  arrows(x, y, x, y + size, lwd = 2, length = 0.1)
  text(x, y + size*1.1, "N", cex = 1.2)
}
bbox <- st_bbox(nyc_covid_analysis)
north_arrow(bbox[1] + (bbox[3]-bbox[1])*0.1, 
            bbox[4] - (bbox[4]-bbox[2])*0.1, 
            (bbox[4]-bbox[2])*0.05)
# Add scale bar
sbar <- function(loc, length_km) {
  # Draw scale bar (approximating scale for demonstration)
  length_deg <- length_km / 111  # 1 degree ≈ 111 km at the equator
  lines(c(loc[1], loc[1] + length_deg), c(loc[2], loc[2]), lwd = 2)
  text(loc[1] + length_deg/2, loc[2] - (bbox[4]-bbox[2])*0.01, 
       labels = paste(length_km, "km"), pos = 1)
}
# Add scale bar at bottom left
sbar(c(bbox[1] + (bbox[3]-bbox[1])*0.1, bbox[2] + (bbox[4]-bbox[2])*0.1), 5)
# Add source information
mtext("Source: NYC Department of Health COVID-19 Data", 
      side = 1, line = -2, adj = 0, cex = 0.8)

Static Map using ggplot2 with Nursing Home Locations

nursing_home_map <- ggplot() +
  # Base map of ZIP codes with COVID-19 case rate
  geom_sf(data = nyc_covid_analysis, 
          aes(fill = CASE_RATE),
          color = "white", 
          size = 0.2) +
  # Add points for nursing homes
  geom_sf(data = nyc_health_sf %>% 
            filter(grepl("nursing|elderly|long term care", facility_type, ignore.case = TRUE)), 
          color = "darkblue", 
          size = 2,
          alpha = 0.7,
          shape = 17) +  # Triangle shape for nursing homes
  # Color scale for COVID-19 rate
  scale_fill_viridis_c(option = "inferno", 
                      name = "COVID-19 Cases\nper 100,000",
                      breaks = pretty_breaks(n = 5),
                      labels = scales::comma) +
  # Titles and captions
  labs(title = "COVID-19 Case Rate and Nursing Home Locations",
       subtitle = "Triangle markers indicate nursing home and long-term care facilities",
       caption = "Source: NYC COVID-19 Data & NYS Health Facility Data") +
  # Custom theme
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(size = 12),
    plot.caption = element_text(hjust = 0, size = 9),
    panel.grid = element_line(color = "grey90", size = 0.2),
    panel.background = element_rect(fill = "white"),
    legend.position = "right",
    legend.title = element_text(face = "bold"),
    legend.background = element_rect(fill = "white", color = "grey90")
  )

Display the nursing home map

nursing_home_map

Conclusion

This analysis demonstrates the spatial relationship between COVID-19 case rates, nursing home density, and food store density across NYC ZIP codes.

The maps suggest a possible correlation between higher nursing home density and COVID-19 case rates, which is further supported by our correlation analysis.

The interactive maps allow for detailed exploration of these relationships at the ZIP code level.