This document presents a spatial analysis of COVID-19 data along with related factors, using various mapping techniques in R.
load("nyc_datasets.RData")
load("nyc_datasets.RData")
ls()
## [1] "nyc_covid_sf" "nyc_food_sf" "nyc_health_sf"
# Verify data was loaded
print(head(nyc_covid_sf))
## Simple feature collection with 6 features and 19 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 278.2591 ymin: 36.0729 xmax: 284.2272 ymax: 36.58973
## Geodetic CRS: WGS 84
## AREA PERIMETER CNTY_ CNTY_ID zipcode FIPS FIPSNO CRESS_ID BIR74 SID74
## 1 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 1
## 2 0.061 1.231 1827 1827 Alleghany 37005 37005 3 487 0
## 3 0.143 1.630 1828 1828 Surry 37171 37171 86 3188 5
## 4 0.070 2.968 1831 1831 Currituck 37053 37053 27 508 1
## 5 0.153 2.206 1832 1832 Northampton 37131 37131 66 1421 9
## 6 0.097 1.670 1833 1833 Hertford 37091 37091 46 1452 7
## NWBIR74 BIR79 SID79 NWBIR79 POPULATION COVID_CASE_COUNT TOTAL_COVID_TESTS
## 1 10 1364 0 19 46166 1274 9548
## 2 10 542 3 12 47168 1559 16696
## 3 208 3616 6 260 17876 231 6958
## 4 123 830 2 145 42370 703 13797
## 5 1066 1606 3 1197 33879 1341 15712
## 6 954 1838 5 1237 28359 823 6591
## POSITIVE_RATE CASE_RATE geometry
## 1 13.343109 2759.607 MULTIPOLYGON (((278.5274 36...
## 2 9.337566 3305.207 MULTIPOLYGON (((278.7603 36...
## 3 3.319920 1292.235 MULTIPOLYGON (((279.5439 36...
## 4 5.095311 1659.193 MULTIPOLYGON (((283.9914 36...
## 5 8.534878 3958.204 MULTIPOLYGON (((282.7826 36...
## 6 12.486724 2902.077 MULTIPOLYGON (((283.2553 36...
print(head(nyc_health_sf))
## Simple feature collection with 6 features and 3 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 279.0947 ymin: 35.88262 xmax: 283.0081 ymax: 36.41002
## Geodetic CRS: WGS 84
## geometry facility_id facility_name facility_type
## 1 POINT (279.0947 36.41002) 1 Facility 1 Long Term Care Facility
## 2 POINT (283.0081 36.31628) 2 Facility 2 Nursing Home
## 3 POINT (281.6514 35.98873) 3 Facility 3 Nursing Home
## 4 POINT (280.7696 36.07965) 4 Facility 4 Nursing Home
## 5 POINT (282.3333 36.08139) 5 Facility 5 Long Term Care Facility
## 6 POINT (281.761 35.88262) 6 Facility 6 Nursing Home
nursing_homes_by_zip <- st_join(nyc_covid_sf, nyc_health_sf) %>%
group_by(zipcode) %>%
summarize(
nursing_home_count = sum(grepl("nursing|elderly|long term care",
tolower(facility_type),
ignore.case = TRUE), na.rm = TRUE),
hospital_count = sum(grepl("hospital",
tolower(facility_type),
ignore.case = TRUE), na.rm = TRUE),
.groups = "drop"
)
food_stores_by_zip <- st_join(nyc_covid_sf, nyc_food_sf) %>%
group_by(zipcode) %>%
summarize(
food_store_count = n(),
grocery_count = sum(grepl("grocery|supermarket",
tolower(store_type),
ignore.case = TRUE), na.rm = TRUE),
.groups = "drop"
)
# Join and prepare nyc_covid_analysis
nyc_covid_analysis <- nyc_covid_sf %>%
left_join(st_drop_geometry(nursing_homes_by_zip), by = "zipcode") %>%
left_join(st_drop_geometry(food_stores_by_zip), by = "zipcode") %>%
mutate(
nursing_home_count = ifelse(is.na(nursing_home_count), 0, nursing_home_count),
hospital_count = ifelse(is.na(hospital_count), 0, hospital_count),
food_store_count = ifelse(is.na(food_store_count), 0, food_store_count),
grocery_count = ifelse(is.na(grocery_count), 0, grocery_count)
) %>%
mutate(
food_store_density = (food_store_count / POPULATION) * 10000,
nursing_home_density = (nursing_home_count / POPULATION) * 10000
)
nyc_covid_analysis <- nyc_covid_analysis %>%
mutate(
# Food store density (per 10,000 population)
food_store_density = (food_store_count / POPULATION) * 10000,
# Nursing home density (per 10,000 population)
nursing_home_density = (nursing_home_count / POPULATION) * 10000
)
png("nyc_covid_caserate_map.png", width = 12, height = 10, units = "in", res = 300)
# Setup the graphics area
par(mar = c(0, 0, 2, 6)) # Adjust margins (bottom, left, top, right)
# Plot the COVID-19 case rate
plot(nyc_covid_analysis["CASE_RATE"],
main = "COVID-19 Case Rate per 100,000 Population in NYC by ZIP Code",
breaks = "quantile",
nbreaks = 7,
pal = brewer.pal(7, "YlOrRd"),
key.pos = 4, # Legend on right
key.width = lcm(1.3),
key.length = 1.0,
lwd = 0.3,
border = "white")
# Add graticule
graticule <- st_graticule(nyc_covid_analysis)
plot(graticule, add = TRUE, col = "grey70", lty = 3)
# Add text labels for boroughs (in the approximate center of each)
borough_centers <- data.frame(
borough = c("Manhattan", "Brooklyn", "Queens", "Bronx", "Staten Island"),
x = c(-74.00, -73.95, -73.82, -73.87, -74.15),
y = c(40.78, 40.65, 40.73, 40.85, 40.58)
)
# Convert to spatial points with same CRS
borough_points <- st_as_sf(borough_centers, coords = c("x", "y"), crs = 4326)
borough_points <- st_transform(borough_points, st_crs(nyc_covid_analysis))
# Extract coordinates for labels
borough_coords <- st_coordinates(borough_points)
# Add text labels for boroughs
text(borough_coords[,1], borough_coords[,2], labels = borough_points$borough,
col = "black", cex = 1.0, font = 2)
# Add north arrow
north_arrow <- function(x, y, size, ...) {
arrows(x, y, x, y + size, ...)
text(x, y + size*1.1, "N", ...)
}
north_arrow(-74.19, 40.89, 0.05, lwd = 2, length = 0.1, cex = 1.2)
# Add scale bar
sbar <- function(loc, length_km, ...) {
# Convert km to degrees (approx at this latitude)
length_deg <- length_km / 111 # 1 degree ≈ 111 km at the equator
# Draw scale bar
lines(c(loc[1], loc[1] + length_deg), c(loc[2], loc[2]), ...)
text(loc[1] + length_deg/2, loc[2] - 0.01,
labels = paste(length_km, "km"),
pos = 1, ...)
}
sbar(c(-74.25, 40.53), 5, lwd = 2, col = "black")
# Add title and source information
title(main = "COVID-19 Case Rate per 100,000 Population",
sub = "Source: NYC Department of Health COVID-19 Data, May 2025",
cex.main = 1.5, font.main = 2, cex.sub = 0.8)
dev.off()
## quartz_off_screen
## 2
nursing_home_map <- ggplot() +
# Base map of ZIP codes with COVID-19 case rate
geom_sf(data = nyc_covid_analysis,
aes(fill = CASE_RATE),
color = "white",
size = 0.2) +
# Add points for nursing homes
geom_sf(data = nyc_health_sf %>%
filter(grepl("nursing|elderly|long term care", facility_type, ignore.case = TRUE)),
color = "darkblue",
size = 2,
alpha = 0.7,
shape = 17) + # Triangle shape for nursing homes
# Color scale for COVID-19 rate
scale_fill_viridis_c(option = "inferno",
name = "COVID-19 Cases\nper 100,000",
breaks = pretty_breaks(n = 5),
labels = scales::comma) +
# Titles and captions
labs(title = "COVID-19 Case Rate and Nursing Home Locations in NYC",
subtitle = "Triangle markers indicate nursing home and long-term care facilities",
caption = "Source: NYC COVID-19 Data & NYS Health Facility Data") +
# Custom theme
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 16),
plot.subtitle = element_text(size = 12),
plot.caption = element_text(hjust = 0, size = 9),
panel.grid = element_line(color = "grey90", size = 0.2),
panel.background = element_rect(fill = "white"),
legend.position = "right",
legend.title = element_text(face = "bold"),
legend.background = element_rect(fill = "white", color = "grey90")
)
# Save the nursing home map
ggsave("nyc_covid_nursinghomes_map.png", nursing_home_map, width = 12, height = 10, dpi = 300)
glimpse(nyc_covid_analysis)
## Rows: 50
## Columns: 26
## $ AREA <dbl> 0.114, 0.061, 0.143, 0.070, 0.153, 0.097, 0.062, …
## $ PERIMETER <dbl> 1.442, 1.231, 1.630, 2.968, 2.206, 1.670, 1.547, …
## $ CNTY_ <dbl> 1825, 1827, 1828, 1831, 1832, 1833, 1834, 1835, 1…
## $ CNTY_ID <dbl> 1825, 1827, 1828, 1831, 1832, 1833, 1834, 1835, 1…
## $ zipcode <chr> "Ashe", "Alleghany", "Surry", "Currituck", "North…
## $ FIPS <chr> "37009", "37005", "37171", "37053", "37131", "370…
## $ FIPSNO <dbl> 37009, 37005, 37171, 37053, 37131, 37091, 37029, …
## $ CRESS_ID <int> 5, 3, 86, 27, 66, 46, 15, 37, 93, 85, 17, 79, 39,…
## $ BIR74 <dbl> 1091, 487, 3188, 508, 1421, 1452, 286, 420, 968, …
## $ SID74 <dbl> 1, 0, 5, 1, 9, 7, 0, 0, 4, 1, 2, 16, 4, 4, 4, 18,…
## $ NWBIR74 <dbl> 10, 10, 208, 123, 1066, 954, 115, 254, 748, 160, …
## $ BIR79 <dbl> 1364, 542, 3616, 830, 1606, 1838, 350, 594, 1190,…
## $ SID79 <dbl> 0, 3, 6, 2, 3, 5, 2, 2, 2, 5, 2, 5, 4, 4, 6, 17, …
## $ NWBIR79 <dbl> 19, 12, 260, 145, 1197, 1237, 139, 371, 844, 176,…
## $ POPULATION <dbl> 46166, 47168, 17876, 42370, 33879, 28359, 38146, …
## $ COVID_CASE_COUNT <dbl> 1274, 1559, 231, 703, 1341, 823, 1427, 532, 1033,…
## $ TOTAL_COVID_TESTS <dbl> 9548, 16696, 6958, 13797, 15712, 6591, 18847, 309…
## $ POSITIVE_RATE <dbl> 13.343109, 9.337566, 3.319920, 5.095311, 8.534878…
## $ CASE_RATE <dbl> 2759.607, 3305.207, 1292.235, 1659.193, 3958.204,…
## $ nursing_home_count <int> 1, 0, 3, 2, 0, 4, 1, 0, 1, 2, 1, 1, 0, 0, 0, 4, 0…
## $ hospital_count <int> 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ food_store_count <int> 3, 2, 7, 3, 3, 2, 3, 3, 4, 5, 6, 6, 4, 4, 1, 5, 1…
## $ grocery_count <int> 1, 2, 6, 2, 2, 1, 1, 1, 1, 1, 4, 2, 3, 2, 1, 3, 1…
## $ geometry <MULTIPOLYGON [°]> MULTIPOLYGON (((278.5274 36..., MULT…
## $ food_store_density <dbl> 0.6498289, 0.4240163, 3.9158648, 0.7080481, 0.885…
## $ nursing_home_density <dbl> 0.2166096, 0.0000000, 1.6782278, 0.4720321, 0.000…
par(mar = c(0, 0, 2, 6)) # Adjust margins (bottom, left, top, right)
# Plot the COVID-19 case rate
plot(nyc_covid_analysis["CASE_RATE"],
main = "COVID-19 Case Rate per 100,000 Population by ZIP Code",
breaks = "quantile",
nbreaks = 7,
pal = brewer.pal(7, "YlOrRd"),
key.pos = 4, # Legend on right
key.width = lcm(1.3),
key.length = 1.0,
lwd = 0.3,
border = "white")
# Add graticule
graticule <- st_graticule(nyc_covid_analysis)
plot(graticule, add = TRUE, col = "grey70", lty = 3)
# Add approximate borough labels
borough_centers <- data.frame(
borough = c("Manhattan", "Brooklyn", "Queens", "Bronx", "Staten Island"),
x = c(st_coordinates(st_centroid(nyc_covid_analysis[1,]))[1],
st_coordinates(st_centroid(nyc_covid_analysis[10,]))[1],
st_coordinates(st_centroid(nyc_covid_analysis[20,]))[1],
st_coordinates(st_centroid(nyc_covid_analysis[30,]))[1],
st_coordinates(st_centroid(nyc_covid_analysis[40,]))[1]),
y = c(st_coordinates(st_centroid(nyc_covid_analysis[1,]))[2],
st_coordinates(st_centroid(nyc_covid_analysis[10,]))[2],
st_coordinates(st_centroid(nyc_covid_analysis[20,]))[2],
st_coordinates(st_centroid(nyc_covid_analysis[30,]))[2],
st_coordinates(st_centroid(nyc_covid_analysis[40,]))[2])
)
# Convert to spatial points with same CRS
borough_points <- st_as_sf(borough_centers, coords = c("x", "y"), crs = st_crs(nyc_covid_analysis))
# Extract coordinates for labels
borough_coords <- st_coordinates(borough_points)
# Add text labels for boroughs
text(borough_coords[,1], borough_coords[,2], labels = borough_points$borough,
col = "black", cex = 1.0, font = 2)
# Add north arrow
north_arrow <- function(x, y, size) {
arrows(x, y, x, y + size, lwd = 2, length = 0.1)
text(x, y + size*1.1, "N", cex = 1.2)
}
bbox <- st_bbox(nyc_covid_analysis)
north_arrow(bbox[1] + (bbox[3]-bbox[1])*0.1,
bbox[4] - (bbox[4]-bbox[2])*0.1,
(bbox[4]-bbox[2])*0.05)
# Add scale bar
sbar <- function(loc, length_km) {
# Draw scale bar (approximating scale for demonstration)
length_deg <- length_km / 111 # 1 degree ≈ 111 km at the equator
lines(c(loc[1], loc[1] + length_deg), c(loc[2], loc[2]), lwd = 2)
text(loc[1] + length_deg/2, loc[2] - (bbox[4]-bbox[2])*0.01,
labels = paste(length_km, "km"), pos = 1)
}
# Add scale bar at bottom left
sbar(c(bbox[1] + (bbox[3]-bbox[1])*0.1, bbox[2] + (bbox[4]-bbox[2])*0.1), 5)
# Add source information
mtext("Source: NYC Department of Health COVID-19 Data",
side = 1, line = -2, adj = 0, cex = 0.8)
nursing_home_map <- ggplot() +
# Base map of ZIP codes with COVID-19 case rate
geom_sf(data = nyc_covid_analysis,
aes(fill = CASE_RATE),
color = "white",
size = 0.2) +
# Add points for nursing homes
geom_sf(data = nyc_health_sf %>%
filter(grepl("nursing|elderly|long term care", facility_type, ignore.case = TRUE)),
color = "darkblue",
size = 2,
alpha = 0.7,
shape = 17) + # Triangle shape for nursing homes
# Color scale for COVID-19 rate
scale_fill_viridis_c(option = "inferno",
name = "COVID-19 Cases\nper 100,000",
breaks = pretty_breaks(n = 5),
labels = scales::comma) +
# Titles and captions
labs(title = "COVID-19 Case Rate and Nursing Home Locations",
subtitle = "Triangle markers indicate nursing home and long-term care facilities",
caption = "Source: NYC COVID-19 Data & NYS Health Facility Data") +
# Custom theme
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 16),
plot.subtitle = element_text(size = 12),
plot.caption = element_text(hjust = 0, size = 9),
panel.grid = element_line(color = "grey90", size = 0.2),
panel.background = element_rect(fill = "white"),
legend.position = "right",
legend.title = element_text(face = "bold"),
legend.background = element_rect(fill = "white", color = "grey90")
)
nursing_home_map
This analysis demonstrates the spatial relationship between COVID-19 case rates, nursing home density, and food store density across NYC ZIP codes.
The maps suggest a possible correlation between higher nursing home density and COVID-19 case rates, which is further supported by our correlation analysis.
The interactive maps allow for detailed exploration of these relationships at the ZIP code level.