Analytics of deaths and storms

Read the data table from a csv file and the matching point geometries from a shapefile

Deaths.dt <- data.table::fread(here::here("data", "outputs", "All_Deaths_Storm_Effects.csv"))
Deaths.df <- Deaths.dt |>
  as.data.frame()
Deaths.sf <- sf::st_read(dsn = "data/outputs/Deaths", 
                         layer = "All_Deaths_Storm_Effects")
## Reading layer `All_Deaths_Storm_Effects' from data source 
##   `/Users/jelsner/Desktop/Projects/HO-storms/data/outputs/Deaths' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 6166095 features and 6 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -122.4109 ymin: 24.5465 xmax: -71.45662 ymax: 44.43933
## Geodetic CRS:  WGS 84
names(Deaths.sf)[1:5] <- names(Deaths.df)[1:5] # match the names
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#Deaths.df <- Deaths.df |>
#  mutate(SH = Storm_Category > 0)

total_deaths <- nrow(Deaths.df)
exposed_total <- Deaths.dt[Storm_Effect %in% c("Cleanup", "Impact", "Threat"), .N]
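
Since Deaths.dt and Deaths.df hold the same records, a quick consistency check (a minimal sketch) confirms the counts agree before any rates are computed:

# Sanity check: the data.table and data.frame copies should agree
stopifnot(nrow(Deaths.df) == Deaths.dt[, .N])
stopifnot(sum(Deaths.df$Storm_Effect %in% c("Cleanup", "Impact", "Threat")) ==
            exposed_total)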

# Calculate death counts and rates for specified storm effects
death_rates <- Deaths.df |>
  filter(Storm_Effect %in% c("Cleanup", "Impact", "Threat")) |>
  group_by(Storm_Effect) |>
#  group_by(Storm_Effect, SH) |>
  summarise(Deaths = n(), .groups = "drop") |>
  mutate(Death_Rate = Deaths / exposed_total)

# View result
print(death_rates)
## # A tibble: 3 × 3
##   Storm_Effect Deaths Death_Rate
##   <chr>         <int>      <dbl>
## 1 Cleanup       18706      0.332
## 2 Impact        18743      0.333
## 3 Threat        18853      0.335

This tells you, for example, that 33.3% of the deaths occurring under any storm effect happened under an Impact condition.
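
These shares can be formatted explicitly as percentages for readability (a minimal sketch; the scales package is assumed to be available):

# Express the within-group shares as readable percentages
death_rates |>
  mutate(Death_Pct = scales::percent(Death_Rate, accuracy = 0.1))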

Death rates per day

Get the number of deaths per day by Storm_Effect

# Group deaths by date and storm effect
daily_deaths <- Deaths.df |>
#  group_by(Death_Date, Storm_Effect, SH) |>
  group_by(Death_Date, Storm_Effect) |>
  summarise(deaths = n(), .groups = "drop")

Get the number of days for each storm effect. This step assumes Storm_Effect applies to the whole day, i.e., each Death_Date has a unique Storm_Effect.

num_days <- daily_deaths |>
#  distinct(Death_Date, Storm_Effect, SH) |>
  distinct(Death_Date, Storm_Effect) |>
#  count(Storm_Effect, SH, name = "num_days")
  count(Storm_Effect, name = "num_days")
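
That uniqueness assumption can be verified directly: any Death_Date appearing with more than one Storm_Effect would violate it (a minimal sketch):

# Dates carrying more than one Storm_Effect would break the assumption
daily_deaths |>
  count(Death_Date) |>
  filter(n > 1)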

Total deaths per storm effect

total_deaths <- daily_deaths |>
#  group_by(Storm_Effect, SH) |>
  group_by(Storm_Effect) |>
  summarise(total_deaths = sum(deaths), .groups = "drop")

Calculate daily death rate

# death_rates <- left_join(total_deaths, num_days, by = c("Storm_Effect", "SH")) |>
death_rates <- left_join(total_deaths, num_days, by = "Storm_Effect") |>
  mutate(daily_death_rate = total_deaths / num_days)

print(death_rates)
## # A tibble: 4 × 4
##   Storm_Effect total_deaths num_days daily_death_rate
##   <chr>               <int>    <int>            <dbl>
## 1 Cleanup             18706       45             416.
## 2 Impact              18743       45             417.
## 3 None              6109793    15205             402.
## 4 Threat              18853       45             419.

This tells us that, in this sample, the daily death rate is somewhat higher on storm-effect days than on non-storm-effect days.
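
A quick descriptive comparison divides each daily rate by the rate on non-storm-effect days (a minimal sketch):

# Descriptive rate ratios relative to "None" days
death_rates |>
  mutate(rate_ratio = daily_death_rate /
           daily_death_rate[Storm_Effect == "None"])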

Test this statistically using a Poisson regression model, with a non-storm-effect day as the reference level.

model_data <- left_join(total_deaths, num_days, by = "Storm_Effect")
model_data$Storm_Effect <- relevel(factor(model_data$Storm_Effect), ref = "None")

poisson_model <- glm(total_deaths ~ Storm_Effect + offset(log(num_days)),
                     data = model_data, family = poisson())
summary(poisson_model)
## 
## Call:
## glm(formula = total_deaths ~ Storm_Effect + offset(log(num_days)), 
##     family = poisson(), data = model_data)
## 
## Coefficients:
##                      Estimate Std. Error   z value Pr(>|z|)    
## (Intercept)         5.9960239  0.0004046 14820.969  < 2e-16 ***
## Storm_EffectCleanup 0.0339133  0.0073227     4.631 3.64e-06 ***
## Storm_EffectImpact  0.0358893  0.0073155     4.906 9.30e-07 ***
## Storm_EffectThreat  0.0417410  0.0072942     5.722 1.05e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance:  7.6839e+01  on 3  degrees of freedom
## Residual deviance: -4.7833e-10  on 0  degrees of freedom
## AIC: 60.497
## 
## Number of Fisher Scoring iterations: 2

This says that, given the sample of data and a Poisson model, mortality is significantly higher on storm-effect days than on non-storm-effect days, although the effect size is small.
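
Because the coefficients are on the log scale, exponentiating them recovers the fitted daily rates: the intercept alone gives the non-storm-effect rate, and adding each effect coefficient gives the rate on the corresponding storm-effect day (a minimal sketch):

# Fitted daily death rates implied by the model coefficients
b <- coef(poisson_model)
exp(b["(Intercept)"])         # baseline rate on "None" days (~402)
exp(b["(Intercept)"] + b[-1]) # rates on Cleanup, Impact, and Threat days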

Create a nice table of the model results. We exponentiate the model coefficients to express them as rate ratios and add 95% confidence intervals.

# Extract coefficients and 95% confidence intervals
coefs <- coef(summary(poisson_model))
confint_vals <- confint(poisson_model)  # May take a few seconds
## Waiting for profiling to be done...
# Build data frame of results
rate_ratios <- data.frame(
  Term = rownames(coefs),
  Estimate = coefs[, "Estimate"],
  Std_Error = coefs[, "Std. Error"],
  z_value = coefs[, "z value"],
  p_value = coefs[, "Pr(>|z|)"],
  RR = exp(coefs[, "Estimate"]),
  RR_lower = exp(confint_vals[, 1]),
  RR_upper = exp(confint_vals[, 2])
)

# Clean up names
rownames(rate_ratios) <- NULL
rate_ratios <- rate_ratios |>
  dplyr::select(Term, RR, RR_lower, RR_upper, p_value)

# Print the table
options(digits = 4)
print(rate_ratios)
##                  Term      RR RR_lower RR_upper   p_value
## 1         (Intercept) 401.828  401.509  402.147 0.000e+00
## 2 Storm_EffectCleanup   1.034    1.020    1.049 3.635e-06
## 3  Storm_EffectImpact   1.037    1.022    1.051 9.300e-07
## 4  Storm_EffectThreat   1.043    1.028    1.058 1.050e-08
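
For a more polished rendering, the same data frame can be passed to knitr::kable() (a sketch, assuming knitr is available):

# Render the rate-ratio table with kable
knitr::kable(rate_ratios, digits = 3,
             caption = "Rate ratios with 95% confidence intervals")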

Spatialize the results using ZIP Code Tabulation Areas (ZCTAs)

  1. Get Florida ZCTAs
library(tigris)
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
library(sf)
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
options(tigris_use_cache = TRUE)

# Get national ZCTAs and subset to those intersecting Florida
zctas_all <- zctas(cb = TRUE, starts_with = NULL, year = 2020)
florida <- states(cb = TRUE) %>% filter(STUSPS == "FL")
## Retrieving data for the year 2024
# Clip ZCTAs to the Florida boundary (st_intersection trims polygons at the border)
zctas_fl <- st_intersection(zctas_all, st_transform(florida, st_crs(zctas_all)))
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
zctas_fl <- st_make_valid(zctas_fl) # in case of invalid geometries
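
Note that st_intersection() clips ZCTA polygons at the state boundary. If whole ZCTA footprints are preferred, st_filter() selects intersecting polygons without clipping (a sketch under that alternative):

# Alternative: keep whole ZCTA polygons that intersect Florida (no clipping)
zctas_fl_whole <- st_filter(zctas_all,
                            st_transform(florida, st_crs(zctas_all)),
                            .predicate = st_intersects)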
  2. Join deaths to ZCTAs
Deaths.sf <- st_transform(Deaths.sf, st_crs(zctas_fl))  # Ensure CRS matches
deaths_with_zcta <- st_join(Deaths.sf, zctas_fl["GEOID20"])  # Join only GEOID20
  3. Fit a Poisson model at each ZCTA
library(dplyr)
library(purrr)
library(rlang)
## 
## Attaching package: 'rlang'
## The following objects are masked from 'package:purrr':
## 
##     %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
##     flatten_raw, invoke, splice
deaths_with_zcta$Storm_Effect <- relevel(factor(deaths_with_zcta$Storm_Effect), ref = "None")

models_by_zcta <- deaths_with_zcta %>%
  st_drop_geometry() %>%
  group_split(GEOID20) %>%
  map_df(function(df) {
    if (nrow(df) < 30 || n_distinct(df$Storm_Effect) < 2) return(NULL)

    daily_counts <- df %>%
      group_by(Death_Date, Storm_Effect) %>%
      summarise(deaths = n(), .groups = "drop") %>%
      group_by(Storm_Effect) %>%
      summarise(
        total_deaths = sum(deaths),
        num_days = n_distinct(Death_Date),
        .groups = "drop"
      )

    if (any(daily_counts$num_days == 0)) return(NULL)

    model <- try(glm(total_deaths ~ Storm_Effect + offset(log(num_days)),
                     data = daily_counts, family = poisson()), silent = TRUE)
    if (inherits(model, "try-error")) return(NULL)

    rr <- exp(coef(model))  # exponentiate coefficients to rate ratios vs "None"
    tibble(
      GEOID20 = df$GEOID20[1],
      RR_Cleanup = rr["Storm_EffectCleanup"] %||% NA,  # NA when a level is absent
      RR_Impact  = rr["Storm_EffectImpact"] %||% NA,
      RR_Threat  = rr["Storm_EffectThreat"] %||% NA
    )
  })
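
Before joining, it is worth checking how many ZCTAs yielded a fitted model and how many rate ratios are missing (a minimal sketch):

# How many ZCTAs produced a model, and how many RRs are missing?
nrow(models_by_zcta)
colSums(is.na(models_by_zcta[c("RR_Cleanup", "RR_Impact", "RR_Threat")]))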
  4. Join RRs to ZCTA geometry
zcta_results <- left_join(zctas_fl, models_by_zcta, by = "GEOID20")

Plot all 3 RR maps in a loop

library(ggplot2)
library(purrr)
library(rlang)

rr_vars <- c("RR_Cleanup", "RR_Impact", "RR_Threat")
titles <- c("RR: Cleanup vs None", "RR: Impact vs None", "RR: Threat vs None")

walk2(rr_vars, titles, function(rr_var, plot_title) {
  p <- ggplot(zcta_results) +
    geom_sf(aes(fill = !!sym(rr_var)), color = "gray70", size = 0.1) +
    scale_fill_gradient2(
      low = "blue", mid = "white", high = "red",
      midpoint = 1, na.value = "gray90", name = rr_var
    ) +
    theme_minimal() +
    labs(title = plot_title)
  
  print(p)
  ggsave(paste0(rr_var, "_ZCTA_map.png"), plot = p, width = 8, height = 6)
})
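
An equivalent way to program over aesthetic names in current ggplot2 is the .data pronoun, which avoids the rlang !!sym() construction (a sketch for one map):

# Same mapping using ggplot2's .data pronoun instead of !!sym()
ggplot(zcta_results) +
  geom_sf(aes(fill = .data[["RR_Threat"]]), color = "gray70", size = 0.1) +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red",
                       midpoint = 1, na.value = "gray90", name = "RR_Threat") +
  theme_minimal() +
  labs(title = "RR: Threat vs None")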

Hot spot analysis

Using local Moran’s I with k-nearest-neighbor spatial weights

rr_sf <- zcta_results %>%
  filter(!is.na(RR_Threat)) %>%
  select(GEOID20, RR = RR_Threat, geometry)

rr_sf <- rr_sf %>%
  filter(st_geometry_type(geometry) %in% c("POLYGON", "MULTIPOLYGON")) %>%
  st_cast("MULTIPOLYGON") # Ensure polygon geometry

Create spatial weights

centroids <- st_centroid(rr_sf)
## Warning: st_centroid assumes attributes are constant over geometries
coords <- st_coordinates(centroids)
# Choose k, the number of nearest neighbors (here 7)
k <- 7
knn <- spdep::knearneigh(coords, k = k)
nb <- spdep::knn2nb(knn)

# Convert to spatial weights
lw <- spdep::nb2listw(nb, style = "W")
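
A global Moran’s I test provides a single summary of spatial clustering before drilling into the local statistics (a minimal sketch):

# Global Moran's I as an overall check for spatial autocorrelation in RR
spdep::moran.test(rr_sf$RR, lw)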

Run the local hot spot analysis

# Calculate Local Moran’s I
lisa <- spdep::localmoran(rr_sf$RR, lw)

# Add results to the sf object
rr_sf <- rr_sf %>%
  mutate(
    local_i = lisa[, 1],         # Local Moran's I
    p_value = lisa[, 5],         # p-value
    hotspot_type = case_when(
      p_value < 0.15 & RR > mean(RR, na.rm = TRUE) ~ "Hot Spot",
      p_value < 0.15 & RR < mean(RR, na.rm = TRUE) ~ "Cold Spot",
      TRUE ~ "Not Significant"
    )
  )
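
The 0.15 cutoff is lenient, and with thousands of local tests the unadjusted p-values overstate significance. One option is a false-discovery-rate adjustment before classifying (a sketch using the Benjamini-Hochberg method):

# Adjust local p-values for multiple testing before classification
rr_sf <- rr_sf |>
  mutate(p_adj = p.adjust(p_value, method = "BH"))
# hotspot_type could then be re-derived using p_adj in place of p_value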

Map the results

library(ggplot2)

ggplot(rr_sf) +
  geom_sf(aes(fill = hotspot_type), color = "gray90", size = 0.1) +
  scale_fill_manual(values = c(
    "Hot Spot" = "red",
    "Cold Spot" = "blue",
    "Not Significant" = "gray90"
  )) +
  labs(title = "Local Hot Spot Analysis (Threat)", fill = "Cluster Type") +
  theme_minimal()
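
As with the RR maps, the hot spot map can be written to disk (a sketch; ggplot2::last_plot() retrieves the most recently displayed plot):

# Save the hot spot map, mirroring the earlier RR map exports
ggsave("Hotspot_Threat_ZCTA_map.png", plot = last_plot(),
       width = 8, height = 6)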