Read the data table from a csv file
Deaths.dt <- data.table::fread(here::here("data", "outputs", "All_Deaths_Storm_Effects.csv"))
Deaths.df <- Deaths.dt |>
as.data.frame()
Deaths.sf <- sf::st_read(dsn = "data/outputs/Deaths",
layer = "All_Deaths_Storm_Effects")
## Reading layer `All_Deaths_Storm_Effects' from data source
## `/Users/jelsner/Desktop/Projects/HO-storms/data/outputs/Deaths'
## using driver `ESRI Shapefile'
## Simple feature collection with 6166095 features and 6 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -122.4109 ymin: 24.5465 xmax: -71.45662 ymax: 44.43933
## Geodetic CRS: WGS 84
names(Deaths.sf)[1:5] <- names(Deaths.df)[1:5] # match the names
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#Deaths.df <- Deaths.df |>
# mutate(SH = Storm_Category > 0)
total_deaths <- nrow(Deaths.df)
exposed_total <- Deaths.dt[Storm_Effect %in% c("Cleanup", "Impact", "Threat"), .N]
# Calculate death counts and rates for specified storm effects
death_rates <- Deaths.df |>
filter(Storm_Effect %in% c("Cleanup", "Impact", "Threat")) |>
group_by(Storm_Effect) |>
# group_by(Storm_Effect, SH) |>
summarise(Deaths = n(), .groups = "drop") |>
mutate(Death_Rate = Deaths / exposed_total)
# View result
print(death_rates)
## # A tibble: 3 × 3
## Storm_Effect Deaths Death_Rate
## <chr> <int> <dbl>
## 1 Cleanup 18706 0.332
## 2 Impact 18743 0.333
## 3 Threat 18853 0.335
This tells you, for example, that 33.3% of the storm-exposed deaths (those occurring under a Cleanup, Impact, or Threat condition) occurred under an Impact condition.
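For comparison, the share of all deaths in the table (including those on "None" days) under each effect is much smaller. A quick sketch, reusing the total_deaths count computed above:
# Express deaths under each effect as a share of all deaths in the table
death_rates |>
  mutate(Share_All_Deaths = Deaths / total_deaths)  # roughly 0.003, i.e., about 0.3% each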
Get number of deaths per day by Storm_Effect
# Group deaths by date and storm effect
daily_deaths <- Deaths.df |>
# group_by(Death_Date, Storm_Effect, SH) |>
group_by(Death_Date, Storm_Effect) |>
summarise(deaths = n(), .groups = "drop")
Get the number of days for each storm effect. This step assumes Storm_Effect applies to the whole day, i.e., each Death_Date has a unique Storm_Effect (a quick check after the next chunk verifies this).
num_days <- daily_deaths |>
# distinct(Death_Date, Storm_Effect, SH) |>
distinct(Death_Date, Storm_Effect) |>
# count(Storm_Effect, SH, name = "num_days")
count(Storm_Effect, name = "num_days")
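A sketch of the whole-day check: if each Death_Date maps to a single Storm_Effect, this returns zero rows.
# Dates with more than one Storm_Effect would violate the assumption
daily_deaths |>
  distinct(Death_Date, Storm_Effect) |>
  count(Death_Date, name = "n_effects") |>
  filter(n_effects > 1)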
Total deaths per storm effect
total_deaths <- daily_deaths |>
# group_by(Storm_Effect, SH) |>
group_by(Storm_Effect) |>
summarise(total_deaths = sum(deaths), .groups = "drop")
Calculate daily death rate
# death_rates <- left_join(total_deaths, num_days, by = c("Storm_Effect", "SH")) |>
death_rates <- left_join(total_deaths, num_days, by = "Storm_Effect") |>
mutate(daily_death_rate = total_deaths / num_days)
print(death_rates)
## # A tibble: 4 × 4
## Storm_Effect total_deaths num_days daily_death_rate
## <chr> <int> <int> <dbl>
## 1 Cleanup 18706 45 416.
## 2 Impact 18743 45 417.
## 3 None 6109793 15205 402.
## 4 Threat 18853 45 419.
This tells us that in this sample the daily death rate is somewhat higher on storm-effect days (about 416 to 419 deaths per day) than on non-storm-effect days (about 402 deaths per day).
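Expressed relative to non-storm-effect days, the differences amount to a few percent. A sketch:
# Daily death rate on each storm-effect day type relative to "None" days
none_rate <- death_rates$daily_death_rate[death_rates$Storm_Effect == "None"]
death_rates |>
  mutate(rate_ratio = daily_death_rate / none_rate)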
Test this statistically with a Poisson regression model, using the non-storm-effect days ("None") as the reference level.
model_data <- left_join(total_deaths, num_days, by = "Storm_Effect")
model_data$Storm_Effect <- relevel(factor(model_data$Storm_Effect), ref = "None")
poisson_model <- glm(total_deaths ~ Storm_Effect + offset(log(num_days)), data = model_data, family = poisson())
summary(poisson_model)
##
## Call:
## glm(formula = total_deaths ~ Storm_Effect + offset(log(num_days)),
## family = poisson(), data = model_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.9960239 0.0004046 14820.969 < 2e-16 ***
## Storm_EffectCleanup 0.0339133 0.0073227 4.631 3.64e-06 ***
## Storm_EffectImpact 0.0358893 0.0073155 4.906 9.30e-07 ***
## Storm_EffectThreat 0.0417410 0.0072942 5.722 1.05e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 7.6839e+01 on 3 degrees of freedom
## Residual deviance: -4.7833e-10 on 0 degrees of freedom
## AIC: 60.497
##
## Number of Fisher Scoring iterations: 2
This says that, given the sample of data and a Poisson model, mortality is significantly higher on storm-effect days than on the reference (non-storm-effect) days, although the effect size is small.
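The coefficients are on the log scale, so the effect sizes can be checked directly against the aggregated table. For example, the Threat coefficient corresponds to the ratio of the two daily rates:
# Hand-check the Threat rate ratio against the aggregated counts
(18853 / 45) / (6109793 / 15205)  # ~1.043, matching exp(0.0417)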
Create a nice table of the model results. We exponentiate the model coefficients to express them as rate ratios and add 95% confidence intervals.
# Extract coefficients and 95% confidence intervals
coefs <- coef(summary(poisson_model))
confint_vals <- confint(poisson_model) # May take a few seconds
## Waiting for profiling to be done...
# Build data frame of results
rate_ratios <- data.frame(
Term = rownames(coefs),
Estimate = coefs[, "Estimate"],
Std_Error = coefs[, "Std. Error"],
z_value = coefs[, "z value"],
p_value = coefs[, "Pr(>|z|)"],
RR = exp(coefs[, "Estimate"]),
RR_lower = exp(confint_vals[, 1]),
RR_upper = exp(confint_vals[, 2])
)
# Clean up names
rownames(rate_ratios) <- NULL
rate_ratios <- rate_ratios |>
dplyr::select(Term, RR, RR_lower, RR_upper, p_value)
# Print the table
options(digits = 4)
print(rate_ratios)
## Term RR RR_lower RR_upper p_value
## 1 (Intercept) 401.828 401.509 402.147 0.000e+00
## 2 Storm_EffectCleanup 1.034 1.020 1.049 3.635e-06
## 3 Storm_EffectImpact 1.037 1.022 1.051 9.300e-07
## 4 Storm_EffectThreat 1.043 1.028 1.058 1.050e-08
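For a rendered report, the same data frame can be passed to a table formatter. A minimal sketch using knitr::kable() (assuming the knitr package is available):
# Format the rate-ratio table for a rendered document
knitr::kable(rate_ratios, digits = 3,
             caption = "Rate ratios relative to non-storm-effect days")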
library(tigris)
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
library(sf)
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
options(tigris_use_cache = TRUE)
# Get national ZCTAs and subset to those intersecting Florida
zctas_all <- zctas(cb = TRUE, starts_with = NULL, year = 2020)
florida <- states(cb = TRUE) %>% filter(STUSPS == "FL")
## Retrieving data for the year 2024
# Keep ZCTAs that intersect Florida (note: st_intersection also clips them to the state boundary)
zctas_fl <- st_intersection(zctas_all, st_transform(florida, st_crs(zctas_all)))
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
zctas_fl <- st_make_valid(zctas_fl) # in case of invalid geometries
Deaths.sf <- st_transform(Deaths.sf, st_crs(zctas_fl)) # Ensure CRS matches
deaths_with_zcta <- st_join(Deaths.sf, zctas_fl["GEOID20"]) # Join only GEOID20
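A quick sanity check on the join (a sketch): points that fall outside every Florida ZCTA get a missing GEOID20.
# Count death records that did not fall inside any Florida ZCTA
sum(is.na(deaths_with_zcta$GEOID20))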
library(dplyr)
library(purrr)
library(rlang)
##
## Attaching package: 'rlang'
## The following objects are masked from 'package:purrr':
##
## %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
## flatten_raw, invoke, splice
deaths_with_zcta$Storm_Effect <- relevel(factor(deaths_with_zcta$Storm_Effect), ref = "None")
models_by_zcta <- deaths_with_zcta %>%
  st_drop_geometry() %>%
  group_split(GEOID20) %>%
  map_df(function(df) {
    # Skip ZCTAs with too few deaths or only one storm-effect level
    if (nrow(df) < 30 || n_distinct(df$Storm_Effect) < 2) return(NULL)
    # Aggregate to total deaths and number of days for each storm effect
    daily_counts <- df %>%
      group_by(Death_Date, Storm_Effect) %>%
      summarise(deaths = n(), .groups = "drop") %>%
      group_by(Storm_Effect) %>%
      summarise(
        total_deaths = sum(deaths),
        num_days = n_distinct(Death_Date),
        .groups = "drop"
      )
    if (any(daily_counts$num_days == 0)) return(NULL)
    # Fit the same Poisson rate model as above, within this ZCTA
    model <- try(glm(total_deaths ~ Storm_Effect + offset(log(num_days)),
                     data = daily_counts, family = poisson()), silent = TRUE)
    if (inherits(model, "try-error")) return(NULL)
    # Exponentiate coefficients to get rate ratios relative to "None"
    rr <- exp(coef(model))
    tibble(
      GEOID20 = df$GEOID20[1],
      RR_Cleanup = rr["Storm_EffectCleanup"] %||% NA,
      RR_Impact = rr["Storm_EffectImpact"] %||% NA,
      RR_Threat = rr["Storm_EffectThreat"] %||% NA
    )
  })
zcta_results <- left_join(zctas_fl, models_by_zcta, by = "GEOID20")
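Before mapping, it is worth checking how many ZCTAs actually received a fitted model. A sketch:
# Number of Florida ZCTAs with an estimated rate ratio, out of the total
sum(!is.na(zcta_results$RR_Threat))
nrow(zcta_results)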
Plot all 3 RR maps in a loop
library(ggplot2)
library(purrr)
library(rlang)
rr_vars <- c("RR_Cleanup", "RR_Impact", "RR_Threat")
titles <- c("RR: Cleanup vs None", "RR: Impact vs None", "RR: Threat vs None")
walk2(rr_vars, titles, function(rr_var, plot_title) {
p <- ggplot(zcta_results) +
geom_sf(aes(fill = !!sym(rr_var)), color = "gray70", size = 0.1) +
scale_fill_gradient2(
low = "blue", mid = "white", high = "red",
midpoint = 1, na.value = "gray90", name = rr_var
) +
theme_minimal() +
labs(title = plot_title)
print(p)
ggsave(paste0(rr_var, "_ZCTA_map.png"), plot = p, width = 8, height = 6)
})
Using local Moran’s I, with k-nearest neighbors to define the spatial neighbors
rr_sf <- zcta_results %>%
filter(!is.na(RR_Threat)) %>%
select(GEOID20, RR = RR_Threat, geometry)
rr_sf <- rr_sf %>%
filter(st_geometry_type(geometry) %in% c("POLYGON", "MULTIPOLYGON")) %>%
st_cast("MULTIPOLYGON") # Ensure polygon geometry
Create spatial weights
centroids <- st_centroid(rr_sf)
## Warning: st_centroid assumes attributes are constant over geometries
coords <- st_coordinates(centroids)
# Choose k, the number of nearest neighbors
k <- 7
knn <- spdep::knearneigh(coords, k = k)
nb <- spdep::knn2nb(knn)
# Convert to spatial weights
lw <- spdep::nb2listw(nb, style = "W")
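Before the local analysis, a global Moran’s I test on RR gives an overall sense of spatial autocorrelation. A sketch using the same weights:
# Global Moran's I test for spatial autocorrelation in RR
spdep::moran.test(rr_sf$RR, lw)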
Run the local hotspot analysis
# Calculate Local Moran’s I
lisa <- spdep::localmoran(rr_sf$RR, lw)
# Add results to the sf object
rr_sf <- rr_sf %>%
mutate(
local_i = lisa[, 1], # Local Moran's I
p_value = lisa[, 5], # p-value
hotspot_type = case_when(
p_value < 0.15 & RR > mean(RR, na.rm = TRUE) ~ "Hot Spot",
p_value < 0.15 & RR < mean(RR, na.rm = TRUE) ~ "Cold Spot",
TRUE ~ "Not Significant"
)
)
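A quick tally of the classification before mapping:
# How many ZCTAs fall into each cluster type
table(rr_sf$hotspot_type)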
Map the results
library(ggplot2)
ggplot(rr_sf) +
geom_sf(aes(fill = hotspot_type), color = "gray90", size = 0.1) +
scale_fill_manual(values = c(
"Hot Spot" = "red",
"Cold Spot" = "blue",
"Not Significant" = "gray90"
)) +
labs(title = "Local Hot Spot Analysis (Threat)", fill = "Cluster Type") +
theme_minimal()