library(kirkegaard)
load_packages(
dplyr, ggplot2, sf, tigris, ggrepel, arrow, glmnet, xgboost, e1071, knitr
)
theme_set(theme_bw())
options(digits = 3, tigris_use_cache = TRUE)
data_dir <- "data"
fig_dir <- "figs"
The BRFSS asked about household firearm ownership in its core questionnaire in 2002 and 2004. The SMART MMSA files provide survey-weighted estimates for ~146 metropolitan areas. After merging with ACS predictors and filtering to complete cases, we have 118 MMSAs for training.
train <- readRDS(file.path(data_dir, "firearm_training_data.rds")) |>
rename(firearm_ownership = firearm_rate)
cat("Training MMSAs:", nrow(train), "\n")
## Training MMSAs: 118
cat("Firearm ownership range:", round(range(train$firearm_ownership), 3), "\n")
## Firearm ownership range: 0.115 0.602
cat("Respondents range:", range(train$n_respondents), "\n")
## Respondents range: 460 8957
p <- ggplot(train, aes(x = reorder(gsub(" Metropolitan.*| Micropolitan.*", "", mmsa_name),
firearm_ownership),
y = firearm_ownership)) +
geom_col(fill = "steelblue", width = 0.7) +
coord_flip() +
labs(x = NULL, y = "Household firearm ownership rate",
title = "BRFSS SMART MMSA firearm ownership rates (2002-2004 pooled)") +
theme_minimal(base_size = 6)
ggsave(file.path(fig_dir, "barplot_mmsa_firearm_ownership.png"), p, width = 8, height = 16, dpi = 150)
p
models_obj <- readRDS(file.path(data_dir, "firearm_models.rds"))
pred_cols <- models_obj$pred_cols
We trained four model classes with leave-one-out cross-validation on the 118 MMSAs. Hyperparameters were tuned via cross-validation (lambda for ridge/lasso via cv.glmnet, cost/gamma/epsilon for SVM via tune.svm, tree parameters for XGBoost via xgb.cv).
# Compute LOOCV predictions for each model
svm_fit <- models_obj$svm
xgb_fit <- models_obj$xgb
svm_center <- models_obj$svm_center
svm_scale <- models_obj$svm_scale
X <- as.matrix(train[, pred_cols])
y <- train$firearm_ownership
# Ridge and Lasso LOOCV
ridge_cv <- cv.glmnet(scale(X), y, alpha = 0, nfolds = nrow(X), grouped = FALSE)
lasso_cv <- cv.glmnet(scale(X), y, alpha = 1, nfolds = nrow(X), grouped = FALSE)
# Manual LOOCV for SVM and XGBoost to get predicted values
n <- nrow(train)
svm_loo <- xgb_loo <- ridge_loo <- lasso_loo <- numeric(n)
ridge_fit_full <- glmnet(scale(X), y, alpha = 0, lambda = ridge_cv$lambda.min)
lasso_fit_full <- glmnet(scale(X), y, alpha = 1, lambda = lasso_cv$lambda.min)
for (i in 1:n) {
Xi <- X[-i, ]; yi <- y[-i]
Xi_s <- scale(Xi)
ci <- attr(Xi_s, "scaled:center"); si <- attr(Xi_s, "scaled:scale")
test_s <- matrix(scale(matrix(X[i,], nrow=1), center=ci, scale=si), nrow=1)
# Ridge
ridge_i <- glmnet(Xi_s, yi, alpha = 0, lambda = ridge_cv$lambda.min)
ridge_loo[i] <- predict(ridge_i, test_s)[1]
# Lasso
lasso_i <- glmnet(Xi_s, yi, alpha = 1, lambda = lasso_cv$lambda.min)
lasso_loo[i] <- predict(lasso_i, test_s)[1]
# SVM
svm_i <- svm(x = Xi_s, y = yi, kernel = "radial",
cost = 4, gamma = 0.0078125, epsilon = 0.01)
svm_loo[i] <- predict(svm_i, test_s)
# XGBoost
dti <- xgb.DMatrix(data = Xi, label = yi)
xgb_i <- xgb.train(params = models_obj$xgb_params, data = dti,
nrounds = models_obj$xgb_nrounds, verbose = 0)
xgb_loo[i] <- predict(xgb_i, matrix(X[i,], nrow=1))
}
loocv_results <- data.frame(
Model = c("Ridge", "Lasso", "SVM RBF", "XGBoost"),
LOOCV_R2 = c(cor(ridge_loo, y)^2, cor(lasso_loo, y)^2,
cor(svm_loo, y)^2, cor(xgb_loo, y)^2),
LOOCV_MAE = c(mean(abs(ridge_loo - y)), mean(abs(lasso_loo - y)),
mean(abs(svm_loo - y)), mean(abs(xgb_loo - y)))
)
loocv_results |> mutate(across(where(is.numeric), ~round(., 3))) |> kable()
| Model | LOOCV_R2 | LOOCV_MAE |
|---|---|---|
| Ridge | 0.671 | 0.054 |
| Lasso | 0.668 | 0.054 |
| SVM RBF | 0.792 | 0.042 |
| XGBoost | 0.737 | 0.047 |
train$svm_loo <- svm_loo
train$xgb_loo <- xgb_loo
train$ridge_loo <- ridge_loo
train$lasso_loo <- lasso_loo
train$short_name <- gsub(" Metropolitan.*| Micropolitan.*", "", train$mmsa_name)
p <- ggplot(train, aes(x = svm_loo, y = firearm_ownership)) +
geom_point(alpha = 0.7, size = 2) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "red") +
geom_smooth(method = "lm", se = FALSE, color = "steelblue", linewidth = 0.7) +
geom_text_repel(aes(label = short_name), size = 2.2, max.overlaps = 25,
segment.alpha = 0.3) +
labs(x = "SVM LOOCV predicted", y = "Actual HH firearm ownership (BRFSS)",
title = "LOOCV: SVM predicted vs actual (118 MMSAs)",
subtitle = sprintf("R² = %.3f", cor(svm_loo, y)^2)) +
coord_equal(xlim = c(0, 0.7), ylim = c(0, 0.7)) +
theme_minimal()
ggsave(file.path(fig_dir, "loocv_svm_mmsa.png"), p, width = 10, height = 10, dpi = 150)
## `geom_smooth()` using formula = 'y ~ x'
p
## `geom_smooth()` using formula = 'y ~ x'
p <- ggplot(train, aes(x = xgb_loo, y = firearm_ownership)) +
geom_point(alpha = 0.7, size = 2) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "red") +
geom_smooth(method = "lm", se = FALSE, color = "steelblue", linewidth = 0.7) +
geom_text_repel(aes(label = short_name), size = 2.2, max.overlaps = 25,
segment.alpha = 0.3) +
labs(x = "XGBoost LOOCV predicted", y = "Actual HH firearm ownership (BRFSS)",
title = "LOOCV: XGBoost predicted vs actual (118 MMSAs)",
subtitle = sprintf("R² = %.3f", cor(xgb_loo, y)^2)) +
coord_equal(xlim = c(0, 0.7), ylim = c(0, 0.7)) +
theme_minimal()
ggsave(file.path(fig_dir, "loocv_xgb_mmsa.png"), p, width = 10, height = 10, dpi = 150)
## `geom_smooth()` using formula = 'y ~ x'
p
## `geom_smooth()` using formula = 'y ~ x'
RAND Corporation provides state-level household firearm ownership rate (HFR) estimates for 1980-2016, derived from a structural equation model integrating 51 surveys with administrative data. We compare our predictions (aggregated or direct) against the RAND 2008-2012 average.
rand <- read_csv(file.path(data_dir, "RAND_HFR_state_firearm_ownership.csv"),
show_col_types = FALSE)
rand_match <- rand |>
filter(Year %in% 2008:2012) |>
group_by(FIP, STATE) |>
summarise(rand_hfr = mean(HFR, na.rm = TRUE), .groups = "drop") |>
mutate(state_fips = sprintf("%02d", FIP))
state_pred <- readRDS(file.path(data_dir, "state_firearm_predictions.rds"))
# State predictions file already has all 30 predictor columns
state_pred_v <- state_pred |> filter(!is.na(gun_law_idx))
X_state <- as.matrix(state_pred_v[, pred_cols])
X_state_s <- scale(X_state, center = svm_center, scale = svm_scale)
state_pred_v$svm_pred <- predict(svm_fit, X_state_s)
state_pred_v$xgb_pred <- pmin(pmax(predict(xgb_fit, X_state), 0), 1)
val <- state_pred_v |>
left_join(rand_match, by = "state_fips") |>
filter(!is.na(rand_hfr))
check_cal <- function(pred, actual, name) {
fit <- lm(actual ~ pred)
data.frame(
Model = name,
R2 = round(cor(pred, actual)^2, 3),
Slope = round(coef(fit)[2], 3),
Intercept = round(coef(fit)[1], 3),
MAE = round(mean(abs(pred - actual)), 3),
Pred_range = paste0("[", round(min(pred), 2), ", ", round(max(pred), 2), "]")
)
}
state_cal <- bind_rows(
check_cal(val$svm_pred, val$rand_hfr, "SVM RBF"),
check_cal(val$xgb_pred, val$rand_hfr, "XGBoost")
)
kable(state_cal, caption = "State-level validation vs RAND HFR (2008-2012 avg, n=50)")
| Model | R2 | Slope | Intercept | MAE | Pred_range | |
|---|---|---|---|---|---|---|
| pred…1 | SVM RBF | 0.888 | 0.985 | 0.023 | 0.035 | [0.11, 0.58] |
| pred…2 | XGBoost | 0.915 | 1.089 | -0.030 | 0.031 | [0.14, 0.57] |
p <- ggplot(val, aes(x = svm_pred, y = rand_hfr)) +
geom_point(alpha = 0.7, size = 2.5) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "red") +
geom_smooth(method = "lm", se = FALSE, color = "steelblue", linewidth = 0.7) +
geom_text_repel(aes(label = STATE), size = 3, max.overlaps = 50,
segment.alpha = 0.3) +
labs(x = "SVM predicted HFR", y = "RAND HFR (2008-2012 avg)",
title = "State validation: SVM predictions vs RAND estimates",
subtitle = sprintf("R² = %.3f, slope = %.3f, intercept = %.3f",
cor(val$svm_pred, val$rand_hfr)^2,
coef(lm(rand_hfr ~ svm_pred, data = val))[2],
coef(lm(rand_hfr ~ svm_pred, data = val))[1])) +
coord_equal(xlim = c(0, 0.7), ylim = c(0, 0.7)) +
theme_minimal()
ggsave(file.path(fig_dir, "validation_state_svm.png"), p, width = 9, height = 9, dpi = 150)
## `geom_smooth()` using formula = 'y ~ x'
p
## `geom_smooth()` using formula = 'y ~ x'
p <- ggplot(val, aes(x = xgb_pred, y = rand_hfr)) +
geom_point(alpha = 0.7, size = 2.5) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "red") +
geom_smooth(method = "lm", se = FALSE, color = "steelblue", linewidth = 0.7) +
geom_text_repel(aes(label = STATE), size = 3, max.overlaps = 50,
segment.alpha = 0.3) +
labs(x = "XGBoost predicted HFR", y = "RAND HFR (2008-2012 avg)",
title = "State validation: XGBoost predictions vs RAND estimates",
subtitle = sprintf("R² = %.3f, slope = %.3f, intercept = %.3f",
cor(val$xgb_pred, val$rand_hfr)^2,
coef(lm(rand_hfr ~ xgb_pred, data = val))[2],
coef(lm(rand_hfr ~ xgb_pred, data = val))[1])) +
coord_equal(xlim = c(0, 0.7), ylim = c(0, 0.7)) +
theme_minimal()
ggsave(file.path(fig_dir, "validation_state_xgb.png"), p, width = 9, height = 9, dpi = 150)
## `geom_smooth()` using formula = 'y ~ x'
p
## `geom_smooth()` using formula = 'y ~ x'
cat("Counties:", nrow(gun), "\n")
## Counties: 3115
cat("SVM range:", round(range(gun$firearm_rate_svm), 3), "\n")
## SVM range: 0.113 0.663
cat("SVM mean:", round(mean(gun$firearm_rate_svm), 3),
"sd:", round(sd(gun$firearm_rate_svm), 3), "\n")
## SVM mean: 0.477 sd: 0.088
counties_sf <- counties(year = 2020, cb = TRUE) |>
filter(!STATEFP %in% c("02", "15", "72", "78", "66", "60", "69")) |>
st_transform(5070)
map_gun <- counties_sf |> left_join(gun, by = "GEOID")
p <- ggplot(map_gun) +
geom_sf(aes(fill = firearm_rate_svm), color = NA) +
scale_fill_viridis_c(option = "inferno", labels = scales::percent,
name = "Estimated HH\nFirearm Rate",
breaks = seq(0.1, 0.7, 0.1)) +
labs(title = "Estimated household firearm ownership rate by county (SVM)",
subtitle = "Trained on 118 MMSAs (BRFSS 2002-04), 30 predictors | State R²=0.89 vs RAND | County FS/S slope validated") +
theme_void() +
theme(legend.position = c(0.92, 0.3),
panel.background = element_rect(fill = "white", color = NA))
ggsave(file.path(fig_dir, "map_firearm_rate_svm.png"), p, width = 12, height = 7, dpi = 200)
p
knitr::include_graphics(file.path(fig_dir, "table_imputation_model_comparison.png"))
SVM RBF selected as final model: best LOOCV R² (0.79), near-perfect state calibration (slope=0.99), and validated county-level FS/S scaling.
We fit three models at three levels of analysis:
Levels: (A) states using RAND HFR, (B) all counties using SVM-imputed HFR, (C) MMSAs only using BRFSS survey-based HFR.
library(modelsummary)
gof_map_short <- list(
list("raw" = "adj.r.squared", "clean" = "Adj. R²", "fmt" = 3),
list("raw" = "nobs", "clean" = "N", "fmt" = 0)
)
# Helper: fit 3 models, return named list for modelsummary
fit_3models <- function(df, gun_var = "firearm_ownership") {
f_demo <- as.formula("hom_rate ~ black_share + hispanic_share + native_share + asian_share + log_pwpd")
f_gun <- as.formula(paste("hom_rate ~", gun_var))
f_both <- as.formula(paste("hom_rate ~ black_share + hispanic_share + native_share + asian_share +", gun_var, "+ log_pwpd"))
list(
"Demographics" = lm(f_demo, data = df),
"Gun ownership" = lm(f_gun, data = df),
"Both" = lm(f_both, data = df)
)
}
# Helper: render modelsummary table and save to PNG
ms_table <- function(models, title, filename) {
coef_labels <- c(
"black_share" = "Black %",
"hispanic_share" = "Hispanic %",
"native_share" = "Native %",
"asian_share" = "Asian %",
"firearm_ownership" = "HH firearm ownership",
"black_share:firearm_ownership" = "Black % x HH firearm ownership",
"log_pwpd" = "log(pop-weighted density)",
"(Intercept)" = "(Intercept)"
)
tab <- modelsummary(
models,
estimate = "{estimate}{stars}",
statistic = "({std.error})",
stars = c("***" = 0.001, "**" = 0.01, "*" = 0.05),
coef_rename = coef_labels,
gof_map = gof_map_short,
title = title,
output = "gt"
)
gt::gtsave(tab, file.path(fig_dir, filename))
tab
}
# County-level data
d <- read_parquet(file.path(data_dir, "analysis_firearm_race.parquet"))
cat("All counties:", nrow(d), "\n")
## All counties: 3112
# State-level: aggregate counties, use RAND HFR
state_pwpd <- readRDS(file.path(data_dir, "state_pop_weighted_density.rds"))
state_d <- d |>
mutate(state_fips = substr(GEOID, 1, 2)) |>
group_by(state_fips) |>
summarise(
across(c(black_share, hispanic_share, native_share, asian_share),
~weighted.mean(., w = total)),
hom_rate = sum(hom_deaths) / sum(hom_pop) * 100000,
.groups = "drop"
) |>
left_join(rand_match |> select(state_fips, rand_hfr), by = "state_fips") |>
left_join(state_pwpd, by = "state_fips") |>
mutate(log_pwpd = log(pwpd + 1)) |>
rename(firearm_ownership = rand_hfr) |>
filter(!is.na(firearm_ownership))
cat("States (with RAND HFR):", nrow(state_d), "\n")
## States (with RAND HFR): 50
# MMSA-level: use survey-based BRFSS firearm rate (no imputation needed)
mmsa_gun <- readRDS(file.path(data_dir, "mmsa_firearm_ownership.rds")) |>
select(mmsa_code, firearm_ownership = firearm_rate, n_respondents)
# Need MMSA-level homicide and race data
# Aggregate counties to MMSAs using the crosswalk
county_cbsa <- readxl::read_xls(file.path(data_dir, "cbsa_delineation_2013.xls"), skip = 2) |>
transmute(
county_fips = paste0(`FIPS State Code`, `FIPS County Code`),
cbsa_code = stringr::str_pad(`CBSA Code`, 5, pad = "0")
)
mmsa_d <- d |>
inner_join(county_cbsa, by = c("GEOID" = "county_fips")) |>
group_by(cbsa_code) |>
summarise(
across(c(black_share, hispanic_share, native_share, asian_share),
~weighted.mean(., w = total)),
hom_rate = sum(hom_deaths) / sum(hom_pop) * 100000,
pwpd = weighted.mean(pwpd, w = pop_total),
.groups = "drop"
) |>
mutate(log_pwpd = log(pwpd + 1)) |>
inner_join(mmsa_gun, by = c("cbsa_code" = "mmsa_code")) |>
filter(!is.na(firearm_ownership), !is.na(hom_rate))
cat("MMSAs (with BRFSS survey data):", nrow(mmsa_d), "\n")
## MMSAs (with BRFSS survey data): 118
map_d <- counties_sf |> left_join(d, by = "GEOID")
p <- ggplot(map_d) +
geom_sf(aes(fill = firearm_ownership), color = NA) +
scale_fill_viridis_c(option = "inferno", labels = scales::percent,
name = "HH Firearm\nRate") +
labs(title = "Estimated household firearm ownership rate") +
theme_void() + theme(legend.position = c(0.92, 0.3),
panel.background = element_rect(fill = "white", color = NA))
ggsave(file.path(fig_dir, "map_analysis_firearm.png"), p, width = 12, height = 7, dpi = 200)
p
Homicide rate map: see figs/map_homicide_rate.png (from prior analysis).
Black share map: see figs/map_black_share.png (from prior analysis).
d |>
select(hom_rate, firearm_ownership, black_share, hispanic_share, native_share,
asian_share) |>
summary()
## hom_rate firearm_ownership black_share hispanic_share
## Min. : 0.00 Min. :0.113 Min. :0.00000 Min. :0.0034
## 1st Qu.: 1.99 1st Qu.:0.441 1st Qu.:0.00535 1st Qu.:0.0237
## Median : 3.26 Median :0.490 Median :0.02104 Median :0.0461
## Mean : 4.39 Mean :0.477 Mean :0.08786 Mean :0.0982
## 3rd Qu.: 5.32 3rd Qu.:0.536 3rd Qu.:0.10188 3rd Qu.:0.1054
## Max. :42.89 Max. :0.663 Max. :0.87455 Max. :0.9768
## native_share asian_share
## Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00273 1st Qu.:0.00371
## Median :0.00470 Median :0.00615
## Mean :0.01821 Mean :0.01516
## 3rd Qu.:0.00937 3rd Qu.:0.01382
## Max. :0.88733 Max. :0.53020
# State-level: include both RAND and BRFSS firearm ownership
state_brfss <- mmsa_d |>
inner_join(
county_cbsa |> mutate(state_fips = substr(county_fips, 1, 2)) |>
left_join(d |> select(GEOID, pop_total), by = c("county_fips" = "GEOID")) |>
filter(!is.na(pop_total)) |>
group_by(cbsa_code, state_fips) |> summarise(pop = sum(pop_total), .groups = "drop") |>
group_by(cbsa_code) |> slice_max(pop, n = 1) |> ungroup() |>
select(cbsa_code, state_fips),
by = "cbsa_code"
) |>
group_by(state_fips) |>
summarise(brfss_ownership = mean(firearm_ownership), .groups = "drop")
state_fss <- readRDS(file.path(data_dir, "state_fss_1999_2020.rds"))
state_d2 <- state_d |>
left_join(state_brfss, by = "state_fips") |>
left_join(state_fss |> select(state_fips, fss), by = "state_fips")
cor_vars_s <- c("hom_rate", "firearm_ownership", "brfss_ownership", "fss",
"black_share", "hispanic_share", "native_share", "asian_share", "log_pwpd")
cor_labels_s <- c("Homicide rate", "RAND HFR", "BRFSS HFR", "Firearm suicide %",
"Black %", "Hispanic %", "Native %", "Asian %", "log(pop-wtd density)")
sc <- cor(state_d2[!is.na(state_d2$brfss_ownership), cor_vars_s], use = "complete.obs")
colnames(sc) <- rownames(sc) <- cor_labels_s
n_s <- nrow(sc)
melted_s <- expand.grid(Var1 = cor_labels_s, Var2 = cor_labels_s) |>
mutate(value = as.vector(sc))
p <- ggplot(melted_s, aes(x = Var2, y = Var1, fill = value)) +
geom_tile(color = "white") +
geom_text(aes(label = sprintf("%.2f", value)), size = 2.8) +
scale_fill_gradient2(low = "steelblue", mid = "white", high = "firebrick",
midpoint = 0, limits = c(-1, 1), name = "r") +
labs(title = sprintf("State-level correlations (n=%d states with both RAND and BRFSS)",
sum(!is.na(state_d2$brfss_ownership)))) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), axis.title = element_blank())
ggsave(file.path(fig_dir, "cormat_states.png"), p, width = 9, height = 7.5, dpi = 150)
p
# County/MMSA split: upper = all counties (SVM), lower = MMSAs (BRFSS)
shared_vars <- c("hom_rate", "firearm_ownership", "black_share", "hispanic_share",
"native_share", "asian_share", "log_pwpd")
shared_labels <- c("Homicide rate", "Firearm own. (SVM)", "Black %", "Hispanic %",
"Native %", "Asian %", "log(pop-wtd density)")
county_cor <- cor(d[, shared_vars], use = "complete.obs")
mmsa_brfss <- mmsa_d # already has firearm_ownership = BRFSS
mmsa_cor <- cor(mmsa_brfss[, shared_vars], use = "complete.obs")
nv <- length(shared_vars)
combined <- matrix(NA, nv, nv)
rownames(combined) <- colnames(combined) <- shared_labels
for (i in 1:nv) for (j in 1:nv) {
if (i < j) combined[i, j] <- county_cor[i, j]
else if (i > j) combined[i, j] <- mmsa_cor[i, j]
}
melted_c <- expand.grid(Var1 = shared_labels, Var2 = shared_labels) |>
mutate(value = as.vector(combined))
p <- ggplot(melted_c, aes(x = Var2, y = Var1, fill = value)) +
geom_tile(color = "white") +
geom_text(aes(label = ifelse(is.na(value), "", sprintf("%.2f", value))), size = 2.8) +
scale_fill_gradient2(low = "steelblue", mid = "white", high = "firebrick",
midpoint = 0, limits = c(-1, 1), name = "r", na.value = "grey90") +
labs(title = "County/MMSA correlations",
subtitle = "Upper: all counties, n=3,112, SVM imputed | Lower: MMSAs, n=118, BRFSS survey") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), axis.title = element_blank())
ggsave(file.path(fig_dir, "cormat_counties.png"), p, width = 9, height = 7.5, dpi = 150)
p
mods_state <- fit_3models(state_d)
ms_table(mods_state, "State-level: homicide rate per 100k", "table_state_regressions.png")
## file:////tmp/RtmpaiogIy/file53e8a533f46f.html screenshot completed
| Demographics | Gun ownership | Both | |
|---|---|---|---|
| (Intercept) | 9.272* | 3.675** | -0.041 |
| (3.539) | (1.154) | (4.713) | |
| Black % | 25.855*** | 26.708*** | |
| (2.462) | (2.315) | ||
| Hispanic % | 4.928 | 6.720* | |
| (2.710) | (2.607) | ||
| Native % | 34.326** | 19.668 | |
| (9.968) | (10.696) | ||
| Asian % | 2.055 | 4.522 | |
| (3.650) | (3.517) | ||
| log(pop-weighted density) | -1.125* | -0.232 | |
| (0.530) | (0.590) | ||
| HH firearm ownership | 4.211 | 7.214** | |
| (2.807) | (2.608) | ||
| Adj. R² | 0.709 | 0.025 | 0.747 |
| N | 50 | 50 | 50 |
mods_county <- fit_3models(d)
ms_table(mods_county, "County-level (all): homicide rate per 100k", "table_county_regressions.png")
## file:////tmp/RtmpaiogIy/file53e8a30e46e03.html screenshot completed
| Demographics | Gun ownership | Both | |
|---|---|---|---|
| (Intercept) | 0.477 | 6.900*** | -0.743 |
| (0.399) | (0.413) | (0.704) | |
| Black % | 21.785*** | 21.912*** | |
| (0.365) | (0.369) | ||
| Hispanic % | 2.304*** | 2.502*** | |
| (0.397) | (0.408) | ||
| Native % | 9.831*** | 9.945*** | |
| (0.774) | (0.775) | ||
| Asian % | -8.830*** | -7.202*** | |
| (1.905) | (2.055) | ||
| log(pop-weighted density) | 0.294*** | 0.354*** | |
| (0.071) | (0.077) | ||
| HH firearm ownership | -5.252*** | 1.689* | |
| (0.851) | (0.803) | ||
| Adj. R² | 0.545 | 0.012 | 0.546 |
| N | 3112 | 3112 | 3112 |
mods_mmsa <- fit_3models(mmsa_d)
ms_table(mods_mmsa, "MMSA-level (survey data): homicide rate per 100k", "table_mmsa_regressions.png")
## file:////tmp/RtmpaiogIy/file53e8a13447319.html screenshot completed
| Demographics | Gun ownership | Both | |
|---|---|---|---|
| (Intercept) | -0.090 | 3.995*** | -7.336 |
| (3.223) | (1.077) | (3.904) | |
| Black % | 26.908*** | 27.421*** | |
| (1.736) | (1.682) | ||
| Hispanic % | 1.755 | 2.733 | |
| (1.519) | (1.499) | ||
| Native % | 22.375*** | 19.880*** | |
| (4.839) | (4.737) | ||
| Asian % | -2.822 | 1.780 | |
| (3.170) | (3.405) | ||
| log(pop-weighted density) | 0.340 | 0.980 | |
| (0.474) | (0.503) | ||
| HH firearm ownership | 5.515* | 6.488** | |
| (2.784) | (2.114) | ||
| Adj. R² | 0.685 | 0.024 | 0.707 |
| N | 118 | 118 | 118 |
Adding race x gun ownership interactions at each level:
int_models <- list(
"State" = lm(hom_rate ~ black_share * firearm_ownership + hispanic_share + native_share + asian_share + log_pwpd, data = state_d),
"County" = lm(hom_rate ~ black_share * firearm_ownership + hispanic_share + native_share + asian_share + log_pwpd, data = d),
"MMSA" = lm(hom_rate ~ black_share * firearm_ownership + hispanic_share + native_share + asian_share + log_pwpd, data = mmsa_d)
)
ms_table(int_models, "Interaction models: race x firearm ownership", "table_interaction_regressions.png")
## file:////tmp/RtmpaiogIy/file53e8a58aa8e10.html screenshot completed
| State | County | MMSA | |
|---|---|---|---|
| (Intercept) | -1.358 | -2.042** | -7.438 |
| (4.601) | (0.717) | (3.933) | |
| Black % | 8.769 | 41.267*** | 25.125** |
| (9.181) | (2.507) | (7.536) | |
| HH firearm ownership | 3.435 | 5.360*** | 5.873* |
| (3.141) | (0.924) | (2.894) | |
| Hispanic % | 5.427* | 2.985*** | 2.609 |
| (2.600) | (0.408) | (1.557) | |
| Native % | 23.228* | 10.093*** | 20.076*** |
| (10.484) | (0.768) | (4.798) | |
| Asian % | 1.232 | -6.000** | 1.379 |
| (3.770) | (2.041) | (3.652) | |
| log(pop-weighted density) | 0.226 | 0.259*** | 1.036 |
| (0.614) | (0.077) | (0.536) | |
| Black % x HH firearm ownership | 42.826 | -41.178*** | 5.438 |
| (21.259) | (5.276) | (17.393) | |
| Adj. R² | 0.764 | 0.554 | 0.705 |
| N | 50 | 3112 | 118 |
d$gun_tercile <- cut(d$firearm_ownership,
breaks = quantile(d$firearm_ownership, c(0, 1/3, 2/3, 1)),
labels = c("Low gun ownership", "Medium", "High gun ownership"),
include.lowest = TRUE)
p <- ggplot(d, aes(x = black_share, y = hom_rate, color = gun_tercile)) +
geom_point(alpha = 0.15, size = 0.8) +
geom_smooth(method = "lm", se = TRUE, linewidth = 1) +
labs(x = "Black population share", y = "Homicide rate per 100k",
color = "Firearm ownership",
title = "Black share vs homicide rate, by firearm ownership level") +
theme_minimal()
ggsave(file.path(fig_dir, "interaction_black_firearm_hom.png"), p, width = 9, height = 7, dpi = 150)
## `geom_smooth()` using formula = 'y ~ x'
p
## `geom_smooth()` using formula = 'y ~ x'
d$black_tercile <- cut(d$black_share,
breaks = quantile(d$black_share, c(0, 1/3, 2/3, 1)),
labels = c("Low Black share", "Medium", "High Black share"),
include.lowest = TRUE)
p <- ggplot(d, aes(x = firearm_ownership, y = hom_rate, color = black_tercile)) +
geom_point(alpha = 0.15, size = 0.8) +
geom_smooth(method = "lm", se = TRUE, linewidth = 1) +
labs(x = "Estimated firearm ownership rate", y = "Homicide rate per 100k",
color = "Black pop share",
title = "Firearm ownership vs homicide rate, by Black population share") +
theme_minimal()
ggsave(file.path(fig_dir, "interaction_firearm_black_hom.png"), p, width = 9, height = 7, dpi = 150)
## `geom_smooth()` using formula = 'y ~ x'
p
## `geom_smooth()` using formula = 'y ~ x'
sessionInfo()
## R version 4.5.3 (2026-03-11)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0 LAPACK version 3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_DK.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_DK.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Brussels
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] modelsummary_2.5.0 knitr_1.50 e1071_1.7-16
## [4] xgboost_1.7.11.1 glmnet_4.1-10 Matrix_1.7-4
## [7] arrow_23.0.1.1 ggrepel_0.9.6 tigris_2.2.1
## [10] sf_1.0-21 kirkegaard_2025-11-19 robustbase_0.99-6
## [13] psych_2.5.6 assertthat_0.2.1 weights_1.1.2
## [16] magrittr_2.0.4 lubridate_1.9.4 forcats_1.0.1
## [19] stringr_1.6.0 dplyr_1.1.4 purrr_1.2.0
## [22] readr_2.1.5 tidyr_1.3.1 tibble_3.3.0
## [25] ggplot2_4.0.1.9000 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] splines_4.5.3 later_1.4.4 cellranger_1.1.0
## [4] datawizard_1.3.0 rpart_4.1.24 lifecycle_1.0.4
## [7] Rdpack_2.6.4 globals_0.18.0 processx_3.8.6
## [10] lattice_0.22-7 vroom_1.6.6 MASS_7.3-65
## [13] insight_1.4.2 backports_1.5.0 Hmisc_5.2-4
## [16] sass_0.4.10 rmarkdown_2.30 jquerylib_0.1.4
## [19] yaml_2.3.10 chromote_0.5.1 DBI_1.2.3
## [22] minqa_1.2.8 RColorBrewer_1.1-3 multcomp_1.4-28
## [25] nnet_7.3-20 TH.data_1.1-4 rappdirs_0.3.3
## [28] webshot2_0.1.2 sandwich_3.1-1 listenv_0.9.1
## [31] gdata_3.0.1 units_1.0-0 performance_0.15.2
## [34] parallelly_1.45.1 codetools_0.2-20 xml2_1.4.0
## [37] tidyselect_1.2.1 shape_1.4.6.1 farver_2.1.2
## [40] lme4_1.1-37 effectsize_1.0.1 base64enc_0.1-3
## [43] jsonlite_2.0.0 mitml_0.4-5 Formula_1.2-5
## [46] survival_3.8-6 iterators_1.0.14 emmeans_1.11.2-8
## [49] systemfonts_1.3.1 foreach_1.5.2 tools_4.5.3
## [52] ragg_1.5.0 Rcpp_1.1.0 glue_1.8.0
## [55] mnormt_2.1.1 gridExtra_2.3 pan_1.9
## [58] xfun_0.57 mgcv_1.9-3 websocket_1.4.4
## [61] withr_3.0.2 fastmap_1.2.0 boot_1.3-32
## [64] digest_0.6.39 timechange_0.3.0 R6_2.6.1
## [67] estimability_1.5.1 mice_3.18.0 textshaping_1.0.4
## [70] colorspace_2.1-2 gtools_3.9.5 generics_0.1.4
## [73] data.table_1.17.8 class_7.3-23 httr_1.4.7
## [76] htmlwidgets_1.6.4 parameters_0.28.2 pkgconfig_2.0.3
## [79] gtable_0.3.6 lmtest_0.9-40 S7_0.2.1
## [82] htmltools_0.5.8.1 scales_1.4.0 png_0.1-8
## [85] reformulas_0.4.1 rstudioapi_0.17.1 tzdb_0.5.0
## [88] uuid_1.2-1 coda_0.19-4.1 checkmate_2.3.3
## [91] nlme_3.1-168 nloptr_2.2.1 proxy_0.4-27
## [94] cachem_1.1.0 zoo_1.8-14 KernSmooth_2.23-26
## [97] parallel_4.5.3 foreign_0.8-91 pillar_1.11.1
## [100] grid_4.5.3 vctrs_0.6.5 promises_1.3.3
## [103] jomo_2.7-6 xtable_1.8-4 cluster_2.1.8.2
## [106] archive_1.1.12 htmlTable_2.4.3 evaluate_1.0.5
## [109] mvtnorm_1.3-3 cli_3.6.5 compiler_4.5.3
## [112] rlang_1.1.6.9000 crayon_1.5.3 future.apply_1.20.0
## [115] labeling_0.4.3 classInt_0.4-11 ps_1.9.1
## [118] fs_1.6.6 stringi_1.8.7 viridisLite_0.4.2
## [121] tables_0.9.31 bayestestR_0.17.0 hms_1.1.3
## [124] bit64_4.6.0-1 future_1.67.0 rbibutils_2.3
## [127] gt_1.3.0 broom_1.0.11 bslib_0.9.0
## [130] DEoptimR_1.1-4 bit_4.6.0 readxl_1.4.5