Init
library(kirkegaard)
## Loading required package: tidyverse
## ── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
## [32m✔[39m [34mforcats [39m 1.0.1 [32m✔[39m [34mstringr [39m 1.6.0
## [32m✔[39m [34mlubridate[39m 1.9.4 [32m✔[39m [34mtibble [39m 3.3.0
## [32m✔[39m [34mpurrr [39m 1.2.0 [32m✔[39m [34mtidyr [39m 1.3.1
## ── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
## [31m✖[39m [34me1071[39m::[32melement()[39m masks [34mggplot2[39m::element()
## [31m✖[39m [34mtidyr[39m::[32mexpand()[39m masks [34mMatrix[39m::expand()
## [31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
## [31m✖[39m [34mdplyr[39m::[32mlag()[39m masks [34mstats[39m::lag()
## [31m✖[39m [34mtidyr[39m::[32mpack()[39m masks [34mMatrix[39m::pack()
## [31m✖[39m [34mcar[39m::[32mrecode()[39m masks [34mdplyr[39m::recode()
## [31m✖[39m [34mxgboost[39m::[32mslice()[39m masks [34mdplyr[39m::slice()
## [31m✖[39m [34mpurrr[39m::[32msome()[39m masks [34mcar[39m::some()
## [31m✖[39m [34mtidyr[39m::[32munpack()[39m masks [34mMatrix[39m::unpack()
## [36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors
## Loading required package: magrittr
##
##
## Attaching package: 'magrittr'
##
##
## The following object is masked from 'package:purrr':
##
## set_names
##
##
## The following object is masked from 'package:tidyr':
##
## extract
##
##
## Loading required package: weights
##
## Loading required package: assertthat
##
##
## Attaching package: 'assertthat'
##
##
## The following object is masked from 'package:tibble':
##
## has_name
##
##
## Loading required package: psych
##
##
## Attaching package: 'psych'
##
##
## The following object is masked from 'package:effectsize':
##
## phi
##
##
## The following object is masked from 'package:car':
##
## logit
##
##
## The following object is masked from 'package:modelsummary':
##
## SD
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
## Loading required package: robustbase
##
##
## Attaching package: 'kirkegaard'
##
##
## The following object is masked from 'package:psych':
##
## rescale
##
##
## The following object is masked from 'package:assertthat':
##
## are_equal
##
##
## The following object is masked from 'package:purrr':
##
## is_logical
##
##
## The following object is masked from 'package:effectsize':
##
## standardize
##
##
## The following object is masked from 'package:base':
##
## +
load_packages(
dplyr,
readr,
ggplot2,
fixest,
modelsummary,
effectsize,
spatialreg,
spdep,
sf,
tigris,
car,
gt
)
theme_set(theme_bw())
options(
digits = 3,
tigris_use_cache = TRUE
)
fig_dir <- "figs"
dir.create(fig_dir, showWarnings = FALSE)
# modelsummary formatting
gof_map_short <- list(
list("raw" = "nobs", "clean" = "N", "fmt" = 0),
list("raw" = "adj.r.squared", "clean" = "Adj. R2", "fmt" = 3),
list("raw" = "r2.within", "clean" = "Within R2", "fmt" = 3)
)
coef_labels <- c(
black_share = "Black %", hispanic_share = "Hispanic %",
native_share = "Native %", asian_share = "Asian %",
white_share = "White %",
nh_black_share = "NH Black %", nh_native_share = "NH Native %",
nh_asian_share = "NH Asian %",
z_nh_black_share = "NH Black %", z_hispanic_share = "Hispanic %",
z_nh_native_share = "NH Native %", z_nh_asian_share = "NH Asian %",
z_poverty_rate = "Poverty rate", z_median_income = "Median income",
z_gini = "Gini coefficient", z_employment_rate = "Employment rate",
poverty_rate = "Poverty rate", median_income = "Median income ($10k)",
gini = "Gini coefficient", employment_rate = "Employment rate",
`(Intercept)` = "(Intercept)"
)
# pop-weighted z-score function
weighted_scale <- function(x, w) {
mu <- weighted.mean(x, w, na.rm = TRUE)
sigma <- sqrt(sum(w * (x - mu)^2, na.rm = TRUE) / sum(w[!is.na(x)]))
(x - mu) / sigma
}
Data
# NH race shares (2020 census)
nh_2020 <- readRDS("data/county_nhrace_panel_1990_2020.rds") |>
filter(year == 2020) |>
select(GEOID, nh_black_share, nh_native_share, nh_asian_share, hispanic_share, total)
# homicide (2010-2020, state-imputed)
hom <- readRDS("data/county_homicide_cross_2010_2020_imputed.rds")
# ACS econ data
econ_acs <- readRDS("data/county_econ_acs.rds")
# ACS 2022 for cross-sectional standardized models
econ_2022 <- econ_acs |> filter(acs_year == 2022) |>
transmute(GEOID,
median_income = median_incomeE / 10000,
poverty_rate = poverty_belowE / poverty_totalE,
gini = giniE,
employment_rate = employ_employedE / employ_totalE)
# ACS 2018 for model comparison / bivariate models
econ_2018 <- econ_acs |> filter(acs_year == 2018) |>
transmute(GEOID,
median_income = median_incomeE / 10000,
poverty_rate = poverty_belowE / poverty_totalE,
gini = giniE,
employment_rate = employ_employedE / employ_totalE)
# cross-sectional for standardized models (ACS 2022)
cs <- nh_2020 |>
inner_join(hom, by = "GEOID") |>
inner_join(econ_2022, by = "GEOID") |>
mutate(state_fips = substr(GEOID, 1, 2)) |>
filter(!is.na(hom_rate), !is.na(poverty_rate))
# cross-sectional for bivariate/model comparison (ACS 2018)
cs_18 <- nh_2020 |>
inner_join(hom, by = "GEOID") |>
inner_join(econ_2018, by = "GEOID") |>
mutate(state_fips = substr(GEOID, 1, 2)) |>
filter(!is.na(hom_rate), !is.na(poverty_rate))
# longitudinal panel (1970-2020, 6 periods, homicide only)
panel <- readRDS("data/analysis_longitudinal_panel.rds")
# panel with NH race shares and multiple outcomes (1990-2020)
panel_nh <- readRDS("data/panel_1990_2020_all_outcomes.rds")
cat("Cross-sectional (ACS 2022):", nrow(cs), "counties\n")
## Cross-sectional (ACS 2022): 3135 counties
cat("Cross-sectional (ACS 2018):", nrow(cs_18), "counties\n")
## Cross-sectional (ACS 2018): 3140 counties
cat("Longitudinal panel:", nrow(panel), "rows,", n_distinct(panel$GEOID), "counties\n")
## Longitudinal panel: 19079 rows, 3248 counties
cat("NH panel 1990-2020:", nrow(panel_nh), "rows,", n_distinct(panel_nh$GEOID), "counties\n")
## NH panel 1990-2020: 12802 rows, 3233 counties
Analysis
Cross-sectional: standardized coefficients
# pop-weighted z-score all variables
cs_z <- cs |> mutate(across(
c(nh_black_share, hispanic_share, nh_native_share, nh_asian_share,
poverty_rate, median_income, gini, employment_rate, hom_rate),
~ weighted_scale(., total),
.names = "z_{.col}"
))
# 3 models: demographic, socioeconomic, full (all pop-weighted)
m_demo <- feols(z_hom_rate ~ z_nh_black_share + z_hispanic_share + z_nh_native_share + z_nh_asian_share,
data = cs_z, weights = ~total)
m_ses <- feols(z_hom_rate ~ z_poverty_rate + z_median_income + z_gini + z_employment_rate,
data = cs_z, weights = ~total)
## NOTE: 1 observation removed because of NA values (RHS: 1).
m_full <- feols(z_hom_rate ~ z_nh_black_share + z_hispanic_share + z_nh_native_share + z_nh_asian_share +
z_poverty_rate + z_median_income + z_gini + z_employment_rate,
data = cs_z, weights = ~total)
## NOTE: 1 observation removed because of NA values (RHS: 1).
ms_save(
list("Demographic" = m_demo, "Socioeconomic" = m_ses, "Full" = m_full),
"Cross-sectional OLS: Homicide rate (standardized coefficients)",
"table_cs_standardized.png"
)
## file:////tmp/RtmpDLJUrO/file4c21f5ac16ed8.html screenshot completed
## Saved table_cs_standardized.png
Epsilon-squared variance decomposition
# full model for epsilon² (pop-weighted)
m_full_lm <- lm(z_hom_rate ~ z_nh_black_share + z_hispanic_share + z_nh_native_share + z_nh_asian_share +
z_poverty_rate + z_median_income + z_gini + z_employment_rate,
data = cs_z, weights = total)
eps2 <- epsilon_squared(m_full_lm, alternative = "two.sided", ci = 0.95)
eps2_df <- data.frame(
variable = c("NH Black %", "Hispanic %", "NH Native %", "NH Asian %",
"Poverty rate", "Median income", "Gini coefficient", "Employment rate"),
epsilon2 = eps2$Epsilon2_partial,
ci_low = eps2$CI_low,
ci_high = eps2$CI_high,
type = c(rep("Demographic", 4), rep("Socioeconomic", 4))
) |> mutate(
sqrt_eps = sqrt(epsilon2),
sqrt_ci_low = sqrt(pmax(ci_low, 0)),
sqrt_ci_high = sqrt(ci_high)
)
eps2_df$variable <- factor(eps2_df$variable, levels = rev(eps2_df$variable))
p_eps <- ggplot(eps2_df, aes(x = sqrt_eps, y = variable, fill = type)) +
geom_col() +
geom_errorbar(aes(xmin = sqrt_ci_low, xmax = sqrt_ci_high), width = 0.3, orientation = "y") +
scale_fill_manual(values = c("Demographic" = "red", "Socioeconomic" = "steelblue")) +
labs(x = expression(sqrt(epsilon^2) ~ "(partial, Type III)"),
y = NULL, fill = NULL,
title = "Effect sizes: Full model predicting county homicide rate") +
theme(legend.position = "bottom")
ggsave(file.path(fig_dir, "barplot_epsilon.png"), p_eps, width = 9, height = 5, dpi = 150)
p_eps

OLS vs Spatial Error Model
# spatial weights for cross-sectional sample
counties_sf <- readRDS("data/spatial_weights.rds")$counties_sf
counties_proj <- st_transform(counties_sf, 5070)
# filter to complete cases and build weights
cs_z_complete <- cs_z |> filter(!is.na(z_employment_rate))
idx <- match(cs_z_complete$GEOID, counties_sf$GEOID)
cs_z_sp <- cs_z_complete[!is.na(idx), ]
counties_matched <- counties_proj[idx[!is.na(idx)], ]
centroids <- st_centroid(counties_matched)
## Warning: st_centroid assumes attributes are constant over geometries
coords <- st_coordinates(centroids)
W_cs <- nb2listw(knn2nb(knearneigh(coords, k = 5)), style = "W")
# OLS (weighted)
m_ols <- lm(z_hom_rate ~ z_nh_black_share + z_hispanic_share + z_nh_native_share + z_nh_asian_share +
z_poverty_rate + z_median_income + z_gini + z_employment_rate,
data = cs_z_sp, weights = total)
# spatial error model
m_sem <- errorsarlm(z_hom_rate ~ z_nh_black_share + z_hispanic_share + z_nh_native_share + z_nh_asian_share +
z_poverty_rate + z_median_income + z_gini + z_employment_rate,
data = cs_z_sp, listw = W_cs)
# build comparison table
ols_s <- summary(m_ols)$coefficients
sem_s <- summary(m_sem)$Coef
var_keys <- rownames(ols_s)[-1]
var_names <- c("NH Black %", "Hispanic %", "NH Native %", "NH Asian %",
"Poverty rate", "Median income", "Gini coefficient", "Employment rate")
fmt_coef <- function(b, se, p) {
stars <- ifelse(p < 0.001, "***", ifelse(p < 0.01, "**", ifelse(p < 0.05, "*", "")))
sprintf("%.3f%s (%.3f)", b, stars, se)
}
comp_df <- data.frame(
Variable = c(var_names, "Lambda (spatial)", "N", "AIC"),
OLS = c(mapply(fmt_coef, ols_s[var_keys, 1], ols_s[var_keys, 2], ols_s[var_keys, 4]),
"\u2014", as.character(nrow(cs_z_sp)), as.character(round(AIC(m_ols)))),
`Spatial Error` = c(mapply(fmt_coef, sem_s[var_keys, 1], sem_s[var_keys, 2], sem_s[var_keys, 4]),
sprintf("%.3f*** (%.3f)", m_sem$lambda, m_sem$lambda.se),
as.character(nrow(cs_z_sp)), as.character(round(AIC(m_sem)))),
check.names = FALSE
)
tab_spatial <- gt(comp_df) |>
tab_header(title = "Cross-sectional models: Homicide rate (standardized coefficients)") |>
tab_footnote("* p < 0.05, ** p < 0.01, *** p < 0.001. All variables pop-weighted z-scored. SE in parentheses.")
gtsave(tab_spatial, file.path(fig_dir, "table_ols_vs_spatial.png"))
## file:////tmp/RtmpDLJUrO/file4c21f654cb75f.html screenshot completed
Black share model comparison
# bivariate regressions: Black % as sole predictor of homicide rate
# all pop-weighted, hetero-robust SEs
# 1. OLS cross-sectional (ACS 2018, NH Black)
m_comp_ols <- feols(hom_rate ~ nh_black_share, data = cs_18, weights = ~total)
# 2. Spatial Error (ACS 2018, NH Black)
idx_18 <- match(cs_18$GEOID, counties_sf$GEOID)
cs_18_sp <- cs_18[!is.na(idx_18), ]
counties_18 <- counties_proj[idx_18[!is.na(idx_18)], ]
W_18 <- nb2listw(knn2nb(knearneigh(st_coordinates(st_centroid(counties_18)), k = 5)), style = "W")
## Warning: st_centroid assumes attributes are constant over geometries
m_comp_sem <- errorsarlm(hom_rate ~ nh_black_share, data = cs_18_sp, listw = W_18)
# 3. OLS + State FE (ACS 2018, NH Black)
m_comp_sfe <- feols(hom_rate ~ nh_black_share | state_fips, data = cs_18, weights = ~total)
## NOTE: 1 fixed-effect singleton was removed (1 observation).
# 4. Panel county+year FE (1990-2020, NH Black)
panel_hom_nh <- panel_nh |> filter(!is.na(hom_rate))
m_comp_panel_nh <- feols(hom_rate ~ nh_black_share | GEOID + year,
data = panel_hom_nh, weights = ~total)
## NOTE: 9/0 fixed-effect singletons were removed (9 observations).
# 5. Panel county+year FE (1970-2020, total Black)
m_comp_panel_70 <- feols(hom_rate ~ black_share | GEOID + census_year,
data = panel, weights = ~total)
## NOTES: 234 observations removed because of NA values (LHS: 234).
## 18/0 fixed-effect singletons were removed (18 observations).
# summary table
comp_models <- data.frame(
Model = c("OLS", "Spatial Error", "OLS + State FE",
"County + Year FE", "County + Year FE"),
Design = c("CS", "CS", "CS", "Panel", "Panel"),
Years = c("2010-2020", "2010-2020", "2010-2020", "1990-2020", "1970-2020"),
`Black % coefficient` = c(
sprintf("%.2f*** (%.2f)", coef(m_comp_ols)["nh_black_share"],
se(m_comp_ols, se = "hetero")["nh_black_share"]),
sprintf("%.2f*** (%.2f)", coef(m_comp_sem)["nh_black_share"],
summary(m_comp_sem)$Coef["nh_black_share", 2]),
sprintf("%.2f*** (%.2f)", coef(m_comp_sfe)["nh_black_share"],
se(m_comp_sfe, se = "hetero")["nh_black_share"]),
sprintf("%.2f*** (%.2f)", coef(m_comp_panel_nh)["nh_black_share"],
se(m_comp_panel_nh, se = "hetero")["nh_black_share"]),
sprintf("%.2f*** (%.2f)", coef(m_comp_panel_70)["black_share"],
se(m_comp_panel_70, se = "hetero")["black_share"])
),
N = c(m_comp_ols$nobs, nrow(cs_18_sp), m_comp_sfe$nobs,
m_comp_panel_nh$nobs, m_comp_panel_70$nobs),
check.names = FALSE
)
tab_comp <- gt(comp_models) |>
tab_header(title = "Black share \u2192 Homicide rate: Model comparison") |>
tab_footnote("* p < 0.05, ** p < 0.01, *** p < 0.001. Robust SE in parentheses. All models pop-weighted. CS models use NH Black %; 1970-2020 panel uses total Black % (NH unavailable pre-1990).")
gtsave(tab_comp, file.path(fig_dir, "table_black_model_comparison.png"))
## file:////tmp/RtmpDLJUrO/file4c21f7bb4ee44.html screenshot completed
Cross-sectional: multiple outcomes with state FE
# use panel_nh 2020 slice (has z-scored outcomes from within-year standardization)
p2_2020 <- panel_nh |> filter(year == 2020)
outcomes_5 <- c("poverty_rate_z", "median_income_z", "unemployment_rate_z", "pct_bachelors_z", "hom_rate_z")
outcome_labels_5 <- c("Poverty", "Income", "Unemployment", "Education", "Homicide")
state_fe_models <- lapply(outcomes_5, function(y) {
f <- as.formula(paste(y, "~ nh_black_share + hispanic_share + nh_native_share + nh_asian_share | state_fips"))
p2_2020$state_fips <- substr(p2_2020$GEOID, 1, 2)
feols(f, data = p2_2020, weights = ~total)
})
## NOTES: 8 observations removed because of NA values (LHS: 8).
## 1 fixed-effect singleton was removed (1 observation).
## NOTES: 9 observations removed because of NA values (LHS: 9).
## 1 fixed-effect singleton was removed (1 observation).
## NOTES: 8 observations removed because of NA values (LHS: 8).
## 1 fixed-effect singleton was removed (1 observation).
## NOTES: 8 observations removed because of NA values (LHS: 8).
## 1 fixed-effect singleton was removed (1 observation).
## NOTES: 78 observations removed because of NA values (LHS: 78).
## 1 fixed-effect singleton was removed (1 observation).
names(state_fe_models) <- outcome_labels_5
ms_save(
state_fe_models,
"Cross-sectional + State FE (2020, z-scored outcomes)",
"table_state_fe.png"
)
## file:////tmp/RtmpDLJUrO/file4c21f129c335b.html screenshot completed
## Saved table_state_fe.png
Panel models: multiple outcomes (1990-2020)
panel_fe_models <- lapply(outcomes_5, function(y) {
f <- as.formula(paste(y, "~ nh_black_share + hispanic_share + nh_native_share + nh_asian_share | GEOID + year"))
feols(f, data = panel_nh, weights = ~total, cluster = ~GEOID)
})
## NOTES: 87 observations removed because of NA values (LHS: 87).
## 9/0 fixed-effect singletons were removed (9 observations).
## NOTES: 88 observations removed because of NA values (LHS: 88).
## 9/0 fixed-effect singletons were removed (9 observations).
## NOTES: 87 observations removed because of NA values (LHS: 87).
## 9/0 fixed-effect singletons were removed (9 observations).
## NOTES: 87 observations removed because of NA values (LHS: 87).
## 9/0 fixed-effect singletons were removed (9 observations).
## NOTES: 234 observations removed because of NA values (LHS: 234).
## 9/0 fixed-effect singletons were removed (9 observations).
names(panel_fe_models) <- outcome_labels_5
ms_save(
panel_fe_models,
"County + Year FE (1990-2020, z-scored outcomes)",
"table_fe_1990_2020.png"
)
## file:////tmp/RtmpDLJUrO/file4c21f73a30f24.html screenshot completed
## Saved table_fe_1990_2020.png
Scatter: Black % vs homicide rate
p_scatter <- ggplot(cs_18, aes(x = nh_black_share * 100, y = hom_rate)) +
geom_point(alpha = 0.2, size = 0.8, color = "steelblue") +
geom_smooth(method = "lm", color = "red", fill = "red", alpha = 0.2, linewidth = 0.8) +
labs(x = "Black share (2020 Census)", y = "Homicide rate per 100k (2010-2020)",
title = "Cross-sectional: Black share vs homicide rate") +
scale_x_continuous(labels = function(x) paste0(x, "%")) +
coord_cartesian(ylim = c(0, 50))
ggsave(file.path(fig_dir, "scatter_cs_black_hom.png"), p_scatter, width = 7, height = 6, dpi = 150)
## [37m`geom_smooth()` using formula = 'y ~ x'[39m
p_scatter
## [37m`geom_smooth()` using formula = 'y ~ x'[39m

Maps
# county homicide rate map (CONUS)
counties_map <- counties(year = 2020, cb = TRUE) |>
filter(!STATEFP %in% c("02", "15", "72", "78", "66", "60", "69")) |>
st_transform(5070)
map_d <- counties_map |> left_join(cs_18 |> select(GEOID, hom_rate), by = "GEOID")
p_hom_map <- ggplot(map_d) +
geom_sf(aes(fill = hom_rate), color = NA) +
scale_fill_viridis_c(option = "inferno", trans = "sqrt",
name = "Homicide rate\nper 100k",
breaks = c(0, 5, 10, 20, 40)) +
labs(title = "County homicide rate (2010-2020)") +
theme_void() +
theme(legend.position = c(0.92, 0.3))
ggsave(file.path(fig_dir, "map_homicide_rate.png"), p_hom_map, width = 12, height = 7, dpi = 200)
p_hom_map

# Native American population share map
race_2020 <- readRDS("data/county_race_panel_1970_2020.rds") |> filter(year == 2020)
map_native <- counties_map |> left_join(race_2020 |> select(GEOID, native_share), by = "GEOID")
p_native_map <- ggplot(map_native) +
geom_sf(aes(fill = native_share), color = NA) +
scale_fill_viridis_c(option = "viridis", trans = "sqrt",
name = "Native American\nshare",
labels = scales::percent) +
labs(title = "Native American population share by county (2020)") +
theme_void() +
theme(legend.position = c(0.92, 0.3))
ggsave(file.path(fig_dir, "map_native_share.png"), p_native_map, width = 12, height = 7, dpi = 200)
p_native_map
