library(readxl)
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
Convictions <- read_excel("Strict Liability spread sheet.xlsx")
Arrest <- read_excel("NJ DIH Arrest data 2017-2024.xlsx")
Overdose <- read_csv("Underlying Cause of Death 1999-2023.csv",
col_types = cols(Notes = col_skip(),
Year = col_integer(), `Year Code` = col_integer(),
Population = col_integer()))
State_OD_18 <- read_csv("Statewide_data_month_2018-2025-5.csv",
col_types = cols(Notes = col_skip(),
State = col_skip(), `State Code` = col_skip(),
Population = col_skip(), `Crude Rate` = col_skip()))
State_OD_mon <- read_csv("State Month Underlying Cause of Death, 1999-2025.csv")
## Rows: 317 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Month, Month Code, Deaths
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Arrest <- Arrest %>%
mutate(
Race = ifelse(Race %in% c("Unknown", "Not Provided"), NA, Race)
)
Convictions <- Convictions %>%
mutate(
Race = ifelse(Race %in% c("Unknown", "Not Provided"), NA, Race)
)
Convictions <- Convictions %>%
mutate(
Ethnicity = ifelse(Ethnicity %in% c("Unknown", "Not Provided", "NA", ""), NA, Ethnicity)
)
# Create conviction year variable
Convictions$Sentence.Date <- as.Date(Convictions$`Date of Sentence`)
Convictions$Sentence.Year <- as.numeric(format(Convictions$Sentence.Date, "%Y"))
#Create Conviction Month & Year code
Convictions$`Month Code` <- format(Convictions$Sentence.Date, "%Y/%m")
#Create Year & Month Variable in State_OD
library(dplyr)
State_OD_18 <- State_OD_18 %>%
mutate(
Year = as.numeric(sub("/.*", "", `Month Code`)),
Month = as.numeric(sub(".*/", "", `Month Code`))
)
State_OD_mon <- State_OD_mon %>%
mutate(
Year = as.numeric(sub("/.*", "", `Month Code`)),
Month = as.numeric(sub(".*/", "", `Month Code`))
)
# Create Month Code
Arrest$Month_NUM <- match(Arrest$`Arrest Month`, month.name)
Arrest$`Month Code` <- paste0(Arrest$`Arrest Year`, "/", sprintf("%02d", Arrest$Month_NUM))
#Create list of all months to include: January 2017 through December 2024
mons_list <- data.frame(`Month Code` = format(
seq.Date(
from = as.Date("2017-01-01"), # set start
to = as.Date("2024-12-31"), # set end
by = "month"
),
"%Y/%m"
))
# data.frame() converts `Month Code` to Month.Code; restore the original name
mons_list <- mons_list %>%
rename(`Month Code` = Month.Code)
#Count arrests
arrests_by_mon <- Arrest %>%
group_by(`Month Code`) %>%
summarise(Arrest_Count = n(), .groups = "drop")
# Insert 0's for months not reported as having arrests
arrests_by_mon <- mons_list %>%
left_join(arrests_by_mon, by = "Month Code") %>%
mutate(Arrest_Count = ifelse(is.na(Arrest_Count), 0, Arrest_Count))
library(dplyr)
library(tidyr)
# Step 1: Create list of all months to include: January 1988 through December 2022 (matching the sequence below)
all_months <- data.frame(`Month Code` = format(
seq.Date(
from = as.Date("1988-01-01"), # set start
to = as.Date("2022-12-01"), # set end
by = "month"
),
"%Y/%m"
))
all_months <- all_months %>%
rename(`Month Code` = Month.Code)
# Step 2: Count convictions
conv_by_mon <- Convictions %>%
group_by(`Month Code`) %>%
summarise(Conv_Count = n(), .groups = "drop")
# Insert 0's for months not reported as having convictions
conv_by_mon <- all_months %>%
left_join(conv_by_mon, by = "Month Code") %>%
mutate(Conv_Count = ifelse(is.na(Conv_Count), 0, Conv_Count))
For the state-level data, I am using the population from the counties in the CDC WONDER dataset and combining them, but it may be more accurate to find a population dataset for NJ as a whole. I added 2024 population estimates from the Census Bureau website before it was rendered inoperable by the government shutdown. (A sketch of one alternative statewide source follows the code below.)
# 1) Statewide population by year (summing population data across counties to determine state population -- may be best to find population data from another source for the state)
pop_by_year <- Overdose %>%
group_by(Year) %>%
summarise(Population = sum(Population, na.rm = TRUE), .groups = "drop")
#Added 2024 population estimates from Census bureau
library(dplyr)
pop_by_year <- pop_by_year %>%
bind_rows(data.frame(
Year = 2024,
Population = 9500851
))
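If we later want a dedicated statewide series instead of summing counties, one option is the ACS total-population variable via the tidycensus package. A minimal sketch, assuming tidycensus is installed and a Census API key has been set with census_api_key(); B01003_001 is the ACS total-population variable:
library(tidycensus)
# Statewide ACS 5-year total population for New Jersey (one year shown)
nj_pop_2022 <- get_acs(
geography = "state",
variables = "B01003_001",
state = "NJ",
year = 2022
)
nj_pop_2022$estimate
Looping this over years would yield a statewide Year/Population table to swap in for pop_by_year.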
# 2) Arrests by year
arrests_by_year <- Arrest %>%
group_by(`Arrest Year`) %>%
summarise(Arrest_Count = n(), .groups = "drop") %>%
rename(Year = `Arrest Year`)
# 3) Convictions by year
#convictions_by_year_filter code removed
convictions_by_year<- Convictions %>%
group_by(Sentence.Year) %>%
summarise(Conviction_Count = n(), .groups = "drop") %>%
rename(Year = Sentence.Year)
# 4) Deaths by year (statewide)
deaths_by_year <- State_OD_18 %>%
group_by(Year) %>%
summarise(Count = sum(Deaths, na.rm = TRUE), .groups = "drop")
#ARRESTS 2017-2024
library(ggplot2)
ggplot(arrests_by_year, aes(x = `Year`, y = Arrest_Count)) +
geom_line(group = 1, color = "steelblue", linewidth = 1.2) +
geom_point(size = 2) +
geom_text(aes(label = Arrest_Count), vjust = -0.5) +
labs(title = "Drug-Induced Arrests by Year",
x = "Year", y = "Arrest Count") +
theme_minimal()
@Jennifer, we need to double-check that the arrest data are complete through 2024. I did not pull them, and looking at the dataset, the last arrest included is September 2024.
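A quick way to verify: since the YYYY/MM Month Code sorts chronologically as text, the latest month present is simply its maximum.
# Latest arrest month present in the data
max(Arrest$`Month Code`, na.rm = TRUE)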
library(knitr)
county_summary_arr <- Arrest %>%
count(County) %>%
mutate(Percent = round(n / sum(n) * 100, 1)) %>%
arrange(desc(n))
kable(county_summary_arr,
caption = "Table: Arrests by County",
col.names = c("County", "Count", "Percent"),
align = "lrr")
| County | Count | Percent |
|---|---|---|
| Atlantic | 57 | 15.5 |
| Burlington | 56 | 15.3 |
| Cape May | 47 | 12.8 |
| Ocean | 37 | 10.1 |
| Middlesex | 31 | 8.4 |
| Statewide or Regional LEA | 22 | 6.0 |
| Monmouth | 20 | 5.4 |
| Morris | 20 | 5.4 |
| Bergen | 18 | 4.9 |
| Gloucester | 14 | 3.8 |
| Passaic | 13 | 3.5 |
| Essex | 9 | 2.5 |
| Cumberland | 5 | 1.4 |
| Hunterdon | 4 | 1.1 |
| Camden | 3 | 0.8 |
| Mercer | 3 | 0.8 |
| Salem | 2 | 0.5 |
| Union | 2 | 0.5 |
| Warren | 2 | 0.5 |
| Hudson | 1 | 0.3 |
| Somerset | 1 | 0.3 |
## Strict Liability Conviction Data, 1988 - April 2025
library(dplyr)
library(ggplot2)
# Line plot
ggplot(convictions_by_year, aes(x = Year, y = Conviction_Count)) +
geom_line(color = "darkblue", size = 1.2) +
geom_point(size = 2) +
geom_text(aes(label = Conviction_Count), vjust = -0.7, size = 3.5) + # adds counts above points
scale_x_continuous(breaks = seq(min(convictions_by_year$Year, na.rm = TRUE),
max(convictions_by_year$Year, na.rm = TRUE),
by = 2)) +
labs(title = "Number of DID Convictions by Year",
x = "Year",
y = "Number of Convictions") +
theme_minimal()
library(knitr)
county_summary_conv <- Convictions %>%
count(County) %>%
mutate(Percent = round(n / sum(n) * 100, 1)) %>%
arrange(desc(n))
kable(county_summary_conv,
caption = "Table: Convictions by County, 1999-2024 (partial)",
col.names = c("County", "Count", "Percent"),
align = "lrr")
library(dplyr)
library(ggplot2)
# Summarize total deaths by year
State_OD_mon$Deaths <- as.numeric(State_OD_mon$Deaths)
## Warning: NAs introduced by coercion
deaths_by_year_tot <- State_OD_mon %>%
group_by(Year) %>%
summarise(Count = sum(Deaths, na.rm = TRUE), .groups = "drop")
# Line chart of total deaths over time
ggplot(deaths_by_year_tot, aes(x = Year, y = Count)) +
geom_line(color = "darkred", size = 1.2) +
geom_point(size = 2) +
geom_text(aes(label = Count), vjust = -0.5, size = 3) +
labs(title = "Total Overdose Deaths by Year",
x = "Year", y = "Number of Deaths") +
theme_minimal()
library(dplyr)
#Combine into one dataset
combined_data <- full_join(
dplyr::select(arrests_by_year, Year, Arrest_Count),
dplyr::select(deaths_by_year, Year, Count),
by = "Year"
)
library(ggplot2)
# Choose a scaling factor to bring arrests to the same ballpark as deaths
scale_factor <- 30 # adjust as needed
ggplot(combined_data, aes(x = Year)) +
geom_line(aes(y = Count, color = "Overdose Deaths"), size = 1.2) +
geom_line(aes(y = Arrest_Count * scale_factor, color = "Arrests"), size = 1.2) +
scale_y_continuous(
name = "Overdose Deaths",
sec.axis = sec_axis(~./scale_factor, name = "Arrests")
) +
scale_color_manual(values = c("Overdose Deaths" = "blue", "Arrests" = "red")) +
labs(
title = "Overdose Deaths vs Arrests by Year (Dual Y-Axis)",
x = "Year",
color = "Metric"
) +
theme_minimal()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
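Rather than hand-tuning, scale_factor could also be derived from the two series; a small sketch using the maxima already in combined_data:
# Data-driven scale factor: ratio of the series maxima
scale_factor <- max(combined_data$Count, na.rm = TRUE) /
max(combined_data$Arrest_Count, na.rm = TRUE)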
Visually, there does not appear to be a relationship whereby arrests rise and fall with overdose deaths. In other words, the number of overdose deaths (the underlying incident rate) does not seem to predict whether arrests occur; of course, we will test this later. It also does not appear that arrests result in fewer overdoses the following year. A simple lead-lag check is sketched below.
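A minimal sketch of that lead-lag check using the cross-correlation function on the yearly series (assumes the years are complete and ordered; with only a handful of years, this is suggestive at best):
# Cross-correlations at lags -3..3; negative lags = arrests leading deaths
cc_dat <- na.omit(combined_data[order(combined_data$Year), c("Arrest_Count", "Count")])
ccf(cc_dat$Arrest_Count, cc_dat$Count, lag.max = 3, main = "Arrests vs Overdose Deaths")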
library(tidyr)
combined_long <- combined_data %>%
pivot_longer(cols = c(Arrest_Count, Count), names_to = "Metric", values_to = "Value")
ggplot(combined_long, aes(x = Year, y = Value)) +
geom_line(color = "steelblue", size = 1.2) +
facet_wrap(
~ Metric,
scales = "free_y",
labeller = as_labeller(
c("Count" = "Overdose Deaths", "Arrest_Count" = "Arrests")
)
) +
labs(
title = "Arrests vs Overdose Deaths by Year",
x = "Year",
y = "Count"
) +
theme_minimal()
Transformed to z-scores for a direct comparison:
combined_data_z <- combined_data %>%
mutate(
Arrest_z = scale(Arrest_Count),
Deaths_z = scale(Count)
)
ggplot(combined_data_z, aes(x = Year)) +
geom_line(aes(y = Arrest_z, color = "Arrests"), size = 1.2) +
geom_line(aes(y = Deaths_z, color = "Overdose Deaths"), size = 1.2) +
labs(title = "Z-Score Comparison: Arrests vs Overdose Deaths",
y = "Z-Score (Standardized Values)",
x = "Year",
color = "Metric") +
theme_minimal()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
Arrest_OD_month <- left_join(arrests_by_mon, State_OD_mon, by = "Month Code")
Arrest_OD_month <- Arrest_OD_month %>%
left_join(pop_by_year, by = "Year")
#Convictions/Overdoses
convictions_filtered <- Convictions %>%
filter(Sentence.Year >= 2018, Sentence.Year <= 2025)
#Create aggregated count of convictions by month for the entire dataset time period
conv_sum <- Convictions %>%
filter(!is.na(`Month Code`)) %>%
group_by(`Month Code`) %>%
summarise(Conviction_Count = n()) %>%
arrange(`Month Code`)
library(dplyr)
combined_df_3 <- conv_by_mon %>%
left_join(State_OD_mon, by = "Month Code")
#Filter out prior to 1999
combined_df_3 <- combined_df_3 %>%
mutate(Month_Date = as.Date(paste0(`Month Code`, "/01"), format = "%Y/%m/%d"))
combined_df_3_filtered <- combined_df_3 %>%
filter(Month_Date >= as.Date("1999-01-01")) # the OD data only goes back to 1999
#Add population to the dataset
combined_df_3 <-combined_df_3 %>%
left_join(pop_by_year, by = "Year")
combined_df_3_filtered <- combined_df_3_filtered %>%
left_join(pop_by_year, by = "Year")
combined_yearly <- deaths_by_year_tot %>%
left_join(convictions_by_year, by = "Year")
#filter dates
combined_yearly <- combined_yearly %>%
filter(Year >= 1999)
# Replace NA conviction counts with 0
combined_yearly <- combined_yearly %>%
replace_na(list(
Conviction_Count = 0
))
library(ggplot2)
scale_factor <- 20 # Adjust to visually match scales
ggplot(combined_yearly, aes(x = Year)) +
geom_line(aes(y = Count, color = "Overdose Deaths"), size = 1.2) +
geom_line(aes(y = Conviction_Count * scale_factor, color = "Convictions"), size = 1.2) +
scale_y_continuous(
name = "Overdose Deaths",
sec.axis = sec_axis(~./scale_factor, name = "Convictions")
) +
scale_color_manual(
values = c("Overdose Deaths" = "blue", "Convictions" = "red")
) +
labs(
title = "Overdose Deaths vs Convictions by Year",
x = "Year",
color = "Metric"
) +
theme_minimal()
Because arrests and overdose deaths are counts, a Spearman rank correlation was run between monthly arrests and overdose deaths:
cor.test(Arrest_OD_month$Arrest_Count,
Arrest_OD_month$Deaths,
method = "spearman")
## Warning in cor.test.default(Arrest_OD_month$Arrest_Count,
## Arrest_OD_month$Deaths, : Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: Arrest_OD_month$Arrest_Count and Arrest_OD_month$Deaths
## S = 113708, p-value = 0.02496
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.2287815
There was a statistically significant, positive association between the two variables, ρ = 0.229, p = .025. This suggests that months with higher numbers of arrests tend to also have higher numbers of overdose deaths.
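The ties warning above can be avoided by requesting the asymptotic test explicitly; Kendall's tau, which handles ties more gracefully, is a robustness check worth running as well:
# Asymptotic Spearman test (no exact p-value, so no ties warning)
cor.test(Arrest_OD_month$Arrest_Count, Arrest_OD_month$Deaths,
method = "spearman", exact = FALSE)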
### Poisson Model: Arrest_OD_month
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
# Poisson regression: model overdose deaths as count
poisson_model1 <- glm(
Deaths ~ Arrest_Count + Population,
family = poisson(link = "log"),
data = Arrest_OD_month
)
summary(poisson_model1)
##
## Call:
## glm(formula = Deaths ~ Arrest_Count + Population, family = poisson(link = "log"),
## data = Arrest_OD_month)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 8.405e+00 3.181e-01 26.423 <2e-16 ***
## Arrest_Count 4.468e-03 2.332e-03 1.916 0.0554 .
## Population -3.296e-07 3.448e-08 -9.557 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 686.71 on 95 degrees of freedom
## Residual deviance: 551.89 on 93 degrees of freedom
## AIC: 1252.9
##
## Number of Fisher Scoring iterations: 4
exp(coef(poisson_model1))
## (Intercept) Arrest_Count Population
## 4470.6019937 1.0044780 0.9999997
# Overdispersion test
overdispersion <- sum(residuals(poisson_model1, type = "pearson")^2) / df.residual(poisson_model1)
overdispersion
## [1] 5.718123
Model: Deaths ~ Arrest_Count + Population
Overdispersion detected: dispersion = 5.72 (a value > 1 means the variance exceeds the mean, violating the Poisson assumption).
A Poisson regression model indicated that arrest counts were marginally associated with overdose deaths (p = .055), with a 1-unit increase in arrests associated with a 0.45% increase in expected deaths. Population size, however, was negatively associated with deaths and reached statistical significance (p < .001). Despite these findings, the model exhibited substantial overdispersion (dispersion = 5.72), suggesting that the Poisson assumptions were violated and that standard errors and significance tests may be unreliable.
A negative binomial model may therefore be more appropriate.
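As a complementary check, Cameron and Trivedi's overdispersion test could be run on the Poisson fit; a sketch assuming the AER package is installed:
library(AER)
# Formal test of H0: equidispersion vs. H1: overdispersion
dispersiontest(poisson_model1)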
library(MASS)
nb_model <- glm.nb(Deaths ~ Arrest_Count + Population, data = Arrest_OD_month)
summary(nb_model)
##
## Call:
## glm.nb(formula = Deaths ~ Arrest_Count + Population, data = Arrest_OD_month,
## init.theta = 43.83360034, link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 8.601e+00 7.852e-01 10.954 < 2e-16 ***
## Arrest_Count 5.112e-03 5.909e-03 0.865 0.387
## Population -3.513e-07 8.496e-08 -4.135 3.55e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(43.8336) family taken to be 1)
##
## Null deviance: 121.922 on 95 degrees of freedom
## Residual deviance: 98.233 on 93 degrees of freedom
## AIC: 974.65
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 43.83
## Std. Err.: 7.70
##
## 2 x log-likelihood: -966.651
# Overdispersion test
overdispersion2 <- sum(residuals(nb_model, type = "pearson")^2) / df.residual(nb_model)
overdispersion2
## [1] 0.9765794
The dispersion statistic is 0.977, indicating no overdispersion.
AIC = 974.65, much lower than the Poisson model's 1252.9, so the negative binomial model is clearly the better fit.
A Negative Binomial regression model was fitted to examine the association between monthly overdose deaths, arrest counts, and population size, addressing overdispersion observed in the earlier Poisson model. Arrest counts were not statistically significant (β = 0.0051, p = .387), suggesting no meaningful association between arrests and overdose deaths in the observed data.
The model showed no evidence of overdispersion (dispersion = 0.98), indicating a good fit. Compared to the Poisson model, the negative binomial regression provided a substantially improved fit (AIC = 974.65 vs. 1252.9), confirming it as the more appropriate model for overdispersed count data.
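This comparison could be formalized with a likelihood-ratio test of the NB model against its Poisson special case; a sketch assuming the pscl package is installed:
library(pscl)
# LR test of H0: dispersion parameter = 0 (Poisson) against the NB alternative
odTest(nb_model)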
library(dplyr)
Arrest_OD_month <- Arrest_OD_month %>%
mutate(Arrest_Count_lag1 = lag(Arrest_Count, n = 1))
library(MASS)
nb_model_lag <- glm.nb(Deaths ~ Arrest_Count_lag1 + Population, data = Arrest_OD_month)
summary(nb_model_lag)
##
## Call:
## glm.nb(formula = Deaths ~ Arrest_Count_lag1 + Population, data = Arrest_OD_month,
## init.theta = 42.94998848, link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 8.818e+00 7.921e-01 11.133 < 2e-16 ***
## Arrest_Count_lag1 1.081e-03 6.001e-03 0.180 0.857
## Population -3.735e-07 8.569e-08 -4.359 1.31e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(42.95) family taken to be 1)
##
## Null deviance: 119.530 on 94 degrees of freedom
## Residual deviance: 97.187 on 92 degrees of freedom
## (1 observation deleted due to missingness)
## AIC: 965.97
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 42.95
## Std. Err.: 7.56
##
## 2 x log-likelihood: -957.967
| Term | Estimate | exp(β) | p-value | Interpretation |
|---|---|---|---|---|
| Intercept | 8.818 | 6,760.39 | < .001 | Baseline deaths |
| Arrest_Count_lag1 | 0.00108 | 1.0011 | 0.857 | Not significant |
| Population | -3.735e-07 | ~0.9999996 | < .001 | Significant |
A negative binomial regression was conducted to test whether lagged arrest counts predict monthly overdose deaths, while accounting for population size. Arrest counts were lagged by one month to assess potential delayed effects. The model revealed that lagged arrests were not significantly associated with overdose deaths (β = 0.00108, p = .857), indicating no measurable delayed impact. Population size remained a significant negative predictor (β = -3.735e-07, p < .001).
The model demonstrated good fit to the data, with a dispersion statistic near 1 and a lower AIC (965.97) than the non-lagged model, though the lagged model drops one additional month to missingness, so the AICs are not strictly comparable. These findings suggest that short-term changes in arrests do not meaningfully influence monthly overdose mortality, even when allowing for a one-month lag.
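To check whether one month is the right horizon, the same specification could be refit across several lags; a minimal sketch reusing Arrest_OD_month (note that longer lags drop more rows, so the AICs are only roughly comparable):
# Fit NB models with arrest lags of 1-3 months and compare AIC
lag_fits <- lapply(1:3, function(k) {
dat <- Arrest_OD_month %>% mutate(Arrest_lag = lag(Arrest_Count, k))
glm.nb(Deaths ~ Arrest_lag + Population, data = dat)
})
sapply(lag_fits, AIC)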
library(dplyr)
conv_data <- combined_df_3_filtered %>%
dplyr::select(Month_Date, Year, Deaths, Conv_Count, Population) %>%
arrange(Month_Date) %>%
mutate(Conv_Count_lag1 = lag(Conv_Count, 1))
cor.test(conv_data$Conv_Count,
conv_data$Deaths,
method = "spearman")
## Warning in cor.test.default(conv_data$Conv_Count, conv_data$Deaths, method =
## "spearman"): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: conv_data$Conv_Count and conv_data$Deaths
## S = 2323669, p-value = 4.469e-13
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.4102266
poisson_conv <- glm(
Deaths ~ Conv_Count + Population,
family = poisson(link = "log"),
data = conv_data
)
summary(poisson_conv)
##
## Call:
## glm(formula = Deaths ~ Conv_Count + Population, family = poisson(link = "log"),
## data = conv_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.283e+01 2.302e-01 -55.75 <2e-16 ***
## Conv_Count 4.470e-02 2.567e-03 17.42 <2e-16 ***
## Population 1.984e-06 2.604e-08 76.20 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 14017.4 on 286 degrees of freedom
## Residual deviance: 5190.6 on 284 degrees of freedom
## (1 observation deleted due to missingness)
## AIC: 7029.4
##
## Number of Fisher Scoring iterations: 4
# Overdispersion check
overdispersion_conv <- sum(residuals(poisson_conv, type = "pearson")^2) / df.residual(poisson_conv)
overdispersion_conv
## [1] 19.11486
library(MASS)
nb_conv <- glm.nb(
Deaths ~ Conv_Count + Population,
data = conv_data
)
summary(nb_conv)
##
## Call:
## glm.nb(formula = Deaths ~ Conv_Count + Population, data = conv_data,
## init.theta = 6.541899928, link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.394e+01 1.008e+00 -13.831 < 2e-16 ***
## Conv_Count 6.506e-02 1.395e-02 4.665 3.09e-06 ***
## Population 2.106e-06 1.150e-07 18.305 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(6.5419) family taken to be 1)
##
## Null deviance: 782.51 on 286 degrees of freedom
## Residual deviance: 294.78 on 284 degrees of freedom
## (1 observation deleted due to missingness)
## AIC: 2931.6
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 6.542
## Std. Err.: 0.576
##
## 2 x log-likelihood: -2923.581
# Overdispersion check
overdispersion_nb <- sum(residuals(nb_conv, type = "pearson")^2) / df.residual(nb_conv)
overdispersion_nb
## [1] 0.989071
Deaths ~ Conv_Count + Population

| Term | Estimate | z | p-value |
|---|---|---|---|
| Intercept | -13.94 | -13.83 | < .001 *** |
| Conv_Count | 0.06506 | 4.67 | < .001 *** |
| Population | 2.106e-06 | 18.31 | < .001 *** |

AIC: 2931.6; dispersion ≈ 0.99 (acceptable).
nb_conv_lag <- glm.nb(
Deaths ~ Conv_Count_lag1 + Population,
data = conv_data
)
summary(nb_conv_lag)
##
## Call:
## glm.nb(formula = Deaths ~ Conv_Count_lag1 + Population, data = conv_data,
## init.theta = 6.513924753, link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.407e+01 1.014e+00 -13.877 < 2e-16 ***
## Conv_Count_lag1 6.440e-02 1.394e-02 4.619 3.85e-06 ***
## Population 2.121e-06 1.157e-07 18.328 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(6.5139) family taken to be 1)
##
## Null deviance: 775.69 on 285 degrees of freedom
## Residual deviance: 293.71 on 283 degrees of freedom
## (2 observations deleted due to missingness)
## AIC: 2923.8
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 6.514
## Std. Err.: 0.574
##
## 2 x log-likelihood: -2915.822
# Overdispersion check
overdispersion_nb2 <- sum(residuals(nb_conv_lag, type = "pearson")^2) / df.residual(nb_conv_lag)
overdispersion_nb2
## [1] 0.9911637
Deaths ~ Conv_Count_lag1 + Population

| Term | Estimate | z | p-value |
|---|---|---|---|
| Intercept | -14.07 | -13.88 | < .001 *** |
| Conv_Count_lag1 | 0.06440 | 4.62 | < .001 *** |
| Population | 2.121e-06 | 18.33 | < .001 *** |

AIC: 2923.8; dispersion ≈ 0.99 (acceptable).
# For the NB model without lag
exp_coef_nb <- exp(coef(nb_conv))
exp_coef_nb
## (Intercept) Conv_Count Population
## 8.873064e-07 1.067222e+00 1.000002e+00
# For the lagged NB model
exp_coef_lag <- exp(coef(nb_conv_lag))
exp_coef_lag
## (Intercept) Conv_Count_lag1 Population
## 7.774756e-07 1.066519e+00 1.000002e+00
# Confidence intervals (exp) for the original NB model
exp_confint_nb <- exp(confint(nb_conv))
## Waiting for profiling to be done...
# Confidence intervals (exp) for the lagged NB model
exp_confint_lag <- exp(confint(nb_conv_lag))
## Waiting for profiling to be done...
anova(nb_conv_lag, nb_conv, test = "Chisq")
## Likelihood ratio tests of Negative Binomial Models
##
## Response: Deaths
## Model theta Resid. df 2 x log-lik. Test df
## 1 Conv_Count + Population 6.541900 284 -2923.581
## 2 Conv_Count_lag1 + Population 6.513925 283 -2915.822 1 vs 2 1
## LR stat. Pr(Chi)
## 1
## 2 7.759112 0.005344223
library(DHARMa)
## This is DHARMa 0.4.7. For overview type '?DHARMa'. For recent changes, type news(package = 'DHARMa')
# For original model
sim1 <- simulateResiduals(nb_conv)
plot(sim1)
## Warning in newton(lsp = lsp, X = G$X, y = G$y, Eb = G$Eb, UrS = G$UrS, L = G$L,
## : Fitting terminated with step failure - check results carefully
# For lagged model
sim2 <- simulateResiduals(nb_conv_lag)
plot(sim2)
Spearman’s rank correlation showed a moderate positive relationship between convictions and overdose mortality (ρ = 0.41, p < .001).
Poisson regression was initially attempted but was highly overdispersed (ϕ = 19.1), justifying the use of a negative binomial model (ϕ ≈ 0.99).
A series of negative binomial regression models were fit to examine the association between conviction counts and overdose deaths from 1999 through 2022 (the span of the merged monthly series).
In the adjusted model without a lag, convictions were significantly associated with overdose deaths (β = 0.065, p < .001), controlling for population size. This association remained statistically significant when a 1-month lag of conviction counts was introduced (β = 0.064, p < .001), suggesting that monthly changes in convictions may predict subsequent changes in overdose mortality. In both models, population size was a robust predictor of overdose deaths (p < .001). Model fit improved with the lagged specification (AIC = 2923.8 vs. 2931.6), though the lagged model uses one fewer observation.
In the negative binomial model, the incidence rate ratio (IRR) for conviction counts was 1.067 (exp(0.065)), indicating that each additional conviction in a given month was associated with a 6.7% increase in the expected count of overdose deaths, holding population constant.
In the lagged model, the IRR for 1-month-lagged convictions was also 1.067, a 6.7% increase in overdose deaths for every one-unit increase in convictions in the prior month. This suggests a potential short-term temporal association.
The IRR for population was approximately 1.000002, indicating that increases in population were associated with slightly higher overdose deaths, as expected for a population-scaled outcome.
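An alternative parameterization worth considering: treat population as an exposure by including log(Population) as an offset, so the model describes the death rate directly. A sketch (not the specification used above):
# Rate parameterization: the log-population coefficient is fixed at 1
nb_conv_offset <- glm.nb(Deaths ~ Conv_Count_lag1 + offset(log(Population)),
data = conv_data)
summary(nb_conv_offset)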
library(dplyr)
library(gt)
# Install broom
# install.packages("broom")
library(broom)
# Tidy the model and exponentiate coefficients + CI
conv_nb_lag_table <- tidy(nb_conv_lag, conf.int = TRUE, conf.level = 0.95, exponentiate = TRUE)
# Display the table
print(conv_nb_lag_table)
## # A tibble: 3 × 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.000000777 1.01 -13.9 8.76e-44 1.09e-7 5.52e-6
## 2 Conv_Count_lag1 1.07 0.0139 4.62 3.85e- 6 1.04e+0 1.10e+0
## 3 Population 1.00 0.000000116 18.3 4.91e-75 1.00e+0 1.00e+0
conv_nb_lag_table %>%
mutate(across(estimate:conf.high, ~ round(.x, 6))) %>%
rename(
Term = term,
IRR = estimate,
`CI Lower` = conf.low,
`CI Upper` = conf.high,
`P-Value` = p.value
) %>%
gt() %>%
tab_header(
title = "Incidence Rate Ratios (IRRs) from Lagged Negative Binomial Model"
)
Incidence Rate Ratios (IRRs) from Lagged Negative Binomial Model

| Term | IRR | std.error | statistic | P-Value | CI Lower | CI Upper |
|---|---|---|---|---|---|---|
| (Intercept) | 0.000001 | 1.013723 | -13.876779 | 0e+00 | 0.000000 | 0.000006 |
| Conv_Count_lag1 | 1.066519 | 0.013942 | 4.619333 | 4e-06 | 1.037923 | 1.096825 |
| Population | 1.000002 | 0.000000 | 18.328406 | 0e+00 | 1.000002 | 1.000002 |
COVID-era effects are controlled using a factor variable, which lets the model compare each time period with a reference category. Note that because the merged monthly series ends in December 2022, the 2023_24 level never occurs, and factor() sorts the remaining character levels alphabetically, so 2020 serves as the reference; this is why the output below shows dummies only for 2021, 2022, and Pre.
conv_data <- conv_data %>%
mutate(
COVID_era = case_when(
Year < 2020 ~ "Pre",
Year == 2020 ~ "2020",
Year == 2021 ~ "2021",
Year == 2022 ~ "2022",
TRUE ~ "2023_24"
)
)
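To choose the reference period explicitly rather than relying on alphabetical ordering, the factor could be releveled first; a sketch using the pre-COVID period as the baseline:
# Make the reference category explicit (here: the pre-COVID period)
conv_data <- conv_data %>%
mutate(COVID_era = relevel(factor(COVID_era), ref = "Pre"))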
nb_conv_covid_fe <- glm.nb(
Deaths ~ Conv_Count_lag1 + Population + factor(COVID_era),
data = conv_data
)
nb_conv_covid_fe
##
## Call: glm.nb(formula = Deaths ~ Conv_Count_lag1 + Population + factor(COVID_era),
## data = conv_data, init.theta = 7.834860144, link = log)
##
## Coefficients:
## (Intercept) Conv_Count_lag1 Population
## -1.490e+01 6.917e-02 2.281e-06
## factor(COVID_era)2021 factor(COVID_era)2022 factor(COVID_era)Pre
## -8.295e-01 -9.871e-01 -5.897e-01
##
## Degrees of Freedom: 285 Total (i.e. Null); 280 Residual
## (2 observations deleted due to missingness)
## Null Deviance: 920.9
## Residual Deviance: 295.3 AIC: 2882
A final model was estimated including time-period fixed effects to account for the COVID-19 pandemic and its aftermath. The results show that lagged convictions remain significantly associated with overdose deaths even after controlling for population and temporal factors: each additional conviction in the prior month was associated with a 7.2% increase in overdose deaths (IRR = 1.072, 95% CI [1.044, 1.101], p < .001).
Relative to 2020 (the reference level), overdose deaths were significantly lower in the pre-COVID period (IRR = 0.55, 95% CI [0.45, 0.68]) as well as in 2021 (IRR = 0.44) and 2022 (IRR = 0.37).
# Load required packages
library(broom)
library(dplyr)
library(gt)
# Step 1: Extract IRRs + Confidence Intervals
covid_nb_table <- tidy(nb_conv_covid_fe, conf.int = TRUE, conf.level = 0.95, exponentiate = TRUE)
# Step 2: Format for clean presentation
covid_nb_table %>%
mutate(across(estimate:conf.high, ~ round(.x, 6))) %>%
rename(
Term = term,
IRR = estimate,
`CI Lower` = conf.low,
`CI Upper` = conf.high,
`P-Value` = p.value
) %>%
gt() %>%
tab_header(
title = "Incidence Rate Ratios (IRRs) from COVID-Adjusted Negative Binomial Model"
)
Incidence Rate Ratios (IRRs) from COVID-Adjusted Negative Binomial Model

| Term | IRR | std.error | statistic | P-Value | CI Lower | CI Upper |
|---|---|---|---|---|---|---|
| (Intercept) | 0.000000 | 1.189977 | -12.523406 | 0 | 0.000000 | 0.000003 |
| Conv_Count_lag1 | 1.071613 | 0.013373 | 5.172089 | 0 | 1.044087 | 1.100659 |
| Population | 1.000002 | 0.000000 | 17.044558 | 0 | 1.000002 | 1.000003 |
| factor(COVID_era)2021 | 0.436256 | 0.156688 | -5.294128 | 0 | 0.321383 | 0.592213 |
| factor(COVID_era)2022 | 0.372670 | 0.158385 | -6.232041 | 0 | 0.273592 | 0.507602 |
| factor(COVID_era)Pre | 0.554513 | 0.109757 | -5.372464 | 0 | 0.445103 | 0.682471 |
library(dplyr)
Overdose %>%
filter(Deaths == "Suppressed") %>%
tally()
## # A tibble: 1 × 1
## n
## <int>
## 1 60
#total observations in dataset
library(dplyr)
Overdose %>%
tally()
## # A tibble: 1 × 1
## n
## <int>
## 1 525
Sixty out of 525 county-by-year overdose observations, or 11.4%, are suppressed.
# Standardize County names in Overdose dataframe
Overdose <- Overdose %>%
mutate(County = gsub(" County, NJ", "", County))
# 1. Convictions by county and year
convictions_by_county_year <- Convictions %>%
filter(!is.na(Sentence.Year)) %>%
group_by(County, Year = Sentence.Year) %>%
summarise(Conviction_Count = n(), .groups = "drop")
# 2. Arrests by county and year
arrests_by_county_year <- Arrest %>%
group_by(County, Year = `Arrest Year`) %>%
summarise(Arrest_Count = n(), .groups = "drop")
# 3. Combine into one dataset
combined_df_county <- Overdose %>%
left_join(arrests_by_county_year, by = c("County", "Year")) %>%
left_join(convictions_by_county_year, by = c("County", "Year")) %>%
mutate(
Arrest_Count = ifelse(is.na(Arrest_Count) & Year >= 2017, 0, Arrest_Count),
Conviction_Count = ifelse(is.na(Conviction_Count), 0, Conviction_Count)
)
#Midpoint Method (setting all suppressed values to 5, the midpoint of the 1-9 suppression range)
combined_df_county_md <- combined_df_county %>%
mutate(
Deaths_clean = ifelse(Deaths == "Suppressed", NA, Deaths), # Step 1: Set Suppressed to NA
Deaths_num = as.numeric(Deaths_clean), # Step 2: Convert remaining to numeric
Deaths_num = ifelse(is.na(Deaths_num), 5, Deaths_num) # Step 3: Set NAs (formerly Suppressed) to 5
)
#Sensitivity Analysis
##lower bound
df_lower <- combined_df_county %>%
mutate(
Deaths_clean = ifelse(Deaths == "Suppressed", NA, Deaths), # Step 1: Set Suppressed to NA
Deaths_num = as.numeric(Deaths_clean), # Step 2: Convert remaining to numeric
Deaths_num = ifelse(is.na(Deaths_num), 1, Deaths_num) # Step 3: Set NAs (formerly Suppressed) to 1
)
##upper bound
df_upper <- combined_df_county %>%
mutate(
Deaths_clean = ifelse(Deaths == "Suppressed", NA, Deaths), # Step 1: Set Suppressed to NA
Deaths_num = as.numeric(Deaths_clean), # Step 2: Convert remaining to numeric
Deaths_num = ifelse(is.na(Deaths_num), 9, Deaths_num) # Step 3: Set NAs (formerly Suppressed) to 9
)
# Load required libraries
library(mice)
##
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
##
## filter
## The following objects are masked from 'package:base':
##
## cbind, rbind
library(truncnorm)
library(dplyr)
# Step 1: Prepare dataset and flag suppressed deaths as NA
combined_df_county_MI <- combined_df_county %>%
mutate(
Deaths_num = ifelse(Deaths == "Suppressed", NA, as.numeric(Deaths)),
County = as.factor(County)
)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = ifelse(Deaths == "Suppressed", NA,
## as.numeric(Deaths))`.
## Caused by warning in `ifelse()`:
## ! NAs introduced by coercion
# Step 2: Define custom imputation function. Note: despite the "poisson" name,
# this draws from a rounded truncated normal on [1, 9]; it also ignores the
# predictors in x, so every draw is centered on the fixed midpoint of 5.
mice.impute.truncated_poisson <- function(y, ry, x, ...) {
n_miss <- sum(!ry) # number of missing (suppressed) values to impute
mean_est <- 5 # fixed midpoint-based mean
imputed <- round(rtruncnorm(n = n_miss, a = 1, b = 9, mean = mean_est, sd = 2))
return(imputed)
}
# Step 3: Specify imputation methods
mice_methods <- make.method(combined_df_county_MI)
mice_methods["Deaths_num"] <- "truncated_poisson"
# Step 4: Build predictor matrix — only safe variables
mice_pred <- make.predictorMatrix(combined_df_county_MI)
mice_pred[,] <- 0 # turn off all predictors by default
mice_pred["Deaths_num", "Population"] <- 1 # use population to inform imputation
# Optional: include County if it's a factor and not a primary predictor
mice_pred["Deaths_num", "County"] <- 1
# Step 5: Run multiple imputation
imp <- mice(
combined_df_county_MI,
method = mice_methods,
predictorMatrix = mice_pred,
m = 5,
seed = 123
)
##
## iter imp variable
## 1 1 Arrest_Count Deaths_num
## 1 2 Arrest_Count Deaths_num
## 1 3 Arrest_Count Deaths_num
## 1 4 Arrest_Count Deaths_num
## 1 5 Arrest_Count Deaths_num
## 2 1 Arrest_Count Deaths_num
## 2 2 Arrest_Count Deaths_num
## 2 3 Arrest_Count Deaths_num
## 2 4 Arrest_Count Deaths_num
## 2 5 Arrest_Count Deaths_num
## 3 1 Arrest_Count Deaths_num
## 3 2 Arrest_Count Deaths_num
## 3 3 Arrest_Count Deaths_num
## 3 4 Arrest_Count Deaths_num
## 3 5 Arrest_Count Deaths_num
## 4 1 Arrest_Count Deaths_num
## 4 2 Arrest_Count Deaths_num
## 4 3 Arrest_Count Deaths_num
## 4 4 Arrest_Count Deaths_num
## 4 5 Arrest_Count Deaths_num
## 5 1 Arrest_Count Deaths_num
## 5 2 Arrest_Count Deaths_num
## 5 3 Arrest_Count Deaths_num
## 5 4 Arrest_Count Deaths_num
## 5 5 Arrest_Count Deaths_num
## Warning: Number of logged events: 1
# Step 6: Get completed dataset
# Option A: View all imputations (long format)
completed_data_long <- complete(imp, "long")
# Option B: Use one completed dataset (e.g., the first)
imputed_df_MI <- complete(imp, 1)
library(ggplot2)
completed_data_long %>%
ggplot(aes(x = Deaths_num)) +
geom_histogram(binwidth = 1, fill = "steelblue", color = "white") +
facet_wrap(~.imp) +
labs(
title = "Distribution of Imputed Deaths_num Values",
x = "Imputed Deaths",
y = "Frequency"
)
# Load required libraries
library(brms)
## Loading required package: Rcpp
## Loading 'brms' package (version 2.23.0). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
##
## Attaching package: 'brms'
## The following object is masked from 'package:stats':
##
## ar
library(dplyr)
# Step 1: Convert 'Deaths' to numeric with NAs for "Suppressed"
combined_df_county <- combined_df_county %>%
mutate(
Deaths_num = ifelse(Deaths == "Suppressed", NA, as.numeric(Deaths))
)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = ifelse(Deaths == "Suppressed", NA,
## as.numeric(Deaths))`.
## Caused by warning in `ifelse()`:
## ! NAs introduced by coercion
# Step 2: Subset to observed (non-missing) death counts
df_obs <- combined_df_county %>%
filter(!is.na(Deaths_num))
#Step 3: Scale Population and Year
df_obs <- df_obs %>%
mutate(
Year_scaled = scale(Year),
Pop_scaled = scale(Population),
County = factor(County)
)
# Step 4: Fit Bayesian Negative Binomial model (simplest first)
library(brms)
bayes_model <- brm(
Deaths_num ~ Year_scaled + Pop_scaled,
data = df_obs,
family = negbinomial(),
chains = 2,
iter = 2000,
seed = 123,
control = list(adapt_delta = 0.95)
)
## Compiling Stan program...
## Start sampling
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 6.3e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.63 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.212 seconds (Warm-up)
## Chain 1: 0.196 seconds (Sampling)
## Chain 1: 0.408 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 2.8e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.28 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 0.207 seconds (Warm-up)
## Chain 2: 0.193 seconds (Sampling)
## Chain 2: 0.4 seconds (Total)
## Chain 2:
# Step 5: Subset missing
df_missing <- combined_df_county %>%
filter(is.na(Deaths_num)) %>%
mutate(
County = factor(County, levels = levels(df_obs$County)),
Year_scaled = scale(Year,
center = attr(df_obs$Year_scaled, "scaled:center"),
scale = attr(df_obs$Year_scaled, "scaled:scale")),
Pop_scaled = scale(Population,
center = attr(df_obs$Pop_scaled, "scaled:center"),
scale = attr(df_obs$Pop_scaled, "scaled:scale"))
)
# Step 6: Predict and constrain to [1, 9]
predicted_draws <- posterior_predict(bayes_model, newdata = df_missing)
set.seed(123)
imputed_values <- apply(predicted_draws, 2, function(x) sample(x, 1))
imputed_values <- pmin(pmax(imputed_values, 1), 9)
# Step 7: Insert into original data
combined_df_county_Bay1 <- combined_df_county
combined_df_county_Bay1$Deaths_num[is.na(combined_df_county_Bay1$Deaths_num)] <- imputed_values
summary(bayes_model)
## Family: negbinomial
## Links: mu = log
## Formula: Deaths_num ~ Year_scaled + Pop_scaled
## Data: df_obs (Number of observations: 465)
## Draws: 2 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 2000
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 4.12 0.02 4.08 4.17 1.00 3023 1666
## Year_scaled 0.51 0.02 0.47 0.55 1.00 2556 1369
## Pop_scaled 0.51 0.03 0.46 0.56 1.00 2363 1326
##
## Further Distributional Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape 4.67 0.31 4.07 5.30 1.00 2414 1664
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Rhat = 1.00 for all parameters → indicates convergence.
Bulk_ESS and Tail_ESS > 1000 for all → good effective sample sizes.
No warnings → model ran smoothly.
plot(bayes_model) # Trace + density
pp_check(bayes_model) # Posterior predictive checks
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
The posterior predictive check shows that the model matches the shape of the observed data, captures the overdispersion well, and produces plausible predictions.
#Check imputation distribution for Bayesian pooling
hist(imputed_values, breaks = 9, col = "orange", main = "Imputed Deaths_num via Bayesian Model")
One issue here is that there are no 0's among the imputed values, but that may be acceptable, since suppressed counts are by definition between 1 and 9.
# Histogram before applying pmin/pmax
hist(as.vector(apply(predicted_draws, 2, function(x) sample(x, 1))),
main = "Posterior Samples Before Truncation",
xlab = "Raw predicted values")
fitted_vals <- fitted(bayes_model, newdata = df_missing)
summary(fitted_vals)
## Estimate Est.Error Q2.5 Q97.5
## Min. : 11.14 Min. :0.7248 Min. : 9.776 Min. : 12.58
## 1st Qu.: 14.47 1st Qu.:0.8201 1st Qu.: 12.929 1st Qu.: 16.09
## Median : 20.52 Median :0.9837 Median : 18.657 Median : 22.41
## Mean : 26.25 Mean :1.1636 Mean : 24.040 Mean : 28.54
## 3rd Qu.: 26.30 3rd Qu.:1.1268 3rd Qu.: 24.055 3rd Qu.: 28.54
## Max. :130.51 Max. :6.6528 Max. :118.169 Max. :144.51
# Load required libraries
library(brms)
library(dplyr)
# Step 1: Create new dataset to preserve original
combined_df_county_Bay2 <- combined_df_county %>%
mutate(
Deaths_num = ifelse(Deaths == "Suppressed", NA, as.numeric(Deaths))
)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = ifelse(Deaths == "Suppressed", NA,
## as.numeric(Deaths))`.
## Caused by warning in `ifelse()`:
## ! NAs introduced by coercion
# Step 2: Subset non-missing observations
df_obs_Bay2 <- combined_df_county_Bay2 %>%
filter(!is.na(Deaths_num)) %>%
mutate(
Year_scaled = scale(Year),
Pop_scaled = scale(Population),
County = factor(County) # Ensure County is a factor
)
# Step 3: Fit Bayesian model with County as a random effect
bayes_model_Bay2 <- brm(
Deaths_num ~ Year_scaled + Pop_scaled + (1 | County),
data = df_obs_Bay2,
family = negbinomial(),
chains = 4,
iter = 3000,
seed = 123,
control = list(adapt_delta = 0.95)
)
## Compiling Stan program...
## Start sampling
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0.000132 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 1.32 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 3000 [ 0%] (Warmup)
## Chain 1: Iteration: 300 / 3000 [ 10%] (Warmup)
## Chain 1: Iteration: 600 / 3000 [ 20%] (Warmup)
## Chain 1: Iteration: 900 / 3000 [ 30%] (Warmup)
## Chain 1: Iteration: 1200 / 3000 [ 40%] (Warmup)
## Chain 1: Iteration: 1500 / 3000 [ 50%] (Warmup)
## Chain 1: Iteration: 1501 / 3000 [ 50%] (Sampling)
## Chain 1: Iteration: 1800 / 3000 [ 60%] (Sampling)
## Chain 1: Iteration: 2100 / 3000 [ 70%] (Sampling)
## Chain 1: Iteration: 2400 / 3000 [ 80%] (Sampling)
## Chain 1: Iteration: 2700 / 3000 [ 90%] (Sampling)
## Chain 1: Iteration: 3000 / 3000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 3.563 seconds (Warm-up)
## Chain 1: 2.853 seconds (Sampling)
## Chain 1: 6.416 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 4.2e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.42 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 3000 [ 0%] (Warmup)
## Chain 2: Iteration: 300 / 3000 [ 10%] (Warmup)
## Chain 2: Iteration: 600 / 3000 [ 20%] (Warmup)
## Chain 2: Iteration: 900 / 3000 [ 30%] (Warmup)
## Chain 2: Iteration: 1200 / 3000 [ 40%] (Warmup)
## Chain 2: Iteration: 1500 / 3000 [ 50%] (Warmup)
## Chain 2: Iteration: 1501 / 3000 [ 50%] (Sampling)
## Chain 2: Iteration: 1800 / 3000 [ 60%] (Sampling)
## Chain 2: Iteration: 2100 / 3000 [ 70%] (Sampling)
## Chain 2: Iteration: 2400 / 3000 [ 80%] (Sampling)
## Chain 2: Iteration: 2700 / 3000 [ 90%] (Sampling)
## Chain 2: Iteration: 3000 / 3000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 3.193 seconds (Warm-up)
## Chain 2: 2.738 seconds (Sampling)
## Chain 2: 5.931 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 5.8e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.58 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 3000 [ 0%] (Warmup)
## Chain 3: Iteration: 300 / 3000 [ 10%] (Warmup)
## Chain 3: Iteration: 600 / 3000 [ 20%] (Warmup)
## Chain 3: Iteration: 900 / 3000 [ 30%] (Warmup)
## Chain 3: Iteration: 1200 / 3000 [ 40%] (Warmup)
## Chain 3: Iteration: 1500 / 3000 [ 50%] (Warmup)
## Chain 3: Iteration: 1501 / 3000 [ 50%] (Sampling)
## Chain 3: Iteration: 1800 / 3000 [ 60%] (Sampling)
## Chain 3: Iteration: 2100 / 3000 [ 70%] (Sampling)
## Chain 3: Iteration: 2400 / 3000 [ 80%] (Sampling)
## Chain 3: Iteration: 2700 / 3000 [ 90%] (Sampling)
## Chain 3: Iteration: 3000 / 3000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 3.194 seconds (Warm-up)
## Chain 3: 2.765 seconds (Sampling)
## Chain 3: 5.959 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 4.9e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.49 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 3000 [ 0%] (Warmup)
## Chain 4: Iteration: 300 / 3000 [ 10%] (Warmup)
## Chain 4: Iteration: 600 / 3000 [ 20%] (Warmup)
## Chain 4: Iteration: 900 / 3000 [ 30%] (Warmup)
## Chain 4: Iteration: 1200 / 3000 [ 40%] (Warmup)
## Chain 4: Iteration: 1500 / 3000 [ 50%] (Warmup)
## Chain 4: Iteration: 1501 / 3000 [ 50%] (Sampling)
## Chain 4: Iteration: 1800 / 3000 [ 60%] (Sampling)
## Chain 4: Iteration: 2100 / 3000 [ 70%] (Sampling)
## Chain 4: Iteration: 2400 / 3000 [ 80%] (Sampling)
## Chain 4: Iteration: 2700 / 3000 [ 90%] (Sampling)
## Chain 4: Iteration: 3000 / 3000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 3.234 seconds (Warm-up)
## Chain 4: 2.887 seconds (Sampling)
## Chain 4: 6.121 seconds (Total)
## Chain 4:
# Step 4: Subset missing values and apply scaling
df_missing_Bay2 <- combined_df_county_Bay2 %>%
filter(is.na(Deaths_num)) %>%
mutate(
County = factor(County, levels = levels(df_obs_Bay2$County)), # match levels
Year_scaled = scale(Year,
center = attr(df_obs_Bay2$Year_scaled, "scaled:center"),
scale = attr(df_obs_Bay2$Year_scaled, "scaled:scale")),
Pop_scaled = scale(Population,
center = attr(df_obs_Bay2$Pop_scaled, "scaled:center"),
scale = attr(df_obs_Bay2$Pop_scaled, "scaled:scale"))
)
# Step 5: Posterior prediction and imputation
predicted_draws_Bay2 <- posterior_predict(bayes_model_Bay2, newdata = df_missing_Bay2)
set.seed(123)
imputed_values_Bay2 <- apply(predicted_draws_Bay2, 2, function(x) sample(x, 1))
imputed_values_Bay2 <- pmin(pmax(imputed_values_Bay2, 1), 9) # constrain to 1–9
# Step 6: Fill in the imputed values
combined_df_county_Bay2$Deaths_num[is.na(combined_df_county_Bay2$Deaths_num)] <- imputed_values_Bay2
#Check imputation distribution for Bayesian pooling
hist(imputed_values_Bay2, breaks = 9, col = "orange", main = "Imputed Deaths_num via Bayesian Model 2")
summary(bayes_model)
## Family: negbinomial
## Links: mu = log
## Formula: Deaths_num ~ Year_scaled + Pop_scaled
## Data: df_obs (Number of observations: 465)
## Draws: 2 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 2000
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 4.12 0.02 4.08 4.17 1.00 3023 1666
## Year_scaled 0.51 0.02 0.47 0.55 1.00 2556 1369
## Pop_scaled 0.51 0.03 0.46 0.56 1.00 2363 1326
##
## Further Distributional Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape 4.67 0.31 4.07 5.30 1.00 2414 1664
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(bayes_model_Bay2) # Trace + density
pp_check(bayes_model_Bay2) # Posterior predictive checks
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
In the provided overdose data, 60 out of 525 county-by-year observations (~11.4%) were labeled as “Suppressed,” indicating that the true value ranged between 1 and 9 but was withheld for privacy reasons. Since these suppressed values represent a non-trivial portion of the dataset, appropriate imputation methods were employed to estimate plausible values while preserving statistical integrity.
We explored four different imputation strategies to address the suppressed values in the Deaths variable.
1. Midpoint Imputation (Deterministic)
All suppressed death values were replaced with the midpoint of the known range:
Value used: 5
This approach is simple and transparent but does not reflect variability in the imputed values.
Output: combined_df_county_md
2. Sensitivity Analysis
To assess the bounds of potential impact, we created two alternative datasets:
Lower-bound scenario: All suppressed values set to 1
Upper-bound scenario: All suppressed values set to 9
These datasets allow for robustness checks in downstream models by comparing how extreme assumptions influence the results.
Output:
df_lower
df_upper
3. Multiple Imputation via MICE with Truncated Distribution
A more flexible approach used the mice package to perform multiple imputation with a custom imputation function drawing from a truncated normal distribution constrained to values between 1 and 9. This method incorporates uncertainty across imputations while maintaining the known bounds.
Population and County were supplied as predictors in the mice predictor matrix (County via its factor representation, since it serves as a control variable in future analyses). Note, however, that the custom draw function as written ignores these predictors and centers every draw on the fixed midpoint.
Imputation settings: 5 imputations (m = 5)
Custom imputation function using rtruncnorm(mean = 5, sd = 2, a = 1, b = 9)
Random draws ensure variability across imputations
Output:
A long-format dataset with multiple imputations (completed_data_long)
A single completed dataset (imputed_df_MI) was extracted for use in modeling
A histogram of the imputed values showed a plausible, near-symmetric distribution centered around the midpoint, consistent with prior expectations.
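Note that instead of modeling one completed dataset, results can be pooled across all five imputations with Rubin's rules. A sketch with a placeholder downstream model (not the final specification):
# Fit the same model within each imputation, then pool via Rubin's rules
fit_mi <- with(imp, glm(Deaths_num ~ Population, family = poisson()))
summary(pool(fit_mi))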
4. Bayesian Imputation via Negative Binomial Models
We fit two Bayesian models using the brms package, leveraging posterior predictive sampling to estimate the suppressed values.
A. Bayesian Model 1 (No County Effects)
Formula: Deaths_num ~ Year_scaled + Pop_scaled
Family: Negative Binomial (handles overdispersion)
Random effects: None
Dataset created: combined_df_county_Bay1
Imputation:
Posterior predictive distributions were sampled for missing rows.
A single draw per observation was taken and constrained to the 1–9 range.
Model output:
Rhat = 1.00 for all parameters → convergence achieved
Effective sample sizes (ESS) > 1000
Posterior predictive checks showed good fit to observed data
B. Bayesian Model 2 (Hierarchical: Random Intercepts by County)
Formula: Deaths_num ~ Year_scaled + Pop_scaled + (1 | County)
Family: Negative Binomial
Random effects: County (partial pooling)
Dataset created: combined_df_county_Bay2
Imputation:
Same approach as Model 1 (posterior predictions + truncation), but with better fit for county-level heterogeneity.
Model output:
Converged across all chains (Rhat = 1.00; the printed summary above shows 2 chains)
Posterior predictive distributions captured both mean and dispersion
Histogram of imputed values showed reasonable spread (1–9), with central tendency near observed mean
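A hedged sketch of the posterior-predictive imputation for Model 2 (df_obs matches the data name in the printed summary above; df_missing and the final bind are assumptions about the surrounding code):
library(brms)
library(dplyr)
# Hierarchical NB fit on observed rows (matches the printed summary: 2 chains, iter = 2000).
bayes_model_Bay2 <- brm(
  Deaths_num ~ Year_scaled + Pop_scaled + (1 | County),
  data = df_obs, family = negbinomial(),
  chains = 2, iter = 2000, seed = 123
)
# One posterior predictive draw per suppressed row, truncated to the known 1-9 range.
pp <- posterior_predict(bayes_model_Bay2, newdata = df_missing,
                        ndraws = 1, allow_new_levels = TRUE)
df_missing$Deaths_num <- pmin(pmax(as.vector(pp), 1), 9)
combined_df_county_Bay2 <- bind_rows(df_obs, df_missing)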
Summary of Imputation Methods

| Method | Variables Used | Output Dataset |
|---|---|---|
| Midpoint (Set all to 5) | — | combined_df_county_md |
| Sensitivity: Lower Bound (1) | — | df_lower |
| Sensitivity: Upper Bound (9) | — | df_upper |
| Multiple Imputation (MICE) | Population | imputed_df, completed_data_long |
| Bayesian Model 1 | Year, Population | combined_df_county_Bay1 |
| Bayesian Model 2 (with County) | Year, Population, County (random intercept) | combined_df_county_Bay2 |
Summary of Model Justification: To address the 11.4% of observations in our dataset with suppressed overdose death counts (reported as “Suppressed” and known to fall within the range of 1 to 9), we implemented a series of imputation strategies to ensure robustness and transparency. We began with simple imputation methods, including a midpoint substitution (setting all suppressed values to 5) and sensitivity analyses using the lower and upper bounds (1 and 9, respectively). These provided boundary checks for how assumptions about suppressed values might influence our findings.
We then applied Multiple Imputation by Chained Equations (MICE) using a truncated distribution constrained between 1 and 9. In this model, we included Population and County as predictors. Population was retained as a meaningful, independent contextual variable that reflects the expected scale of overdose events. County was added as a categorical predictor to allow the imputation process to borrow information across geographic units, thereby accounting for unobserved regional variation in overdose trends. We chose not to include Year as a predictor in this imputation step to avoid conflating temporal patterns with structural variation in death reporting practices, and to reduce the risk of overfitting imputation models to potentially nonstationary time trends. Importantly, we excluded Conviction_Count, which is a key predictor in subsequent outcome modeling, to avoid biasing those future estimates through the imputation mechanism.
Finally, we implemented two Bayesian pooling models using the brms package. The first included Year and Population as predictors to estimate the likely distribution of suppressed counts, leveraging both temporal and demographic trends. The second model added County as a random intercept to account for potential unobserved heterogeneity across counties. These models allowed us to borrow strength from the observed data while explicitly modeling uncertainty and variability in suppressed values. Each imputation approach was stored as a separate dataset for comparison and downstream analysis. We included Year as a predictor in the Bayesian imputation models because those models are explicitly designed to capture structured trends and uncertainty in the observed data.
@Eli, do you think this is sufficient? Or should I run a Bayesian imputation model without Year and compare it as well?
I just ran this with the second Bayesian pooling model but can re-run it with the others if you all want to see it.
library(dplyr)
combined_df_county_Bay2 %>%
group_by(County) %>%
summarise(Total_Overdose_Deaths = sum(Deaths_num)) %>%
arrange(desc(Total_Overdose_Deaths))
## # A tibble: 21 × 2
## County Total_Overdose_Deaths
## <chr> <dbl>
## 1 Essex 4019
## 2 Camden 3881
## 3 Ocean 2916
## 4 Middlesex 2792
## 5 Monmouth 2450
## 6 Bergen 2280
## 7 Hudson 2111
## 8 Burlington 2061
## 9 Atlantic 1954
## 10 Gloucester 1910
## # ℹ 11 more rows
library(dplyr)
library(ggplot2)
# Summarize overdose deaths by county
county_death_totals <- combined_df_county_Bay2 %>%
group_by(County) %>%
summarise(Total_Deaths = sum(Deaths_num, na.rm = TRUE)) %>%
arrange(desc(Total_Deaths))
# Plot the bar graph
ggplot(county_death_totals, aes(x = reorder(County, -Total_Deaths), y = Total_Deaths)) +
geom_bar(stat = "identity", fill = "steelblue") +
labs(
title = "Total Overdose Deaths by County",
x = "County",
y = "Total Overdose Deaths"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
library(ggplot2)
library(dplyr)
# Optional: arrange by descending conviction count
county_summary_conv <- county_summary_conv %>%
arrange(desc(n))
# Plot bar chart
ggplot(county_summary_conv, aes(x = reorder(County, -n), y = n)) +
geom_bar(stat = "identity", fill = "steelblue") +
labs(
title = "Total Convictions by County",
x = "County",
y = "Number of Convictions"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
Visually, comparing the two graphs, it is clear that the counties with the largest number of overdose deaths do not have the most convictions.
library(ggplot2)
library(dplyr)
# Optional: arrange by descending arr count
county_summary_arr <- county_summary_arr %>%
arrange(desc(n))
# Plot bar chart
ggplot(county_summary_arr, aes(x = reorder(County, -n), y = n)) +
geom_bar(stat = "identity", fill = "steelblue") +
labs(
title = "Total Arrests by County",
x = "County",
y = "Number of Arrests"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
The same pattern holds for arrests.
imputed_df_MI$Deaths <- as.numeric(imputed_df_MI$Deaths)
## Warning: NAs introduced by coercion
imputed_df_MI$County <- factor(imputed_df_MI$County)
run_models <- function(data, label) {
message("===== Running models for: ", label, " =====")
# Ensure required variables are correctly formatted and drop missing values
data <- data %>%
mutate(
Deaths_num = as.numeric(Deaths),
Year = as.numeric(Year),
County = as.factor(County),
Population = as.numeric(Population),
Conviction_Count = as.numeric(Conviction_Count)
) %>%
filter(
!is.na(Deaths_num),
!is.na(Conviction_Count),
!is.na(Population)
)
# Fit Poisson regression
poisson_model <- glm(
Deaths_num ~ Conviction_Count + Year + Population + County,
data = data,
family = poisson
)
# Fit Negative Binomial regression
nb_model <- MASS::glm.nb(
Deaths_num ~ Conviction_Count + Year + Population + County,
data = data
)
# Return AIC summary and model objects
return(list(
summary = data.frame(
Method = label,
Poisson_AIC = AIC(poisson_model),
NegBin_AIC = AIC(nb_model)
),
models = list(
poisson = poisson_model,
negbin = nb_model
)
))
}
results_md <- run_models(combined_df_county_md, "Midpoint Imputation")
## ===== Running models for: Midpoint Imputation =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
results_lower <- run_models(df_lower, "Lower Bound Imputation")
## ===== Running models for: Lower Bound Imputation =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
results_upper <- run_models(df_upper, "Upper Bound Imputation")
## ===== Running models for: Upper Bound Imputation =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
results_mi <- run_models(imputed_df_MI, "Multiple Imputation")
## ===== Running models for: Multiple Imputation =====
results_bayes1 <- run_models(combined_df_county_Bay1, "Bayesian Imputation 1")
## ===== Running models for: Bayesian Imputation 1 =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
results_bayes2 <- run_models(combined_df_county_Bay2, "Bayesian Imputation 2")
## ===== Running models for: Bayesian Imputation 2 =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
library(dplyr)
library(knitr)
# Combine AIC results from all model runs
model_summaries <- bind_rows(
results_md$summary,
results_lower$summary,
results_upper$summary,
results_mi$summary,
results_bayes1$summary,
results_bayes2$summary
)
# Print as a markdown table
kable(model_summaries, caption = "Model Fit Comparison: AIC Values")
| Method | Poisson_AIC | NegBin_AIC |
|---|---|---|
| Midpoint Imputation | 5466.882 | 4018.577 |
| Lower Bound Imputation | 5466.882 | 4018.577 |
| Upper Bound Imputation | 5466.882 | 4018.577 |
| Multiple Imputation | 5466.882 | 4018.577 |
| Bayesian Imputation 1 | 5466.882 | 4018.577 |
| Bayesian Imputation 2 | 5466.882 | 4018.577 |
The table summarizes AIC values for Poisson and Negative Binomial (NB) models across the six imputation methods for suppressed overdose death counts. Under every strategy, the NB models show substantially lower AICs than the Poisson models, indicating a better fit; this is expected given the overdispersion typical of count data like overdose deaths.
Notably, the AICs are identical across all six imputation methods (NB AIC = 4018.58). Together with the repeated “NAs introduced by coercion” warnings above, this suggests that the suppressed cells may have been coerced to NA and dropped by the filter step rather than entering these fits, so the between-method comparison should be interpreted with caution.
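A quick diagnostic sketch (assuming suppressed cells hold the literal string “Suppressed”): nonzero counts below would confirm that those rows are coerced to NA and dropped rather than contributing imputed values.
# Count Deaths values that fail numeric coercion in each imputation dataset.
sapply(
  list(md = combined_df_county_md, lower = df_lower, upper = df_upper,
       mi = imputed_df_MI, bay1 = combined_df_county_Bay1, bay2 = combined_df_county_Bay2),
  function(d) sum(is.na(suppressWarnings(as.numeric(d$Deaths))))
)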
library(dplyr)
# Helper function to extract and label model summaries
extract_model_summary <- function(model, method_label) {
coef_summary <- summary(model)$coefficients
as.data.frame(coef_summary) %>%
tibble::rownames_to_column("Term") %>%
mutate(Method = method_label)
}
# Extract coefficient summaries from each NegBin model
coef_mi <- extract_model_summary(results_mi$models$negbin, "Multiple Imputation")
coef_bayes1 <- extract_model_summary(results_bayes1$models$negbin, "Bayesian Imputation 1")
coef_bayes2 <- extract_model_summary(results_bayes2$models$negbin, "Bayesian Imputation 2")
# Combine all into one table
combined_coefs <- bind_rows(coef_mi, coef_bayes1, coef_bayes2)
# Optional: re-order for readability
combined_coefs <- combined_coefs %>%
dplyr::select(Method, Term, Estimate, `Std. Error`, `z value`, `Pr(>|z|)`)
filtered_coefs <- combined_coefs %>%
filter(Term %in% c("Conviction_Count", "Year", "Population")) %>%
dplyr::select(Method, Term, Estimate, `Std. Error`, `z value`, `Pr(>|z|)`)
# Optional: render nicely in RMarkdown
knitr::kable(filtered_coefs, digits = 3, caption = "Negative Binomial Model Coefficients: Top Imputation Methods")
| Method | Term | Estimate | Std. Error | z value | Pr(>|z|) |
|---|---|---|---|---|---|
| Multiple Imputation | Conviction_Count | 0.008 | 0.008 | 1.039 | 0.299 |
| Multiple Imputation | Year | 0.080 | 0.003 | 26.819 | 0.000 |
| Multiple Imputation | Population | 0.000 | 0.000 | -1.311 | 0.190 |
| Bayesian Imputation 1 | Conviction_Count | 0.008 | 0.008 | 1.039 | 0.299 |
| Bayesian Imputation 1 | Year | 0.080 | 0.003 | 26.819 | 0.000 |
| Bayesian Imputation 1 | Population | 0.000 | 0.000 | -1.311 | 0.190 |
| Bayesian Imputation 2 | Conviction_Count | 0.008 | 0.008 | 1.039 | 0.299 |
| Bayesian Imputation 2 | Year | 0.080 | 0.003 | 26.819 | 0.000 |
| Bayesian Imputation 2 | Population | 0.000 | 0.000 | -1.311 | 0.190 |
combined_coefs %>%
filter(grepl("^County", Term)) %>%
summarize(
avg_effect = mean(Estimate),
max_effect = max(Estimate),
min_effect = min(Estimate)
)
## avg_effect max_effect min_effect
## 1 -0.06762637 1.56226 -1.984721
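These county fixed effects are on the log scale; for interpretation, exponentiating the extremes gives multiplicative rate ratios relative to the reference county (Atlantic, the first factor level):
# exp() of the min and max county effects above: roughly 0.14x to 4.8x.
exp(c(min_effect = -1.984721, max_effect = 1.56226))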
Across all three imputation strategies:
Conviction_Count was not statistically significant (p ≈ 0.299), suggesting no strong evidence that convictions are associated with monthly overdose deaths at the county level after controlling for time and population.
# Testing collinearity of Population and County
summary(lm(Population ~ County, data = combined_df_county_Bay1))
##
## Call:
## lm(formula = Population ~ County, data = combined_df_county_Bay1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -74724 -5586 186 4683 80466
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 268753 3504 76.708 < 2e-16 ***
## CountyBergen 646467 4955 130.472 < 2e-16 ***
## CountyBurlington 178114 4955 35.947 < 2e-16 ***
## CountyCamden 243642 4955 49.172 < 2e-16 ***
## CountyCape May -171674 4955 -34.648 < 2e-16 ***
## CountyCumberland -116336 4955 -23.479 < 2e-16 ***
## CountyEssex 529876 4955 106.941 < 2e-16 ***
## CountyGloucester 15276 4955 3.083 0.00216 **
## CountyHudson 379152 4955 76.522 < 2e-16 ***
## CountyHunterdon -142180 4955 -28.695 < 2e-16 ***
## CountyMercer 97651 4955 19.708 < 2e-16 ***
## CountyMiddlesex 541174 4955 109.221 < 2e-16 ***
## CountyMonmouth 358691 4955 72.392 < 2e-16 ***
## CountyMorris 222895 4955 44.985 < 2e-16 ***
## CountyOcean 309978 4955 62.561 < 2e-16 ***
## CountyPassaic 232437 4955 46.911 < 2e-16 ***
## CountySalem -204085 4955 -41.189 < 2e-16 ***
## CountySomerset 54227 4955 10.944 < 2e-16 ***
## CountySussex -122602 4955 -24.744 < 2e-16 ***
## CountyUnion 273983 4955 55.296 < 2e-16 ***
## CountyWarren -161313 4955 -32.557 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17520 on 504 degrees of freedom
## Multiple R-squared: 0.9953, Adjusted R-squared: 0.9951
## F-statistic: 5342 on 20 and 504 DF, p-value: < 2.2e-16
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
# Exclude County or reduce levels if needed
model <- glm(Deaths_num ~ County + Population, data = combined_df_county_Bay1, family = poisson)
vif(model)
## GVIF Df GVIF^(1/(2*Df))
## County 120.727 20 1.127315
## Population 120.727 1 10.987584
The adjusted GVIF for Population (GVIF^(1/(2*Df)) ~ 11) far exceeds the common rule-of-thumb cutoff of about 3 (roughly equivalent to VIF ~ 10), confirming that Population is nearly collinear with the County dummies. I am therefore going to run the models without Population next. (In the Bayesian imputation models, including the one with County as a random intercept, we retained Population as a control for exposure size.)
run_models_2 <- function(data, label) {
message("===== Running models for: ", label, " =====")
# Ensure required variables are correctly formatted and drop missing values
data <- data %>%
mutate(
Deaths_num = as.numeric(Deaths),
Year = as.numeric(Year),
County = as.factor(County),
Conviction_Count = as.numeric(Conviction_Count)
) %>%
filter(
!is.na(Deaths_num),
!is.na(Conviction_Count)
)
# Fit Poisson regression
poisson_model_2 <- glm(
Deaths_num ~ Conviction_Count + Year + County,
data = data,
family = poisson
)
# Fit Negative Binomial regression
nb_model_2 <- MASS::glm.nb(
Deaths_num ~ Conviction_Count + Year + County,
data = data
)
# Return AIC summary and model objects
return(list(
summary = data.frame(
Method = label,
Poisson_AIC_2 = AIC(poisson_model_2),
NegBin_AIC_2 = AIC(nb_model_2)
),
models = list(
poisson = poisson_model_2,
negbin = nb_model_2
)
))
}
results_md2 <- run_models_2(combined_df_county_md, "Midpoint Imputation")
## ===== Running models for: Midpoint Imputation =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
results_lower2 <- run_models_2(df_lower, "Lower Bound Imputation")
## ===== Running models for: Lower Bound Imputation =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
results_upper2 <- run_models_2(df_upper, "Upper Bound Imputation")
## ===== Running models for: Upper Bound Imputation =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
results_mi2 <- run_models_2(imputed_df_MI, "Multiple Imputation")
## ===== Running models for: Multiple Imputation =====
results_bayes3 <- run_models_2(combined_df_county_Bay1, "Bayesian Imputation 1")
## ===== Running models for: Bayesian Imputation 1 =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
results_bayes4 <- run_models_2(combined_df_county_Bay2, "Bayesian Imputation 2")
## ===== Running models for: Bayesian Imputation 2 =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
library(dplyr)
library(knitr)
# Combine AIC results from all model runs
model_summaries_2 <- bind_rows(
results_md2$summary,
results_lower2$summary,
results_upper2$summary,
results_mi2$summary,
results_bayes3$summary,
results_bayes4$summary
)
# Print as a markdown table
kable(model_summaries_2, caption = "Model Fit Comparison: AIC Values")
| Method | Poisson_AIC_2 | NegBin_AIC_2 |
|---|---|---|
| Midpoint Imputation | 5477.669 | 4018.365 |
| Lower Bound Imputation | 5477.669 | 4018.365 |
| Upper Bound Imputation | 5477.669 | 4018.365 |
| Multiple Imputation | 5477.669 | 4018.365 |
| Bayesian Imputation 1 | 5477.669 | 4018.365 |
| Bayesian Imputation 2 | 5477.669 | 4018.365 |
library(dplyr)
# Extract coefficient summaries from each NegBin model
coef_mi2 <- extract_model_summary(results_mi2$models$negbin, "Multiple Imputation")
coef_bayes3 <- extract_model_summary(results_bayes3$models$negbin, "Bayesian Imputation 1")
coef_bayes4 <- extract_model_summary(results_bayes4$models$negbin, "Bayesian Imputation 2")
# Combine all into one table
combined_coefs2 <- bind_rows(coef_mi2, coef_bayes3, coef_bayes4)
# Optional: re-order for readability
combined_coefs2 <- combined_coefs2 %>%
dplyr::select(Method, Term, Estimate, `Std. Error`, `z value`, `Pr(>|z|)`)
filtered_coefs2 <- combined_coefs2 %>%
filter(Term %in% c("Conviction_Count", "Year")) %>%
dplyr::select(Method, Term, Estimate, `Std. Error`, `z value`, `Pr(>|z|)`)
# Optional: render nicely in RMarkdown
knitr::kable(filtered_coefs2, digits = 3, caption = "Negative Binomial Model Coefficients: Top Imputation Methods")
| Method | Term | Estimate | Std. Error | z value | Pr(>|z|) |
|---|---|---|---|---|---|
| Multiple Imputation | Conviction_Count | 0.010 | 0.008 | 1.237 | 0.216 |
| Multiple Imputation | Year | 0.077 | 0.002 | 36.129 | 0.000 |
| Bayesian Imputation 1 | Conviction_Count | 0.010 | 0.008 | 1.237 | 0.216 |
| Bayesian Imputation 1 | Year | 0.077 | 0.002 | 36.129 | 0.000 |
| Bayesian Imputation 2 | Conviction_Count | 0.010 | 0.008 | 1.237 | 0.216 |
| Bayesian Imputation 2 | Year | 0.077 | 0.002 | 36.129 | 0.000 |
After removing Population, the coefficient on Conviction_Count remains statistically non-significant in all three top models (p = 0.216); the model fit did not worsen (nearly identical AICs), and the coefficients stayed essentially unchanged.
Finally, I am going to try treating County and Year as factors.
run_models_3 <- function(data, label) {
message("===== Running models for: ", label, " =====")
# Ensure required variables are correctly formatted and drop missing values
data <- data %>%
mutate(
Deaths_num = as.numeric(Deaths),
Year = as.numeric(Year),
County = as.factor(County),
Conviction_Count = as.numeric(Conviction_Count)
) %>%
filter(
!is.na(Deaths_num),
!is.na(Conviction_Count)
)
# Fit Poisson regression
poisson_model_3 <- glm(
Deaths_num ~ Conviction_Count + factor(Year) + factor(County),
data = data,
family = poisson
)
# Fit Negative Binomial regression
nb_model_3 <- MASS::glm.nb(
Deaths_num ~ Conviction_Count + factor(Year) + factor(County),
data = data
)
# Return AIC summary and model objects
return(list(
summary = data.frame(
Method = label,
Poisson_AIC_2 = AIC(poisson_model_3),
NegBin_AIC_2 = AIC(nb_model_3)
),
models = list(
poisson = poisson_model_3,
negbin = nb_model_3
)
))
}
results_md3 <- run_models_3(combined_df_county_md, "Midpoint Imputation")
## ===== Running models for: Midpoint Imputation =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
results_lower3 <- run_models_3(df_lower, "Lower Bound Imputation")
## ===== Running models for: Lower Bound Imputation =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
results_upper3 <- run_models_3(df_upper, "Upper Bound Imputation")
## ===== Running models for: Upper Bound Imputation =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
results_mi3 <- run_models_3(imputed_df_MI, "Multiple Imputation")
## ===== Running models for: Multiple Imputation =====
results_bayes5 <- run_models_3(combined_df_county_Bay1, "Bayesian Imputation 1")
## ===== Running models for: Bayesian Imputation 1 =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
results_bayes6 <- run_models_3(combined_df_county_Bay2, "Bayesian Imputation 2")
## ===== Running models for: Bayesian Imputation 2 =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
library(dplyr)
library(knitr)
# Combine AIC results from all model runs
model_summaries_4 <- bind_rows(
results_md3$summary,
results_lower3$summary,
results_upper3$summary,
results_mi3$summary,
results_bayes5$summary,
results_bayes6$summary
)
# Print as a markdown table
kable(model_summaries_4, caption = "Model Fit Comparison: AIC Values")
| Method | Poisson_AIC_2 | NegBin_AIC_2 |
|---|---|---|
| Midpoint Imputation | 4073.933 | 3734.332 |
| Lower Bound Imputation | 4073.933 | 3734.332 |
| Upper Bound Imputation | 4073.933 | 3734.332 |
| Multiple Imputation | 4073.933 | 3734.332 |
| Bayesian Imputation 1 | 4073.933 | 3734.332 |
| Bayesian Imputation 2 | 4073.933 | 3734.332 |
library(dplyr)
# Extract coefficient summaries from each NegBin model
coef_mi3 <- extract_model_summary(results_mi3$models$negbin, "Multiple Imputation")
coef_bayes5 <- extract_model_summary(results_bayes5$models$negbin, "Bayesian Imputation 1")
coef_bayes6 <- extract_model_summary(results_bayes6$models$negbin, "Bayesian Imputation 2")
# Combine all into one table
combined_coefs2 <- bind_rows(coef_mi3, coef_bayes5, coef_bayes6)
# Optional: re-order for readability
combined_coefs2 <- combined_coefs2 %>%
dplyr::select(Method, Term, Estimate, `Std. Error`, `z value`, `Pr(>|z|)`)
filtered_coefs2 <- combined_coefs2 %>%
filter(Term %in% c("Conviction_Count", "Year")) %>%
dplyr::select(Method, Term, Estimate, `Std. Error`, `z value`, `Pr(>|z|)`)
# Optional: render nicely in RMarkdown
knitr::kable(filtered_coefs2, digits = 3, caption = "Negative Binomial Model Coefficients: Top Imputation Methods")
| Method | Term | Estimate | Std. Error | z value | Pr(>|z|) |
|---|---|---|---|---|---|
| Multiple Imputation | Conviction_Count | -0.002 | 0.005 | -0.446 | 0.656 |
| Bayesian Imputation 1 | Conviction_Count | -0.002 | 0.005 | -0.446 | 0.656 |
| Bayesian Imputation 2 | Conviction_Count | -0.002 | 0.005 | -0.446 | 0.656 |
knitr::kable(combined_coefs2, digits = 3, caption = "Negative Binomial Model Coefficients: Top Imputation Methods")
| Method | Term | Estimate | Std. Error | z value | Pr(>|z|) |
|---|---|---|---|---|---|
| Multiple Imputation | (Intercept) | 3.411 | 0.073 | 46.731 | 0.000 |
| Multiple Imputation | Conviction_Count | -0.002 | 0.005 | -0.446 | 0.656 |
| Multiple Imputation | factor(Year)2000 | 0.145 | 0.084 | 1.719 | 0.086 |
| Multiple Imputation | factor(Year)2001 | 0.204 | 0.083 | 2.475 | 0.013 |
| Multiple Imputation | factor(Year)2002 | 0.362 | 0.081 | 4.454 | 0.000 |
| Multiple Imputation | factor(Year)2003 | 0.240 | 0.082 | 2.931 | 0.003 |
| Multiple Imputation | factor(Year)2004 | 0.031 | 0.084 | 0.363 | 0.717 |
| Multiple Imputation | factor(Year)2005 | 0.346 | 0.080 | 4.311 | 0.000 |
| Multiple Imputation | factor(Year)2006 | 0.379 | 0.080 | 4.731 | 0.000 |
| Multiple Imputation | factor(Year)2007 | 0.236 | 0.082 | 2.867 | 0.004 |
| Multiple Imputation | factor(Year)2008 | 0.231 | 0.083 | 2.794 | 0.005 |
| Multiple Imputation | factor(Year)2009 | -0.359 | 0.114 | -3.153 | 0.002 |
| Multiple Imputation | factor(Year)2010 | 0.437 | 0.080 | 5.456 | 0.000 |
| Multiple Imputation | factor(Year)2011 | 0.568 | 0.078 | 7.255 | 0.000 |
| Multiple Imputation | factor(Year)2012 | 0.747 | 0.077 | 9.683 | 0.000 |
| Multiple Imputation | factor(Year)2013 | 0.818 | 0.077 | 10.614 | 0.000 |
| Multiple Imputation | factor(Year)2014 | 0.774 | 0.077 | 10.080 | 0.000 |
| Multiple Imputation | factor(Year)2015 | 0.940 | 0.076 | 12.382 | 0.000 |
| Multiple Imputation | factor(Year)2016 | 1.276 | 0.075 | 17.115 | 0.000 |
| Multiple Imputation | factor(Year)2017 | 1.545 | 0.074 | 20.940 | 0.000 |
| Multiple Imputation | factor(Year)2018 | 1.632 | 0.073 | 22.220 | 0.000 |
| Multiple Imputation | factor(Year)2019 | 1.585 | 0.074 | 21.356 | 0.000 |
| Multiple Imputation | factor(Year)2020 | 1.605 | 0.074 | 21.814 | 0.000 |
| Multiple Imputation | factor(Year)2021 | 1.663 | 0.074 | 22.586 | 0.000 |
| Multiple Imputation | factor(Year)2022 | 1.627 | 0.075 | 21.808 | 0.000 |
| Multiple Imputation | factor(Year)2023 | 1.514 | 0.075 | 20.266 | 0.000 |
| Multiple Imputation | factor(County)Bergen | 0.226 | 0.058 | 3.911 | 0.000 |
| Multiple Imputation | factor(County)Burlington | 0.071 | 0.057 | 1.244 | 0.213 |
| Multiple Imputation | factor(County)Camden | 0.718 | 0.056 | 12.886 | 0.000 |
| Multiple Imputation | factor(County)Cape May | -1.041 | 0.071 | -14.590 | 0.000 |
| Multiple Imputation | factor(County)Cumberland | -0.775 | 0.067 | -11.645 | 0.000 |
| Multiple Imputation | factor(County)Essex | 0.733 | 0.056 | 13.046 | 0.000 |
| Multiple Imputation | factor(County)Gloucester | 0.011 | 0.058 | 0.187 | 0.852 |
| Multiple Imputation | factor(County)Hudson | 0.098 | 0.059 | 1.675 | 0.094 |
| Multiple Imputation | factor(County)Hunterdon | -1.877 | 0.096 | -19.524 | 0.000 |
| Multiple Imputation | factor(County)Mercer | -0.377 | 0.061 | -6.166 | 0.000 |
| Multiple Imputation | factor(County)Middlesex | 0.371 | 0.056 | 6.619 | 0.000 |
| Multiple Imputation | factor(County)Monmouth | 0.236 | 0.057 | 4.132 | 0.000 |
| Multiple Imputation | factor(County)Morris | -0.357 | 0.060 | -5.900 | 0.000 |
| Multiple Imputation | factor(County)Ocean | 0.399 | 0.056 | 7.063 | 0.000 |
| Multiple Imputation | factor(County)Passaic | -0.062 | 0.059 | -1.060 | 0.289 |
| Multiple Imputation | factor(County)Salem | -1.488 | 0.082 | -18.202 | 0.000 |
| Multiple Imputation | factor(County)Somerset | -0.882 | 0.066 | -13.350 | 0.000 |
| Multiple Imputation | factor(County)Sussex | -1.205 | 0.072 | -16.727 | 0.000 |
| Multiple Imputation | factor(County)Union | -0.167 | 0.060 | -2.798 | 0.005 |
| Multiple Imputation | factor(County)Warren | -1.438 | 0.080 | -17.958 | 0.000 |
| Bayesian Imputation 1 | (Intercept) | 3.411 | 0.073 | 46.731 | 0.000 |
| Bayesian Imputation 1 | Conviction_Count | -0.002 | 0.005 | -0.446 | 0.656 |
| Bayesian Imputation 1 | factor(Year)2000 | 0.145 | 0.084 | 1.719 | 0.086 |
| Bayesian Imputation 1 | factor(Year)2001 | 0.204 | 0.083 | 2.475 | 0.013 |
| Bayesian Imputation 1 | factor(Year)2002 | 0.362 | 0.081 | 4.454 | 0.000 |
| Bayesian Imputation 1 | factor(Year)2003 | 0.240 | 0.082 | 2.931 | 0.003 |
| Bayesian Imputation 1 | factor(Year)2004 | 0.031 | 0.084 | 0.363 | 0.717 |
| Bayesian Imputation 1 | factor(Year)2005 | 0.346 | 0.080 | 4.311 | 0.000 |
| Bayesian Imputation 1 | factor(Year)2006 | 0.379 | 0.080 | 4.731 | 0.000 |
| Bayesian Imputation 1 | factor(Year)2007 | 0.236 | 0.082 | 2.867 | 0.004 |
| Bayesian Imputation 1 | factor(Year)2008 | 0.231 | 0.083 | 2.794 | 0.005 |
| Bayesian Imputation 1 | factor(Year)2009 | -0.359 | 0.114 | -3.153 | 0.002 |
| Bayesian Imputation 1 | factor(Year)2010 | 0.437 | 0.080 | 5.456 | 0.000 |
| Bayesian Imputation 1 | factor(Year)2011 | 0.568 | 0.078 | 7.255 | 0.000 |
| Bayesian Imputation 1 | factor(Year)2012 | 0.747 | 0.077 | 9.683 | 0.000 |
| Bayesian Imputation 1 | factor(Year)2013 | 0.818 | 0.077 | 10.614 | 0.000 |
| Bayesian Imputation 1 | factor(Year)2014 | 0.774 | 0.077 | 10.080 | 0.000 |
| Bayesian Imputation 1 | factor(Year)2015 | 0.940 | 0.076 | 12.382 | 0.000 |
| Bayesian Imputation 1 | factor(Year)2016 | 1.276 | 0.075 | 17.115 | 0.000 |
| Bayesian Imputation 1 | factor(Year)2017 | 1.545 | 0.074 | 20.940 | 0.000 |
| Bayesian Imputation 1 | factor(Year)2018 | 1.632 | 0.073 | 22.220 | 0.000 |
| Bayesian Imputation 1 | factor(Year)2019 | 1.585 | 0.074 | 21.356 | 0.000 |
| Bayesian Imputation 1 | factor(Year)2020 | 1.605 | 0.074 | 21.814 | 0.000 |
| Bayesian Imputation 1 | factor(Year)2021 | 1.663 | 0.074 | 22.586 | 0.000 |
| Bayesian Imputation 1 | factor(Year)2022 | 1.627 | 0.075 | 21.808 | 0.000 |
| Bayesian Imputation 1 | factor(Year)2023 | 1.514 | 0.075 | 20.266 | 0.000 |
| Bayesian Imputation 1 | factor(County)Bergen | 0.226 | 0.058 | 3.911 | 0.000 |
| Bayesian Imputation 1 | factor(County)Burlington | 0.071 | 0.057 | 1.244 | 0.213 |
| Bayesian Imputation 1 | factor(County)Camden | 0.718 | 0.056 | 12.886 | 0.000 |
| Bayesian Imputation 1 | factor(County)Cape May | -1.041 | 0.071 | -14.590 | 0.000 |
| Bayesian Imputation 1 | factor(County)Cumberland | -0.775 | 0.067 | -11.645 | 0.000 |
| Bayesian Imputation 1 | factor(County)Essex | 0.733 | 0.056 | 13.046 | 0.000 |
| Bayesian Imputation 1 | factor(County)Gloucester | 0.011 | 0.058 | 0.187 | 0.852 |
| Bayesian Imputation 1 | factor(County)Hudson | 0.098 | 0.059 | 1.675 | 0.094 |
| Bayesian Imputation 1 | factor(County)Hunterdon | -1.877 | 0.096 | -19.524 | 0.000 |
| Bayesian Imputation 1 | factor(County)Mercer | -0.377 | 0.061 | -6.166 | 0.000 |
| Bayesian Imputation 1 | factor(County)Middlesex | 0.371 | 0.056 | 6.619 | 0.000 |
| Bayesian Imputation 1 | factor(County)Monmouth | 0.236 | 0.057 | 4.132 | 0.000 |
| Bayesian Imputation 1 | factor(County)Morris | -0.357 | 0.060 | -5.900 | 0.000 |
| Bayesian Imputation 1 | factor(County)Ocean | 0.399 | 0.056 | 7.063 | 0.000 |
| Bayesian Imputation 1 | factor(County)Passaic | -0.062 | 0.059 | -1.060 | 0.289 |
| Bayesian Imputation 1 | factor(County)Salem | -1.488 | 0.082 | -18.202 | 0.000 |
| Bayesian Imputation 1 | factor(County)Somerset | -0.882 | 0.066 | -13.350 | 0.000 |
| Bayesian Imputation 1 | factor(County)Sussex | -1.205 | 0.072 | -16.727 | 0.000 |
| Bayesian Imputation 1 | factor(County)Union | -0.167 | 0.060 | -2.798 | 0.005 |
| Bayesian Imputation 1 | factor(County)Warren | -1.438 | 0.080 | -17.958 | 0.000 |
| Bayesian Imputation 2 | (Intercept) | 3.411 | 0.073 | 46.731 | 0.000 |
| Bayesian Imputation 2 | Conviction_Count | -0.002 | 0.005 | -0.446 | 0.656 |
| Bayesian Imputation 2 | factor(Year)2000 | 0.145 | 0.084 | 1.719 | 0.086 |
| Bayesian Imputation 2 | factor(Year)2001 | 0.204 | 0.083 | 2.475 | 0.013 |
| Bayesian Imputation 2 | factor(Year)2002 | 0.362 | 0.081 | 4.454 | 0.000 |
| Bayesian Imputation 2 | factor(Year)2003 | 0.240 | 0.082 | 2.931 | 0.003 |
| Bayesian Imputation 2 | factor(Year)2004 | 0.031 | 0.084 | 0.363 | 0.717 |
| Bayesian Imputation 2 | factor(Year)2005 | 0.346 | 0.080 | 4.311 | 0.000 |
| Bayesian Imputation 2 | factor(Year)2006 | 0.379 | 0.080 | 4.731 | 0.000 |
| Bayesian Imputation 2 | factor(Year)2007 | 0.236 | 0.082 | 2.867 | 0.004 |
| Bayesian Imputation 2 | factor(Year)2008 | 0.231 | 0.083 | 2.794 | 0.005 |
| Bayesian Imputation 2 | factor(Year)2009 | -0.359 | 0.114 | -3.153 | 0.002 |
| Bayesian Imputation 2 | factor(Year)2010 | 0.437 | 0.080 | 5.456 | 0.000 |
| Bayesian Imputation 2 | factor(Year)2011 | 0.568 | 0.078 | 7.255 | 0.000 |
| Bayesian Imputation 2 | factor(Year)2012 | 0.747 | 0.077 | 9.683 | 0.000 |
| Bayesian Imputation 2 | factor(Year)2013 | 0.818 | 0.077 | 10.614 | 0.000 |
| Bayesian Imputation 2 | factor(Year)2014 | 0.774 | 0.077 | 10.080 | 0.000 |
| Bayesian Imputation 2 | factor(Year)2015 | 0.940 | 0.076 | 12.382 | 0.000 |
| Bayesian Imputation 2 | factor(Year)2016 | 1.276 | 0.075 | 17.115 | 0.000 |
| Bayesian Imputation 2 | factor(Year)2017 | 1.545 | 0.074 | 20.940 | 0.000 |
| Bayesian Imputation 2 | factor(Year)2018 | 1.632 | 0.073 | 22.220 | 0.000 |
| Bayesian Imputation 2 | factor(Year)2019 | 1.585 | 0.074 | 21.356 | 0.000 |
| Bayesian Imputation 2 | factor(Year)2020 | 1.605 | 0.074 | 21.814 | 0.000 |
| Bayesian Imputation 2 | factor(Year)2021 | 1.663 | 0.074 | 22.586 | 0.000 |
| Bayesian Imputation 2 | factor(Year)2022 | 1.627 | 0.075 | 21.808 | 0.000 |
| Bayesian Imputation 2 | factor(Year)2023 | 1.514 | 0.075 | 20.266 | 0.000 |
| Bayesian Imputation 2 | factor(County)Bergen | 0.226 | 0.058 | 3.911 | 0.000 |
| Bayesian Imputation 2 | factor(County)Burlington | 0.071 | 0.057 | 1.244 | 0.213 |
| Bayesian Imputation 2 | factor(County)Camden | 0.718 | 0.056 | 12.886 | 0.000 |
| Bayesian Imputation 2 | factor(County)Cape May | -1.041 | 0.071 | -14.590 | 0.000 |
| Bayesian Imputation 2 | factor(County)Cumberland | -0.775 | 0.067 | -11.645 | 0.000 |
| Bayesian Imputation 2 | factor(County)Essex | 0.733 | 0.056 | 13.046 | 0.000 |
| Bayesian Imputation 2 | factor(County)Gloucester | 0.011 | 0.058 | 0.187 | 0.852 |
| Bayesian Imputation 2 | factor(County)Hudson | 0.098 | 0.059 | 1.675 | 0.094 |
| Bayesian Imputation 2 | factor(County)Hunterdon | -1.877 | 0.096 | -19.524 | 0.000 |
| Bayesian Imputation 2 | factor(County)Mercer | -0.377 | 0.061 | -6.166 | 0.000 |
| Bayesian Imputation 2 | factor(County)Middlesex | 0.371 | 0.056 | 6.619 | 0.000 |
| Bayesian Imputation 2 | factor(County)Monmouth | 0.236 | 0.057 | 4.132 | 0.000 |
| Bayesian Imputation 2 | factor(County)Morris | -0.357 | 0.060 | -5.900 | 0.000 |
| Bayesian Imputation 2 | factor(County)Ocean | 0.399 | 0.056 | 7.063 | 0.000 |
| Bayesian Imputation 2 | factor(County)Passaic | -0.062 | 0.059 | -1.060 | 0.289 |
| Bayesian Imputation 2 | factor(County)Salem | -1.488 | 0.082 | -18.202 | 0.000 |
| Bayesian Imputation 2 | factor(County)Somerset | -0.882 | 0.066 | -13.350 | 0.000 |
| Bayesian Imputation 2 | factor(County)Sussex | -1.205 | 0.072 | -16.727 | 0.000 |
| Bayesian Imputation 2 | factor(County)Union | -0.167 | 0.060 | -2.798 | 0.005 |
| Bayesian Imputation 2 | factor(County)Warren | -1.438 | 0.080 | -17.958 | 0.000 |
The Negative Binomial model with County and Year treated as factors gives the best fit (lowest AIC), but Conviction_Count is still not statistically significant. Next, let’s add a time lag.
library(dplyr)
# Create 1-year lagged variables by County
combined_df_county_lagged <- combined_df_county %>%
arrange(County, Year) %>%
group_by(County) %>%
mutate(
Conviction_Count_lag1 = lag(Conviction_Count, 1),
Arrest_Count_lag1 = lag(Arrest_Count, 1)
) %>%
ungroup()
# Load required package
library(dplyr)
# Define function to add lagged conviction count
lag_and_return <- function(df) {
df %>%
arrange(County, Year) %>%
group_by(County) %>%
mutate(
Conviction_Count_lag1 = lag(Conviction_Count, 1)
) %>%
ungroup()
}
# Apply function to each dataset
combined_df_county_md_lag <- lag_and_return(combined_df_county_md)
df_lower_lag <- lag_and_return(df_lower)
df_upper_lag <- lag_and_return(df_upper)
imputed_df_MI_lag <- lag_and_return(imputed_df_MI)
combined_df_county_Bay1_lag <- lag_and_return(combined_df_county_Bay1)
combined_df_county_Bay2_lag <- lag_and_return(combined_df_county_Bay2)
library(MASS)
library(dplyr)
run_lagged_models <- function(data, label) {
cat("\n=== Running Models for:", label, "===\n")
# Fit Poisson model
poisson_model <- glm(
Deaths_num ~ Conviction_Count_lag1 + County + Year,
family = poisson,
data = data
)
# Calculate dispersion
dispersion <- sum(residuals(poisson_model, type = "pearson")^2) / poisson_model$df.residual
cat("Overdispersion ratio:", round(dispersion, 2), "\n")
# Fit Negative Binomial model
nb_model <- glm.nb(
Deaths_num ~ Conviction_Count_lag1 + County + Year,
data = data
)
return(list(
label = label,
poisson_model = poisson_model,
nb_model = nb_model,
poisson_summary = summary(poisson_model),
nb_summary = summary(nb_model),
poisson_AIC = AIC(poisson_model),
nb_AIC = AIC(nb_model),
dispersion = dispersion
))
}
# Run models
results_md_lag <- run_lagged_models(combined_df_county_md_lag, "Midpoint Imputation")
##
## === Running Models for: Midpoint Imputation ===
## Overdispersion ratio: 6.77
results_lower_lag <- run_lagged_models(df_lower_lag, "Lower Bound Imputation")
##
## === Running Models for: Lower Bound Imputation ===
## Overdispersion ratio: 7.48
results_upper_lag <- run_lagged_models(df_upper_lag, "Upper Bound Imputation")
##
## === Running Models for: Upper Bound Imputation ===
## Overdispersion ratio: 6.45
results_mi_lag <- run_lagged_models(imputed_df_MI_lag, "Multiple Imputation")
##
## === Running Models for: Multiple Imputation ===
## Overdispersion ratio: 6.9
results_bayes1_lag <- run_lagged_models(combined_df_county_Bay1_lag, "Bayesian Imputation 1")
##
## === Running Models for: Bayesian Imputation 1 ===
## Overdispersion ratio: 6.47
results_bayes2_lag <- run_lagged_models(combined_df_county_Bay2_lag, "Bayesian Imputation 2")
##
## === Running Models for: Bayesian Imputation 2 ===
## Overdispersion ratio: 6.45
library(knitr)
lagged_model_summary <- bind_rows(
data.frame(Method = results_md_lag$label, Poisson_AIC = results_md_lag$poisson_AIC, NegBin_AIC = results_md_lag$nb_AIC),
data.frame(Method = results_lower_lag$label, Poisson_AIC = results_lower_lag$poisson_AIC, NegBin_AIC = results_lower_lag$nb_AIC),
data.frame(Method = results_upper_lag$label, Poisson_AIC = results_upper_lag$poisson_AIC, NegBin_AIC = results_upper_lag$nb_AIC),
data.frame(Method = results_mi_lag$label, Poisson_AIC = results_mi_lag$poisson_AIC, NegBin_AIC = results_mi_lag$nb_AIC),
data.frame(Method = results_bayes1_lag$label, Poisson_AIC = results_bayes1_lag$poisson_AIC, NegBin_AIC = results_bayes1_lag$nb_AIC),
data.frame(Method = results_bayes2_lag$label, Poisson_AIC = results_bayes2_lag$poisson_AIC, NegBin_AIC = results_bayes2_lag$nb_AIC)
)
kable(lagged_model_summary, caption = "Model AICs with Lagged Conviction Count (1-Year Lag)")
| Method | Poisson_AIC | NegBin_AIC |
|---|---|---|
| Midpoint Imputation | 6392.913 | 4382.249 |
| Lower Bound Imputation | 6896.009 | 4568.021 |
| Upper Bound Imputation | 6193.911 | 4329.119 |
| Multiple Imputation | 6501.502 | 4430.688 |
| Bayesian Imputation 1 | 6202.982 | 4332.581 |
| Bayesian Imputation 2 | 6183.374 | 4321.602 |
library(dplyr)
library(knitr)
library(broom)
# Helper function to extract coefficients from a model
extract_lagged_coef <- function(model, method_label) {
tidy(model) %>%
filter(term == "Conviction_Count_lag1") %>%
mutate(Method = method_label) %>%
dplyr::select(Method, term, estimate, std.error, statistic, p.value)
}
# Apply to all models
coef_md_lag <- extract_lagged_coef(results_md_lag$nb_model, "Midpoint Imputation")
coef_lower_lag <- extract_lagged_coef(results_lower_lag$nb_model, "Lower Bound Imputation")
coef_upper_lag <- extract_lagged_coef(results_upper_lag$nb_model, "Upper Bound Imputation")
coef_mi_lag <- extract_lagged_coef(results_mi_lag$nb_model, "Multiple Imputation")
coef_bayes1_lag <- extract_lagged_coef(results_bayes1_lag$nb_model, "Bayesian Imputation 1")
coef_bayes2_lag <- extract_lagged_coef(results_bayes2_lag$nb_model, "Bayesian Imputation 2")
# Combine all into one table
combined_lagged_coefs <- bind_rows(
coef_md_lag,
coef_lower_lag,
coef_upper_lag,
coef_mi_lag,
coef_bayes1_lag,
coef_bayes2_lag
)
# Pretty print
kable(combined_lagged_coefs, digits = 3, caption = "Effect of 1-Year Lagged Conviction Count on Overdose Deaths (Negative Binomial Models)")
| Method | term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|---|
| Midpoint Imputation | Conviction_Count_lag1 | 0.009 | 0.011 | 0.814 | 0.415 |
| Lower Bound Imputation | Conviction_Count_lag1 | 0.007 | 0.014 | 0.501 | 0.616 |
| Upper Bound Imputation | Conviction_Count_lag1 | 0.010 | 0.010 | 1.018 | 0.309 |
| Multiple Imputation | Conviction_Count_lag1 | 0.010 | 0.011 | 0.884 | 0.377 |
| Bayesian Imputation 1 | Conviction_Count_lag1 | 0.010 | 0.010 | 1.017 | 0.309 |
| Bayesian Imputation 2 | Conviction_Count_lag1 | 0.009 | 0.010 | 0.915 | 0.360 |
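Unlike the unlagged comparison, the lagged AICs differ across imputation methods, which suggests these fits use the Deaths_num column produced during imputation (run_lagged_models does not re-coerce Deaths), so the imputed values now enter the likelihood. None of the lagged coefficients approaches significance; for scale, a quick conversion of the Bayesian Imputation 2 estimate above to an incidence-rate ratio (IRR) with a Wald 95% CI:
# IRR and 95% CI from the log-scale estimate and SE reported above.
est <- 0.009; se <- 0.010
exp(est)                         # IRR ~ 1.009 per additional lagged conviction
exp(est + c(-1.96, 1.96) * se)   # ~ 0.99 to 1.03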
library(dplyr)
library(knitr)
library(broom)
# Helper function to extract all coefficients from a model and label them
extract_all_coefs <- function(model, method_label) {
tidy(model) %>%
mutate(Method = method_label) %>%
dplyr::select(Method, term, estimate, std.error, statistic, p.value)
}
# Apply to all lagged Negative Binomial models
coef_md_lag_all <- extract_all_coefs(results_md_lag$nb_model, "Midpoint Imputation")
coef_lower_lag_all <- extract_all_coefs(results_lower_lag$nb_model, "Lower Bound Imputation")
coef_upper_lag_all <- extract_all_coefs(results_upper_lag$nb_model, "Upper Bound Imputation")
coef_mi_lag_all <- extract_all_coefs(results_mi_lag$nb_model, "Multiple Imputation")
coef_bayes1_lag_all <- extract_all_coefs(results_bayes1_lag$nb_model, "Bayesian Imputation 1")
coef_bayes2_lag_all <- extract_all_coefs(results_bayes2_lag$nb_model, "Bayesian Imputation 2")
# Combine into one big table
combined_all_coefs <- bind_rows(
coef_md_lag_all,
coef_lower_lag_all,
coef_upper_lag_all,
coef_mi_lag_all,
coef_bayes1_lag_all,
coef_bayes2_lag_all
)
# Optional: Print (first 20 rows only if table is too large)
kable(
head(combined_all_coefs, 20),
digits = 3,
caption = "Sample of Coefficients from Lagged Negative Binomial Models"
)
| Method | term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|---|
| Midpoint Imputation | (Intercept) | -163.886 | 5.255 | -31.186 | 0.000 |
| Midpoint Imputation | Conviction_Count_lag1 | 0.009 | 0.011 | 0.814 | 0.415 |
| Midpoint Imputation | CountyBergen | 0.268 | 0.108 | 2.489 | 0.013 |
| Midpoint Imputation | CountyBurlington | 0.075 | 0.107 | 0.696 | 0.486 |
| Midpoint Imputation | CountyCamden | 0.750 | 0.107 | 7.031 | 0.000 |
| Midpoint Imputation | CountyCape May | -1.187 | 0.114 | -10.410 | 0.000 |
| Midpoint Imputation | CountyCumberland | -0.816 | 0.113 | -7.249 | 0.000 |
| Midpoint Imputation | CountyEssex | 0.784 | 0.107 | 7.336 | 0.000 |
| Midpoint Imputation | CountyGloucester | 0.050 | 0.107 | 0.463 | 0.643 |
| Midpoint Imputation | CountyHudson | 0.138 | 0.108 | 1.279 | 0.201 |
| Midpoint Imputation | CountyHunterdon | -1.927 | 0.124 | -15.497 | 0.000 |
| Midpoint Imputation | CountyMercer | -0.367 | 0.110 | -3.335 | 0.001 |
| Midpoint Imputation | CountyMiddlesex | 0.394 | 0.107 | 3.697 | 0.000 |
| Midpoint Imputation | CountyMonmouth | 0.286 | 0.108 | 2.664 | 0.008 |
| Midpoint Imputation | CountyMorris | -0.307 | 0.109 | -2.820 | 0.005 |
| Midpoint Imputation | CountyOcean | 0.396 | 0.106 | 3.723 | 0.000 |
| Midpoint Imputation | CountyPassaic | -0.021 | 0.108 | -0.193 | 0.847 |
| Midpoint Imputation | CountySalem | -1.566 | 0.120 | -13.086 | 0.000 |
| Midpoint Imputation | CountySomerset | -0.837 | 0.112 | -7.461 | 0.000 |
| Midpoint Imputation | CountySussex | -1.218 | 0.116 | -10.539 | 0.000 |
run_arrest_models <- function(data, label) {
cat("\n=== Running Models for:", label, "===\n")
# Poisson
poisson_model <- glm(
Deaths_num ~ Arrest_Count + factor(County) + factor(Year),
family = poisson,
data = data
)
# Check overdispersion
dispersion <- sum(residuals(poisson_model, type = "pearson")^2) / poisson_model$df.residual
cat("Overdispersion ratio (Poisson):", round(dispersion, 2), "\n")
# Negative Binomial
nb_model <- glm.nb(
Deaths_num ~ Arrest_Count + factor(County) + factor(Year),
data = data
)
# AIC summary as a data frame
model_summary <- data.frame(
Method = label,
Poisson_AIC = AIC(poisson_model),
NegBin_AIC = AIC(nb_model),
Overdispersion = round(dispersion, 2)
)
return(list(
label = label,
poisson_model = poisson_model,
nb_model = nb_model,
poisson_summary = summary(poisson_model),
nb_summary = summary(nb_model),
summary = model_summary,
dispersion = dispersion
))
}
# 1. Filter datasets for Year >= 2017
combined_df_county_md_2017 <- combined_df_county_md %>% filter(Year >= 2017)
df_lower_2017 <- df_lower %>% filter(Year >= 2017)
df_upper_2017 <- df_upper %>% filter(Year >= 2017)
imputed_df_2017 <- imputed_df_MI %>% filter(Year >= 2017)
combined_df_county_Bay1_2017 <- combined_df_county_Bay1 %>% filter(Year >= 2017)
combined_df_county_Bay2_2017 <- combined_df_county_Bay2 %>% filter(Year >= 2017)
# 2. Run models
results_md_arrest <- run_arrest_models(combined_df_county_md_2017, "Midpoint Imputation")
##
## === Running Models for: Midpoint Imputation ===
## Overdispersion ratio (Poisson): 2.26
results_lower_arrest <- run_arrest_models(df_lower_2017, "Lower Bound Imputation")
##
## === Running Models for: Lower Bound Imputation ===
## Overdispersion ratio (Poisson): 2.26
results_upper_arrest <- run_arrest_models(df_upper_2017, "Upper Bound Imputation")
##
## === Running Models for: Upper Bound Imputation ===
## Overdispersion ratio (Poisson): 2.26
results_mi_arrest <- run_arrest_models(imputed_df_2017, "Multiple Imputation")
##
## === Running Models for: Multiple Imputation ===
## Overdispersion ratio (Poisson): 2.26
results_bayes1_arrest <- run_arrest_models(combined_df_county_Bay1_2017, "Bayesian Imputation 1")
##
## === Running Models for: Bayesian Imputation 1 ===
## Overdispersion ratio (Poisson): 2.26
results_bayes2_arrest <- run_arrest_models(combined_df_county_Bay2_2017, "Bayesian Imputation 2")
##
## === Running Models for: Bayesian Imputation 2 ===
## Overdispersion ratio (Poisson): 2.26
library(dplyr)
library(knitr)
# Combine AIC results from all model runs
model_summaries_24 <- bind_rows(
results_md_arrest$summary,
results_lower_arrest$summary,
results_upper_arrest$summary,
results_mi_arrest$summary,
results_bayes1_arrest$summary,
results_bayes2_arrest$summary
)
# Print as a markdown table
kable(model_summaries_24, caption = "Model Fit Comparison: AIC Values")
| Method | Poisson_AIC | NegBin_AIC | Overdispersion |
|---|---|---|---|
| Midpoint Imputation | 1279.785 | 1241.888 | 2.26 |
| Lower Bound Imputation | 1279.785 | 1241.888 | 2.26 |
| Upper Bound Imputation | 1279.785 | 1241.888 | 2.26 |
| Multiple Imputation | 1279.785 | 1241.888 | 2.26 |
| Bayesian Imputation 1 | 1279.785 | 1241.888 | 2.26 |
| Bayesian Imputation 2 | 1279.785 | 1241.888 | 2.26 |
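As before, the AICs are identical across imputation methods. One plausible explanation is that the post-2017 subset contains few or no suppressed county-year cells, in which case the imputation variants coincide exactly; a quick check, assuming Deaths_num is the column used by run_arrest_models:
# If the variants carry identical Deaths_num values after 2017, identical AICs follow.
identical(combined_df_county_md_2017$Deaths_num, df_upper_2017$Deaths_num)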
library(dplyr)
library(broom)
library(knitr)
# List of results
all_arrest_models <- list(
results_md_arrest,
results_lower_arrest,
results_upper_arrest,
results_mi_arrest,
results_bayes1_arrest,
results_bayes2_arrest
)
# Helper to extract and label coefficients from Poisson and NB models
extract_coefs <- function(model_list) {
label <- model_list$label
# Extract tidy summaries
poisson_coefs <- broom::tidy(model_list$poisson_model) %>%
mutate(Model = "Poisson", Method = label)
nb_coefs <- broom::tidy(model_list$nb_model) %>%
mutate(Model = "NegBin", Method = label)
bind_rows(poisson_coefs, nb_coefs)
}
# Combine all coefficients
combined_coefs55 <- purrr::map_df(all_arrest_models, extract_coefs)
# Optional: Filter to terms of interest (e.g., Arrest_Count)
arrest_coefs <- combined_coefs55 %>%
filter(term == "Arrest_Count") %>%
dplyr::select(Method, Model, term, estimate, std.error, statistic, p.value)
# Print the full coefficient table (arrest_coefs above holds just the Arrest_Count rows)
kable(combined_coefs55, digits = 3, caption = "Arrest Data Coefficients Across Models")
| term | estimate | std.error | statistic | p.value | Model | Method |
|---|---|---|---|---|---|---|
| (Intercept) | 5.022 | 0.040 | 127.046 | 0.000 | Poisson | Midpoint Imputation |
| Arrest_Count | -0.002 | 0.002 | -0.798 | 0.425 | Poisson | Midpoint Imputation |
| factor(County)Bergen | -0.001 | 0.045 | -0.027 | 0.978 | Poisson | Midpoint Imputation |
| factor(County)Burlington | 0.000 | 0.042 | -0.011 | 0.992 | Poisson | Midpoint Imputation |
| factor(County)Camden | 0.604 | 0.041 | 14.838 | 0.000 | Poisson | Midpoint Imputation |
| factor(County)Cape May | -1.146 | 0.061 | -18.758 | 0.000 | Poisson | Midpoint Imputation |
| factor(County)Cumberland | -0.654 | 0.054 | -12.226 | 0.000 | Poisson | Midpoint Imputation |
| factor(County)Essex | 0.733 | 0.039 | 18.691 | 0.000 | Poisson | Midpoint Imputation |
| factor(County)Gloucester | -0.132 | 0.046 | -2.887 | 0.004 | Poisson | Midpoint Imputation |
| factor(County)Hudson | 0.047 | 0.045 | 1.047 | 0.295 | Poisson | Midpoint Imputation |
| factor(County)Hunterdon | -2.136 | 0.093 | -22.921 | 0.000 | Poisson | Midpoint Imputation |
| factor(County)Mercer | -0.279 | 0.048 | -5.754 | 0.000 | Poisson | Midpoint Imputation |
| factor(County)Middlesex | 0.298 | 0.040 | 7.393 | 0.000 | Poisson | Midpoint Imputation |
| factor(County)Monmouth | 0.127 | 0.043 | 2.990 | 0.003 | Poisson | Midpoint Imputation |
| factor(County)Morris | -0.549 | 0.051 | -10.795 | 0.000 | Poisson | Midpoint Imputation |
| factor(County)Ocean | 0.316 | 0.040 | 7.937 | 0.000 | Poisson | Midpoint Imputation |
| factor(County)Passaic | -0.140 | 0.046 | -3.059 | 0.002 | Poisson | Midpoint Imputation |
| factor(County)Salem | -1.593 | 0.075 | -21.377 | 0.000 | Poisson | Midpoint Imputation |
| factor(County)Somerset | -1.063 | 0.061 | -17.324 | 0.000 | Poisson | Midpoint Imputation |
| factor(County)Sussex | -1.361 | 0.068 | -19.916 | 0.000 | Poisson | Midpoint Imputation |
| factor(County)Union | -0.068 | 0.046 | -1.474 | 0.140 | Poisson | Midpoint Imputation |
| factor(County)Warren | -1.571 | 0.074 | -21.268 | 0.000 | Poisson | Midpoint Imputation |
| factor(Year)2018 | 0.078 | 0.027 | 2.922 | 0.003 | Poisson | Midpoint Imputation |
| factor(Year)2019 | 0.045 | 0.027 | 1.678 | 0.093 | Poisson | Midpoint Imputation |
| factor(Year)2020 | 0.055 | 0.027 | 2.022 | 0.043 | Poisson | Midpoint Imputation |
| factor(Year)2021 | 0.128 | 0.027 | 4.815 | 0.000 | Poisson | Midpoint Imputation |
| factor(Year)2022 | 0.104 | 0.027 | 3.910 | 0.000 | Poisson | Midpoint Imputation |
| factor(Year)2023 | -0.002 | 0.028 | -0.069 | 0.945 | Poisson | Midpoint Imputation |
| (Intercept) | 5.036 | 0.055 | 90.812 | 0.000 | NegBin | Midpoint Imputation |
| Arrest_Count | -0.003 | 0.003 | -0.817 | 0.414 | NegBin | Midpoint Imputation |
| factor(County)Bergen | -0.008 | 0.062 | -0.136 | 0.892 | NegBin | Midpoint Imputation |
| factor(County)Burlington | 0.000 | 0.059 | 0.001 | 0.999 | NegBin | Midpoint Imputation |
| factor(County)Camden | 0.599 | 0.060 | 9.935 | 0.000 | NegBin | Midpoint Imputation |
| factor(County)Cape May | -1.148 | 0.074 | -15.584 | 0.000 | NegBin | Midpoint Imputation |
| factor(County)Cumberland | -0.659 | 0.069 | -9.506 | 0.000 | NegBin | Midpoint Imputation |
| factor(County)Essex | 0.730 | 0.059 | 12.426 | 0.000 | NegBin | Midpoint Imputation |
| factor(County)Gloucester | -0.136 | 0.063 | -2.157 | 0.031 | NegBin | Midpoint Imputation |
| factor(County)Hudson | 0.041 | 0.064 | 0.650 | 0.515 | NegBin | Midpoint Imputation |
| factor(County)Hunterdon | -2.141 | 0.103 | -20.760 | 0.000 | NegBin | Midpoint Imputation |
| factor(County)Mercer | -0.286 | 0.066 | -4.348 | 0.000 | NegBin | Midpoint Imputation |
| factor(County)Middlesex | 0.294 | 0.058 | 5.073 | 0.000 | NegBin | Midpoint Imputation |
| factor(County)Monmouth | 0.123 | 0.060 | 2.043 | 0.041 | NegBin | Midpoint Imputation |
| factor(County)Morris | -0.553 | 0.066 | -8.322 | 0.000 | NegBin | Midpoint Imputation |
| factor(County)Ocean | 0.314 | 0.058 | 5.458 | 0.000 | NegBin | Midpoint Imputation |
| factor(County)Passaic | -0.145 | 0.063 | -2.306 | 0.021 | NegBin | Midpoint Imputation |
| factor(County)Salem | -1.599 | 0.087 | -18.422 | 0.000 | NegBin | Midpoint Imputation |
| factor(County)Somerset | -1.070 | 0.076 | -14.095 | 0.000 | NegBin | Midpoint Imputation |
| factor(County)Sussex | -1.367 | 0.082 | -16.734 | 0.000 | NegBin | Midpoint Imputation |
| factor(County)Union | -0.075 | 0.064 | -1.169 | 0.243 | NegBin | Midpoint Imputation |
| factor(County)Warren | -1.577 | 0.086 | -18.285 | 0.000 | NegBin | Midpoint Imputation |
| factor(Year)2018 | 0.083 | 0.037 | 2.228 | 0.026 | NegBin | Midpoint Imputation |
| factor(Year)2019 | 0.042 | 0.038 | 1.110 | 0.267 | NegBin | Midpoint Imputation |
| factor(Year)2020 | 0.057 | 0.038 | 1.513 | 0.130 | NegBin | Midpoint Imputation |
| factor(Year)2021 | 0.120 | 0.037 | 3.204 | 0.001 | NegBin | Midpoint Imputation |
| factor(Year)2022 | 0.083 | 0.038 | 2.201 | 0.028 | NegBin | Midpoint Imputation |
| factor(Year)2023 | -0.029 | 0.038 | -0.752 | 0.452 | NegBin | Midpoint Imputation |
| (Intercept) | 5.022 | 0.040 | 127.046 | 0.000 | Poisson | Lower Bound Imputation |
| Arrest_Count | -0.002 | 0.002 | -0.798 | 0.425 | Poisson | Lower Bound Imputation |
| factor(County)Bergen | -0.001 | 0.045 | -0.027 | 0.978 | Poisson | Lower Bound Imputation |
| factor(County)Burlington | 0.000 | 0.042 | -0.011 | 0.992 | Poisson | Lower Bound Imputation |
| factor(County)Camden | 0.604 | 0.041 | 14.838 | 0.000 | Poisson | Lower Bound Imputation |
| factor(County)Cape May | -1.146 | 0.061 | -18.758 | 0.000 | Poisson | Lower Bound Imputation |
| factor(County)Cumberland | -0.654 | 0.054 | -12.226 | 0.000 | Poisson | Lower Bound Imputation |
| factor(County)Essex | 0.733 | 0.039 | 18.691 | 0.000 | Poisson | Lower Bound Imputation |
| factor(County)Gloucester | -0.132 | 0.046 | -2.887 | 0.004 | Poisson | Lower Bound Imputation |
| factor(County)Hudson | 0.047 | 0.045 | 1.047 | 0.295 | Poisson | Lower Bound Imputation |
| factor(County)Hunterdon | -2.136 | 0.093 | -22.921 | 0.000 | Poisson | Lower Bound Imputation |
| factor(County)Mercer | -0.279 | 0.048 | -5.754 | 0.000 | Poisson | Lower Bound Imputation |
| factor(County)Middlesex | 0.298 | 0.040 | 7.393 | 0.000 | Poisson | Lower Bound Imputation |
| factor(County)Monmouth | 0.127 | 0.043 | 2.990 | 0.003 | Poisson | Lower Bound Imputation |
| factor(County)Morris | -0.549 | 0.051 | -10.795 | 0.000 | Poisson | Lower Bound Imputation |
| factor(County)Ocean | 0.316 | 0.040 | 7.937 | 0.000 | Poisson | Lower Bound Imputation |
| factor(County)Passaic | -0.140 | 0.046 | -3.059 | 0.002 | Poisson | Lower Bound Imputation |
| factor(County)Salem | -1.593 | 0.075 | -21.377 | 0.000 | Poisson | Lower Bound Imputation |
| factor(County)Somerset | -1.063 | 0.061 | -17.324 | 0.000 | Poisson | Lower Bound Imputation |
| factor(County)Sussex | -1.361 | 0.068 | -19.916 | 0.000 | Poisson | Lower Bound Imputation |
| factor(County)Union | -0.068 | 0.046 | -1.474 | 0.140 | Poisson | Lower Bound Imputation |
| factor(County)Warren | -1.571 | 0.074 | -21.268 | 0.000 | Poisson | Lower Bound Imputation |
| factor(Year)2018 | 0.078 | 0.027 | 2.922 | 0.003 | Poisson | Lower Bound Imputation |
| factor(Year)2019 | 0.045 | 0.027 | 1.678 | 0.093 | Poisson | Lower Bound Imputation |
| factor(Year)2020 | 0.055 | 0.027 | 2.022 | 0.043 | Poisson | Lower Bound Imputation |
| factor(Year)2021 | 0.128 | 0.027 | 4.815 | 0.000 | Poisson | Lower Bound Imputation |
| factor(Year)2022 | 0.104 | 0.027 | 3.910 | 0.000 | Poisson | Lower Bound Imputation |
| factor(Year)2023 | -0.002 | 0.028 | -0.069 | 0.945 | Poisson | Lower Bound Imputation |
| (Intercept) | 5.036 | 0.055 | 90.812 | 0.000 | NegBin | Lower Bound Imputation |
| Arrest_Count | -0.003 | 0.003 | -0.817 | 0.414 | NegBin | Lower Bound Imputation |
| factor(County)Bergen | -0.008 | 0.062 | -0.136 | 0.892 | NegBin | Lower Bound Imputation |
| factor(County)Burlington | 0.000 | 0.059 | 0.001 | 0.999 | NegBin | Lower Bound Imputation |
| factor(County)Camden | 0.599 | 0.060 | 9.935 | 0.000 | NegBin | Lower Bound Imputation |
| factor(County)Cape May | -1.148 | 0.074 | -15.584 | 0.000 | NegBin | Lower Bound Imputation |
| factor(County)Cumberland | -0.659 | 0.069 | -9.506 | 0.000 | NegBin | Lower Bound Imputation |
| factor(County)Essex | 0.730 | 0.059 | 12.426 | 0.000 | NegBin | Lower Bound Imputation |
| factor(County)Gloucester | -0.136 | 0.063 | -2.157 | 0.031 | NegBin | Lower Bound Imputation |
| factor(County)Hudson | 0.041 | 0.064 | 0.650 | 0.515 | NegBin | Lower Bound Imputation |
| factor(County)Hunterdon | -2.141 | 0.103 | -20.760 | 0.000 | NegBin | Lower Bound Imputation |
| factor(County)Mercer | -0.286 | 0.066 | -4.348 | 0.000 | NegBin | Lower Bound Imputation |
| factor(County)Middlesex | 0.294 | 0.058 | 5.073 | 0.000 | NegBin | Lower Bound Imputation |
| factor(County)Monmouth | 0.123 | 0.060 | 2.043 | 0.041 | NegBin | Lower Bound Imputation |
| factor(County)Morris | -0.553 | 0.066 | -8.322 | 0.000 | NegBin | Lower Bound Imputation |
| factor(County)Ocean | 0.314 | 0.058 | 5.458 | 0.000 | NegBin | Lower Bound Imputation |
| factor(County)Passaic | -0.145 | 0.063 | -2.306 | 0.021 | NegBin | Lower Bound Imputation |
| factor(County)Salem | -1.599 | 0.087 | -18.422 | 0.000 | NegBin | Lower Bound Imputation |
| factor(County)Somerset | -1.070 | 0.076 | -14.095 | 0.000 | NegBin | Lower Bound Imputation |
| factor(County)Sussex | -1.367 | 0.082 | -16.734 | 0.000 | NegBin | Lower Bound Imputation |
| factor(County)Union | -0.075 | 0.064 | -1.169 | 0.243 | NegBin | Lower Bound Imputation |
| factor(County)Warren | -1.577 | 0.086 | -18.285 | 0.000 | NegBin | Lower Bound Imputation |
| factor(Year)2018 | 0.083 | 0.037 | 2.228 | 0.026 | NegBin | Lower Bound Imputation |
| factor(Year)2019 | 0.042 | 0.038 | 1.110 | 0.267 | NegBin | Lower Bound Imputation |
| factor(Year)2020 | 0.057 | 0.038 | 1.513 | 0.130 | NegBin | Lower Bound Imputation |
| factor(Year)2021 | 0.120 | 0.037 | 3.204 | 0.001 | NegBin | Lower Bound Imputation |
| factor(Year)2022 | 0.083 | 0.038 | 2.201 | 0.028 | NegBin | Lower Bound Imputation |
| factor(Year)2023 | -0.029 | 0.038 | -0.752 | 0.452 | NegBin | Lower Bound Imputation |
| (Intercept) | 5.022 | 0.040 | 127.046 | 0.000 | Poisson | Upper Bound Imputation |
| Arrest_Count | -0.002 | 0.002 | -0.798 | 0.425 | Poisson | Upper Bound Imputation |
| factor(County)Bergen | -0.001 | 0.045 | -0.027 | 0.978 | Poisson | Upper Bound Imputation |
| factor(County)Burlington | 0.000 | 0.042 | -0.011 | 0.992 | Poisson | Upper Bound Imputation |
| factor(County)Camden | 0.604 | 0.041 | 14.838 | 0.000 | Poisson | Upper Bound Imputation |
| factor(County)Cape May | -1.146 | 0.061 | -18.758 | 0.000 | Poisson | Upper Bound Imputation |
| factor(County)Cumberland | -0.654 | 0.054 | -12.226 | 0.000 | Poisson | Upper Bound Imputation |
| factor(County)Essex | 0.733 | 0.039 | 18.691 | 0.000 | Poisson | Upper Bound Imputation |
| factor(County)Gloucester | -0.132 | 0.046 | -2.887 | 0.004 | Poisson | Upper Bound Imputation |
| factor(County)Hudson | 0.047 | 0.045 | 1.047 | 0.295 | Poisson | Upper Bound Imputation |
| factor(County)Hunterdon | -2.136 | 0.093 | -22.921 | 0.000 | Poisson | Upper Bound Imputation |
| factor(County)Mercer | -0.279 | 0.048 | -5.754 | 0.000 | Poisson | Upper Bound Imputation |
| factor(County)Middlesex | 0.298 | 0.040 | 7.393 | 0.000 | Poisson | Upper Bound Imputation |
| factor(County)Monmouth | 0.127 | 0.043 | 2.990 | 0.003 | Poisson | Upper Bound Imputation |
| factor(County)Morris | -0.549 | 0.051 | -10.795 | 0.000 | Poisson | Upper Bound Imputation |
| factor(County)Ocean | 0.316 | 0.040 | 7.937 | 0.000 | Poisson | Upper Bound Imputation |
| factor(County)Passaic | -0.140 | 0.046 | -3.059 | 0.002 | Poisson | Upper Bound Imputation |
| factor(County)Salem | -1.593 | 0.075 | -21.377 | 0.000 | Poisson | Upper Bound Imputation |
| factor(County)Somerset | -1.063 | 0.061 | -17.324 | 0.000 | Poisson | Upper Bound Imputation |
| factor(County)Sussex | -1.361 | 0.068 | -19.916 | 0.000 | Poisson | Upper Bound Imputation |
| factor(County)Union | -0.068 | 0.046 | -1.474 | 0.140 | Poisson | Upper Bound Imputation |
| factor(County)Warren | -1.571 | 0.074 | -21.268 | 0.000 | Poisson | Upper Bound Imputation |
| factor(Year)2018 | 0.078 | 0.027 | 2.922 | 0.003 | Poisson | Upper Bound Imputation |
| factor(Year)2019 | 0.045 | 0.027 | 1.678 | 0.093 | Poisson | Upper Bound Imputation |
| factor(Year)2020 | 0.055 | 0.027 | 2.022 | 0.043 | Poisson | Upper Bound Imputation |
| factor(Year)2021 | 0.128 | 0.027 | 4.815 | 0.000 | Poisson | Upper Bound Imputation |
| factor(Year)2022 | 0.104 | 0.027 | 3.910 | 0.000 | Poisson | Upper Bound Imputation |
| factor(Year)2023 | -0.002 | 0.028 | -0.069 | 0.945 | Poisson | Upper Bound Imputation |
| (Intercept) | 5.036 | 0.055 | 90.812 | 0.000 | NegBin | Upper Bound Imputation |
| Arrest_Count | -0.003 | 0.003 | -0.817 | 0.414 | NegBin | Upper Bound Imputation |
| factor(County)Bergen | -0.008 | 0.062 | -0.136 | 0.892 | NegBin | Upper Bound Imputation |
| factor(County)Burlington | 0.000 | 0.059 | 0.001 | 0.999 | NegBin | Upper Bound Imputation |
| factor(County)Camden | 0.599 | 0.060 | 9.935 | 0.000 | NegBin | Upper Bound Imputation |
| factor(County)Cape May | -1.148 | 0.074 | -15.584 | 0.000 | NegBin | Upper Bound Imputation |
| factor(County)Cumberland | -0.659 | 0.069 | -9.506 | 0.000 | NegBin | Upper Bound Imputation |
| factor(County)Essex | 0.730 | 0.059 | 12.426 | 0.000 | NegBin | Upper Bound Imputation |
| factor(County)Gloucester | -0.136 | 0.063 | -2.157 | 0.031 | NegBin | Upper Bound Imputation |
| factor(County)Hudson | 0.041 | 0.064 | 0.650 | 0.515 | NegBin | Upper Bound Imputation |
| factor(County)Hunterdon | -2.141 | 0.103 | -20.760 | 0.000 | NegBin | Upper Bound Imputation |
| factor(County)Mercer | -0.286 | 0.066 | -4.348 | 0.000 | NegBin | Upper Bound Imputation |
| factor(County)Middlesex | 0.294 | 0.058 | 5.073 | 0.000 | NegBin | Upper Bound Imputation |
| factor(County)Monmouth | 0.123 | 0.060 | 2.043 | 0.041 | NegBin | Upper Bound Imputation |
| factor(County)Morris | -0.553 | 0.066 | -8.322 | 0.000 | NegBin | Upper Bound Imputation |
| factor(County)Ocean | 0.314 | 0.058 | 5.458 | 0.000 | NegBin | Upper Bound Imputation |
| factor(County)Passaic | -0.145 | 0.063 | -2.306 | 0.021 | NegBin | Upper Bound Imputation |
| factor(County)Salem | -1.599 | 0.087 | -18.422 | 0.000 | NegBin | Upper Bound Imputation |
| factor(County)Somerset | -1.070 | 0.076 | -14.095 | 0.000 | NegBin | Upper Bound Imputation |
| factor(County)Sussex | -1.367 | 0.082 | -16.734 | 0.000 | NegBin | Upper Bound Imputation |
| factor(County)Union | -0.075 | 0.064 | -1.169 | 0.243 | NegBin | Upper Bound Imputation |
| factor(County)Warren | -1.577 | 0.086 | -18.285 | 0.000 | NegBin | Upper Bound Imputation |
| factor(Year)2018 | 0.083 | 0.037 | 2.228 | 0.026 | NegBin | Upper Bound Imputation |
| factor(Year)2019 | 0.042 | 0.038 | 1.110 | 0.267 | NegBin | Upper Bound Imputation |
| factor(Year)2020 | 0.057 | 0.038 | 1.513 | 0.130 | NegBin | Upper Bound Imputation |
| factor(Year)2021 | 0.120 | 0.037 | 3.204 | 0.001 | NegBin | Upper Bound Imputation |
| factor(Year)2022 | 0.083 | 0.038 | 2.201 | 0.028 | NegBin | Upper Bound Imputation |
| factor(Year)2023 | -0.029 | 0.038 | -0.752 | 0.452 | NegBin | Upper Bound Imputation |
| (Intercept) | 5.022 | 0.040 | 127.046 | 0.000 | Poisson | Multiple Imputation |
| Arrest_Count | -0.002 | 0.002 | -0.798 | 0.425 | Poisson | Multiple Imputation |
| factor(County)Bergen | -0.001 | 0.045 | -0.027 | 0.978 | Poisson | Multiple Imputation |
| factor(County)Burlington | 0.000 | 0.042 | -0.011 | 0.992 | Poisson | Multiple Imputation |
| factor(County)Camden | 0.604 | 0.041 | 14.838 | 0.000 | Poisson | Multiple Imputation |
| factor(County)Cape May | -1.146 | 0.061 | -18.758 | 0.000 | Poisson | Multiple Imputation |
| factor(County)Cumberland | -0.654 | 0.054 | -12.226 | 0.000 | Poisson | Multiple Imputation |
| factor(County)Essex | 0.733 | 0.039 | 18.691 | 0.000 | Poisson | Multiple Imputation |
| factor(County)Gloucester | -0.132 | 0.046 | -2.887 | 0.004 | Poisson | Multiple Imputation |
| factor(County)Hudson | 0.047 | 0.045 | 1.047 | 0.295 | Poisson | Multiple Imputation |
| factor(County)Hunterdon | -2.136 | 0.093 | -22.921 | 0.000 | Poisson | Multiple Imputation |
| factor(County)Mercer | -0.279 | 0.048 | -5.754 | 0.000 | Poisson | Multiple Imputation |
| factor(County)Middlesex | 0.298 | 0.040 | 7.393 | 0.000 | Poisson | Multiple Imputation |
| factor(County)Monmouth | 0.127 | 0.043 | 2.990 | 0.003 | Poisson | Multiple Imputation |
| factor(County)Morris | -0.549 | 0.051 | -10.795 | 0.000 | Poisson | Multiple Imputation |
| factor(County)Ocean | 0.316 | 0.040 | 7.937 | 0.000 | Poisson | Multiple Imputation |
| factor(County)Passaic | -0.140 | 0.046 | -3.059 | 0.002 | Poisson | Multiple Imputation |
| factor(County)Salem | -1.593 | 0.075 | -21.377 | 0.000 | Poisson | Multiple Imputation |
| factor(County)Somerset | -1.063 | 0.061 | -17.324 | 0.000 | Poisson | Multiple Imputation |
| factor(County)Sussex | -1.361 | 0.068 | -19.916 | 0.000 | Poisson | Multiple Imputation |
| factor(County)Union | -0.068 | 0.046 | -1.474 | 0.140 | Poisson | Multiple Imputation |
| factor(County)Warren | -1.571 | 0.074 | -21.268 | 0.000 | Poisson | Multiple Imputation |
| factor(Year)2018 | 0.078 | 0.027 | 2.922 | 0.003 | Poisson | Multiple Imputation |
| factor(Year)2019 | 0.045 | 0.027 | 1.678 | 0.093 | Poisson | Multiple Imputation |
| factor(Year)2020 | 0.055 | 0.027 | 2.022 | 0.043 | Poisson | Multiple Imputation |
| factor(Year)2021 | 0.128 | 0.027 | 4.815 | 0.000 | Poisson | Multiple Imputation |
| factor(Year)2022 | 0.104 | 0.027 | 3.910 | 0.000 | Poisson | Multiple Imputation |
| factor(Year)2023 | -0.002 | 0.028 | -0.069 | 0.945 | Poisson | Multiple Imputation |
| (Intercept) | 5.036 | 0.055 | 90.812 | 0.000 | NegBin | Multiple Imputation |
| Arrest_Count | -0.003 | 0.003 | -0.817 | 0.414 | NegBin | Multiple Imputation |
| factor(County)Bergen | -0.008 | 0.062 | -0.136 | 0.892 | NegBin | Multiple Imputation |
| factor(County)Burlington | 0.000 | 0.059 | 0.001 | 0.999 | NegBin | Multiple Imputation |
| factor(County)Camden | 0.599 | 0.060 | 9.935 | 0.000 | NegBin | Multiple Imputation |
| factor(County)Cape May | -1.148 | 0.074 | -15.584 | 0.000 | NegBin | Multiple Imputation |
| factor(County)Cumberland | -0.659 | 0.069 | -9.506 | 0.000 | NegBin | Multiple Imputation |
| factor(County)Essex | 0.730 | 0.059 | 12.426 | 0.000 | NegBin | Multiple Imputation |
| factor(County)Gloucester | -0.136 | 0.063 | -2.157 | 0.031 | NegBin | Multiple Imputation |
| factor(County)Hudson | 0.041 | 0.064 | 0.650 | 0.515 | NegBin | Multiple Imputation |
| factor(County)Hunterdon | -2.141 | 0.103 | -20.760 | 0.000 | NegBin | Multiple Imputation |
| factor(County)Mercer | -0.286 | 0.066 | -4.348 | 0.000 | NegBin | Multiple Imputation |
| factor(County)Middlesex | 0.294 | 0.058 | 5.073 | 0.000 | NegBin | Multiple Imputation |
| factor(County)Monmouth | 0.123 | 0.060 | 2.043 | 0.041 | NegBin | Multiple Imputation |
| factor(County)Morris | -0.553 | 0.066 | -8.322 | 0.000 | NegBin | Multiple Imputation |
| factor(County)Ocean | 0.314 | 0.058 | 5.458 | 0.000 | NegBin | Multiple Imputation |
| factor(County)Passaic | -0.145 | 0.063 | -2.306 | 0.021 | NegBin | Multiple Imputation |
| factor(County)Salem | -1.599 | 0.087 | -18.422 | 0.000 | NegBin | Multiple Imputation |
| factor(County)Somerset | -1.070 | 0.076 | -14.095 | 0.000 | NegBin | Multiple Imputation |
| factor(County)Sussex | -1.367 | 0.082 | -16.734 | 0.000 | NegBin | Multiple Imputation |
| factor(County)Union | -0.075 | 0.064 | -1.169 | 0.243 | NegBin | Multiple Imputation |
| factor(County)Warren | -1.577 | 0.086 | -18.285 | 0.000 | NegBin | Multiple Imputation |
| factor(Year)2018 | 0.083 | 0.037 | 2.228 | 0.026 | NegBin | Multiple Imputation |
| factor(Year)2019 | 0.042 | 0.038 | 1.110 | 0.267 | NegBin | Multiple Imputation |
| factor(Year)2020 | 0.057 | 0.038 | 1.513 | 0.130 | NegBin | Multiple Imputation |
| factor(Year)2021 | 0.120 | 0.037 | 3.204 | 0.001 | NegBin | Multiple Imputation |
| factor(Year)2022 | 0.083 | 0.038 | 2.201 | 0.028 | NegBin | Multiple Imputation |
| factor(Year)2023 | -0.029 | 0.038 | -0.752 | 0.452 | NegBin | Multiple Imputation |
| (Intercept) | 5.022 | 0.040 | 127.046 | 0.000 | Poisson | Bayesian Imputation 1 |
| Arrest_Count | -0.002 | 0.002 | -0.798 | 0.425 | Poisson | Bayesian Imputation 1 |
| factor(County)Bergen | -0.001 | 0.045 | -0.027 | 0.978 | Poisson | Bayesian Imputation 1 |
| factor(County)Burlington | 0.000 | 0.042 | -0.011 | 0.992 | Poisson | Bayesian Imputation 1 |
| factor(County)Camden | 0.604 | 0.041 | 14.838 | 0.000 | Poisson | Bayesian Imputation 1 |
| factor(County)Cape May | -1.146 | 0.061 | -18.758 | 0.000 | Poisson | Bayesian Imputation 1 |
| factor(County)Cumberland | -0.654 | 0.054 | -12.226 | 0.000 | Poisson | Bayesian Imputation 1 |
| factor(County)Essex | 0.733 | 0.039 | 18.691 | 0.000 | Poisson | Bayesian Imputation 1 |
| factor(County)Gloucester | -0.132 | 0.046 | -2.887 | 0.004 | Poisson | Bayesian Imputation 1 |
| factor(County)Hudson | 0.047 | 0.045 | 1.047 | 0.295 | Poisson | Bayesian Imputation 1 |
| factor(County)Hunterdon | -2.136 | 0.093 | -22.921 | 0.000 | Poisson | Bayesian Imputation 1 |
| factor(County)Mercer | -0.279 | 0.048 | -5.754 | 0.000 | Poisson | Bayesian Imputation 1 |
| factor(County)Middlesex | 0.298 | 0.040 | 7.393 | 0.000 | Poisson | Bayesian Imputation 1 |
| factor(County)Monmouth | 0.127 | 0.043 | 2.990 | 0.003 | Poisson | Bayesian Imputation 1 |
| factor(County)Morris | -0.549 | 0.051 | -10.795 | 0.000 | Poisson | Bayesian Imputation 1 |
| factor(County)Ocean | 0.316 | 0.040 | 7.937 | 0.000 | Poisson | Bayesian Imputation 1 |
| factor(County)Passaic | -0.140 | 0.046 | -3.059 | 0.002 | Poisson | Bayesian Imputation 1 |
| factor(County)Salem | -1.593 | 0.075 | -21.377 | 0.000 | Poisson | Bayesian Imputation 1 |
| factor(County)Somerset | -1.063 | 0.061 | -17.324 | 0.000 | Poisson | Bayesian Imputation 1 |
| factor(County)Sussex | -1.361 | 0.068 | -19.916 | 0.000 | Poisson | Bayesian Imputation 1 |
| factor(County)Union | -0.068 | 0.046 | -1.474 | 0.140 | Poisson | Bayesian Imputation 1 |
| factor(County)Warren | -1.571 | 0.074 | -21.268 | 0.000 | Poisson | Bayesian Imputation 1 |
| factor(Year)2018 | 0.078 | 0.027 | 2.922 | 0.003 | Poisson | Bayesian Imputation 1 |
| factor(Year)2019 | 0.045 | 0.027 | 1.678 | 0.093 | Poisson | Bayesian Imputation 1 |
| factor(Year)2020 | 0.055 | 0.027 | 2.022 | 0.043 | Poisson | Bayesian Imputation 1 |
| factor(Year)2021 | 0.128 | 0.027 | 4.815 | 0.000 | Poisson | Bayesian Imputation 1 |
| factor(Year)2022 | 0.104 | 0.027 | 3.910 | 0.000 | Poisson | Bayesian Imputation 1 |
| factor(Year)2023 | -0.002 | 0.028 | -0.069 | 0.945 | Poisson | Bayesian Imputation 1 |
| (Intercept) | 5.036 | 0.055 | 90.812 | 0.000 | NegBin | Bayesian Imputation 1 |
| Arrest_Count | -0.003 | 0.003 | -0.817 | 0.414 | NegBin | Bayesian Imputation 1 |
| factor(County)Bergen | -0.008 | 0.062 | -0.136 | 0.892 | NegBin | Bayesian Imputation 1 |
| factor(County)Burlington | 0.000 | 0.059 | 0.001 | 0.999 | NegBin | Bayesian Imputation 1 |
| factor(County)Camden | 0.599 | 0.060 | 9.935 | 0.000 | NegBin | Bayesian Imputation 1 |
| factor(County)Cape May | -1.148 | 0.074 | -15.584 | 0.000 | NegBin | Bayesian Imputation 1 |
| factor(County)Cumberland | -0.659 | 0.069 | -9.506 | 0.000 | NegBin | Bayesian Imputation 1 |
| factor(County)Essex | 0.730 | 0.059 | 12.426 | 0.000 | NegBin | Bayesian Imputation 1 |
| factor(County)Gloucester | -0.136 | 0.063 | -2.157 | 0.031 | NegBin | Bayesian Imputation 1 |
| factor(County)Hudson | 0.041 | 0.064 | 0.650 | 0.515 | NegBin | Bayesian Imputation 1 |
| factor(County)Hunterdon | -2.141 | 0.103 | -20.760 | 0.000 | NegBin | Bayesian Imputation 1 |
| factor(County)Mercer | -0.286 | 0.066 | -4.348 | 0.000 | NegBin | Bayesian Imputation 1 |
| factor(County)Middlesex | 0.294 | 0.058 | 5.073 | 0.000 | NegBin | Bayesian Imputation 1 |
| factor(County)Monmouth | 0.123 | 0.060 | 2.043 | 0.041 | NegBin | Bayesian Imputation 1 |
| factor(County)Morris | -0.553 | 0.066 | -8.322 | 0.000 | NegBin | Bayesian Imputation 1 |
| factor(County)Ocean | 0.314 | 0.058 | 5.458 | 0.000 | NegBin | Bayesian Imputation 1 |
| factor(County)Passaic | -0.145 | 0.063 | -2.306 | 0.021 | NegBin | Bayesian Imputation 1 |
| factor(County)Salem | -1.599 | 0.087 | -18.422 | 0.000 | NegBin | Bayesian Imputation 1 |
| factor(County)Somerset | -1.070 | 0.076 | -14.095 | 0.000 | NegBin | Bayesian Imputation 1 |
| factor(County)Sussex | -1.367 | 0.082 | -16.734 | 0.000 | NegBin | Bayesian Imputation 1 |
| factor(County)Union | -0.075 | 0.064 | -1.169 | 0.243 | NegBin | Bayesian Imputation 1 |
| factor(County)Warren | -1.577 | 0.086 | -18.285 | 0.000 | NegBin | Bayesian Imputation 1 |
| factor(Year)2018 | 0.083 | 0.037 | 2.228 | 0.026 | NegBin | Bayesian Imputation 1 |
| factor(Year)2019 | 0.042 | 0.038 | 1.110 | 0.267 | NegBin | Bayesian Imputation 1 |
| factor(Year)2020 | 0.057 | 0.038 | 1.513 | 0.130 | NegBin | Bayesian Imputation 1 |
| factor(Year)2021 | 0.120 | 0.037 | 3.204 | 0.001 | NegBin | Bayesian Imputation 1 |
| factor(Year)2022 | 0.083 | 0.038 | 2.201 | 0.028 | NegBin | Bayesian Imputation 1 |
| factor(Year)2023 | -0.029 | 0.038 | -0.752 | 0.452 | NegBin | Bayesian Imputation 1 |
| (Intercept) | 5.022 | 0.040 | 127.046 | 0.000 | Poisson | Bayesian Imputation 2 |
| Arrest_Count | -0.002 | 0.002 | -0.798 | 0.425 | Poisson | Bayesian Imputation 2 |
| factor(County)Bergen | -0.001 | 0.045 | -0.027 | 0.978 | Poisson | Bayesian Imputation 2 |
| factor(County)Burlington | 0.000 | 0.042 | -0.011 | 0.992 | Poisson | Bayesian Imputation 2 |
| factor(County)Camden | 0.604 | 0.041 | 14.838 | 0.000 | Poisson | Bayesian Imputation 2 |
| factor(County)Cape May | -1.146 | 0.061 | -18.758 | 0.000 | Poisson | Bayesian Imputation 2 |
| factor(County)Cumberland | -0.654 | 0.054 | -12.226 | 0.000 | Poisson | Bayesian Imputation 2 |
| factor(County)Essex | 0.733 | 0.039 | 18.691 | 0.000 | Poisson | Bayesian Imputation 2 |
| factor(County)Gloucester | -0.132 | 0.046 | -2.887 | 0.004 | Poisson | Bayesian Imputation 2 |
| factor(County)Hudson | 0.047 | 0.045 | 1.047 | 0.295 | Poisson | Bayesian Imputation 2 |
| factor(County)Hunterdon | -2.136 | 0.093 | -22.921 | 0.000 | Poisson | Bayesian Imputation 2 |
| factor(County)Mercer | -0.279 | 0.048 | -5.754 | 0.000 | Poisson | Bayesian Imputation 2 |
| factor(County)Middlesex | 0.298 | 0.040 | 7.393 | 0.000 | Poisson | Bayesian Imputation 2 |
| factor(County)Monmouth | 0.127 | 0.043 | 2.990 | 0.003 | Poisson | Bayesian Imputation 2 |
| factor(County)Morris | -0.549 | 0.051 | -10.795 | 0.000 | Poisson | Bayesian Imputation 2 |
| factor(County)Ocean | 0.316 | 0.040 | 7.937 | 0.000 | Poisson | Bayesian Imputation 2 |
| factor(County)Passaic | -0.140 | 0.046 | -3.059 | 0.002 | Poisson | Bayesian Imputation 2 |
| factor(County)Salem | -1.593 | 0.075 | -21.377 | 0.000 | Poisson | Bayesian Imputation 2 |
| factor(County)Somerset | -1.063 | 0.061 | -17.324 | 0.000 | Poisson | Bayesian Imputation 2 |
| factor(County)Sussex | -1.361 | 0.068 | -19.916 | 0.000 | Poisson | Bayesian Imputation 2 |
| factor(County)Union | -0.068 | 0.046 | -1.474 | 0.140 | Poisson | Bayesian Imputation 2 |
| factor(County)Warren | -1.571 | 0.074 | -21.268 | 0.000 | Poisson | Bayesian Imputation 2 |
| factor(Year)2018 | 0.078 | 0.027 | 2.922 | 0.003 | Poisson | Bayesian Imputation 2 |
| factor(Year)2019 | 0.045 | 0.027 | 1.678 | 0.093 | Poisson | Bayesian Imputation 2 |
| factor(Year)2020 | 0.055 | 0.027 | 2.022 | 0.043 | Poisson | Bayesian Imputation 2 |
| factor(Year)2021 | 0.128 | 0.027 | 4.815 | 0.000 | Poisson | Bayesian Imputation 2 |
| factor(Year)2022 | 0.104 | 0.027 | 3.910 | 0.000 | Poisson | Bayesian Imputation 2 |
| factor(Year)2023 | -0.002 | 0.028 | -0.069 | 0.945 | Poisson | Bayesian Imputation 2 |
| (Intercept) | 5.036 | 0.055 | 90.812 | 0.000 | NegBin | Bayesian Imputation 2 |
| Arrest_Count | -0.003 | 0.003 | -0.817 | 0.414 | NegBin | Bayesian Imputation 2 |
| factor(County)Bergen | -0.008 | 0.062 | -0.136 | 0.892 | NegBin | Bayesian Imputation 2 |
| factor(County)Burlington | 0.000 | 0.059 | 0.001 | 0.999 | NegBin | Bayesian Imputation 2 |
| factor(County)Camden | 0.599 | 0.060 | 9.935 | 0.000 | NegBin | Bayesian Imputation 2 |
| factor(County)Cape May | -1.148 | 0.074 | -15.584 | 0.000 | NegBin | Bayesian Imputation 2 |
| factor(County)Cumberland | -0.659 | 0.069 | -9.506 | 0.000 | NegBin | Bayesian Imputation 2 |
| factor(County)Essex | 0.730 | 0.059 | 12.426 | 0.000 | NegBin | Bayesian Imputation 2 |
| factor(County)Gloucester | -0.136 | 0.063 | -2.157 | 0.031 | NegBin | Bayesian Imputation 2 |
| factor(County)Hudson | 0.041 | 0.064 | 0.650 | 0.515 | NegBin | Bayesian Imputation 2 |
| factor(County)Hunterdon | -2.141 | 0.103 | -20.760 | 0.000 | NegBin | Bayesian Imputation 2 |
| factor(County)Mercer | -0.286 | 0.066 | -4.348 | 0.000 | NegBin | Bayesian Imputation 2 |
| factor(County)Middlesex | 0.294 | 0.058 | 5.073 | 0.000 | NegBin | Bayesian Imputation 2 |
| factor(County)Monmouth | 0.123 | 0.060 | 2.043 | 0.041 | NegBin | Bayesian Imputation 2 |
| factor(County)Morris | -0.553 | 0.066 | -8.322 | 0.000 | NegBin | Bayesian Imputation 2 |
| factor(County)Ocean | 0.314 | 0.058 | 5.458 | 0.000 | NegBin | Bayesian Imputation 2 |
| factor(County)Passaic | -0.145 | 0.063 | -2.306 | 0.021 | NegBin | Bayesian Imputation 2 |
| factor(County)Salem | -1.599 | 0.087 | -18.422 | 0.000 | NegBin | Bayesian Imputation 2 |
| factor(County)Somerset | -1.070 | 0.076 | -14.095 | 0.000 | NegBin | Bayesian Imputation 2 |
| factor(County)Sussex | -1.367 | 0.082 | -16.734 | 0.000 | NegBin | Bayesian Imputation 2 |
| factor(County)Union | -0.075 | 0.064 | -1.169 | 0.243 | NegBin | Bayesian Imputation 2 |
| factor(County)Warren | -1.577 | 0.086 | -18.285 | 0.000 | NegBin | Bayesian Imputation 2 |
| factor(Year)2018 | 0.083 | 0.037 | 2.228 | 0.026 | NegBin | Bayesian Imputation 2 |
| factor(Year)2019 | 0.042 | 0.038 | 1.110 | 0.267 | NegBin | Bayesian Imputation 2 |
| factor(Year)2020 | 0.057 | 0.038 | 1.513 | 0.130 | NegBin | Bayesian Imputation 2 |
| factor(Year)2021 | 0.120 | 0.037 | 3.204 | 0.001 | NegBin | Bayesian Imputation 2 |
| factor(Year)2022 | 0.083 | 0.038 | 2.201 | 0.028 | NegBin | Bayesian Imputation 2 |
| factor(Year)2023 | -0.029 | 0.038 | -0.752 | 0.452 | NegBin | Bayesian Imputation 2 |
kable(arrest_coefs, digits = 3, caption = "Arrest_Count Coefficients Across Models")
| Method | Model | term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|---|---|
| Midpoint Imputation | Poisson | Arrest_Count | -0.002 | 0.002 | -0.798 | 0.425 |
| Midpoint Imputation | NegBin | Arrest_Count | -0.003 | 0.003 | -0.817 | 0.414 |
| Lower Bound Imputation | Poisson | Arrest_Count | -0.002 | 0.002 | -0.798 | 0.425 |
| Lower Bound Imputation | NegBin | Arrest_Count | -0.003 | 0.003 | -0.817 | 0.414 |
| Upper Bound Imputation | Poisson | Arrest_Count | -0.002 | 0.002 | -0.798 | 0.425 |
| Upper Bound Imputation | NegBin | Arrest_Count | -0.003 | 0.003 | -0.817 | 0.414 |
| Multiple Imputation | Poisson | Arrest_Count | -0.002 | 0.002 | -0.798 | 0.425 |
| Multiple Imputation | NegBin | Arrest_Count | -0.003 | 0.003 | -0.817 | 0.414 |
| Bayesian Imputation 1 | Poisson | Arrest_Count | -0.002 | 0.002 | -0.798 | 0.425 |
| Bayesian Imputation 1 | NegBin | Arrest_Count | -0.003 | 0.003 | -0.817 | 0.414 |
| Bayesian Imputation 2 | Poisson | Arrest_Count | -0.002 | 0.002 | -0.798 | 0.425 |
| Bayesian Imputation 2 | NegBin | Arrest_Count | -0.003 | 0.003 | -0.817 | 0.414 |
Arrest_Count is not a statistically significant predictor in any model or imputation method, so there is not much to report here. @Eli, any other ideas for how to model this better would be greatly appreciated!
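One idea, sketched below under the assumption that the imputed county-year data carry a Population column (a hypothetical name; rename to whatever the merged data actually use), is to model deaths as a rate rather than a raw count by adding a population offset:
# Sketch: same specification as the models above, but with a population
# offset so the model compares death *rates* across counties.
# Population is assumed to exist on combined_df_county_md.
poisson_rate_model <- glm(
  Deaths_num ~ Arrest_Count + factor(County) + factor(Year) +
    offset(log(Population)),
  family = poisson,
  data = combined_df_county_md
)
summary(poisson_rate_model)
Another option, tried next, is lagging arrests by one year within each county.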
# Define reusable lagging function
lag_arrest <- function(df) {
df %>%
arrange(County, Year) %>%
group_by(County) %>%
mutate(Arrest_Count_lag1 = lag(Arrest_Count, 1)) %>%
ungroup()
}
# Apply to each imputed dataset
combined_df_county_md_lag_arrest <- lag_arrest(combined_df_county_md)
df_lower_lag_arrest <- lag_arrest(df_lower)
df_upper_lag_arrest <- lag_arrest(df_upper)
imputed_df_MI_lag_arrest <- lag_arrest(imputed_df_MI)
combined_df_county_Bay1_lag_arrest <- lag_arrest(combined_df_county_Bay1)
combined_df_county_Bay2_lag_arrest <- lag_arrest(combined_df_county_Bay2)
run_lagged_arrest_models <- function(data, label) {
cat("\n=== Running Lagged Arrest Models for:", label, "===\n")
# Filter for Year >= 2017
data <- data %>% filter(Year >= 2017)
# Fit Poisson model
poisson_model <- glm(
Deaths_num ~ Arrest_Count_lag1 + factor(County) + factor(Year),
family = poisson,
data = data
)
# Check for overdispersion
dispersion <- sum(residuals(poisson_model, type = "pearson")^2) / poisson_model$df.residual
cat("Overdispersion ratio (Poisson):", round(dispersion, 2), "\n")
# Fit NB model
nb_model <- MASS::glm.nb(
Deaths_num ~ Arrest_Count_lag1 + factor(County) + factor(Year),
data = data
)
return(list(
label = label,
poisson_model = poisson_model,
nb_model = nb_model,
poisson_summary = summary(poisson_model),
nb_summary = summary(nb_model),
poisson_AIC = AIC(poisson_model),
nb_AIC = AIC(nb_model),
dispersion = dispersion
))
}
results_md_arrest_lag <- run_lagged_arrest_models(combined_df_county_md_lag_arrest, "Midpoint Imputation")
##
## === Running Lagged Arrest Models for: Midpoint Imputation ===
## Overdispersion ratio (Poisson): 2.19
results_lower_arrest_lag <- run_lagged_arrest_models(df_lower_lag_arrest, "Lower Bound Imputation")
##
## === Running Lagged Arrest Models for: Lower Bound Imputation ===
## Overdispersion ratio (Poisson): 2.19
results_upper_arrest_lag <- run_lagged_arrest_models(df_upper_lag_arrest, "Upper Bound Imputation")
##
## === Running Lagged Arrest Models for: Upper Bound Imputation ===
## Overdispersion ratio (Poisson): 2.19
results_mi_arrest_lag <- run_lagged_arrest_models(imputed_df_MI_lag_arrest, "Multiple Imputation")
##
## === Running Lagged Arrest Models for: Multiple Imputation ===
## Overdispersion ratio (Poisson): 2.26
results_bayes1_arrest_lag <- run_lagged_arrest_models(combined_df_county_Bay1_lag_arrest, "Bayesian Imputation 1")
##
## === Running Lagged Arrest Models for: Bayesian Imputation 1 ===
## Overdispersion ratio (Poisson): 2.19
results_bayes2_arrest_lag <- run_lagged_arrest_models(combined_df_county_Bay2_lag_arrest, "Bayesian Imputation 2")
##
## === Running Lagged Arrest Models for: Bayesian Imputation 2 ===
## Overdispersion ratio (Poisson): 2.19
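Since every Poisson fit shows an overdispersion ratio above 2, the negative binomial models are the safer choice. A quick AIC comparison (a sketch using the result lists returned above) can confirm this:
# Collect Poisson vs. NB AIC and the dispersion ratio for each method
aic_table <- bind_rows(lapply(
  list(results_md_arrest_lag, results_lower_arrest_lag,
       results_upper_arrest_lag, results_mi_arrest_lag,
       results_bayes1_arrest_lag, results_bayes2_arrest_lag),
  function(res) data.frame(
    Method      = res$label,
    Poisson_AIC = res$poisson_AIC,
    NegBin_AIC  = res$nb_AIC,
    Dispersion  = res$dispersion
  )
))
knitr::kable(aic_table, digits = 1, caption = "Poisson vs. NegBin AIC (Lagged Arrest Models)")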
library(broom)
extract_arrest_lag_coefs <- function(model, method_label) {
tidy(model) %>%
filter(term == "Arrest_Count_lag1") %>%
mutate(Method = method_label) %>%
dplyr::select(Method, term, estimate, std.error, statistic, p.value)
}
arrest_lag_coefs <- bind_rows(
extract_arrest_lag_coefs(results_md_arrest_lag$nb_model, "Midpoint"),
extract_arrest_lag_coefs(results_lower_arrest_lag$nb_model, "Lower Bound"),
extract_arrest_lag_coefs(results_upper_arrest_lag$nb_model, "Upper Bound"),
extract_arrest_lag_coefs(results_mi_arrest_lag$nb_model, "Multiple Imputation"),
extract_arrest_lag_coefs(results_bayes1_arrest_lag$nb_model, "Bayesian 1"),
extract_arrest_lag_coefs(results_bayes2_arrest_lag$nb_model, "Bayesian 2")
)
# Display results
knitr::kable(arrest_lag_coefs, digits = 3, caption = "Effect of Lagged Arrests on Overdose Deaths (NB Models)")
| Method | term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|---|
| Midpoint | Arrest_Count_lag1 | 0.001 | 0.003 | 0.231 | 0.817 |
| Lower Bound | Arrest_Count_lag1 | 0.001 | 0.003 | 0.231 | 0.817 |
| Upper Bound | Arrest_Count_lag1 | 0.001 | 0.003 | 0.231 | 0.817 |
| Multiple Imputation | Arrest_Count_lag1 | 0.000 | 0.003 | 0.027 | 0.979 |
| Bayesian 1 | Arrest_Count_lag1 | 0.001 | 0.003 | 0.231 | 0.817 |
| Bayesian 2 | Arrest_Count_lag1 | 0.001 | 0.003 | 0.231 | 0.817 |
The lagged Arrest_Count is still not a statistically significant predictor in any model.
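For interpretability, the NB coefficient can be read as an incidence rate ratio; a minimal sketch with broom (loaded above), shown here for the midpoint-imputation model:
# exp(beta) = multiplicative change in expected deaths per additional
# lagged arrest; a CI straddling 1 means no detectable effect.
tidy(results_md_arrest_lag$nb_model, exponentiate = TRUE, conf.int = TRUE) %>%
  filter(term == "Arrest_Count_lag1") %>%
  dplyr::select(term, estimate, conf.low, conf.high, p.value)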
Possible theoretical explanation: one year is too long an increment to see an effect. The literature suggests the impact appears about one month after arrests/convictions. We can rerun this analysis when we get the CDC's unsuppressed monthly county data.
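When the monthly data arrive, only the lag step needs to change; a placeholder sketch, assuming a hypothetical monthly_df with County, Year, Month, Deaths_num, and Arrest_Count columns:
# Hypothetical: lag arrests by one month within each county, then refit
# the same NB specification on the monthly panel.
lag_arrest_monthly <- function(df) {
  df %>%
    arrange(County, Year, Month) %>%
    group_by(County) %>%
    mutate(Arrest_Count_lag1m = lag(Arrest_Count, 1)) %>%
    ungroup()
}
# monthly_df <- lag_arrest_monthly(monthly_df)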
~~~~~~~~~~~~~~~~ Notes on Dataset ~~~~~~~~~~~~~~~~~~~
State monthly overdose data combine CDC WONDER data (1999-2022) with counts of "unconfirmed ODs" for 2023-2025 from the State of New Jersey, Office of the Chief State Medical Examiner dashboard: https://ocsme.nj.gov/dashboard
I added the 2024 NJ population figure from the source below because the U.S. Census website is down due to the government shutdown: https://www.njspotlightnews.org/2025/01/nj-population-2024-census/