library(readxl)
library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
Convictions <- read_excel("Strict Liability spread sheet.xlsx")
Arrest <- read_excel("NJ DIH Arrest data 2017-2024.xlsx")
Overdose <- read_csv("Underlying Cause of Death 1999-2023.csv", 
    col_types = cols(Notes = col_skip(), 
        Year = col_integer(), `Year Code` = col_integer(), 
        Population = col_integer()))
State_OD_18 <- read_csv("Statewide_data_month_2018-2025-5.csv", 
    col_types = cols(Notes = col_skip(), 
        State = col_skip(), `State Code` = col_skip(), 
        Population = col_skip(), `Crude Rate` = col_skip()))
State_OD_mon <- read_csv("State Month Underlying Cause of Death, 1999-2025.csv")
## Rows: 317 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Month, Month Code, Deaths
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Arrest <- Arrest %>%
  mutate(
    Race = ifelse(Race %in% c("Unknown", "Not Provided"), NA, Race)
  )

Convictions <- Convictions  %>%
  mutate(
    Race = ifelse(Race %in% c("Unknown", "Not Provided"), NA, Race)
  )

Convictions <- Convictions %>%
  mutate(
    Ethnicity = ifelse(Ethnicity %in% c("Unknown", "Not Provided", "NA", ""), NA, Ethnicity)
  )
# Create Conviction Year variable
Convictions$Sentence.Date <- as.Date(Convictions$`Date of Sentence`)
Convictions$Sentence.Year <- as.numeric(format(Convictions$Sentence.Date, "%Y"))

#Create Conviction Month & Year code
Convictions$`Month Code` <- format(Convictions$Sentence.Date, "%Y/%m")



#Create Year & Month Variable in State_OD
library(dplyr)

State_OD_18 <- State_OD_18 %>%
  mutate(
    Year = as.numeric(sub("/.*", "", `Month Code`)),
    Month = as.numeric(sub(".*/", "", `Month Code`))
  )

State_OD_mon <- State_OD_mon %>%
  mutate(
    Year = as.numeric(sub("/.*", "", `Month Code`)),
    Month = as.numeric(sub(".*/", "", `Month Code`))
  )
# Create Month Code
Arrest$Month_NUM <- match(Arrest$`Arrest Month`, month.name)

Arrest$`Month Code` <- paste0(Arrest$`Arrest Year`, "/", sprintf("%02d", Arrest$Month_NUM))

# Create list of all months to include: January 2017 through December 2024
mons_list <- data.frame(`Month Code` = format(
  seq.Date(
    from = as.Date("2017-01-01"),  # set start
    to   = as.Date("2024-12-31"),  # set  end
    by   = "month"
  ),
  "%Y/%m" 
))

mons_list <- mons_list %>%
  rename(`Month Code` = Month.Code)

#Count arrests
arrests_by_mon <- Arrest %>%
  group_by(`Month Code`) %>%
  summarise(Arrest_Count = n(), .groups = "drop")

# Insert 0's for months not reported as having arrests
arrests_by_mon <- mons_list %>%
  left_join(arrests_by_mon, by = "Month Code") %>%
  mutate(Arrest_Count = ifelse(is.na(Arrest_Count), 0, Arrest_Count))
library(dplyr)
library(tidyr)

# Step 1: Create list of all months to include; conviction dates run August 5, 1988 through June 23, 2025 (the sequence below currently ends at December 2022)
all_months <- data.frame(`Month Code` = format(
  seq.Date(
    from = as.Date("1988-01-01"),  # set  start
    to   = as.Date("2022-12-01"),  # set  end
    by   = "month"
  ),
  "%Y/%m" 
))

all_months <- all_months %>%
  rename(`Month Code` = Month.Code)

# Step 2: Count convictions
conv_by_mon <- Convictions %>%
  group_by(`Month Code`) %>%
  summarise(Conv_Count = n(), .groups = "drop")

# Insert 0's for months not reported as having convictions
conv_by_mon <- all_months %>%
  left_join(conv_by_mon, by = "Month Code") %>%
  mutate(Conv_Count = ifelse(is.na(Conv_Count), 0, Conv_Count))

For the state-level data, I am using the population from the counties in the CDC WONDER dataset and combining them, but it may be more accurate to find a single population dataset for NJ as a whole. I added 2024 population estimates from the Census Bureau website before it was rendered inoperable by the government shutdown.

# 1) Statewide population by year (summing population data across counties to determine state population -- may be best to find population data from another source for the state)
pop_by_year <- Overdose %>%
  group_by(Year) %>%
  summarise(Population = sum(Population, na.rm = TRUE), .groups = "drop")

#Added 2024 population estimates from Census bureau

library(dplyr)
pop_by_year <- pop_by_year %>%
  bind_rows(data.frame(
    Year = 2024,
    Population = 9500851
  ))


# 2) Arrests by year
arrests_by_year <- Arrest %>%
  group_by(`Arrest Year`) %>%
  summarise(Arrest_Count = n(), .groups = "drop") %>%
  rename(Year = `Arrest Year`)

# 3) Convictions by year 
#convictions_by_year_filter code removed

convictions_by_year<- Convictions %>%
  group_by(Sentence.Year) %>%
  summarise(Conviction_Count = n(), .groups = "drop") %>%
  rename(Year = Sentence.Year)

# 4) Deaths by year (statewide)

deaths_by_year <- State_OD_18 %>%
  group_by(Year) %>%
  summarise(Count = sum(Deaths, na.rm = TRUE), .groups = "drop")

#ARRESTS 2017-2024

library(ggplot2)

ggplot(arrests_by_year, aes(x = `Year`, y = Arrest_Count)) +
  geom_line(group = 1, color = "steelblue", linewidth = 1.2) +
  geom_point(size = 2) +
  geom_text(aes(label = Arrest_Count), vjust = -0.5) +
  labs(title = "Drug-Induced Arrests by Year",
       x = "Year", y = "Arrest Count") +
  theme_minimal()

@Jennifer, we need to double check that the arrest data are complete through 2024. I did not pull them; looking at the dataset, the last arrest included is from September 2024.
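A quick way to verify this is to look at how far the arrest records extend; a minimal sketch using the Arrest Year and Month Code columns created above:

# Check how far the arrest data extend (sketch)
Arrest %>%
  filter(`Arrest Year` == 2024) %>%
  count(`Month Code`) %>%
  arrange(`Month Code`)

# Most recent month with any recorded arrest ("YYYY/MM" sorts chronologically)
max(Arrest$`Month Code`, na.rm = TRUE)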

library(knitr)
county_summary_arr <- Arrest %>%
  count(County) %>%
  mutate(Percent = round(n / sum(n) * 100, 1)) %>%
  arrange(desc(n))

kable(county_summary_arr,
      caption = "Table: Arrests by County",
      col.names = c("County", "Count", "Percent"),
      align = "lrr")
Table: Arrests by County

| County | Count | Percent |
|:---|---:|---:|
| Atlantic | 57 | 15.5 |
| Burlington | 56 | 15.3 |
| Cape May | 47 | 12.8 |
| Ocean | 37 | 10.1 |
| Middlesex | 31 | 8.4 |
| Statewide or Regional LEA | 22 | 6.0 |
| Monmouth | 20 | 5.4 |
| Morris | 20 | 5.4 |
| Bergen | 18 | 4.9 |
| Gloucester | 14 | 3.8 |
| Passaic | 13 | 3.5 |
| Essex | 9 | 2.5 |
| Cumberland | 5 | 1.4 |
| Hunterdon | 4 | 1.1 |
| Camden | 3 | 0.8 |
| Mercer | 3 | 0.8 |
| Salem | 2 | 0.5 |
| Union | 2 | 0.5 |
| Warren | 2 | 0.5 |
| Hudson | 1 | 0.3 |
| Somerset | 1 | 0.3 |

Strict Liability Conviction Data: 1988 – April 2025

library(dplyr)
library(ggplot2)

# Line plot
ggplot(convictions_by_year, aes(x = Year, y = Conviction_Count)) +
  geom_line(color = "darkblue", size = 1.2) +
  geom_point(size = 2) +
  geom_text(aes(label = Conviction_Count), vjust = -0.7, size = 3.5) +  # adds counts above points
  scale_x_continuous(breaks = seq(min(convictions_by_year$Year, na.rm = TRUE),
                                  max(convictions_by_year$Year, na.rm = TRUE),
                                  by = 2)) +
  labs(title = "Number of DID Convictions by Year",
       x = "Year",
       y = "Number of Convictions") +
  theme_minimal()

library(knitr)
county_summary_conv <- Convictions %>%
  count(County) %>%
  mutate(Percent = round(n / sum(n) * 100, 1)) %>%
  arrange(desc(n))

kable(county_summary_conv,
      caption = "Table: Convictions by County, 1999–2024 (partial)",
      col.names = c("County", "Count", "Percent"),
      align = "lrr")

Overdose

library(dplyr)
library(ggplot2)

# Summarize total deaths by year

State_OD_mon$Deaths <- as.numeric(State_OD_mon$Deaths)
## Warning: NAs introduced by coercion
deaths_by_year_tot <- State_OD_mon %>%
  group_by(Year) %>%
  summarise(Count = sum(Deaths, na.rm = TRUE), .groups = "drop")

# Line chart of total deaths over time
ggplot(deaths_by_year_tot, aes(x = Year, y = Count)) +
  geom_line(color = "darkred", size = 1.2) +
  geom_point(size = 2) +
  geom_text(aes(label = Count), vjust = -0.5, size = 3) +
  labs(title = "Total Overdose Deaths by Year",
       x = "Year", y = "Number of Deaths") +
  theme_minimal()

library(dplyr)
#Combine into one dataset
combined_data <- full_join(
  dplyr::select(arrests_by_year, Year, Arrest_Count),
  dplyr::select(deaths_by_year, Year, Count),
  by = "Year"
)
library(ggplot2)

# Choose a scaling factor to bring arrests to the same ballpark as deaths
scale_factor <- 30  # adjust as needed

ggplot(combined_data, aes(x = Year)) +
  geom_line(aes(y = Count, color = "Overdose Deaths"), linewidth = 1.2) +
  geom_line(aes(y = Arrest_Count * scale_factor, color = "Arrests"), linewidth = 1.2) +
  scale_y_continuous(
    name = "Overdose Deaths",
    sec.axis = sec_axis(~./scale_factor, name = "Arrests")
  ) +
  scale_color_manual(values = c("Overdose Deaths" = "blue", "Arrests" = "red")) +
  labs(
    title = "Overdose Deaths vs Arrests by Year (Dual Y-Axis)",
    x = "Year",
    color = "Metric"
  ) +
  theme_minimal()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

Visually, there does not appear to be a relationship whereby arrests decrease when overdose deaths decrease. This means that the number of overdose deaths (the underlying incident rate) does not seem to predict whether or not there are arrests. Of course, we will test this later. It also does not appear that arrests result in fewer overdoses the next year.
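As a rough descriptive check of that last point (only a handful of yearly observations, so not a formal test), the arrest series can be compared with the following year's deaths; a minimal sketch using the combined_data object above:

# Spearman correlation between arrests and the next year's overdose deaths (sketch)
combined_data %>%
  arrange(Year) %>%
  mutate(Deaths_next_year = lead(Count)) %>%
  summarise(rho = cor(Arrest_Count, Deaths_next_year,
                      method = "spearman", use = "complete.obs"))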

library(tidyr)

combined_long <- combined_data %>%
  pivot_longer(cols = c(Arrest_Count, Count), names_to = "Metric", values_to = "Value")

ggplot(combined_long, aes(x = Year, y = Value)) +
  geom_line(color = "steelblue", size = 1.2) +
  facet_wrap(
    ~ Metric,
    scales = "free_y",
    labeller = as_labeller(
      c("Count" = "Overdose Deaths", "Arrest_Count" = "Arrests")
    )
  ) +
  labs(
    title = "Arrests vs Overdose Deaths by Year",
    x = "Year",
    y = "Count"
  ) +
  theme_minimal()

Transformed to Z scores…

combined_data_z <- combined_data %>%
  mutate(
    Arrest_z = scale(Arrest_Count),
    Deaths_z = scale(Count)
  )

ggplot(combined_data_z, aes(x = Year)) +
  geom_line(aes(y = Arrest_z, color = "Arrests"), linewidth = 1.2) +
  geom_line(aes(y = Deaths_z, color = "Overdose Deaths"), linewidth = 1.2) +
  labs(title = "Z-Score Comparison: Arrests vs Overdose Deaths",
       y = "Z-Score (Standardized Values)",
       x = "Year",
       color = "Metric") +
  theme_minimal()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

Arrest_OD_month <- left_join(arrests_by_mon, State_OD_mon, by = "Month Code")

Arrest_OD_month <- Arrest_OD_month %>%
  left_join(pop_by_year, by = "Year")

#Convictions/Overdoses

convictions_filtered <- Convictions %>%
  filter(Sentence.Year >= 2018, Sentence.Year <= 2025)


#Create aggregated count of convictions by month for the entire dataset time period
conv_sum <- Convictions %>%
  filter(!is.na(`Month Code`)) %>%
  group_by(`Month Code`) %>%
  summarise(Conviction_Count = n()) %>%
  arrange(`Month Code`)
library(dplyr)

combined_df_3 <- conv_by_mon %>%
  left_join(State_OD_mon,       by = "Month Code") 

#Filter out prior to 1999

combined_df_3 <- combined_df_3 %>%
  mutate(Month_Date = as.Date(paste0(`Month Code`, "/01"), format = "%Y/%m/%d"))

combined_df_3_filtered <- combined_df_3 %>%
  filter(Month_Date >= as.Date("1999-01-01")) # because this is as far back as the OD data goes

#Add population to the dataset

combined_df_3 <-combined_df_3 %>%
  left_join(pop_by_year, by = "Year")
combined_df_3_filtered <- combined_df_3_filtered %>%
  left_join(pop_by_year, by = "Year")
combined_yearly <- deaths_by_year_tot %>%
  left_join(convictions_by_year,       by = "Year") 

#filter dates

combined_yearly <- combined_yearly %>%
   filter(Year >= 1999)

# Replace NA conviction counts with 0
combined_yearly <- combined_yearly %>%
  replace_na(list(
    Conviction_Count = 0
  ))
library(ggplot2)

scale_factor <- 20  # Adjust to visually match scales

ggplot(combined_yearly, aes(x = Year)) +
  geom_line(aes(y = Count, color = "Overdose Deaths"), linewidth = 1.2) +
  geom_line(aes(y = Conviction_Count * scale_factor, color = "Convictions"), linewidth = 1.2) +
  scale_y_continuous(
    name = "Overdose Deaths",
    sec.axis = sec_axis(~./scale_factor, name = "Convictions")
  ) +
  scale_color_manual(
    values = c("Overdose Deaths" = "blue", "Convictions" = "red")
  ) +
  labs(
    title = "Overdose Deaths vs Convictions by Year",
    x = "Year",
    color = "Metric"
  ) +
  theme_minimal()

State – further statistical analysis

Arrests and Overdose deaths

Correlation

Because both variables are counts, a Spearman rank correlation was run between monthly arrests and overdose deaths.

cor.test(Arrest_OD_month$Arrest_Count,
         Arrest_OD_month$Deaths,
         method = "spearman")
## Warning in cor.test.default(Arrest_OD_month$Arrest_Count,
## Arrest_OD_month$Deaths, : Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  Arrest_OD_month$Arrest_Count and Arrest_OD_month$Deaths
## S = 113708, p-value = 0.02496
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.2287815

There was a statistically significant, positive association between the two variables, ρ = 0.229, p = .025. This suggests that months with higher numbers of arrests tend to also have higher numbers of overdose deaths.

Poisson Model: Arrest_OD_month

library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
# Poisson regression: model overdose deaths as count
poisson_model1 <- glm(
  Deaths ~ Arrest_Count + Population,
  family = poisson(link = "log"),
  data = Arrest_OD_month
)

summary(poisson_model1)
## 
## Call:
## glm(formula = Deaths ~ Arrest_Count + Population, family = poisson(link = "log"), 
##     data = Arrest_OD_month)
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   8.405e+00  3.181e-01  26.423   <2e-16 ***
## Arrest_Count  4.468e-03  2.332e-03   1.916   0.0554 .  
## Population   -3.296e-07  3.448e-08  -9.557   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 686.71  on 95  degrees of freedom
## Residual deviance: 551.89  on 93  degrees of freedom
## AIC: 1252.9
## 
## Number of Fisher Scoring iterations: 4
exp(coef(poisson_model1))
##  (Intercept) Arrest_Count   Population 
## 4470.6019937    1.0044780    0.9999997
# Overdispersion test
overdispersion <- sum(residuals(poisson_model1, type = "pearson")^2) / df.residual(poisson_model1)
overdispersion
## [1] 5.718123

Model: Deaths ~ Arrest_Count + Population

Overdispersion detected: dispersion = 5.72 (a value greater than 1 means the variance exceeds the mean, so the Poisson equidispersion assumption is violated).

A Poisson regression model indicated that arrest counts were marginally associated with overdose deaths (p = .055), with a 1-unit increase in arrests associated with a 0.45% increase in expected deaths. Population size, however, was negatively associated with deaths and reached statistical significance (p < .001). Despite these findings, the model exhibited substantial overdispersion (dispersion = 5.72), suggesting that the Poisson assumptions were violated and that standard errors and significance tests may be unreliable.
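For reference, the 0.45% figure comes from exponentiating the arrest coefficient in the fitted model above; a one-line sketch:

# Percent change in expected deaths per one-unit increase in arrests (sketch)
(exp(coef(poisson_model1)["Arrest_Count"]) - 1) * 100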

A negative binomial model may therefore be more appropriate.

Negative Binomial Model

library(MASS)
nb_model <- glm.nb(Deaths ~ Arrest_Count + Population, data = Arrest_OD_month)
summary(nb_model)
## 
## Call:
## glm.nb(formula = Deaths ~ Arrest_Count + Population, data = Arrest_OD_month, 
##     init.theta = 43.83360034, link = log)
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   8.601e+00  7.852e-01  10.954  < 2e-16 ***
## Arrest_Count  5.112e-03  5.909e-03   0.865    0.387    
## Population   -3.513e-07  8.496e-08  -4.135 3.55e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(43.8336) family taken to be 1)
## 
##     Null deviance: 121.922  on 95  degrees of freedom
## Residual deviance:  98.233  on 93  degrees of freedom
## AIC: 974.65
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  43.83 
##           Std. Err.:  7.70 
## 
##  2 x log-likelihood:  -966.651
# Overdispersion test
overdispersion2 <- sum(residuals(nb_model, type = "pearson")^2) / df.residual(nb_model)
overdispersion2
## [1] 0.9765794

Model: glm.nb(Deaths ~ Arrest_Count + Population, data = Arrest_OD_month)

| Term | Estimate | exp(β) | p-value | Interpretation |
|:---|---:|---:|---:|:---|
| Intercept | 8.601 | 5433.75 | < .001 | Baseline predicted deaths |
| Arrest_Count | 0.00511 | 1.0051 | 0.387 | Not significant; arrests do not predict deaths |
| Population | -3.51e-07 | ~0.99999965 | < .001 | Significant negative association |

The dispersion statistic (overdispersion2) is 0.977, indicating no overdispersion.

AIC = 974.65, much lower than the Poisson model's 1252.9, so the negative binomial is clearly the better fit.

A Negative Binomial regression model was fitted to examine the association between monthly overdose deaths, arrest counts, and population size, addressing overdispersion observed in the earlier Poisson model. Arrest counts were not statistically significant (β = 0.0051, p = .387), suggesting no meaningful association between arrests and overdose deaths in the observed data.

The model showed no evidence of overdispersion (dispersion = 0.98), indicating a good fit. Compared to the Poisson model, the negative binomial regression provided a substantially improved fit (AIC = 974.65 vs. 1252.9), confirming it as the more appropriate model for overdispersed count data.
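The AIC comparison can also be pulled directly from the two fitted objects; a minimal sketch:

# Side-by-side comparison of the Poisson and negative binomial fits (sketch)
AIC(poisson_model1, nb_model)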

Negative Binomial Model with 1 month time lag.

library(dplyr)

Arrest_OD_month <- Arrest_OD_month %>%
  mutate(Arrest_Count_lag1 = lag(Arrest_Count, n = 1))

library(MASS)

nb_model_lag <- glm.nb(Deaths ~ Arrest_Count_lag1 + Population, data = Arrest_OD_month)
summary(nb_model_lag)
## 
## Call:
## glm.nb(formula = Deaths ~ Arrest_Count_lag1 + Population, data = Arrest_OD_month, 
##     init.theta = 42.94998848, link = log)
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        8.818e+00  7.921e-01  11.133  < 2e-16 ***
## Arrest_Count_lag1  1.081e-03  6.001e-03   0.180    0.857    
## Population        -3.735e-07  8.569e-08  -4.359 1.31e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(42.95) family taken to be 1)
## 
##     Null deviance: 119.530  on 94  degrees of freedom
## Residual deviance:  97.187  on 92  degrees of freedom
##   (1 observation deleted due to missingness)
## AIC: 965.97
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  42.95 
##           Std. Err.:  7.56 
## 
##  2 x log-likelihood:  -957.967
| Term | Estimate | exp(β) | p-value | Interpretation |
|:---|---:|---:|---:|:---|
| Intercept | 8.818 | 6,760.39 | < .001 | Baseline deaths |
| Arrest_Count_lag1 | 0.00108 | 1.0011 | 0.857 | Not significant |
| Population | -3.735e-07 | ~0.9999996 | < .001 | Significant |

A negative binomial regression was conducted to test whether lagged arrest counts predict monthly overdose deaths, while accounting for population size. Arrest counts were lagged by one month to assess potential delayed effects. The model revealed that lagged arrests were not significantly associated with overdose deaths (β = 0.00108, p = .857), indicating no measurable delayed impact. Population size remained a significant negative predictor (β = -3.735e-07, p < .001).

The model demonstrated good fit to the data, with a dispersion statistic near 1 and a lower AIC (965.97) than the non-lagged model. These findings suggest that short-term changes in arrests do not meaningfully influence monthly overdose mortality, even when a 1-month lag is taken into account.
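For completeness, the same IRR summaries produced for the conviction models below can be generated for the lagged arrest model; a minimal sketch:

# IRRs and 95% profile confidence intervals for the lagged arrest model (sketch)
exp(coef(nb_model_lag))
exp(confint(nb_model_lag))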

Convictions and Overdose Deaths

library(dplyr)

conv_data <- combined_df_3_filtered %>%
  dplyr::select(Month_Date, Year, Deaths, Conv_Count, Population) %>%
  arrange(Month_Date) %>%
  mutate(Conv_Count_lag1 = lag(Conv_Count, 1))

Correlation

cor.test(conv_data$Conv_Count,
         conv_data$Deaths,
         method = "spearman")
## Warning in cor.test.default(conv_data$Conv_Count, conv_data$Deaths, method =
## "spearman"): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  conv_data$Conv_Count and conv_data$Deaths
## S = 2323669, p-value = 4.469e-13
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.4102266

Poisson Regression

poisson_conv <- glm(
  Deaths ~ Conv_Count + Population,
  family = poisson(link = "log"),
  data = conv_data
)

summary(poisson_conv)
## 
## Call:
## glm(formula = Deaths ~ Conv_Count + Population, family = poisson(link = "log"), 
##     data = conv_data)
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.283e+01  2.302e-01  -55.75   <2e-16 ***
## Conv_Count   4.470e-02  2.567e-03   17.42   <2e-16 ***
## Population   1.984e-06  2.604e-08   76.20   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 14017.4  on 286  degrees of freedom
## Residual deviance:  5190.6  on 284  degrees of freedom
##   (1 observation deleted due to missingness)
## AIC: 7029.4
## 
## Number of Fisher Scoring iterations: 4
# Overdispersion check
overdispersion_conv <- sum(residuals(poisson_conv, type = "pearson")^2) / df.residual(poisson_conv)
overdispersion_conv
## [1] 19.11486

Negative Binomial Model

library(MASS)

nb_conv <- glm.nb(
  Deaths ~ Conv_Count + Population,
  data = conv_data
)

summary(nb_conv)
## 
## Call:
## glm.nb(formula = Deaths ~ Conv_Count + Population, data = conv_data, 
##     init.theta = 6.541899928, link = log)
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.394e+01  1.008e+00 -13.831  < 2e-16 ***
## Conv_Count   6.506e-02  1.395e-02   4.665 3.09e-06 ***
## Population   2.106e-06  1.150e-07  18.305  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(6.5419) family taken to be 1)
## 
##     Null deviance: 782.51  on 286  degrees of freedom
## Residual deviance: 294.78  on 284  degrees of freedom
##   (1 observation deleted due to missingness)
## AIC: 2931.6
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  6.542 
##           Std. Err.:  0.576 
## 
##  2 x log-likelihood:  -2923.581
# Overdispersion check
overdispersion_nb <- sum(residuals(nb_conv, type = "pearson")^2) / df.residual(nb_conv)
overdispersion_nb
## [1] 0.989071

Model: Deaths ~ Conv_Count + Population

| Term | Estimate | z | p-value |
|:---|---:|---:|:---|
| Intercept | -13.94 | -13.83 | < .001 *** |
| Conv_Count | 0.0651 | 4.67 | < .001 *** |
| Population | 2.11e-06 | 18.31 | < .001 *** |

AIC: 2931.6
Dispersion ≈ 0.99, acceptable

Lagged Negative Binomial Model

nb_conv_lag <- glm.nb(
  Deaths ~ Conv_Count_lag1 + Population,
  data = conv_data
)

summary(nb_conv_lag)
## 
## Call:
## glm.nb(formula = Deaths ~ Conv_Count_lag1 + Population, data = conv_data, 
##     init.theta = 6.513924753, link = log)
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -1.407e+01  1.014e+00 -13.877  < 2e-16 ***
## Conv_Count_lag1  6.440e-02  1.394e-02   4.619 3.85e-06 ***
## Population       2.121e-06  1.157e-07  18.328  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(6.5139) family taken to be 1)
## 
##     Null deviance: 775.69  on 285  degrees of freedom
## Residual deviance: 293.71  on 283  degrees of freedom
##   (2 observations deleted due to missingness)
## AIC: 2923.8
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  6.514 
##           Std. Err.:  0.574 
## 
##  2 x log-likelihood:  -2915.822
# Overdispersion check
overdispersion_nb2 <- sum(residuals(nb_conv_lag, type = "pearson")^2) / df.residual(nb_conv_lag)
overdispersion_nb2
## [1] 0.9911637

Model: Deaths ~ Conv_Count_lag1 + Population

| Term | Estimate | z | p-value |
|:---|---:|---:|:---|
| Intercept | -14.07 | -13.88 | < .001 *** |
| Conv_Count_lag1 | 0.0644 | 4.62 | < .001 *** |
| Population | 2.12e-06 | 18.33 | < .001 *** |

AIC: 2923.8
Dispersion ≈ 0.99, acceptable

# For the NB model without lag
exp_coef_nb <- exp(coef(nb_conv))
exp_coef_nb
##  (Intercept)   Conv_Count   Population 
## 8.873064e-07 1.067222e+00 1.000002e+00
# For the lagged NB model
exp_coef_lag <- exp(coef(nb_conv_lag))
exp_coef_lag
##     (Intercept) Conv_Count_lag1      Population 
##    7.774756e-07    1.066519e+00    1.000002e+00
# Confidence intervals (exp) for the original NB model
exp_confint_nb <- exp(confint(nb_conv))
## Waiting for profiling to be done...
# Confidence intervals (exp) for the lagged NB model
exp_confint_lag <- exp(confint(nb_conv_lag))
## Waiting for profiling to be done...
anova(nb_conv_lag, nb_conv, test = "Chisq")
## Likelihood ratio tests of Negative Binomial Models
## 
## Response: Deaths
##                          Model    theta Resid. df    2 x log-lik.   Test    df
## 1      Conv_Count + Population 6.541900       284       -2923.581             
## 2 Conv_Count_lag1 + Population 6.513925       283       -2915.822 1 vs 2     1
##   LR stat.     Pr(Chi)
## 1                     
## 2 7.759112 0.005344223
library(DHARMa)
## This is DHARMa 0.4.7. For overview type '?DHARMa'. For recent changes, type news(package = 'DHARMa')
# For original model
sim1 <- simulateResiduals(nb_conv)
plot(sim1)
## Warning in newton(lsp = lsp, X = G$X, y = G$y, Eb = G$Eb, UrS = G$UrS, L = G$L,
## : Fitting terminated with step failure - check results carefully

# For lagged model
sim2 <- simulateResiduals(nb_conv_lag)
plot(sim2)

Spearman’s rank correlation showed a moderate positive relationship between convictions and overdose mortality (ρ = 0.41, p < .001).

Poisson regression was initially attempted but found to be highly overdispersed (ϕ ≈ 19.1), justifying the use of a negative binomial model (ϕ ≈ 0.99).

A series of negative binomial regression models was fitted to examine the association between monthly conviction counts and overdose deaths from 1999 through 2022, the span of the merged monthly data.

In the adjusted model without a lag, convictions were significantly associated with overdose deaths (β = 0.065, p < .001), controlling for population size. This association remained statistically significant when a 1-month lag of conviction counts was introduced (β = 0.064, p < .001), suggesting that monthly changes in convictions may predict subsequent changes in overdose mortality. In both models, population size was a robust predictor of overdose deaths (p < .001). Model fit improved with the lagged specification (AIC = 2923.8 vs. 2931.6).

In the Negative Binomial model, the incidence rate ratio (IRR) for conviction counts was 1.067 (IRR = exp(0.065)), indicating that each additional conviction in a given month was associated with roughly a 6.7% increase in the expected count of overdose deaths, holding population constant.

In the lagged model, the IRR for 1-month lagged convictions was 1.067, showing a similar 6.7% increase in overdose deaths for every one-unit increase in convictions from the prior month. This suggests a potential short-term temporal association.

The IRR for population was approximately 1.000002, indicating that increases in population were also associated with slightly higher overdose deaths, as expected in population-scaled outcomes.
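Because the population IRR is expressed per single resident, it is hard to read at a glance; one option is to rescale population before fitting. A minimal sketch (nb_conv_rescaled is an illustrative name, not an object used elsewhere in this analysis):

# Refit with population per 100,000 residents for a more readable IRR (sketch)
nb_conv_rescaled <- glm.nb(Deaths ~ Conv_Count + I(Population / 1e5), data = conv_data)
exp(coef(nb_conv_rescaled))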

library(dplyr)
library(gt)

# Install broom 
# install.packages("broom")

library(broom)

# Tidy the model and exponentiate coefficients + CI
conv_nb_lag_table <- tidy(nb_conv_lag, conf.int = TRUE, conf.level = 0.95, exponentiate = TRUE)

# Display the table
print(conv_nb_lag_table)
## # A tibble: 3 × 7
##   term               estimate   std.error statistic  p.value  conf.low conf.high
##   <chr>                 <dbl>       <dbl>     <dbl>    <dbl>     <dbl>     <dbl>
## 1 (Intercept)     0.000000777 1.01           -13.9  8.76e-44   1.09e-7   5.52e-6
## 2 Conv_Count_lag1 1.07        0.0139           4.62 3.85e- 6   1.04e+0   1.10e+0
## 3 Population      1.00        0.000000116     18.3  4.91e-75   1.00e+0   1.00e+0
conv_nb_lag_table %>%
  mutate(across(estimate:conf.high, ~ round(.x, 6))) %>%
  rename(
    Term = term,
    IRR = estimate,
    `CI Lower` = conf.low,
    `CI Upper` = conf.high,
    `P-Value` = p.value
  ) %>%
  gt() %>%
  tab_header(
    title = "Incidence Rate Ratios (IRRs) from Lagged Negative Binomial Model"
  )
Incidence Rate Ratios (IRRs) from Lagged Negative Binomial Model

| Term | IRR | std.error | statistic | P-Value | CI Lower | CI Upper |
|:---|---:|---:|---:|---:|---:|---:|
| (Intercept) | 0.000001 | 1.013723 | -13.876779 | 0e+00 | 0.000000 | 0.000006 |
| Conv_Count_lag1 | 1.066519 | 0.013942 | 4.619333 | 4e-06 | 1.037923 | 1.096825 |
| Population | 1.000002 | 0.000000 | 18.328406 | 0e+00 | 1.000002 | 1.000002 |

Controlling for COVID

We control for COVID-era effects using a factor variable, which allows the model to compare the time periods to a reference category. Because the factor levels sort alphabetically and the merged monthly data end in December 2022, the reference category is 2020 (the 2023/2024 level does not occur in the data).

conv_data <- conv_data %>%
  mutate(
    COVID_era = case_when(
      Year < 2020 ~ "Pre",
      Year == 2020 ~ "2020",
      Year == 2021 ~ "2021",
      Year == 2022 ~ "2022",
      TRUE ~ "2023_24"
    )
  )
nb_conv_covid_fe <- glm.nb(
  Deaths ~ Conv_Count_lag1 + Population + factor(COVID_era),
  data = conv_data
)
nb_conv_covid_fe
## 
## Call:  glm.nb(formula = Deaths ~ Conv_Count_lag1 + Population + factor(COVID_era), 
##     data = conv_data, init.theta = 7.834860144, link = log)
## 
## Coefficients:
##           (Intercept)        Conv_Count_lag1             Population  
##            -1.490e+01              6.917e-02              2.281e-06  
## factor(COVID_era)2021  factor(COVID_era)2022   factor(COVID_era)Pre  
##            -8.295e-01             -9.871e-01             -5.897e-01  
## 
## Degrees of Freedom: 285 Total (i.e. Null);  280 Residual
##   (2 observations deleted due to missingness)
## Null Deviance:       920.9 
## Residual Deviance: 295.3     AIC: 2882

A final model was estimated including time-period fixed effects to account for the COVID-19 pandemic and its aftermath. The results show that lagged convictions remain significantly associated with overdose deaths, even after controlling for population and temporal factors (IRR = 1.072, p < .001). Each additional conviction in the prior month was associated with a 7.2% increase in overdose deaths (IRR = 1.072, 95% CI: [1.044, 1.101], p < .001), controlling for population and pandemic time periods.

Overdose deaths were significantly lower in the pre-COVID period (IRR = 0.55, 95% CI: …), as well as in 2021 (IRR = 0.44) and 2022 (IRR = 0.37), relative to the 2020 reference year.
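The printed model object above omits standard errors and p-values; summary(nb_conv_covid_fe) returns the full coefficient table. If a different baseline period is preferred, the factor can be releveled before refitting; a minimal sketch (conv_data_releveled is an illustrative name):

# Full coefficient table for the COVID-adjusted model
summary(nb_conv_covid_fe)

# Sketch: make the pre-COVID period the explicit reference level before refitting
conv_data_releveled <- conv_data %>%
  mutate(COVID_era = relevel(factor(COVID_era), ref = "Pre"))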

# Load required packages
library(broom)
library(dplyr)
library(gt)

# Step 1: Extract IRRs + Confidence Intervals
covid_nb_table <- tidy(nb_conv_covid_fe, conf.int = TRUE, conf.level = 0.95, exponentiate = TRUE)

# Step 2: Format for clean presentation
covid_nb_table %>%
  mutate(across(estimate:conf.high, ~ round(.x, 6))) %>%
  rename(
    Term = term,
    IRR = estimate,
    `CI Lower` = conf.low,
    `CI Upper` = conf.high,
    `P-Value` = p.value
  ) %>%
  gt() %>%
  tab_header(
    title = "Incidence Rate Ratios (IRRs) from COVID-Adjusted Negative Binomial Model"
  )
Incidence Rate Ratios (IRRs) from COVID-Adjusted Negative Binomial Model

| Term | IRR | std.error | statistic | P-Value | CI Lower | CI Upper |
|:---|---:|---:|---:|---:|---:|---:|
| (Intercept) | 0.000000 | 1.189977 | -12.523406 | 0 | 0.000000 | 0.000003 |
| Conv_Count_lag1 | 1.071613 | 0.013373 | 5.172089 | 0 | 1.044087 | 1.100659 |
| Population | 1.000002 | 0.000000 | 17.044558 | 0 | 1.000002 | 1.000003 |
| factor(COVID_era)2021 | 0.436256 | 0.156688 | -5.294128 | 0 | 0.321383 | 0.592213 |
| factor(COVID_era)2022 | 0.372670 | 0.158385 | -6.232041 | 0 | 0.273592 | 0.507602 |
| factor(COVID_era)Pre | 0.554513 | 0.109757 | -5.372464 | 0 | 0.445103 | 0.682471 |

County-Level Analysis

library(dplyr)

Overdose %>%
  filter(Deaths == "Suppressed") %>%
  tally()
## # A tibble: 1 × 1
##       n
##   <int>
## 1    60
#total observations in dataset

library(dplyr)

Overdose %>%
  tally()
## # A tibble: 1 × 1
##       n
##   <int>
## 1   525

Sixty of the 525 county-year overdose observations (11.4%) are suppressed.
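The same share can be computed directly from the data; a minimal sketch:

# Count and percentage of suppressed county-year observations (sketch)
Overdose %>%
  summarise(
    n_suppressed = sum(Deaths == "Suppressed", na.rm = TRUE),
    pct_suppressed = round(mean(Deaths == "Suppressed", na.rm = TRUE) * 100, 1)
  )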

# Standardize County names in Overdose dataframe
Overdose <- Overdose %>%
  mutate(County = gsub(" County, NJ", "", County))

# 1. Convictions by county and year
convictions_by_county_year <- Convictions %>%
  filter(!is.na(Sentence.Year)) %>%
  group_by(County, Year = Sentence.Year) %>%
  summarise(Conviction_Count = n(), .groups = "drop")

# 2. Arrests by county and year
arrests_by_county_year <- Arrest %>%
  group_by(County, Year = `Arrest Year`) %>%
  summarise(Arrest_Count = n(), .groups = "drop")


# 3. Combine into one dataset
combined_df_county <- Overdose %>%
  left_join(arrests_by_county_year, by = c("County", "Year")) %>%
  left_join(convictions_by_county_year, by = c("County", "Year")) %>%
  mutate(
   Arrest_Count = ifelse(is.na(Arrest_Count) & Year >= 2017, 0, Arrest_Count),
    Conviction_Count = ifelse(is.na(Conviction_Count), 0, Conviction_Count)
  )
# Midpoint Method (setting all suppressed values to 5, the midpoint of the suppressed range 1-9)
combined_df_county_md <- combined_df_county %>%
  mutate(
    Deaths_clean = ifelse(Deaths == "Suppressed", NA, Deaths),       # Step 1: Set Suppressed to NA
    Deaths_num = as.numeric(Deaths_clean),                            # Step 2: Convert remaining to numeric
    Deaths_num = ifelse(is.na(Deaths_num), 5, Deaths_num)             # Step 3: Set NAs (formerly Suppressed) to 5
  )
#Sensitivity Analysis
##lower bound
df_lower <- combined_df_county %>%
  mutate(
    Deaths_clean = ifelse(Deaths == "Suppressed", NA, Deaths),       # Step 1: Set Suppressed to NA
    Deaths_num = as.numeric(Deaths_clean),                            # Step 2: Convert remaining to numeric
    Deaths_num = ifelse(is.na(Deaths_num), 1, Deaths_num)             # Step 3: Set NAs (formerly Suppressed) to 1
  )

##upper bound
df_upper <- combined_df_county %>%
  mutate(
    Deaths_clean = ifelse(Deaths == "Suppressed", NA, Deaths),       # Step 1: Set Suppressed to NA
    Deaths_num = as.numeric(Deaths_clean),                            # Step 2: Convert remaining to numeric
    Deaths_num = ifelse(is.na(Deaths_num), 9, Deaths_num)             # Step 3: Set NAs (formerly Suppressed) to 9
  )
# Load required libraries
library(mice)
## 
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
## 
##     filter
## The following objects are masked from 'package:base':
## 
##     cbind, rbind
library(truncnorm)
library(dplyr)

# Step 1: Prepare dataset and flag suppressed deaths as NA
combined_df_county_MI <- combined_df_county %>%
  mutate(
    Deaths_num = ifelse(Deaths == "Suppressed", NA, as.numeric(Deaths)),
    County = as.factor(County) 
  )
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = ifelse(Deaths == "Suppressed", NA,
##   as.numeric(Deaths))`.
## Caused by warning in `ifelse()`:
## ! NAs introduced by coercion
# Step 2: Define custom imputation function drawing from a truncated normal on [1, 9]
mice.impute.truncated_poisson <- function(y, ry, x, ...) {
  n_miss <- sum(!ry)
  mean_est <- 5  # fixed midpoint-based mean
  imputed <- round(rtruncnorm(n = n_miss, a = 1, b = 9, mean = mean_est, sd = 2))
  return(imputed)
}

# Step 3: Specify imputation methods
mice_methods <- make.method(combined_df_county_MI)
mice_methods["Deaths_num"] <- "truncated_poisson"

# Step 4: Build predictor matrix — only safe variables
mice_pred <- make.predictorMatrix(combined_df_county_MI)
mice_pred[,] <- 0  # turn off all predictors by default
mice_pred["Deaths_num", "Population"] <- 1  # use population to inform imputation
# Optional: include County if it's a factor and not a primary predictor
mice_pred["Deaths_num", "County"] <- 1

# Step 5: Run multiple imputation
imp <- mice(
  combined_df_county_MI,
  method = mice_methods,
  predictorMatrix = mice_pred,
  m = 5,
  seed = 123
)
## 
##  iter imp variable
##   1   1  Arrest_Count  Deaths_num
##   1   2  Arrest_Count  Deaths_num
##   1   3  Arrest_Count  Deaths_num
##   1   4  Arrest_Count  Deaths_num
##   1   5  Arrest_Count  Deaths_num
##   2   1  Arrest_Count  Deaths_num
##   2   2  Arrest_Count  Deaths_num
##   2   3  Arrest_Count  Deaths_num
##   2   4  Arrest_Count  Deaths_num
##   2   5  Arrest_Count  Deaths_num
##   3   1  Arrest_Count  Deaths_num
##   3   2  Arrest_Count  Deaths_num
##   3   3  Arrest_Count  Deaths_num
##   3   4  Arrest_Count  Deaths_num
##   3   5  Arrest_Count  Deaths_num
##   4   1  Arrest_Count  Deaths_num
##   4   2  Arrest_Count  Deaths_num
##   4   3  Arrest_Count  Deaths_num
##   4   4  Arrest_Count  Deaths_num
##   4   5  Arrest_Count  Deaths_num
##   5   1  Arrest_Count  Deaths_num
##   5   2  Arrest_Count  Deaths_num
##   5   3  Arrest_Count  Deaths_num
##   5   4  Arrest_Count  Deaths_num
##   5   5  Arrest_Count  Deaths_num
## Warning: Number of logged events: 1
# Step 6: Get completed dataset
# Option A: View all imputations (long format)
completed_data_long <- complete(imp, "long")

# Option B: Use one completed dataset (e.g., the first)
imputed_df_MI <- complete(imp, 1)
library(ggplot2)

completed_data_long %>%
  ggplot(aes(x = Deaths_num)) +
  geom_histogram(binwidth = 1, fill = "steelblue", color = "white") +
  facet_wrap(~.imp) +
  labs(
    title = "Distribution of Imputed Deaths_num Values",
    x = "Imputed Deaths",
    y = "Frequency"
  )

# Load required libraries
library(brms)
## Loading required package: Rcpp
## Loading 'brms' package (version 2.23.0). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
## 
## Attaching package: 'brms'
## The following object is masked from 'package:stats':
## 
##     ar
library(dplyr)

# Step 1: Convert 'Deaths' to numeric with NAs for "Suppressed"
combined_df_county <- combined_df_county %>%
  mutate(
    Deaths_num = ifelse(Deaths == "Suppressed", NA, as.numeric(Deaths))
  )
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = ifelse(Deaths == "Suppressed", NA,
##   as.numeric(Deaths))`.
## Caused by warning in `ifelse()`:
## ! NAs introduced by coercion
# Step 2: Subset to observed (non-missing) death counts
df_obs <- combined_df_county %>%
  filter(!is.na(Deaths_num))


#Step 3: Scale Population and Year
df_obs <- df_obs %>%
  mutate(
    Year_scaled = scale(Year),
    Pop_scaled = scale(Population),
    County = factor(County)
  )

# Step 4: Fit Bayesian Negative Binomial model (simplest first)
library(brms)

bayes_model <- brm(
  Deaths_num ~ Year_scaled + Pop_scaled,
  data = df_obs,
  family = negbinomial(),
  chains = 2,
  iter = 2000,
  seed = 123,
  control = list(adapt_delta = 0.95)
)
## Compiling Stan program...
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## using C compiler: ‘Apple clang version 17.0.0 (clang-1700.0.13.5)’
## using SDK: ‘MacOSX15.5.sdk’
## clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include '/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/opt/R/arm64/include    -fPIC  -falign-functions=64 -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
## /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
##   679 | #include <cmath>
##       |          ^~~~~~~
## 1 error generated.
## make: *** [foo.o] Error 1
## Start sampling
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 6.3e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.63 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 0.212 seconds (Warm-up)
## Chain 1:                0.196 seconds (Sampling)
## Chain 1:                0.408 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 2.8e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.28 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 2: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 2: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 2: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 2: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 0.207 seconds (Warm-up)
## Chain 2:                0.193 seconds (Sampling)
## Chain 2:                0.4 seconds (Total)
## Chain 2:
# Step 5: Subset missing
df_missing <- combined_df_county %>%
  filter(is.na(Deaths_num)) %>%
  mutate(
    County = factor(County, levels = levels(df_obs$County)),
    Year_scaled = scale(Year,
                        center = attr(df_obs$Year_scaled, "scaled:center"),
                        scale = attr(df_obs$Year_scaled, "scaled:scale")),
    Pop_scaled = scale(Population,
                       center = attr(df_obs$Pop_scaled, "scaled:center"),
                       scale = attr(df_obs$Pop_scaled, "scaled:scale"))
  )

# Step 6: Predict and constrain to [1, 9]
predicted_draws <- posterior_predict(bayes_model, newdata = df_missing)
set.seed(123)
imputed_values <- apply(predicted_draws, 2, function(x) sample(x, 1))
imputed_values <- pmin(pmax(imputed_values, 1), 9)

# Step 7: Insert into original data
combined_df_county_Bay1 <- combined_df_county
combined_df_county_Bay1$Deaths_num[is.na(combined_df_county_Bay1$Deaths_num)] <- imputed_values
summary(bayes_model)
##  Family: negbinomial 
##   Links: mu = log 
## Formula: Deaths_num ~ Year_scaled + Pop_scaled 
##    Data: df_obs (Number of observations: 465) 
##   Draws: 2 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 2000
## 
## Regression Coefficients:
##             Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept       4.12      0.02     4.08     4.17 1.00     3023     1666
## Year_scaled     0.51      0.02     0.47     0.55 1.00     2556     1369
## Pop_scaled      0.51      0.03     0.46     0.56 1.00     2363     1326
## 
## Further Distributional Parameters:
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape     4.67      0.31     4.07     5.30 1.00     2414     1664
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Rhat = 1.00 for all parameters → indicates convergence.

Bulk_ESS and Tail_ESS > 1000 for all → good effective sample sizes.

No warnings → model ran smoothly.

plot(bayes_model)       # Trace + density

pp_check(bayes_model)   # Posterior predictive checks
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

The posterior predictive check matches the shape of the observed data, captures the overdispersion well, and produces plausible predictions.

#Check imputation distribution for Bayesian pooling
hist(imputed_values, breaks = 9, col = "orange", main = "Imputed Deaths_num via Bayesian Model")

One issue is that there are no 0's among the imputed values, but that may be acceptable given that suppressed counts are known to lie between 1 and 9.

# Histogram before applying pmin/pmax
hist(as.vector(apply(predicted_draws, 2, function(x) sample(x, 1))),
     main = "Posterior Samples Before Truncation",
     xlab = "Raw predicted values")

fitted_vals <- fitted(bayes_model, newdata = df_missing)
summary(fitted_vals)
##     Estimate        Est.Error           Q2.5             Q97.5       
##  Min.   : 11.14   Min.   :0.7248   Min.   :  9.776   Min.   : 12.58  
##  1st Qu.: 14.47   1st Qu.:0.8201   1st Qu.: 12.929   1st Qu.: 16.09  
##  Median : 20.52   Median :0.9837   Median : 18.657   Median : 22.41  
##  Mean   : 26.25   Mean   :1.1636   Mean   : 24.040   Mean   : 28.54  
##  3rd Qu.: 26.30   3rd Qu.:1.1268   3rd Qu.: 24.055   3rd Qu.: 28.54  
##  Max.   :130.51   Max.   :6.6528   Max.   :118.169   Max.   :144.51
# Load required libraries
library(brms)
library(dplyr)

# Step 1: Create new dataset to preserve original
combined_df_county_Bay2 <- combined_df_county %>%
  mutate(
    Deaths_num = ifelse(Deaths == "Suppressed", NA, as.numeric(Deaths))
  )
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = ifelse(Deaths == "Suppressed", NA,
##   as.numeric(Deaths))`.
## Caused by warning in `ifelse()`:
## ! NAs introduced by coercion
# Step 2: Subset non-missing observations
df_obs_Bay2 <- combined_df_county_Bay2 %>%
  filter(!is.na(Deaths_num)) %>%
  mutate(
    Year_scaled = scale(Year),
    Pop_scaled = scale(Population),
    County = factor(County)  # Ensure County is a factor
  )

# Step 3: Fit Bayesian model with County as a random effect
bayes_model_Bay2 <- brm(
  Deaths_num ~ Year_scaled + Pop_scaled + (1 | County),
  data = df_obs_Bay2,
  family = negbinomial(),
  chains = 4,
  iter = 3000,
  seed = 123,
  control = list(adapt_delta = 0.95)
)
## Compiling Stan program...
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## using C compiler: ‘Apple clang version 17.0.0 (clang-1700.0.13.5)’
## using SDK: ‘MacOSX15.5.sdk’
## clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include '/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/opt/R/arm64/include    -fPIC  -falign-functions=64 -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
## /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
##   679 | #include <cmath>
##       |          ^~~~~~~
## 1 error generated.
## make: *** [foo.o] Error 1
## Start sampling
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 0.000132 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 1.32 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 3000 [  0%]  (Warmup)
## Chain 1: Iteration:  300 / 3000 [ 10%]  (Warmup)
## Chain 1: Iteration:  600 / 3000 [ 20%]  (Warmup)
## Chain 1: Iteration:  900 / 3000 [ 30%]  (Warmup)
## Chain 1: Iteration: 1200 / 3000 [ 40%]  (Warmup)
## Chain 1: Iteration: 1500 / 3000 [ 50%]  (Warmup)
## Chain 1: Iteration: 1501 / 3000 [ 50%]  (Sampling)
## Chain 1: Iteration: 1800 / 3000 [ 60%]  (Sampling)
## Chain 1: Iteration: 2100 / 3000 [ 70%]  (Sampling)
## Chain 1: Iteration: 2400 / 3000 [ 80%]  (Sampling)
## Chain 1: Iteration: 2700 / 3000 [ 90%]  (Sampling)
## Chain 1: Iteration: 3000 / 3000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 3.563 seconds (Warm-up)
## Chain 1:                2.853 seconds (Sampling)
## Chain 1:                6.416 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 4.2e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.42 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 3000 [  0%]  (Warmup)
## Chain 2: Iteration:  300 / 3000 [ 10%]  (Warmup)
## Chain 2: Iteration:  600 / 3000 [ 20%]  (Warmup)
## Chain 2: Iteration:  900 / 3000 [ 30%]  (Warmup)
## Chain 2: Iteration: 1200 / 3000 [ 40%]  (Warmup)
## Chain 2: Iteration: 1500 / 3000 [ 50%]  (Warmup)
## Chain 2: Iteration: 1501 / 3000 [ 50%]  (Sampling)
## Chain 2: Iteration: 1800 / 3000 [ 60%]  (Sampling)
## Chain 2: Iteration: 2100 / 3000 [ 70%]  (Sampling)
## Chain 2: Iteration: 2400 / 3000 [ 80%]  (Sampling)
## Chain 2: Iteration: 2700 / 3000 [ 90%]  (Sampling)
## Chain 2: Iteration: 3000 / 3000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 3.193 seconds (Warm-up)
## Chain 2:                2.738 seconds (Sampling)
## Chain 2:                5.931 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 5.8e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.58 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 3000 [  0%]  (Warmup)
## Chain 3: Iteration:  300 / 3000 [ 10%]  (Warmup)
## Chain 3: Iteration:  600 / 3000 [ 20%]  (Warmup)
## Chain 3: Iteration:  900 / 3000 [ 30%]  (Warmup)
## Chain 3: Iteration: 1200 / 3000 [ 40%]  (Warmup)
## Chain 3: Iteration: 1500 / 3000 [ 50%]  (Warmup)
## Chain 3: Iteration: 1501 / 3000 [ 50%]  (Sampling)
## Chain 3: Iteration: 1800 / 3000 [ 60%]  (Sampling)
## Chain 3: Iteration: 2100 / 3000 [ 70%]  (Sampling)
## Chain 3: Iteration: 2400 / 3000 [ 80%]  (Sampling)
## Chain 3: Iteration: 2700 / 3000 [ 90%]  (Sampling)
## Chain 3: Iteration: 3000 / 3000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 3.194 seconds (Warm-up)
## Chain 3:                2.765 seconds (Sampling)
## Chain 3:                5.959 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 4.9e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.49 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 3000 [  0%]  (Warmup)
## Chain 4: Iteration:  300 / 3000 [ 10%]  (Warmup)
## Chain 4: Iteration:  600 / 3000 [ 20%]  (Warmup)
## Chain 4: Iteration:  900 / 3000 [ 30%]  (Warmup)
## Chain 4: Iteration: 1200 / 3000 [ 40%]  (Warmup)
## Chain 4: Iteration: 1500 / 3000 [ 50%]  (Warmup)
## Chain 4: Iteration: 1501 / 3000 [ 50%]  (Sampling)
## Chain 4: Iteration: 1800 / 3000 [ 60%]  (Sampling)
## Chain 4: Iteration: 2100 / 3000 [ 70%]  (Sampling)
## Chain 4: Iteration: 2400 / 3000 [ 80%]  (Sampling)
## Chain 4: Iteration: 2700 / 3000 [ 90%]  (Sampling)
## Chain 4: Iteration: 3000 / 3000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 3.234 seconds (Warm-up)
## Chain 4:                2.887 seconds (Sampling)
## Chain 4:                6.121 seconds (Total)
## Chain 4:
# Step 4: Subset missing values and apply scaling
df_missing_Bay2 <- combined_df_county_Bay2 %>%
  filter(is.na(Deaths_num)) %>%
  mutate(
    County = factor(County, levels = levels(df_obs_Bay2$County)),  # match levels
    Year_scaled = scale(Year,
                        center = attr(df_obs_Bay2$Year_scaled, "scaled:center"),
                        scale = attr(df_obs_Bay2$Year_scaled, "scaled:scale")),
    Pop_scaled = scale(Population,
                       center = attr(df_obs_Bay2$Pop_scaled, "scaled:center"),
                       scale = attr(df_obs_Bay2$Pop_scaled, "scaled:scale"))
  )

# Step 5: Posterior prediction and imputation
predicted_draws_Bay2 <- posterior_predict(bayes_model_Bay2, newdata = df_missing_Bay2)

set.seed(123)
imputed_values_Bay2 <- apply(predicted_draws_Bay2, 2, function(x) sample(x, 1))
imputed_values_Bay2 <- pmin(pmax(imputed_values_Bay2, 1), 9)  # constrain to 1–9

# Step 6: Fill in the imputed values
combined_df_county_Bay2$Deaths_num[is.na(combined_df_county_Bay2$Deaths_num)] <- imputed_values_Bay2
#Check imputation distribution for Bayesian pooling
hist(imputed_values_Bay2, breaks = 9, col = "orange", main = "Imputed Deaths_num via Bayesian Model 2")

summary(bayes_model_Bay2)
plot(bayes_model_Bay2)       # Trace + density

pp_check(bayes_model_Bay2)   # Posterior predictive checks
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

In the provided overdose data, 60 of the 525 county-year observations (~11.4%) were labeled as “Suppressed,” indicating that the true value ranged between 1 and 9 but was withheld for privacy reasons. Since these suppressed values represent a non-trivial portion of the dataset, appropriate imputation methods were employed to estimate plausible values while preserving statistical integrity.
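As a quick check on that figure, here is a minimal sketch of the tally, assuming the raw county-year frame is combined_df_county and suppressed cells are stored as the literal string "Suppressed" in a character Deaths column:

library(dplyr)

# Count suppressed county-year cells and their share of the data
combined_df_county %>%
  summarise(
    n_total        = n(),
    n_suppressed   = sum(Deaths == "Suppressed", na.rm = TRUE),
    pct_suppressed = 100 * n_suppressed / n_total
  )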

We explored four different imputation strategies to address the suppressed values in the Deaths variable:

1. Midpoint Imputation (Deterministic)

All suppressed death values were replaced with the midpoint of the known range:

Value used: 5

This approach is simple and transparent but does not reflect variability in the imputed values.

Output: combined_df_county_md
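A minimal sketch of how this dataset could be built (illustrative only; it assumes Deaths is character and suppressed cells equal "Suppressed" — the report's own construction of combined_df_county_md appears earlier):

library(dplyr)

# Midpoint imputation: non-numeric entries become NA, then receive the 1-9 midpoint
combined_df_county_md <- combined_df_county %>%
  mutate(
    Deaths_num = suppressWarnings(as.numeric(Deaths)),      # "Suppressed" -> NA
    Deaths_num = ifelse(is.na(Deaths_num), 5, Deaths_num)   # midpoint of the 1-9 range
  )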

2. Sensitivity Analysis

To assess the bounds of potential impact, we created two alternative datasets:

Lower-bound scenario: All suppressed values set to 1

Upper-bound scenario: All suppressed values set to 9

These datasets allow for robustness checks in downstream models by comparing how extreme assumptions influence the results.

Output:

df_lower

df_upper
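A corresponding sketch for the two bound scenarios (impute_constant is a hypothetical helper, not part of the report's code; same assumptions as the midpoint sketch):

library(dplyr)

# Replace every suppressed cell with a fixed bound value
impute_constant <- function(df, value) {
  df %>%
    mutate(
      Deaths_num = suppressWarnings(as.numeric(Deaths)),
      Deaths_num = ifelse(is.na(Deaths_num), value, Deaths_num)
    )
}

df_lower <- impute_constant(combined_df_county, 1)  # lower-bound scenario
df_upper <- impute_constant(combined_df_county, 9)  # upper-bound scenario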

3. Multiple Imputation via MICE with Truncated Distribution

A more flexible approach used the mice package to perform multiple imputation with a custom imputation function drawing from a truncated normal distribution constrained to values between 1 and 9. This method incorporates uncertainty across imputations while maintaining the known bounds.

We used Population (scaled) as a predictor. County was included, via its factor representation, as a categorical predictor, since it serves as a control variable in subsequent analyses.

Imputation settings: 5 imputations (m = 5)

Custom imputation function using rtruncnorm(mean = 5, sd = 2, a = 1, b = 9)

Random draws ensure variability across imputations

Output:

A long-format dataset with multiple imputations (completed_data_long)

A single completed dataset (imputed_df) was extracted for use in modeling

A histogram of the imputed values showed a plausible, near-symmetric distribution centered around the midpoint, consistent with prior expectations.
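For reference, a minimal sketch of how a bounded draw can be registered as a custom mice method. The method name truncnorm19 and the frame dat are illustrative; the report's actual imputation code is not reproduced here.

library(mice)
library(truncnorm)

# Custom method: draw imputations from Normal(5, 2) truncated to [1, 9];
# mice dispatches on the suffix after "mice.impute."
mice.impute.truncnorm19 <- function(y, ry, x, wy = NULL, ...) {
  if (is.null(wy)) wy <- !ry
  truncnorm::rtruncnorm(sum(wy), a = 1, b = 9, mean = 5, sd = 2)
}

# Example call (sketch): dat has Deaths_num set to NA where suppressed, plus
# scaled Population and County (factor) as complete predictors
# imp <- mice(dat[, c("Deaths_num", "Population", "County")], m = 5,
#             method = c("truncnorm19", "", ""), seed = 123)
# completed_data_long <- mice::complete(imp, action = "long")
# imputed_df <- mice::complete(imp, 1)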

4. Bayesian Imputation via Negative Binomial Models

We fit two Bayesian models using the brms package, leveraging posterior predictive sampling to estimate the suppressed values.

A. Bayesian Model 1 (No County Effects)

Formula: Deaths_num ~ Year_scaled + Pop_scaled

Family: Negative Binomial (handles overdispersion)

Random effects: None

Dataset created: combined_df_county_Bay1

Imputation:

Posterior predictive distributions were sampled for missing rows.

A single draw per observation was taken and constrained to the 1–9 range.

Model output:

Rhat = 1.00 for all parameters → convergence achieved

Effective sample sizes (ESS) > 1000

Posterior predictive checks showed good fit to observed data
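For clarity, a sketch of this specification in brms; the object and data names follow the summary(bayes_model) output shown earlier, the chain and iteration settings mirror that summary, and the seed is illustrative.

library(brms)

# Bayesian Model 1: negative binomial fit to the observed (non-suppressed) rows
bayes_model <- brm(
  Deaths_num ~ Year_scaled + Pop_scaled,
  data   = df_obs,
  family = negbinomial(),
  chains = 2, iter = 2000, warmup = 1000,
  seed   = 123
)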

B. Bayesian Model 2 (Hierarchical: Random Intercepts by County)

Formula: Deaths_num ~ Year_scaled + Pop_scaled + (1 | County)

Family: Negative Binomial

Random effects: County (partial pooling)

Dataset created: combined_df_county_Bay2

Imputation:

Same approach as Model 1 (posterior prediction + truncation), but the county random intercepts allow the model to capture county-level heterogeneity.

Model output:

Converged across all 4 chains

Posterior predictive distributions captured both mean and dispersion

Histogram of imputed values showed reasonable spread (1–9), with central tendency near observed mean
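And a corresponding sketch of the hierarchical specification; the posterior-prediction and truncation steps for this model are the Step 4–6 code shown earlier, the chain and iteration settings mirror the sampler log above, and the seed is illustrative.

library(brms)

# Bayesian Model 2: adds a county random intercept (partial pooling)
bayes_model_Bay2 <- brm(
  Deaths_num ~ Year_scaled + Pop_scaled + (1 | County),
  data   = df_obs_Bay2,
  family = negbinomial(),
  chains = 4, iter = 3000, warmup = 1500,
  seed   = 123
)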

Summary of Imputation Methods

Method | Variables Used | Output Dataset
Midpoint (Set all to 5) | (none) | combined_df_county_md
Sensitivity: Lower Bound (1) | (none) | df_lower
Sensitivity: Upper Bound (9) | (none) | df_upper
Multiple Imputation (MICE) | Population | imputed_df, completed_data_long
Bayesian Model 1 | Year, Population | combined_df_county_Bay1
Bayesian Model 2 (with County) | Year, Population, County (random intercept) | combined_df_county_Bay2

Summary of Model Justification: To address the 11.4% of observations in our dataset with suppressed overdose death counts (reported as “Suppressed” and known to fall within the range of 1 to 9), we implemented a series of imputation strategies to ensure robustness and transparency. We began with simple imputation methods, including a midpoint substitution (setting all suppressed values to 5) and sensitivity analyses using the lower and upper bounds (1 and 9, respectively). These provided boundary checks for how assumptions about suppressed values might influence our findings.

We then applied Multiple Imputation by Chained Equations (MICE) using a truncated distribution constrained between 1 and 9. In this model, we included Population and County as predictors. Population was retained as a meaningful, independent contextual variable that reflects the expected scale of overdose events. County was added as a categorical predictor to allow the imputation process to borrow information across geographic units, thereby accounting for unobserved regional variation in overdose trends. We chose not to include Year as a predictor in this imputation step to avoid conflating temporal patterns with structural variation in death reporting practices, and to reduce the risk of overfitting imputation models based on potentially nonstationary time trends. Importantly, we excluded Conviction_Count, which is a key predictor in subsequent outcome modeling, to avoid biasing those future estimates through the imputation mechanism.

Finally, we implemented two Bayesian pooling models using the brms package. The first included Year and Population as predictors to estimate the likely distribution of suppressed counts, leveraging both temporal and demographic trends. The second model added County as a random intercept to account for potential unobserved heterogeneity across counties. These models allowed us to borrow strength from the observed data while explicitly modeling uncertainty and variability in suppressed values. Each imputation approach was stored as a separate dataset for comparison and downstream analysis. We included Year as a predictor in the Bayesian imputation models because those models are explicitly designed to capture structured trends and uncertainty in the observed data.

@Eli, do you think this is sufficient? Or should I run a Bayesian imputation model without Year and compare it as well?

County/Convictions/Overdose Descriptive Statistics

I just ran this with the second Bayesian Pooling model but can re-run it with others if you all want to see it….

library(dplyr)

combined_df_county_Bay2 %>%
  group_by(County) %>%
  summarise(Total_Overdose_Deaths = sum(Deaths_num)) %>%
  arrange(desc(Total_Overdose_Deaths))
## # A tibble: 21 × 2
##    County     Total_Overdose_Deaths
##    <chr>                      <dbl>
##  1 Essex                       4019
##  2 Camden                      3881
##  3 Ocean                       2916
##  4 Middlesex                   2792
##  5 Monmouth                    2450
##  6 Bergen                      2280
##  7 Hudson                      2111
##  8 Burlington                  2061
##  9 Atlantic                    1954
## 10 Gloucester                  1910
## # ℹ 11 more rows
library(dplyr)
library(ggplot2)

# Summarize overdose deaths by county
county_death_totals <- combined_df_county_Bay2 %>%
  group_by(County) %>%
  summarise(Total_Deaths = sum(Deaths_num, na.rm = TRUE)) %>%
  arrange(desc(Total_Deaths))

# Plot the bar graph
ggplot(county_death_totals, aes(x = reorder(County, -Total_Deaths), y = Total_Deaths)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  labs(
    title = "Total Overdose Deaths by County",
    x = "County",
    y = "Total Overdose Deaths"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

library(ggplot2)
library(dplyr)

# Optional: arrange by descending conviction count
county_summary_conv <- county_summary_conv %>%
  arrange(desc(n))

# Plot bar chart
ggplot(county_summary_conv, aes(x = reorder(County, -n), y = n)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  labs(
    title = "Total Convictions by County",
    x = "County",
    y = "Number of Convictions"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Visually, comparing the two graphs, it is clear that the counties with the largest numbers of overdose deaths do not have the most convictions.
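To put a number on that visual impression, the two county totals could be rank-correlated (a quick sketch using the summaries built above; not part of the original analysis):

library(dplyr)

# Spearman correlation between county totals of overdose deaths and convictions
county_death_totals %>%
  inner_join(county_summary_conv, by = "County") %>%
  summarise(spearman_rho = cor(Total_Deaths, n, method = "spearman"))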

library(ggplot2)
library(dplyr)

# Optional: arrange by descending arr count
county_summary_arr <- county_summary_arr %>%
  arrange(desc(n))

# Plot bar chart
ggplot(county_summary_arr, aes(x = reorder(County, -n), y = n)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  labs(
    title = "Total Arrests by County",
    x = "County",
    y = "Number of Arrests"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

The same pattern holds for arrests.

Modeling Convictions and Overdoses at the County Level

imputed_df_MI$Deaths <- as.numeric(imputed_df_MI$Deaths)
## Warning: NAs introduced by coercion
imputed_df_MI$County <- factor(imputed_df_MI$County)
run_models <- function(data, label) {
  message("===== Running models for: ", label, " =====")
  
  # Ensure required variables are correctly formatted and drop missing values
  data <- data %>%
    mutate(
      Deaths_num = as.numeric(Deaths),  # non-numeric entries (e.g., "Suppressed") become NA and are dropped by the filter below
      Year = as.numeric(Year),
      County = as.factor(County),
      Population = as.numeric(Population),
      Conviction_Count = as.numeric(Conviction_Count)
    ) %>%
    filter(
      !is.na(Deaths_num),
      !is.na(Conviction_Count),
      !is.na(Population)
    )
  
  # Fit Poisson regression
  poisson_model <- glm(
    Deaths_num ~ Conviction_Count + Year + Population + County,
    data = data,
    family = poisson
  )

  # Fit Negative Binomial regression
  nb_model <- MASS::glm.nb(
    Deaths_num ~ Conviction_Count + Year + Population + County,
    data = data
  )
  
  # Return AIC summary and model objects
  return(list(
    summary = data.frame(
      Method = label,
      Poisson_AIC = AIC(poisson_model),
      NegBin_AIC = AIC(nb_model)
    ),
    models = list(
      poisson = poisson_model,
      negbin = nb_model
    )
  ))
}
results_md     <- run_models(combined_df_county_md, "Midpoint Imputation")
## ===== Running models for: Midpoint Imputation =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
results_lower  <- run_models(df_lower, "Lower Bound Imputation")
## ===== Running models for: Lower Bound Imputation =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
results_upper  <- run_models(df_upper, "Upper Bound Imputation")
## ===== Running models for: Upper Bound Imputation =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
results_mi     <- run_models(imputed_df_MI, "Multiple Imputation")
## ===== Running models for: Multiple Imputation =====
results_bayes1 <- run_models(combined_df_county_Bay1, "Bayesian Imputation 1")
## ===== Running models for: Bayesian Imputation 1 =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
results_bayes2 <- run_models(combined_df_county_Bay2, "Bayesian Imputation 2")
## ===== Running models for: Bayesian Imputation 2 =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
library(dplyr)
library(knitr)

# Combine AIC results from all model runs
model_summaries <- bind_rows(
  results_md$summary,
  results_lower$summary,
  results_upper$summary,
  results_mi$summary,
  results_bayes1$summary,
  results_bayes2$summary
)

# Print as a markdown table
kable(model_summaries, caption = "Model Fit Comparison: AIC Values")
Model Fit Comparison: AIC Values
Method Poisson_AIC NegBin_AIC
Midpoint Imputation 5466.882 4018.577
Lower Bound Imputation 5466.882 4018.577
Upper Bound Imputation 5466.882 4018.577
Multiple Imputation 5466.882 4018.577
Bayesian Imputation 1 5466.882 4018.577
Bayesian Imputation 2 5466.882 4018.577

The table summarizes AIC values for Poisson and Negative Binomial (NB) models across six different imputation methods for suppressed overdose death counts. Across all imputation strategies, the NB models consistently show lower AIC values than the Poisson models, indicating a better fit, which is particularly important given the overdispersion typically present in count data such as overdose deaths. The lowest AIC was observed for Multiple Imputation (MICE), Bayesian Imputation 1, and Bayesian Imputation 2.

All three had identical AICs for both Poisson and NB models (NB AIC = 4018.58).

library(dplyr)
# Helper function to extract and label model summaries
extract_model_summary <- function(model, method_label) {
  coef_summary <- summary(model)$coefficients
  as.data.frame(coef_summary) %>%
    tibble::rownames_to_column("Term") %>%
    mutate(Method = method_label)
}

# Extract coefficient summaries from each NegBin model
coef_mi     <- extract_model_summary(results_mi$models$negbin, "Multiple Imputation")
coef_bayes1 <- extract_model_summary(results_bayes1$models$negbin, "Bayesian Imputation 1")
coef_bayes2 <- extract_model_summary(results_bayes2$models$negbin, "Bayesian Imputation 2")

# Combine all into one table
combined_coefs <- bind_rows(coef_mi, coef_bayes1, coef_bayes2)

# Optional: re-order for readability
combined_coefs <- combined_coefs %>%
  dplyr::select(Method, Term, Estimate, `Std. Error`, `z value`, `Pr(>|z|)`)

filtered_coefs <- combined_coefs %>%
  filter(Term %in% c("Conviction_Count", "Year", "Population")) %>%
  dplyr::select(Method, Term, Estimate, `Std. Error`, `z value`, `Pr(>|z|)`)


# Optional: render nicely in RMarkdown
knitr::kable(filtered_coefs, digits = 3, caption = "Negative Binomial Model Coefficients: Top Imputation Methods")
Negative Binomial Model Coefficients: Top Imputation Methods
Method Term Estimate Std. Error z value Pr(>|z|)
Multiple Imputation Conviction_Count 0.008 0.008 1.039 0.299
Multiple Imputation Year 0.080 0.003 26.819 0.000
Multiple Imputation Population 0.000 0.000 -1.311 0.190
Bayesian Imputation 1 Conviction_Count 0.008 0.008 1.039 0.299
Bayesian Imputation 1 Year 0.080 0.003 26.819 0.000
Bayesian Imputation 1 Population 0.000 0.000 -1.311 0.190
Bayesian Imputation 2 Conviction_Count 0.008 0.008 1.039 0.299
Bayesian Imputation 2 Year 0.080 0.003 26.819 0.000
Bayesian Imputation 2 Population 0.000 0.000 -1.311 0.190
combined_coefs %>%
  filter(grepl("^County", Term)) %>%
  summarize(
    avg_effect = mean(Estimate),
    max_effect = max(Estimate),
    min_effect = min(Estimate)
  )
##    avg_effect max_effect min_effect
## 1 -0.06762637    1.56226  -1.984721

Across all three imputation strategies:

Conviction_Count was not statistically significant (p ≈ 0.299), suggesting no strong evidence that convictions are associated with annual county-level overdose deaths after controlling for time and population.
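For interpretation, the estimate can be put on the incidence-rate-ratio scale: exp(0.008) ≈ 1.008, i.e. less than a 1% change in expected deaths per additional conviction. A sketch using Wald intervals for the MICE-based model (nb_fit is just a convenience name):

# Convert NegBin coefficients to incidence rate ratios with 95% Wald intervals
nb_fit  <- results_mi$models$negbin
irr_tab <- exp(cbind(IRR = coef(nb_fit), confint.default(nb_fit)))
round(irr_tab["Conviction_Count", ], 3)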

# Testing Collinearity of Population and County
summary(lm(Population ~ County, data = combined_df_county_Bay1))
## 
## Call:
## lm(formula = Population ~ County, data = combined_df_county_Bay1)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -74724  -5586    186   4683  80466 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        268753       3504  76.708  < 2e-16 ***
## CountyBergen       646467       4955 130.472  < 2e-16 ***
## CountyBurlington   178114       4955  35.947  < 2e-16 ***
## CountyCamden       243642       4955  49.172  < 2e-16 ***
## CountyCape May    -171674       4955 -34.648  < 2e-16 ***
## CountyCumberland  -116336       4955 -23.479  < 2e-16 ***
## CountyEssex        529876       4955 106.941  < 2e-16 ***
## CountyGloucester    15276       4955   3.083  0.00216 ** 
## CountyHudson       379152       4955  76.522  < 2e-16 ***
## CountyHunterdon   -142180       4955 -28.695  < 2e-16 ***
## CountyMercer        97651       4955  19.708  < 2e-16 ***
## CountyMiddlesex    541174       4955 109.221  < 2e-16 ***
## CountyMonmouth     358691       4955  72.392  < 2e-16 ***
## CountyMorris       222895       4955  44.985  < 2e-16 ***
## CountyOcean        309978       4955  62.561  < 2e-16 ***
## CountyPassaic      232437       4955  46.911  < 2e-16 ***
## CountySalem       -204085       4955 -41.189  < 2e-16 ***
## CountySomerset      54227       4955  10.944  < 2e-16 ***
## CountySussex      -122602       4955 -24.744  < 2e-16 ***
## CountyUnion        273983       4955  55.296  < 2e-16 ***
## CountyWarren      -161313       4955 -32.557  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17520 on 504 degrees of freedom
## Multiple R-squared:  0.9953, Adjusted R-squared:  0.9951 
## F-statistic:  5342 on 20 and 504 DF,  p-value: < 2.2e-16
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
# Exclude County or reduce levels if needed
model <- glm(Deaths_num ~ County + Population, data = combined_df_county_Bay1, family = poisson)
vif(model)
##               GVIF Df GVIF^(1/(2*Df))
## County     120.727 20        1.127315
## Population 120.727  1       10.987584

I am going to run the models without Population next to avoid multicollinearity. (In the Bayesian imputation models — including one with County as a random intercept — we retained Population as a control for exposure size.)

run_models_2 <- function(data, label) {
  message("===== Running models for: ", label, " =====")
  
  # Ensure required variables are correctly formatted and drop missing values
  data <- data %>%
    mutate(
      Deaths_num = as.numeric(Deaths),
      Year = as.numeric(Year),
      County = as.factor(County),
      Conviction_Count = as.numeric(Conviction_Count)
    ) %>%
    filter(
      !is.na(Deaths_num),
      !is.na(Conviction_Count)
    )
  
  # Fit Poisson regression
  poisson_model_2 <- glm(
    Deaths_num ~ Conviction_Count + Year + County,
    data = data,
    family = poisson
  )

  # Fit Negative Binomial regression
  nb_model_2 <- MASS::glm.nb(
    Deaths_num ~ Conviction_Count + Year + County,
    data = data
  )
  
  # Return AIC summary and model objects
  return(list(
    summary = data.frame(
      Method = label,
      Poisson_AIC_2 = AIC(poisson_model_2),
      NegBin_AIC_2 = AIC(nb_model_2)
    ),
    models = list(
      poisson = poisson_model_2,
      negbin = nb_model_2
    )
  ))
}
results_md2     <- run_models_2(combined_df_county_md, "Midpoint Imputation")
## ===== Running models for: Midpoint Imputation =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
results_lower2  <- run_models_2(df_lower, "Lower Bound Imputation")
## ===== Running models for: Lower Bound Imputation =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
results_upper2  <- run_models_2(df_upper, "Upper Bound Imputation")
## ===== Running models for: Upper Bound Imputation =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
results_mi2     <- run_models_2(imputed_df_MI, "Multiple Imputation")
## ===== Running models for: Multiple Imputation =====
results_bayes3 <- run_models_2(combined_df_county_Bay1, "Bayesian Imputation 1")
## ===== Running models for: Bayesian Imputation 1 =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
results_bayes4 <- run_models_2(combined_df_county_Bay2, "Bayesian Imputation 2")
## ===== Running models for: Bayesian Imputation 2 =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
library(dplyr)
library(knitr)

# Combine AIC results from all model runs
model_summaries_2 <- bind_rows(
  results_md2$summary,
  results_lower2$summary,
  results_upper2$summary,
  results_mi2$summary,
  results_bayes3$summary,
  results_bayes4$summary
)

# Print as a markdown table
kable(model_summaries_2, caption = "Model Fit Comparison: AIC Values")
Model Fit Comparison: AIC Values
Method Poisson_AIC_2 NegBin_AIC_2
Midpoint Imputation 5477.669 4018.365
Lower Bound Imputation 5477.669 4018.365
Upper Bound Imputation 5477.669 4018.365
Multiple Imputation 5477.669 4018.365
Bayesian Imputation 1 5477.669 4018.365
Bayesian Imputation 2 5477.669 4018.365
library(dplyr)

# Extract coefficient summaries from each NegBin model
coef_mi2     <- extract_model_summary(results_mi2$models$negbin, "Multiple Imputation")
coef_bayes3 <- extract_model_summary(results_bayes3$models$negbin, "Bayesian Imputation 1")
coef_bayes4 <- extract_model_summary(results_bayes4$models$negbin, "Bayesian Imputation 2")

# Combine all into one table
combined_coefs2 <- bind_rows(coef_mi2, coef_bayes3, coef_bayes4)

# Optional: re-order for readability
combined_coefs2 <- combined_coefs2 %>%
  dplyr::select(Method, Term, Estimate, `Std. Error`, `z value`, `Pr(>|z|)`)

filtered_coefs2 <- combined_coefs2 %>%
  filter(Term %in% c("Conviction_Count", "Year")) %>%
  dplyr::select(Method, Term, Estimate, `Std. Error`, `z value`, `Pr(>|z|)`)


# Optional: render nicely in RMarkdown
knitr::kable(filtered_coefs2, digits = 3, caption = "Negative Binomial Model Coefficients: Top Imputation Methods")
Negative Binomial Model Coefficients: Top Imputation Methods
Method Term Estimate Std. Error z value Pr(>|z|)
Multiple Imputation Conviction_Count 0.010 0.008 1.237 0.216
Multiple Imputation Year 0.077 0.002 36.129 0.000
Bayesian Imputation 1 Conviction_Count 0.010 0.008 1.237 0.216
Bayesian Imputation 1 Year 0.077 0.002 36.129 0.000
Bayesian Imputation 2 Conviction_Count 0.010 0.008 1.237 0.216
Bayesian Imputation 2 Year 0.077 0.002 36.129 0.000

The coefficient on Conviction_Count is not statistically significant in any of the top three models (p = 0.216). After removing Population, the model fit did not worsen (the AICs are essentially unchanged) and the coefficients stayed nearly identical.

Finally, I am going to try treating County and Year as factors.

run_models_3 <- function(data, label) {
  message("===== Running models for: ", label, " =====")
  
  # Ensure required variables are correctly formatted and drop missing values
  data <- data %>%
    mutate(
      Deaths_num = as.numeric(Deaths),
      Year = as.numeric(Year),
      County = as.factor(County),
      Conviction_Count = as.numeric(Conviction_Count)
    ) %>%
    filter(
      !is.na(Deaths_num),
      !is.na(Conviction_Count)
    )
  
  # Fit Poisson regression
  poisson_model_3 <- glm(
    Deaths_num ~ Conviction_Count + factor(Year) + factor(County),
    data = data,
    family = poisson
  )

  # Fit Negative Binomial regression
  nb_model_3 <- MASS::glm.nb(
    Deaths_num ~ Conviction_Count + factor(Year) + factor(County),
    data = data
  )
  
  # Return AIC summary and model objects
  return(list(
    summary = data.frame(
      Method = label,
      Poisson_AIC_2 = AIC(poisson_model_3),
      NegBin_AIC_2 = AIC(nb_model_3)
    ),
    models = list(
      poisson = poisson_model_3,
      negbin = nb_model_3
    )
  ))
}
results_md3     <- run_models_3(combined_df_county_md, "Midpoint Imputation")
## ===== Running models for: Midpoint Imputation =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
results_lower3  <- run_models_3(df_lower, "Lower Bound Imputation")
## ===== Running models for: Lower Bound Imputation =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
results_upper3  <- run_models_3(df_upper, "Upper Bound Imputation")
## ===== Running models for: Upper Bound Imputation =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
results_mi3     <- run_models_3(imputed_df_MI, "Multiple Imputation")
## ===== Running models for: Multiple Imputation =====
results_bayes5 <- run_models_3(combined_df_county_Bay1, "Bayesian Imputation 1")
## ===== Running models for: Bayesian Imputation 1 =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
results_bayes6 <- run_models_3(combined_df_county_Bay2, "Bayesian Imputation 2")
## ===== Running models for: Bayesian Imputation 2 =====
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Deaths_num = as.numeric(Deaths)`.
## Caused by warning:
## ! NAs introduced by coercion
library(dplyr)
library(knitr)

# Combine AIC results from all model runs
model_summaries_4 <- bind_rows(
  results_md3$summary,
  results_lower3$summary,
  results_upper3$summary,
  results_mi3$summary,
  results_bayes5$summary,
  results_bayes6$summary
)

# Print as a markdown table
kable(model_summaries_4, caption = "Model Fit Comparison: AIC Values")
Model Fit Comparison: AIC Values
Method Poisson_AIC_2 NegBin_AIC_2
Midpoint Imputation 4073.933 3734.332
Lower Bound Imputation 4073.933 3734.332
Upper Bound Imputation 4073.933 3734.332
Multiple Imputation 4073.933 3734.332
Bayesian Imputation 1 4073.933 3734.332
Bayesian Imputation 2 4073.933 3734.332
library(dplyr)

# Extract coefficient summaries from each NegBin model
coef_mi3     <- extract_model_summary(results_mi3$models$negbin, "Multiple Imputation")
coef_bayes5 <- extract_model_summary(results_bayes5$models$negbin, "Bayesian Imputation 1")
coef_bayes6 <- extract_model_summary(results_bayes6$models$negbin, "Bayesian Imputation 2")

# Combine all into one table
combined_coefs2 <- bind_rows(coef_mi3, coef_bayes5, coef_bayes6)

# Optional: re-order for readability
combined_coefs2 <- combined_coefs2 %>%
  dplyr::select(Method, Term, Estimate, `Std. Error`, `z value`, `Pr(>|z|)`)

filtered_coefs2 <- combined_coefs2 %>%
  filter(Term %in% c("Conviction_Count", "Year")) %>%
  dplyr::select(Method, Term, Estimate, `Std. Error`, `z value`, `Pr(>|z|)`)


# Optional: render nicely in RMarkdown
knitr::kable(filtered_coefs2, digits = 3, caption = "Negative Binomial Model Coefficients: Top Imputation Methods")
Negative Binomial Model Coefficients: Top Imputation Methods
Method Term Estimate Std. Error z value Pr(>|z|)
Multiple Imputation Conviction_Count -0.002 0.005 -0.446 0.656
Bayesian Imputation 1 Conviction_Count -0.002 0.005 -0.446 0.656
Bayesian Imputation 2 Conviction_Count -0.002 0.005 -0.446 0.656
knitr::kable(combined_coefs2, digits = 3, caption = "Negative Binomial Model Coefficients: Top Imputation Methods")
Negative Binomial Model Coefficients: Top Imputation Methods
Method Term Estimate Std. Error z value Pr(>|z|)
Multiple Imputation (Intercept) 3.411 0.073 46.731 0.000
Multiple Imputation Conviction_Count -0.002 0.005 -0.446 0.656
Multiple Imputation factor(Year)2000 0.145 0.084 1.719 0.086
Multiple Imputation factor(Year)2001 0.204 0.083 2.475 0.013
Multiple Imputation factor(Year)2002 0.362 0.081 4.454 0.000
Multiple Imputation factor(Year)2003 0.240 0.082 2.931 0.003
Multiple Imputation factor(Year)2004 0.031 0.084 0.363 0.717
Multiple Imputation factor(Year)2005 0.346 0.080 4.311 0.000
Multiple Imputation factor(Year)2006 0.379 0.080 4.731 0.000
Multiple Imputation factor(Year)2007 0.236 0.082 2.867 0.004
Multiple Imputation factor(Year)2008 0.231 0.083 2.794 0.005
Multiple Imputation factor(Year)2009 -0.359 0.114 -3.153 0.002
Multiple Imputation factor(Year)2010 0.437 0.080 5.456 0.000
Multiple Imputation factor(Year)2011 0.568 0.078 7.255 0.000
Multiple Imputation factor(Year)2012 0.747 0.077 9.683 0.000
Multiple Imputation factor(Year)2013 0.818 0.077 10.614 0.000
Multiple Imputation factor(Year)2014 0.774 0.077 10.080 0.000
Multiple Imputation factor(Year)2015 0.940 0.076 12.382 0.000
Multiple Imputation factor(Year)2016 1.276 0.075 17.115 0.000
Multiple Imputation factor(Year)2017 1.545 0.074 20.940 0.000
Multiple Imputation factor(Year)2018 1.632 0.073 22.220 0.000
Multiple Imputation factor(Year)2019 1.585 0.074 21.356 0.000
Multiple Imputation factor(Year)2020 1.605 0.074 21.814 0.000
Multiple Imputation factor(Year)2021 1.663 0.074 22.586 0.000
Multiple Imputation factor(Year)2022 1.627 0.075 21.808 0.000
Multiple Imputation factor(Year)2023 1.514 0.075 20.266 0.000
Multiple Imputation factor(County)Bergen 0.226 0.058 3.911 0.000
Multiple Imputation factor(County)Burlington 0.071 0.057 1.244 0.213
Multiple Imputation factor(County)Camden 0.718 0.056 12.886 0.000
Multiple Imputation factor(County)Cape May -1.041 0.071 -14.590 0.000
Multiple Imputation factor(County)Cumberland -0.775 0.067 -11.645 0.000
Multiple Imputation factor(County)Essex 0.733 0.056 13.046 0.000
Multiple Imputation factor(County)Gloucester 0.011 0.058 0.187 0.852
Multiple Imputation factor(County)Hudson 0.098 0.059 1.675 0.094
Multiple Imputation factor(County)Hunterdon -1.877 0.096 -19.524 0.000
Multiple Imputation factor(County)Mercer -0.377 0.061 -6.166 0.000
Multiple Imputation factor(County)Middlesex 0.371 0.056 6.619 0.000
Multiple Imputation factor(County)Monmouth 0.236 0.057 4.132 0.000
Multiple Imputation factor(County)Morris -0.357 0.060 -5.900 0.000
Multiple Imputation factor(County)Ocean 0.399 0.056 7.063 0.000
Multiple Imputation factor(County)Passaic -0.062 0.059 -1.060 0.289
Multiple Imputation factor(County)Salem -1.488 0.082 -18.202 0.000
Multiple Imputation factor(County)Somerset -0.882 0.066 -13.350 0.000
Multiple Imputation factor(County)Sussex -1.205 0.072 -16.727 0.000
Multiple Imputation factor(County)Union -0.167 0.060 -2.798 0.005
Multiple Imputation factor(County)Warren -1.438 0.080 -17.958 0.000
Bayesian Imputation 1 (Intercept) 3.411 0.073 46.731 0.000
Bayesian Imputation 1 Conviction_Count -0.002 0.005 -0.446 0.656
Bayesian Imputation 1 factor(Year)2000 0.145 0.084 1.719 0.086
Bayesian Imputation 1 factor(Year)2001 0.204 0.083 2.475 0.013
Bayesian Imputation 1 factor(Year)2002 0.362 0.081 4.454 0.000
Bayesian Imputation 1 factor(Year)2003 0.240 0.082 2.931 0.003
Bayesian Imputation 1 factor(Year)2004 0.031 0.084 0.363 0.717
Bayesian Imputation 1 factor(Year)2005 0.346 0.080 4.311 0.000
Bayesian Imputation 1 factor(Year)2006 0.379 0.080 4.731 0.000
Bayesian Imputation 1 factor(Year)2007 0.236 0.082 2.867 0.004
Bayesian Imputation 1 factor(Year)2008 0.231 0.083 2.794 0.005
Bayesian Imputation 1 factor(Year)2009 -0.359 0.114 -3.153 0.002
Bayesian Imputation 1 factor(Year)2010 0.437 0.080 5.456 0.000
Bayesian Imputation 1 factor(Year)2011 0.568 0.078 7.255 0.000
Bayesian Imputation 1 factor(Year)2012 0.747 0.077 9.683 0.000
Bayesian Imputation 1 factor(Year)2013 0.818 0.077 10.614 0.000
Bayesian Imputation 1 factor(Year)2014 0.774 0.077 10.080 0.000
Bayesian Imputation 1 factor(Year)2015 0.940 0.076 12.382 0.000
Bayesian Imputation 1 factor(Year)2016 1.276 0.075 17.115 0.000
Bayesian Imputation 1 factor(Year)2017 1.545 0.074 20.940 0.000
Bayesian Imputation 1 factor(Year)2018 1.632 0.073 22.220 0.000
Bayesian Imputation 1 factor(Year)2019 1.585 0.074 21.356 0.000
Bayesian Imputation 1 factor(Year)2020 1.605 0.074 21.814 0.000
Bayesian Imputation 1 factor(Year)2021 1.663 0.074 22.586 0.000
Bayesian Imputation 1 factor(Year)2022 1.627 0.075 21.808 0.000
Bayesian Imputation 1 factor(Year)2023 1.514 0.075 20.266 0.000
Bayesian Imputation 1 factor(County)Bergen 0.226 0.058 3.911 0.000
Bayesian Imputation 1 factor(County)Burlington 0.071 0.057 1.244 0.213
Bayesian Imputation 1 factor(County)Camden 0.718 0.056 12.886 0.000
Bayesian Imputation 1 factor(County)Cape May -1.041 0.071 -14.590 0.000
Bayesian Imputation 1 factor(County)Cumberland -0.775 0.067 -11.645 0.000
Bayesian Imputation 1 factor(County)Essex 0.733 0.056 13.046 0.000
Bayesian Imputation 1 factor(County)Gloucester 0.011 0.058 0.187 0.852
Bayesian Imputation 1 factor(County)Hudson 0.098 0.059 1.675 0.094
Bayesian Imputation 1 factor(County)Hunterdon -1.877 0.096 -19.524 0.000
Bayesian Imputation 1 factor(County)Mercer -0.377 0.061 -6.166 0.000
Bayesian Imputation 1 factor(County)Middlesex 0.371 0.056 6.619 0.000
Bayesian Imputation 1 factor(County)Monmouth 0.236 0.057 4.132 0.000
Bayesian Imputation 1 factor(County)Morris -0.357 0.060 -5.900 0.000
Bayesian Imputation 1 factor(County)Ocean 0.399 0.056 7.063 0.000
Bayesian Imputation 1 factor(County)Passaic -0.062 0.059 -1.060 0.289
Bayesian Imputation 1 factor(County)Salem -1.488 0.082 -18.202 0.000
Bayesian Imputation 1 factor(County)Somerset -0.882 0.066 -13.350 0.000
Bayesian Imputation 1 factor(County)Sussex -1.205 0.072 -16.727 0.000
Bayesian Imputation 1 factor(County)Union -0.167 0.060 -2.798 0.005
Bayesian Imputation 1 factor(County)Warren -1.438 0.080 -17.958 0.000
Bayesian Imputation 2 (Intercept) 3.411 0.073 46.731 0.000
Bayesian Imputation 2 Conviction_Count -0.002 0.005 -0.446 0.656
Bayesian Imputation 2 factor(Year)2000 0.145 0.084 1.719 0.086
Bayesian Imputation 2 factor(Year)2001 0.204 0.083 2.475 0.013
Bayesian Imputation 2 factor(Year)2002 0.362 0.081 4.454 0.000
Bayesian Imputation 2 factor(Year)2003 0.240 0.082 2.931 0.003
Bayesian Imputation 2 factor(Year)2004 0.031 0.084 0.363 0.717
Bayesian Imputation 2 factor(Year)2005 0.346 0.080 4.311 0.000
Bayesian Imputation 2 factor(Year)2006 0.379 0.080 4.731 0.000
Bayesian Imputation 2 factor(Year)2007 0.236 0.082 2.867 0.004
Bayesian Imputation 2 factor(Year)2008 0.231 0.083 2.794 0.005
Bayesian Imputation 2 factor(Year)2009 -0.359 0.114 -3.153 0.002
Bayesian Imputation 2 factor(Year)2010 0.437 0.080 5.456 0.000
Bayesian Imputation 2 factor(Year)2011 0.568 0.078 7.255 0.000
Bayesian Imputation 2 factor(Year)2012 0.747 0.077 9.683 0.000
Bayesian Imputation 2 factor(Year)2013 0.818 0.077 10.614 0.000
Bayesian Imputation 2 factor(Year)2014 0.774 0.077 10.080 0.000
Bayesian Imputation 2 factor(Year)2015 0.940 0.076 12.382 0.000
Bayesian Imputation 2 factor(Year)2016 1.276 0.075 17.115 0.000
Bayesian Imputation 2 factor(Year)2017 1.545 0.074 20.940 0.000
Bayesian Imputation 2 factor(Year)2018 1.632 0.073 22.220 0.000
Bayesian Imputation 2 factor(Year)2019 1.585 0.074 21.356 0.000
Bayesian Imputation 2 factor(Year)2020 1.605 0.074 21.814 0.000
Bayesian Imputation 2 factor(Year)2021 1.663 0.074 22.586 0.000
Bayesian Imputation 2 factor(Year)2022 1.627 0.075 21.808 0.000
Bayesian Imputation 2 factor(Year)2023 1.514 0.075 20.266 0.000
Bayesian Imputation 2 factor(County)Bergen 0.226 0.058 3.911 0.000
Bayesian Imputation 2 factor(County)Burlington 0.071 0.057 1.244 0.213
Bayesian Imputation 2 factor(County)Camden 0.718 0.056 12.886 0.000
Bayesian Imputation 2 factor(County)Cape May -1.041 0.071 -14.590 0.000
Bayesian Imputation 2 factor(County)Cumberland -0.775 0.067 -11.645 0.000
Bayesian Imputation 2 factor(County)Essex 0.733 0.056 13.046 0.000
Bayesian Imputation 2 factor(County)Gloucester 0.011 0.058 0.187 0.852
Bayesian Imputation 2 factor(County)Hudson 0.098 0.059 1.675 0.094
Bayesian Imputation 2 factor(County)Hunterdon -1.877 0.096 -19.524 0.000
Bayesian Imputation 2 factor(County)Mercer -0.377 0.061 -6.166 0.000
Bayesian Imputation 2 factor(County)Middlesex 0.371 0.056 6.619 0.000
Bayesian Imputation 2 factor(County)Monmouth 0.236 0.057 4.132 0.000
Bayesian Imputation 2 factor(County)Morris -0.357 0.060 -5.900 0.000
Bayesian Imputation 2 factor(County)Ocean 0.399 0.056 7.063 0.000
Bayesian Imputation 2 factor(County)Passaic -0.062 0.059 -1.060 0.289
Bayesian Imputation 2 factor(County)Salem -1.488 0.082 -18.202 0.000
Bayesian Imputation 2 factor(County)Somerset -0.882 0.066 -13.350 0.000
Bayesian Imputation 2 factor(County)Sussex -1.205 0.072 -16.727 0.000
Bayesian Imputation 2 factor(County)Union -0.167 0.060 -2.798 0.005
Bayesian Imputation 2 factor(County)Warren -1.438 0.080 -17.958 0.000

The Negative Binomial models with County and Year treated as factors are the best fits so far, but Conviction_Count is still not statistically significant. Now let's add a time lag.

library(dplyr)

# Create 1-year lagged variables by County
combined_df_county_lagged <- combined_df_county %>%
  arrange(County, Year) %>%
  group_by(County) %>%
  mutate(
    Conviction_Count_lag1 = lag(Conviction_Count, 1),
    Arrest_Count_lag1 = lag(Arrest_Count, 1)
  ) %>%
  ungroup()
# Load required package
library(dplyr)

# Define function to add lagged conviction count
lag_and_return <- function(df) {
  df %>%
    arrange(County, Year) %>%
    group_by(County) %>%
    mutate(
      Conviction_Count_lag1 = lag(Conviction_Count, 1)
    ) %>%
    ungroup()
}


# Apply function to each dataset
combined_df_county_md_lag     <- lag_and_return(combined_df_county_md)
df_lower_lag                  <- lag_and_return(df_lower)
df_upper_lag                  <- lag_and_return(df_upper)
imputed_df_MI_lag             <- lag_and_return(imputed_df_MI)
combined_df_county_Bay1_lag   <- lag_and_return(combined_df_county_Bay1)
combined_df_county_Bay2_lag   <- lag_and_return(combined_df_county_Bay2)
library(MASS)
library(dplyr)

run_lagged_models <- function(data, label) {
  cat("\n=== Running Models for:", label, "===\n")
  
 
  
  # Fit Poisson model
  poisson_model <- glm(
    Deaths_num ~ Conviction_Count_lag1 + County + Year,
    family = poisson,
    data = data
  )
  
  # Calculate dispersion
  dispersion <- sum(residuals(poisson_model, type = "pearson")^2) / poisson_model$df.residual
  cat("Overdispersion ratio:", round(dispersion, 2), "\n")
  
  # Fit Negative Binomial model
  nb_model <- glm.nb(
    Deaths_num ~ Conviction_Count_lag1 + County + Year,
    data = data
  )
  
  return(list(
    label = label,
    poisson_model = poisson_model,
    nb_model = nb_model,
    poisson_summary = summary(poisson_model),
    nb_summary = summary(nb_model),
    poisson_AIC = AIC(poisson_model),
    nb_AIC = AIC(nb_model),
    dispersion = dispersion
  ))
}
# Run models
results_md_lag     <- run_lagged_models(combined_df_county_md_lag,     "Midpoint Imputation")
## 
## === Running Models for: Midpoint Imputation ===
## Overdispersion ratio: 6.77
results_lower_lag  <- run_lagged_models(df_lower_lag,                  "Lower Bound Imputation")
## 
## === Running Models for: Lower Bound Imputation ===
## Overdispersion ratio: 7.48
results_upper_lag  <- run_lagged_models(df_upper_lag,                  "Upper Bound Imputation")
## 
## === Running Models for: Upper Bound Imputation ===
## Overdispersion ratio: 6.45
results_mi_lag     <- run_lagged_models(imputed_df_MI_lag,             "Multiple Imputation")
## 
## === Running Models for: Multiple Imputation ===
## Overdispersion ratio: 6.9
results_bayes1_lag <- run_lagged_models(combined_df_county_Bay1_lag,   "Bayesian Imputation 1")
## 
## === Running Models for: Bayesian Imputation 1 ===
## Overdispersion ratio: 6.47
results_bayes2_lag <- run_lagged_models(combined_df_county_Bay2_lag,   "Bayesian Imputation 2")
## 
## === Running Models for: Bayesian Imputation 2 ===
## Overdispersion ratio: 6.45
library(knitr)

lagged_model_summary <- bind_rows(
  data.frame(Method = results_md_lag$label,     Poisson_AIC = results_md_lag$poisson_AIC,     NegBin_AIC = results_md_lag$nb_AIC),
  data.frame(Method = results_lower_lag$label,  Poisson_AIC = results_lower_lag$poisson_AIC,  NegBin_AIC = results_lower_lag$nb_AIC),
  data.frame(Method = results_upper_lag$label,  Poisson_AIC = results_upper_lag$poisson_AIC,  NegBin_AIC = results_upper_lag$nb_AIC),
  data.frame(Method = results_mi_lag$label,     Poisson_AIC = results_mi_lag$poisson_AIC,     NegBin_AIC = results_mi_lag$nb_AIC),
  data.frame(Method = results_bayes1_lag$label, Poisson_AIC = results_bayes1_lag$poisson_AIC, NegBin_AIC = results_bayes1_lag$nb_AIC),
  data.frame(Method = results_bayes2_lag$label, Poisson_AIC = results_bayes2_lag$poisson_AIC, NegBin_AIC = results_bayes2_lag$nb_AIC)
)

kable(lagged_model_summary, caption = "Model AICs with Lagged Conviction Count (1-Year Lag)")
Model AICs with Lagged Conviction Count (1-Year Lag)
Method Poisson_AIC NegBin_AIC
Midpoint Imputation 6392.913 4382.249
Lower Bound Imputation 6896.009 4568.021
Upper Bound Imputation 6193.911 4329.119
Multiple Imputation 6501.502 4430.688
Bayesian Imputation 1 6202.982 4332.581
Bayesian Imputation 2 6183.374 4321.602
library(dplyr)
library(knitr)
library(broom)

# function to extract coefficients from a model
extract_lagged_coef2 <- function(model, method_label) {
  tidy(model) %>%
    filter(term == "Conviction_Count_lag1") %>%
    mutate(Method = method_label) %>%
    dplyr::select(Method, term, estimate, std.error, statistic, p.value)
}

# Helper function to extract coefficients from a model
extract_lagged_coef <- function(model, method_label) {
  tidy(model) %>%
    filter(term == "Conviction_Count_lag1") %>%
    mutate(Method = method_label) %>%
    dplyr::select(Method, term, estimate, std.error, statistic, p.value)
}

# Apply to all models
coef_md_lag     <- extract_lagged_coef(results_md_lag$nb_model,     "Midpoint Imputation")
coef_lower_lag  <- extract_lagged_coef(results_lower_lag$nb_model,  "Lower Bound Imputation")
coef_upper_lag  <- extract_lagged_coef(results_upper_lag$nb_model,  "Upper Bound Imputation")
coef_mi_lag     <- extract_lagged_coef(results_mi_lag$nb_model,     "Multiple Imputation")
coef_bayes1_lag <- extract_lagged_coef(results_bayes1_lag$nb_model, "Bayesian Imputation 1")
coef_bayes2_lag <- extract_lagged_coef(results_bayes2_lag$nb_model, "Bayesian Imputation 2")

# Combine all into one table
combined_lagged_coefs <- bind_rows(
  coef_md_lag,
  coef_lower_lag,
  coef_upper_lag,
  coef_mi_lag,
  coef_bayes1_lag,
  coef_bayes2_lag
)

# Pretty print
kable(combined_lagged_coefs, digits = 3, caption = "Effect of 1-Year Lagged Conviction Count on Overdose Deaths (Negative Binomial Models)")
Effect of 1-Year Lagged Conviction Count on Overdose Deaths (Negative Binomial Models)
Method term estimate std.error statistic p.value
Midpoint Imputation Conviction_Count_lag1 0.009 0.011 0.814 0.415
Lower Bound Imputation Conviction_Count_lag1 0.007 0.014 0.501 0.616
Upper Bound Imputation Conviction_Count_lag1 0.010 0.010 1.018 0.309
Multiple Imputation Conviction_Count_lag1 0.010 0.011 0.884 0.377
Bayesian Imputation 1 Conviction_Count_lag1 0.010 0.010 1.017 0.309
Bayesian Imputation 2 Conviction_Count_lag1 0.009 0.010 0.915 0.360
library(dplyr)
library(knitr)
library(broom)

# Helper function to extract all coefficients from a model and label them
extract_all_coefs <- function(model, method_label) {
  tidy(model) %>%
    mutate(Method = method_label) %>%
    dplyr::select(Method, term, estimate, std.error, statistic, p.value)
}

# Apply to all lagged Negative Binomial models
coef_md_lag_all     <- extract_all_coefs(results_md_lag$nb_model,     "Midpoint Imputation")
coef_lower_lag_all  <- extract_all_coefs(results_lower_lag$nb_model,  "Lower Bound Imputation")
coef_upper_lag_all  <- extract_all_coefs(results_upper_lag$nb_model,  "Upper Bound Imputation")
coef_mi_lag_all     <- extract_all_coefs(results_mi_lag$nb_model,     "Multiple Imputation")
coef_bayes1_lag_all <- extract_all_coefs(results_bayes1_lag$nb_model, "Bayesian Imputation 1")
coef_bayes2_lag_all <- extract_all_coefs(results_bayes2_lag$nb_model, "Bayesian Imputation 2")

# Combine into one big table
combined_all_coefs <- bind_rows(
  coef_md_lag_all,
  coef_lower_lag_all,
  coef_upper_lag_all,
  coef_mi_lag_all,
  coef_bayes1_lag_all,
  coef_bayes2_lag_all
)

# Optional: Print (first 20 rows only if table is too large)
kable(
  head(combined_all_coefs, 20),
  digits = 3,
  caption = "Sample of Coefficients from Lagged Negative Binomial Models"
)
Sample of Coefficients from Lagged Negative Binomial Models
Method term estimate std.error statistic p.value
Midpoint Imputation (Intercept) -163.886 5.255 -31.186 0.000
Midpoint Imputation Conviction_Count_lag1 0.009 0.011 0.814 0.415
Midpoint Imputation CountyBergen 0.268 0.108 2.489 0.013
Midpoint Imputation CountyBurlington 0.075 0.107 0.696 0.486
Midpoint Imputation CountyCamden 0.750 0.107 7.031 0.000
Midpoint Imputation CountyCape May -1.187 0.114 -10.410 0.000
Midpoint Imputation CountyCumberland -0.816 0.113 -7.249 0.000
Midpoint Imputation CountyEssex 0.784 0.107 7.336 0.000
Midpoint Imputation CountyGloucester 0.050 0.107 0.463 0.643
Midpoint Imputation CountyHudson 0.138 0.108 1.279 0.201
Midpoint Imputation CountyHunterdon -1.927 0.124 -15.497 0.000
Midpoint Imputation CountyMercer -0.367 0.110 -3.335 0.001
Midpoint Imputation CountyMiddlesex 0.394 0.107 3.697 0.000
Midpoint Imputation CountyMonmouth 0.286 0.108 2.664 0.008
Midpoint Imputation CountyMorris -0.307 0.109 -2.820 0.005
Midpoint Imputation CountyOcean 0.396 0.106 3.723 0.000
Midpoint Imputation CountyPassaic -0.021 0.108 -0.193 0.847
Midpoint Imputation CountySalem -1.566 0.120 -13.086 0.000
Midpoint Imputation CountySomerset -0.837 0.112 -7.461 0.000
Midpoint Imputation CountySussex -1.218 0.116 -10.539 0.000

Arrests/Overdoses/County

run_arrest_models <- function(data, label) {
  cat("\n=== Running Models for:", label, "===\n")
  
  # Poisson
  poisson_model <- glm(
    Deaths_num ~ Arrest_Count + factor(County) + factor(Year),
    family = poisson,
    data = data
  )
  
  # Check overdispersion
  dispersion <- sum(residuals(poisson_model, type = "pearson")^2) / poisson_model$df.residual
  cat("Overdispersion ratio (Poisson):", round(dispersion, 2), "\n")
  
  # Negative Binomial
  nb_model <- glm.nb(
    Deaths_num ~ Arrest_Count + factor(County) + factor(Year),
    data = data
  )
  
 # AIC summary as a data frame
  model_summary <- data.frame(
    Method = label,
    Poisson_AIC = AIC(poisson_model),
    NegBin_AIC = AIC(nb_model),
    Overdispersion = round(dispersion, 2)
  )
  
  return(list(
    label = label,
    poisson_model = poisson_model,
    nb_model = nb_model,
    poisson_summary = summary(poisson_model),
    nb_summary = summary(nb_model),
    summary = model_summary,
    dispersion = dispersion
  ))
}
# 1. Filter datasets for Year >= 2017
combined_df_county_md_2017   <- combined_df_county_md   %>% filter(Year >= 2017)
df_lower_2017                <- df_lower                %>% filter(Year >= 2017)
df_upper_2017                <- df_upper                %>% filter(Year >= 2017)
imputed_df_2017              <- imputed_df_MI              %>% filter(Year >= 2017)
combined_df_county_Bay1_2017 <- combined_df_county_Bay1 %>% filter(Year >= 2017)
combined_df_county_Bay2_2017 <- combined_df_county_Bay2 %>% filter(Year >= 2017)

# 2. Run models
results_md_arrest     <- run_arrest_models(combined_df_county_md_2017, "Midpoint Imputation")
## 
## === Running Models for: Midpoint Imputation ===
## Overdispersion ratio (Poisson): 2.26
results_lower_arrest  <- run_arrest_models(df_lower_2017,                "Lower Bound Imputation")
## 
## === Running Models for: Lower Bound Imputation ===
## Overdispersion ratio (Poisson): 2.26
results_upper_arrest  <- run_arrest_models(df_upper_2017,                "Upper Bound Imputation")
## 
## === Running Models for: Upper Bound Imputation ===
## Overdispersion ratio (Poisson): 2.26
results_mi_arrest     <- run_arrest_models(imputed_df_2017,              "Multiple Imputation")
## 
## === Running Models for: Multiple Imputation ===
## Overdispersion ratio (Poisson): 2.26
results_bayes1_arrest <- run_arrest_models(combined_df_county_Bay1_2017, "Bayesian Imputation 1")
## 
## === Running Models for: Bayesian Imputation 1 ===
## Overdispersion ratio (Poisson): 2.26
results_bayes2_arrest <- run_arrest_models(combined_df_county_Bay2_2017, "Bayesian Imputation 2")
## 
## === Running Models for: Bayesian Imputation 2 ===
## Overdispersion ratio (Poisson): 2.26
library(dplyr)
library(knitr)

# Combine AIC results from all model runs
model_summaries_24 <- bind_rows(
  results_md_arrest$summary,
results_lower_arrest$summary, 
results_upper_arrest$summary,
results_mi_arrest$summary,  
results_bayes1_arrest$summary,
results_bayes2_arrest$summary 
)

# Print as a markdown table
kable(model_summaries_24, caption = "Model Fit Comparison: AIC Values")
Model Fit Comparison: AIC Values
Method Poisson_AIC NegBin_AIC Overdispersion
Midpoint Imputation 1279.785 1241.888 2.26
Lower Bound Imputation 1279.785 1241.888 2.26
Upper Bound Imputation 1279.785 1241.888 2.26
Multiple Imputation 1279.785 1241.888 2.26
Bayesian Imputation 1 1279.785 1241.888 2.26
Bayesian Imputation 2 1279.785 1241.888 2.26
library(dplyr)
library(broom)
library(knitr)

# List of results
all_arrest_models <- list(
  results_md_arrest,
  results_lower_arrest,
  results_upper_arrest,
  results_mi_arrest,
  results_bayes1_arrest,
  results_bayes2_arrest
)

# Helper to extract and label coefficients from Poisson and NB models
extract_coefs <- function(model_list) {
  label <- model_list$label
  
  # Extract tidy summaries
  poisson_coefs <- broom::tidy(model_list$poisson_model) %>%
    mutate(Model = "Poisson", Method = label)
  
  nb_coefs <- broom::tidy(model_list$nb_model) %>%
    mutate(Model = "NegBin", Method = label)
  
  bind_rows(poisson_coefs, nb_coefs)
}

# Combine all coefficients
combined_coefs55 <- purrr::map_df(all_arrest_models, extract_coefs)

# Optional: Filter to terms of interest (e.g., Arrest_Count)
arrest_coefs <- combined_coefs55 %>%
  filter(term == "Arrest_Count") %>%
  dplyr::select(Method, Model, term, estimate, std.error, statistic, p.value)

# Print the results in a nice table
kable(combined_coefs55, digits = 3, caption = "Arrest Data Coefficients Across Models")
Arrest Data Coefficients Across Models
term estimate std.error statistic p.value Model Method
(Intercept) 5.022 0.040 127.046 0.000 Poisson Midpoint Imputation
Arrest_Count -0.002 0.002 -0.798 0.425 Poisson Midpoint Imputation
factor(County)Bergen -0.001 0.045 -0.027 0.978 Poisson Midpoint Imputation
factor(County)Burlington 0.000 0.042 -0.011 0.992 Poisson Midpoint Imputation
factor(County)Camden 0.604 0.041 14.838 0.000 Poisson Midpoint Imputation
factor(County)Cape May -1.146 0.061 -18.758 0.000 Poisson Midpoint Imputation
factor(County)Cumberland -0.654 0.054 -12.226 0.000 Poisson Midpoint Imputation
factor(County)Essex 0.733 0.039 18.691 0.000 Poisson Midpoint Imputation
factor(County)Gloucester -0.132 0.046 -2.887 0.004 Poisson Midpoint Imputation
factor(County)Hudson 0.047 0.045 1.047 0.295 Poisson Midpoint Imputation
factor(County)Hunterdon -2.136 0.093 -22.921 0.000 Poisson Midpoint Imputation
factor(County)Mercer -0.279 0.048 -5.754 0.000 Poisson Midpoint Imputation
factor(County)Middlesex 0.298 0.040 7.393 0.000 Poisson Midpoint Imputation
factor(County)Monmouth 0.127 0.043 2.990 0.003 Poisson Midpoint Imputation
factor(County)Morris -0.549 0.051 -10.795 0.000 Poisson Midpoint Imputation
factor(County)Ocean 0.316 0.040 7.937 0.000 Poisson Midpoint Imputation
factor(County)Passaic -0.140 0.046 -3.059 0.002 Poisson Midpoint Imputation
factor(County)Salem -1.593 0.075 -21.377 0.000 Poisson Midpoint Imputation
factor(County)Somerset -1.063 0.061 -17.324 0.000 Poisson Midpoint Imputation
factor(County)Sussex -1.361 0.068 -19.916 0.000 Poisson Midpoint Imputation
factor(County)Union -0.068 0.046 -1.474 0.140 Poisson Midpoint Imputation
factor(County)Warren -1.571 0.074 -21.268 0.000 Poisson Midpoint Imputation
factor(Year)2018 0.078 0.027 2.922 0.003 Poisson Midpoint Imputation
factor(Year)2019 0.045 0.027 1.678 0.093 Poisson Midpoint Imputation
factor(Year)2020 0.055 0.027 2.022 0.043 Poisson Midpoint Imputation
factor(Year)2021 0.128 0.027 4.815 0.000 Poisson Midpoint Imputation
factor(Year)2022 0.104 0.027 3.910 0.000 Poisson Midpoint Imputation
factor(Year)2023 -0.002 0.028 -0.069 0.945 Poisson Midpoint Imputation
(Intercept) 5.036 0.055 90.812 0.000 NegBin Midpoint Imputation
Arrest_Count -0.003 0.003 -0.817 0.414 NegBin Midpoint Imputation
factor(County)Bergen -0.008 0.062 -0.136 0.892 NegBin Midpoint Imputation
factor(County)Burlington 0.000 0.059 0.001 0.999 NegBin Midpoint Imputation
factor(County)Camden 0.599 0.060 9.935 0.000 NegBin Midpoint Imputation
factor(County)Cape May -1.148 0.074 -15.584 0.000 NegBin Midpoint Imputation
factor(County)Cumberland -0.659 0.069 -9.506 0.000 NegBin Midpoint Imputation
factor(County)Essex 0.730 0.059 12.426 0.000 NegBin Midpoint Imputation
factor(County)Gloucester -0.136 0.063 -2.157 0.031 NegBin Midpoint Imputation
factor(County)Hudson 0.041 0.064 0.650 0.515 NegBin Midpoint Imputation
factor(County)Hunterdon -2.141 0.103 -20.760 0.000 NegBin Midpoint Imputation
factor(County)Mercer -0.286 0.066 -4.348 0.000 NegBin Midpoint Imputation
factor(County)Middlesex 0.294 0.058 5.073 0.000 NegBin Midpoint Imputation
factor(County)Monmouth 0.123 0.060 2.043 0.041 NegBin Midpoint Imputation
factor(County)Morris -0.553 0.066 -8.322 0.000 NegBin Midpoint Imputation
factor(County)Ocean 0.314 0.058 5.458 0.000 NegBin Midpoint Imputation
factor(County)Passaic -0.145 0.063 -2.306 0.021 NegBin Midpoint Imputation
factor(County)Salem -1.599 0.087 -18.422 0.000 NegBin Midpoint Imputation
factor(County)Somerset -1.070 0.076 -14.095 0.000 NegBin Midpoint Imputation
factor(County)Sussex -1.367 0.082 -16.734 0.000 NegBin Midpoint Imputation
factor(County)Union -0.075 0.064 -1.169 0.243 NegBin Midpoint Imputation
factor(County)Warren -1.577 0.086 -18.285 0.000 NegBin Midpoint Imputation
factor(Year)2018 0.083 0.037 2.228 0.026 NegBin Midpoint Imputation
factor(Year)2019 0.042 0.038 1.110 0.267 NegBin Midpoint Imputation
factor(Year)2020 0.057 0.038 1.513 0.130 NegBin Midpoint Imputation
factor(Year)2021 0.120 0.037 3.204 0.001 NegBin Midpoint Imputation
factor(Year)2022 0.083 0.038 2.201 0.028 NegBin Midpoint Imputation
factor(Year)2023 -0.029 0.038 -0.752 0.452 NegBin Midpoint Imputation
(Intercept) 5.022 0.040 127.046 0.000 Poisson Lower Bound Imputation
Arrest_Count -0.002 0.002 -0.798 0.425 Poisson Lower Bound Imputation
factor(County)Bergen -0.001 0.045 -0.027 0.978 Poisson Lower Bound Imputation
factor(County)Burlington 0.000 0.042 -0.011 0.992 Poisson Lower Bound Imputation
factor(County)Camden 0.604 0.041 14.838 0.000 Poisson Lower Bound Imputation
factor(County)Cape May -1.146 0.061 -18.758 0.000 Poisson Lower Bound Imputation
factor(County)Cumberland -0.654 0.054 -12.226 0.000 Poisson Lower Bound Imputation
factor(County)Essex 0.733 0.039 18.691 0.000 Poisson Lower Bound Imputation
factor(County)Gloucester -0.132 0.046 -2.887 0.004 Poisson Lower Bound Imputation
factor(County)Hudson 0.047 0.045 1.047 0.295 Poisson Lower Bound Imputation
factor(County)Hunterdon -2.136 0.093 -22.921 0.000 Poisson Lower Bound Imputation
factor(County)Mercer -0.279 0.048 -5.754 0.000 Poisson Lower Bound Imputation
factor(County)Middlesex 0.298 0.040 7.393 0.000 Poisson Lower Bound Imputation
factor(County)Monmouth 0.127 0.043 2.990 0.003 Poisson Lower Bound Imputation
factor(County)Morris -0.549 0.051 -10.795 0.000 Poisson Lower Bound Imputation
factor(County)Ocean 0.316 0.040 7.937 0.000 Poisson Lower Bound Imputation
factor(County)Passaic -0.140 0.046 -3.059 0.002 Poisson Lower Bound Imputation
factor(County)Salem -1.593 0.075 -21.377 0.000 Poisson Lower Bound Imputation
factor(County)Somerset -1.063 0.061 -17.324 0.000 Poisson Lower Bound Imputation
factor(County)Sussex -1.361 0.068 -19.916 0.000 Poisson Lower Bound Imputation
factor(County)Union -0.068 0.046 -1.474 0.140 Poisson Lower Bound Imputation
factor(County)Warren -1.571 0.074 -21.268 0.000 Poisson Lower Bound Imputation
factor(Year)2018 0.078 0.027 2.922 0.003 Poisson Lower Bound Imputation
factor(Year)2019 0.045 0.027 1.678 0.093 Poisson Lower Bound Imputation
factor(Year)2020 0.055 0.027 2.022 0.043 Poisson Lower Bound Imputation
factor(Year)2021 0.128 0.027 4.815 0.000 Poisson Lower Bound Imputation
factor(Year)2022 0.104 0.027 3.910 0.000 Poisson Lower Bound Imputation
factor(Year)2023 -0.002 0.028 -0.069 0.945 Poisson Lower Bound Imputation
(Intercept) 5.036 0.055 90.812 0.000 NegBin Lower Bound Imputation
Arrest_Count -0.003 0.003 -0.817 0.414 NegBin Lower Bound Imputation
factor(County)Bergen -0.008 0.062 -0.136 0.892 NegBin Lower Bound Imputation
factor(County)Burlington 0.000 0.059 0.001 0.999 NegBin Lower Bound Imputation
factor(County)Camden 0.599 0.060 9.935 0.000 NegBin Lower Bound Imputation
factor(County)Cape May -1.148 0.074 -15.584 0.000 NegBin Lower Bound Imputation
factor(County)Cumberland -0.659 0.069 -9.506 0.000 NegBin Lower Bound Imputation
factor(County)Essex 0.730 0.059 12.426 0.000 NegBin Lower Bound Imputation
factor(County)Gloucester -0.136 0.063 -2.157 0.031 NegBin Lower Bound Imputation
factor(County)Hudson 0.041 0.064 0.650 0.515 NegBin Lower Bound Imputation
factor(County)Hunterdon -2.141 0.103 -20.760 0.000 NegBin Lower Bound Imputation
factor(County)Mercer -0.286 0.066 -4.348 0.000 NegBin Lower Bound Imputation
factor(County)Middlesex 0.294 0.058 5.073 0.000 NegBin Lower Bound Imputation
factor(County)Monmouth 0.123 0.060 2.043 0.041 NegBin Lower Bound Imputation
factor(County)Morris -0.553 0.066 -8.322 0.000 NegBin Lower Bound Imputation
factor(County)Ocean 0.314 0.058 5.458 0.000 NegBin Lower Bound Imputation
factor(County)Passaic -0.145 0.063 -2.306 0.021 NegBin Lower Bound Imputation
factor(County)Salem -1.599 0.087 -18.422 0.000 NegBin Lower Bound Imputation
factor(County)Somerset -1.070 0.076 -14.095 0.000 NegBin Lower Bound Imputation
factor(County)Sussex -1.367 0.082 -16.734 0.000 NegBin Lower Bound Imputation
factor(County)Union -0.075 0.064 -1.169 0.243 NegBin Lower Bound Imputation
factor(County)Warren -1.577 0.086 -18.285 0.000 NegBin Lower Bound Imputation
factor(Year)2018 0.083 0.037 2.228 0.026 NegBin Lower Bound Imputation
factor(Year)2019 0.042 0.038 1.110 0.267 NegBin Lower Bound Imputation
factor(Year)2020 0.057 0.038 1.513 0.130 NegBin Lower Bound Imputation
factor(Year)2021 0.120 0.037 3.204 0.001 NegBin Lower Bound Imputation
factor(Year)2022 0.083 0.038 2.201 0.028 NegBin Lower Bound Imputation
factor(Year)2023 -0.029 0.038 -0.752 0.452 NegBin Lower Bound Imputation
(Intercept) 5.022 0.040 127.046 0.000 Poisson Upper Bound Imputation
Arrest_Count -0.002 0.002 -0.798 0.425 Poisson Upper Bound Imputation
factor(County)Bergen -0.001 0.045 -0.027 0.978 Poisson Upper Bound Imputation
factor(County)Burlington 0.000 0.042 -0.011 0.992 Poisson Upper Bound Imputation
factor(County)Camden 0.604 0.041 14.838 0.000 Poisson Upper Bound Imputation
factor(County)Cape May -1.146 0.061 -18.758 0.000 Poisson Upper Bound Imputation
factor(County)Cumberland -0.654 0.054 -12.226 0.000 Poisson Upper Bound Imputation
factor(County)Essex 0.733 0.039 18.691 0.000 Poisson Upper Bound Imputation
factor(County)Gloucester -0.132 0.046 -2.887 0.004 Poisson Upper Bound Imputation
factor(County)Hudson 0.047 0.045 1.047 0.295 Poisson Upper Bound Imputation
factor(County)Hunterdon -2.136 0.093 -22.921 0.000 Poisson Upper Bound Imputation
factor(County)Mercer -0.279 0.048 -5.754 0.000 Poisson Upper Bound Imputation
factor(County)Middlesex 0.298 0.040 7.393 0.000 Poisson Upper Bound Imputation
factor(County)Monmouth 0.127 0.043 2.990 0.003 Poisson Upper Bound Imputation
factor(County)Morris -0.549 0.051 -10.795 0.000 Poisson Upper Bound Imputation
factor(County)Ocean 0.316 0.040 7.937 0.000 Poisson Upper Bound Imputation
factor(County)Passaic -0.140 0.046 -3.059 0.002 Poisson Upper Bound Imputation
factor(County)Salem -1.593 0.075 -21.377 0.000 Poisson Upper Bound Imputation
factor(County)Somerset -1.063 0.061 -17.324 0.000 Poisson Upper Bound Imputation
factor(County)Sussex -1.361 0.068 -19.916 0.000 Poisson Upper Bound Imputation
factor(County)Union -0.068 0.046 -1.474 0.140 Poisson Upper Bound Imputation
factor(County)Warren -1.571 0.074 -21.268 0.000 Poisson Upper Bound Imputation
factor(Year)2018 0.078 0.027 2.922 0.003 Poisson Upper Bound Imputation
factor(Year)2019 0.045 0.027 1.678 0.093 Poisson Upper Bound Imputation
factor(Year)2020 0.055 0.027 2.022 0.043 Poisson Upper Bound Imputation
factor(Year)2021 0.128 0.027 4.815 0.000 Poisson Upper Bound Imputation
factor(Year)2022 0.104 0.027 3.910 0.000 Poisson Upper Bound Imputation
factor(Year)2023 -0.002 0.028 -0.069 0.945 Poisson Upper Bound Imputation
(Intercept) 5.036 0.055 90.812 0.000 NegBin Upper Bound Imputation
Arrest_Count -0.003 0.003 -0.817 0.414 NegBin Upper Bound Imputation
factor(County)Bergen -0.008 0.062 -0.136 0.892 NegBin Upper Bound Imputation
factor(County)Burlington 0.000 0.059 0.001 0.999 NegBin Upper Bound Imputation
factor(County)Camden 0.599 0.060 9.935 0.000 NegBin Upper Bound Imputation
factor(County)Cape May -1.148 0.074 -15.584 0.000 NegBin Upper Bound Imputation
factor(County)Cumberland -0.659 0.069 -9.506 0.000 NegBin Upper Bound Imputation
factor(County)Essex 0.730 0.059 12.426 0.000 NegBin Upper Bound Imputation
factor(County)Gloucester -0.136 0.063 -2.157 0.031 NegBin Upper Bound Imputation
factor(County)Hudson 0.041 0.064 0.650 0.515 NegBin Upper Bound Imputation
factor(County)Hunterdon -2.141 0.103 -20.760 0.000 NegBin Upper Bound Imputation
factor(County)Mercer -0.286 0.066 -4.348 0.000 NegBin Upper Bound Imputation
factor(County)Middlesex 0.294 0.058 5.073 0.000 NegBin Upper Bound Imputation
factor(County)Monmouth 0.123 0.060 2.043 0.041 NegBin Upper Bound Imputation
factor(County)Morris -0.553 0.066 -8.322 0.000 NegBin Upper Bound Imputation
factor(County)Ocean 0.314 0.058 5.458 0.000 NegBin Upper Bound Imputation
factor(County)Passaic -0.145 0.063 -2.306 0.021 NegBin Upper Bound Imputation
factor(County)Salem -1.599 0.087 -18.422 0.000 NegBin Upper Bound Imputation
factor(County)Somerset -1.070 0.076 -14.095 0.000 NegBin Upper Bound Imputation
factor(County)Sussex -1.367 0.082 -16.734 0.000 NegBin Upper Bound Imputation
factor(County)Union -0.075 0.064 -1.169 0.243 NegBin Upper Bound Imputation
factor(County)Warren -1.577 0.086 -18.285 0.000 NegBin Upper Bound Imputation
factor(Year)2018 0.083 0.037 2.228 0.026 NegBin Upper Bound Imputation
factor(Year)2019 0.042 0.038 1.110 0.267 NegBin Upper Bound Imputation
factor(Year)2020 0.057 0.038 1.513 0.130 NegBin Upper Bound Imputation
factor(Year)2021 0.120 0.037 3.204 0.001 NegBin Upper Bound Imputation
factor(Year)2022 0.083 0.038 2.201 0.028 NegBin Upper Bound Imputation
factor(Year)2023 -0.029 0.038 -0.752 0.452 NegBin Upper Bound Imputation
(Intercept) 5.022 0.040 127.046 0.000 Poisson Multiple Imputation
Arrest_Count -0.002 0.002 -0.798 0.425 Poisson Multiple Imputation
factor(County)Bergen -0.001 0.045 -0.027 0.978 Poisson Multiple Imputation
factor(County)Burlington 0.000 0.042 -0.011 0.992 Poisson Multiple Imputation
factor(County)Camden 0.604 0.041 14.838 0.000 Poisson Multiple Imputation
factor(County)Cape May -1.146 0.061 -18.758 0.000 Poisson Multiple Imputation
factor(County)Cumberland -0.654 0.054 -12.226 0.000 Poisson Multiple Imputation
factor(County)Essex 0.733 0.039 18.691 0.000 Poisson Multiple Imputation
factor(County)Gloucester -0.132 0.046 -2.887 0.004 Poisson Multiple Imputation
factor(County)Hudson 0.047 0.045 1.047 0.295 Poisson Multiple Imputation
factor(County)Hunterdon -2.136 0.093 -22.921 0.000 Poisson Multiple Imputation
factor(County)Mercer -0.279 0.048 -5.754 0.000 Poisson Multiple Imputation
factor(County)Middlesex 0.298 0.040 7.393 0.000 Poisson Multiple Imputation
factor(County)Monmouth 0.127 0.043 2.990 0.003 Poisson Multiple Imputation
factor(County)Morris -0.549 0.051 -10.795 0.000 Poisson Multiple Imputation
factor(County)Ocean 0.316 0.040 7.937 0.000 Poisson Multiple Imputation
factor(County)Passaic -0.140 0.046 -3.059 0.002 Poisson Multiple Imputation
factor(County)Salem -1.593 0.075 -21.377 0.000 Poisson Multiple Imputation
factor(County)Somerset -1.063 0.061 -17.324 0.000 Poisson Multiple Imputation
factor(County)Sussex -1.361 0.068 -19.916 0.000 Poisson Multiple Imputation
factor(County)Union -0.068 0.046 -1.474 0.140 Poisson Multiple Imputation
factor(County)Warren -1.571 0.074 -21.268 0.000 Poisson Multiple Imputation
factor(Year)2018 0.078 0.027 2.922 0.003 Poisson Multiple Imputation
factor(Year)2019 0.045 0.027 1.678 0.093 Poisson Multiple Imputation
factor(Year)2020 0.055 0.027 2.022 0.043 Poisson Multiple Imputation
factor(Year)2021 0.128 0.027 4.815 0.000 Poisson Multiple Imputation
factor(Year)2022 0.104 0.027 3.910 0.000 Poisson Multiple Imputation
factor(Year)2023 -0.002 0.028 -0.069 0.945 Poisson Multiple Imputation
(Intercept) 5.036 0.055 90.812 0.000 NegBin Multiple Imputation
Arrest_Count -0.003 0.003 -0.817 0.414 NegBin Multiple Imputation
factor(County)Bergen -0.008 0.062 -0.136 0.892 NegBin Multiple Imputation
factor(County)Burlington 0.000 0.059 0.001 0.999 NegBin Multiple Imputation
factor(County)Camden 0.599 0.060 9.935 0.000 NegBin Multiple Imputation
factor(County)Cape May -1.148 0.074 -15.584 0.000 NegBin Multiple Imputation
factor(County)Cumberland -0.659 0.069 -9.506 0.000 NegBin Multiple Imputation
factor(County)Essex 0.730 0.059 12.426 0.000 NegBin Multiple Imputation
factor(County)Gloucester -0.136 0.063 -2.157 0.031 NegBin Multiple Imputation
factor(County)Hudson 0.041 0.064 0.650 0.515 NegBin Multiple Imputation
factor(County)Hunterdon -2.141 0.103 -20.760 0.000 NegBin Multiple Imputation
factor(County)Mercer -0.286 0.066 -4.348 0.000 NegBin Multiple Imputation
factor(County)Middlesex 0.294 0.058 5.073 0.000 NegBin Multiple Imputation
factor(County)Monmouth 0.123 0.060 2.043 0.041 NegBin Multiple Imputation
factor(County)Morris -0.553 0.066 -8.322 0.000 NegBin Multiple Imputation
factor(County)Ocean 0.314 0.058 5.458 0.000 NegBin Multiple Imputation
factor(County)Passaic -0.145 0.063 -2.306 0.021 NegBin Multiple Imputation
factor(County)Salem -1.599 0.087 -18.422 0.000 NegBin Multiple Imputation
factor(County)Somerset -1.070 0.076 -14.095 0.000 NegBin Multiple Imputation
factor(County)Sussex -1.367 0.082 -16.734 0.000 NegBin Multiple Imputation
factor(County)Union -0.075 0.064 -1.169 0.243 NegBin Multiple Imputation
factor(County)Warren -1.577 0.086 -18.285 0.000 NegBin Multiple Imputation
factor(Year)2018 0.083 0.037 2.228 0.026 NegBin Multiple Imputation
factor(Year)2019 0.042 0.038 1.110 0.267 NegBin Multiple Imputation
factor(Year)2020 0.057 0.038 1.513 0.130 NegBin Multiple Imputation
factor(Year)2021 0.120 0.037 3.204 0.001 NegBin Multiple Imputation
factor(Year)2022 0.083 0.038 2.201 0.028 NegBin Multiple Imputation
factor(Year)2023 -0.029 0.038 -0.752 0.452 NegBin Multiple Imputation
(Intercept) 5.022 0.040 127.046 0.000 Poisson Bayesian Imputation 1
Arrest_Count -0.002 0.002 -0.798 0.425 Poisson Bayesian Imputation 1
factor(County)Bergen -0.001 0.045 -0.027 0.978 Poisson Bayesian Imputation 1
factor(County)Burlington 0.000 0.042 -0.011 0.992 Poisson Bayesian Imputation 1
factor(County)Camden 0.604 0.041 14.838 0.000 Poisson Bayesian Imputation 1
factor(County)Cape May -1.146 0.061 -18.758 0.000 Poisson Bayesian Imputation 1
factor(County)Cumberland -0.654 0.054 -12.226 0.000 Poisson Bayesian Imputation 1
factor(County)Essex 0.733 0.039 18.691 0.000 Poisson Bayesian Imputation 1
factor(County)Gloucester -0.132 0.046 -2.887 0.004 Poisson Bayesian Imputation 1
factor(County)Hudson 0.047 0.045 1.047 0.295 Poisson Bayesian Imputation 1
factor(County)Hunterdon -2.136 0.093 -22.921 0.000 Poisson Bayesian Imputation 1
factor(County)Mercer -0.279 0.048 -5.754 0.000 Poisson Bayesian Imputation 1
factor(County)Middlesex 0.298 0.040 7.393 0.000 Poisson Bayesian Imputation 1
factor(County)Monmouth 0.127 0.043 2.990 0.003 Poisson Bayesian Imputation 1
factor(County)Morris -0.549 0.051 -10.795 0.000 Poisson Bayesian Imputation 1
factor(County)Ocean 0.316 0.040 7.937 0.000 Poisson Bayesian Imputation 1
factor(County)Passaic -0.140 0.046 -3.059 0.002 Poisson Bayesian Imputation 1
factor(County)Salem -1.593 0.075 -21.377 0.000 Poisson Bayesian Imputation 1
factor(County)Somerset -1.063 0.061 -17.324 0.000 Poisson Bayesian Imputation 1
factor(County)Sussex -1.361 0.068 -19.916 0.000 Poisson Bayesian Imputation 1
factor(County)Union -0.068 0.046 -1.474 0.140 Poisson Bayesian Imputation 1
factor(County)Warren -1.571 0.074 -21.268 0.000 Poisson Bayesian Imputation 1
factor(Year)2018 0.078 0.027 2.922 0.003 Poisson Bayesian Imputation 1
factor(Year)2019 0.045 0.027 1.678 0.093 Poisson Bayesian Imputation 1
factor(Year)2020 0.055 0.027 2.022 0.043 Poisson Bayesian Imputation 1
factor(Year)2021 0.128 0.027 4.815 0.000 Poisson Bayesian Imputation 1
factor(Year)2022 0.104 0.027 3.910 0.000 Poisson Bayesian Imputation 1
factor(Year)2023 -0.002 0.028 -0.069 0.945 Poisson Bayesian Imputation 1
(Intercept) 5.036 0.055 90.812 0.000 NegBin Bayesian Imputation 1
Arrest_Count -0.003 0.003 -0.817 0.414 NegBin Bayesian Imputation 1
factor(County)Bergen -0.008 0.062 -0.136 0.892 NegBin Bayesian Imputation 1
factor(County)Burlington 0.000 0.059 0.001 0.999 NegBin Bayesian Imputation 1
factor(County)Camden 0.599 0.060 9.935 0.000 NegBin Bayesian Imputation 1
factor(County)Cape May -1.148 0.074 -15.584 0.000 NegBin Bayesian Imputation 1
factor(County)Cumberland -0.659 0.069 -9.506 0.000 NegBin Bayesian Imputation 1
factor(County)Essex 0.730 0.059 12.426 0.000 NegBin Bayesian Imputation 1
factor(County)Gloucester -0.136 0.063 -2.157 0.031 NegBin Bayesian Imputation 1
factor(County)Hudson 0.041 0.064 0.650 0.515 NegBin Bayesian Imputation 1
factor(County)Hunterdon -2.141 0.103 -20.760 0.000 NegBin Bayesian Imputation 1
factor(County)Mercer -0.286 0.066 -4.348 0.000 NegBin Bayesian Imputation 1
factor(County)Middlesex 0.294 0.058 5.073 0.000 NegBin Bayesian Imputation 1
factor(County)Monmouth 0.123 0.060 2.043 0.041 NegBin Bayesian Imputation 1
factor(County)Morris -0.553 0.066 -8.322 0.000 NegBin Bayesian Imputation 1
factor(County)Ocean 0.314 0.058 5.458 0.000 NegBin Bayesian Imputation 1
factor(County)Passaic -0.145 0.063 -2.306 0.021 NegBin Bayesian Imputation 1
factor(County)Salem -1.599 0.087 -18.422 0.000 NegBin Bayesian Imputation 1
factor(County)Somerset -1.070 0.076 -14.095 0.000 NegBin Bayesian Imputation 1
factor(County)Sussex -1.367 0.082 -16.734 0.000 NegBin Bayesian Imputation 1
factor(County)Union -0.075 0.064 -1.169 0.243 NegBin Bayesian Imputation 1
factor(County)Warren -1.577 0.086 -18.285 0.000 NegBin Bayesian Imputation 1
factor(Year)2018 0.083 0.037 2.228 0.026 NegBin Bayesian Imputation 1
factor(Year)2019 0.042 0.038 1.110 0.267 NegBin Bayesian Imputation 1
factor(Year)2020 0.057 0.038 1.513 0.130 NegBin Bayesian Imputation 1
factor(Year)2021 0.120 0.037 3.204 0.001 NegBin Bayesian Imputation 1
factor(Year)2022 0.083 0.038 2.201 0.028 NegBin Bayesian Imputation 1
factor(Year)2023 -0.029 0.038 -0.752 0.452 NegBin Bayesian Imputation 1
(Intercept) 5.022 0.040 127.046 0.000 Poisson Bayesian Imputation 2
Arrest_Count -0.002 0.002 -0.798 0.425 Poisson Bayesian Imputation 2
factor(County)Bergen -0.001 0.045 -0.027 0.978 Poisson Bayesian Imputation 2
factor(County)Burlington 0.000 0.042 -0.011 0.992 Poisson Bayesian Imputation 2
factor(County)Camden 0.604 0.041 14.838 0.000 Poisson Bayesian Imputation 2
factor(County)Cape May -1.146 0.061 -18.758 0.000 Poisson Bayesian Imputation 2
factor(County)Cumberland -0.654 0.054 -12.226 0.000 Poisson Bayesian Imputation 2
factor(County)Essex 0.733 0.039 18.691 0.000 Poisson Bayesian Imputation 2
factor(County)Gloucester -0.132 0.046 -2.887 0.004 Poisson Bayesian Imputation 2
factor(County)Hudson 0.047 0.045 1.047 0.295 Poisson Bayesian Imputation 2
factor(County)Hunterdon -2.136 0.093 -22.921 0.000 Poisson Bayesian Imputation 2
factor(County)Mercer -0.279 0.048 -5.754 0.000 Poisson Bayesian Imputation 2
factor(County)Middlesex 0.298 0.040 7.393 0.000 Poisson Bayesian Imputation 2
factor(County)Monmouth 0.127 0.043 2.990 0.003 Poisson Bayesian Imputation 2
factor(County)Morris -0.549 0.051 -10.795 0.000 Poisson Bayesian Imputation 2
factor(County)Ocean 0.316 0.040 7.937 0.000 Poisson Bayesian Imputation 2
factor(County)Passaic -0.140 0.046 -3.059 0.002 Poisson Bayesian Imputation 2
factor(County)Salem -1.593 0.075 -21.377 0.000 Poisson Bayesian Imputation 2
factor(County)Somerset -1.063 0.061 -17.324 0.000 Poisson Bayesian Imputation 2
factor(County)Sussex -1.361 0.068 -19.916 0.000 Poisson Bayesian Imputation 2
factor(County)Union -0.068 0.046 -1.474 0.140 Poisson Bayesian Imputation 2
factor(County)Warren -1.571 0.074 -21.268 0.000 Poisson Bayesian Imputation 2
factor(Year)2018 0.078 0.027 2.922 0.003 Poisson Bayesian Imputation 2
factor(Year)2019 0.045 0.027 1.678 0.093 Poisson Bayesian Imputation 2
factor(Year)2020 0.055 0.027 2.022 0.043 Poisson Bayesian Imputation 2
factor(Year)2021 0.128 0.027 4.815 0.000 Poisson Bayesian Imputation 2
factor(Year)2022 0.104 0.027 3.910 0.000 Poisson Bayesian Imputation 2
factor(Year)2023 -0.002 0.028 -0.069 0.945 Poisson Bayesian Imputation 2
(Intercept) 5.036 0.055 90.812 0.000 NegBin Bayesian Imputation 2
Arrest_Count -0.003 0.003 -0.817 0.414 NegBin Bayesian Imputation 2
factor(County)Bergen -0.008 0.062 -0.136 0.892 NegBin Bayesian Imputation 2
factor(County)Burlington 0.000 0.059 0.001 0.999 NegBin Bayesian Imputation 2
factor(County)Camden 0.599 0.060 9.935 0.000 NegBin Bayesian Imputation 2
factor(County)Cape May -1.148 0.074 -15.584 0.000 NegBin Bayesian Imputation 2
factor(County)Cumberland -0.659 0.069 -9.506 0.000 NegBin Bayesian Imputation 2
factor(County)Essex 0.730 0.059 12.426 0.000 NegBin Bayesian Imputation 2
factor(County)Gloucester -0.136 0.063 -2.157 0.031 NegBin Bayesian Imputation 2
factor(County)Hudson 0.041 0.064 0.650 0.515 NegBin Bayesian Imputation 2
factor(County)Hunterdon -2.141 0.103 -20.760 0.000 NegBin Bayesian Imputation 2
factor(County)Mercer -0.286 0.066 -4.348 0.000 NegBin Bayesian Imputation 2
factor(County)Middlesex 0.294 0.058 5.073 0.000 NegBin Bayesian Imputation 2
factor(County)Monmouth 0.123 0.060 2.043 0.041 NegBin Bayesian Imputation 2
factor(County)Morris -0.553 0.066 -8.322 0.000 NegBin Bayesian Imputation 2
factor(County)Ocean 0.314 0.058 5.458 0.000 NegBin Bayesian Imputation 2
factor(County)Passaic -0.145 0.063 -2.306 0.021 NegBin Bayesian Imputation 2
factor(County)Salem -1.599 0.087 -18.422 0.000 NegBin Bayesian Imputation 2
factor(County)Somerset -1.070 0.076 -14.095 0.000 NegBin Bayesian Imputation 2
factor(County)Sussex -1.367 0.082 -16.734 0.000 NegBin Bayesian Imputation 2
factor(County)Union -0.075 0.064 -1.169 0.243 NegBin Bayesian Imputation 2
factor(County)Warren -1.577 0.086 -18.285 0.000 NegBin Bayesian Imputation 2
factor(Year)2018 0.083 0.037 2.228 0.026 NegBin Bayesian Imputation 2
factor(Year)2019 0.042 0.038 1.110 0.267 NegBin Bayesian Imputation 2
factor(Year)2020 0.057 0.038 1.513 0.130 NegBin Bayesian Imputation 2
factor(Year)2021 0.120 0.037 3.204 0.001 NegBin Bayesian Imputation 2
factor(Year)2022 0.083 0.038 2.201 0.028 NegBin Bayesian Imputation 2
factor(Year)2023 -0.029 0.038 -0.752 0.452 NegBin Bayesian Imputation 2
kable(arrest_coefs, digits = 3, caption = "Arrest_Count Coefficients Across Models")
Arrest_Count Coefficients Across Models
Method Model term estimate std.error statistic p.value
Midpoint Imputation Poisson Arrest_Count -0.002 0.002 -0.798 0.425
Midpoint Imputation NegBin Arrest_Count -0.003 0.003 -0.817 0.414
Lower Bound Imputation Poisson Arrest_Count -0.002 0.002 -0.798 0.425
Lower Bound Imputation NegBin Arrest_Count -0.003 0.003 -0.817 0.414
Upper Bound Imputation Poisson Arrest_Count -0.002 0.002 -0.798 0.425
Upper Bound Imputation NegBin Arrest_Count -0.003 0.003 -0.817 0.414
Multiple Imputation Poisson Arrest_Count -0.002 0.002 -0.798 0.425
Multiple Imputation NegBin Arrest_Count -0.003 0.003 -0.817 0.414
Bayesian Imputation 1 Poisson Arrest_Count -0.002 0.002 -0.798 0.425
Bayesian Imputation 1 NegBin Arrest_Count -0.003 0.003 -0.817 0.414
Bayesian Imputation 2 Poisson Arrest_Count -0.002 0.002 -0.798 0.425
Bayesian Imputation 2 NegBin Arrest_Count -0.003 0.003 -0.817 0.414

Arrest_Count is not a statistically significant predictor, so there is not much to report here. @Eli, any other ideas on how I can model this better would be greatly appreciated!
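
One option worth considering (a sketch only, not part of the analysis above): count models of deaths are often specified as rates by adding a population offset. Assuming a county-year Population column could be merged into the imputed data frames (the column name here is illustrative), the negative binomial specification would look like:

# Sketch: model death *rates* rather than counts via a population offset.
# Assumes a county-year `Population` column is available; illustrative only.
nb_offset_model <- MASS::glm.nb(
  Deaths_num ~ Arrest_Count + factor(County) + factor(Year) +
    offset(log(Population)),
  data = combined_df_county_md %>% filter(Year >= 2017)
)
summary(nb_offset_model)$coefficients["Arrest_Count", ]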

# Define reusable lagging function
lag_arrest <- function(df) {
  df %>%
    arrange(County, Year) %>%
    group_by(County) %>%
    mutate(Arrest_Count_lag1 = lag(Arrest_Count, 1)) %>%
    ungroup()
}
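
A quick toy check (illustrative only, not part of the original pipeline) shows what lag_arrest() does: within each county, Arrest_Count_lag1 takes the previous year's arrest count, and the first year per county is NA, so those rows drop out of the lagged models below.

# Toy data: two counties, three years each
toy <- data.frame(
  County = rep(c("A", "B"), each = 3),
  Year = rep(2017:2019, times = 2),
  Arrest_Count = c(5, 7, 9, 2, 4, 6)
)
lag_arrest(toy)
# Arrest_Count_lag1: NA, 5, 7 for county A and NA, 2, 4 for county B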

# Apply to each imputed dataset
combined_df_county_md_lag_arrest   <- lag_arrest(combined_df_county_md)
df_lower_lag_arrest                <- lag_arrest(df_lower)
df_upper_lag_arrest                <- lag_arrest(df_upper)
imputed_df_MI_lag_arrest           <- lag_arrest(imputed_df_MI)
combined_df_county_Bay1_lag_arrest <- lag_arrest(combined_df_county_Bay1)
combined_df_county_Bay2_lag_arrest <- lag_arrest(combined_df_county_Bay2)
run_lagged_arrest_models <- function(data, label) {
  cat("\n=== Running Lagged Arrest Models for:", label, "===\n")

  # Filter for Year >= 2017
  data <- data %>% filter(Year >= 2017)

  # Fit Poisson model
  poisson_model <- glm(
    Deaths_num ~ Arrest_Count_lag1 + factor(County) + factor(Year),
    family = poisson,
    data = data
  )

  # Check for overdispersion
  dispersion <- sum(residuals(poisson_model, type = "pearson")^2) / poisson_model$df.residual
  cat("Overdispersion ratio (Poisson):", round(dispersion, 2), "\n")

  # Fit NB model
  nb_model <- MASS::glm.nb(
    Deaths_num ~ Arrest_Count_lag1 + factor(County) + factor(Year),
    data = data
  )

  return(list(
    label = label,
    poisson_model = poisson_model,
    nb_model = nb_model,
    poisson_summary = summary(poisson_model),
    nb_summary = summary(nb_model),
    poisson_AIC = AIC(poisson_model),
    nb_AIC = AIC(nb_model),
    dispersion = dispersion
  ))
}
results_md_arrest_lag     <- run_lagged_arrest_models(combined_df_county_md_lag_arrest,   "Midpoint Imputation")
## 
## === Running Lagged Arrest Models for: Midpoint Imputation ===
## Overdispersion ratio (Poisson): 2.19
results_lower_arrest_lag  <- run_lagged_arrest_models(df_lower_lag_arrest,                "Lower Bound Imputation")
## 
## === Running Lagged Arrest Models for: Lower Bound Imputation ===
## Overdispersion ratio (Poisson): 2.19
results_upper_arrest_lag  <- run_lagged_arrest_models(df_upper_lag_arrest,                "Upper Bound Imputation")
## 
## === Running Lagged Arrest Models for: Upper Bound Imputation ===
## Overdispersion ratio (Poisson): 2.19
results_mi_arrest_lag     <- run_lagged_arrest_models(imputed_df_MI_lag_arrest,           "Multiple Imputation")
## 
## === Running Lagged Arrest Models for: Multiple Imputation ===
## Overdispersion ratio (Poisson): 2.26
results_bayes1_arrest_lag <- run_lagged_arrest_models(combined_df_county_Bay1_lag_arrest, "Bayesian Imputation 1")
## 
## === Running Lagged Arrest Models for: Bayesian Imputation 1 ===
## Overdispersion ratio (Poisson): 2.19
results_bayes2_arrest_lag <- run_lagged_arrest_models(combined_df_county_Bay2_lag_arrest, "Bayesian Imputation 2")
## 
## === Running Lagged Arrest Models for: Bayesian Imputation 2 ===
## Overdispersion ratio (Poisson): 2.19
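
The Pearson ratios of about 2.2 printed above are an informal check; a formal score test of equidispersion could also be run on any of the fitted Poisson models (a sketch, assuming the AER package is installed):

# Formal overdispersion test for a fitted Poisson GLM (H0: equidispersion)
library(AER)
dispersiontest(results_md_arrest_lag$poisson_model, alternative = "greater")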
library(broom)

extract_arrest_lag_coefs <- function(model, method_label) {
  tidy(model) %>%
    filter(term == "Arrest_Count_lag1") %>%
    mutate(Method = method_label) %>%
    dplyr::select(Method, term, estimate, std.error, statistic, p.value)
}

arrest_lag_coefs <- bind_rows(
  extract_arrest_lag_coefs(results_md_arrest_lag$nb_model,     "Midpoint"),
  extract_arrest_lag_coefs(results_lower_arrest_lag$nb_model,  "Lower Bound"),
  extract_arrest_lag_coefs(results_upper_arrest_lag$nb_model,  "Upper Bound"),
  extract_arrest_lag_coefs(results_mi_arrest_lag$nb_model,     "Multiple Imputation"),
  extract_arrest_lag_coefs(results_bayes1_arrest_lag$nb_model, "Bayesian 1"),
  extract_arrest_lag_coefs(results_bayes2_arrest_lag$nb_model, "Bayesian 2")
)

# Display results
knitr::kable(arrest_lag_coefs, digits = 3, caption = "Effect of Lagged Arrests on Overdose Deaths (NB Models)")
Effect of Lagged Arrests on Overdose Deaths (NB Models)
Method term estimate std.error statistic p.value
Midpoint Arrest_Count_lag1 0.001 0.003 0.231 0.817
Lower Bound Arrest_Count_lag1 0.001 0.003 0.231 0.817
Upper Bound Arrest_Count_lag1 0.001 0.003 0.231 0.817
Multiple Imputation Arrest_Count_lag1 0.000 0.003 0.027 0.979
Bayesian 1 Arrest_Count_lag1 0.001 0.003 0.231 0.817
Bayesian 2 Arrest_Count_lag1 0.001 0.003 0.231 0.817

The lagged Arrest_Count is still not a statistically significant predictor.
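
To put the near-null estimate on an interpretable scale, the lagged-arrest coefficient can be exponentiated into an incidence-rate ratio with a rough 95% CI (shown here for the midpoint-imputation NB model; illustrative only):

# IRR and 95% CI for the lagged arrest effect (midpoint-imputation NB model)
est <- unname(coef(results_md_arrest_lag$nb_model)["Arrest_Count_lag1"])
se  <- summary(results_md_arrest_lag$nb_model)$coefficients["Arrest_Count_lag1", "Std. Error"]
exp(c(IRR = est, lower = est - 1.96 * se, upper = est + 1.96 * se))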

Possible theoretical explanation: one year is too long an increment to detect an effect. The literature suggests the impact appears roughly one month after arrests/convictions. We can rerun this analysis when we get the CDC's unsuppressed monthly county data.
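
If and when the monthly county data arrive, the same lagging approach could be applied at the month level (a sketch; monthly_county_df and its columns are assumed, since that data set is not yet in hand):

# Sketch: one-month lag of arrests within each county, assuming a future
# monthly county-level data frame `monthly_county_df` with columns
# County, Year, Month, Arrest_Count, and Deaths_num.
lag_arrest_month <- function(df) {
  df %>%
    arrange(County, Year, Month) %>%
    group_by(County) %>%
    mutate(Arrest_Count_lag1m = lag(Arrest_Count, 1)) %>%
    ungroup()
}

# monthly_county_lagged <- lag_arrest_month(monthly_county_df)
# MASS::glm.nb(
#   Deaths_num ~ Arrest_Count_lag1m + factor(County) + factor(Month) + factor(Year),
#   data = monthly_county_lagged
# )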

~~~~~~~~~~~~~~~~ Notes on Dataset ~~~~~~~~~~~~~~~~

The state monthly overdose data are a combination of CDC WONDER data (1999-2022) and counts of "unconfirmed ODs" for 2023-2025 from the State of New Jersey, Office of the Chief State Medical Examiner dashboard (https://ocsme.nj.gov/dashboard).

I added the 2024 NJ population data from NJ Spotlight News (https://www.njspotlightnews.org/2025/01/nj-population-2024-census/) because the U.S. Census Bureau website was down during the government shutdown.