EPI 553 Final Report

Data Loading

# Load required packages
library(tinytex)
library(tidyverse)
library(kableExtra)
library(broom)
library(haven)
library(janitor)
library(knitr)
library(plotly)
library(ggeffects)
library(gtsummary)
library(GGally)
library(car)
library(lmtest)
library(corrplot)

# Import the dataset
brfss_full <- read_xpt("/Users/joshm/Documents/UAlbany/Spring 2026/EPI 553/Project/BRFSS.XPT") %>%
  clean_names()

Data Cleaning & Recoding

# keep only key variables
brfss_sub <- brfss_full %>%
  select(seqno, menthlth, drnk3ge5, avedrnk4, maxdrnks, sex, ageg5yr, incomg1, marijan1)


cat("Mental Health Days Missing data:", sum(is.na(brfss_sub$menthlth)), "\n")

## Mental Health Days Missing data: 3

a <- nrow(brfss_sub)
b <- sum(is.na(brfss_sub$menthlth))
c <- (b / a)*100

cat("Mental Health Days missing data %:", c , "\n")

## Mental Health Days missing data %: 0.0006554941

cat("Binge Drinking Frequency Missing data:", sum(is.na(brfss_sub$drnk3ge5)), "\n")

## Binge Drinking Frequency Missing data: 247988

a <- nrow(brfss_sub)
b <- sum(is.na(brfss_sub$drnk3ge5))
c <- (b / a)*100

cat("Binge Drinking Frequency missing data %:", c , "\n")

## Binge Drinking Frequency missing data %: 54.18489

cat("Average Drinks Missing data:", sum(is.na(brfss_sub$avedrnk4)), "\n")

## Average Drinks Missing data: 247519

a <- nrow(brfss_sub)
b <- sum(is.na(brfss_sub$avedrnk4))
c <- (b / a)*100

cat("Average Drinks missing data %:", c , "\n")

## Average Drinks missing data %: 54.08242

cat("Maximum Drinks Missing data:", sum(is.na(brfss_sub$maxdrnks)), "\n")

## Maximum Drinks Missing data: 248422

a <- nrow(brfss_sub)
b <- sum(is.na(brfss_sub$maxdrnks))
c <- (b / a)*100

cat("Maximum Drinks missing data %:", c , "\n")

## Maximum Drinks missing data %: 54.27972

#remove missing data from key variables
brfss_sub <- brfss_sub %>%
  filter(
    !is.na(seqno),
    !is.na(menthlth),
    !is.na(drnk3ge5),
    !is.na(avedrnk4),
    !is.na(maxdrnks),
    !is.na(sex),
    !is.na(ageg5yr),
    !is.na(incomg1),
    !is.na(marijan1)
    
  )
#Remove invalid data, recode key variables, add labels
brfss_sub2 <- brfss_sub %>%
  filter(
    ageg5yr != 14,
    incomg1 != 9,
    
    menthlth != 77, menthlth != 99,
    drnk3ge5 != 77, drnk3ge5 != 99,
    marijan1 != 77, marijan1 != 99,
    
    avedrnk4 != 77, avedrnk4 != 99, avedrnk4 != 88,   
    
    maxdrnks != 77, maxdrnks != 99, maxdrnks != 88, maxdrnks != 0)

#recoding
brfss_sub2 <- brfss_sub2 %>%
  mutate(
      menthlth_new = case_when(menthlth == 88 ~ 0,
      menthlth >= 1 & menthlth <= 30 ~ as.numeric(menthlth),
      TRUE                        ~ NA_real_),
      
      drnk3ge5_new = case_when(drnk3ge5 == 88 ~ 0,
      drnk3ge5 >= 1 & drnk3ge5 <= 76 ~ as.numeric(drnk3ge5),
      TRUE                        ~ NA_real_),
      
      marijan1_new = case_when(marijan1 == 88 ~ 0,
      marijan1 >= 1 & marijan1 <= 30 ~ as.numeric(marijan1),
      TRUE                        ~ NA_real_),
 
      sex_new = factor(sex, levels = c(1, 2), labels = c("Male", "Female")),
      
      age_new = factor(ageg5yr, levels = 1:13,
      labels = c("18-24", "25-29", "30-34", "35-39", "40-44", "45-49", "50-54", "55-59", "60-64", "65-69", "70-74", "75-79", "80+") ),
      
      income_new = factor(incomg1, levels = 1:7,
      labels = c("<$15k", "$15-25k", "$25-35k", "$35-50k",
                 "$50-100k", "$100-200k", ">$200k"))
  
       ) 

# binge drinking dataset
brfss_binge <- brfss_sub2 %>%
  filter(drnk3ge5_new > 0, 
         sex_new == "Female" & maxdrnks>=4 | sex_new == "Male" & maxdrnks>=5)

# Random sample of final datasets
set.seed(1220) 
brfss_sub2 <- brfss_sub2 |> 
slice_sample(n = 8000) 

set.seed(1220) 
brfss_binge <- brfss_binge|> 
slice_sample(n = 8000)

Descriptive Statistics

brfss_sub2 %>%
  select(menthlth_new, #outcome
         drnk3ge5_new, #exposure
         marijan1_new, sex_new, age_new, income_new #covariates
         ) %>% 
  tbl_summary(
    label = list(
      menthlth_new ~ "Mentally unhealthy days (past 30)",
      drnk3ge5_new ~ "Binge drinking frequency (past 30 days)",
      marijan1_new     ~ "Marijuana use days (past 30)",
      sex_new           ~ "Sex",
      age_new      ~ "Age category",
      income_new           ~ "Income category"
    ),
    statistic = list(
      all_continuous()  ~ "{mean} ({sd})",
      all_categorical() ~ "{n} ({p}%)"
    ),
    digits = all_continuous() ~ 1,
    missing = "no"
  ) %>%
  add_n() %>%
  bold_labels() %>%
  modify_caption("**Table 1a. Descriptive Statistics — BRFSS 2024 Analytic Sample (n = 8000)**")

**Table 1a. Descriptive Statistics — BRFSS 2024 Analytic Sample (n = 8000)**
Characteristic	N	N = 8,000¹
Mentally unhealthy days (past 30)	8,000	4.1 (7.8)
Binge drinking frequency (past 30 days)	8,000	1.2 (4.0)
Marijuana use days (past 30)	8,000	3.1 (8.1)
Sex	8,000
Male		4,210 (53%)
Female		3,790 (47%)
Age category	8,000
18-24		365 (4.6%)
25-29		494 (6.2%)
30-34		539 (6.7%)
35-39		611 (7.6%)
40-44		619 (7.7%)
45-49		554 (6.9%)
50-54		620 (7.8%)
55-59		713 (8.9%)
60-64		809 (10%)
65-69		830 (10%)
70-74		749 (9.4%)
75-79		626 (7.8%)
80+		471 (5.9%)
Income category	8,000
<$15k		213 (2.7%)
$15-25k		430 (5.4%)
$25-35k		624 (7.8%)
$35-50k		869 (11%)
$50-100k		2,583 (32%)
$100-200k		2,347 (29%)
>$200k		934 (12%)
¹ Mean (SD); n (%)

#binge drinkers subset

brfss_binge %>%
  select(menthlth_new, #outcome
         avedrnk4, #exposure
         maxdrnks,
         marijan1_new, sex_new, age_new, income_new #covariates
         ) %>% 
  tbl_summary(
    label = list(
      menthlth_new ~ "Mentally unhealthy days (past 30)",
      avedrnk4 ~ "Average number of drinks per session (past 30 days)",
      maxdrnks ~ "Maximum number of drinks in one session (past 30 days)",
      marijan1_new     ~ "Marijuana use days (past 30)",
      sex_new           ~ "Sex",
      age_new      ~ "Age category",
      income_new           ~ "Income category"
    ),
    statistic = list(
      all_continuous()  ~ "{mean} ({sd})",
      all_categorical() ~ "{n} ({p}%)"
    ),
    digits = all_continuous() ~ 1,
    missing = "no"
  ) %>%
  add_n() %>%
  bold_labels() %>%
  modify_caption("**Table 1b. Descriptive Statistics — BRFSS 2024 Binge Drinkers Subset (n = 8000)**")

**Table 1b. Descriptive Statistics — BRFSS 2024 Binge Drinkers Subset (n = 8000)**
Characteristic	N	N = 8,000¹
Mentally unhealthy days (past 30)	8,000	5.4 (8.6)
Average number of drinks per session (past 30 days)	8,000	3.9 (3.3)
Maximum number of drinks in one session (past 30 days)	8,000	7.3 (4.6)
Marijuana use days (past 30)	8,000	5.3 (10.1)
Sex	8,000
Male		4,866 (61%)
Female		3,134 (39%)
Age category	8,000
18-24		732 (9.2%)
25-29		738 (9.2%)
30-34		826 (10%)
35-39		811 (10%)
40-44		872 (11%)
45-49		721 (9.0%)
50-54		725 (9.1%)
55-59		729 (9.1%)
60-64		696 (8.7%)
65-69		549 (6.9%)
70-74		348 (4.4%)
75-79		170 (2.1%)
80+		83 (1.0%)
Income category	8,000
<$15k		275 (3.4%)
$15-25k		461 (5.8%)
$25-35k		621 (7.8%)
$35-50k		868 (11%)
$50-100k		2,387 (30%)
$100-200k		2,377 (30%)
>$200k		1,011 (13%)
¹ Mean (SD); n (%)

Regression

#unadjusted regression - full dataset

reg1 <- glm(menthlth_new ~ drnk3ge5_new, data = brfss_sub2, family = quasipoisson(link="log"))

tidy(reg1, conf.int = TRUE, exponentiate=TRUE) |>

  mutate(
    term = dplyr::recode(term,
      "(Intercept)"   = "Intercept",
      "drnk3ge5_new" = "Binge Drinking Frequency"
    ),
    across(where(is.numeric), ~ round(., 4))
  )  |>
  kable(
    caption = "Table 2a. Unadjusted Regression - Binge Drinking Frequency & Poor Mental Health (n=8000)",
    col.names = c("Term", "IRR", "SE", "t", "p-value", "95% CI Lower", "95% CI Upper")
  ) |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Table 2a. Unadjusted Regression - Binge Drinking Frequency & Poor Mental Health (n=8000)
Term	IRR	SE	t	p-value	95% CI Lower	95% CI Upper
Intercept	3.9341	0.0222	61.8141	0	3.7658	4.1075
Binge Drinking Frequency	1.0327	0.0036	8.9554	0	1.0252	1.0398

#unadjusted regression - binge drinkers dataset
reg2 <- glm(menthlth_new ~ avedrnk4, data = brfss_binge, family=quasipoisson(link="log"))

tidy(reg2, conf.int = TRUE, exponentiate=TRUE) |>
  mutate(
    term = dplyr::recode(term,
      "(Intercept)"   = "Intercept",
      "avedrnk4" = "Average Drinks"
    ),
    across(where(is.numeric), ~ round(., 4))
  )  |>
  kable(
    caption = "Table 2b. Unadjusted Regression - Average Drinks & Poor Mental Health Among Binge Drinkers (n=8000)",
    col.names = c("Term", "IRR", "SE", "t", "p-value", "95% CI Lower", "95% CI Upper")
  ) |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Table 2b. Unadjusted Regression - Average Drinks & Poor Mental Health Among Binge Drinkers (n=8000)
Term	IRR	SE	t	p-value	95% CI Lower	95% CI Upper
Intercept	4.8346	0.0235	67.0186	0	4.6178	5.0638
Average Drinks	1.0280	0.0036	7.7363	0	1.0205	1.0349

#unadjusted regression - binge drinkers dataset
reg3 <- glm(menthlth_new ~ maxdrnks, data = brfss_binge, family=quasipoisson(link="log"))

tidy(reg3, conf.int = TRUE, exponentiate=TRUE) |>
    mutate(
    term = dplyr::recode(term,
      "(Intercept)"   = "Intercept",
      "maxdrnks" = "Maximum Drinks"
    ),
    across(where(is.numeric), ~ round(., 4))
  )  |>
  kable(
    caption = "Table 2c. Unadjusted Regression - Maximum Drinks & Poor Mental Health Among Binge Drinkers (n=8000)",
    col.names = c("Term", "IRR", "SE", "t", "p-value", "95% CI Lower", "95% CI Upper")
  ) |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Table 2c. Unadjusted Regression - Maximum Drinks & Poor Mental Health Among Binge Drinkers (n=8000)
Term	IRR	SE	t	p-value	95% CI Lower	95% CI Upper
Intercept	4.4665	0.0279	53.6714	0	4.2302	4.7189
Maximum Drinks	1.0257	0.0027	9.4062	0	1.0201	1.0309

#adjusted regression - full dataset
adj_reg1 <- glm(menthlth_new ~ drnk3ge5_new + marijan1_new + sex_new + age_new + income_new, data = brfss_sub2, family = quasipoisson(link="log"))

tidy(adj_reg1, conf.int = TRUE, exponentiate=TRUE) |>
  
    mutate(
    term = dplyr::recode(term,
      "(Intercept)"   = "Intercept",
      "drnk3ge5_new" = "Binge Drinking Frequency",
      "marijan1_new"     = "Marijuana Use",
      "sex_newFemale"           = "Female",
      "age_new25-29"           = "Age 25-29",
      "age_new30-34"           = "Age 30-34",
      "age_new35-39"           = "Age 35-39",
      "age_new40-44"           = "Age 40-44",
      "age_new45-49"           = "Age 45-49",
      "age_new50-54"           = "Age 50-54",
      "age_new55-59"           = "Age 55-59",
      "age_new60-64"           = "Age 60-64",
      "age_new65-69"           = "Age 65-69",
      "age_new70-74"           = "Age 70-74",
      "age_new75-79"           = "Age 75-79",
      "age_new80+"            = "Age 80+",
      "income_new$15-25k"           = "Income $15-25K",
      "income_new$25-35k"           = "Income $25-35K",
      "income_new$35-50k"           = "Income $35-50K",
      "income_new$50-100k"           = "Income $50-100K",
      "income_new$100-200k"           = "Income $100-200K",
      "income_new>$200k"           = "Income >$200K"
    ),
    across(where(is.numeric), ~ round(., 4))
  )  |>
  kable(
    caption = "Table 3a. Adjusted Regression - Binge Drinking Frequency & Poor Mental Health (n=8000)",
    col.names = c("Term", "IRR", "SE", "t", "p-value", "95% CI Lower", "95% CI Upper")
  ) |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Table 3a. Adjusted Regression - Binge Drinking Frequency & Poor Mental Health (n=8000)
Term	IRR	SE	t	p-value	95% CI Lower	95% CI Upper
Intercept	9.3490	0.1056	21.1725	0.0000	7.5725	11.4561
Binge Drinking Frequency	1.0188	0.0037	4.9739	0.0000	1.0112	1.0261
Marijuana Use	1.0181	0.0020	9.1830	0.0000	1.0142	1.0220
Female	1.4002	0.0416	8.0990	0.0000	1.2908	1.5193
Age 25-29	0.8845	0.0945	-1.2980	0.1943	0.7349	1.0647
Age 30-34	0.8936	0.0922	-1.2209	0.2222	0.7460	1.0709
Age 35-39	0.7733	0.0943	-2.7267	0.0064	0.6427	0.9304
Age 40-44	0.8245	0.0931	-2.0738	0.0381	0.6871	0.9898
Age 45-49	0.8301	0.0956	-1.9481	0.0514	0.6882	1.0012
Age 50-54	0.6358	0.1008	-4.4910	0.0000	0.5214	0.7744
Age 55-59	0.6211	0.0974	-4.8885	0.0000	0.5129	0.7516
Age 60-64	0.4705	0.0997	-7.5658	0.0000	0.3867	0.5717
Age 65-69	0.3620	0.1068	-9.5193	0.0000	0.2931	0.4456
Age 70-74	0.3861	0.1092	-8.7144	0.0000	0.3111	0.4775
Age 75-79	0.3429	0.1209	-8.8554	0.0000	0.2696	0.4332
Age 80+	0.2689	0.1436	-9.1481	0.0000	0.2014	0.3539
Income $15-25K	0.9322	0.1056	-0.6651	0.5060	0.7591	1.1486
Income $25-35K	0.7495	0.1037	-2.7812	0.0054	0.6128	0.9203
Income $35-50K	0.7020	0.0997	-3.5510	0.0004	0.5788	0.8556
Income $50-100K	0.5164	0.0918	-7.2014	0.0000	0.4328	0.6203
Income $100-200K	0.4479	0.0938	-8.5582	0.0000	0.3738	0.5402
Income >$200K	0.2887	0.1179	-10.5375	0.0000	0.2291	0.3639

#adjusted regression - binge drinkers dataset
adj_reg2 <- glm(menthlth_new ~ avedrnk4 + marijan1_new + sex_new + age_new + income_new, data = brfss_binge, family=quasipoisson(link="log"))

tidy(adj_reg2, conf.int = TRUE, exponentiate=TRUE) |>
  mutate(
    term = dplyr::recode(term,
      "(Intercept)"   = "Intercept",
      "avedrnk4" = "Average Drinks",
      "marijan1_new"     = "Marijuana Use",
      "sex_newFemale"           = "Female",
      "age_new25-29"           = "Age 25-29",
      "age_new30-34"           = "Age 30-34",
      "age_new35-39"           = "Age 35-39",
      "age_new40-44"           = "Age 40-44",
      "age_new45-49"           = "Age 45-49",
      "age_new50-54"           = "Age 50-54",
      "age_new55-59"           = "Age 55-59",
      "age_new60-64"           = "Age 60-64",
      "age_new65-69"           = "Age 65-69",
      "age_new70-74"           = "Age 70-74",
      "age_new75-79"           = "Age 75-79",
      "age_new80+"            = "Age 80+",
      "income_new$15-25k"           = "Income $15-25K",
      "income_new$25-35k"           = "Income $25-35K",
      "income_new$35-50k"           = "Income $35-50K",
      "income_new$50-100k"           = "Income $50-100K",
      "income_new$100-200k"           = "Income $100-200K",
      "income_new>$200k"           = "Income >$200K"
    ),
    across(where(is.numeric), ~ round(., 4))
  )  |>
  kable(
    caption = "Table 3b. Adjusted Regression - Average Drinks & Poor Mental Health Among Binge Drinkers (n=8000)",
    col.names = c("Term", "IRR", "SE", "t", "p-value", "95% CI Lower", "95% CI Upper")
  ) |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Table 3b. Adjusted Regression - Average Drinks & Poor Mental Health Among Binge Drinkers (n=8000)
Term	IRR	SE	t	p-value	95% CI Lower	95% CI Upper
Intercept	7.0290	0.0910	21.4265	0.0000	5.8668	8.3826
Average Drinks	1.0203	0.0039	5.2067	0.0000	1.0123	1.0277
Marijuana Use	1.0187	0.0014	12.8624	0.0000	1.0158	1.0215
Female	1.4915	0.0348	11.4949	0.0000	1.3932	1.5966
Age 25-29	1.0581	0.0700	0.8063	0.4201	0.9223	1.2136
Age 30-34	1.0775	0.0682	1.0955	0.2733	0.9428	1.2318
Age 35-39	0.9695	0.0712	-0.4352	0.6635	0.8431	1.1145
Age 40-44	1.0291	0.0699	0.4102	0.6816	0.8973	1.1801
Age 45-49	0.9338	0.0746	-0.9183	0.3585	0.8063	1.0804
Age 50-54	0.8962	0.0762	-1.4374	0.1507	0.7713	1.0401
Age 55-59	0.6828	0.0826	-4.6192	0.0000	0.5800	0.8019
Age 60-64	0.6515	0.0827	-5.1827	0.0000	0.5532	0.7652
Age 65-69	0.4800	0.0988	-7.4315	0.0000	0.3942	0.5808
Age 70-74	0.4805	0.1205	-6.0847	0.0000	0.3770	0.6049
Age 75-79	0.4923	0.1663	-4.2611	0.0000	0.3499	0.6728
Age 80+	0.5235	0.2181	-2.9675	0.0030	0.3316	0.7827
Income $15-25K	0.9063	0.0907	-1.0854	0.2778	0.7594	1.0837
Income $25-35K	0.7754	0.0885	-2.8736	0.0041	0.6527	0.9236
Income $35-50K	0.7778	0.0836	-3.0061	0.0027	0.6614	0.9180
Income $50-100K	0.6320	0.0773	-5.9328	0.0000	0.5444	0.7373
Income $100-200K	0.4669	0.0802	-9.4980	0.0000	0.3998	0.5476
Income >$200K	0.3665	0.0970	-10.3519	0.0000	0.3032	0.4435

#adjusted regression - binge drinkers dataset
adj_reg3 <- glm(menthlth_new ~ maxdrnks + marijan1_new + sex_new + age_new + income_new, data = brfss_binge, family=quasipoisson(link="log"))

tidy(adj_reg3, conf.int = TRUE, exponentiate=TRUE) |>
  mutate(
    term = dplyr::recode(term,
      "(Intercept)"   = "Intercept",
      "maxdrnks" = "Maximum Drinks",
      "marijan1_new"     = "Marijuana Use",
      "sex_newFemale"           = "Female",
      "age_new25-29"           = "Age 25-29",
      "age_new30-34"           = "Age 30-34",
      "age_new35-39"           = "Age 35-39",
      "age_new40-44"           = "Age 40-44",
      "age_new45-49"           = "Age 45-49",
      "age_new50-54"           = "Age 50-54",
      "age_new55-59"           = "Age 55-59",
      "age_new60-64"           = "Age 60-64",
      "age_new65-69"           = "Age 65-69",
      "age_new70-74"           = "Age 70-74",
      "age_new75-79"           = "Age 75-79",
      "age_new80+"            = "Age 80+",
      "income_new$15-25k"           = "Income $15-25K",
      "income_new$25-35k"           = "Income $25-35K",
      "income_new$35-50k"           = "Income $35-50K",
      "income_new$50-100k"           = "Income $50-100K",
      "income_new$100-200k"           = "Income $100-200K",
      "income_new>$200k"           = "Income >$200K"
    ),
    across(where(is.numeric), ~ round(., 4))
  )  |>
  kable(
    caption = "Table 3c. Adjusted Regression - Maximum Drinks & Poor Mental Health Among Binge Drinkers (n=8000)",
    col.names = c("Term", "IRR", "SE", "t", "p-value", "95% CI Lower", "95% CI Upper")
  ) |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Table 3c. Adjusted Regression - Maximum Drinks & Poor Mental Health Among Binge Drinkers (n=8000)
Term	IRR	SE	t	p-value	95% CI Lower	95% CI Upper
Intercept	6.3009	0.0923	19.9509	0.0000	5.2467	7.5335
Maximum Drinks	1.0220	0.0027	8.0795	0.0000	1.0165	1.0273
Marijuana Use	1.0180	0.0014	12.3539	0.0000	1.0151	1.0208
Female	1.5493	0.0354	12.3784	0.0000	1.4455	1.6605
Age 25-29	1.0603	0.0698	0.8391	0.4014	0.9247	1.2156
Age 30-34	1.0917	0.0680	1.2901	0.1971	0.9555	1.2476
Age 35-39	0.9762	0.0710	-0.3390	0.7346	0.8493	1.1218
Age 40-44	1.0283	0.0696	0.4011	0.6884	0.8971	1.1787
Age 45-49	0.9422	0.0744	-0.8002	0.4236	0.8139	1.0897
Age 50-54	0.9045	0.0760	-1.3207	0.1866	0.7788	1.0492
Age 55-59	0.6918	0.0824	-4.4728	0.0000	0.5878	0.8121
Age 60-64	0.6640	0.0825	-4.9622	0.0000	0.5641	0.7796
Age 65-69	0.4910	0.0986	-7.2165	0.0000	0.4034	0.5938
Age 70-74	0.4945	0.1202	-5.8568	0.0000	0.3882	0.6223
Age 75-79	0.5103	0.1660	-4.0531	0.0001	0.3630	0.6970
Age 80+	0.5398	0.2175	-2.8341	0.0046	0.3423	0.8064
Income $15-25K	0.9057	0.0902	-1.0972	0.2726	0.7596	1.0822
Income $25-35K	0.7839	0.0881	-2.7649	0.0057	0.6603	0.9328
Income $35-50K	0.7885	0.0833	-2.8534	0.0043	0.6709	0.9301
Income $50-100K	0.6340	0.0766	-5.9468	0.0000	0.5469	0.7386
Income $100-200K	0.4681	0.0795	-9.5536	0.0000	0.4014	0.5482
Income >$200K	0.3688	0.0961	-10.3770	0.0000	0.3055	0.4455

Model Diagnostics

#model diagnostics
par(mfrow = c(2, 2))
plot(adj_reg1, which = 1:4,
     col = adjustcolor("steelblue", 0.4),
     pch = 19, cex = 0.6)

par(mfrow = c(2, 2))
plot(adj_reg2, which = 1:4,
     col = adjustcolor("steelblue", 0.4),
     pch = 19, cex = 0.6)

par(mfrow = c(2, 2))
plot(adj_reg3, which = 1:4,
     col = adjustcolor("steelblue", 0.4),
     pch = 19, cex = 0.6)

Visualizations

#visualizations of adjusted regression models
ggpredict(adj_reg1, terms = "drnk3ge5_new") %>%
  plot(show_data=TRUE) +
  labs(
    title    = "Figure 1a. Binge Drinking vs. Mentally Unhealthy Days",
    subtitle = "BRFSS 2024 Analytic sample (n=8000), Other covariates held constant",
    x        = "Frequency of Binge Drinking",
    y        = "Predicted Mentally Unhealthy Days"
  ) +
  theme_minimal(base_size = 13)

ggpredict(adj_reg2, terms = "avedrnk4") %>%
  plot(show_data=TRUE) +
  labs(
    title    = "Figure 1b. Average Drinks vs. Mentally Unhealthy Days (Binge Drinkers)",
    subtitle = "BRFSS 2024 Binge Drinkers sample (n=8000), Other covariates held constant",
    x        = "Average Number of Drinks",
    y        = "Predicted Mentally Unhealthy Days"
  ) +
  theme_minimal(base_size = 13)

ggpredict(adj_reg3, terms = "maxdrnks") %>%
  plot(show_data=TRUE) +
  labs(
    title    = "Figure 1c. Maximum Drinks vs. Mentally Unhealthy Days (Binge Drinkers)",
    subtitle = "BRFSS 2024 Binge Drinkers sample (n=8000), Other covariates held constant",
    x        = "Maximum Number of Drinks",
    y        = "Predicted Mentally Unhealthy Days"
  ) +
  theme_minimal(base_size = 13)

#visualizations of coefficients of all covariates and main exposure
tidy(adj_reg1, conf.int = TRUE) |>
  
  ggplot(aes(x = estimate, y = term, color = term)) +
  geom_point(size = 4) +
  geom_errorbarh(aes(xmin = conf.low, xmax = conf.high), height = 0.2, linewidth = 1) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray50") +

  labs(
    title    = "Figure 2a. Binge Drinking & Covariates vs. Mentally Unhealthy Days",
    subtitle = "BRFSS 2024 Analytic sample (n=8000)",
    x        = "Difference in Mentally Unhealthy Days",
    y        = "Predictor"
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "none")

tidy(adj_reg2, conf.int = TRUE) |>
  
  ggplot(aes(x = estimate, y = term, color = term)) +
  geom_point(size = 4) +
  geom_errorbarh(aes(xmin = conf.low, xmax = conf.high), height = 0.2, linewidth = 1) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray50") +

  labs(
    title    = "Figure 2b. Average Drinks & Covariates vs. Mentally Unhealthy Days",
    subtitle = "BRFSS 2024 Binge Drinkers sample (n=8000)",
    x        = "Difference in Mentally Unhealthy Days",
    y        = "Predictor"
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "none")

tidy(adj_reg3, conf.int = TRUE) |>
  
  ggplot(aes(x = estimate, y = term, color = term)) +
  geom_point(size = 4) +
  geom_errorbarh(aes(xmin = conf.low, xmax = conf.high), height = 0.2, linewidth = 1) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray50") +

  labs(
    title    = "Figure 2c. Maximum Drinks & Covariates vs. Mentally Unhealthy Days",
    subtitle = "BRFSS 2024 Binge Drinkers sample (n=8000)",
    x        = "Difference in Mentally Unhealthy Days",
    y        = "Predictor"
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "none")

EPI 553 Final Report

EPI 553 — Principles of Statistical Inference II (Spring 2026)

Josh Macera

May 1, 2026

Data Loading

Data Cleaning & Recoding

Descriptive Statistics

Regression

Model Diagnostics

Visualizations