Data Loading

# Load required packages
library(tinytex)
library(tidyverse)
library(kableExtra)
library(broom)
library(haven)
library(janitor)
library(knitr)
library(plotly)
library(ggeffects)
library(gtsummary)
library(GGally)
library(car)
library(lmtest)
library(corrplot)
# Import the dataset
brfss_full <- read_xpt("/Users/joshm/Documents/UAlbany/Spring 2026/EPI 553/Project/BRFSS.XPT") %>%
  clean_names()

Data Cleaning & Recoding

# keep only key variables
brfss_sub <- brfss_full %>%
  select(seqno, menthlth, drnk3ge5, avedrnk4, maxdrnks, sex, ageg5yr, incomg1, marijan1)


cat("Mental Health Days Missing data:", sum(is.na(brfss_sub$menthlth)), "\n")
## Mental Health Days Missing data: 3
a <- nrow(brfss_sub)
b <- sum(is.na(brfss_sub$menthlth))
c <- (b / a)*100

cat("Mental Health Days missing data %:", c , "\n")
## Mental Health Days missing data %: 0.0006554941
cat("Binge Drinking Frequency Missing data:", sum(is.na(brfss_sub$drnk3ge5)), "\n")
## Binge Drinking Frequency Missing data: 247988
a <- nrow(brfss_sub)
b <- sum(is.na(brfss_sub$drnk3ge5))
c <- (b / a)*100

cat("Binge Drinking Frequency missing data %:", c , "\n")
## Binge Drinking Frequency missing data %: 54.18489
cat("Average Drinks Missing data:", sum(is.na(brfss_sub$avedrnk4)), "\n")
## Average Drinks Missing data: 247519
a <- nrow(brfss_sub)
b <- sum(is.na(brfss_sub$avedrnk4))
c <- (b / a)*100

cat("Average Drinks missing data %:", c , "\n")
## Average Drinks missing data %: 54.08242
cat("Maximum Drinks Missing data:", sum(is.na(brfss_sub$maxdrnks)), "\n")
## Maximum Drinks Missing data: 248422
a <- nrow(brfss_sub)
b <- sum(is.na(brfss_sub$maxdrnks))
c <- (b / a)*100

cat("Maximum Drinks missing data %:", c , "\n")
## Maximum Drinks missing data %: 54.27972
#remove missing data from key variables
brfss_sub <- brfss_sub %>%
  filter(
    !is.na(seqno),
    !is.na(menthlth),
    !is.na(drnk3ge5),
    !is.na(avedrnk4),
    !is.na(maxdrnks),
    !is.na(sex),
    !is.na(ageg5yr),
    !is.na(incomg1),
    !is.na(marijan1)
    
  )
#Remove invalid data, recode key variables, add labels
brfss_sub2 <- brfss_sub %>%
  filter(
    ageg5yr != 14,
    incomg1 != 9,
    
    menthlth != 77, menthlth != 99,
    drnk3ge5 != 77, drnk3ge5 != 99,
    marijan1 != 77, marijan1 != 99,
    
    avedrnk4 != 77, avedrnk4 != 99, avedrnk4 != 88,   
    
    maxdrnks != 77, maxdrnks != 99, maxdrnks != 88, maxdrnks != 0)
#recoding
brfss_sub2 <- brfss_sub2 %>%
  mutate(
      menthlth_new = case_when(menthlth == 88 ~ 0,
      menthlth >= 1 & menthlth <= 30 ~ as.numeric(menthlth),
      TRUE                        ~ NA_real_),
      
      drnk3ge5_new = case_when(drnk3ge5 == 88 ~ 0,
      drnk3ge5 >= 1 & drnk3ge5 <= 76 ~ as.numeric(drnk3ge5),
      TRUE                        ~ NA_real_),
      
      marijan1_new = case_when(marijan1 == 88 ~ 0,
      marijan1 >= 1 & marijan1 <= 30 ~ as.numeric(marijan1),
      TRUE                        ~ NA_real_),
 
      sex_new = factor(sex, levels = c(1, 2), labels = c("Male", "Female")),
      
      age_new = factor(ageg5yr, levels = 1:13,
      labels = c("18-24", "25-29", "30-34", "35-39", "40-44", "45-49", "50-54", "55-59", "60-64", "65-69", "70-74", "75-79", "80+") ),
      
      income_new = factor(incomg1, levels = 1:7,
      labels = c("<$15k", "$15-25k", "$25-35k", "$35-50k",
                 "$50-100k", "$100-200k", ">$200k"))
  
       ) 

# binge drinking dataset
brfss_binge <- brfss_sub2 %>%
  filter(drnk3ge5_new > 0, 
         sex_new == "Female" & maxdrnks>=4 | sex_new == "Male" & maxdrnks>=5)

# Random sample of final datasets
set.seed(1220) 
brfss_sub2 <- brfss_sub2 |> 
slice_sample(n = 8000) 

set.seed(1220) 
brfss_binge <- brfss_binge|> 
slice_sample(n = 8000) 

Descriptive Statistics

brfss_sub2 %>%
  select(menthlth_new, #outcome
         drnk3ge5_new, #exposure
         marijan1_new, sex_new, age_new, income_new #covariates
         ) %>% 
  tbl_summary(
    label = list(
      menthlth_new ~ "Mentally unhealthy days (past 30)",
      drnk3ge5_new ~ "Binge drinking frequency (past 30 days)",
      marijan1_new     ~ "Marijuana use days (past 30)",
      sex_new           ~ "Sex",
      age_new      ~ "Age category",
      income_new           ~ "Income category"
    ),
    statistic = list(
      all_continuous()  ~ "{mean} ({sd})",
      all_categorical() ~ "{n} ({p}%)"
    ),
    digits = all_continuous() ~ 1,
    missing = "no"
  ) %>%
  add_n() %>%
  bold_labels() %>%
  modify_caption("**Table 1a. Descriptive Statistics — BRFSS 2024 Analytic Sample (n = 8000)**")
Table 1a. Descriptive Statistics — BRFSS 2024 Analytic Sample (n = 8000)
Characteristic N N = 8,0001
Mentally unhealthy days (past 30) 8,000 4.1 (7.8)
Binge drinking frequency (past 30 days) 8,000 1.2 (4.0)
Marijuana use days (past 30) 8,000 3.1 (8.1)
Sex 8,000
    Male
4,210 (53%)
    Female
3,790 (47%)
Age category 8,000
    18-24
365 (4.6%)
    25-29
494 (6.2%)
    30-34
539 (6.7%)
    35-39
611 (7.6%)
    40-44
619 (7.7%)
    45-49
554 (6.9%)
    50-54
620 (7.8%)
    55-59
713 (8.9%)
    60-64
809 (10%)
    65-69
830 (10%)
    70-74
749 (9.4%)
    75-79
626 (7.8%)
    80+
471 (5.9%)
Income category 8,000
    <$15k
213 (2.7%)
    $15-25k
430 (5.4%)
    $25-35k
624 (7.8%)
    $35-50k
869 (11%)
    $50-100k
2,583 (32%)
    $100-200k
2,347 (29%)
    >$200k
934 (12%)
1 Mean (SD); n (%)
#binge drinkers subset

brfss_binge %>%
  select(menthlth_new, #outcome
         avedrnk4, #exposure
         maxdrnks,
         marijan1_new, sex_new, age_new, income_new #covariates
         ) %>% 
  tbl_summary(
    label = list(
      menthlth_new ~ "Mentally unhealthy days (past 30)",
      avedrnk4 ~ "Average number of drinks per session (past 30 days)",
      maxdrnks ~ "Maximum number of drinks in one session (past 30 days)",
      marijan1_new     ~ "Marijuana use days (past 30)",
      sex_new           ~ "Sex",
      age_new      ~ "Age category",
      income_new           ~ "Income category"
    ),
    statistic = list(
      all_continuous()  ~ "{mean} ({sd})",
      all_categorical() ~ "{n} ({p}%)"
    ),
    digits = all_continuous() ~ 1,
    missing = "no"
  ) %>%
  add_n() %>%
  bold_labels() %>%
  modify_caption("**Table 1b. Descriptive Statistics — BRFSS 2024 Binge Drinkers Subset (n = 8000)**")
Table 1b. Descriptive Statistics — BRFSS 2024 Binge Drinkers Subset (n = 8000)
Characteristic N N = 8,0001
Mentally unhealthy days (past 30) 8,000 5.4 (8.6)
Average number of drinks per session (past 30 days) 8,000 3.9 (3.3)
Maximum number of drinks in one session (past 30 days) 8,000 7.3 (4.6)
Marijuana use days (past 30) 8,000 5.3 (10.1)
Sex 8,000
    Male
4,866 (61%)
    Female
3,134 (39%)
Age category 8,000
    18-24
732 (9.2%)
    25-29
738 (9.2%)
    30-34
826 (10%)
    35-39
811 (10%)
    40-44
872 (11%)
    45-49
721 (9.0%)
    50-54
725 (9.1%)
    55-59
729 (9.1%)
    60-64
696 (8.7%)
    65-69
549 (6.9%)
    70-74
348 (4.4%)
    75-79
170 (2.1%)
    80+
83 (1.0%)
Income category 8,000
    <$15k
275 (3.4%)
    $15-25k
461 (5.8%)
    $25-35k
621 (7.8%)
    $35-50k
868 (11%)
    $50-100k
2,387 (30%)
    $100-200k
2,377 (30%)
    >$200k
1,011 (13%)
1 Mean (SD); n (%)

Regression

#unadjusted regression - full dataset

reg1 <- glm(menthlth_new ~ drnk3ge5_new, data = brfss_sub2, family = quasipoisson(link="log"))

tidy(reg1, conf.int = TRUE, exponentiate=TRUE) |>

  mutate(
    term = dplyr::recode(term,
      "(Intercept)"   = "Intercept",
      "drnk3ge5_new" = "Binge Drinking Frequency"
    ),
    across(where(is.numeric), ~ round(., 4))
  )  |>
  kable(
    caption = "Table 2a. Unadjusted Regression - Binge Drinking Frequency & Poor Mental Health (n=8000)",
    col.names = c("Term", "IRR", "SE", "t", "p-value", "95% CI Lower", "95% CI Upper")
  ) |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
Table 2a. Unadjusted Regression - Binge Drinking Frequency & Poor Mental Health (n=8000)
Term IRR SE t p-value 95% CI Lower 95% CI Upper
Intercept 3.9341 0.0222 61.8141 0 3.7658 4.1075
Binge Drinking Frequency 1.0327 0.0036 8.9554 0 1.0252 1.0398
#unadjusted regression - binge drinkers dataset
reg2 <- glm(menthlth_new ~ avedrnk4, data = brfss_binge, family=quasipoisson(link="log"))

tidy(reg2, conf.int = TRUE, exponentiate=TRUE) |>
  mutate(
    term = dplyr::recode(term,
      "(Intercept)"   = "Intercept",
      "avedrnk4" = "Average Drinks"
    ),
    across(where(is.numeric), ~ round(., 4))
  )  |>
  kable(
    caption = "Table 2b. Unadjusted Regression - Average Drinks & Poor Mental Health Among Binge Drinkers (n=8000)",
    col.names = c("Term", "IRR", "SE", "t", "p-value", "95% CI Lower", "95% CI Upper")
  ) |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
Table 2b. Unadjusted Regression - Average Drinks & Poor Mental Health Among Binge Drinkers (n=8000)
Term IRR SE t p-value 95% CI Lower 95% CI Upper
Intercept 4.8346 0.0235 67.0186 0 4.6178 5.0638
Average Drinks 1.0280 0.0036 7.7363 0 1.0205 1.0349
#unadjusted regression - binge drinkers dataset
reg3 <- glm(menthlth_new ~ maxdrnks, data = brfss_binge, family=quasipoisson(link="log"))

tidy(reg3, conf.int = TRUE, exponentiate=TRUE) |>
    mutate(
    term = dplyr::recode(term,
      "(Intercept)"   = "Intercept",
      "maxdrnks" = "Maximum Drinks"
    ),
    across(where(is.numeric), ~ round(., 4))
  )  |>
  kable(
    caption = "Table 2c. Unadjusted Regression - Maximum Drinks & Poor Mental Health Among Binge Drinkers (n=8000)",
    col.names = c("Term", "IRR", "SE", "t", "p-value", "95% CI Lower", "95% CI Upper")
  ) |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
Table 2c. Unadjusted Regression - Maximum Drinks & Poor Mental Health Among Binge Drinkers (n=8000)
Term IRR SE t p-value 95% CI Lower 95% CI Upper
Intercept 4.4665 0.0279 53.6714 0 4.2302 4.7189
Maximum Drinks 1.0257 0.0027 9.4062 0 1.0201 1.0309
#adjusted regression - full dataset
adj_reg1 <- glm(menthlth_new ~ drnk3ge5_new + marijan1_new + sex_new + age_new + income_new, data = brfss_sub2, family = quasipoisson(link="log"))

tidy(adj_reg1, conf.int = TRUE, exponentiate=TRUE) |>
  
    mutate(
    term = dplyr::recode(term,
      "(Intercept)"   = "Intercept",
      "drnk3ge5_new" = "Binge Drinking Frequency",
      "marijan1_new"     = "Marijuana Use",
      "sex_newFemale"           = "Female",
      "age_new25-29"           = "Age 25-29",
      "age_new30-34"           = "Age 30-34",
      "age_new35-39"           = "Age 35-39",
      "age_new40-44"           = "Age 40-44",
      "age_new45-49"           = "Age 45-49",
      "age_new50-54"           = "Age 50-54",
      "age_new55-59"           = "Age 55-59",
      "age_new60-64"           = "Age 60-64",
      "age_new65-69"           = "Age 65-69",
      "age_new70-74"           = "Age 70-74",
      "age_new75-79"           = "Age 75-79",
      "age_new80+"            = "Age 80+",
      "income_new$15-25k"           = "Income $15-25K",
      "income_new$25-35k"           = "Income $25-35K",
      "income_new$35-50k"           = "Income $35-50K",
      "income_new$50-100k"           = "Income $50-100K",
      "income_new$100-200k"           = "Income $100-200K",
      "income_new>$200k"           = "Income >$200K"
    ),
    across(where(is.numeric), ~ round(., 4))
  )  |>
  kable(
    caption = "Table 3a. Adjusted Regression - Binge Drinking Frequency & Poor Mental Health (n=8000)",
    col.names = c("Term", "IRR", "SE", "t", "p-value", "95% CI Lower", "95% CI Upper")
  ) |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
Table 3a. Adjusted Regression - Binge Drinking Frequency & Poor Mental Health (n=8000)
Term IRR SE t p-value 95% CI Lower 95% CI Upper
Intercept 9.3490 0.1056 21.1725 0.0000 7.5725 11.4561
Binge Drinking Frequency 1.0188 0.0037 4.9739 0.0000 1.0112 1.0261
Marijuana Use 1.0181 0.0020 9.1830 0.0000 1.0142 1.0220
Female 1.4002 0.0416 8.0990 0.0000 1.2908 1.5193
Age 25-29 0.8845 0.0945 -1.2980 0.1943 0.7349 1.0647
Age 30-34 0.8936 0.0922 -1.2209 0.2222 0.7460 1.0709
Age 35-39 0.7733 0.0943 -2.7267 0.0064 0.6427 0.9304
Age 40-44 0.8245 0.0931 -2.0738 0.0381 0.6871 0.9898
Age 45-49 0.8301 0.0956 -1.9481 0.0514 0.6882 1.0012
Age 50-54 0.6358 0.1008 -4.4910 0.0000 0.5214 0.7744
Age 55-59 0.6211 0.0974 -4.8885 0.0000 0.5129 0.7516
Age 60-64 0.4705 0.0997 -7.5658 0.0000 0.3867 0.5717
Age 65-69 0.3620 0.1068 -9.5193 0.0000 0.2931 0.4456
Age 70-74 0.3861 0.1092 -8.7144 0.0000 0.3111 0.4775
Age 75-79 0.3429 0.1209 -8.8554 0.0000 0.2696 0.4332
Age 80+ 0.2689 0.1436 -9.1481 0.0000 0.2014 0.3539
Income $15-25K 0.9322 0.1056 -0.6651 0.5060 0.7591 1.1486
Income $25-35K 0.7495 0.1037 -2.7812 0.0054 0.6128 0.9203
Income $35-50K 0.7020 0.0997 -3.5510 0.0004 0.5788 0.8556
Income $50-100K 0.5164 0.0918 -7.2014 0.0000 0.4328 0.6203
Income $100-200K 0.4479 0.0938 -8.5582 0.0000 0.3738 0.5402
Income >$200K 0.2887 0.1179 -10.5375 0.0000 0.2291 0.3639
#adjusted regression - binge drinkers dataset
adj_reg2 <- glm(menthlth_new ~ avedrnk4 + marijan1_new + sex_new + age_new + income_new, data = brfss_binge, family=quasipoisson(link="log"))

tidy(adj_reg2, conf.int = TRUE, exponentiate=TRUE) |>
  mutate(
    term = dplyr::recode(term,
      "(Intercept)"   = "Intercept",
      "avedrnk4" = "Average Drinks",
      "marijan1_new"     = "Marijuana Use",
      "sex_newFemale"           = "Female",
      "age_new25-29"           = "Age 25-29",
      "age_new30-34"           = "Age 30-34",
      "age_new35-39"           = "Age 35-39",
      "age_new40-44"           = "Age 40-44",
      "age_new45-49"           = "Age 45-49",
      "age_new50-54"           = "Age 50-54",
      "age_new55-59"           = "Age 55-59",
      "age_new60-64"           = "Age 60-64",
      "age_new65-69"           = "Age 65-69",
      "age_new70-74"           = "Age 70-74",
      "age_new75-79"           = "Age 75-79",
      "age_new80+"            = "Age 80+",
      "income_new$15-25k"           = "Income $15-25K",
      "income_new$25-35k"           = "Income $25-35K",
      "income_new$35-50k"           = "Income $35-50K",
      "income_new$50-100k"           = "Income $50-100K",
      "income_new$100-200k"           = "Income $100-200K",
      "income_new>$200k"           = "Income >$200K"
    ),
    across(where(is.numeric), ~ round(., 4))
  )  |>
  kable(
    caption = "Table 3b. Adjusted Regression - Average Drinks & Poor Mental Health Among Binge Drinkers (n=8000)",
    col.names = c("Term", "IRR", "SE", "t", "p-value", "95% CI Lower", "95% CI Upper")
  ) |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
Table 3b. Adjusted Regression - Average Drinks & Poor Mental Health Among Binge Drinkers (n=8000)
Term IRR SE t p-value 95% CI Lower 95% CI Upper
Intercept 7.0290 0.0910 21.4265 0.0000 5.8668 8.3826
Average Drinks 1.0203 0.0039 5.2067 0.0000 1.0123 1.0277
Marijuana Use 1.0187 0.0014 12.8624 0.0000 1.0158 1.0215
Female 1.4915 0.0348 11.4949 0.0000 1.3932 1.5966
Age 25-29 1.0581 0.0700 0.8063 0.4201 0.9223 1.2136
Age 30-34 1.0775 0.0682 1.0955 0.2733 0.9428 1.2318
Age 35-39 0.9695 0.0712 -0.4352 0.6635 0.8431 1.1145
Age 40-44 1.0291 0.0699 0.4102 0.6816 0.8973 1.1801
Age 45-49 0.9338 0.0746 -0.9183 0.3585 0.8063 1.0804
Age 50-54 0.8962 0.0762 -1.4374 0.1507 0.7713 1.0401
Age 55-59 0.6828 0.0826 -4.6192 0.0000 0.5800 0.8019
Age 60-64 0.6515 0.0827 -5.1827 0.0000 0.5532 0.7652
Age 65-69 0.4800 0.0988 -7.4315 0.0000 0.3942 0.5808
Age 70-74 0.4805 0.1205 -6.0847 0.0000 0.3770 0.6049
Age 75-79 0.4923 0.1663 -4.2611 0.0000 0.3499 0.6728
Age 80+ 0.5235 0.2181 -2.9675 0.0030 0.3316 0.7827
Income $15-25K 0.9063 0.0907 -1.0854 0.2778 0.7594 1.0837
Income $25-35K 0.7754 0.0885 -2.8736 0.0041 0.6527 0.9236
Income $35-50K 0.7778 0.0836 -3.0061 0.0027 0.6614 0.9180
Income $50-100K 0.6320 0.0773 -5.9328 0.0000 0.5444 0.7373
Income $100-200K 0.4669 0.0802 -9.4980 0.0000 0.3998 0.5476
Income >$200K 0.3665 0.0970 -10.3519 0.0000 0.3032 0.4435
#adjusted regression - binge drinkers dataset
adj_reg3 <- glm(menthlth_new ~ maxdrnks + marijan1_new + sex_new + age_new + income_new, data = brfss_binge, family=quasipoisson(link="log"))

tidy(adj_reg3, conf.int = TRUE, exponentiate=TRUE) |>
  mutate(
    term = dplyr::recode(term,
      "(Intercept)"   = "Intercept",
      "maxdrnks" = "Maximum Drinks",
      "marijan1_new"     = "Marijuana Use",
      "sex_newFemale"           = "Female",
      "age_new25-29"           = "Age 25-29",
      "age_new30-34"           = "Age 30-34",
      "age_new35-39"           = "Age 35-39",
      "age_new40-44"           = "Age 40-44",
      "age_new45-49"           = "Age 45-49",
      "age_new50-54"           = "Age 50-54",
      "age_new55-59"           = "Age 55-59",
      "age_new60-64"           = "Age 60-64",
      "age_new65-69"           = "Age 65-69",
      "age_new70-74"           = "Age 70-74",
      "age_new75-79"           = "Age 75-79",
      "age_new80+"            = "Age 80+",
      "income_new$15-25k"           = "Income $15-25K",
      "income_new$25-35k"           = "Income $25-35K",
      "income_new$35-50k"           = "Income $35-50K",
      "income_new$50-100k"           = "Income $50-100K",
      "income_new$100-200k"           = "Income $100-200K",
      "income_new>$200k"           = "Income >$200K"
    ),
    across(where(is.numeric), ~ round(., 4))
  )  |>
  kable(
    caption = "Table 3c. Adjusted Regression - Maximum Drinks & Poor Mental Health Among Binge Drinkers (n=8000)",
    col.names = c("Term", "IRR", "SE", "t", "p-value", "95% CI Lower", "95% CI Upper")
  ) |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
Table 3c. Adjusted Regression - Maximum Drinks & Poor Mental Health Among Binge Drinkers (n=8000)
Term IRR SE t p-value 95% CI Lower 95% CI Upper
Intercept 6.3009 0.0923 19.9509 0.0000 5.2467 7.5335
Maximum Drinks 1.0220 0.0027 8.0795 0.0000 1.0165 1.0273
Marijuana Use 1.0180 0.0014 12.3539 0.0000 1.0151 1.0208
Female 1.5493 0.0354 12.3784 0.0000 1.4455 1.6605
Age 25-29 1.0603 0.0698 0.8391 0.4014 0.9247 1.2156
Age 30-34 1.0917 0.0680 1.2901 0.1971 0.9555 1.2476
Age 35-39 0.9762 0.0710 -0.3390 0.7346 0.8493 1.1218
Age 40-44 1.0283 0.0696 0.4011 0.6884 0.8971 1.1787
Age 45-49 0.9422 0.0744 -0.8002 0.4236 0.8139 1.0897
Age 50-54 0.9045 0.0760 -1.3207 0.1866 0.7788 1.0492
Age 55-59 0.6918 0.0824 -4.4728 0.0000 0.5878 0.8121
Age 60-64 0.6640 0.0825 -4.9622 0.0000 0.5641 0.7796
Age 65-69 0.4910 0.0986 -7.2165 0.0000 0.4034 0.5938
Age 70-74 0.4945 0.1202 -5.8568 0.0000 0.3882 0.6223
Age 75-79 0.5103 0.1660 -4.0531 0.0001 0.3630 0.6970
Age 80+ 0.5398 0.2175 -2.8341 0.0046 0.3423 0.8064
Income $15-25K 0.9057 0.0902 -1.0972 0.2726 0.7596 1.0822
Income $25-35K 0.7839 0.0881 -2.7649 0.0057 0.6603 0.9328
Income $35-50K 0.7885 0.0833 -2.8534 0.0043 0.6709 0.9301
Income $50-100K 0.6340 0.0766 -5.9468 0.0000 0.5469 0.7386
Income $100-200K 0.4681 0.0795 -9.5536 0.0000 0.4014 0.5482
Income >$200K 0.3688 0.0961 -10.3770 0.0000 0.3055 0.4455

Model Diagnostics

#model diagnostics
par(mfrow = c(2, 2))
plot(adj_reg1, which = 1:4,
     col = adjustcolor("steelblue", 0.4),
     pch = 19, cex = 0.6)

par(mfrow = c(2, 2))
plot(adj_reg2, which = 1:4,
     col = adjustcolor("steelblue", 0.4),
     pch = 19, cex = 0.6)

par(mfrow = c(2, 2))
plot(adj_reg3, which = 1:4,
     col = adjustcolor("steelblue", 0.4),
     pch = 19, cex = 0.6)

Visualizations

#visualizations of adjusted regression models
ggpredict(adj_reg1, terms = "drnk3ge5_new") %>%
  plot(show_data=TRUE) +
  labs(
    title    = "Figure 1a. Binge Drinking vs. Mentally Unhealthy Days",
    subtitle = "BRFSS 2024 Analytic sample (n=8000), Other covariates held constant",
    x        = "Frequency of Binge Drinking",
    y        = "Predicted Mentally Unhealthy Days"
  ) +
  theme_minimal(base_size = 13)

ggpredict(adj_reg2, terms = "avedrnk4") %>%
  plot(show_data=TRUE) +
  labs(
    title    = "Figure 1b. Average Drinks vs. Mentally Unhealthy Days (Binge Drinkers)",
    subtitle = "BRFSS 2024 Binge Drinkers sample (n=8000), Other covariates held constant",
    x        = "Average Number of Drinks",
    y        = "Predicted Mentally Unhealthy Days"
  ) +
  theme_minimal(base_size = 13)

ggpredict(adj_reg3, terms = "maxdrnks") %>%
  plot(show_data=TRUE) +
  labs(
    title    = "Figure 1c. Maximum Drinks vs. Mentally Unhealthy Days (Binge Drinkers)",
    subtitle = "BRFSS 2024 Binge Drinkers sample (n=8000), Other covariates held constant",
    x        = "Maximum Number of Drinks",
    y        = "Predicted Mentally Unhealthy Days"
  ) +
  theme_minimal(base_size = 13)

#visualizations of coefficients of all covariates and main exposure
tidy(adj_reg1, conf.int = TRUE) |>
  
  ggplot(aes(x = estimate, y = term, color = term)) +
  geom_point(size = 4) +
  geom_errorbarh(aes(xmin = conf.low, xmax = conf.high), height = 0.2, linewidth = 1) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray50") +

  labs(
    title    = "Figure 2a. Binge Drinking & Covariates vs. Mentally Unhealthy Days",
    subtitle = "BRFSS 2024 Analytic sample (n=8000)",
    x        = "Difference in Mentally Unhealthy Days",
    y        = "Predictor"
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "none")

tidy(adj_reg2, conf.int = TRUE) |>
  
  ggplot(aes(x = estimate, y = term, color = term)) +
  geom_point(size = 4) +
  geom_errorbarh(aes(xmin = conf.low, xmax = conf.high), height = 0.2, linewidth = 1) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray50") +

  labs(
    title    = "Figure 2b. Average Drinks & Covariates vs. Mentally Unhealthy Days",
    subtitle = "BRFSS 2024 Binge Drinkers sample (n=8000)",
    x        = "Difference in Mentally Unhealthy Days",
    y        = "Predictor"
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "none")

tidy(adj_reg3, conf.int = TRUE) |>
  
  ggplot(aes(x = estimate, y = term, color = term)) +
  geom_point(size = 4) +
  geom_errorbarh(aes(xmin = conf.low, xmax = conf.high), height = 0.2, linewidth = 1) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray50") +

  labs(
    title    = "Figure 2c. Maximum Drinks & Covariates vs. Mentally Unhealthy Days",
    subtitle = "BRFSS 2024 Binge Drinkers sample (n=8000)",
    x        = "Difference in Mentally Unhealthy Days",
    y        = "Predictor"
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "none")