library(tidyverse)
library(haven)
library(janitor)
library(broom)
library(knitr)
library(kableExtra)
library(car)
library(gtsummary)
library(ggeffects)brfss_raw <- read_xpt('/Users/samriddhi/Downloads/LLCP2023.XPT ')
cat("Raw dataset dimensions:\n")
cat(" Rows: ", nrow(brfss_raw), "\n")
cat(" Columns:", ncol(brfss_raw), "\n")The raw 2023 BRFSS dataset contains 433,323 rows and 350 columns, representing one respondent per row across all U.S. states and territories for the 2023 survey year.
#Cleaning names
#
# Raw BRFSS name → clean_names() output
# MENTHLTH → menthlth
# PHYSHLTH → physhlth
# _BMI5 → bmi5
# SEXVAR → sexvar
# EXERANY2 → exerany2
# _AGEG5YR → ageg5yr
# _INCOMG1 → incomg1
# EDUCA → educa
# _SMOKER3 → smoker3
brfss_raw_clean <- brfss_raw |>
clean_names()
# Diagnostic — verify exact names before selecting
cat("menthlth :", names(brfss_raw_clean)[str_detect(names(brfss_raw_clean), "^menthlth$")], "\n")
cat("physhlth :", names(brfss_raw_clean)[str_detect(names(brfss_raw_clean), "^physhlth$")], "\n")
cat("bmi5 :", names(brfss_raw_clean)[str_detect(names(brfss_raw_clean), "^bmi5$")], "\n")
cat("sexvar :", names(brfss_raw_clean)[str_detect(names(brfss_raw_clean), "^sexvar$")], "\n")
cat("exerany2 :", names(brfss_raw_clean)[str_detect(names(brfss_raw_clean), "^exerany2$")], "\n")
cat("ageg5yr :", names(brfss_raw_clean)[str_detect(names(brfss_raw_clean), "^ageg5yr$")], "\n")
cat("incomg1 :", names(brfss_raw_clean)[str_detect(names(brfss_raw_clean), "^incomg1$")], "\n")
cat("educa :", names(brfss_raw_clean)[str_detect(names(brfss_raw_clean), "^educa$")], "\n")
cat("smoker3 :", names(brfss_raw_clean)[str_detect(names(brfss_raw_clean), "^smoker3$")], "\n")
# Select required variables
brfss_selected <- brfss_raw_clean |>
select(
menthlth, # was MENTHLTH
physhlth, # was PHYSHLTH
bmi5, # was _BMI5 → confirmed: bmi5
sexvar, # was SEXVAR
exerany2, # was EXERANY2
ageg5yr, # was _AGEG5YR → confirmed: ageg5yr
incomg1, # was _INCOMG1 → confirmed: incomg1
educa, # was EDUCA
smoker3 # was _SMOKER3 → confirmed: smoker3
)
# Recode all nine variables
brfss_clean <- brfss_selected |>
mutate(
# Mentally unhealthy days: 88=None->0; 77/99=DK/Refused->NA; 1-30 kept
menthlth_days = case_when(
menthlth == 88 ~ 0,
menthlth >= 1 & menthlth <= 30 ~ as.numeric(menthlth),
menthlth %in% c(77, 99) ~ NA_real_,
TRUE ~ NA_real_
),
# Physically unhealthy days: same recoding rules
physhlth_days = case_when(
physhlth == 88 ~ 0,
physhlth >= 1 & physhlth <= 30 ~ as.numeric(physhlth),
physhlth %in% c(77, 99) ~ NA_real_,
TRUE ~ NA_real_
),
# BMI: stored as BMI × 100; divide by 100 for actual value; 9999->NA
bmi = case_when(
bmi5 == 9999 ~ NA_real_,
TRUE ~ as.numeric(bmi5) / 100
),
# Sex: 1=Male (reference), 2=Female
sex = factor(
case_when(
sexvar == 1 ~ "Male",
sexvar == 2 ~ "Female",
TRUE ~ NA_character_
),
levels = c("Male", "Female")
),
# Exercise: 1=Yes, 2=No; 7/9->NA; No is reference
exercise = factor(
case_when(
exerany2 == 1 ~ "Yes",
exerany2 == 2 ~ "No",
exerany2 %in% c(7, 9) ~ NA_character_,
TRUE ~ NA_character_
),
levels = c("No", "Yes")
),
# Age group: 13 five-year bands; 14->NA; 18-24 is reference
age_group = factor(
case_when(
ageg5yr == 1 ~ "18-24",
ageg5yr == 2 ~ "25-29",
ageg5yr == 3 ~ "30-34",
ageg5yr == 4 ~ "35-39",
ageg5yr == 5 ~ "40-44",
ageg5yr == 6 ~ "45-49",
ageg5yr == 7 ~ "50-54",
ageg5yr == 8 ~ "55-59",
ageg5yr == 9 ~ "60-64",
ageg5yr == 10 ~ "65-69",
ageg5yr == 11 ~ "70-74",
ageg5yr == 12 ~ "75-79",
ageg5yr == 13 ~ "80+",
ageg5yr == 14 ~ NA_character_,
TRUE ~ NA_character_
),
levels = c("18-24","25-29","30-34","35-39","40-44","45-49",
"50-54","55-59","60-64","65-69","70-74","75-79","80+")
),
# Income: 7 levels; 9->NA; "Less than $15,000" is reference
income = factor(
case_when(
incomg1 == 1 ~ "Less than $15,000",
incomg1 == 2 ~ "$15,000-$24,999",
incomg1 == 3 ~ "$25,000-$34,999",
incomg1 == 4 ~ "$35,000-$49,999",
incomg1 == 5 ~ "$50,000-$99,999",
incomg1 == 6 ~ "$100,000-$199,999",
incomg1 == 7 ~ "$200,000 or more",
incomg1 == 9 ~ NA_character_,
TRUE ~ NA_character_
),
levels = c("Less than $15,000","$15,000-$24,999","$25,000-$34,999",
"$35,000-$49,999","$50,000-$99,999",
"$100,000-$199,999","$200,000 or more")
),
# Education: 5 levels; 1-2 collapsed; 9->NA
education = factor(
case_when(
educa %in% c(1, 2) ~ "Less than high school",
educa == 3 ~ "High school diploma or GED",
educa == 4 ~ "Some college or technical school",
educa == 5 ~ "College graduate",
educa == 6 ~ "Graduate or professional degree",
educa == 9 ~ NA_character_,
TRUE ~ NA_character_
),
levels = c("Less than high school",
"High school diploma or GED",
"Some college or technical school",
"College graduate",
"Graduate or professional degree")
),
# Smoking status: 4 levels; 9->NA
smoking = factor(
case_when(
smoker3 == 1 ~ "Current daily smoker",
smoker3 == 2 ~ "Current some-day smoker",
smoker3 == 3 ~ "Former smoker",
smoker3 == 4 ~ "Never smoker",
smoker3 == 9 ~ NA_character_,
TRUE ~ NA_character_
),
levels = c("Current daily smoker","Current some-day smoker",
"Former smoker","Never smoker")
)
) |>
select(menthlth_days, physhlth_days, bmi, sex, exercise,
age_group, income, education, smoking)
# Save file
saveRDS(brfss_clean, "/Users/samriddhi/Downloads/brfss_clean.rds")
cat("Saved: brfss_clean.rds —", nrow(brfss_clean), "rows,",
ncol(brfss_clean), "columns\n")# Load from the saved RDS
brfss_clean <- readRDS("/Users/samriddhi/Downloads/brfss_clean.rds")
cat("Clean dataset dimensions:\n")## Clean dataset dimensions:
## Rows: 433323
## Columns: 9
## Variables: menthlth_days, physhlth_days, bmi, sex, exercise, age_group, income, education, smoking
miss_report <- brfss_clean |>
summarise(
across(
everything(),
list(
N_missing = ~ sum(is.na(.)),
Pct_missing = ~ round(mean(is.na(.)) * 100, 1)
),
.names = "{.col}__{.fn}"
)
) |>
pivot_longer(
everything(),
names_to = c("Variable", ".value"),
names_sep = "__"
) |>
rename(`N Missing` = N_missing, `% Missing` = Pct_missing)
miss_report |>
kable(
caption = "Table 1.0. Missing Data for All Nine Analysis Variables (Before Exclusions)",
col.names = c("Variable", "N Missing", "% Missing")
) |>
kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)| Variable | N Missing | % Missing |
|---|---|---|
| menthlth_days | 8108 | 1.9 |
| physhlth_days | 10785 | 2.5 |
| bmi | 40535 | 9.4 |
| sex | 0 | 0.0 |
| exercise | 1251 | 0.3 |
| age_group | 7779 | 1.8 |
| income | 86623 | 20.0 |
| education | 2325 | 0.5 |
| smoking | 23062 | 5.3 |
set.seed(1220)
brfss_analytic <- brfss_clean |>
drop_na(menthlth_days, physhlth_days, bmi, sex, exercise,
age_group, income, education, smoking) |>
slice_sample(n = 8000)
cat("Final analytic sample size: n =", nrow(brfss_analytic), "\n")## Final analytic sample size: n = 8000
Final analytic sample: After removing observations with missing
values on any of the nine analysis variables and drawing a reproducible
random sample using set.seed(1220), the analytic dataset
contains n = 8,000 observations.
#descriptive-table
brfss_analytic |>
select(menthlth_days, physhlth_days, bmi, exercise,
age_group, income, education, smoking, sex) |>
tbl_summary(
by = sex,
label = list(
menthlth_days ~ "Mentally unhealthy days (past 30)",
physhlth_days ~ "Physically unhealthy days (past 30)",
bmi ~ "Body Mass Index (kg/m²)",
exercise ~ "Any exercise in past 30 days",
age_group ~ "Age group",
income ~ "Annual household income",
education ~ "Education level",
smoking ~ "Smoking status"
),
statistic = list(
all_continuous() ~ "{mean} ({sd})",
all_categorical() ~ "{n} ({p}%)"
),
digits = all_continuous() ~ 1,
missing = "no"
) |>
add_overall() |>
add_p() |>
bold_labels() |>
italicize_levels() |>
modify_caption(
"**Table 0.2. Descriptive Statistics — BRFSS 2023 Analytic Sample (n = 8,000), Stratified by Sex**"
) |>
as_flex_table()Characteristic | Overall | Male | Female | p-value2 |
|---|---|---|---|---|
Mentally unhealthy days (past 30) | 4.3 (8.1) | 3.5 (7.5) | 5.1 (8.6) | <0.001 |
Physically unhealthy days (past 30) | 4.4 (8.7) | 4.0 (8.4) | 4.9 (8.9) | <0.001 |
Body Mass Index (kg/m²) | 28.7 (6.5) | 28.7 (6.0) | 28.7 (7.0) | 0.008 |
Any exercise in past 30 days | 6,240 (78%) | 3,146 (80%) | 3,094 (76%) | <0.001 |
Age group | <0.001 | |||
18-24 | 406 (5.1%) | 235 (6.0%) | 171 (4.2%) | |
25-29 | 408 (5.1%) | 219 (5.6%) | 189 (4.7%) | |
30-34 | 463 (5.8%) | 253 (6.4%) | 210 (5.2%) | |
35-39 | 565 (7.1%) | 263 (6.7%) | 302 (7.4%) | |
40-44 | 582 (7.3%) | 290 (7.4%) | 292 (7.2%) | |
45-49 | 518 (6.5%) | 266 (6.8%) | 252 (6.2%) | |
50-54 | 608 (7.6%) | 305 (7.7%) | 303 (7.5%) | |
55-59 | 660 (8.3%) | 308 (7.8%) | 352 (8.7%) | |
60-64 | 787 (9.8%) | 408 (10%) | 379 (9.3%) | |
65-69 | 901 (11%) | 418 (11%) | 483 (12%) | |
70-74 | 808 (10%) | 382 (9.7%) | 426 (10%) | |
75-79 | 663 (8.3%) | 325 (8.3%) | 338 (8.3%) | |
80+ | 631 (7.9%) | 264 (6.7%) | 367 (9.0%) | |
Annual household income | <0.001 | |||
Less than $15,000 | 407 (5.1%) | 160 (4.1%) | 247 (6.1%) | |
$15,000-$24,999 | 641 (8.0%) | 271 (6.9%) | 370 (9.1%) | |
$25,000-$34,999 | 871 (11%) | 376 (9.6%) | 495 (12%) | |
$35,000-$49,999 | 1,067 (13%) | 482 (12%) | 585 (14%) | |
$50,000-$99,999 | 2,511 (31%) | 1,251 (32%) | 1,260 (31%) | |
$100,000-$199,999 | 1,865 (23%) | 996 (25%) | 869 (21%) | |
$200,000 or more | 638 (8.0%) | 400 (10%) | 238 (5.9%) | |
Education level | 0.003 | |||
Less than high school | 124 (1.6%) | 75 (1.9%) | 49 (1.2%) | |
High school diploma or GED | 252 (3.2%) | 130 (3.3%) | 122 (3.0%) | |
Some college or technical school | 1,827 (23%) | 950 (24%) | 877 (22%) | |
College graduate | 2,138 (27%) | 1,018 (26%) | 1,120 (28%) | |
Graduate or professional degree | 3,659 (46%) | 1,763 (45%) | 1,896 (47%) | |
Smoking status | <0.001 | |||
Current daily smoker | 658 (8.2%) | 339 (8.6%) | 319 (7.8%) | |
Current some-day smoker | 268 (3.4%) | 151 (3.8%) | 117 (2.9%) | |
Former smoker | 2,262 (28%) | 1,207 (31%) | 1,055 (26%) | |
Never smoker | 4,812 (60%) | 2,239 (57%) | 2,573 (63%) | |
1Mean (SD); n (%) | ||||
2Wilcoxon rank sum test; Pearson's Chi-squared test | ||||
mod_full <- lm(
menthlth_days ~ physhlth_days + bmi + sex + exercise +
age_group + income + education + smoking,
data = brfss_analytic
)
summary(mod_full)##
## Call:
## lm(formula = menthlth_days ~ physhlth_days + bmi + sex + exercise +
## age_group + income + education + smoking, data = brfss_analytic)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.080 -3.865 -1.597 0.712 30.471
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.65053 0.95407 10.115 < 2e-16
## physhlth_days 0.26558 0.01007 26.384 < 2e-16
## bmi 0.03338 0.01321 2.527 0.011510
## sexFemale 1.39038 0.16750 8.301 < 2e-16
## exerciseYes -0.65116 0.21472 -3.033 0.002432
## age_group25-29 -1.05660 0.51938 -2.034 0.041950
## age_group30-34 -1.09395 0.50646 -2.160 0.030803
## age_group35-39 -1.81103 0.48851 -3.707 0.000211
## age_group40-44 -2.89307 0.48749 -5.935 3.07e-09
## age_group45-49 -3.05618 0.49769 -6.141 8.61e-10
## age_group50-54 -3.51674 0.48323 -7.277 3.72e-13
## age_group55-59 -4.49597 0.47555 -9.454 < 2e-16
## age_group60-64 -4.52215 0.45848 -9.863 < 2e-16
## age_group65-69 -5.57795 0.45019 -12.390 < 2e-16
## age_group70-74 -6.02536 0.45741 -13.173 < 2e-16
## age_group75-79 -6.28656 0.47484 -13.239 < 2e-16
## age_group80+ -6.81968 0.47684 -14.302 < 2e-16
## income$15,000-$24,999 -1.63703 0.46899 -3.491 0.000485
## income$25,000-$34,999 -2.07637 0.44797 -4.635 3.63e-06
## income$35,000-$49,999 -2.54685 0.43819 -5.812 6.40e-09
## income$50,000-$99,999 -3.05048 0.41069 -7.428 1.22e-13
## income$100,000-$199,999 -3.49984 0.42923 -8.154 4.07e-16
## income$200,000 or more -3.78409 0.50036 -7.563 4.38e-14
## educationHigh school diploma or GED 0.07991 0.81066 0.099 0.921484
## educationSome college or technical school 1.07679 0.68973 1.561 0.118520
## educationCollege graduate 1.79091 0.69119 2.591 0.009585
## educationGraduate or professional degree 1.73781 0.69250 2.509 0.012111
## smokingCurrent some-day smoker -1.58670 0.53523 -2.965 0.003041
## smokingFormer smoker -1.98971 0.33713 -5.902 3.74e-09
## smokingNever smoker -2.93681 0.32162 -9.131 < 2e-16
##
## (Intercept) ***
## physhlth_days ***
## bmi *
## sexFemale ***
## exerciseYes **
## age_group25-29 *
## age_group30-34 *
## age_group35-39 ***
## age_group40-44 ***
## age_group45-49 ***
## age_group50-54 ***
## age_group55-59 ***
## age_group60-64 ***
## age_group65-69 ***
## age_group70-74 ***
## age_group75-79 ***
## age_group80+ ***
## income$15,000-$24,999 ***
## income$25,000-$34,999 ***
## income$35,000-$49,999 ***
## income$50,000-$99,999 ***
## income$100,000-$199,999 ***
## income$200,000 or more ***
## educationHigh school diploma or GED
## educationSome college or technical school
## educationCollege graduate **
## educationGraduate or professional degree *
## smokingCurrent some-day smoker **
## smokingFormer smoker ***
## smokingNever smoker ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.352 on 7970 degrees of freedom
## Multiple R-squared: 0.1853, Adjusted R-squared: 0.1824
## F-statistic: 62.52 on 29 and 7970 DF, p-value: < 2.2e-16
#coef-table
tidy(mod_full, conf.int = TRUE) |>
mutate(across(where(is.numeric), ~ round(., 4))) |>
kable(
caption = "Table 1.1. Full Model Coefficient Estimates with 95% Confidence Intervals",
col.names = c("Term","Estimate","SE","t-statistic","p-value","95% CI Lower","95% CI Upper")
) |>
kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE) |>
scroll_box(height = "420px")| Term | Estimate | SE | t-statistic | p-value | 95% CI Lower | 95% CI Upper |
|---|---|---|---|---|---|---|
| (Intercept) | 9.6505 | 0.9541 | 10.1151 | 0.0000 | 7.7803 | 11.5208 |
| physhlth_days | 0.2656 | 0.0101 | 26.3841 | 0.0000 | 0.2459 | 0.2853 |
| bmi | 0.0334 | 0.0132 | 2.5274 | 0.0115 | 0.0075 | 0.0593 |
| sexFemale | 1.3904 | 0.1675 | 8.3007 | 0.0000 | 1.0620 | 1.7187 |
| exerciseYes | -0.6512 | 0.2147 | -3.0326 | 0.0024 | -1.0721 | -0.2303 |
| age_group25-29 | -1.0566 | 0.5194 | -2.0343 | 0.0420 | -2.0747 | -0.0385 |
| age_group30-34 | -1.0939 | 0.5065 | -2.1600 | 0.0308 | -2.0867 | -0.1012 |
| age_group35-39 | -1.8110 | 0.4885 | -3.7072 | 0.0002 | -2.7686 | -0.8534 |
| age_group40-44 | -2.8931 | 0.4875 | -5.9346 | 0.0000 | -3.8487 | -1.9375 |
| age_group45-49 | -3.0562 | 0.4977 | -6.1408 | 0.0000 | -4.0318 | -2.0806 |
| age_group50-54 | -3.5167 | 0.4832 | -7.2775 | 0.0000 | -4.4640 | -2.5695 |
| age_group55-59 | -4.4960 | 0.4755 | -9.4543 | 0.0000 | -5.4282 | -3.5638 |
| age_group60-64 | -4.5221 | 0.4585 | -9.8633 | 0.0000 | -5.4209 | -3.6234 |
| age_group65-69 | -5.5779 | 0.4502 | -12.3903 | 0.0000 | -6.4604 | -4.6955 |
| age_group70-74 | -6.0254 | 0.4574 | -13.1728 | 0.0000 | -6.9220 | -5.1287 |
| age_group75-79 | -6.2866 | 0.4748 | -13.2392 | 0.0000 | -7.2174 | -5.3557 |
| age_group80+ | -6.8197 | 0.4768 | -14.3019 | 0.0000 | -7.7544 | -5.8850 |
| income$15,000-$24,999 | -1.6370 | 0.4690 | -3.4905 | 0.0005 | -2.5564 | -0.7177 |
| income$25,000-$34,999 | -2.0764 | 0.4480 | -4.6351 | 0.0000 | -2.9545 | -1.1982 |
| income$35,000-$49,999 | -2.5469 | 0.4382 | -5.8122 | 0.0000 | -3.4058 | -1.6879 |
| income$50,000-$99,999 | -3.0505 | 0.4107 | -7.4277 | 0.0000 | -3.8555 | -2.2454 |
| income$100,000-$199,999 | -3.4998 | 0.4292 | -8.1537 | 0.0000 | -4.3413 | -2.6584 |
| income$200,000 or more | -3.7841 | 0.5004 | -7.5628 | 0.0000 | -4.7649 | -2.8033 |
| educationHigh school diploma or GED | 0.0799 | 0.8107 | 0.0986 | 0.9215 | -1.5092 | 1.6690 |
| educationSome college or technical school | 1.0768 | 0.6897 | 1.5612 | 0.1185 | -0.2753 | 2.4288 |
| educationCollege graduate | 1.7909 | 0.6912 | 2.5911 | 0.0096 | 0.4360 | 3.1458 |
| educationGraduate or professional degree | 1.7378 | 0.6925 | 2.5095 | 0.0121 | 0.3803 | 3.0953 |
| smokingCurrent some-day smoker | -1.5867 | 0.5352 | -2.9645 | 0.0030 | -2.6359 | -0.5375 |
| smokingFormer smoker | -1.9897 | 0.3371 | -5.9020 | 0.0000 | -2.6506 | -1.3289 |
| smokingNever smoker | -2.9368 | 0.3216 | -9.1313 | 0.0000 | -3.5673 | -2.3063 |
## (Intercept)
## 9.651
## physhlth_days
## 0.266
## bmi
## 0.033
## sexFemale
## 1.390
## exerciseYes
## -0.651
## age_group25-29
## -1.057
## age_group30-34
## -1.094
## age_group35-39
## -1.811
## age_group40-44
## -2.893
## age_group45-49
## -3.056
## age_group50-54
## -3.517
## age_group55-59
## -4.496
## age_group60-64
## -4.522
## age_group65-69
## -5.578
## age_group70-74
## -6.025
## age_group75-79
## -6.287
## age_group80+
## -6.820
## income$15,000-$24,999
## -1.637
## income$25,000-$34,999
## -2.076
## income$35,000-$49,999
## -2.547
## income$50,000-$99,999
## -3.050
## income$100,000-$199,999
## -3.500
## income$200,000 or more
## -3.784
## educationHigh school diploma or GED
## 0.080
## educationSome college or technical school
## 1.077
## educationCollege graduate
## 1.791
## educationGraduate or professional degree
## 1.738
## smokingCurrent some-day smoker
## -1.587
## smokingFormer smoker
## -1.990
## smokingNever smoker
## -2.937
The fitted regression equation (coefficients rounded to three decimal places) is:
\[ \begin{aligned} \widehat{\text{MentalHealthDays}} =\ &9.651 \\ &+ 0.266 \cdot \text{PhysicalHealthDays} \\ &+ 0.033 \cdot \text{BMI} \\ &+ 1.39 \cdot \mathbb{1}[\text{Female}] \\ &+ -0.651 \cdot \mathbb{1}[\text{Exercise=Yes}] \\ &+ -1.057 \cdot \mathbb{1}[\text{25-29}] + -1.094 \cdot \mathbb{1}[\text{30-34}] + -1.811 \cdot \mathbb{1}[\text{35-39}] \\ &+ -2.893 \cdot \mathbb{1}[\text{40-44}] + -3.056 \cdot \mathbb{1}[\text{45-49}] + -3.517 \cdot \mathbb{1}[\text{50-54}] \\ &+ -4.496 \cdot \mathbb{1}[\text{55-59}] + -4.522 \cdot \mathbb{1}[\text{60-64}] + -5.578 \cdot \mathbb{1}[\text{65-69}] \\ &+ -6.025 \cdot \mathbb{1}[\text{70-74}] + -6.287 \cdot \mathbb{1}[\text{75-79}] + -6.82 \cdot \mathbb{1}[\text{80+}] \\ &+ -1.637 \cdot \mathbb{1}[\text{\$15k-\$25k}] + -2.076 \cdot \mathbb{1}[\text{\$25k-\$35k}] + -2.547 \cdot \mathbb{1}[\text{\$35k-\$50k}] \\ &+ -3.05 \cdot \mathbb{1}[\text{\$50k-\$100k}] + -3.5 \cdot \mathbb{1}[\text{\$100k-\$200k}] + -3.784 \cdot \mathbb{1}[\text{\$200k+}] \\ &+ 0.08 \cdot \mathbb{1}[\text{HS/GED}] + 1.077 \cdot \mathbb{1}[\text{Some college}] \\ &+ 1.791 \cdot \mathbb{1}[\text{College grad}] + 1.738 \cdot \mathbb{1}[\text{Grad degree}] \\ &+ -1.587 \cdot \mathbb{1}[\text{Some-day smoker}] + -1.99 \cdot \mathbb{1}[\text{Former smoker}] + -2.937 \cdot \mathbb{1}[\text{Never smoker}] \end{aligned} \]
Step 4: Coefficient Interpretations
physhlth_days (Continuous)
For one unit increase in physically unhealthy day, the mentally unhealth days increases by 0.27 units, holding all other predictors (BMI, sex, exercise, age group, income, education, and smoking) constant. This is the strongest continuous predictor in the model.
bmi (Continuous)
For each one-unit increase in BMI (kg/m²), there is an associated increase of 0.03 mentally unhealthy days, holding all other predictors constant. The small magnitude suggests a modest independent association after adjusting for the other variables.
sex: Female vs. Male (Reference)
Females report an estimated 1.39 mentally more unhealthy days on average compared to males (the reference group), holding all other variables constant.
exercise: Yes vs. No (Reference)
Adults who reported any exercise in the past 30 days had 0.651 fewer mentally unhealthy days compared to those who reported no exercise, holding all other variables constant.
Age Group: 25–29 vs. 18–24 (Reference)
Adults aged 25–29 report, on average, 1.057 fewer mentally unhealthy days compared to those aged 18–24 (the reference group), holding all other predictors constant.
Income: Two Coefficients vs. Less than $15,000 (Reference)
$50,000–$99,999 vs. Less than $15,000:
Adults earning $50,000–$99,999 annually report, on average, 3.05 fewer mentally unhealthy days compared to those earning less than $15,000, holding all other variables constant.
$200,000 or more vs. Less than $15,000:
Adults earning $200,000 annually report, on average, -3.78 fewer mentally unhealthy days, reflecting the economic gradient assiciated with mentally unhealthy days.
mod_stats <- glance(mod_full)
tibble(
Statistic = c(
"R-squared",
"Adjusted R-squared",
"Root MSE (Residual Standard Error)",
"Overall F-statistic",
"Numerator df (p)",
"Denominator df (n - p - 1)",
"Overall p-value"
),
Value = c(
round(mod_stats$r.squared, 4),
round(mod_stats$adj.r.squared, 4),
round(mod_stats$sigma, 3),
round(mod_stats$statistic, 2),
mod_stats$df,
mod_stats$df.residual,
format.pval(mod_stats$p.value, digits = 3)
)
) |>
kable(caption = "Table 1.2. Model-Level Summary Statistics") |>
kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)| Statistic | Value |
|---|---|
| R-squared | 0.1853 |
| Adjusted R-squared | 0.1824 |
| Root MSE (Residual Standard Error) | 7.352 |
| Overall F-statistic | 62.52 |
| Numerator df (p) | 29 |
| Denominator df (n - p - 1) | 7970 |
| Overall p-value | <2e-16 |
R-square
R square is 0.185. This means that 18.5% of the total variability in mentally unhealthy days is explained by all predictors combined.
Adjusted R-square
Adjusted R-square is 0.182 which is slightly lower than unadjusted R square. Adjusted R square penalizes the model for each additional predictor added, correcting for the mechanical increase in R square that occurs even when predictors add no real information. The small gap confirms the predictors are genuinely contributing.
Root MSE
The root mean squared error (RMSE) of 7.352 indicates the average deviation of observed mentally unhealthy days from the values predicted by the model.
Overall F-test
\[H_0: \beta_1 = \beta_2 = \cdots = \beta_p = 0 \quad \text{(no predictor is associated with mentally unhealthy days)}\] \[H_A: \text{At least one } \beta_j \neq 0\]
The overall F-statistic of 62.52 (df = 29, 7970, p < 0.001) indicates that the model as a whole is statistically significant, meaning that, collectively, the predictors explain a significant amount of variation in mentally unhealthy days.
#type3-anova
anova_type3 <- Anova(mod_full, type = "III")
anova_type3 |>
tidy() |>
mutate(
across(where(is.numeric), ~ round(., 4)),
`Significant (alpha = 0.05)` = ifelse(p.value < 0.05, "Yes", "No")
) |>
kable(
caption = "Table 2.1. Type III Partial F-Tests — All Predictors",
col.names = c("Predictor","Sum of Sq","df","F value","p-value","Significant?")
) |>
kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE) |>
column_spec(6, bold = TRUE)| Predictor | Sum of Sq | df | F value | p-value | Significant? |
|---|---|---|---|---|---|
| (Intercept) | 5529.7722 | 1 | 102.3152 | 0.0000 | Yes |
| physhlth_days | 37622.7952 | 1 | 696.1198 | 0.0000 | Yes |
| bmi | 345.2408 | 1 | 6.3879 | 0.0115 | Yes |
| sex | 3723.8662 | 1 | 68.9012 | 0.0000 | Yes |
| exercise | 497.0434 | 1 | 9.1966 | 0.0024 | Yes |
| age_group | 30092.1774 | 12 | 46.3986 | 0.0000 | Yes |
| income | 4733.8943 | 6 | 14.5982 | 0.0000 | Yes |
| education | 1265.1504 | 4 | 5.8521 | 0.0001 | Yes |
| smoking | 5101.1076 | 3 | 31.4613 | 0.0000 | Yes |
| Residuals | 430750.0872 | 7970 | NA | NA | NA |
Predictors which are marked “Yes” make a statistically significant independent contribution to predicting mentally unhealthy days, after adjusting for all other predictors simultaneously.
#chunk-test-income
mod_no_income <- lm(
menthlth_days ~ physhlth_days + bmi + sex + exercise +
age_group + education + smoking,
data = brfss_analytic
)
ft_income <- anova(mod_no_income, mod_full)
ft_income |>
tidy() |>
mutate(across(where(is.numeric), ~ round(., 4))) |>
kable(
caption = "Table 2.2. Chunk Test: Does Income as a Group Improve the Model?",
col.names = c("Model","Res. df","RSS","df","Sum of Sq","F","p-value")
) |>
kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)| Model | Res. df | RSS | df | Sum of Sq | F | p-value |
|---|---|---|---|---|---|---|
| menthlth_days ~ physhlth_days + bmi + sex + exercise + age_group + education + smoking | 7976 | 435484.0 | NA | NA | NA | NA |
| menthlth_days ~ physhlth_days + bmi + sex + exercise + age_group + income + education + smoking | 7970 | 430750.1 | 6 | 4733.894 | 14.5982 | 0 |
\[H_0: \beta_{\$15k-\$25k} = \beta_{\$25k-\$35k} = \beta_{\$35k-\$50k} = \beta_{\$50k-\$100k} = \beta_{\$100k-\$200k} = \beta_{\$200k+} = 0\] \[H_A: \text{At least one income coefficient} \neq 0\]
The F-statistic = 14.60 with p < 0.001, therefore we will reject H0. This indicates that income, as a group, significantly improves the model. Income categories collectively explain a significant portion of the variation in mentally unhealthy days, beyond what is explained by physical health, BMI, sex, exercise, age, education, and smoking.
mod_no_educ <- lm(
menthlth_days ~ physhlth_days + bmi + sex + exercise +
age_group + income + smoking,
data = brfss_analytic
)
ft_educ <- anova(mod_no_educ, mod_full)
ft_educ |>
tidy() |>
mutate(across(where(is.numeric), ~ round(., 4))) |>
kable(
caption = "Table 2.3. Chunk Test: Does Education as a Group Improve the Model?",
col.names = c("Model","Res. df","RSS","df","Sum of Sq","F","p-value")
) |>
kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)| Model | Res. df | RSS | df | Sum of Sq | F | p-value |
|---|---|---|---|---|---|---|
| menthlth_days ~ physhlth_days + bmi + sex + exercise + age_group + income + smoking | 7974 | 432015.2 | NA | NA | NA | NA |
| menthlth_days ~ physhlth_days + bmi + sex + exercise + age_group + income + education + smoking | 7970 | 430750.1 | 4 | 1265.15 | 5.8521 | 1e-04 |
f_educ <- round(ft_educ$F[2], 3)
p_educ <- ft_educ$`Pr(>F)`[2]
df1_educ <- ft_educ$Df[2]
df2_educ <- ft_educ$Res.Df[2]Hypotheses:
\[H_0: \beta_{\text{HS/GED}} = \beta_{\text{Some college}} = \beta_{\text{College grad}} = \beta_{\text{Grad degree}} = 0\] \[H_A: \text{At least one education coefficient} \neq 0\]
The F-statistic = 5.85 with p < 0.001, therefore, we will reject the H0. This indicates that education categories collectively contribute to explaining variation in mentally unhealthy days, even after accounting for physical health, BMI, sex, exercise, age, income, and smoking.
Type III partial F-tests show that physical health days, sex, exercise, age group, income, education, and smoking each make significant independent contributions to predicting mentally unhealthy days, after controlling for all other predictors. Physical health days and smoking are among the strongest predictors. Chunk tests indicate that even when some individual dummy coefficients in multi-level variables like income and education are not significant, the variables as a whole still explain meaningful variation, confirming their group-level predictive value.
#create-smoker-binary
brfss_analytic <- brfss_analytic |>
mutate(
smoker_current = factor(
case_when(
smoking %in% c("Current daily smoker",
"Current some-day smoker") ~ "Current smoker",
smoking %in% c("Former smoker",
"Never smoker") ~ "Non-smoker",
TRUE ~ NA_character_
),
levels = c("Non-smoker", "Current smoker") # Non-smoker is reference
)
)
table(brfss_analytic$smoker_current, useNA = "always")##
## Non-smoker Current smoker <NA>
## 7074 926 0
#fit-model-A
mod_A <- lm(
menthlth_days ~ physhlth_days + bmi + sex + smoker_current +
exercise + age_group + income + education,
data = brfss_analytic
)
tidy(mod_A, conf.int = TRUE) |>
mutate(across(where(is.numeric), ~ round(., 4))) |>
kable(
caption = "Table 3.1. Model A — Main Effects Only",
col.names = c("Term","Estimate","SE","t","p-value","95% CI Lower","95% CI Upper")
) |>
kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE) |>
scroll_box(height = "360px")| Term | Estimate | SE | t | p-value | 95% CI Lower | 95% CI Upper |
|---|---|---|---|---|---|---|
| (Intercept) | 6.7553 | 0.9271 | 7.2869 | 0.0000 | 4.9381 | 8.5726 |
| physhlth_days | 0.2686 | 0.0101 | 26.6731 | 0.0000 | 0.2489 | 0.2884 |
| bmi | 0.0334 | 0.0132 | 2.5250 | 0.0116 | 0.0075 | 0.0593 |
| sexFemale | 1.3331 | 0.1673 | 7.9675 | 0.0000 | 1.0051 | 1.6611 |
| smoker_currentCurrent smoker | 2.1287 | 0.2712 | 7.8489 | 0.0000 | 1.5971 | 2.6604 |
| exerciseYes | -0.6725 | 0.2150 | -3.1274 | 0.0018 | -1.0940 | -0.2510 |
| age_group25-29 | -0.9149 | 0.5198 | -1.7602 | 0.0784 | -1.9338 | 0.1040 |
| age_group30-34 | -0.8823 | 0.5061 | -1.7434 | 0.0813 | -1.8743 | 0.1097 |
| age_group35-39 | -1.5810 | 0.4877 | -3.2421 | 0.0012 | -2.5369 | -0.6251 |
| age_group40-44 | -2.6157 | 0.4858 | -5.3847 | 0.0000 | -3.5680 | -1.6635 |
| age_group45-49 | -2.8246 | 0.4970 | -5.6836 | 0.0000 | -3.7988 | -1.8504 |
| age_group50-54 | -3.2600 | 0.4821 | -6.7628 | 0.0000 | -4.2050 | -2.3151 |
| age_group55-59 | -4.2301 | 0.4741 | -8.9219 | 0.0000 | -5.1595 | -3.3007 |
| age_group60-64 | -4.2484 | 0.4568 | -9.3000 | 0.0000 | -5.1439 | -3.3529 |
| age_group65-69 | -5.2338 | 0.4467 | -11.7163 | 0.0000 | -6.1095 | -4.3582 |
| age_group70-74 | -5.7023 | 0.4545 | -12.5457 | 0.0000 | -6.5933 | -4.8113 |
| age_group75-79 | -5.8977 | 0.4703 | -12.5401 | 0.0000 | -6.8197 | -4.9758 |
| age_group80+ | -6.4888 | 0.4737 | -13.6975 | 0.0000 | -7.4174 | -5.5602 |
| income$15,000-$24,999 | -1.6797 | 0.4698 | -3.5754 | 0.0004 | -2.6007 | -0.7588 |
| income$25,000-$34,999 | -2.1023 | 0.4486 | -4.6861 | 0.0000 | -2.9817 | -1.2229 |
| income$35,000-$49,999 | -2.5869 | 0.4390 | -5.8931 | 0.0000 | -3.4474 | -1.7264 |
| income$50,000-$99,999 | -3.0823 | 0.4114 | -7.4921 | 0.0000 | -3.8887 | -2.2758 |
| income$100,000-$199,999 | -3.5360 | 0.4300 | -8.2232 | 0.0000 | -4.3789 | -2.6931 |
| income$200,000 or more | -3.8625 | 0.5011 | -7.7078 | 0.0000 | -4.8448 | -2.8802 |
| educationHigh school diploma or GED | 0.2139 | 0.8119 | 0.2634 | 0.7922 | -1.3776 | 1.8053 |
| educationSome college or technical school | 1.1965 | 0.6907 | 1.7323 | 0.0833 | -0.1575 | 2.5505 |
| educationCollege graduate | 1.9035 | 0.6922 | 2.7499 | 0.0060 | 0.5466 | 3.2604 |
| educationGraduate or professional degree | 1.7456 | 0.6938 | 2.5160 | 0.0119 | 0.3856 | 3.1057 |
#fit-model-B
# includes sex, smoker_current, AND their interaction term
mod_B <- lm(
menthlth_days ~ physhlth_days + bmi + sex * smoker_current +
exercise + age_group + income + education,
data = brfss_analytic
)
tidy(mod_B, conf.int = TRUE) |>
mutate(across(where(is.numeric), ~ round(., 4))) |>
kable(
caption = "Table 3.2. Model B — With sex x smoker_current Interaction",
col.names = c("Term","Estimate","SE","t","p-value","95% CI Lower","95% CI Upper")
) |>
kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE) |>
scroll_box(height = "400px")| Term | Estimate | SE | t | p-value | 95% CI Lower | 95% CI Upper |
|---|---|---|---|---|---|---|
| (Intercept) | 6.8994 | 0.9286 | 7.4301 | 0.0000 | 5.0791 | 8.7196 |
| physhlth_days | 0.2686 | 0.0101 | 26.6788 | 0.0000 | 0.2489 | 0.2883 |
| bmi | 0.0331 | 0.0132 | 2.5017 | 0.0124 | 0.0072 | 0.0590 |
| sexFemale | 1.1855 | 0.1775 | 6.6784 | 0.0000 | 0.8376 | 1.5335 |
| smoker_currentCurrent smoker | 1.5208 | 0.3654 | 4.1621 | 0.0000 | 0.8045 | 2.2371 |
| exerciseYes | -0.6728 | 0.2150 | -3.1301 | 0.0018 | -1.0942 | -0.2515 |
| age_group25-29 | -0.9202 | 0.5196 | -1.7709 | 0.0766 | -1.9388 | 0.0984 |
| age_group30-34 | -0.8924 | 0.5059 | -1.7640 | 0.0778 | -1.8841 | 0.0993 |
| age_group35-39 | -1.5929 | 0.4875 | -3.2673 | 0.0011 | -2.5485 | -0.6372 |
| age_group40-44 | -2.6286 | 0.4856 | -5.4127 | 0.0000 | -3.5806 | -1.6766 |
| age_group45-49 | -2.8425 | 0.4969 | -5.7209 | 0.0000 | -3.8165 | -1.8686 |
| age_group50-54 | -3.2778 | 0.4820 | -6.8012 | 0.0000 | -4.2226 | -2.3331 |
| age_group55-59 | -4.2499 | 0.4740 | -8.9652 | 0.0000 | -5.1791 | -3.3206 |
| age_group60-64 | -4.2640 | 0.4567 | -9.3364 | 0.0000 | -5.1593 | -3.3688 |
| age_group65-69 | -5.2506 | 0.4466 | -11.7563 | 0.0000 | -6.1261 | -4.3751 |
| age_group70-74 | -5.7111 | 0.4544 | -12.5686 | 0.0000 | -6.6018 | -4.8203 |
| age_group75-79 | -5.9076 | 0.4702 | -12.5646 | 0.0000 | -6.8292 | -4.9859 |
| age_group80+ | -6.4995 | 0.4736 | -13.7239 | 0.0000 | -7.4278 | -5.5711 |
| income$15,000-$24,999 | -1.6357 | 0.4700 | -3.4804 | 0.0005 | -2.5570 | -0.7144 |
| income$25,000-$34,999 | -2.0746 | 0.4486 | -4.6243 | 0.0000 | -2.9540 | -1.1952 |
| income$35,000-$49,999 | -2.5455 | 0.4392 | -5.7964 | 0.0000 | -3.4064 | -1.6847 |
| income$50,000-$99,999 | -3.0430 | 0.4116 | -7.3935 | 0.0000 | -3.8498 | -2.2362 |
| income$100,000-$199,999 | -3.5097 | 0.4300 | -8.1623 | 0.0000 | -4.3526 | -2.6668 |
| income$200,000 or more | -3.8405 | 0.5010 | -7.6651 | 0.0000 | -4.8226 | -2.8583 |
| educationHigh school diploma or GED | 0.1256 | 0.8124 | 0.1546 | 0.8772 | -1.4669 | 1.7180 |
| educationSome college or technical school | 1.1179 | 0.6912 | 1.6172 | 0.1059 | -0.2371 | 2.4729 |
| educationCollege graduate | 1.8179 | 0.6928 | 2.6239 | 0.0087 | 0.4598 | 3.1760 |
| educationGraduate or professional degree | 1.6691 | 0.6943 | 2.4040 | 0.0162 | 0.3081 | 3.0300 |
| sexFemale:smoker_currentCurrent smoker | 1.2833 | 0.5171 | 2.4819 | 0.0131 | 0.2697 | 2.2968 |
#interaction-test
int_test <- anova(mod_A, mod_B)
int_test |>
tidy() |>
mutate(across(where(is.numeric), ~ round(., 4))) |>
kable(
caption = "Table 3.3. Partial F-Test: Model A (no interaction) vs. Model B (with interaction)",
col.names = c("Model","Res. df","RSS","df","Sum of Sq","F","p-value")
) |>
kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)| Model | Res. df | RSS | df | Sum of Sq | F | p-value |
|---|---|---|---|---|---|---|
| menthlth_days ~ physhlth_days + bmi + sex + smoker_current + exercise + age_group + income + education | 7972 | 432508.9 | NA | NA | NA | NA |
| menthlth_days ~ physhlth_days + bmi + sex * smoker_current + exercise + age_group + income + education | 7971 | 432174.9 | 1 | 333.9749 | 6.1598 | 0.0131 |
\[H_0: \beta_{\text{Female} \times \text{Current smoker}} = 0 \quad \text{(smoking–mental health association is the same for men and women)}\]
\[H_A: \beta_{\text{Female} \times \text{Current smoker}} \neq 0 \quad \text{(the association differs by sex)}\]
#Interpret interaction-coefs
b_B <- round(coef(mod_B), 3)
smoking_men <- b_B["smoker_currentCurrent smoker"]
smoking_women <- b_B["smoker_currentCurrent smoker"] +
b_B["sexFemale:smoker_currentCurrent smoker"]
cat("Estimated association of current smoking with mentally unhealthy days:\n")## Estimated association of current smoking with mentally unhealthy days:
## Among men : 1.521 days
## Among women : 2.804 days
## Difference : 1.283 days
Among men (reference sex group): Among men, current smoking is associated with 1.52 additional mentally unhealthy days compared to male non-smokers, holding all other predictors constant.
Among women: Among women, current smoking is associated with 2.80 additional mentally unhealthy days, holding all other variables constant.
Together: The difference between women and men is 1.28 days, indicating that current smoking has a stronger impact on mentally unhealthy days for women than for men.
#interaction-plot, Figure 3.1. Predicted Mentally Unhealthy Days by Smoking Status and Sex
pred_int <- ggpredict(mod_B, terms = c("smoker_current", "sex"))
# Convert to data frame for full control over the plot
pred_df <- as.data.frame(pred_int)
ggplot(pred_df, aes(x = x, y = predicted, color = group, group = group)) +
geom_point(size = 4, position = position_dodge(width = 0.3)) +
geom_line(linewidth = 1.2, position = position_dodge(width = 0.3)) +
geom_errorbar(
aes(ymin = conf.low, ymax = conf.high),
width = 0.15,
linewidth = 0.8,
position = position_dodge(width = 0.3)
) +
scale_color_brewer(palette = "Set1", name = "Sex") +
labs(
title = "Predicted Mentally Unhealthy Days by Smoking Status and Sex",
subtitle = "Model B: sex x smoker_current interaction, adjusted for all other predictors",
x = "Smoking Status",
y = "Predicted Mentally Unhealthy Days (Past 30)",
caption = "Points = model-adjusted predictions; error bars = 95% confidence intervals."
) +
theme_minimal(base_size = 13) +
theme(legend.position = "right")Among U.S. adults, current smokers report more mentally unhealthy days on average than former or never smokers, even after accounting for age, sex, income, education, physical health, and exercise. The strength of this association differs between men and women, with women who smoke experiencing a larger increase in mentally unhealthy days compared to non-smoking women, relative to the corresponding difference among men. These findings suggest that public health programs addressing both tobacco use and mental health may benefit from sex-specific approaches. Since, this analysis is cross-sectional, we cannot determine the direction of causality—poor mental health may increase the likelihood of smoking, smoking may worsen mental health, or both may reflect shared upstream causes.
Among U.S. adults, mentally unhealthy days are associated with a broad range of individual, behavioral, and socioeconomic factors. Physical health days emerged as the strongest predictor: each additional physically unhealthy day was associated with an estimated 0.27 increase in mentally unhealthy days, holding all other variables constant. Current smokers reported more mentally unhealthy days than former or never smokers, even after adjusting for age, sex, income, education, BMI, exercise, and physical health. The interaction analysis revealed that the effect of smoking differs by sex, with women who smoke experiencing 2.80 additional mentally unhealthy days compared to non-smoking women, whereas men who smoke experience 1.52 additional days, resulting in a difference of 1.28 days between sexes. Other predictors also showed significant associations. Adults reporting any exercise in the past 30 days had, on average, 0.65 fewer mentally unhealthy days than those who did not exercise. Age was inversely associated with mentally unhealthy days, with older adults reporting progressively fewer days compared to the 18–24 reference group. Income and education exhibited clear inverse gradients: higher socioeconomic status was linked to fewer mentally unhealthy days. For example, adults earning $50,000–$99,999 annually reported 3.05 fewer mentally unhealthy days than those earning less than $15,000. Some predictors, such as BMI, had weaker associations, suggesting they may influence mental health through indirect pathways not captured in this cross-sectional analysis.The model explained a modest but meaningful portion of variance, with R² = 0.185 and adjusted R² = 0.182, indicating that approximately 18–18.5% of the variability in mentally unhealthy days is accounted for by the predictors. The small difference between R² and adjusted R² confirms that the predictors are informative rather than noise. The overall F-test (F = 62.52, df = 29, 7970, p < 0.001) indicates that the model is statistically significant.
Chunk tests provided additional insight for multi-level categorical predictors. Testing income and education collectively using ANOVA confirmed that both variables significantly improve model fit, even if some individual dummy coefficients are not significant, highlighting the importance of evaluating predictors at the group level. Similarly, adding the sex × current smoking interaction significantly improved model fit (F = 6.16, df = 1, 7971, p = 0.013), confirming that the effect of smoking on mental health differs between men and women.
Overall, these results highlight the complex interplay of behavioral, socioeconomic, and health factors in shaping mentally unhealthy days among U.S. adults and underscore the importance of multi-faceted, targeted public health interventions.
It was a lengthy assignment. So there were lot of places where my codes were sowing errors. I have used chat gpt to resolve the errors. It was helpful, as it explains the error well and also can provide step to step solutions. This helps to understand the codes.