##Background: Mental health is a leading public health concern in the United States. The BRFSS survey asks respondents: “Now thinking about your mental health, which includes stress, depression, and problems with emotions, for how many days during the past 30 days was your mental health not good?” The resulting variable, mentally unhealthy days, captures a broad measure of psychological distress and is widely used in public health surveillance. Multiple regression allows us to examine the independent association between each predictor and mental health, holding all other factors constant. Interaction analysis tests whether the association between two predictors (such as smoking and mental health) differs across levels of a third variable (such as sex), a concept epidemiologists call effect modification.
library(tidyverse)
library(haven)
library(janitor)
library(knitr)
library(kableExtra)
library(broom)
library(gtsummary)
library(GGally)
library(car)
library(ggeffects)
library(plotly)
library(lmtest)
options(gtsummary.use_ftExtra = TRUE)
set_gtsummary_theme(theme_gtsummary_compact(set_theme = TRUE))brfss_recode <-brfss_full|>
rename(
menthlth_days = MENTHLTH,
physhlth_days = PHYSHLTH,
bmi =`_BMI5`,
sex = SEXVAR,
exercise = EXERANY2,
age_group = `_AGEG5YR`,
income = `_INCOMG1`,
education = EDUCA,
smoking = `_SMOKER3`
) |>
mutate(
# outcome: mentally unhealthy days in past 30
menthlth_days = case_when(
menthlth_days == 88 ~0,
menthlth_days %in% c(77,99) ~ NA_real_,
menthlth_days >=1 & menthlth_days<=30 ~as.numeric(menthlth_days),
TRUE ~NA_real_
),
# Physically unhealthy days in past 30
physhlth_days = case_when(
physhlth_days == 88 ~0,
physhlth_days %in% c(77,99) ~ NA_real_,
physhlth_days >=1 & physhlth_days<=30 ~as.numeric(physhlth_days),
TRUE ~NA_real_
),
# BMI
bmi = case_when(
bmi == 9999 ~NA_real_,
TRUE ~as.numeric(bmi)/100
),
# Sex
sex = factor(sex,
levels = c(1,2),
labels = c("Male","Female")),
# Any exericise in the past 30 days
exercise = case_when(
exercise %in% c(7,9) ~NA_real_,
TRUE ~ as.numeric(exercise)
),
exercise = factor(exercise ,
levels = c(1,2),
labels = c("Yes", "No")),
# Age group in 5-year categories (13 levels)
age_group = case_when(
age_group == 14 ~ NA_real_,
TRUE ~ as.numeric(age_group)
),
age_group = factor(age_group,
levels = 1:13,
labels =c("18-24","25-29","30-34","35-39","40-44",
"45-49","50-54","55-59","60-64","65-69",
"70-74","75-79","80+")),
#Annual household Income (7 levels)
income = factor(income,
levels = 1:7,
labels = c("Less than $15,000",
"$15,000 to less than $25,000",
"$25,000 to less than $35,000",
"$35,000 to less than $50,000",
"$50,000 to less than $100,000",
"$100,000 to less than $200,000",
"$200,000 or more")),
# Highest level of education completed
education = case_when(
education %in% c(1,2) ~1,
education == 3 ~2,
education == 4 ~3,
education == 5 ~4,
education == 6 ~5,
education == 9 ~ NA_real_,
TRUE ~ NA_real_),
education = factor(education,
levels = 1:5,
labels = c("Less than high school",
"High school diploma or GED",
"Some college or technical school",
"College graduate",
"Graduate or professional degree")),
# Smoking status 4 levels
smoking = case_when(
smoking == 9 ~NA_real_,
TRUE ~ as.numeric(smoking)),
smoking = factor(smoking,
levels= 1:4,
labels = c("Current daily Smoker",
"Current some-day smoker",
"Former smoker",
"Never smoker"))
)vars <- c("menthlth_days","physhlth_days","bmi","sex","exercise","age_group","income","education","smoking")
missing_summary <- data.frame(
Variable = vars,
Missing_Count = colSums(is.na(brfss_recode[vars])),
Missing_Percent = round(colSums(is.na(brfss_recode[vars])) / nrow(brfss_recode) * 100, 2)
)
print(missing_summary)## Variable Missing_Count Missing_Percent
## menthlth_days menthlth_days 8108 1.87
## physhlth_days physhlth_days 10785 2.49
## bmi bmi 40535 9.35
## sex sex 0 0.00
## exercise exercise 1251 0.29
## age_group age_group 7779 1.80
## income income 86623 19.99
## education education 2325 0.54
## smoking smoking 23062 5.32
set.seed(1220)
brfss_analytic <-brfss_recode |>
drop_na(menthlth_days,physhlth_days,bmi,sex,exercise, age_group,income,education,smoking) |>
slice_sample(n=8000)
tibble(Metric = c("Observations", "Variables"),
Value = c(nrow(brfss_analytic), ncol(brfss_analytic))) |>
kable(caption = "Analytic Dataset Dimensions") |>
kable_styling(bootstrap_options = "striped", full_width = FALSE)| Metric | Value |
|---|---|
| Observations | 8000 |
| Variables | 9 |
brfss_analytic |>
select(menthlth_days, physhlth_days, bmi, sex , exercise, age_group, income, education, smoking) |>
tbl_summary(
by = sex,
label= list(
menthlth_days ~ "Mentally unhealthy days in past 30 (outcome)",
physhlth_days ~ "Physically unhealthy days in past 30",
bmi ~ "Body mass index × 100 (divide by 100)",
exercise ~ "Any exercise in past 30 days",
age_group ~ "Age group in 5-year categories (13 levels)",
income ~ "Annual household income (7 levels)",
education ~ "Highest level of education completed",
smoking ~ "Smoking status (4 levels)"
),
statistic = list(
all_continuous() ~ "{mean} ({sd})",
all_categorical() ~ "{n} ({p}%)"
),
digits = all_continuous() ~1,
missing = "no"
) |>
add_n() |>
bold_labels() |>
italicize_levels() |>
modify_caption("**Table 1. Descriptive Statistics Stratified by Sex - BRFSS 2023**" ) |>
as_flex_table()Characteristic | N | Male | Female |
|---|---|---|---|
Mentally unhealthy days in past 30 (outcome) | 8,000 | 3.5 (7.5) | 5.1 (8.6) |
Physically unhealthy days in past 30 | 8,000 | 4.0 (8.4) | 4.9 (8.9) |
Body mass index × 100 (divide by 100) | 8,000 | 28.7 (6.0) | 28.7 (7.0) |
Any exercise in past 30 days | 8,000 | 3,146 (80%) | 3,094 (76%) |
Age group in 5-year categories (13 levels) | 8,000 | ||
18-24 | 235 (6.0%) | 171 (4.2%) | |
25-29 | 219 (5.6%) | 189 (4.7%) | |
30-34 | 253 (6.4%) | 210 (5.2%) | |
35-39 | 263 (6.7%) | 302 (7.4%) | |
40-44 | 290 (7.4%) | 292 (7.2%) | |
45-49 | 266 (6.8%) | 252 (6.2%) | |
50-54 | 305 (7.7%) | 303 (7.5%) | |
55-59 | 308 (7.8%) | 352 (8.7%) | |
60-64 | 408 (10%) | 379 (9.3%) | |
65-69 | 418 (11%) | 483 (12%) | |
70-74 | 382 (9.7%) | 426 (10%) | |
75-79 | 325 (8.3%) | 338 (8.3%) | |
80+ | 264 (6.7%) | 367 (9.0%) | |
Annual household income (7 levels) | 8,000 | ||
Less than $15,000 | 160 (4.1%) | 247 (6.1%) | |
$15,000 to less than $25,000 | 271 (6.9%) | 370 (9.1%) | |
$25,000 to less than $35,000 | 376 (9.6%) | 495 (12%) | |
$35,000 to less than $50,000 | 482 (12%) | 585 (14%) | |
$50,000 to less than $100,000 | 1,251 (32%) | 1,260 (31%) | |
$100,000 to less than $200,000 | 996 (25%) | 869 (21%) | |
$200,000 or more | 400 (10%) | 238 (5.9%) | |
Highest level of education completed | 8,000 | ||
Less than high school | 75 (1.9%) | 49 (1.2%) | |
High school diploma or GED | 130 (3.3%) | 122 (3.0%) | |
Some college or technical school | 950 (24%) | 877 (22%) | |
College graduate | 1,018 (26%) | 1,120 (28%) | |
Graduate or professional degree | 1,763 (45%) | 1,896 (47%) | |
Smoking status (4 levels) | 8,000 | ||
Current daily Smoker | 339 (8.6%) | 319 (7.8%) | |
Current some-day smoker | 151 (3.8%) | 117 (2.9%) | |
Former smoker | 1,207 (31%) | 1,055 (26%) | |
Never smoker | 2,239 (57%) | 2,573 (63%) | |
1Mean (SD); n (%) | |||
m1<- lm(menthlth_days ~ physhlth_days + bmi + sex + exercise + age_group + income + education + smoking, data = brfss_analytic )
summary(m1)##
## Call:
## lm(formula = menthlth_days ~ physhlth_days + bmi + sex + exercise +
## age_group + income + education + smoking, data = brfss_analytic)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.080 -3.865 -1.597 0.712 30.471
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.99937 0.93968 9.577 < 2e-16
## physhlth_days 0.26558 0.01007 26.384 < 2e-16
## bmi 0.03338 0.01321 2.527 0.011510
## sexFemale 1.39038 0.16750 8.301 < 2e-16
## exerciseNo 0.65116 0.21472 3.033 0.002432
## age_group25-29 -1.05660 0.51938 -2.034 0.041950
## age_group30-34 -1.09395 0.50646 -2.160 0.030803
## age_group35-39 -1.81103 0.48851 -3.707 0.000211
## age_group40-44 -2.89307 0.48749 -5.935 3.07e-09
## age_group45-49 -3.05618 0.49769 -6.141 8.61e-10
## age_group50-54 -3.51674 0.48323 -7.277 3.72e-13
## age_group55-59 -4.49597 0.47555 -9.454 < 2e-16
## age_group60-64 -4.52215 0.45848 -9.863 < 2e-16
## age_group65-69 -5.57795 0.45019 -12.390 < 2e-16
## age_group70-74 -6.02536 0.45741 -13.173 < 2e-16
## age_group75-79 -6.28656 0.47484 -13.239 < 2e-16
## age_group80+ -6.81968 0.47684 -14.302 < 2e-16
## income$15,000 to less than $25,000 -1.63703 0.46899 -3.491 0.000485
## income$25,000 to less than $35,000 -2.07637 0.44797 -4.635 3.63e-06
## income$35,000 to less than $50,000 -2.54685 0.43819 -5.812 6.40e-09
## income$50,000 to less than $100,000 -3.05048 0.41069 -7.428 1.22e-13
## income$100,000 to less than $200,000 -3.49984 0.42923 -8.154 4.07e-16
## income$200,000 or more -3.78409 0.50036 -7.563 4.38e-14
## educationHigh school diploma or GED 0.07991 0.81066 0.099 0.921484
## educationSome college or technical school 1.07679 0.68973 1.561 0.118520
## educationCollege graduate 1.79091 0.69119 2.591 0.009585
## educationGraduate or professional degree 1.73781 0.69250 2.509 0.012111
## smokingCurrent some-day smoker -1.58670 0.53523 -2.965 0.003041
## smokingFormer smoker -1.98971 0.33713 -5.902 3.74e-09
## smokingNever smoker -2.93681 0.32162 -9.131 < 2e-16
##
## (Intercept) ***
## physhlth_days ***
## bmi *
## sexFemale ***
## exerciseNo **
## age_group25-29 *
## age_group30-34 *
## age_group35-39 ***
## age_group40-44 ***
## age_group45-49 ***
## age_group50-54 ***
## age_group55-59 ***
## age_group60-64 ***
## age_group65-69 ***
## age_group70-74 ***
## age_group75-79 ***
## age_group80+ ***
## income$15,000 to less than $25,000 ***
## income$25,000 to less than $35,000 ***
## income$35,000 to less than $50,000 ***
## income$50,000 to less than $100,000 ***
## income$100,000 to less than $200,000 ***
## income$200,000 or more ***
## educationHigh school diploma or GED
## educationSome college or technical school
## educationCollege graduate **
## educationGraduate or professional degree *
## smokingCurrent some-day smoker **
## smokingFormer smoker ***
## smokingNever smoker ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.352 on 7970 degrees of freedom
## Multiple R-squared: 0.1853, Adjusted R-squared: 0.1824
## F-statistic: 62.52 on 29 and 7970 DF, p-value: < 2.2e-16
##Step 4. Model level statistics
glance(m1) %>%
select(r.squared, adj.r.squared, sigma, statistic, p.value, df, df.residual, nobs) %>%
mutate(across(where(is.numeric), ~ round(., 4))) %>%
pivot_longer(everything(), names_to = "Statistic", values_to = "Value") %>%
mutate(Statistic = dplyr::recode(Statistic,
"r.squared" = "R²",
"adj.r.squared" = "Adjusted R²",
"sigma" = "Residual Std. Error (Root MSE)",
"statistic" = "F-statistic",
"p.value" = "p-value (overall F-test)",
"df" = "Model df (p)",
"df.residual" = "Residual df (n − p − 1)",
"nobs" = "n (observations)"
)) %>%
kable(caption = "Overall Model Summary — Model 1") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| Statistic | Value |
|---|---|
| R² | 0.1853 |
| Adjusted R² | 0.1824 |
| Residual Std. Error (Root MSE) | 7.3516 |
| F-statistic | 62.5234 |
| p-value (overall F-test) | 0.0000 |
| Model df (p) | 29.0000 |
| Residual df (n − p − 1) | 7970.0000 |
| n (observations) | 8000.0000 |
Anova(m1, type = "III") %>%
as.data.frame() %>%
rownames_to_column("Source") %>%
filter(Source != "(Intercept)") %>%
mutate(
Significant = ifelse(`Pr(>F)` < 0.05, "Yes", "No"),
across(where(is.numeric), ~ round(., 4))
) %>%
kable(
caption = "Type III Partial F-Tests for Each Predictor",
col.names = c("Source", "Sum of Sq", "df", "F value", "p-value", "Significant (α = 0.05)")
) %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
column_spec(6, bold = TRUE)| Source | Sum of Sq | df | F value | p-value | Significant (α = 0.05) |
|---|---|---|---|---|---|
| physhlth_days | 37622.7952 | 1 | 696.1198 | 0.0000 | Yes |
| bmi | 345.2408 | 1 | 6.3879 | 0.0115 | Yes |
| sex | 3723.8662 | 1 | 68.9012 | 0.0000 | Yes |
| exercise | 497.0434 | 1 | 9.1966 | 0.0024 | Yes |
| age_group | 30092.1774 | 12 | 46.3986 | 0.0000 | Yes |
| income | 4733.8943 | 6 | 14.5982 | 0.0000 | Yes |
| education | 1265.1504 | 4 | 5.8521 | 0.0001 | Yes |
| smoking | 5101.1076 | 3 | 31.4613 | 0.0000 | Yes |
| Residuals | 430750.0872 | 7970 | NA | NA | NA |
##Step 2. Chunk test for income: Test whether income collectively improves the model significantly, after accounting for all other predictors:
m2 <-lm(menthlth_days ~ physhlth_days + bmi + sex + exercise + age_group + education + smoking, data = brfss_analytic)
# compare using anova(): perform the partial F-test
anova(m2,m1) |>
as.data.frame() |>
rownames_to_column("Model") |>
mutate(
Model = c("Reduced (exclude income)", "Full(include income)"),
across(where(is.numeric), ~ round(., 4))
) |>
kable(
caption = "Partial F-test: whether income collectively improves the model significantly?",
col.names = c("Model", "Res. df", "RSS", "df", "Sum of Sq", "F", "p-value")
)|>
kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)| Model | Res. df | RSS | df | Sum of Sq | F | p-value |
|---|---|---|---|---|---|---|
| Reduced (exclude income) | 7976 | 435484.0 | NA | NA | NA | NA |
| Full(include income) | 7970 | 430750.1 | 6 | 4733.894 | 14.5982 | 0 |
##Step 3 (Chunk test for education): Repeat the same procedure for education as a group of predictors
m3 <-lm(menthlth_days ~ physhlth_days + bmi + sex + exercise + age_group + income + smoking, data = brfss_analytic)
# compare using anova(): perform the partial F-test
anova(m3,m1) |>
as.data.frame() |>
rownames_to_column("Model") |>
mutate(
Model = c("Reduced (exclude education)", "Full(include education)"),
across(where(is.numeric), ~ round(., 4))
) |>
kable(
caption = "Partial F-test: whether education collectively improves the model significantly?",
col.names = c("Model", "Res. df", "RSS", "df", "Sum of Sq", "F", "p-value")
)|>
kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)| Model | Res. df | RSS | df | Sum of Sq | F | p-value |
|---|---|---|---|---|---|---|
| Reduced (exclude education) | 7974 | 432015.2 | NA | NA | NA | NA |
| Full(include education) | 7970 | 430750.1 | 4 | 1265.15 | 5.8521 | 1e-04 |
##Step 4. Interpretation:Type III ANOVA resutls indicate that all predictors make statistically significant independent contributions to mentally unhealthy days after accounting for all other variables in the model at α = 0.05. The strongest contributions are: physical unhealthy days, sex, soking and age group. The chunk tests confirmed that both income and education improve the model after accounting for all other predictors.
brfss_analytic <- brfss_analytic |>
mutate(
smoker_current = factor(case_when(
smoking %in% c("Current daily smoker", "Current some-day smoker") ~ "Current smoker",
smoking %in% c("Former smoker", "Never smoker") ~ "Non-smoker",
TRUE ~ NA_character_
), levels = c("Non-smoker", "Current smoker"))
)
brfss_analytic |>
count(smoker_current) |>
mutate(`%` = round(n / sum(n) * 100, 1)) |>
kable(
caption = "Distribution of Binary Smoking Variable",
col.names = c("Smoking Status", "N", "%")
) |>
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| Smoking Status | N | % |
|---|---|---|
| Non-smoker | 7074 | 88.4 |
| Current smoker | 268 | 3.4 |
| NA | 658 | 8.2 |
##Step 2: Fit two models: # • Model A (main effects only): regress menthlth_days on physhlth_days, bmi, sex, smoker_current, exercise, age_group, income, and education. # • Model B (with interaction): same as Model A, but use sex * smoker_current in the formula to include the interaction term
# Model A: main effects only
model_A <- lm(
menthlth_days ~ physhlth_days + bmi + sex + smoker_current +
exercise + age_group + income + education,
data = brfss_analytic
)
# Model B: with sex * smoker_current interaction
model_B <- lm(
menthlth_days ~ physhlth_days + bmi + sex * smoker_current +
exercise + age_group + income + education,
data = brfss_analytic
)
tidy(model_A, conf.int = TRUE) |>
mutate(across(where(is.numeric), \(x) round(x, 4))) |>
kable(
caption = "Model A: Main Effects Only",
col.names = c("Term", "Estimate", "SE", "t", "p-value", "CI Lower", "CI Upper")
) |>
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| Term | Estimate | SE | t | p-value | CI Lower | CI Upper |
|---|---|---|---|---|---|---|
| (Intercept) | 5.6652 | 0.9364 | 6.0501 | 0.0000 | 3.8296 | 7.5007 |
| physhlth_days | 0.2590 | 0.0104 | 24.8271 | 0.0000 | 0.2386 | 0.2795 |
| bmi | 0.0305 | 0.0134 | 2.2717 | 0.0231 | 0.0042 | 0.0567 |
| sexFemale | 1.2416 | 0.1678 | 7.4003 | 0.0000 | 0.9127 | 1.5705 |
| smoker_currentCurrent smoker | 1.0762 | 0.4465 | 2.4105 | 0.0160 | 0.2010 | 1.9515 |
| exerciseNo | 0.7866 | 0.2204 | 3.5692 | 0.0004 | 0.3546 | 1.2187 |
| age_group25-29 | -1.2126 | 0.5102 | -2.3767 | 0.0175 | -2.2128 | -0.2125 |
| age_group30-34 | -0.9655 | 0.4987 | -1.9359 | 0.0529 | -1.9432 | 0.0122 |
| age_group35-39 | -1.5918 | 0.4834 | -3.2927 | 0.0010 | -2.5395 | -0.6441 |
| age_group40-44 | -2.7309 | 0.4815 | -5.6715 | 0.0000 | -3.6748 | -1.7870 |
| age_group45-49 | -3.0025 | 0.4935 | -6.0847 | 0.0000 | -3.9698 | -2.0352 |
| age_group50-54 | -3.2555 | 0.4789 | -6.7973 | 0.0000 | -4.1944 | -2.3166 |
| age_group55-59 | -4.0350 | 0.4701 | -8.5827 | 0.0000 | -4.9566 | -3.1134 |
| age_group60-64 | -4.1094 | 0.4529 | -9.0740 | 0.0000 | -4.9971 | -3.2216 |
| age_group65-69 | -5.1389 | 0.4403 | -11.6714 | 0.0000 | -6.0020 | -4.2757 |
| age_group70-74 | -5.5927 | 0.4456 | -12.5520 | 0.0000 | -6.4661 | -4.7192 |
| age_group75-79 | -5.7261 | 0.4607 | -12.4301 | 0.0000 | -6.6291 | -4.8231 |
| age_group80+ | -6.3830 | 0.4601 | -13.8732 | 0.0000 | -7.2849 | -5.4811 |
| income$15,000 to less than $25,000 | -1.0783 | 0.5051 | -2.1349 | 0.0328 | -2.0684 | -0.0882 |
| income$25,000 to less than $35,000 | -1.6461 | 0.4777 | -3.4461 | 0.0006 | -2.5825 | -0.7097 |
| income$35,000 to less than $50,000 | -2.1075 | 0.4677 | -4.5065 | 0.0000 | -3.0242 | -1.1907 |
| income$50,000 to less than $100,000 | -2.5683 | 0.4386 | -5.8554 | 0.0000 | -3.4281 | -1.7085 |
| income$100,000 to less than $200,000 | -3.0773 | 0.4547 | -6.7685 | 0.0000 | -3.9686 | -2.1861 |
| income$200,000 or more | -3.3737 | 0.5180 | -6.5134 | 0.0000 | -4.3891 | -2.3584 |
| educationHigh school diploma or GED | 0.1677 | 0.8565 | 0.1958 | 0.8448 | -1.5113 | 1.8467 |
| educationSome college or technical school | 1.1922 | 0.7170 | 1.6627 | 0.0964 | -0.2134 | 2.5977 |
| educationCollege graduate | 2.0245 | 0.7175 | 2.8216 | 0.0048 | 0.6180 | 3.4311 |
| educationGraduate or professional degree | 1.8020 | 0.7178 | 2.5104 | 0.0121 | 0.3949 | 3.2092 |
# Fit model B
tidy(model_B, conf.int = TRUE) |>
mutate(across(where(is.numeric), \(x) round(x, 4))) |>
kable(
caption = "Model B: With Sex x Smoking Interaction",
col.names = c("Term", "Estimate", "SE", "t", "p-value", "CI Lower", "CI Upper")
) |>
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| Term | Estimate | SE | t | p-value | CI Lower | CI Upper |
|---|---|---|---|---|---|---|
| (Intercept) | 5.7326 | 0.9369 | 6.1186 | 0.0000 | 3.8959 | 7.5692 |
| physhlth_days | 0.2589 | 0.0104 | 24.8193 | 0.0000 | 0.2385 | 0.2794 |
| bmi | 0.0305 | 0.0134 | 2.2762 | 0.0229 | 0.0042 | 0.0568 |
| sexFemale | 1.1832 | 0.1706 | 6.9349 | 0.0000 | 0.8488 | 1.5177 |
| smoker_currentCurrent smoker | 0.3466 | 0.5926 | 0.5848 | 0.5587 | -0.8151 | 1.5082 |
| exerciseNo | 0.7901 | 0.2204 | 3.5856 | 0.0003 | 0.3582 | 1.2221 |
| age_group25-29 | -1.2251 | 0.5102 | -2.4014 | 0.0164 | -2.2252 | -0.2250 |
| age_group30-34 | -0.9790 | 0.4987 | -1.9631 | 0.0497 | -1.9566 | -0.0014 |
| age_group35-39 | -1.6149 | 0.4835 | -3.3398 | 0.0008 | -2.5627 | -0.6670 |
| age_group40-44 | -2.7623 | 0.4817 | -5.7342 | 0.0000 | -3.7066 | -1.8180 |
| age_group45-49 | -3.0241 | 0.4935 | -6.1278 | 0.0000 | -3.9915 | -2.0567 |
| age_group50-54 | -3.2708 | 0.4789 | -6.8294 | 0.0000 | -4.2096 | -2.3320 |
| age_group55-59 | -4.0688 | 0.4704 | -8.6497 | 0.0000 | -4.9909 | -3.1467 |
| age_group60-64 | -4.1401 | 0.4531 | -9.1374 | 0.0000 | -5.0283 | -3.2519 |
| age_group65-69 | -5.1585 | 0.4403 | -11.7147 | 0.0000 | -6.0217 | -4.2953 |
| age_group70-74 | -5.6152 | 0.4456 | -12.6001 | 0.0000 | -6.4888 | -4.7416 |
| age_group75-79 | -5.7476 | 0.4607 | -12.4750 | 0.0000 | -6.6508 | -4.8444 |
| age_group80+ | -6.4077 | 0.4602 | -13.9236 | 0.0000 | -7.3099 | -5.5056 |
| income$15,000 to less than $25,000 | -1.0646 | 0.5051 | -2.1078 | 0.0351 | -2.0546 | -0.0745 |
| income$25,000 to less than $35,000 | -1.6168 | 0.4778 | -3.3834 | 0.0007 | -2.5535 | -0.6801 |
| income$35,000 to less than $50,000 | -2.0836 | 0.4678 | -4.4545 | 0.0000 | -3.0006 | -1.1667 |
| income$50,000 to less than $100,000 | -2.5421 | 0.4388 | -5.7935 | 0.0000 | -3.4022 | -1.6819 |
| income$100,000 to less than $200,000 | -3.0539 | 0.4547 | -6.7157 | 0.0000 | -3.9454 | -2.1625 |
| income$200,000 or more | -3.3516 | 0.5180 | -6.4702 | 0.0000 | -4.3671 | -2.3362 |
| educationHigh school diploma or GED | 0.1265 | 0.8566 | 0.1477 | 0.8826 | -1.5527 | 1.8058 |
| educationSome college or technical school | 1.1542 | 0.7172 | 1.6094 | 0.1076 | -0.2517 | 2.5601 |
| educationCollege graduate | 1.9794 | 0.7178 | 2.7577 | 0.0058 | 0.5724 | 3.3865 |
| educationGraduate or professional degree | 1.7613 | 0.7180 | 2.4529 | 0.0142 | 0.3537 | 3.1688 |
| sexFemale:smoker_currentCurrent smoker | 1.6643 | 0.8890 | 1.8721 | 0.0612 | -0.0784 | 3.4071 |
anova(model_A, model_B) |>
as.data.frame() |>
rownames_to_column("Model") |>
mutate(
Model = c("Model A (main effects)", "Model B (+ interaction)"),
across(where(is.numeric), \(x) round(x, 4))
) |>
kable(
caption = "Partial F-Test: Is the Sex x Smoking Interaction Significant?",
col.names = c("Model", "Res. df", "RSS", "df", "Sum of Sq", "F", "p-value")
) |>
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| Model | Res. df | RSS | df | Sum of Sq | F | p-value |
|---|---|---|---|---|---|---|
| Model A (main effects) | 7314 | 365553.4 | NA | NA | NA | NA |
| Model B (+ interaction) | 7313 | 365378.3 | 1 | 175.1111 | 3.5048 | 0.0612 |
## (Intercept)
## 5.73256256
## physhlth_days
## 0.25892559
## bmi
## 0.03051634
## sexFemale
## 1.18320971
## smoker_currentCurrent smoker
## 0.34656437
## exerciseNo
## 0.79013619
## age_group25-29
## -1.22510044
## age_group30-34
## -0.97901547
## age_group35-39
## -1.61485044
## age_group40-44
## -2.76231085
## age_group45-49
## -3.02410915
## age_group50-54
## -3.27080644
## age_group55-59
## -4.06877862
## age_group60-64
## -4.14012758
## age_group65-69
## -5.15850546
## age_group70-74
## -5.61518306
## age_group75-79
## -5.74761146
## age_group80+
## -6.40773107
## income$15,000 to less than $25,000
## -1.06456606
## income$25,000 to less than $35,000
## -1.61678008
## income$35,000 to less than $50,000
## -2.08362232
## income$50,000 to less than $100,000
## -2.54205589
## income$100,000 to less than $200,000
## -3.05394739
## income$200,000 or more
## -3.35161097
## educationHigh school diploma or GED
## 0.12653810
## educationSome college or technical school
## 1.15419972
## educationCollege graduate
## 1.97944520
## educationGraduate or professional degree
## 1.76128228
## sexFemale:smoker_currentCurrent smoker
## 1.66434456
ggpredict(model_B, terms = c("smoker_current", "sex")) |>
plot() +
labs(
title = "Predicted Mentally Unhealthy Days by Smoking Status and Sex",
x = "Smoking Status",
y = "Predicted Mentally Unhealthy Days (past 30)",
color = "Sex"
) +
theme_minimal()
## Interpretation: Amone men, current smokers and non-smokers had
similar predicted mentally unhealthy days, with only small differences.
Among women, however, current smokers reported more mentally unhealthy
days than non-smokers. The wider gap between smoking groups in women
compared to men suggests that smoking may be more strongly associated
with poor mental health among woemn.