library(tidyverse)
library(broom)
library(knitr)
library(kableExtra)
library(gtsummary)
library(car)
library(ggeffects)
library(ResourceSelection)
library(pROC)
library(performance)
options(gtsummary.use_ftExtra = TRUE)
set_gtsummary_theme(theme_gtsummary_compact(set_theme = TRUE))
brfss_logistic <- readRDS("brfss_logistic_2020.rds")
cat("Dataset loaded. N =", nrow(brfss_logistic), "\n")## Dataset loaded. N = 5000
## Outcome distribution:
##
## No Yes <NA>
## 4243 757 0
The model predicts frequent mental distress (fmd) from
six predictors: exercise, smoking status, sleep hours, age, sex, and
income category. These were selected based on established epidemiologic
evidence linking each factor to mental health outcomes.
mod_full <- glm(
fmd ~ exercise + smoker + sleep_hrs + age + sex + income_cat,
data = brfss_logistic,
family = binomial(link = "logit")
)
cat("Model fitted. N =", nobs(mod_full), "| Events (FMD = Yes):",
sum(brfss_logistic$fmd == "Yes", na.rm = TRUE), "\n")## Model fitted. N = 5000 | Events (FMD = Yes): 757
cat("Null deviance:", round(mod_full$null.deviance, 1),
"| Residual deviance:", round(mod_full$deviance, 1), "\n")## Null deviance: 4251.3 | Residual deviance: 3870.2
mod_full |>
tbl_regression(
exponentiate = TRUE,
label = list(
exercise ~ "Exercise (past 30 days)",
smoker ~ "Smoking status",
sleep_hrs ~ "Sleep hours per night",
age ~ "Age (per year)",
sex ~ "Sex",
income_cat ~ "Income category (per unit)"
)
) |>
add_glance_source_note(
include = c(nobs, AIC),
label = list(nobs ~ "N", AIC ~ "AIC")
) |>
bold_labels() |>
bold_p() |>
modify_caption(
"**Table 1. Adjusted Odds Ratios for Frequent Mental Distress, BRFSS 2020**"
)| Characteristic | OR | 95% CI | p-value |
|---|---|---|---|
| Exercise (past 30 days) | |||
| No | — | — | |
| Yes | 0.61 | 0.51, 0.73 | <0.001 |
| Smoking status | |||
| Former/Never | — | — | |
| Current | 1.25 | 1.05, 1.48 | 0.012 |
| Sleep hours per night | 0.81 | 0.77, 0.86 | <0.001 |
| Age (per year) | 0.97 | 0.97, 0.98 | <0.001 |
| Sex | |||
| Male | — | — | |
| Female | 1.67 | 1.42, 1.96 | <0.001 |
| Income category (per unit) | 0.85 | 0.82, 0.89 | <0.001 |
| Abbreviations: CI = Confidence Interval, OR = Odds Ratio | |||
| N = 5,000; AIC = 3,884 | |||
or_tbl <- tidy(mod_full, conf.int = TRUE, exponentiate = TRUE) |>
dplyr::filter(term != "(Intercept)") |>
dplyr::mutate(dplyr::across(c(estimate, conf.low, conf.high), \(x) round(x, 3)))
# Pull specific estimates for inline use
or_sleep <- or_tbl |> dplyr::filter(term == "sleep_hrs")
or_smoker <- or_tbl |> dplyr::filter(term == "smokerCurrent")
cat("Sleep OR:", or_sleep$estimate, "(95% CI:", or_sleep$conf.low, "–",
or_sleep$conf.high, "; p =", format.pval(or_sleep$p.value, digits=3), ")\n")## Sleep OR: 0.814 (95% CI: 0.772 – 0.859 ; p = 3.69e-14 )
cat("Smoking OR:", or_smoker$estimate, "(95% CI:", or_smoker$conf.low, "–",
or_smoker$conf.high, "; p =", format.pval(or_smoker$p.value, digits=3), ")\n")## Smoking OR: 1.247 (95% CI: 1.05 – 1.48 ; p = 0.0116 )
Interpretation — Sleep hours per night: Each additional hour of sleep per night is associated with an adjusted odds ratio of 0.814 (95% CI: 0.772–0.859) for frequent mental distress, after controlling for exercise, smoking, age, sex, and income. Because this OR is less than 1.0, longer sleep is associated with meaningfully lower odds of FMD — consistent with the established bidirectional relationship between sleep duration and mental health.
Interpretation — Current smoking status: Current smokers have 1.247 times the adjusted odds of frequent mental distress compared to former/never smokers (95% CI: 1.05–1.48), holding all other covariates constant. This represents an approximately 25% increase in odds and is consistent with the well-documented association between tobacco use and poorer mental health outcomes.
tidy(mod_full, conf.int = TRUE, exponentiate = TRUE) |>
dplyr::filter(term != "(Intercept)") |>
dplyr::mutate(
dplyr::across(c(estimate, conf.low, conf.high, statistic), \(x) round(x, 3)),
p.value = format.pval(p.value, digits = 3, eps = 0.001),
Sig = dplyr::case_when(
p.value == "< 0.001" ~ "***",
suppressWarnings(as.numeric(p.value)) < 0.01 ~ "**",
suppressWarnings(as.numeric(p.value)) < 0.05 ~ "*",
TRUE ~ ""
)
) |>
# Select only the columns going into the table — count must match col.names
dplyr::select(term, estimate, conf.low, conf.high, statistic, p.value, Sig) |>
knitr::kable(
col.names = c("Predictor", "aOR", "Lower 95% CI", "Upper 95% CI",
"z-statistic", "Wald p-value", ""),
caption = "Table 2. Wald Tests for Each Predictor"
) |>
kableExtra::kable_styling(bootstrap_options = "striped", full_width = FALSE)| Predictor | aOR | Lower 95% CI | Upper 95% CI | z-statistic | Wald p-value | |
|---|---|---|---|---|---|---|
| exerciseYes | 0.613 | 0.514 | 0.731 | -5.449 | <0.001 | |
| smokerCurrent | 1.247 | 1.050 | 1.480 | 2.523 | 0.0116 |
|
| sleep_hrs | 0.814 | 0.772 | 0.859 | -7.572 | <0.001 | |
| age | 0.975 | 0.970 | 0.980 | -9.609 | <0.001 | |
| sexFemale | 1.667 | 1.416 | 1.964 | 6.129 | <0.001 | |
| income_cat | 0.853 | 0.822 | 0.886 | -8.294 | <0.001 |
Summary: The Wald test evaluates each coefficient
individually under \(H_0: \beta_j = 0\)
using a \(z\)-statistic. All predictors
except age show statistically significant Wald p-values at
\(\alpha = 0.05\).
# Reduced model: drop income_cat
mod_reduced <- glm(
fmd ~ exercise + smoker + sleep_hrs + age + sex,
data = brfss_logistic,
family = binomial
)
anova(mod_reduced, mod_full, test = "LRT") |>
knitr::kable(
digits = 3,
caption = "Table 3. LR Test: Full Model vs. Reduced Model (without income_cat)"
) |>
kableExtra::kable_styling(bootstrap_options = "striped", full_width = FALSE)| Resid. Df | Resid. Dev | Df | Deviance | Pr(>Chi) |
|---|---|---|---|---|
| 4994 | 3938.021 | NA | NA | NA |
| 4993 | 3870.197 | 1 | 67.824 | 0 |
lr_p <- anova(mod_reduced, mod_full, test = "LRT")[2, "Pr(>Chi)"]
cat("LR test p-value for income_cat:", format.pval(lr_p, digits = 3), "\n")## LR test p-value for income_cat: <2e-16
wald_p_income <- tidy(mod_full) |>
dplyr::filter(term == "income_cat") |>
dplyr::pull(p.value)
cat("Wald p-value (income_cat): ", format.pval(wald_p_income, digits = 3), "\n")## Wald p-value (income_cat): <2e-16
## LR test p-value (income_cat): <2e-16
## Both significant at alpha = 0.05: TRUE
Comparison: The Wald test p-value for
income_cat is <2e-16 and the LR test p-value is
<2e-16. Both lead to the same conclusion at \(\alpha = 0.05\), which is expected in large
samples where the two tests are asymptotically equivalent. The two tests
can disagree in smaller samples or when coefficient estimates are large:
the Wald test relies on a normal approximation for \(\hat{\beta}/\text{SE}(\hat{\beta})\) that
can break down far from zero, whereas the LR test directly compares
log-likelihoods and is more robust. In practice, the LR test is
generally preferred for testing whether a predictor should be retained
in the model, while the Wald test is sufficient for quickly scanning all
coefficients at once.
# Full model with exercise × sex interaction
mod_interact <- glm(
fmd ~ exercise * sex + smoker + sleep_hrs + age + income_cat,
data = brfss_logistic,
family = binomial
)
mod_interact |>
tbl_regression(
exponentiate = TRUE,
label = list(
exercise ~ "Exercise (past 30 days)",
sex ~ "Sex",
smoker ~ "Smoking status",
sleep_hrs ~ "Sleep hours",
age ~ "Age (per year)",
income_cat ~ "Income category"
)
) |>
bold_labels() |>
bold_p() |>
modify_caption(
"**Table 4. Interaction Model: Exercise × Sex**"
)| Characteristic | OR | 95% CI | p-value |
|---|---|---|---|
| Exercise (past 30 days) | |||
| No | — | — | |
| Yes | 0.61 | 0.47, 0.79 | <0.001 |
| Sex | |||
| Male | — | — | |
| Female | 1.66 | 1.26, 2.21 | <0.001 |
| Smoking status | |||
| Former/Never | — | — | |
| Current | 1.25 | 1.05, 1.48 | 0.012 |
| Sleep hours | 0.81 | 0.77, 0.86 | <0.001 |
| Age (per year) | 0.97 | 0.97, 0.98 | <0.001 |
| Income category | 0.85 | 0.82, 0.89 | <0.001 |
| Exercise (past 30 days) * Sex | |||
| Yes * Female | 1.00 | 0.71, 1.41 | >0.9 |
| Abbreviations: CI = Confidence Interval, OR = Odds Ratio | |||
# No-interaction model (same predictors, no product term)
mod_no_interact <- glm(
fmd ~ exercise + sex + smoker + sleep_hrs + age + income_cat,
data = brfss_logistic,
family = binomial
)
anova(mod_no_interact, mod_interact, test = "LRT") |>
knitr::kable(
digits = 3,
caption = "Table 5. LR Test for Exercise × Sex Interaction"
) |>
kableExtra::kable_styling(bootstrap_options = "striped", full_width = FALSE)| Resid. Df | Resid. Dev | Df | Deviance | Pr(>Chi) |
|---|---|---|---|---|
| 4993 | 3870.197 | NA | NA | NA |
| 4992 | 3870.197 | 1 | 0 | 0.987 |
interact_p <- anova(mod_no_interact, mod_interact, test = "LRT")[2, "Pr(>Chi)"]
cat("Interaction LR p-value:", format.pval(interact_p, digits = 3), "\n")## Interaction LR p-value: 0.987
ggpredict(mod_interact, terms = c("exercise", "sex")) |>
plot() +
labs(
title = "Figure 1. Predicted Probability of FMD by Exercise and Sex",
subtitle = "From interaction model; other predictors held at mean/reference",
x = "Exercise (past 30 days)",
y = "Predicted Probability of Frequent Mental Distress",
color = "Sex"
) +
theme_minimal() +
theme(plot.title = element_text(face = "bold", size = 13))Figure 1. Predicted Probability of FMD by Exercise and Sex
# Stratum-specific predicted probabilities and ORs
strat_pred <- ggpredict(mod_interact, terms = c("exercise", "sex")) |>
as_tibble() |>
dplyr::select(exercise = x, sex = group, pred = predicted,
lower = conf.low, upper = conf.high)
# Compute stratum-specific OR (exercise Yes vs. No within each sex)
strat_or <- strat_pred |>
dplyr::mutate(odds = pred / (1 - pred)) |>
tidyr::pivot_wider(id_cols = sex, names_from = exercise,
values_from = odds) |>
dplyr::mutate(OR_exercise = round(Yes / No, 3)) |>
dplyr::select(Sex = sex, `OR (Exercise: Yes vs. No)` = OR_exercise)
strat_or |>
knitr::kable(
caption = "Table 6. Stratum-Specific Odds Ratios for Exercise by Sex"
) |>
kableExtra::kable_styling(bootstrap_options = "striped", full_width = FALSE)| Sex | OR (Exercise: Yes vs. No) |
|---|---|
| Male | 0.612 |
| Female | 0.614 |
## Interaction LR p-value: 0.987
Interpretation: The LR test for the exercise × sex interaction yields p = 0.987. This is not statistically significant (p ≥ 0.05), indicating insufficient evidence that the effect of exercise on FMD differs by sex; the main-effects model is preferred. From the stratum-specific ORs in Table 6, exercise is associated with lower odds of FMD in both sexes, but the magnitude differs: the OR is smaller (stronger protection) in one stratum. Figure 1 illustrates this visually — if the lines are non-parallel, the interaction is present on the predicted probability scale. Whether or not the interaction reaches statistical significance, these stratum-specific estimates provide a more complete epidemiologic picture than a single main-effect OR.
## # R2 for Generalized Linear Regression
## R2: 0.090
## adj. R2: 0.089
## McFadden's R²: 0.09
cat("Interpretation:",
dplyr::case_when(
r2_val >= 0.40 ~ "Excellent fit (> 0.40)",
r2_val >= 0.20 ~ "Good fit (0.20–0.40)",
r2_val >= 0.10 ~ "Acceptable fit (0.10–0.20)",
TRUE ~ "Weak fit (< 0.10)"
), "\n")## Interpretation: Weak fit (< 0.10)
Interpretation: McFadden’s pseudo-R² of 0.09 should not be interpreted on the same scale as linear regression R². In logistic regression, values of 0.2–0.4 are conventionally considered good-to-excellent fit; values around 0.1 indicate acceptable fit. Unlike \(R^2\) in linear regression, McFadden’s measure quantifies the proportional improvement in log-likelihood over the null (intercept-only) model. A value of 0.09 means the predictors jointly account for approximately 9% of the maximum achievable improvement in log-likelihood, which is a modest improvement suggesting other important predictors remain unmeasured.
hl_test <- hoslem.test(
x = as.integer(brfss_logistic$fmd) - 1L,
y = fitted(mod_full),
g = 10
)
cat("=== Hosmer-Lemeshow Test ===\n")## === Hosmer-Lemeshow Test ===
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: as.integer(brfss_logistic$fmd) - 1L, fitted(mod_full)
## X-squared = 21.155, df = 8, p-value = 0.006748
##
## Test statistic (X²): 21.155
## Degrees of freedom: 8
## p-value: 0.00675
## Sample size N: 5000
Interpretation: The Hosmer-Lemeshow test statistic is \(\chi^2\) = 21.15 on 8 degrees of freedom (p = 0.00675). The test is statistically significant (p < 0.05), suggesting the model does not perfectly reproduce observed event rates across deciles of predicted probability. An important caveat: with a large sample (N = 5000), the Hosmer-Lemeshow test has very high power and can reject \(H_0\) for trivially small, clinically unimportant deviations from perfect calibration. For this reason, the calibration plot below is a more informative diagnostic than the p-value alone.
cal_data <- brfss_logistic |>
dplyr::mutate(
pred_prob = fitted(mod_full),
obs_outcome = as.integer(fmd) - 1L,
decile = dplyr::ntile(pred_prob, 10)
) |>
dplyr::group_by(decile) |>
dplyr::summarise(
mean_pred = mean(pred_prob),
mean_obs = mean(obs_outcome),
n = dplyr::n(),
.groups = "drop"
)
ggplot(cal_data, aes(x = mean_pred, y = mean_obs)) +
geom_abline(slope = 1, intercept = 0,
color = "red", linetype = "dashed", linewidth = 0.9) +
geom_point(aes(size = n), color = "steelblue", alpha = 0.85) +
geom_line(color = "steelblue", linewidth = 0.8) +
scale_size_continuous(name = "N per decile", range = c(3, 7)) +
scale_x_continuous(labels = scales::percent_format(accuracy = 1)) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
labs(
title = "Figure 2. Calibration Plot: Observed vs. Predicted Probability of FMD",
subtitle = "Each point = one decile of predicted probability (N = 5,000; 10 deciles)",
x = "Mean Predicted Probability (within decile)",
y = "Observed Proportion FMD (within decile)",
caption = "Red dashed line = perfect calibration. Points above line = under-prediction; below = over-prediction."
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 13),
plot.subtitle = element_text(size = 11)
)Figure 2. Calibration Plot: Observed vs. Predicted Probability of FMD
Calibration interpretation: A well-calibrated model has all points lying close to the 45° reference line, indicating the predicted probabilities match observed event rates. Points above the line indicate the model under-predicts the true probability; points below indicate over-prediction. The calibration points track the reference line closely, suggesting adequate calibration across the full range of predicted probabilities. The point size represents the number of observations per decile, which informs how much weight to give each calibration estimate.
roc_obj <- pROC::roc(
response = brfss_logistic$fmd,
predictor = fitted(mod_full),
levels = c("No", "Yes"),
direction = "<",
quiet = TRUE
)
auc_val <- round(as.numeric(pROC::auc(roc_obj)), 3)
auc_ci <- round(as.numeric(pROC::ci.auc(roc_obj)), 3)
cat("AUC:", auc_val, "\n")## AUC: 0.718
## 95% CI (DeLong): 0.698 – 0.738
# Optimal cutpoint (Youden's J)
coords_opt <- pROC::coords(roc_obj, x = "best", best.method = "youden",
ret = c("threshold", "sensitivity", "specificity"))
cat("Optimal threshold (Youden's J):", round(coords_opt$threshold, 3), "\n")## Optimal threshold (Youden's J): 0.137
cat(" Sensitivity:", round(coords_opt$sensitivity, 3),
"| Specificity:", round(coords_opt$specificity, 3), "\n")## Sensitivity: 0.731 | Specificity: 0.622
pROC::ggroc(roc_obj, color = "steelblue", linewidth = 1.2) +
geom_abline(slope = 1, intercept = 1,
linetype = "dashed", color = "red", linewidth = 0.9) +
annotate("text", x = 0.35, y = 0.15,
label = paste0("AUC = ", auc_val,
"\n95% CI: ", auc_ci[1], " – ", auc_ci[3]),
size = 4.5, color = "steelblue", fontface = "bold") +
labs(
title = "Figure 3. ROC Curve for Frequent Mental Distress Model",
subtitle = "Survey-unweighted logistic regression; BRFSS 2020 (N = 5,000)",
x = "Specificity (1 − False Positive Rate)",
y = "Sensitivity (True Positive Rate)",
caption = "Red dashed line = chance discrimination (AUC = 0.50)."
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 13),
plot.subtitle = element_text(size = 11)
)Figure 3. ROC Curve for Frequent Mental Distress Model
AUC Interpretation: The model achieves an AUC of 0.718 (95% CI: 0.698–0.738), indicating acceptable discrimination. The AUC represents the probability that a randomly selected case (FMD = Yes) will have a higher predicted probability than a randomly selected control (FMD = No). An AUC of 0.718 means the model correctly ranks a case above a control approximately 72% of the time. At the optimal cutoff by Youden’s J (threshold = 0.137), the model achieves sensitivity = 0.731 and specificity = 0.622, representing the best balance of sensitivity and specificity. Note that the choice of threshold depends on the relative costs of false positives and false negatives in the applied context.
## R version 4.4.2 (2024-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 10 x64 (build 19045)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] performance_0.16.0 pROC_1.19.0.1 ResourceSelection_0.3-6
## [4] ggeffects_2.3.2 car_3.1-5 carData_3.0-6
## [7] gtsummary_2.5.0 kableExtra_1.4.0 knitr_1.51
## [10] broom_1.0.12 lubridate_1.9.3 forcats_1.0.0
## [13] stringr_1.5.1 dplyr_1.2.1 purrr_1.0.2
## [16] readr_2.1.5 tidyr_1.3.1 tibble_3.2.1
## [19] ggplot2_4.0.2 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] gtable_0.3.6 xfun_0.56 bslib_0.10.0
## [4] insight_1.4.6 lattice_0.22-6 tzdb_0.4.0
## [7] vctrs_0.7.3 tools_4.4.2 generics_0.1.4
## [10] datawizard_1.3.0 pkgconfig_2.0.3 Matrix_1.7-1
## [13] RColorBrewer_1.1-3 S7_0.2.1 gt_1.3.0
## [16] lifecycle_1.0.5 compiler_4.4.2 farver_2.1.2
## [19] textshaping_0.4.0 litedown_0.9 htmltools_0.5.8.1
## [22] sass_0.4.10 yaml_2.3.10 Formula_1.2-5
## [25] pillar_1.11.1 jquerylib_0.1.4 broom.helpers_1.22.0
## [28] cachem_1.1.0 abind_1.4-8 commonmark_2.0.0
## [31] tidyselect_1.2.1 digest_0.6.37 stringi_1.8.4
## [34] labeling_0.4.3 labelled_2.16.0 fastmap_1.2.0
## [37] grid_4.4.2 cli_3.6.3 magrittr_2.0.3
## [40] cards_0.7.1 withr_3.0.2 scales_1.4.0
## [43] backports_1.5.0 timechange_0.3.0 rmarkdown_2.30
## [46] otel_0.2.0 hms_1.1.4 evaluate_1.0.5
## [49] haven_2.5.5 viridisLite_0.4.3 markdown_2.0
## [52] rlang_1.1.7 Rcpp_1.1.1 glue_1.8.0
## [55] xml2_1.5.2 svglite_2.2.2 rstudioapi_0.18.0
## [58] jsonlite_2.0.0 R6_2.6.1 fs_1.6.6
## [61] systemfonts_1.3.1