library(tidyverse)
library(broom)
library(knitr)
library(kableExtra)
library(gtsummary)
library(ggeffects)
options(gtsummary.use_ftExtra = TRUE)
set_gtsummary_theme(theme_gtsummary_compact(set_theme = TRUE))
brfss_log <- readRDS("~/R/site-library/Epi553/code/brfss_logistic_2020.rds")brfss_log |>
count(fmd) |>
mutate(Percentage = round(100 * n / sum(n), 1)) |>
rename(`FMD Status` = fmd, Count = n) |>
kable(caption = "Frequency Table: Frequent Mental Distress (FMD)") |>
kable_styling(bootstrap_options = "striped", full_width = FALSE)| FMD Status | Count | Percentage |
|---|---|---|
| No | 4243 | 84.9 |
| Yes | 757 | 15.1 |
Interpretation: The table shows the number and percentage of participants with and without frequent mental distress (14+ mentally unhealthy days in the past 30 days).
brfss_log |>
tbl_summary(
by = fmd,
include = c(sleep_hrs, age, sex, exercise, smoker, bmi, income_cat),
type = list(c(sleep_hrs, age, bmi, income_cat) ~ "continuous"),
statistic = list(all_continuous() ~ "{mean} ({sd})"),
label = list(
sleep_hrs ~ "Sleep hours per night",
age ~ "Age (years)",
sex ~ "Sex",
exercise ~ "Exercise in past 30 days",
smoker ~ "Smoking status",
bmi ~ "BMI",
income_cat ~ "Income category (1–8)"
)
) |>
add_overall() |>
add_p() |>
bold_labels() |>
modify_caption("**Descriptive Summary by FMD Status**")| Characteristic | Overall, N = 5,0001 | No, N = 4,2431 | Yes, N = 7571 | p-value2 |
|---|---|---|---|---|
| Sleep hours per night | 7.00 (1.48) | 7.09 (1.40) | 6.51 (1.83) | <0.001 |
| Age (years) | 56 (16) | 57 (16) | 50 (16) | <0.001 |
| Sex | <0.001 | |||
| Male | 2,701 (54%) | 2,378 (56%) | 323 (43%) | |
| Female | 2,299 (46%) | 1,865 (44%) | 434 (57%) | |
| Exercise in past 30 days | 3,673 (73%) | 3,192 (75%) | 481 (64%) | <0.001 |
| Smoking status | <0.001 | |||
| Former/Never | 3,280 (66%) | 2,886 (68%) | 394 (52%) | |
| Current | 1,720 (34%) | 1,357 (32%) | 363 (48%) | |
| BMI | 28.5 (6.3) | 28.4 (6.2) | 29.3 (7.0) | 0.001 |
| Income category (1–8) | 5.85 (2.11) | 6.00 (2.04) | 5.05 (2.29) | <0.001 |
| 1 Mean (SD); n (%) | ||||
| 2 Wilcoxon rank sum test; Pearson’s Chi-squared test | ||||
Interpretation: Participants with frequent mental distress sleep fewer hours, are younger on average, are more likely to be female, less likely to exercise, and more likely to be current smokers compared to those without FMD. All differences are statistically significant (p < 0.05).
brfss_log |>
group_by(exercise, fmd) |>
summarise(n = n(), .groups = "drop") |>
group_by(exercise) |>
mutate(prop = n / sum(n)) |>
filter(fmd == "Yes") |>
ggplot(aes(x = exercise, y = prop, fill = exercise)) +
geom_bar(stat = "identity", width = 0.5, show.legend = FALSE) +
geom_text(aes(label = paste0(round(prop * 100, 1), "%")),
vjust = -0.5, size = 4.5, fontface = "bold") +
scale_y_continuous(labels = scales::percent_format(), limits = c(0, 0.35)) +
scale_fill_manual(values = c("No" = "#d45f5f", "Yes" = "#4e9abf")) +
labs(
title = "Proportion with Frequent Mental Distress by Exercise Status",
x = "Exercised in Past 30 Days",
y = "Proportion with FMD"
) +
theme_minimal(base_size = 13)Interpretation: A higher proportion of individuals who did not exercise reported frequent mental distress compared to those who did exercise, suggesting a protective association between physical activity and mental health.
mod_exercise <- glm(fmd ~ exercise, data = brfss_log, family = binomial(link = "logit"))
tidy(mod_exercise) |>
mutate(across(where(is.numeric), \(x) round(x, 4))) |>
kable(caption = "Simple Logistic Regression: FMD ~ Exercise (Log-Odds Scale)") |>
kable_styling(bootstrap_options = "striped", full_width = FALSE)| term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|
| (Intercept) | -1.3371 | 0.0676 | -19.7689 | 0 |
| exerciseYes | -0.5554 | 0.0835 | -6.6545 | 0 |
Interpretation: The coefficient for
exerciseYes is the change in log-odds of FMD associated
with exercising versus not exercising. A negative coefficient indicates
that exercise is associated with lower log-odds of FMD.
mod_exercise |>
tbl_regression(
exponentiate = TRUE,
label = list(exercise ~ "Exercise in past 30 days")
) |>
bold_labels() |>
bold_p() |>
modify_caption("**Simple Logistic Regression: FMD ~ Exercise (Odds Ratios)**")| Characteristic | OR1 | 95% CI1 | p-value |
|---|---|---|---|
| Exercise in past 30 days | |||
| No | — | — | |
| Yes | 0.57 | 0.49, 0.68 | <0.001 |
| 1 OR = Odds Ratio, CI = Confidence Interval | |||
The odds ratio for exerciseYes represents the odds of
frequent mental distress among those who exercised in the past 30 days
compared to those who did not exercise. An OR < 1 indicates that
individuals who exercised have lower odds of experiencing frequent
mental distress, suggesting a protective association between regular
physical activity and mental health. For example, if OR = 0.60,
individuals who exercised have approximately 40% lower odds of FMD
compared to non-exercisers, after accounting for sampling
variability.
mod_sleep <- glm(fmd ~ sleep_hrs, data = brfss_log, family = binomial(link = "logit"))
ggpredict(mod_sleep, terms = "sleep_hrs") |>
plot() +
labs(
title = "Predicted Probability of Frequent Mental Distress by Sleep Hours",
x = "Sleep Hours per Night",
y = "Predicted Probability of FMD"
) +
theme_minimal(base_size = 13)Interpretation: The predicted probability of frequent mental distress decreases as sleep hours increase, consistent with well-established evidence linking insufficient sleep to poor mental health outcomes.
The three predictors chosen are sleep hours, smoking status, and income category.
get_or <- function(model, term_label, term_name) {
tidy(model, exponentiate = TRUE, conf.int = TRUE) |>
filter(term == term_name) |>
mutate(
Predictor = term_label,
OR = round(estimate, 3),
`95% CI` = paste0("(", round(conf.low, 3), ", ", round(conf.high, 3), ")"),
`p-value` = ifelse(p.value < 0.001, "< 0.001", round(p.value, 3))
) |>
dplyr::select(Predictor, OR, `95% CI`, `p-value`)
}
bind_rows(
get_or(mod_sleep, "Sleep hours (per hour)", "sleep_hrs"),
get_or(mod_smoker, "Smoking: Current vs. Former/Never","smokerCurrent"),
get_or(mod_income, "Income category (per unit)", "income_cat")
) |>
kable(caption = "Crude Odds Ratios from Three Simple Logistic Regression Models") |>
kable_styling(bootstrap_options = "striped", full_width = FALSE)| Predictor | OR | 95% CI | p-value |
|---|---|---|---|
| Sleep hours (per hour) | 0.765 | (0.725, 0.807) | < 0.001 |
| Smoking: Current vs. Former/Never | 1.959 | (1.675, 2.291) | < 0.001 |
| Income category (per unit) | 0.821 | (0.793, 0.85) | < 0.001 |
Among the three predictors,smoking status (current vs. former/never) has the strongest crude association with FMD, as evidenced by the odds ratio furthest from 1.0 and the narrowest confidence interval relative to its estimate. Current smokers have substantially higher odds of frequent mental distress than former/never smokers. Sleep hours and income are also significantly associated with FMD, but smoking status shows the largest magnitude of effect in this unadjusted analysis.
mod_multi |>
tbl_regression(
exponentiate = TRUE,
label = list(
exercise ~ "Exercise in past 30 days",
smoker ~ "Smoking status",
sleep_hrs ~ "Sleep hours (per hour)",
age ~ "Age (per year)",
sex ~ "Sex",
income_cat ~ "Income category (per unit)"
)
) |>
bold_labels() |>
bold_p() |>
modify_caption("**Multiple Logistic Regression: Adjusted Odds Ratios for FMD**")| Characteristic | OR1 | 95% CI1 | p-value |
|---|---|---|---|
| Exercise in past 30 days | |||
| No | — | — | |
| Yes | 0.61 | 0.51, 0.73 | <0.001 |
| Smoking status | |||
| Former/Never | — | — | |
| Current | 1.25 | 1.05, 1.48 | 0.012 |
| Sleep hours (per hour) | 0.81 | 0.77, 0.86 | <0.001 |
| Age (per year) | 0.97 | 0.97, 0.98 | <0.001 |
| Sex | |||
| Male | — | — | |
| Female | 1.67 | 1.42, 1.96 | <0.001 |
| Income category (per unit) | 0.85 | 0.82, 0.89 | <0.001 |
| 1 OR = Odds Ratio, CI = Confidence Interval | |||
extract_or <- function(model, term_name, model_label) {
tidy(model, exponentiate = TRUE, conf.int = TRUE) |>
filter(term == term_name) |>
mutate(
Model = model_label,
OR = round(estimate, 3),
`95% CI` = paste0("(", round(conf.low, 3), ", ", round(conf.high, 3), ")"),
`p-value` = ifelse(p.value < 0.001, "< 0.001", as.character(round(p.value, 3)))
) |>
dplyr::select(Model, OR, `95% CI`, `p-value`)
}
bind_rows(
extract_or(mod_smoker, "smokerCurrent", "Crude"),
extract_or(mod_multi, "smokerCurrent", "Adjusted")
) |>
kable(caption = "Crude vs. Adjusted OR for Smoking Status") |>
kable_styling(bootstrap_options = "striped", full_width = FALSE)| Model | OR | 95% CI | p-value |
|---|---|---|---|
| Crude | 1.959 | (1.675, 2.291) | < 0.001 |
| Adjusted | 1.247 | (1.05, 1.48) | 0.012 |
After adjusting for exercise, sleep hours, age, sex, and income category, the odds ratio for current smoking status attenuates somewhat toward the null compared to the crude OR. This attenuation indicates the presence of positive confounding other variables in the model (such as exercise and income, which are lower in current smokers) were partly explaining the crude smoking–FMD association. However, the adjusted OR remains elevated and statistically significant, indicating that smoking status retains an independent association with frequent mental distress even after accounting for these covariates.
End of Lab Activity