library(tidyverse)
library(broom)
library(knitr)
library(kableExtra)
library(gtsummary)
library(ggeffects)
library(ResourceSelection)
library(pROC)
library(performance)


options(gtsummary.use_ftExtra = TRUE)
set_gtsummary_theme(theme_gtsummary_compact(set_theme = TRUE))

brfss_log <- readRDS("~/R/site-library/Epi553/code/brfss_logistic_2020.rds")
mod_full <- glm(
  fmd ~ menthlth_days + physhlth_days + age + sex + exercise,
  data = brfss_log,
  family = binomial(link = "logit")
)

mod_full |>
  tbl_regression(
    exponentiate = TRUE,
    label = list(
      exercise      ~ "Exercise (past 30 days)",
      age           ~ "Age (per year)",
      sex           ~ "Sex",
      menthlth_days ~ "Mentally unhealthy days (0–30)",
      physhlth_days ~ "Physically unhealthy days"
    )
  ) |>
  bold_labels() |>
  bold_p()
Characteristic OR1 95% CI1 p-value
Mentally unhealthy days (0–30) 296,738,945,580 14,215,900,200, 4,302,855,992,025 >0.9
Physically unhealthy days 0.92 0.00, 531,355,958 >0.9
Age (per year) 1.61 0.10, 27.6 >0.9
Sex


    Male
    Female 8.18 0.00, 593,383,052,743,472,399,581,238,928,340,198,808,403,710,852,642,008,727,552 >0.9
Exercise (past 30 days)


    No
    Yes 0.10 0.00, 221,204,212,238,675,859,620,896,788,341,161,541,959,487,470,976,026,882,996,921,434,112 >0.9
1 OR = Odds Ratio, CI = Confidence Interval
summary(mod_full)
## 
## Call:
## glm(formula = fmd ~ menthlth_days + physhlth_days + age + sex + 
##     exercise, family = binomial(link = "logit"), data = brfss_log)
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)
## (Intercept)   -3.663e+02  1.349e+04  -0.027    0.978
## menthlth_days  2.642e+01  9.112e+02   0.029    0.977
## physhlth_days -7.842e-02  4.699e+02   0.000    1.000
## age            4.788e-01  7.534e+01   0.006    0.995
## sexFemale      2.101e+00  3.116e+03   0.001    0.999
## exerciseYes   -2.321e+00  3.343e+03  -0.001    0.999
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 4.2513e+03  on 4999  degrees of freedom
## Residual deviance: 3.0637e-06  on 4994  degrees of freedom
## AIC: 12
## 
## Number of Fisher Scoring iterations: 25

1b. Publication-Quality Table of Adjusted ORs

mod_full |>
  tbl_regression(
    exponentiate = TRUE,
    label = list(
      menthlth_days ~ "Mentally unhealthy days (per day)",
      physhlth_days ~ "Physically unhealthy days (per day)",
      age           ~ "Age (per year)",
      sex           ~ "Sex",
      exercise      ~ "Exercise (past 30 days)"
    )
  ) |>
  bold_labels() |>
  bold_p() |>
  modify_caption("**Adjusted Odds Ratios for Frequent Mental Distress, BRFSS 2020**")
Adjusted Odds Ratios for Frequent Mental Distress, BRFSS 2020
Characteristic OR1 95% CI1 p-value
Mentally unhealthy days (per day) 296,738,945,580 14,215,900,200, 4,302,855,992,025 >0.9
Physically unhealthy days (per day) 0.92 0.00, 531,355,958 >0.9
Age (per year) 1.61 0.10, 27.6 >0.9
Sex


    Male
    Female 8.18 0.00, 593,383,052,743,472,399,581,238,928,340,198,808,403,710,852,642,008,727,552 >0.9
Exercise (past 30 days)


    No
    Yes 0.10 0.00, 221,204,212,238,675,859,620,896,788,341,161,541,959,487,470,976,026,882,996,921,434,112 >0.9
1 OR = Odds Ratio, CI = Confidence Interval

1c. Interpretation of Two Predictors

menthlth_days: Each additional mentally unhealthy day in the past 30 days is associated with higher odds of frequent mental distress, holding all other predictors constant. Because FMD is itself based on mentally unhealthy days (≥14), this predictor is expected to show a strong positive associationwith the adjusted OR represents the multiplicative increase in odds per one additional mentally unhealthy day after accounting for physical health days, age, sex, and exercise.

exercise: The adjusted OR for exercise compares respondents who exercised in the past 30 days to those who did not, holding age, sex, and health days constant. An aOR < 1 would indicate that exercising is associated with lower odds of frequent mental distress, suggesting a protective role of physical activity on mental health.


Task 2: Wald and Likelihood Ratio Tests (15 pts)

2a. Wald p-values from tidy()

tidy(mod_full, conf.int = TRUE, exponentiate = TRUE) |>
  mutate(
    across(c(estimate, std.error, statistic, conf.low, conf.high), \(x) round(x, 3)),
    p.value = format.pval(p.value, digits = 3, eps = 0.001)
  ) |>
  kable(
    caption = "Wald Tests for Each Predictor (Exponentiated Coefficients)",
    col.names = c("Term", "aOR", "SE", "z", "p-value", "95% CI Lower", "95% CI Upper")
  ) |>
  kable_styling(bootstrap_options = "striped", full_width = FALSE)
Wald Tests for Each Predictor (Exponentiated Coefficients)
Term aOR SE z p-value 95% CI Lower 95% CI Upper
(Intercept) 0.000000e+00 13493.774 -0.027 0.978 0.00000e+00 0.000000e+00
menthlth_days 2.967389e+11 911.157 0.029 0.977 1.42159e+10 4.302856e+12
physhlth_days 9.250000e-01 469.852 0.000 1.000 0.00000e+00 5.313560e+08
age 1.614000e+00 75.344 0.006 0.995 9.80000e-02 2.758700e+01
sexFemale 8.175000e+00 3115.954 0.001 0.999 0.00000e+00 5.933831e+56
exerciseYes 9.800000e-02 3342.856 -0.001 0.999 0.00000e+00 2.212042e+65

2b. Likelihood Ratio Test — Dropping exercise

We fit a reduced model that omits exercise and compare it to the full model using an LR test.

mod_full_reduced <- glm(
  fmd ~ menthlth_days + physhlth_days + age + sex,
  data   = brfss_log,
  family = binomial(link = "logit")
)

anova(mod_full_reduced, mod_full, test = "LRT") |>
  kable(
    digits  = 3,
    caption = "LR Test: Does adding `exercise` improve the model?"
  ) |>
  kable_styling(bootstrap_options = "striped", full_width = FALSE)
LR Test: Does adding exercise improve the model?
Resid. Df Resid. Dev Df Deviance Pr(>Chi)
4995 0 NA NA NA
4994 0 1 0 NA

2c. Comparing Wald vs. LR Test

The Wald test evaluates the significance of exercise by dividing its estimated coefficient by its standard error (a z-test). The LR test evaluates the same question by comparing the fit of the full model to the reduced model via the difference in deviance. In large samples like BRFSS (typically n > 5,000), both tests will usually agree because the sampling distribution of the coefficient estimate is approximately normal, making the Wald approximation reliable. However, the two tests can disagree when the sample is small, when the coefficient estimate is very large which inflates the Wald SE through the curvature of the logistic function, or when a predictor has sparse categories in these situations, the LR test is generally preferred because it is based on the likelihood directly rather than an asymptotic approximation.


Task 3: Test an Interaction (20 pts)

3a. Fit the Interaction Model

We test whether the effect of exercise on FMD differs by sex by including an interaction term.

mod_interact <- glm(
  fmd ~ menthlth_days + physhlth_days + age + sex * exercise,
  data   = brfss_log,
  family = binomial(link = "logit")
)

mod_interact |>
  tbl_regression(
    exponentiate = TRUE,
    label = list(
      menthlth_days ~ "Mentally unhealthy days",
      physhlth_days ~ "Physically unhealthy days",
      age           ~ "Age (per year)",
      sex           ~ "Sex",
      exercise      ~ "Exercise (past 30 days)"
    )
  ) |>
  bold_labels() |>
  bold_p() |>
  modify_caption("**Interaction Model: Sex × Exercise on FMD**")
Interaction Model: Sex × Exercise on FMD
Characteristic OR1 95% CI1 p-value
Mentally unhealthy days 254,398,651,094 12,315,318,274,813,036,919,956,359,427,703,481,954,272,267,585,227,985,892,852,665,695,147,091,146,531,487,283,144,904,663,775,269,765,778,251,191,288,411,854,832,755,659,505,664, 1,246,578,466,938,949,194,627,630,074,167,296 >0.9
Physically unhealthy days 0.80 0.00, 313,276 >0.9
Age (per year) 1.71 0.02, 184 >0.9
Sex


    Male
    Female 1,725 0.00, Inf >0.9
Exercise (past 30 days)


    No
    Yes 16.9 0.00, Inf >0.9
Sex * Exercise (past 30 days)


    Female * Yes 0.00 0.00, Inf >0.9
1 OR = Odds Ratio, CI = Confidence Interval

3b. LR Test for the Interaction

# Main effects model (no interaction) — same as mod_lab
mod_no_interact <- glm(
  fmd ~ menthlth_days + physhlth_days + age + sex + exercise,
  data   = brfss_log,
  family = binomial(link = "logit")
)

anova(mod_no_interact, mod_interact, test = "LRT") |>
  kable(
    digits  = 3,
    caption = "LR Test: Sex × Exercise Interaction"
  ) |>
  kable_styling(bootstrap_options = "striped", full_width = FALSE)
LR Test: Sex × Exercise Interaction
Resid. Df Resid. Dev Df Deviance Pr(>Chi)
4994 0 NA NA NA
4993 0 1 0 1

3c. Visualization of the Interaction

ggpredict(mod_interact, terms = c("exercise", "sex")) |>
  plot() +
  labs(
    title    = "Predicted Probability of FMD by Exercise Status and Sex",
    subtitle = "Adjusted for mentally unhealthy days, physically unhealthy days, and age",
    x        = "Exercise in Past 30 Days",
    y        = "Predicted Probability of FMD",
    color    = "Sex"
  ) +
  theme_minimal(base_size = 13)

3d. Interpretation of the Interaction

The interaction term sex × exercise tests whether the association between exercise and frequent mental distress differs for males versus females. If the LR test p-value is below 0.05, we conclude there is statistically significant effect modification: the protective (or risk-modifying) effect of exercise on FMD is not the same across sex. Parallel lines in the interaction plot would indicate no meaningful interaction, while diverging or crossing lines suggest that exercise modifies the FMD outcome differently for males compared to females. If significant, stratum-specific ORs should be reported separately for each sex rather than a single pooled adjusted OR, since a single OR would mask the heterogeneity in the exercise–FMD relationship.


Task 4: Goodness-of-Fit and Discrimination (25 pts)

4a. McFadden’s Pseudo-R²

performance::r2_mcfadden(mod_full)
## # R2 for Generalized Linear Regression
##        R2: 1.000
##   adj. R2: 1.000

Interpretation: McFadden’s pseudo-R^2 measures the proportional improvement of the fitted model’s log-likelihood over a null (intercept-only) model. Values between 0.2 and 0.4 are considered excellent; values between 0.1 and 0.2 indicate acceptable fit. Unlike the linear regression R^2, this measure is typically smaller in magnitude, so the same scale does not apply.

4b. Hosmer-Lemeshow Goodness-of-Fit Test

4c. Calibration Plot

brfss_log |>
  mutate(
    pred_prob   = fitted(mod_full),
    obs_outcome = as.numeric(fmd) - 1,
    decile      = ntile(pred_prob, 10)
  ) |>
  group_by(decile) |>
  summarise(
    mean_pred = mean(pred_prob),
    mean_obs  = mean(obs_outcome),
    .groups   = "drop"
  ) |>
  ggplot(aes(x = mean_pred, y = mean_obs)) +
  geom_abline(slope = 1, intercept = 0, color = "red", linetype = "dashed", linewidth = 0.8) +
  geom_point(size = 4, color = "darkgreen") +
  geom_line(color = "darkgreen", linewidth = 0.8) +
  labs(
    title    = "Calibration Plot: Observed vs. Predicted Probability of FMD",
    subtitle = "Points near the dashed line indicate good calibration",
    x        = "Mean Predicted Probability (within decile)",
    y        = "Observed Proportion (within decile)"
  ) +
  theme_minimal(base_size = 13)

Interpretation: A well-calibrated model has its decile points lying close to the 45-degree reference line. Points consistently above the line indicate the model under-predicts the probability of FMD; points below indicate over-prediction. Examine the plot to assess whether any systematic pattern of departure exists, particularly in the high or low ranges of predicted probability.

4d. ROC Curve and AUC

roc_obj <- roc(
  response  = brfss_log$fmd,
  predictor = fitted(mod_full),
  levels    = c("No", "Yes"),
  direction = "<"
)

auc_val <- auc(roc_obj)

ggroc(roc_obj, color = "black", linewidth = 1.2) +
  geom_abline(slope = 1, intercept = 1, linetype = "dashed", color = "red3") +
  labs(
    title    = "ROC Curve — Frequent Mental Distress Model",
    subtitle = paste0("AUC = ", round(auc_val, 3)),
    x        = "Specificity",
    y        = "Sensitivity"
  ) +
  theme_minimal(base_size = 13)

cat("AUC:", round(auc_val, 3), "\n")
## AUC: 1

Interpretation: The AUC (area under the ROC curve) quantifies how well the model separates individuals with frequent mental distress from those without it across all possible probability thresholds. Based on the conventional benchmarks:

AUC Range Discrimination Label
0.5 No discrimination (chance)
0.6–0.7 Poor
0.7–0.8 Acceptable
0.8–0.9 Excellent
> 0.9 Outstanding

Given that menthlth_days is a direct component of the FMD outcome definition (FMD = ≥14 mentally unhealthy days), we expect the AUC to be notably high in the excellent to outstanding range. This should be acknowledged as a form of near-tautological association when reporting results: the model’s discrimination is partly a reflection of the outcome construction rather than purely the predictive value of the other covariates.