04-07-26library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(forcats)
library(ResourceSelection)
## ResourceSelection 0.3-6 2023-06-27
library(gtsummary)
## Warning: package 'gtsummary' was built under R version 4.5.2
library(broom)
library(ggeffects)
## Warning: package 'ggeffects' was built under R version 4.5.2
library(ggplot2)
library(plotly)
## Warning: package 'plotly' was built under R version 4.5.2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(knitr)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(flextable)
## Warning: package 'flextable' was built under R version 4.5.2
##
## Attaching package: 'flextable'
## The following objects are masked from 'package:kableExtra':
##
## as_image, footnote
## The following objects are masked from 'package:plotly':
##
## highlight, style
## The following object is masked from 'package:gtsummary':
##
## continuous_summary
brfss_2024_analysis <- readRDS("brfss_2024_analysis.rds")
# Model 1: Unadjusted Model Without Covariates
Model_1 <- glm(
MentalDistress ~ HousingInstability,
data = brfss_2024_analysis,
family = binomial(link = "logit")
)
# gtsummary table
tbl_model1 <- tbl_regression(
Model_1,
exponentiate = TRUE,
label = list(HousingInstability ~ "Housing Instability")
) |>
modify_caption("Model 1: Unadjusted Logistic Regression of Frequent Mental Distress on Housing Instability")
# Flextable
mod1_flex <- as_flex_table(tbl_model1)
mod1_flex
Characteristic | OR | 95% CI | p-value |
|---|---|---|---|
Housing Instability | |||
No | — | — | |
Yes | 4.33 | 4.17, 4.49 | <0.001 |
Abbreviations: CI = Confidence Interval, OR = Odds Ratio | |||
# Model 2: Adjusted Logistic Regression
Model_2 <- glm(
MentalDistress ~ HousingInstability + Age + Sex + Education + Income + Employment,
data = brfss_2024_analysis,
family = binomial(link = "logit")
)
# gtsummary table
tbl_model2 <- tbl_regression(
Model_2,
exponentiate = TRUE,
label = list(
HousingInstability ~ "Housing Instability",
Age ~ "Age Group",
Sex ~ "Sex",
Education ~ "Level of Education Completed",
Income ~ "Household Income",
Employment ~ "Employment Status"
)
) |>
modify_caption("Model 2: Adjusted Logistic Regression of Frequent Mental Distress on Housing Instability and Covariates")
# Flextable
mod2_flex <- as_flex_table(tbl_model2)
mod2_flex
Characteristic | OR | 95% CI | p-value |
|---|---|---|---|
Housing Instability | |||
No | — | — | |
Yes | 2.43 | 2.33, 2.54 | <0.001 |
Age Group | |||
18-24 | — | — | |
25-34 | 0.94 | 0.88, 1.01 | 0.11 |
35-44 | 0.82 | 0.76, 0.88 | <0.001 |
45-54 | 0.68 | 0.64, 0.74 | <0.001 |
55-64 | 0.49 | 0.45, 0.53 | <0.001 |
65+ | 0.33 | 0.30, 0.36 | <0.001 |
Sex | |||
Male | — | — | |
Female | 1.43 | 1.39, 1.48 | <0.001 |
Level of Education Completed | |||
Less Than High School | — | — | |
High School Graduate | 1.16 | 1.09, 1.24 | <0.001 |
Some College/Technical School | 1.25 | 1.17, 1.34 | <0.001 |
College Graduate/Technical Graduate | 1.02 | 0.95, 1.10 | 0.5 |
Household Income | |||
<$15,000 | — | — | |
$15,000 to <$25,000 | 1.01 | 0.95, 1.08 | 0.7 |
$25,000 to <$35,000 | 0.97 | 0.91, 1.04 | 0.5 |
$35,000 to <$50,000 | 0.92 | 0.86, 0.99 | 0.024 |
$50,000 to <$100,000 | 0.77 | 0.72, 0.82 | <0.001 |
$100,000 to <$200,000 | 0.62 | 0.58, 0.67 | <0.001 |
$200,000+ | 0.42 | 0.38, 0.47 | <0.001 |
Employment Status | |||
Employed for Wages | — | — | |
Self-Employed | 0.86 | 0.81, 0.92 | <0.001 |
Out of Work 1 Year or More | 1.73 | 1.58, 1.90 | <0.001 |
Out of Work <1 Year | 1.56 | 1.43, 1.69 | <0.001 |
Homemaker | 0.90 | 0.83, 0.97 | 0.009 |
Student | 1.04 | 0.94, 1.16 | 0.4 |
Retired | 1.06 | 1.00, 1.12 | 0.055 |
Unable to Work | 3.48 | 3.29, 3.69 | <0.001 |
Abbreviations: CI = Confidence Interval, OR = Odds Ratio | |||
# Model 3: Adjusted Logistic Regression with Effect Modifier
Model_3 <- glm(
MentalDistress ~ HousingInstability * RaceEthnicity + Age + Sex + Education + Income + Employment,
data = brfss_2024_analysis,
family = binomial(link = "logit")
)
# gtsummary table
tbl_model3 <- tbl_regression(
Model_3,
exponentiate = TRUE,
label = list(
HousingInstability ~ "Housing Instability",
RaceEthnicity ~ "Race/Ethnicity",
Age ~ "Age Group",
Sex ~ "Sex",
Education ~ "Level of Education Completed",
Income ~ "Household Income",
Employment ~ "Employment Status"
)
) |>
modify_caption("Model 3: Adjusted Logistic Regression of Frequent Mental Distress on Housing Instability with Race/Ethnicity as Effect Modifier")
# Convert to flextable
mod3_flex <- as_flex_table(tbl_model3)
mod3_flex
Characteristic | OR | 95% CI | p-value |
|---|---|---|---|
Housing Instability | |||
No | — | — | |
Yes | 3.12 | 2.95, 3.29 | <0.001 |
Race/Ethnicity | |||
White only, Non-Hispanic | — | — | |
Black only, Non-Hispanic | 0.80 | 0.75, 0.85 | <0.001 |
American Indian/Alaskan Native, Non-Hispanic | 1.02 | 0.91, 1.15 | 0.7 |
Asian only, Non-Hispanic | 0.65 | 0.57, 0.73 | <0.001 |
Native Hawaiian or other Pacific Islander only, Non-Hispanic | 0.89 | 0.68, 1.15 | 0.4 |
Multiracial, Non-Hispanic | 1.32 | 1.08, 1.60 | 0.005 |
Other Race only, Non-Hispanic | 1.29 | 1.17, 1.42 | <0.001 |
Hispanic | 0.73 | 0.69, 0.78 | <0.001 |
Age Group | |||
18-24 | — | — | |
25-34 | 0.95 | 0.89, 1.02 | 0.2 |
35-44 | 0.81 | 0.75, 0.87 | <0.001 |
45-54 | 0.67 | 0.63, 0.72 | <0.001 |
55-64 | 0.47 | 0.44, 0.51 | <0.001 |
65+ | 0.31 | 0.29, 0.34 | <0.001 |
Sex | |||
Male | — | — | |
Female | 1.44 | 1.40, 1.49 | <0.001 |
Level of Education Completed | |||
Less Than High School | — | — | |
High School Graduate | 1.05 | 0.98, 1.13 | 0.2 |
Some College/Technical School | 1.12 | 1.04, 1.20 | 0.002 |
College Graduate/Technical Graduate | 0.92 | 0.86, 0.99 | 0.029 |
Household Income | |||
<$15,000 | — | — | |
$15,000 to <$25,000 | 0.99 | 0.93, 1.06 | 0.8 |
$25,000 to <$35,000 | 0.94 | 0.88, 1.01 | 0.074 |
$35,000 to <$50,000 | 0.87 | 0.82, 0.94 | <0.001 |
$50,000 to <$100,000 | 0.70 | 0.66, 0.75 | <0.001 |
$100,000 to <$200,000 | 0.56 | 0.52, 0.60 | <0.001 |
$200,000+ | 0.39 | 0.35, 0.43 | <0.001 |
Employment Status | |||
Employed for Wages | — | — | |
Self-Employed | 0.86 | 0.81, 0.91 | <0.001 |
Out of Work 1 Year or More | 1.72 | 1.56, 1.89 | <0.001 |
Out of Work <1 Year | 1.59 | 1.46, 1.73 | <0.001 |
Homemaker | 0.92 | 0.85, 1.0 | 0.038 |
Student | 1.06 | 0.96, 1.18 | 0.2 |
Retired | 1.04 | 0.98, 1.10 | 0.2 |
Unable to Work | 3.32 | 3.13, 3.52 | <0.001 |
Housing Instability * Race/Ethnicity | |||
Yes * Black only, Non-Hispanic | 0.61 | 0.54, 0.69 | <0.001 |
Yes * American Indian/Alaskan Native, Non-Hispanic | 0.70 | 0.56, 0.87 | 0.001 |
Yes * Asian only, Non-Hispanic | 0.85 | 0.61, 1.16 | 0.3 |
Yes * Native Hawaiian or other Pacific Islander only, Non-Hispanic | 0.79 | 0.51, 1.23 | 0.3 |
Yes * Multiracial, Non-Hispanic | 0.54 | 0.35, 0.82 | 0.004 |
Yes * Other Race only, Non-Hispanic | 0.80 | 0.66, 0.97 | 0.026 |
Yes * Hispanic | 0.54 | 0.49, 0.60 | <0.001 |
Abbreviations: CI = Confidence Interval, OR = Odds Ratio | |||
confounders <- list(
"Age" = glm(MentalDistress ~ HousingInstability + Age,
data = brfss_2024_analysis, family = binomial),
"Sex" = glm(MentalDistress ~ HousingInstability + Sex,
data = brfss_2024_analysis, family = binomial),
"Education" = glm(MentalDistress ~ HousingInstability + Education,
data = brfss_2024_analysis, family = binomial),
"Income" = glm(MentalDistress ~ HousingInstability + Income,
data = brfss_2024_analysis, family = binomial),
"Employment" = glm(MentalDistress ~ HousingInstability + Employment,
data = brfss_2024_analysis, family = binomial)
)
b_crude_val <- coef(Model_1)["HousingInstabilityYes"]
conf_table <- purrr::map_dfr(names(confounders), \(name) {
mod <- confounders[[name]]
b_adj_val <- coef(mod)["HousingInstabilityYes"]
tibble::tibble(
Covariate = name,
`Crude (log-odds)` = round(b_crude_val, 4),
`Adjusted (log-odds)` = round(b_adj_val, 4),
`% Change` = round(abs(b_crude_val - b_adj_val) / abs(b_crude_val) * 100, 1),
Confounder = ifelse(abs(b_crude_val - b_adj_val) / abs(b_crude_val) * 100 > 10,
"Yes (>10%)", "No")
)
})
conf_table
## # A tibble: 5 × 5
## Covariate `Crude (log-odds)` `Adjusted (log-odds)` `% Change` Confounder
## <chr> <dbl> <dbl> <dbl> <chr>
## 1 Age 1.47 1.31 10.5 Yes (>10%)
## 2 Sex 1.47 1.44 1.7 No
## 3 Education 1.47 1.38 5.8 No
## 4 Income 1.47 1.20 17.9 Yes (>10%)
## 5 Employment 1.47 1.18 19.7 Yes (>10%)
# Likelihood Ratio Test
lrt <- anova(Model_2, Model_3, test = "LRT")
lrt_clean <- as.data.frame(lrt) |>
dplyr::mutate(across(where(is.numeric), \(x) round(x, 4)))
lrt_table <- kable(
lrt_clean,
caption = "Likelihood Ratio Test Comparing Models With and Without Interaction Term",
format = "html"
) |>
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
# Crude OR for Housing Instability
crude_hi <- tidy(Model_1, exponentiate = TRUE, conf.int = TRUE) |>
filter(term == "HousingInstabilityYes") |>
select(term, estimate, conf.low, conf.high) |>
mutate(type = "Crude")
# Adjusted OR for Housing Instability (without interaction)
adj_hi <- tidy(Model_2, exponentiate = TRUE, conf.int = TRUE) |>
filter(term == "HousingInstabilityYes") |>
select(term, estimate, conf.low, conf.high) |>
mutate(type = "Adjusted")
# Interaction ORs for Housing Instability with Race/Ethnicity
interaction_hi <- tidy(Model_3, exponentiate = TRUE, conf.int = TRUE) |>
filter(grepl("HousingInstabilityYes:", term)) |>
select(term, estimate, conf.low, conf.high) |>
mutate(type = "Interaction")
# Combine Tables
hi_table <- bind_rows(crude_hi, adj_hi, interaction_hi) |>
mutate(
across(c(estimate, conf.low, conf.high), \(x) round(x, 3)),
Predictor = ifelse(
type == "Interaction",
gsub("HousingInstabilityYes:", "", term),
term
)
) |>
select(Predictor, estimate, conf.low, conf.high, type)
# Display Table
hi_table_kable <- kable(
hi_table,
col.names = c("Predictor", "OR", "95% CI Lower", "95% CI Upper", "Type"),
caption = "Crude, Adjusted, and Interaction Odds Ratios for Housing Instability"
) |>
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
lrt_table
| Resid. Df | Resid. Dev | Df | Deviance | Pr(>Chi) |
|---|---|---|---|---|
| 162904 | 116017.0 | NA | NA | NA |
| 162890 | 115309.5 | 14 | 707.4917 | 0 |
hi_table_kable
| Predictor | OR | 95% CI Lower | 95% CI Upper | Type |
|---|---|---|---|---|
| HousingInstabilityYes | 4.330 | 4.173 | 4.493 | Crude |
| HousingInstabilityYes | 2.434 | 2.335 | 2.537 | Adjusted |
| RaceEthnicityBlack only, Non-Hispanic | 0.610 | 0.541 | 0.687 | Interaction |
| RaceEthnicityAmerican Indian/Alaskan Native, Non-Hispanic | 0.698 | 0.560 | 0.868 | Interaction |
| RaceEthnicityAsian only, Non-Hispanic | 0.846 | 0.613 | 1.159 | Interaction |
| RaceEthnicityNative Hawaiian or other Pacific Islander only, Non-Hispanic | 0.793 | 0.510 | 1.232 | Interaction |
| RaceEthnicityMultiracial, Non-Hispanic | 0.537 | 0.349 | 0.821 | Interaction |
| RaceEthnicityOther Race only, Non-Hispanic | 0.800 | 0.658 | 0.974 | Interaction |
| RaceEthnicityHispanic | 0.542 | 0.485 | 0.605 | Interaction |
# Deviance Residuals vs Fitted Values
resid_dev <- residuals(Model_3, type = "deviance")
fitted_vals <- fitted(Model_3)
plot(
fitted_vals, resid_dev,
xlab = "Fitted Probabilities",
ylab = "Deviance Residuals",
main = "Deviance Residuals vs Fitted",
pch = 19,
col = adjustcolor("#5DADE2", 0.5)
)
abline(h = 0, col = "red", lty = 2)
# Calibration check: Hosmer-Lemeshow test
hl_test <- hoslem.test(Model_3$y, fitted(Model_3), g = 10)
cat("\nHosmer-Lemeshow Test:\n")
##
## Hosmer-Lemeshow Test:
cat("Chi-squared =", round(hl_test$statistic, 2),
", df =", hl_test$parameter,
", p-value =", hl_test$p.value, "\n")
## Chi-squared = 38.74 , df = 8 , p-value = 5.500582e-06
# Cook's Distance
cooksd <- cooks.distance(Model_3)
plot(
cooksd, type = "h",
main = "Cook's Distance",
xlab = "Observation",
ylab = "Cook's Distance",
col = adjustcolor("#5DADE2", 0.5)
)
abline(h = 4/(nrow(brfss_2024_analysis) - length(coef(Model_3))), col = "red", lty = 2)
# Leverage vs Standardized Deviance Residuals
lev <- hatvalues(Model_3)
std_resid <- rstandard(Model_3)
plot(
lev, std_resid,
xlab = "Leverage",
ylab = "Standardized Deviance Residuals",
main = "Leverage vs Standardized Deviance Residuals",
pch = 19,
col = adjustcolor("#5DADE2", 0.5),
ylim = c(min(std_resid, -3), max(std_resid, 3)) # forces ±3 to show
)
abline(h = c(-3, 3), col = "red", lty = 2)
# Predicted Probabilities
pred_interact <- ggpredict(Model_3, terms = c("HousingInstability", "RaceEthnicity"))
race_colors <- c(
"White only, Non-Hispanic" = "#1B9E77",
"Black only, Non-Hispanic" = "red",
"American Indian/Alaskan Native, Non-Hispanic" = "#E7298A",
"Asian only, Non-Hispanic" = "#4DBBD5",
"Native Hawaiian or other Pacific Islander only, Non-Hispanic" = "#6a3d9a",
"Multiracial, Non-Hispanic" = "blue",
"Other Race only, Non-Hispanic" = "#FF7F0E",
"Hispanic" = "#2CA02C"
)
# Plot
p <- ggplot(pred_interact, aes(x = x, y = predicted, color = group, fill = group)) +
geom_line(linewidth = 1.2) +
geom_point(size = 2) +
geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.2, color = NA) +
labs(
title = "Figure 1. Predicted Probability of Frequent Mental Distress",
subtitle = "By Housing Instability and Race/Ethnicity",
x = "Housing Instability",
y = "Predicted Probability",
color = "Race/Ethnicity",
fill = "Race/Ethnicity"
) +
scale_color_manual(values = race_colors) +
scale_fill_manual(values = race_colors) +
scale_y_continuous(labels = scales::percent_format()) +
theme_minimal(base_size = 9) +
theme(legend.position = "bottom")
# Interactive Version
ggplotly(p, width = 1200, height = 500) |>
layout(
margin = list(l = 60, r = 50, t = 50, b = 70),
legend = list(orientation = "h", x = 0.1, y = -0.2)
)
Figure 2: Full Model Coefficient Plot
# Tidy Model with ORs and 95% CI
tidy_model <- tidy(Model_3, conf.int = TRUE, conf.method = "wald", exponentiate = TRUE) |>
filter(term != "(Intercept)")
# ggplot for Forest Plot
p <- ggplot(tidy_model, aes(x = estimate, y = fct_reorder(term, estimate))) +
geom_point(color = "#5DADE2", size = 3) +
geom_errorbar(aes(xmin = conf.low, xmax = conf.high), height = 0.2, color = "#5DADE2") +
geom_vline(xintercept = 1, linetype = "dashed", color = "red") + # OR=1 reference
scale_x_log10() +
labs(
title = "Forest Plot - Odds Ratios and 95% CIs",
subtitle = "Adjusted logistic regression for frequent mental distress",
x = "Odds Ratio (log scale)",
y = "Predictors"
) +
theme_minimal(base_size = 12)
ggplotly(p, width = max(1200, length(tidy_model$term) * 50), height = 600)
## `height` was translated to `width`.