Download Packages and Reading in BRFSS Final Analytic Sample

Download Packages

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(forcats)
library(ResourceSelection)
## ResourceSelection 0.3-6   2023-06-27
library(gtsummary)
## Warning: package 'gtsummary' was built under R version 4.5.2
library(broom)
library(ggeffects)
## Warning: package 'ggeffects' was built under R version 4.5.2
library(ggplot2)
library(plotly)
## Warning: package 'plotly' was built under R version 4.5.2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(knitr)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(flextable)
## Warning: package 'flextable' was built under R version 4.5.2
## 
## Attaching package: 'flextable'
## The following objects are masked from 'package:kableExtra':
## 
##     as_image, footnote
## The following objects are masked from 'package:plotly':
## 
##     highlight, style
## The following object is masked from 'package:gtsummary':
## 
##     continuous_summary

Reading in Brfss Final Analytic Sample

brfss_2024_analysis <- readRDS("brfss_2024_analysis.rds")

Section 3: Regression Results

Model 1: Simple Unadjusted Logistic Regression

# Model 1: Unadjusted Model Without Covariates
Model_1 <- glm(
  MentalDistress ~ HousingInstability,
  data = brfss_2024_analysis,
  family = binomial(link = "logit")
)

# gtsummary table
tbl_model1 <- tbl_regression(
  Model_1,
  exponentiate = TRUE,
  label = list(HousingInstability ~ "Housing Instability")
) |>
  modify_caption("Model 1: Unadjusted Logistic Regression of Frequent Mental Distress on Housing Instability")

# Flextable
mod1_flex <- as_flex_table(tbl_model1)

mod1_flex
Model 1: Unadjusted Logistic Regression of Frequent Mental Distress on Housing Instability

Characteristic

OR

95% CI

p-value

Housing Instability

No

Yes

4.33

4.17, 4.49

<0.001

Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Model 2: Adjusted Model with Covariates

# Model 2: Adjusted Logistic Regression
Model_2 <- glm(
  MentalDistress ~ HousingInstability + Age + Sex + Education + Income + Employment,
  data = brfss_2024_analysis,
  family = binomial(link = "logit")
)

# gtsummary table
tbl_model2 <- tbl_regression(
  Model_2,
  exponentiate = TRUE, 
  label = list(
    HousingInstability ~ "Housing Instability",
    Age ~ "Age Group",
    Sex ~ "Sex",
    Education ~ "Level of Education Completed",
    Income ~ "Household Income",
    Employment ~ "Employment Status"
  )
) |>
  modify_caption("Model 2: Adjusted Logistic Regression of Frequent Mental Distress on Housing Instability and Covariates")

# Flextable
mod2_flex <- as_flex_table(tbl_model2)

mod2_flex
Model 2: Adjusted Logistic Regression of Frequent Mental Distress on Housing Instability and Covariates

Characteristic

OR

95% CI

p-value

Housing Instability

No

Yes

2.43

2.33, 2.54

<0.001

Age Group

18-24

25-34

0.94

0.88, 1.01

0.11

35-44

0.82

0.76, 0.88

<0.001

45-54

0.68

0.64, 0.74

<0.001

55-64

0.49

0.45, 0.53

<0.001

65+

0.33

0.30, 0.36

<0.001

Sex

Male

Female

1.43

1.39, 1.48

<0.001

Level of Education Completed

Less Than High School

High School Graduate

1.16

1.09, 1.24

<0.001

Some College/Technical School

1.25

1.17, 1.34

<0.001

College Graduate/Technical Graduate

1.02

0.95, 1.10

0.5

Household Income

<$15,000

$15,000 to <$25,000

1.01

0.95, 1.08

0.7

$25,000 to <$35,000

0.97

0.91, 1.04

0.5

$35,000 to <$50,000

0.92

0.86, 0.99

0.024

$50,000 to <$100,000

0.77

0.72, 0.82

<0.001

$100,000 to <$200,000

0.62

0.58, 0.67

<0.001

$200,000+

0.42

0.38, 0.47

<0.001

Employment Status

Employed for Wages

Self-Employed

0.86

0.81, 0.92

<0.001

Out of Work 1 Year or More

1.73

1.58, 1.90

<0.001

Out of Work <1 Year

1.56

1.43, 1.69

<0.001

Homemaker

0.90

0.83, 0.97

0.009

Student

1.04

0.94, 1.16

0.4

Retired

1.06

1.00, 1.12

0.055

Unable to Work

3.48

3.29, 3.69

<0.001

Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Model 3: Interaction Between Housing Instability and Mental Distress with Race/Ethnicity as Effect Modifier

# Model 3: Adjusted Logistic Regression with Effect Modifier
Model_3 <- glm(
  MentalDistress ~ HousingInstability * RaceEthnicity + Age + Sex + Education + Income + Employment,
  data = brfss_2024_analysis,
  family = binomial(link = "logit")
)

# gtsummary table
tbl_model3 <- tbl_regression(
  Model_3,
  exponentiate = TRUE,
  label = list(
    HousingInstability ~ "Housing Instability",
    RaceEthnicity ~ "Race/Ethnicity",
    Age ~ "Age Group",
    Sex ~ "Sex",
    Education ~ "Level of Education Completed",
    Income ~ "Household Income",
    Employment ~ "Employment Status"
  )
) |>
  modify_caption("Model 3: Adjusted Logistic Regression of Frequent Mental Distress on Housing Instability with Race/Ethnicity as Effect Modifier")

# Convert to flextable
mod3_flex <- as_flex_table(tbl_model3)

mod3_flex
Model 3: Adjusted Logistic Regression of Frequent Mental Distress on Housing Instability with Race/Ethnicity as Effect Modifier

Characteristic

OR

95% CI

p-value

Housing Instability

No

Yes

3.12

2.95, 3.29

<0.001

Race/Ethnicity

White only, Non-Hispanic

Black only, Non-Hispanic

0.80

0.75, 0.85

<0.001

American Indian/Alaskan Native, Non-Hispanic

1.02

0.91, 1.15

0.7

Asian only, Non-Hispanic

0.65

0.57, 0.73

<0.001

Native Hawaiian or other Pacific Islander only, Non-Hispanic

0.89

0.68, 1.15

0.4

Multiracial, Non-Hispanic

1.32

1.08, 1.60

0.005

Other Race only, Non-Hispanic

1.29

1.17, 1.42

<0.001

Hispanic

0.73

0.69, 0.78

<0.001

Age Group

18-24

25-34

0.95

0.89, 1.02

0.2

35-44

0.81

0.75, 0.87

<0.001

45-54

0.67

0.63, 0.72

<0.001

55-64

0.47

0.44, 0.51

<0.001

65+

0.31

0.29, 0.34

<0.001

Sex

Male

Female

1.44

1.40, 1.49

<0.001

Level of Education Completed

Less Than High School

High School Graduate

1.05

0.98, 1.13

0.2

Some College/Technical School

1.12

1.04, 1.20

0.002

College Graduate/Technical Graduate

0.92

0.86, 0.99

0.029

Household Income

<$15,000

$15,000 to <$25,000

0.99

0.93, 1.06

0.8

$25,000 to <$35,000

0.94

0.88, 1.01

0.074

$35,000 to <$50,000

0.87

0.82, 0.94

<0.001

$50,000 to <$100,000

0.70

0.66, 0.75

<0.001

$100,000 to <$200,000

0.56

0.52, 0.60

<0.001

$200,000+

0.39

0.35, 0.43

<0.001

Employment Status

Employed for Wages

Self-Employed

0.86

0.81, 0.91

<0.001

Out of Work 1 Year or More

1.72

1.56, 1.89

<0.001

Out of Work <1 Year

1.59

1.46, 1.73

<0.001

Homemaker

0.92

0.85, 1.0

0.038

Student

1.06

0.96, 1.18

0.2

Retired

1.04

0.98, 1.10

0.2

Unable to Work

3.32

3.13, 3.52

<0.001

Housing Instability * Race/Ethnicity

Yes * Black only, Non-Hispanic

0.61

0.54, 0.69

<0.001

Yes * American Indian/Alaskan Native, Non-Hispanic

0.70

0.56, 0.87

0.001

Yes * Asian only, Non-Hispanic

0.85

0.61, 1.16

0.3

Yes * Native Hawaiian or other Pacific Islander only, Non-Hispanic

0.79

0.51, 1.23

0.3

Yes * Multiracial, Non-Hispanic

0.54

0.35, 0.82

0.004

Yes * Other Race only, Non-Hispanic

0.80

0.66, 0.97

0.026

Yes * Hispanic

0.54

0.49, 0.60

<0.001

Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Confounding Assessment

confounders <- list(
  "Age" = glm(MentalDistress ~ HousingInstability + Age,
              data = brfss_2024_analysis, family = binomial),

  "Sex" = glm(MentalDistress ~ HousingInstability + Sex,
              data = brfss_2024_analysis, family = binomial),

  "Education" = glm(MentalDistress ~ HousingInstability + Education,
                    data = brfss_2024_analysis, family = binomial),

  "Income" = glm(MentalDistress ~ HousingInstability + Income,
                 data = brfss_2024_analysis, family = binomial),

  "Employment" = glm(MentalDistress ~ HousingInstability + Employment,
                     data = brfss_2024_analysis, family = binomial)
)

b_crude_val <- coef(Model_1)["HousingInstabilityYes"]

conf_table <- purrr::map_dfr(names(confounders), \(name) {
  mod <- confounders[[name]]
  b_adj_val <- coef(mod)["HousingInstabilityYes"]

  tibble::tibble(
    Covariate = name,
    `Crude (log-odds)` = round(b_crude_val, 4),
    `Adjusted (log-odds)` = round(b_adj_val, 4),
    `% Change` = round(abs(b_crude_val - b_adj_val) / abs(b_crude_val) * 100, 1),
    Confounder = ifelse(abs(b_crude_val - b_adj_val) / abs(b_crude_val) * 100 > 10,
                        "Yes (>10%)", "No")
  )
})

conf_table
## # A tibble: 5 × 5
##   Covariate  `Crude (log-odds)` `Adjusted (log-odds)` `% Change` Confounder
##   <chr>                   <dbl>                 <dbl>      <dbl> <chr>     
## 1 Age                      1.47                  1.31       10.5 Yes (>10%)
## 2 Sex                      1.47                  1.44        1.7 No        
## 3 Education                1.47                  1.38        5.8 No        
## 4 Income                   1.47                  1.20       17.9 Yes (>10%)
## 5 Employment               1.47                  1.18       19.7 Yes (>10%)

Interaction Testing

# Likelihood Ratio Test
lrt <- anova(Model_2, Model_3, test = "LRT")

lrt_clean <- as.data.frame(lrt) |>
  dplyr::mutate(across(where(is.numeric), \(x) round(x, 4)))

lrt_table <- kable(
  lrt_clean,
  caption = "Likelihood Ratio Test Comparing Models With and Without Interaction Term",
  format = "html"
) |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)


# Crude OR for Housing Instability
crude_hi <- tidy(Model_1, exponentiate = TRUE, conf.int = TRUE) |>
  filter(term == "HousingInstabilityYes") |>
  select(term, estimate, conf.low, conf.high) |>
  mutate(type = "Crude")

# Adjusted OR for Housing Instability (without interaction)
adj_hi <- tidy(Model_2, exponentiate = TRUE, conf.int = TRUE) |>
  filter(term == "HousingInstabilityYes") |>
  select(term, estimate, conf.low, conf.high) |>
  mutate(type = "Adjusted")

# Interaction ORs for Housing Instability with Race/Ethnicity
interaction_hi <- tidy(Model_3, exponentiate = TRUE, conf.int = TRUE) |>
  filter(grepl("HousingInstabilityYes:", term)) |>
  select(term, estimate, conf.low, conf.high) |>
  mutate(type = "Interaction")

# Combine Tables
hi_table <- bind_rows(crude_hi, adj_hi, interaction_hi) |>
  mutate(
    across(c(estimate, conf.low, conf.high), \(x) round(x, 3)),
    Predictor = ifelse(
      type == "Interaction",
      gsub("HousingInstabilityYes:", "", term),
      term
    )
  ) |>
  select(Predictor, estimate, conf.low, conf.high, type)

# Display Table
hi_table_kable <- kable(
  hi_table,
  col.names = c("Predictor", "OR", "95% CI Lower", "95% CI Upper", "Type"),
  caption = "Crude, Adjusted, and Interaction Odds Ratios for Housing Instability"
) |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

lrt_table
Likelihood Ratio Test Comparing Models With and Without Interaction Term
Resid. Df Resid. Dev Df Deviance Pr(>Chi)
162904 116017.0 NA NA NA
162890 115309.5 14 707.4917 0
hi_table_kable
Crude, Adjusted, and Interaction Odds Ratios for Housing Instability
Predictor OR 95% CI Lower 95% CI Upper Type
HousingInstabilityYes 4.330 4.173 4.493 Crude
HousingInstabilityYes 2.434 2.335 2.537 Adjusted
RaceEthnicityBlack only, Non-Hispanic 0.610 0.541 0.687 Interaction
RaceEthnicityAmerican Indian/Alaskan Native, Non-Hispanic 0.698 0.560 0.868 Interaction
RaceEthnicityAsian only, Non-Hispanic 0.846 0.613 1.159 Interaction
RaceEthnicityNative Hawaiian or other Pacific Islander only, Non-Hispanic 0.793 0.510 1.232 Interaction
RaceEthnicityMultiracial, Non-Hispanic 0.537 0.349 0.821 Interaction
RaceEthnicityOther Race only, Non-Hispanic 0.800 0.658 0.974 Interaction
RaceEthnicityHispanic 0.542 0.485 0.605 Interaction

Section 4: Model Diagnostics

# Deviance Residuals vs Fitted Values
resid_dev <- residuals(Model_3, type = "deviance")
fitted_vals <- fitted(Model_3)

plot(
  fitted_vals, resid_dev,
  xlab = "Fitted Probabilities",
  ylab = "Deviance Residuals",
  main = "Deviance Residuals vs Fitted",
  pch = 19,
  col = adjustcolor("#5DADE2", 0.5)
)
abline(h = 0, col = "red", lty = 2)

# Calibration check: Hosmer-Lemeshow test
hl_test <- hoslem.test(Model_3$y, fitted(Model_3), g = 10)
cat("\nHosmer-Lemeshow Test:\n")
## 
## Hosmer-Lemeshow Test:
cat("Chi-squared =", round(hl_test$statistic, 2),
    ", df =", hl_test$parameter,
    ", p-value =", hl_test$p.value, "\n")
## Chi-squared = 38.74 , df = 8 , p-value = 5.500582e-06
# Cook's Distance
cooksd <- cooks.distance(Model_3)
plot(
  cooksd, type = "h",
  main = "Cook's Distance",
  xlab = "Observation",
  ylab = "Cook's Distance",
  col = adjustcolor("#5DADE2", 0.5)
)
abline(h = 4/(nrow(brfss_2024_analysis) - length(coef(Model_3))), col = "red", lty = 2)

# Leverage vs Standardized Deviance Residuals
lev <- hatvalues(Model_3)
std_resid <- rstandard(Model_3)

plot(
  lev, std_resid,
  xlab = "Leverage",
  ylab = "Standardized Deviance Residuals",
  main = "Leverage vs Standardized Deviance Residuals",
  pch = 19,
  col = adjustcolor("#5DADE2", 0.5),
  ylim = c(min(std_resid, -3), max(std_resid, 3))  # forces ±3 to show
)
abline(h = c(-3, 3), col = "red", lty = 2)

Section 5: Regression Visualizations

Figure 1: Primary Association Visualization

# Predicted Probabilities
pred_interact <- ggpredict(Model_3, terms = c("HousingInstability", "RaceEthnicity"))

race_colors <- c(
  "White only, Non-Hispanic" = "#1B9E77",
  "Black only, Non-Hispanic" = "red",
  "American Indian/Alaskan Native, Non-Hispanic" = "#E7298A",
  "Asian only, Non-Hispanic" = "#4DBBD5",
  "Native Hawaiian or other Pacific Islander only, Non-Hispanic" = "#6a3d9a",
  "Multiracial, Non-Hispanic" = "blue",
  "Other Race only, Non-Hispanic" = "#FF7F0E",
  "Hispanic" = "#2CA02C"
)

# Plot
p <- ggplot(pred_interact, aes(x = x, y = predicted, color = group, fill = group)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 2) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.2, color = NA) +
  labs(
    title = "Figure 1. Predicted Probability of Frequent Mental Distress",
    subtitle = "By Housing Instability and Race/Ethnicity",
    x = "Housing Instability",
    y = "Predicted Probability",
    color = "Race/Ethnicity",
    fill = "Race/Ethnicity"
  ) +
  scale_color_manual(values = race_colors) +
  scale_fill_manual(values = race_colors) +
  scale_y_continuous(labels = scales::percent_format()) +
  theme_minimal(base_size = 9) +
  theme(legend.position = "bottom")


# Interactive Version
ggplotly(p, width = 1200, height = 500) |>
  layout(
    margin = list(l = 60, r = 50, t = 50, b = 70),
    legend = list(orientation = "h", x = 0.1, y = -0.2)
  )

Figure 2: Full Model Coefficient Plot

# Tidy Model with ORs and 95% CI
tidy_model <- tidy(Model_3, conf.int = TRUE, conf.method = "wald", exponentiate = TRUE) |>
  filter(term != "(Intercept)")

# ggplot for Forest Plot
p <- ggplot(tidy_model, aes(x = estimate, y = fct_reorder(term, estimate))) +
  geom_point(color = "#5DADE2", size = 3) +
  geom_errorbar(aes(xmin = conf.low, xmax = conf.high), height = 0.2, color = "#5DADE2") +
  geom_vline(xintercept = 1, linetype = "dashed", color = "red") + # OR=1 reference
  scale_x_log10() +
  labs(
    title = "Forest Plot - Odds Ratios and 95% CIs",
    subtitle = "Adjusted logistic regression for frequent mental distress",
    x = "Odds Ratio (log scale)",
    y = "Predictors"
  ) +
  theme_minimal(base_size = 12)

ggplotly(p, width = max(1200, length(tidy_model$term) * 50), height = 600)
## `height` was translated to `width`.