Download Packages and Reading in BRFSS Final Analytic Sample

Download Packages

library(dplyr)

## Warning: package 'dplyr' was built under R version 4.5.2

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(forcats)
library(ResourceSelection)

## ResourceSelection 0.3-6   2023-06-27

library(gtsummary)

## Warning: package 'gtsummary' was built under R version 4.5.2

library(broom)
library(ggeffects)

## Warning: package 'ggeffects' was built under R version 4.5.2

library(ggplot2)
library(plotly)

## Warning: package 'plotly' was built under R version 4.5.2

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

library(knitr)
library(kableExtra)

## 
## Attaching package: 'kableExtra'

## The following object is masked from 'package:dplyr':
## 
##     group_rows

library(flextable)

## Warning: package 'flextable' was built under R version 4.5.2

## 
## Attaching package: 'flextable'

## The following objects are masked from 'package:kableExtra':
## 
##     as_image, footnote

## The following objects are masked from 'package:plotly':
## 
##     highlight, style

## The following object is masked from 'package:gtsummary':
## 
##     continuous_summary

Reading in Brfss Final Analytic Sample

brfss_2024_analysis <- readRDS("brfss_2024_analysis.rds")

Section 3: Regression Results

Model 1: Simple Unadjusted Logistic Regression

# Model 1: Unadjusted Model Without Covariates
Model_1 <- glm(
  MentalDistress ~ HousingInstability,
  data = brfss_2024_analysis,
  family = binomial(link = "logit")
)

# gtsummary table
tbl_model1 <- tbl_regression(
  Model_1,
  exponentiate = TRUE,
  label = list(HousingInstability ~ "Housing Instability")
) |>
  modify_caption("Model 1: Unadjusted Logistic Regression of Frequent Mental Distress on Housing Instability")

# Flextable
mod1_flex <- as_flex_table(tbl_model1)

mod1_flex

Model 1: Unadjusted Logistic Regression of Frequent Mental Distress on Housing Instability
Characteristic	OR	95% CI	p-value
Housing Instability
No	—	—
Yes	4.33	4.17, 4.49	<0.001
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Model 2: Adjusted Model with Covariates

# Model 2: Adjusted Logistic Regression
Model_2 <- glm(
  MentalDistress ~ HousingInstability + Age + Sex + Education + Income + Employment,
  data = brfss_2024_analysis,
  family = binomial(link = "logit")
)

# gtsummary table
tbl_model2 <- tbl_regression(
  Model_2,
  exponentiate = TRUE, 
  label = list(
    HousingInstability ~ "Housing Instability",
    Age ~ "Age Group",
    Sex ~ "Sex",
    Education ~ "Level of Education Completed",
    Income ~ "Household Income",
    Employment ~ "Employment Status"
  )
) |>
  modify_caption("Model 2: Adjusted Logistic Regression of Frequent Mental Distress on Housing Instability and Covariates")

# Flextable
mod2_flex <- as_flex_table(tbl_model2)

mod2_flex

Model 2: Adjusted Logistic Regression of Frequent Mental Distress on Housing Instability and Covariates
Characteristic	OR	95% CI	p-value
Housing Instability
No	—	—
Yes	2.43	2.33, 2.54	<0.001
Age Group
18-24	—	—
25-34	0.94	0.88, 1.01	0.11
35-44	0.82	0.76, 0.88	<0.001
45-54	0.68	0.64, 0.74	<0.001
55-64	0.49	0.45, 0.53	<0.001
65+	0.33	0.30, 0.36	<0.001
Sex
Male	—	—
Female	1.43	1.39, 1.48	<0.001
Level of Education Completed
Less Than High School	—	—
High School Graduate	1.16	1.09, 1.24	<0.001
Some College/Technical School	1.25	1.17, 1.34	<0.001
College Graduate/Technical Graduate	1.02	0.95, 1.10	0.5
Household Income
<$15,000	—	—
$15,000 to <$25,000	1.01	0.95, 1.08	0.7
$25,000 to <$35,000	0.97	0.91, 1.04	0.5
$35,000 to <$50,000	0.92	0.86, 0.99	0.024
$50,000 to <$100,000	0.77	0.72, 0.82	<0.001
$100,000 to <$200,000	0.62	0.58, 0.67	<0.001
$200,000+	0.42	0.38, 0.47	<0.001
Employment Status
Employed for Wages	—	—
Self-Employed	0.86	0.81, 0.92	<0.001
Out of Work 1 Year or More	1.73	1.58, 1.90	<0.001
Out of Work <1 Year	1.56	1.43, 1.69	<0.001
Homemaker	0.90	0.83, 0.97	0.009
Student	1.04	0.94, 1.16	0.4
Retired	1.06	1.00, 1.12	0.055
Unable to Work	3.48	3.29, 3.69	<0.001
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Model 3: Interaction Between Housing Instability and Mental Distress with Race/Ethnicity as Effect Modifier

# Model 3: Adjusted Logistic Regression with Effect Modifier
Model_3 <- glm(
  MentalDistress ~ HousingInstability * RaceEthnicity + Age + Sex + Education + Income + Employment,
  data = brfss_2024_analysis,
  family = binomial(link = "logit")
)

# gtsummary table
tbl_model3 <- tbl_regression(
  Model_3,
  exponentiate = TRUE,
  label = list(
    HousingInstability ~ "Housing Instability",
    RaceEthnicity ~ "Race/Ethnicity",
    Age ~ "Age Group",
    Sex ~ "Sex",
    Education ~ "Level of Education Completed",
    Income ~ "Household Income",
    Employment ~ "Employment Status"
  )
) |>
  modify_caption("Model 3: Adjusted Logistic Regression of Frequent Mental Distress on Housing Instability with Race/Ethnicity as Effect Modifier")

# Convert to flextable
mod3_flex <- as_flex_table(tbl_model3)

mod3_flex

Model 3: Adjusted Logistic Regression of Frequent Mental Distress on Housing Instability with Race/Ethnicity as Effect Modifier
Characteristic	OR	95% CI	p-value
Housing Instability
No	—	—
Yes	3.12	2.95, 3.29	<0.001
Race/Ethnicity
White only, Non-Hispanic	—	—
Black only, Non-Hispanic	0.80	0.75, 0.85	<0.001
American Indian/Alaskan Native, Non-Hispanic	1.02	0.91, 1.15	0.7
Asian only, Non-Hispanic	0.65	0.57, 0.73	<0.001
Native Hawaiian or other Pacific Islander only, Non-Hispanic	0.89	0.68, 1.15	0.4
Multiracial, Non-Hispanic	1.32	1.08, 1.60	0.005
Other Race only, Non-Hispanic	1.29	1.17, 1.42	<0.001
Hispanic	0.73	0.69, 0.78	<0.001
Age Group
18-24	—	—
25-34	0.95	0.89, 1.02	0.2
35-44	0.81	0.75, 0.87	<0.001
45-54	0.67	0.63, 0.72	<0.001
55-64	0.47	0.44, 0.51	<0.001
65+	0.31	0.29, 0.34	<0.001
Sex
Male	—	—
Female	1.44	1.40, 1.49	<0.001
Level of Education Completed
Less Than High School	—	—
High School Graduate	1.05	0.98, 1.13	0.2
Some College/Technical School	1.12	1.04, 1.20	0.002
College Graduate/Technical Graduate	0.92	0.86, 0.99	0.029
Household Income
<$15,000	—	—
$15,000 to <$25,000	0.99	0.93, 1.06	0.8
$25,000 to <$35,000	0.94	0.88, 1.01	0.074
$35,000 to <$50,000	0.87	0.82, 0.94	<0.001
$50,000 to <$100,000	0.70	0.66, 0.75	<0.001
$100,000 to <$200,000	0.56	0.52, 0.60	<0.001
$200,000+	0.39	0.35, 0.43	<0.001
Employment Status
Employed for Wages	—	—
Self-Employed	0.86	0.81, 0.91	<0.001
Out of Work 1 Year or More	1.72	1.56, 1.89	<0.001
Out of Work <1 Year	1.59	1.46, 1.73	<0.001
Homemaker	0.92	0.85, 1.0	0.038
Student	1.06	0.96, 1.18	0.2
Retired	1.04	0.98, 1.10	0.2
Unable to Work	3.32	3.13, 3.52	<0.001
Housing Instability * Race/Ethnicity
Yes * Black only, Non-Hispanic	0.61	0.54, 0.69	<0.001
Yes * American Indian/Alaskan Native, Non-Hispanic	0.70	0.56, 0.87	0.001
Yes * Asian only, Non-Hispanic	0.85	0.61, 1.16	0.3
Yes * Native Hawaiian or other Pacific Islander only, Non-Hispanic	0.79	0.51, 1.23	0.3
Yes * Multiracial, Non-Hispanic	0.54	0.35, 0.82	0.004
Yes * Other Race only, Non-Hispanic	0.80	0.66, 0.97	0.026
Yes * Hispanic	0.54	0.49, 0.60	<0.001
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Confounding Assessment

confounders <- list(
  "Age" = glm(MentalDistress ~ HousingInstability + Age,
              data = brfss_2024_analysis, family = binomial),

  "Sex" = glm(MentalDistress ~ HousingInstability + Sex,
              data = brfss_2024_analysis, family = binomial),

  "Education" = glm(MentalDistress ~ HousingInstability + Education,
                    data = brfss_2024_analysis, family = binomial),

  "Income" = glm(MentalDistress ~ HousingInstability + Income,
                 data = brfss_2024_analysis, family = binomial),

  "Employment" = glm(MentalDistress ~ HousingInstability + Employment,
                     data = brfss_2024_analysis, family = binomial)
)

b_crude_val <- coef(Model_1)["HousingInstabilityYes"]

conf_table <- purrr::map_dfr(names(confounders), \(name) {
  mod <- confounders[[name]]
  b_adj_val <- coef(mod)["HousingInstabilityYes"]

  tibble::tibble(
    Covariate = name,
    `Crude (log-odds)` = round(b_crude_val, 4),
    `Adjusted (log-odds)` = round(b_adj_val, 4),
    `% Change` = round(abs(b_crude_val - b_adj_val) / abs(b_crude_val) * 100, 1),
    Confounder = ifelse(abs(b_crude_val - b_adj_val) / abs(b_crude_val) * 100 > 10,
                        "Yes (>10%)", "No")
  )
})

conf_table

## # A tibble: 5 × 5
##   Covariate  `Crude (log-odds)` `Adjusted (log-odds)` `% Change` Confounder
##   <chr>                   <dbl>                 <dbl>      <dbl> <chr>     
## 1 Age                      1.47                  1.31       10.5 Yes (>10%)
## 2 Sex                      1.47                  1.44        1.7 No        
## 3 Education                1.47                  1.38        5.8 No        
## 4 Income                   1.47                  1.20       17.9 Yes (>10%)
## 5 Employment               1.47                  1.18       19.7 Yes (>10%)

Interaction Testing

# Likelihood Ratio Test
lrt <- anova(Model_2, Model_3, test = "LRT")

lrt_clean <- as.data.frame(lrt) |>
  dplyr::mutate(across(where(is.numeric), \(x) round(x, 4)))

lrt_table <- kable(
  lrt_clean,
  caption = "Likelihood Ratio Test Comparing Models With and Without Interaction Term",
  format = "html"
) |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)


# Crude OR for Housing Instability
crude_hi <- tidy(Model_1, exponentiate = TRUE, conf.int = TRUE) |>
  filter(term == "HousingInstabilityYes") |>
  select(term, estimate, conf.low, conf.high) |>
  mutate(type = "Crude")

# Adjusted OR for Housing Instability (without interaction)
adj_hi <- tidy(Model_2, exponentiate = TRUE, conf.int = TRUE) |>
  filter(term == "HousingInstabilityYes") |>
  select(term, estimate, conf.low, conf.high) |>
  mutate(type = "Adjusted")

# Interaction ORs for Housing Instability with Race/Ethnicity
interaction_hi <- tidy(Model_3, exponentiate = TRUE, conf.int = TRUE) |>
  filter(grepl("HousingInstabilityYes:", term)) |>
  select(term, estimate, conf.low, conf.high) |>
  mutate(type = "Interaction")

# Combine Tables
hi_table <- bind_rows(crude_hi, adj_hi, interaction_hi) |>
  mutate(
    across(c(estimate, conf.low, conf.high), \(x) round(x, 3)),
    Predictor = ifelse(
      type == "Interaction",
      gsub("HousingInstabilityYes:", "", term),
      term
    )
  ) |>
  select(Predictor, estimate, conf.low, conf.high, type)

# Display Table
hi_table_kable <- kable(
  hi_table,
  col.names = c("Predictor", "OR", "95% CI Lower", "95% CI Upper", "Type"),
  caption = "Crude, Adjusted, and Interaction Odds Ratios for Housing Instability"
) |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

lrt_table

Likelihood Ratio Test Comparing Models With and Without Interaction Term
Resid. Df	Resid. Dev	Df	Deviance	Pr(>Chi)
162904	116017.0	NA	NA	NA
162890	115309.5	14	707.4917	0

hi_table_kable

Crude, Adjusted, and Interaction Odds Ratios for Housing Instability
Predictor	OR	95% CI Lower	95% CI Upper	Type
HousingInstabilityYes	4.330	4.173	4.493	Crude
HousingInstabilityYes	2.434	2.335	2.537	Adjusted
RaceEthnicityBlack only, Non-Hispanic	0.610	0.541	0.687	Interaction
RaceEthnicityAmerican Indian/Alaskan Native, Non-Hispanic	0.698	0.560	0.868	Interaction
RaceEthnicityAsian only, Non-Hispanic	0.846	0.613	1.159	Interaction
RaceEthnicityNative Hawaiian or other Pacific Islander only, Non-Hispanic	0.793	0.510	1.232	Interaction
RaceEthnicityMultiracial, Non-Hispanic	0.537	0.349	0.821	Interaction
RaceEthnicityOther Race only, Non-Hispanic	0.800	0.658	0.974	Interaction
RaceEthnicityHispanic	0.542	0.485	0.605	Interaction

Section 4: Model Diagnostics

# Deviance Residuals vs Fitted Values
resid_dev <- residuals(Model_3, type = "deviance")
fitted_vals <- fitted(Model_3)

plot(
  fitted_vals, resid_dev,
  xlab = "Fitted Probabilities",
  ylab = "Deviance Residuals",
  main = "Deviance Residuals vs Fitted",
  pch = 19,
  col = adjustcolor("#5DADE2", 0.5)
)
abline(h = 0, col = "red", lty = 2)

# Calibration check: Hosmer-Lemeshow test
hl_test <- hoslem.test(Model_3$y, fitted(Model_3), g = 10)
cat("\nHosmer-Lemeshow Test:\n")

## 
## Hosmer-Lemeshow Test:

cat("Chi-squared =", round(hl_test$statistic, 2),
    ", df =", hl_test$parameter,
    ", p-value =", hl_test$p.value, "\n")

## Chi-squared = 38.74 , df = 8 , p-value = 5.500582e-06

# Cook's Distance
cooksd <- cooks.distance(Model_3)
plot(
  cooksd, type = "h",
  main = "Cook's Distance",
  xlab = "Observation",
  ylab = "Cook's Distance",
  col = adjustcolor("#5DADE2", 0.5)
)
abline(h = 4/(nrow(brfss_2024_analysis) - length(coef(Model_3))), col = "red", lty = 2)

# Leverage vs Standardized Deviance Residuals
lev <- hatvalues(Model_3)
std_resid <- rstandard(Model_3)

plot(
  lev, std_resid,
  xlab = "Leverage",
  ylab = "Standardized Deviance Residuals",
  main = "Leverage vs Standardized Deviance Residuals",
  pch = 19,
  col = adjustcolor("#5DADE2", 0.5),
  ylim = c(min(std_resid, -3), max(std_resid, 3))  # forces ±3 to show
)
abline(h = c(-3, 3), col = "red", lty = 2)

Section 5: Regression Visualizations

Figure 1: Primary Association Visualization

# Predicted Probabilities
pred_interact <- ggpredict(Model_3, terms = c("HousingInstability", "RaceEthnicity"))

race_colors <- c(
  "White only, Non-Hispanic" = "#1B9E77",
  "Black only, Non-Hispanic" = "red",
  "American Indian/Alaskan Native, Non-Hispanic" = "#E7298A",
  "Asian only, Non-Hispanic" = "#4DBBD5",
  "Native Hawaiian or other Pacific Islander only, Non-Hispanic" = "#6a3d9a",
  "Multiracial, Non-Hispanic" = "blue",
  "Other Race only, Non-Hispanic" = "#FF7F0E",
  "Hispanic" = "#2CA02C"
)

# Plot
p <- ggplot(pred_interact, aes(x = x, y = predicted, color = group, fill = group)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 2) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.2, color = NA) +
  labs(
    title = "Figure 1. Predicted Probability of Frequent Mental Distress",
    subtitle = "By Housing Instability and Race/Ethnicity",
    x = "Housing Instability",
    y = "Predicted Probability",
    color = "Race/Ethnicity",
    fill = "Race/Ethnicity"
  ) +
  scale_color_manual(values = race_colors) +
  scale_fill_manual(values = race_colors) +
  scale_y_continuous(labels = scales::percent_format()) +
  theme_minimal(base_size = 9) +
  theme(legend.position = "bottom")


# Interactive Version
ggplotly(p, width = 1200, height = 500) |>
  layout(
    margin = list(l = 60, r = 50, t = 50, b = 70),
    legend = list(orientation = "h", x = 0.1, y = -0.2)
  )

Figure 2: Full Model Coefficient Plot

# Tidy Model with ORs and 95% CI
tidy_model <- tidy(Model_3, conf.int = TRUE, conf.method = "wald", exponentiate = TRUE) |>
  filter(term != "(Intercept)")

# ggplot for Forest Plot
p <- ggplot(tidy_model, aes(x = estimate, y = fct_reorder(term, estimate))) +
  geom_point(color = "#5DADE2", size = 3) +
  geom_errorbar(aes(xmin = conf.low, xmax = conf.high), height = 0.2, color = "#5DADE2") +
  geom_vline(xintercept = 1, linetype = "dashed", color = "red") + # OR=1 reference
  scale_x_log10() +
  labs(
    title = "Forest Plot - Odds Ratios and 95% CIs",
    subtitle = "Adjusted logistic regression for frequent mental distress",
    x = "Odds Ratio (log scale)",
    y = "Predictors"
  ) +
  theme_minimal(base_size = 12)

ggplotly(p, width = max(1200, length(tidy_model$term) * 50), height = 600)

## `height` was translated to `width`.

Check In 2

EPI 553 - Final Project

Lauren Martin

`04-07-26`

Download Packages and Reading in BRFSS Final Analytic Sample

Download Packages

Reading in Brfss Final Analytic Sample

Section 3: Regression Results

Model 1: Simple Unadjusted Logistic Regression

Model 2: Adjusted Model with Covariates

Model 3: Interaction Between Housing Instability and Mental Distress with Race/Ethnicity as Effect Modifier

Confounding Assessment

Interaction Testing

Section 4: Model Diagnostics

Section 5: Regression Visualizations

Figure 1: Primary Association Visualization