Section 1: Data Loading and Analytical Sample

Loading Packages

# Load required packages
library(haven)
library(tidyverse)
library(dplyr)
library(janitor)
library(gtsummary)
library(ggeffects)
library(ggplot2)
library(plotly)
library(broom)
library(kableExtra)
library(forcats)
library(ResourceSelection)
library(knitr)
library(flextable)

Dataset Setup

brfss_2024_full <- readRDS("brfss_2024_full.rds")

brfss_2024_clean <- brfss_2024_full |>
  clean_names()

saveRDS(brfss_2024_clean, "brfss_2024_clean.rds")

Section 2: Variable Selection, Recoding, and Data Cleaning

# Selecting needed variables, renaming variables
brfss_2024_subset <- brfss_2024_clean |>
  select(
    menthlth,
    sdhbills,
    race,
    age_g,
    sexvar,
    incomg1,
    educag,
    employ1
  ) |>
  rename(
    MentalDistress = menthlth,
    HousingInstability = sdhbills,
    RaceEthnicity = race,
    Age = age_g,
    Sex = sexvar,
    Income = incomg1,
    Education = educag,
    Employment = employ1
  )

Recoding Variables and Missing Values

brfss_2024_subset <- brfss_2024_subset |>
  mutate(

# Outcome: Frequent Mental Distress (Binary)
MentalDistress = case_when(
  MentalDistress >= 14 & MentalDistress <= 30 ~ 1,
  MentalDistress < 14 | MentalDistress == 88 ~ 0,
  MentalDistress %in% c(77, 99) ~ NA_real_
),

# Primary Exposure: Housing Instability (Binary)
HousingInstability = case_when(
  HousingInstability == 1 ~ 1, # yes, housing instability present 
  HousingInstability == 2 ~ 0, # no
  HousingInstability %in% c (7,9) ~ NA_real_
),
HousingInstability = factor(HousingInstability,
                            levels = c(0,1),
                            labels = c("No","Yes")),

# Effect Modifier: Race/Ethnicity (Categorical)
RaceEthnicity = case_when(
  RaceEthnicity == 1 ~ "White only, Non-Hispanic",
  RaceEthnicity == 2 ~ "Black only, Non-Hispanic",
  RaceEthnicity == 3 ~ "American Indian/Alaskan Native, Non-Hispanic",
  RaceEthnicity == 4 ~ "Asian only, Non-Hispanic",
  RaceEthnicity == 5 ~ "Native Hawaiian or other Pacific Islander only, Non-Hispanic",
  RaceEthnicity == 6 ~ "Multiracial, Non-Hispanic",
  RaceEthnicity == 7 ~ "Other Race only, Non-Hispanic",
  RaceEthnicity == 8 ~ "Hispanic",
  RaceEthnicity == 9 ~ NA_character_
),
RaceEthnicity = factor(RaceEthnicity,
                       levels = c(
                         "White only, Non-Hispanic",
                         "Black only, Non-Hispanic",
                         "American Indian/Alaskan Native, Non-Hispanic",
                         "Asian only, Non-Hispanic",
                         "Native Hawaiian or other Pacific Islander only, Non-Hispanic",
                         "Multiracial, Non-Hispanic",
                         "Other Race only, Non-Hispanic",
                         "Hispanic"
                       )),

# Covariates

# Age Group
Age = case_when(
  Age == 1 ~ "18-24",
  Age == 2 ~ "25-34",
  Age == 3 ~ "35-44",
  Age == 4 ~ "45-54",
  Age == 5 ~ "55-64",
  Age == 6 ~ "65+"
),
Age = factor(Age,
             levels = c("18-24","25-34","35-44","45-54","55-64","65+")),

# Sex 
Sex = factor(Sex, 
             levels = c(1,2),
             labels = c("Male","Female")),

# Education Status
Education = case_when(
  Education == 1 ~ 1,
  Education == 2 ~ 2,
  Education == 3 ~ 3,
  Education == 4 ~ 4,
  Education == 9 ~ NA_real_
),
Education = factor(Education,
                   levels = 1:4,
                   labels = c(
                     "Less Than High School",
                     "High School Graduate",
                     "Some College/Technical School",
                     "College Graduate/Technical Graduate"
                   )),

# Household Income
Income = case_when(
  Income %in% 1:7 ~ Income,
  Income == 9 ~ NA_real_
),
Income = factor(Income,
                levels = 1:7,
                labels = c(
                  "<$15,000",
                  "$15,000 to <$25,000",
                  "$25,000 to <$35,000",
                  "$35,000 to <$50,000",
                  "$50,000 to <$100,000",
                  "$100,000 to <$200,000",
                  "$200,000+"
                )),

# Employment Status
Employment = case_when(
  Employment %in% 1:8 ~ Employment,
  Employment %in% c(77,99) ~ NA_real_
),
Employment = factor(Employment,
                    levels = 1:8,
                    labels = c(
                      "Employed for Wages",
                      "Self-Employed",
                      "Out of Work 1 Year or More",
                      "Out of Work <1 Year",
                      "Homemaker",
                      "Student",
                      "Retired",
                      "Unable to Work"
                    ))
)

Caluculating Missing Values

# To calculate the missing values 
missing_summary <- brfss_2024_subset |>
  summarise(across(c(MentalDistress, HousingInstability, RaceEthnicity,
                     Age, Sex, Education, Income, Employment),
                   list(
                     missing_n = ~ sum(is.na(.)),
                     missing_pct = ~ mean(is.na(.)) * 100
                   )))

missing_table <- missing_summary |>
  pivot_longer(
    cols = everything(),
    names_to = c("Variable", ".value"),
    names_pattern = "(.*)_(missing_n|missing_pct)"
  ) |>

  mutate(missing_pct = round(missing_pct, 3))

# Display table
missing_table |>
  kable(caption = "Missing Data Summary",
        col.names = c("Variable", "Missing (n)", "Missing (%)")) |>
  kable_styling(full_width = FALSE)

Missing Data Summary
Variable	Missing (n)	Missing (%)
MentalDistress	8156	1.782
HousingInstability	256223	55.984
RaceEthnicity	9103	1.989
Age	0	0.000
Sex	0	0.000
Education	2363	0.516
Income	87423	19.102
Employment	8402	1.836

Dropping Missing Values

# Drop any missing values
brfss_2024_analysis <- brfss_2024_subset |>
  drop_na(MentalDistress, HousingInstability, RaceEthnicity, Age, Sex, Education, Income, Employment)

Frequency of Outcome Variable: Frequent Mental Distress

# Frequncy and percent calculation 
brfss_2024_analysis |>
  count(MentalDistress) |>
  mutate(percent = n / sum(n) * 100)

## # A tibble: 2 × 3
##   MentalDistress      n percent
##            <dbl>  <int>   <dbl>
## 1              0 141132    86.6
## 2              1  21796    13.4

Saving Final Analytic Dataset

saveRDS(brfss_2024_analysis, "brfss_2024_analysis.rds")

Section 3: Descriptive Statistics (Table 1)

Table 1: Descriptive Characteristics of the BRFSS 2024 Analytic Sample, Stratified by Housing Instability, n = 162,928

# Read in dataset
brfss_2024_analysis <- readRDS("brfss_2024_analysis.rds")

# Table 1
brfss_2024_analysis |>
  select(HousingInstability, MentalDistress, RaceEthnicity, Age,
         Sex, Education, Income, Employment) |>
  
  tbl_summary(
    by = HousingInstability,
    label = list(
      MentalDistress ~ "Frequent Mental Distress (≥14 days past 30)",
      RaceEthnicity ~ "Race/Ethnicity",
      Age ~ "Age Group (years)",
      Sex ~ "Sex",
      Education ~ "Level of Education Completed",
      Income ~ "Household Income",
      Employment ~ "Employment Status"
    ),
    
    statistic = list(
      all_categorical() ~ "{n} ({p}%)"
    ),
    digits = all_continuous() ~ 1,
    missing = "no"
  ) |>
  
add_n() |>
# add_overall() |>
bold_labels() |>
italicize_levels() |>
modify_caption("Table 1. Descriptive Characteristics of the BRFSS 2024 Analytic Sample, Stratified by Housing Instability, n = 162,928") |>
modify_footnote(everything() ~ "Analytic sample size: n = 162,928. Missing values were handled using complete-case analysis; observations with missing data were excluded.") |>
  
  as_flex_table()

Table 1. Descriptive Characteristics of the BRFSS 2024 Analytic Sample, Stratified by Housing Instability, n = 162,928
Characteristic1	N1	No N = 147,6771	Yes N = 15,2511
Frequent Mental Distress (≥14 days past 30)	162,928	16,434 (11%)	5,362 (35%)
Race/Ethnicity	162,928
White only, Non-Hispanic		112,721 (76%)	7,507 (49%)
Black only, Non-Hispanic		11,272 (7.6%)	2,607 (17%)
American Indian/Alaskan Native, Non-Hispanic		2,332 (1.6%)	586 (3.8%)
Asian only, Non-Hispanic		3,713 (2.5%)	243 (1.6%)
Native Hawaiian or other Pacific Islander only, Non-Hispanic		516 (0.3%)	150 (1.0%)
Multiracial, Non-Hispanic		897 (0.6%)	126 (0.8%)
Other Race only, Non-Hispanic		3,233 (2.2%)	638 (4.2%)
Hispanic		12,993 (8.8%)	3,394 (22%)
Age Group (years)	162,928
18-24		6,987 (4.7%)	963 (6.3%)
25-34		14,340 (9.7%)	2,658 (17%)
35-44		19,136 (13%)	3,328 (22%)
45-54		20,376 (14%)	3,192 (21%)
55-64		26,934 (18%)	2,871 (19%)
65+		59,904 (41%)	2,239 (15%)
Sex	162,928
Male		72,532 (49%)	6,128 (40%)
Female		75,145 (51%)	9,123 (60%)
Level of Education Completed	162,928
Less Than High School		5,860 (4.0%)	1,844 (12%)
High School Graduate		33,514 (23%)	5,032 (33%)
Some College/Technical School		38,840 (26%)	4,913 (32%)
College Graduate/Technical Graduate		69,463 (47%)	3,462 (23%)
Household Income	162,928
<$15,000		5,781 (3.9%)	2,661 (17%)
$15,000 to <$25,000		10,557 (7.1%)	3,244 (21%)
$25,000 to <$35,000		14,687 (9.9%)	2,914 (19%)
$35,000 to <$50,000		19,347 (13%)	2,537 (17%)
$50,000 to <$100,000		47,549 (32%)	2,946 (19%)
$100,000 to <$200,000		36,677 (25%)	839 (5.5%)
$200,000+		13,079 (8.9%)	110 (0.7%)
Employment Status	162,928
Employed for Wages		64,882 (44%)	6,657 (44%)
Self-Employed		12,967 (8.8%)	1,527 (10%)
Out of Work 1 Year or More		1,885 (1.3%)	814 (5.3%)
Out of Work <1 Year		2,338 (1.6%)	1,085 (7.1%)
Homemaker		4,971 (3.4%)	740 (4.9%)
Student		2,698 (1.8%)	298 (2.0%)
Retired		51,459 (35%)	1,599 (10%)
Unable to Work		6,477 (4.4%)	2,531 (17%)
1Analytic sample size: n = 162,928. Missing values were handled using complete-case analysis; observations with missing data were excluded.

Section 3.1: Regression Results

Model 1: Simple Unadjusted Logistic Regression

# Model 1: Unadjusted Model Without Covariates
Model_1 <- glm(
  MentalDistress ~ HousingInstability,
  data = brfss_2024_analysis,
  family = binomial(link = "logit")
)

# gtsummary table
tbl_model1 <- tbl_regression(
  Model_1,
  exponentiate = TRUE,
  label = list(HousingInstability ~ "Housing Instability")
) |>
  modify_caption("Model 1: Unadjusted Logistic Regression of Frequent Mental Distress on Housing Instability")

# Flextable
mod1_flex <- as_flex_table(tbl_model1)

mod1_flex

Model 1: Unadjusted Logistic Regression of Frequent Mental Distress on Housing Instability
Characteristic	OR	95% CI	p-value
Housing Instability
No	—	—
Yes	4.33	4.17, 4.49	<0.001
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Model 2: Adjusted Model with Covariates

# Model 2: Adjusted Logistic Regression
Model_2 <- glm(
  MentalDistress ~ HousingInstability + Age + Sex + Education + Income + Employment,
  data = brfss_2024_analysis,
  family = binomial(link = "logit")
)

# gtsummary table
tbl_model2 <- tbl_regression(
  Model_2,
  exponentiate = TRUE, 
  label = list(
    HousingInstability ~ "Housing Instability",
    Age ~ "Age Group",
    Sex ~ "Sex",
    Education ~ "Level of Education Completed",
    Income ~ "Household Income",
    Employment ~ "Employment Status"
  )
) |>
  modify_caption("Model 2: Adjusted Logistic Regression of Frequent Mental Distress on Housing Instability and Covariates")

# Flextable
mod2_flex <- as_flex_table(tbl_model2)

mod2_flex

Model 2: Adjusted Logistic Regression of Frequent Mental Distress on Housing Instability and Covariates
Characteristic	OR	95% CI	p-value
Housing Instability
No	—	—
Yes	2.43	2.33, 2.54	<0.001
Age Group
18-24	—	—
25-34	0.94	0.88, 1.01	0.11
35-44	0.82	0.76, 0.88	<0.001
45-54	0.68	0.64, 0.74	<0.001
55-64	0.49	0.45, 0.53	<0.001
65+	0.33	0.30, 0.36	<0.001
Sex
Male	—	—
Female	1.43	1.39, 1.48	<0.001
Level of Education Completed
Less Than High School	—	—
High School Graduate	1.16	1.09, 1.24	<0.001
Some College/Technical School	1.25	1.17, 1.34	<0.001
College Graduate/Technical Graduate	1.02	0.95, 1.10	0.5
Household Income
<$15,000	—	—
$15,000 to <$25,000	1.01	0.95, 1.08	0.7
$25,000 to <$35,000	0.97	0.91, 1.04	0.5
$35,000 to <$50,000	0.92	0.86, 0.99	0.024
$50,000 to <$100,000	0.77	0.72, 0.82	<0.001
$100,000 to <$200,000	0.62	0.58, 0.67	<0.001
$200,000+	0.42	0.38, 0.47	<0.001
Employment Status
Employed for Wages	—	—
Self-Employed	0.86	0.81, 0.92	<0.001
Out of Work 1 Year or More	1.73	1.58, 1.90	<0.001
Out of Work <1 Year	1.56	1.43, 1.69	<0.001
Homemaker	0.90	0.83, 0.97	0.009
Student	1.04	0.94, 1.16	0.4
Retired	1.06	1.00, 1.12	0.055
Unable to Work	3.48	3.29, 3.69	<0.001
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Model 3: Interaction Between Housing Instability and Mental Distress with Race/Ethnicity as Effect Modifier

# Model 3: Adjusted Logistic Regression with Effect Modifier
Model_3 <- glm(
  MentalDistress ~ HousingInstability * RaceEthnicity + Age + Sex + Education + Income + Employment,
  data = brfss_2024_analysis,
  family = binomial(link = "logit")
)

# gtsummary table
tbl_model3 <- tbl_regression(
  Model_3,
  exponentiate = TRUE,
  label = list(
    HousingInstability ~ "Housing Instability",
    RaceEthnicity ~ "Race/Ethnicity",
    Age ~ "Age Group",
    Sex ~ "Sex",
    Education ~ "Level of Education Completed",
    Income ~ "Household Income",
    Employment ~ "Employment Status"
  )
) |>
  modify_caption("Model 3: Adjusted Logistic Regression of Frequent Mental Distress on Housing Instability with Race/Ethnicity as Effect Modifier")

# Convert to flextable
mod3_flex <- as_flex_table(tbl_model3)

mod3_flex

Model 3: Adjusted Logistic Regression of Frequent Mental Distress on Housing Instability with Race/Ethnicity as Effect Modifier
Characteristic	OR	95% CI	p-value
Housing Instability
No	—	—
Yes	3.12	2.95, 3.29	<0.001
Race/Ethnicity
White only, Non-Hispanic	—	—
Black only, Non-Hispanic	0.80	0.75, 0.85	<0.001
American Indian/Alaskan Native, Non-Hispanic	1.02	0.91, 1.15	0.7
Asian only, Non-Hispanic	0.65	0.57, 0.73	<0.001
Native Hawaiian or other Pacific Islander only, Non-Hispanic	0.89	0.68, 1.15	0.4
Multiracial, Non-Hispanic	1.32	1.08, 1.60	0.005
Other Race only, Non-Hispanic	1.29	1.17, 1.42	<0.001
Hispanic	0.73	0.69, 0.78	<0.001
Age Group
18-24	—	—
25-34	0.95	0.89, 1.02	0.2
35-44	0.81	0.75, 0.87	<0.001
45-54	0.67	0.63, 0.72	<0.001
55-64	0.47	0.44, 0.51	<0.001
65+	0.31	0.29, 0.34	<0.001
Sex
Male	—	—
Female	1.44	1.40, 1.49	<0.001
Level of Education Completed
Less Than High School	—	—
High School Graduate	1.05	0.98, 1.13	0.2
Some College/Technical School	1.12	1.04, 1.20	0.002
College Graduate/Technical Graduate	0.92	0.86, 0.99	0.029
Household Income
<$15,000	—	—
$15,000 to <$25,000	0.99	0.93, 1.06	0.8
$25,000 to <$35,000	0.94	0.88, 1.01	0.074
$35,000 to <$50,000	0.87	0.82, 0.94	<0.001
$50,000 to <$100,000	0.70	0.66, 0.75	<0.001
$100,000 to <$200,000	0.56	0.52, 0.60	<0.001
$200,000+	0.39	0.35, 0.43	<0.001
Employment Status
Employed for Wages	—	—
Self-Employed	0.86	0.81, 0.91	<0.001
Out of Work 1 Year or More	1.72	1.56, 1.89	<0.001
Out of Work <1 Year	1.59	1.46, 1.73	<0.001
Homemaker	0.92	0.85, 1.0	0.038
Student	1.06	0.96, 1.18	0.2
Retired	1.04	0.98, 1.10	0.2
Unable to Work	3.32	3.13, 3.52	<0.001
Housing Instability * Race/Ethnicity
Yes * Black only, Non-Hispanic	0.61	0.54, 0.69	<0.001
Yes * American Indian/Alaskan Native, Non-Hispanic	0.70	0.56, 0.87	0.001
Yes * Asian only, Non-Hispanic	0.85	0.61, 1.16	0.3
Yes * Native Hawaiian or other Pacific Islander only, Non-Hispanic	0.79	0.51, 1.23	0.3
Yes * Multiracial, Non-Hispanic	0.54	0.35, 0.82	0.004
Yes * Other Race only, Non-Hispanic	0.80	0.66, 0.97	0.026
Yes * Hispanic	0.54	0.49, 0.60	<0.001
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Confounding Assessment

confounders <- list(
  "Age" = glm(MentalDistress ~ HousingInstability + Age,
              data = brfss_2024_analysis, family = binomial),

  "Sex" = glm(MentalDistress ~ HousingInstability + Sex,
              data = brfss_2024_analysis, family = binomial),

  "Education" = glm(MentalDistress ~ HousingInstability + Education,
                    data = brfss_2024_analysis, family = binomial),

  "Income" = glm(MentalDistress ~ HousingInstability + Income,
                 data = brfss_2024_analysis, family = binomial),

  "Employment" = glm(MentalDistress ~ HousingInstability + Employment,
                     data = brfss_2024_analysis, family = binomial)
)

b_crude_val <- coef(Model_1)["HousingInstabilityYes"]

conf_table <- purrr::map_dfr(names(confounders), \(name) {
  mod <- confounders[[name]]
  b_adj_val <- coef(mod)["HousingInstabilityYes"]

  tibble::tibble(
    Covariate = name,
    `Crude (log-odds)` = round(b_crude_val, 4),
    `Adjusted (log-odds)` = round(b_adj_val, 4),
    `% Change` = round(abs(b_crude_val - b_adj_val) / abs(b_crude_val) * 100, 1),
    Confounder = ifelse(abs(b_crude_val - b_adj_val) / abs(b_crude_val) * 100 > 10,
                        "Yes (>10%)", "No")
  )
})

knitr::kable(conf_table,
             caption = "Table 2. Assessment of Confounding Effects on the Association Between Housing Instability and Mental Distress") |>
  kableExtra::kable_styling(full_width = FALSE)

Table 2. Assessment of Confounding Effects on the Association Between Housing Instability and Mental Distress
Covariate	Crude (log-odds)	Adjusted (log-odds)	% Change	Confounder
Age	1.4656	1.3123	10.5	Yes (>10%)
Sex	1.4656	1.4410	1.7	No
Education	1.4656	1.3804	5.8	No
Income	1.4656	1.2027	17.9	Yes (>10%)
Employment	1.4656	1.1765	19.7	Yes (>10%)

Interaction Testing

# Likelihood Ratio Test
lrt <- anova(Model_2, Model_3, test = "LRT")

lrt_clean <- as.data.frame(lrt) |>
  dplyr::mutate(across(where(is.numeric), \(x) round(x, 4)))

lrt_table <- kable(
  lrt_clean,
  caption = "Table 3. Likelihood Ratio Test Comparing Models With and Without Interaction Term",
  format = "html"
) |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)


# Crude OR for Housing Instability
crude_hi <- tidy(Model_1, exponentiate = TRUE, conf.int = TRUE) |>
  filter(term == "HousingInstabilityYes") |>
  select(term, estimate, conf.low, conf.high) |>
  mutate(type = "Crude")

# Adjusted OR for Housing Instability (without interaction)
adj_hi <- tidy(Model_2, exponentiate = TRUE, conf.int = TRUE) |>
  filter(term == "HousingInstabilityYes") |>
  select(term, estimate, conf.low, conf.high) |>
  mutate(type = "Adjusted")

# Interaction OR for Housing Instability with Race/Ethnicity
interaction_hi <- tidy(Model_3, exponentiate = TRUE, conf.int = TRUE) |>
  filter(grepl("HousingInstabilityYes:", term)) |>
  select(term, estimate, conf.low, conf.high) |>
  mutate(type = "Interaction")

# Combine Tables
hi_table <- bind_rows(crude_hi, adj_hi, interaction_hi) |>
  mutate(
    across(c(estimate, conf.low, conf.high), \(x) round(x, 3)),
    Predictor = ifelse(
      type == "Interaction",
      gsub("HousingInstabilityYes:", "", term),
      term
    )
  ) |>
  select(Predictor, estimate, conf.low, conf.high, type)

# Display Table
hi_table_kable <- kable(
  hi_table,
  col.names = c("Predictor", "OR", "95% CI Lower", "95% CI Upper", "Type"),
  caption = "Table 4. Crude, Adjusted, and Interaction Odds Ratios for Housing Instability"
) |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

lrt_table

Table 3. Likelihood Ratio Test Comparing Models With and Without Interaction Term
Resid. Df	Resid. Dev	Df	Deviance	Pr(>Chi)
162904	116017.0	NA	NA	NA
162890	115309.5	14	707.4917	0

hi_table_kable

Table 4. Crude, Adjusted, and Interaction Odds Ratios for Housing Instability
Predictor	OR	95% CI Lower	95% CI Upper	Type
HousingInstabilityYes	4.330	4.173	4.493	Crude
HousingInstabilityYes	2.434	2.335	2.537	Adjusted
RaceEthnicityBlack only, Non-Hispanic	0.610	0.541	0.687	Interaction
RaceEthnicityAmerican Indian/Alaskan Native, Non-Hispanic	0.698	0.560	0.868	Interaction
RaceEthnicityAsian only, Non-Hispanic	0.846	0.613	1.159	Interaction
RaceEthnicityNative Hawaiian or other Pacific Islander only, Non-Hispanic	0.793	0.510	1.232	Interaction
RaceEthnicityMultiracial, Non-Hispanic	0.537	0.349	0.821	Interaction
RaceEthnicityOther Race only, Non-Hispanic	0.800	0.658	0.974	Interaction
RaceEthnicityHispanic	0.542	0.485	0.605	Interaction

Section 4: Exploratory Data Analysis (EDA)

Bar Plot: Association Between Outcome and Primary Exposure

# Bar plot: Mental Distress by Housing Instability
bp_md_hi <- brfss_2024_analysis |>
  group_by(HousingInstability) |>
  summarise(
    Pct_MD = mean(MentalDistress == 1, na.rm = TRUE) * 100,
    .groups = "drop"
  )

ggplot(bp_md_hi, aes(x = HousingInstability, y = Pct_MD)) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Figure 1. Frequent Mental Distress by Housing Instability",
    subtitle = "BRFSS 2024 Analytic Sample (n = 162,928)",
    x = "Housing Instability (0 = No, 1 = Yes)",
    y = "Percent with Frequent Mental Distress"
  ) +
  theme_minimal(base_size = 13)

Bar Plot of Effect Modifier

# Bar plot: Mental Distress by Race/Ethnicity
bp_md_re <- brfss_2024_analysis |>
  group_by(RaceEthnicity) |>
  summarise(
    Pct_MD = mean(MentalDistress == 1, na.rm = TRUE) * 100,
    .groups = "drop"
  )

ggplot(bp_md_re, aes(x = RaceEthnicity, y = Pct_MD, fill = RaceEthnicity)) +
  geom_col(show.legend = FALSE) +
  labs(
    title = "Figure 2. Frequent Mental Distress by Race/Ethnicity",
    subtitle = "BRFSS 2024 Analytic Sample (n = 162,928)",
    x = "Race/Ethnicity Group",
    y = "Percent with Frequent Mental Distress"
  ) +
  theme_minimal(base_size = 9) + 
  theme(axis.text.x = element_text(
  angle = 30, hjust = 1, vjust = 1, size = 7), 
  plot.margin = unit(c(12, 12, 12, 12), "pt")
  )

Bar Plot: Visualization of the Interaction Hypothesis

# Calculating percentages
interaction_plot <- brfss_2024_analysis |>
  group_by(RaceEthnicity, HousingInstability) |>
  summarise(
    n = n(),
    mental_distress_count = sum(MentalDistress == 1),
    pct_mental_distress = (mental_distress_count / n) * 100,
    .groups = "drop"
  )

# Plot Interaction 
interaction_plot$RaceEthnicity <- stringr::str_wrap(interaction_plot$RaceEthnicity, width = 15)

ggplot(interaction_plot, aes(x = HousingInstability, y = pct_mental_distress, fill = HousingInstability)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~RaceEthnicity, nrow = 2) +   # splits into 2 rows
  labs(
    title = "Figure 3. Preliminary Visualization: Mental Distress by Housing Instability, Grouped by Race/Ethnicity", 
    subtitle = "BRFSS 2024 Analytic Sample (n = 162,928)",
    x = "Housing Instability",
    y = "Percent with Frequent Mental Distress"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1), 
    plot.title = element_text(size = 10, face = "bold"),     
    plot.subtitle = element_text(size = 9),                
    axis.title = element_text(size = 8),
    axis.text = element_text(size = 7)
  )

Section 4: Model Diagnostics

# Deviance Residuals vs Fitted Values
resid_dev <- residuals(Model_3, type = "deviance")
fitted_vals <- fitted(Model_3)

plot(
  fitted_vals, resid_dev,
  xlab = "Fitted Probabilities",
  ylab = "Deviance Residuals",
  main = "Figure 4. Deviance Residuals vs Fitted",
  pch = 19,
  col = adjustcolor("#5DADE2", 0.5)
)
abline(h = 0, col = "red", lty = 2)

# Calibration check: Hosmer-Lemeshow test
hl_test <- hoslem.test(Model_3$y, fitted(Model_3), g = 10)
cat("\nHosmer-Lemeshow Test:\n")

## 
## Hosmer-Lemeshow Test:

cat("Chi-squared =", round(hl_test$statistic, 2),
    ", df =", hl_test$parameter,
    ", p-value =", hl_test$p.value, "\n")

## Chi-squared = 38.74 , df = 8 , p-value = 5.500582e-06

# Cook's Distance
cooksd <- cooks.distance(Model_3)
plot(
  cooksd, type = "h",
  main = "Figure 5. Cook's Distance",
  xlab = "Observation",
  ylab = "Cook's Distance",
  col = adjustcolor("#5DADE2", 0.5)
)
abline(h = 4/(nrow(brfss_2024_analysis) - length(coef(Model_3))), col = "red", lty = 2)

# Leverage vs Standardized Deviance Residuals
lev <- hatvalues(Model_3)
std_resid <- rstandard(Model_3)

plot(
  lev, std_resid,
  xlab = "Leverage",
  ylab = "Standardized Deviance Residuals",
  main = "Figure 6. Leverage vs Standardized Deviance Residuals",
  pch = 19,
  col = adjustcolor("#5DADE2", 0.5),
  ylim = c(min(std_resid, -3), max(std_resid, 3))  # forces ±3 to show
)
abline(h = c(-3, 3), col = "red", lty = 2)

Section 5: Regression Visualizations

Figure 7: Primary Association Visualization

# Predicted Probabilities
pred_interact <- ggpredict(Model_3, terms = c("HousingInstability", "RaceEthnicity"))

race_colors <- c(
  "White only, Non-Hispanic" = "#1B9E77",
  "Black only, Non-Hispanic" = "red",
  "American Indian/Alaskan Native, Non-Hispanic" = "#E7298A",
  "Asian only, Non-Hispanic" = "#4DBBD5",
  "Native Hawaiian or other Pacific Islander only, Non-Hispanic" = "#6a3d9a",
  "Multiracial, Non-Hispanic" = "blue",
  "Other Race only, Non-Hispanic" = "#FF7F0E",
  "Hispanic" = "#2CA02C"
)

# Plot
p <- ggplot(pred_interact, aes(x = x, y = predicted, color = group, fill = group)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 2) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.2, color = NA) +
  labs(
    title = "Figure 7. Predicted Probability of Frequent Mental Distress",
    subtitle = "By Housing Instability and Race/Ethnicity",
    x = "Housing Instability",
    y = "Predicted Probability",
    color = "Race/Ethnicity",
    fill = "Race/Ethnicity"
  ) +
  scale_color_manual(values = race_colors) +
  scale_fill_manual(values = race_colors) +
  scale_y_continuous(labels = scales::percent_format()) +
  theme_minimal(base_size = 9) +
  theme(legend.position = "bottom")


# Interactive Version
ggplotly(p, width = 1200, height = 500) |>
  layout(
    margin = list(l = 60, r = 50, t = 50, b = 70),
    legend = list(orientation = "h", x = 0.1, y = -0.2)
  )

Figure 8: Forest Plot with Race/Ethnicity as an Effect Modifier

# Tidy model (KEEP EVERYTHING except intercept)
tidy_model <- tidy(Model_3, conf.int = TRUE, exponentiate = TRUE) |>
  filter(term != "(Intercept)")

# Main effect (White reference group effect of housing instability)
main <- tidy_model |>
  filter(term == "HousingInstabilityYes")

main_effect <- main$estimate
main_low <- main$conf.low
main_high <- main$conf.high


# Extract interaction terms
interaction_terms <- tidy_model |>
 filter(grepl("HousingInstabilityYes:RaceEthnicity", term)) |>
  mutate(
    group = case_when(
      term == "HousingInstabilityYes:RaceEthnicityBlack only, Non-Hispanic" ~ "Black",
      term == "HousingInstabilityYes:RaceEthnicityAmerican Indian/Alaskan Native, Non-Hispanic" ~ "American Indian/Alaska Native",
      term == "HousingInstabilityYes:RaceEthnicityAsian only, Non-Hispanic" ~ "Asian",
      term == "HousingInstabilityYes:RaceEthnicityNative Hawaiian or other Pacific Islander only, Non-Hispanic" ~ "Native Hawaiian/Pacific Islander",
      term == "HousingInstabilityYes:RaceEthnicityMultiracial, Non-Hispanic" ~ "Multiracial",
      term == "HousingInstabilityYes:RaceEthnicityOther Race only, Non-Hispanic" ~ "Other", 
      term == "HousingInstabilityYes:RaceEthnicityHispanic" ~ "Hispanic",
      TRUE ~ term
    ),
    
    # Compute group-specific ORs
    OR = main_effect * estimate,
    conf.low = main_low * conf.low,
    conf.high = main_high * conf.high
  ) |>
  select(group, OR, conf.low, conf.high)


# Add White reference group
white_row <- data.frame(
  group = "White, Non-Hispanic",
  OR = main_effect,
  conf.low = main_low,
  conf.high = main_high
)

plot_data <- bind_rows(white_row, interaction_terms)

# Order for nicer visual
plot_data$group <- factor(plot_data$group,
  levels = plot_data$group[order(plot_data$OR)]
)

# Round values for display
plot_data <- plot_data |>
  mutate(
    OR = round(OR, 2),
    conf.low = round(conf.low, 2),
    conf.high = round(conf.high, 2)
  )

# FINAL FOREST PLOT
p <- ggplot(plot_data, aes(x = OR, y = group)) +
  geom_point(size = 3, color = "#2E86C1") +
  geom_errorbar(
    aes(xmin = conf.low, xmax = conf.high),
    width = 0.2,
    color = "#2E86C1"
  ) +
  geom_vline(xintercept = 1, linetype = "dashed", color = "red") +
  scale_x_log10() +
  labs(
    title = str_wrap(
      "Figure 8. Housing Instability and Frequent Mental Distress: Effect Modification by Race/Ethnicity",
      width = 50
    ),
    x = "Odds Ratio (log scale)",
    y = "Race/Ethnicity"
  ) +
  theme_minimal(base_size = 10) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    panel.grid.minor = element_blank()
  )

p

Examining Housing Instability and Mental Distress Among Different Racial/Ethnic Groups: A Cross-Sectional Analysis of 2024 BRFSS Data

EPI 553 - Final Report R Code

Lauren Martin

May 5, 2026

Section 1: Data Loading and Analytical Sample

Loading Packages

Dataset Setup

Section 2: Variable Selection, Recoding, and Data Cleaning

Recoding Variables and Missing Values

Caluculating Missing Values

Dropping Missing Values

Frequency of Outcome Variable: Frequent Mental Distress

Saving Final Analytic Dataset

Section 3: Descriptive Statistics (Table 1)

Table 1: Descriptive Characteristics of the BRFSS 2024 Analytic Sample, Stratified by Housing Instability, n = 162,928

Section 3.1: Regression Results

Model 1: Simple Unadjusted Logistic Regression

Model 2: Adjusted Model with Covariates

Model 3: Interaction Between Housing Instability and Mental Distress with Race/Ethnicity as Effect Modifier

Confounding Assessment

Interaction Testing

Section 4: Exploratory Data Analysis (EDA)

Bar Plot: Association Between Outcome and Primary Exposure

Bar Plot of Effect Modifier

Bar Plot: Visualization of the Interaction Hypothesis

Section 4: Model Diagnostics

Section 5: Regression Visualizations

Figure 7: Primary Association Visualization

Figure 8: Forest Plot with Race/Ethnicity as an Effect Modifier