Section 1: Data Loading and Analytical Sample

Loading Packages

# Load required packages
library(haven)
library(tidyverse)
library(dplyr)
library(janitor)
library(gtsummary)
library(ggeffects)
library(ggplot2)
library(plotly)
library(broom)
library(kableExtra)
library(forcats)
library(ResourceSelection)
library(knitr)
library(flextable)

Dataset Setup

brfss_2024_full <- readRDS("brfss_2024_full.rds")

brfss_2024_clean <- brfss_2024_full |>
  clean_names()

saveRDS(brfss_2024_clean, "brfss_2024_clean.rds")

Section 2: Variable Selection, Recoding, and Data Cleaning

# Selecting needed variables, renaming variables
brfss_2024_subset <- brfss_2024_clean |>
  select(
    menthlth,
    sdhbills,
    race,
    age_g,
    sexvar,
    incomg1,
    educag,
    employ1
  ) |>
  rename(
    MentalDistress = menthlth,
    HousingInstability = sdhbills,
    RaceEthnicity = race,
    Age = age_g,
    Sex = sexvar,
    Income = incomg1,
    Education = educag,
    Employment = employ1
  )

Recoding Variables and Missing Values

brfss_2024_subset <- brfss_2024_subset |>
  mutate(

# Outcome: Frequent Mental Distress (Binary)
MentalDistress = case_when(
  MentalDistress >= 14 & MentalDistress <= 30 ~ 1,
  MentalDistress < 14 | MentalDistress == 88 ~ 0,
  MentalDistress %in% c(77, 99) ~ NA_real_
),

# Primary Exposure: Housing Instability (Binary)
HousingInstability = case_when(
  HousingInstability == 1 ~ 1, # yes, housing instability present 
  HousingInstability == 2 ~ 0, # no
  HousingInstability %in% c (7,9) ~ NA_real_
),
HousingInstability = factor(HousingInstability,
                            levels = c(0,1),
                            labels = c("No","Yes")),

# Effect Modifier: Race/Ethnicity (Categorical)
RaceEthnicity = case_when(
  RaceEthnicity == 1 ~ "White only, Non-Hispanic",
  RaceEthnicity == 2 ~ "Black only, Non-Hispanic",
  RaceEthnicity == 3 ~ "American Indian/Alaskan Native, Non-Hispanic",
  RaceEthnicity == 4 ~ "Asian only, Non-Hispanic",
  RaceEthnicity == 5 ~ "Native Hawaiian or other Pacific Islander only, Non-Hispanic",
  RaceEthnicity == 6 ~ "Multiracial, Non-Hispanic",
  RaceEthnicity == 7 ~ "Other Race only, Non-Hispanic",
  RaceEthnicity == 8 ~ "Hispanic",
  RaceEthnicity == 9 ~ NA_character_
),
RaceEthnicity = factor(RaceEthnicity,
                       levels = c(
                         "White only, Non-Hispanic",
                         "Black only, Non-Hispanic",
                         "American Indian/Alaskan Native, Non-Hispanic",
                         "Asian only, Non-Hispanic",
                         "Native Hawaiian or other Pacific Islander only, Non-Hispanic",
                         "Multiracial, Non-Hispanic",
                         "Other Race only, Non-Hispanic",
                         "Hispanic"
                       )),

# Covariates

# Age Group
Age = case_when(
  Age == 1 ~ "18-24",
  Age == 2 ~ "25-34",
  Age == 3 ~ "35-44",
  Age == 4 ~ "45-54",
  Age == 5 ~ "55-64",
  Age == 6 ~ "65+"
),
Age = factor(Age,
             levels = c("18-24","25-34","35-44","45-54","55-64","65+")),

# Sex 
Sex = factor(Sex, 
             levels = c(1,2),
             labels = c("Male","Female")),

# Education Status
Education = case_when(
  Education == 1 ~ 1,
  Education == 2 ~ 2,
  Education == 3 ~ 3,
  Education == 4 ~ 4,
  Education == 9 ~ NA_real_
),
Education = factor(Education,
                   levels = 1:4,
                   labels = c(
                     "Less Than High School",
                     "High School Graduate",
                     "Some College/Technical School",
                     "College Graduate/Technical Graduate"
                   )),

# Household Income
Income = case_when(
  Income %in% 1:7 ~ Income,
  Income == 9 ~ NA_real_
),
Income = factor(Income,
                levels = 1:7,
                labels = c(
                  "<$15,000",
                  "$15,000 to <$25,000",
                  "$25,000 to <$35,000",
                  "$35,000 to <$50,000",
                  "$50,000 to <$100,000",
                  "$100,000 to <$200,000",
                  "$200,000+"
                )),

# Employment Status
Employment = case_when(
  Employment %in% 1:8 ~ Employment,
  Employment %in% c(77,99) ~ NA_real_
),
Employment = factor(Employment,
                    levels = 1:8,
                    labels = c(
                      "Employed for Wages",
                      "Self-Employed",
                      "Out of Work 1 Year or More",
                      "Out of Work <1 Year",
                      "Homemaker",
                      "Student",
                      "Retired",
                      "Unable to Work"
                    ))
)

Caluculating Missing Values

# To calculate the missing values 
missing_summary <- brfss_2024_subset |>
  summarise(across(c(MentalDistress, HousingInstability, RaceEthnicity,
                     Age, Sex, Education, Income, Employment),
                   list(
                     missing_n = ~ sum(is.na(.)),
                     missing_pct = ~ mean(is.na(.)) * 100
                   )))

missing_table <- missing_summary |>
  pivot_longer(
    cols = everything(),
    names_to = c("Variable", ".value"),
    names_pattern = "(.*)_(missing_n|missing_pct)"
  ) |>

  mutate(missing_pct = round(missing_pct, 3))

# Display table
missing_table |>
  kable(caption = "Missing Data Summary",
        col.names = c("Variable", "Missing (n)", "Missing (%)")) |>
  kable_styling(full_width = FALSE)
Missing Data Summary
Variable Missing (n) Missing (%)
MentalDistress 8156 1.782
HousingInstability 256223 55.984
RaceEthnicity 9103 1.989
Age 0 0.000
Sex 0 0.000
Education 2363 0.516
Income 87423 19.102
Employment 8402 1.836

Dropping Missing Values

# Drop any missing values
brfss_2024_analysis <- brfss_2024_subset |>
  drop_na(MentalDistress, HousingInstability, RaceEthnicity, Age, Sex, Education, Income, Employment)

Frequency of Outcome Variable: Frequent Mental Distress

# Frequncy and percent calculation 
brfss_2024_analysis |>
  count(MentalDistress) |>
  mutate(percent = n / sum(n) * 100)
## # A tibble: 2 × 3
##   MentalDistress      n percent
##            <dbl>  <int>   <dbl>
## 1              0 141132    86.6
## 2              1  21796    13.4

Saving Final Analytic Dataset

saveRDS(brfss_2024_analysis, "brfss_2024_analysis.rds")

Section 3: Descriptive Statistics (Table 1)

Table 1: Descriptive Characteristics of the BRFSS 2024 Analytic Sample, Stratified by Housing Instability, n = 162,928

# Read in dataset
brfss_2024_analysis <- readRDS("brfss_2024_analysis.rds")

# Table 1
brfss_2024_analysis |>
  select(HousingInstability, MentalDistress, RaceEthnicity, Age,
         Sex, Education, Income, Employment) |>
  
  tbl_summary(
    by = HousingInstability,
    label = list(
      MentalDistress ~ "Frequent Mental Distress (≥14 days past 30)",
      RaceEthnicity ~ "Race/Ethnicity",
      Age ~ "Age Group (years)",
      Sex ~ "Sex",
      Education ~ "Level of Education Completed",
      Income ~ "Household Income",
      Employment ~ "Employment Status"
    ),
    
    statistic = list(
      all_categorical() ~ "{n} ({p}%)"
    ),
    digits = all_continuous() ~ 1,
    missing = "no"
  ) |>
  
add_n() |>
# add_overall() |>
bold_labels() |>
italicize_levels() |>
modify_caption("Table 1. Descriptive Characteristics of the BRFSS 2024 Analytic Sample, Stratified by Housing Instability, n = 162,928") |>
modify_footnote(everything() ~ "Analytic sample size: n = 162,928. Missing values were handled using complete-case analysis; observations with missing data were excluded.") |>
  
  as_flex_table()
Table 1. Descriptive Characteristics of the BRFSS 2024 Analytic Sample, Stratified by Housing Instability, n = 162,928

Characteristic1

N1

No
N = 147,6771

Yes
N = 15,2511

Frequent Mental Distress (≥14 days past 30)

162,928

16,434 (11%)

5,362 (35%)

Race/Ethnicity

162,928

White only, Non-Hispanic

112,721 (76%)

7,507 (49%)

Black only, Non-Hispanic

11,272 (7.6%)

2,607 (17%)

American Indian/Alaskan Native, Non-Hispanic

2,332 (1.6%)

586 (3.8%)

Asian only, Non-Hispanic

3,713 (2.5%)

243 (1.6%)

Native Hawaiian or other Pacific Islander only, Non-Hispanic

516 (0.3%)

150 (1.0%)

Multiracial, Non-Hispanic

897 (0.6%)

126 (0.8%)

Other Race only, Non-Hispanic

3,233 (2.2%)

638 (4.2%)

Hispanic

12,993 (8.8%)

3,394 (22%)

Age Group (years)

162,928

18-24

6,987 (4.7%)

963 (6.3%)

25-34

14,340 (9.7%)

2,658 (17%)

35-44

19,136 (13%)

3,328 (22%)

45-54

20,376 (14%)

3,192 (21%)

55-64

26,934 (18%)

2,871 (19%)

65+

59,904 (41%)

2,239 (15%)

Sex

162,928

Male

72,532 (49%)

6,128 (40%)

Female

75,145 (51%)

9,123 (60%)

Level of Education Completed

162,928

Less Than High School

5,860 (4.0%)

1,844 (12%)

High School Graduate

33,514 (23%)

5,032 (33%)

Some College/Technical School

38,840 (26%)

4,913 (32%)

College Graduate/Technical Graduate

69,463 (47%)

3,462 (23%)

Household Income

162,928

<$15,000

5,781 (3.9%)

2,661 (17%)

$15,000 to <$25,000

10,557 (7.1%)

3,244 (21%)

$25,000 to <$35,000

14,687 (9.9%)

2,914 (19%)

$35,000 to <$50,000

19,347 (13%)

2,537 (17%)

$50,000 to <$100,000

47,549 (32%)

2,946 (19%)

$100,000 to <$200,000

36,677 (25%)

839 (5.5%)

$200,000+

13,079 (8.9%)

110 (0.7%)

Employment Status

162,928

Employed for Wages

64,882 (44%)

6,657 (44%)

Self-Employed

12,967 (8.8%)

1,527 (10%)

Out of Work 1 Year or More

1,885 (1.3%)

814 (5.3%)

Out of Work <1 Year

2,338 (1.6%)

1,085 (7.1%)

Homemaker

4,971 (3.4%)

740 (4.9%)

Student

2,698 (1.8%)

298 (2.0%)

Retired

51,459 (35%)

1,599 (10%)

Unable to Work

6,477 (4.4%)

2,531 (17%)

1Analytic sample size: n = 162,928. Missing values were handled using complete-case analysis; observations with missing data were excluded.

Section 3.1: Regression Results

Model 1: Simple Unadjusted Logistic Regression

# Model 1: Unadjusted Model Without Covariates
Model_1 <- glm(
  MentalDistress ~ HousingInstability,
  data = brfss_2024_analysis,
  family = binomial(link = "logit")
)

# gtsummary table
tbl_model1 <- tbl_regression(
  Model_1,
  exponentiate = TRUE,
  label = list(HousingInstability ~ "Housing Instability")
) |>
  modify_caption("Model 1: Unadjusted Logistic Regression of Frequent Mental Distress on Housing Instability")

# Flextable
mod1_flex <- as_flex_table(tbl_model1)

mod1_flex
Model 1: Unadjusted Logistic Regression of Frequent Mental Distress on Housing Instability

Characteristic

OR

95% CI

p-value

Housing Instability

No

Yes

4.33

4.17, 4.49

<0.001

Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Model 2: Adjusted Model with Covariates

# Model 2: Adjusted Logistic Regression
Model_2 <- glm(
  MentalDistress ~ HousingInstability + Age + Sex + Education + Income + Employment,
  data = brfss_2024_analysis,
  family = binomial(link = "logit")
)

# gtsummary table
tbl_model2 <- tbl_regression(
  Model_2,
  exponentiate = TRUE, 
  label = list(
    HousingInstability ~ "Housing Instability",
    Age ~ "Age Group",
    Sex ~ "Sex",
    Education ~ "Level of Education Completed",
    Income ~ "Household Income",
    Employment ~ "Employment Status"
  )
) |>
  modify_caption("Model 2: Adjusted Logistic Regression of Frequent Mental Distress on Housing Instability and Covariates")

# Flextable
mod2_flex <- as_flex_table(tbl_model2)

mod2_flex
Model 2: Adjusted Logistic Regression of Frequent Mental Distress on Housing Instability and Covariates

Characteristic

OR

95% CI

p-value

Housing Instability

No

Yes

2.43

2.33, 2.54

<0.001

Age Group

18-24

25-34

0.94

0.88, 1.01

0.11

35-44

0.82

0.76, 0.88

<0.001

45-54

0.68

0.64, 0.74

<0.001

55-64

0.49

0.45, 0.53

<0.001

65+

0.33

0.30, 0.36

<0.001

Sex

Male

Female

1.43

1.39, 1.48

<0.001

Level of Education Completed

Less Than High School

High School Graduate

1.16

1.09, 1.24

<0.001

Some College/Technical School

1.25

1.17, 1.34

<0.001

College Graduate/Technical Graduate

1.02

0.95, 1.10

0.5

Household Income

<$15,000

$15,000 to <$25,000

1.01

0.95, 1.08

0.7

$25,000 to <$35,000

0.97

0.91, 1.04

0.5

$35,000 to <$50,000

0.92

0.86, 0.99

0.024

$50,000 to <$100,000

0.77

0.72, 0.82

<0.001

$100,000 to <$200,000

0.62

0.58, 0.67

<0.001

$200,000+

0.42

0.38, 0.47

<0.001

Employment Status

Employed for Wages

Self-Employed

0.86

0.81, 0.92

<0.001

Out of Work 1 Year or More

1.73

1.58, 1.90

<0.001

Out of Work <1 Year

1.56

1.43, 1.69

<0.001

Homemaker

0.90

0.83, 0.97

0.009

Student

1.04

0.94, 1.16

0.4

Retired

1.06

1.00, 1.12

0.055

Unable to Work

3.48

3.29, 3.69

<0.001

Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Model 3: Interaction Between Housing Instability and Mental Distress with Race/Ethnicity as Effect Modifier

# Model 3: Adjusted Logistic Regression with Effect Modifier
Model_3 <- glm(
  MentalDistress ~ HousingInstability * RaceEthnicity + Age + Sex + Education + Income + Employment,
  data = brfss_2024_analysis,
  family = binomial(link = "logit")
)

# gtsummary table
tbl_model3 <- tbl_regression(
  Model_3,
  exponentiate = TRUE,
  label = list(
    HousingInstability ~ "Housing Instability",
    RaceEthnicity ~ "Race/Ethnicity",
    Age ~ "Age Group",
    Sex ~ "Sex",
    Education ~ "Level of Education Completed",
    Income ~ "Household Income",
    Employment ~ "Employment Status"
  )
) |>
  modify_caption("Model 3: Adjusted Logistic Regression of Frequent Mental Distress on Housing Instability with Race/Ethnicity as Effect Modifier")

# Convert to flextable
mod3_flex <- as_flex_table(tbl_model3)

mod3_flex
Model 3: Adjusted Logistic Regression of Frequent Mental Distress on Housing Instability with Race/Ethnicity as Effect Modifier

Characteristic

OR

95% CI

p-value

Housing Instability

No

Yes

3.12

2.95, 3.29

<0.001

Race/Ethnicity

White only, Non-Hispanic

Black only, Non-Hispanic

0.80

0.75, 0.85

<0.001

American Indian/Alaskan Native, Non-Hispanic

1.02

0.91, 1.15

0.7

Asian only, Non-Hispanic

0.65

0.57, 0.73

<0.001

Native Hawaiian or other Pacific Islander only, Non-Hispanic

0.89

0.68, 1.15

0.4

Multiracial, Non-Hispanic

1.32

1.08, 1.60

0.005

Other Race only, Non-Hispanic

1.29

1.17, 1.42

<0.001

Hispanic

0.73

0.69, 0.78

<0.001

Age Group

18-24

25-34

0.95

0.89, 1.02

0.2

35-44

0.81

0.75, 0.87

<0.001

45-54

0.67

0.63, 0.72

<0.001

55-64

0.47

0.44, 0.51

<0.001

65+

0.31

0.29, 0.34

<0.001

Sex

Male

Female

1.44

1.40, 1.49

<0.001

Level of Education Completed

Less Than High School

High School Graduate

1.05

0.98, 1.13

0.2

Some College/Technical School

1.12

1.04, 1.20

0.002

College Graduate/Technical Graduate

0.92

0.86, 0.99

0.029

Household Income

<$15,000

$15,000 to <$25,000

0.99

0.93, 1.06

0.8

$25,000 to <$35,000

0.94

0.88, 1.01

0.074

$35,000 to <$50,000

0.87

0.82, 0.94

<0.001

$50,000 to <$100,000

0.70

0.66, 0.75

<0.001

$100,000 to <$200,000

0.56

0.52, 0.60

<0.001

$200,000+

0.39

0.35, 0.43

<0.001

Employment Status

Employed for Wages

Self-Employed

0.86

0.81, 0.91

<0.001

Out of Work 1 Year or More

1.72

1.56, 1.89

<0.001

Out of Work <1 Year

1.59

1.46, 1.73

<0.001

Homemaker

0.92

0.85, 1.0

0.038

Student

1.06

0.96, 1.18

0.2

Retired

1.04

0.98, 1.10

0.2

Unable to Work

3.32

3.13, 3.52

<0.001

Housing Instability * Race/Ethnicity

Yes * Black only, Non-Hispanic

0.61

0.54, 0.69

<0.001

Yes * American Indian/Alaskan Native, Non-Hispanic

0.70

0.56, 0.87

0.001

Yes * Asian only, Non-Hispanic

0.85

0.61, 1.16

0.3

Yes * Native Hawaiian or other Pacific Islander only, Non-Hispanic

0.79

0.51, 1.23

0.3

Yes * Multiracial, Non-Hispanic

0.54

0.35, 0.82

0.004

Yes * Other Race only, Non-Hispanic

0.80

0.66, 0.97

0.026

Yes * Hispanic

0.54

0.49, 0.60

<0.001

Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Confounding Assessment

confounders <- list(
  "Age" = glm(MentalDistress ~ HousingInstability + Age,
              data = brfss_2024_analysis, family = binomial),

  "Sex" = glm(MentalDistress ~ HousingInstability + Sex,
              data = brfss_2024_analysis, family = binomial),

  "Education" = glm(MentalDistress ~ HousingInstability + Education,
                    data = brfss_2024_analysis, family = binomial),

  "Income" = glm(MentalDistress ~ HousingInstability + Income,
                 data = brfss_2024_analysis, family = binomial),

  "Employment" = glm(MentalDistress ~ HousingInstability + Employment,
                     data = brfss_2024_analysis, family = binomial)
)

b_crude_val <- coef(Model_1)["HousingInstabilityYes"]

conf_table <- purrr::map_dfr(names(confounders), \(name) {
  mod <- confounders[[name]]
  b_adj_val <- coef(mod)["HousingInstabilityYes"]

  tibble::tibble(
    Covariate = name,
    `Crude (log-odds)` = round(b_crude_val, 4),
    `Adjusted (log-odds)` = round(b_adj_val, 4),
    `% Change` = round(abs(b_crude_val - b_adj_val) / abs(b_crude_val) * 100, 1),
    Confounder = ifelse(abs(b_crude_val - b_adj_val) / abs(b_crude_val) * 100 > 10,
                        "Yes (>10%)", "No")
  )
})

knitr::kable(conf_table,
             caption = "Table 2. Assessment of Confounding Effects on the Association Between Housing Instability and Mental Distress") |>
  kableExtra::kable_styling(full_width = FALSE)
Table 2. Assessment of Confounding Effects on the Association Between Housing Instability and Mental Distress
Covariate Crude (log-odds) Adjusted (log-odds) % Change Confounder
Age 1.4656 1.3123 10.5 Yes (>10%)
Sex 1.4656 1.4410 1.7 No
Education 1.4656 1.3804 5.8 No
Income 1.4656 1.2027 17.9 Yes (>10%)
Employment 1.4656 1.1765 19.7 Yes (>10%)

Interaction Testing

# Likelihood Ratio Test
lrt <- anova(Model_2, Model_3, test = "LRT")

lrt_clean <- as.data.frame(lrt) |>
  dplyr::mutate(across(where(is.numeric), \(x) round(x, 4)))

lrt_table <- kable(
  lrt_clean,
  caption = "Table 3. Likelihood Ratio Test Comparing Models With and Without Interaction Term",
  format = "html"
) |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)


# Crude OR for Housing Instability
crude_hi <- tidy(Model_1, exponentiate = TRUE, conf.int = TRUE) |>
  filter(term == "HousingInstabilityYes") |>
  select(term, estimate, conf.low, conf.high) |>
  mutate(type = "Crude")

# Adjusted OR for Housing Instability (without interaction)
adj_hi <- tidy(Model_2, exponentiate = TRUE, conf.int = TRUE) |>
  filter(term == "HousingInstabilityYes") |>
  select(term, estimate, conf.low, conf.high) |>
  mutate(type = "Adjusted")

# Interaction OR for Housing Instability with Race/Ethnicity
interaction_hi <- tidy(Model_3, exponentiate = TRUE, conf.int = TRUE) |>
  filter(grepl("HousingInstabilityYes:", term)) |>
  select(term, estimate, conf.low, conf.high) |>
  mutate(type = "Interaction")

# Combine Tables
hi_table <- bind_rows(crude_hi, adj_hi, interaction_hi) |>
  mutate(
    across(c(estimate, conf.low, conf.high), \(x) round(x, 3)),
    Predictor = ifelse(
      type == "Interaction",
      gsub("HousingInstabilityYes:", "", term),
      term
    )
  ) |>
  select(Predictor, estimate, conf.low, conf.high, type)

# Display Table
hi_table_kable <- kable(
  hi_table,
  col.names = c("Predictor", "OR", "95% CI Lower", "95% CI Upper", "Type"),
  caption = "Table 4. Crude, Adjusted, and Interaction Odds Ratios for Housing Instability"
) |>
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

lrt_table
Table 3. Likelihood Ratio Test Comparing Models With and Without Interaction Term
Resid. Df Resid. Dev Df Deviance Pr(>Chi)
162904 116017.0 NA NA NA
162890 115309.5 14 707.4917 0
hi_table_kable
Table 4. Crude, Adjusted, and Interaction Odds Ratios for Housing Instability
Predictor OR 95% CI Lower 95% CI Upper Type
HousingInstabilityYes 4.330 4.173 4.493 Crude
HousingInstabilityYes 2.434 2.335 2.537 Adjusted
RaceEthnicityBlack only, Non-Hispanic 0.610 0.541 0.687 Interaction
RaceEthnicityAmerican Indian/Alaskan Native, Non-Hispanic 0.698 0.560 0.868 Interaction
RaceEthnicityAsian only, Non-Hispanic 0.846 0.613 1.159 Interaction
RaceEthnicityNative Hawaiian or other Pacific Islander only, Non-Hispanic 0.793 0.510 1.232 Interaction
RaceEthnicityMultiracial, Non-Hispanic 0.537 0.349 0.821 Interaction
RaceEthnicityOther Race only, Non-Hispanic 0.800 0.658 0.974 Interaction
RaceEthnicityHispanic 0.542 0.485 0.605 Interaction

Section 4: Exploratory Data Analysis (EDA)

Bar Plot: Association Between Outcome and Primary Exposure

# Bar plot: Mental Distress by Housing Instability
bp_md_hi <- brfss_2024_analysis |>
  group_by(HousingInstability) |>
  summarise(
    Pct_MD = mean(MentalDistress == 1, na.rm = TRUE) * 100,
    .groups = "drop"
  )

ggplot(bp_md_hi, aes(x = HousingInstability, y = Pct_MD)) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Figure 1. Frequent Mental Distress by Housing Instability",
    subtitle = "BRFSS 2024 Analytic Sample (n = 162,928)",
    x = "Housing Instability (0 = No, 1 = Yes)",
    y = "Percent with Frequent Mental Distress"
  ) +
  theme_minimal(base_size = 13)

Bar Plot of Effect Modifier

# Bar plot: Mental Distress by Race/Ethnicity
bp_md_re <- brfss_2024_analysis |>
  group_by(RaceEthnicity) |>
  summarise(
    Pct_MD = mean(MentalDistress == 1, na.rm = TRUE) * 100,
    .groups = "drop"
  )

ggplot(bp_md_re, aes(x = RaceEthnicity, y = Pct_MD, fill = RaceEthnicity)) +
  geom_col(show.legend = FALSE) +
  labs(
    title = "Figure 2. Frequent Mental Distress by Race/Ethnicity",
    subtitle = "BRFSS 2024 Analytic Sample (n = 162,928)",
    x = "Race/Ethnicity Group",
    y = "Percent with Frequent Mental Distress"
  ) +
  theme_minimal(base_size = 9) + 
  theme(axis.text.x = element_text(
  angle = 30, hjust = 1, vjust = 1, size = 7), 
  plot.margin = unit(c(12, 12, 12, 12), "pt")
  )

Bar Plot: Visualization of the Interaction Hypothesis

# Calculating percentages
interaction_plot <- brfss_2024_analysis |>
  group_by(RaceEthnicity, HousingInstability) |>
  summarise(
    n = n(),
    mental_distress_count = sum(MentalDistress == 1),
    pct_mental_distress = (mental_distress_count / n) * 100,
    .groups = "drop"
  )

# Plot Interaction 
interaction_plot$RaceEthnicity <- stringr::str_wrap(interaction_plot$RaceEthnicity, width = 15)

ggplot(interaction_plot, aes(x = HousingInstability, y = pct_mental_distress, fill = HousingInstability)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~RaceEthnicity, nrow = 2) +   # splits into 2 rows
  labs(
    title = "Figure 3. Preliminary Visualization: Mental Distress by Housing Instability, Grouped by Race/Ethnicity", 
    subtitle = "BRFSS 2024 Analytic Sample (n = 162,928)",
    x = "Housing Instability",
    y = "Percent with Frequent Mental Distress"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1), 
    plot.title = element_text(size = 10, face = "bold"),     
    plot.subtitle = element_text(size = 9),                
    axis.title = element_text(size = 8),
    axis.text = element_text(size = 7)
  )

Section 4: Model Diagnostics

# Deviance Residuals vs Fitted Values
resid_dev <- residuals(Model_3, type = "deviance")
fitted_vals <- fitted(Model_3)

plot(
  fitted_vals, resid_dev,
  xlab = "Fitted Probabilities",
  ylab = "Deviance Residuals",
  main = "Figure 4. Deviance Residuals vs Fitted",
  pch = 19,
  col = adjustcolor("#5DADE2", 0.5)
)
abline(h = 0, col = "red", lty = 2)

# Calibration check: Hosmer-Lemeshow test
hl_test <- hoslem.test(Model_3$y, fitted(Model_3), g = 10)
cat("\nHosmer-Lemeshow Test:\n")
## 
## Hosmer-Lemeshow Test:
cat("Chi-squared =", round(hl_test$statistic, 2),
    ", df =", hl_test$parameter,
    ", p-value =", hl_test$p.value, "\n")
## Chi-squared = 38.74 , df = 8 , p-value = 5.500582e-06
# Cook's Distance
cooksd <- cooks.distance(Model_3)
plot(
  cooksd, type = "h",
  main = "Figure 5. Cook's Distance",
  xlab = "Observation",
  ylab = "Cook's Distance",
  col = adjustcolor("#5DADE2", 0.5)
)
abline(h = 4/(nrow(brfss_2024_analysis) - length(coef(Model_3))), col = "red", lty = 2)

# Leverage vs Standardized Deviance Residuals
lev <- hatvalues(Model_3)
std_resid <- rstandard(Model_3)

plot(
  lev, std_resid,
  xlab = "Leverage",
  ylab = "Standardized Deviance Residuals",
  main = "Figure 6. Leverage vs Standardized Deviance Residuals",
  pch = 19,
  col = adjustcolor("#5DADE2", 0.5),
  ylim = c(min(std_resid, -3), max(std_resid, 3))  # forces ±3 to show
)
abline(h = c(-3, 3), col = "red", lty = 2)

Section 5: Regression Visualizations

Figure 7: Primary Association Visualization

# Predicted Probabilities
pred_interact <- ggpredict(Model_3, terms = c("HousingInstability", "RaceEthnicity"))

race_colors <- c(
  "White only, Non-Hispanic" = "#1B9E77",
  "Black only, Non-Hispanic" = "red",
  "American Indian/Alaskan Native, Non-Hispanic" = "#E7298A",
  "Asian only, Non-Hispanic" = "#4DBBD5",
  "Native Hawaiian or other Pacific Islander only, Non-Hispanic" = "#6a3d9a",
  "Multiracial, Non-Hispanic" = "blue",
  "Other Race only, Non-Hispanic" = "#FF7F0E",
  "Hispanic" = "#2CA02C"
)

# Plot
p <- ggplot(pred_interact, aes(x = x, y = predicted, color = group, fill = group)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 2) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.2, color = NA) +
  labs(
    title = "Figure 7. Predicted Probability of Frequent Mental Distress",
    subtitle = "By Housing Instability and Race/Ethnicity",
    x = "Housing Instability",
    y = "Predicted Probability",
    color = "Race/Ethnicity",
    fill = "Race/Ethnicity"
  ) +
  scale_color_manual(values = race_colors) +
  scale_fill_manual(values = race_colors) +
  scale_y_continuous(labels = scales::percent_format()) +
  theme_minimal(base_size = 9) +
  theme(legend.position = "bottom")


# Interactive Version
ggplotly(p, width = 1200, height = 500) |>
  layout(
    margin = list(l = 60, r = 50, t = 50, b = 70),
    legend = list(orientation = "h", x = 0.1, y = -0.2)
  )

Figure 8: Forest Plot with Race/Ethnicity as an Effect Modifier

# Tidy model (KEEP EVERYTHING except intercept)
tidy_model <- tidy(Model_3, conf.int = TRUE, exponentiate = TRUE) |>
  filter(term != "(Intercept)")

# Main effect (White reference group effect of housing instability)
main <- tidy_model |>
  filter(term == "HousingInstabilityYes")

main_effect <- main$estimate
main_low <- main$conf.low
main_high <- main$conf.high


# Extract interaction terms
interaction_terms <- tidy_model |>
 filter(grepl("HousingInstabilityYes:RaceEthnicity", term)) |>
  mutate(
    group = case_when(
      term == "HousingInstabilityYes:RaceEthnicityBlack only, Non-Hispanic" ~ "Black",
      term == "HousingInstabilityYes:RaceEthnicityAmerican Indian/Alaskan Native, Non-Hispanic" ~ "American Indian/Alaska Native",
      term == "HousingInstabilityYes:RaceEthnicityAsian only, Non-Hispanic" ~ "Asian",
      term == "HousingInstabilityYes:RaceEthnicityNative Hawaiian or other Pacific Islander only, Non-Hispanic" ~ "Native Hawaiian/Pacific Islander",
      term == "HousingInstabilityYes:RaceEthnicityMultiracial, Non-Hispanic" ~ "Multiracial",
      term == "HousingInstabilityYes:RaceEthnicityOther Race only, Non-Hispanic" ~ "Other", 
      term == "HousingInstabilityYes:RaceEthnicityHispanic" ~ "Hispanic",
      TRUE ~ term
    ),
    
    # Compute group-specific ORs
    OR = main_effect * estimate,
    conf.low = main_low * conf.low,
    conf.high = main_high * conf.high
  ) |>
  select(group, OR, conf.low, conf.high)


# Add White reference group
white_row <- data.frame(
  group = "White, Non-Hispanic",
  OR = main_effect,
  conf.low = main_low,
  conf.high = main_high
)

plot_data <- bind_rows(white_row, interaction_terms)

# Order for nicer visual
plot_data$group <- factor(plot_data$group,
  levels = plot_data$group[order(plot_data$OR)]
)

# Round values for display
plot_data <- plot_data |>
  mutate(
    OR = round(OR, 2),
    conf.low = round(conf.low, 2),
    conf.high = round(conf.high, 2)
  )

# FINAL FOREST PLOT
p <- ggplot(plot_data, aes(x = OR, y = group)) +
  geom_point(size = 3, color = "#2E86C1") +
  geom_errorbar(
    aes(xmin = conf.low, xmax = conf.high),
    width = 0.2,
    color = "#2E86C1"
  ) +
  geom_vline(xintercept = 1, linetype = "dashed", color = "red") +
  scale_x_log10() +
  labs(
    title = str_wrap(
      "Figure 8. Housing Instability and Frequent Mental Distress: Effect Modification by Race/Ethnicity",
      width = 50
    ),
    x = "Odds Ratio (log scale)",
    y = "Race/Ethnicity"
  ) +
  theme_minimal(base_size = 10) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    panel.grid.minor = element_blank()
  )

p