Section 1: Data Loading and Analytical Sample

Loading Packages

# Load required packages
library(haven)
library(tidyverse)
library(dplyr)
library(janitor)
library(gtsummary)
library(ggplot2)
library(plotly)
library(broom)
library(kableExtra)

Dataset Setup

brfss_2024_full <- readRDS("brfss_2024_full.rds")

brfss_2024_clean <- brfss_2024_full %>%
  clean_names()

saveRDS(brfss_2024_clean, "brfss_2024_clean.rds")

Section 2: Variable Selection, Recoding, and Data Cleaning

# Selecting needed variables, renaming variables
brfss_2024_subset <- brfss_2024_clean %>%
  select(
    menthlth,
    sdhbills,
    racegr3,
    age_g,
    sexvar,
    incomg1,
    educag,
    employ1
  ) %>%
  rename(
    MentalDistress = menthlth,
    HousingInstability = sdhbills,
    RaceEthnicity = racegr3,
    Age = age_g,
    Sex = sexvar,
    Income = incomg1,
    Education = educag,
    Employment = employ1
  )

Recoding Variables and Missing Values

brfss_2024_subset <- brfss_2024_subset %>%
  mutate(

# Outcome: Frequent Mental Distress (Binary)
MentalDistress = case_when(
  MentalDistress >= 14 & MentalDistress <= 30 ~ 1,
  MentalDistress < 14 | MentalDistress == 88 ~ 0,
  MentalDistress %in% c(77, 99) ~ NA_real_
),

# Primary Exposure: Housing Instability (Binary)
HousingInstability = case_when(
  HousingInstability == 1 ~ 1, # yes, housing instability present 
  HousingInstability == 2 ~ 0, # no
  HousingInstability %in% c (7,9) ~ NA_real_
),
HousingInstability = factor(HousingInstability,
                            levels = c(0,1),
                            labels = c("No","Yes")),

# Effect Modifier: Race/Ethnicity (Categorical)
RaceEthnicity = case_when(
  RaceEthnicity == 1 ~ "White only, Non-Hispanic",
  RaceEthnicity == 2 ~ "Black only, Non-Hispanic",
  RaceEthnicity == 3 ~ "Other Race only, Non-Hispanic",
  RaceEthnicity == 4 ~ "Multiracial, Non-Hispanic",
  RaceEthnicity == 5 ~ "Hispanic",
  RaceEthnicity == 9 ~ NA_character_
),
RaceEthnicity = factor(RaceEthnicity,
                       levels = c(
                         "White only, Non-Hispanic",
                         "Black only, Non-Hispanic",
                         "Other Race only, Non-Hispanic",
                         "Multiracial, Non-Hispanic",
                         "Hispanic"
                       )),

# Covariates

# Age 
Age = case_when(
  Age == 1 ~ "18-24",
  Age == 2 ~ "25-34",
  Age == 3 ~ "35-44",
  Age == 4 ~ "45-54",
  Age == 5 ~ "55-64",
  Age == 6 ~ "65+"
),
Age = factor(Age,
             levels = c("18-24","25-34","35-44","45-54","55-64","65+")),

# Sex 
Sex = factor(Sex, 
             levels = c(1,2),
             labels = c("Male","Female")),

# Education
Education = case_when(
  Education == 1 ~ 1,
  Education == 2 ~ 2,
  Education == 3 ~ 3,
  Education == 4 ~ 4,
  Education == 9 ~ NA_real_
),
Education = factor(Education,
                   levels = 1:4,
                   labels = c(
                     "Less Than High School",
                     "High School Graduate",
                     "Some College/Technical School",
                     "College Graduate/Technical Graduate"
                   )),

# Income
Income = case_when(
  Income %in% 1:7 ~ Income,
  Income == 9 ~ NA_real_
),
Income = factor(Income,
                levels = 1:7,
                labels = c(
                  "<$15,000",
                  "$15,000 to <$25,000",
                  "$25,000 to <$35,000",
                  "$35,000 to <$50,000",
                  "$50,000 to <$100,000",
                  "$100,000 to <$200,000",
                  "$200,000+"
                )),

# Employment
Employment = case_when(
  Employment %in% 1:8 ~ Employment,
  Employment %in% c(77,99) ~ NA_real_
),
Employment = factor(Employment,
                    levels = 1:8,
                    labels = c(
                      "Employed for Wages",
                      "Self-Employed",
                      "Out of Work 1 Year or More",
                      "Out of Work <1 Year",
                      "Homemaker",
                      "Student",
                      "Retired",
                      "Unable to Work"
                    ))
)

Caluculating Missing Values

# To calculate the missing values 
missing_summary <- brfss_2024_subset %>%
  summarise(across(c(MentalDistress, HousingInstability, RaceEthnicity,
                     Age, Sex, Education, Income, Employment),
                   list(
                     missing_n = ~ sum(is.na(.)),
                     missing_pct = ~ mean(is.na(.)) * 100
                   )))

# View the summary
missing_summary
## # A tibble: 1 × 16
##   MentalDistress_missing_n MentalDistress_missing_pct HousingInstability_missi…¹
##                      <int>                      <dbl>                      <int>
## 1                     8156                       1.78                     256223
## # ℹ abbreviated name: ¹​HousingInstability_missing_n
## # ℹ 13 more variables: HousingInstability_missing_pct <dbl>,
## #   RaceEthnicity_missing_n <int>, RaceEthnicity_missing_pct <dbl>,
## #   Age_missing_n <int>, Age_missing_pct <dbl>, Sex_missing_n <int>,
## #   Sex_missing_pct <dbl>, Education_missing_n <int>,
## #   Education_missing_pct <dbl>, Income_missing_n <int>,
## #   Income_missing_pct <dbl>, Employment_missing_n <int>, …

Dropping Missing Values

# Drop any missing values
brfss_2024_analysis <- brfss_2024_subset %>%
  drop_na(MentalDistress, HousingInstability, RaceEthnicity, Age, Sex, Education, Income, Employment)

Frequency of Outcome Variable: Frequent Mental Distress

# Frequncy and percent calculation 
brfss_2024_analysis %>%
  count(MentalDistress) %>%
  mutate(percent = n / sum(n) * 100)
## # A tibble: 2 × 3
##   MentalDistress      n percent
##            <dbl>  <int>   <dbl>
## 1              0 141132    86.6
## 2              1  21796    13.4

Saving Final Analytic Dataset

saveRDS(brfss_2024_analysis, "brfss_2024_analysis.rds")

Section 3: Descriptive Statistics (Table 1)

Table 1: Descriptive Characteristics of the BRFSS 2024 Analytic Sample, n = 162,928

# Read in dataset
brfss_2024_analysis <- readRDS("brfss_2024_analysis.rds")

# Table 1
brfss_2024_analysis %>%
  select(MentalDistress, HousingInstability, RaceEthnicity, Age,
         Sex, Education, Income, Employment) %>%
  
  tbl_summary(
    label = list(
      MentalDistress ~ "Frequent Mental Distress (≥14 days past 30)",
      HousingInstability ~ "Unable to Pay Rent/Mortgage/Utilities (past 12 months)",
      RaceEthnicity ~ "Race/Ethnicity",
      Age ~ "Age Group (years)",
      Sex ~ "Sex",
      Education ~ "Level of Education Completed",
      Income ~ "Household Income",
      Employment ~ "Employment Status"
    ),
    
    statistic = list(
      all_categorical() ~ "{n} ({p}%)"
    ),
    digits = all_continuous() ~ 1,
    missing = "no"
  ) %>%
  
add_n() %>%
# add_overall() %>%
bold_labels() %>%
italicize_levels() %>%
modify_caption("Table 1. Descriptive Characteristics of the BRFSS 2024 Analytic Sample, n = 162,928") %>%
modify_footnote(everything() ~ "Analytic sample size: n = 162,928. Missing values were handled using complete-case analysis; observations with missing data were excluded."
    )%>%
  
  as_flex_table()
Table 1. Descriptive Characteristics of the BRFSS 2024 Analytic Sample, n = 162,928

Characteristic1

N1

N = 162,9281

Frequent Mental Distress (≥14 days past 30)

162,928

21,796 (13%)

Unable to Pay Rent/Mortgage/Utilities (past 12 months)

162,928

15,251 (9.4%)

Race/Ethnicity

162,928

White only, Non-Hispanic

120,228 (74%)

Black only, Non-Hispanic

13,879 (8.5%)

Other Race only, Non-Hispanic

8,563 (5.3%)

Multiracial, Non-Hispanic

3,871 (2.4%)

Hispanic

16,387 (10%)

Age Group (years)

162,928

18-24

7,950 (4.9%)

25-34

16,998 (10%)

35-44

22,464 (14%)

45-54

23,568 (14%)

55-64

29,805 (18%)

65+

62,143 (38%)

Sex

162,928

Male

78,660 (48%)

Female

84,268 (52%)

Level of Education Completed

162,928

Less Than High School

7,704 (4.7%)

High School Graduate

38,546 (24%)

Some College/Technical School

43,753 (27%)

College Graduate/Technical Graduate

72,925 (45%)

Household Income

162,928

<$15,000

8,442 (5.2%)

$15,000 to <$25,000

13,801 (8.5%)

$25,000 to <$35,000

17,601 (11%)

$35,000 to <$50,000

21,884 (13%)

$50,000 to <$100,000

50,495 (31%)

$100,000 to <$200,000

37,516 (23%)

$200,000+

13,189 (8.1%)

Employment Status

162,928

Employed for Wages

71,539 (44%)

Self-Employed

14,494 (8.9%)

Out of Work 1 Year or More

2,699 (1.7%)

Out of Work <1 Year

3,423 (2.1%)

Homemaker

5,711 (3.5%)

Student

2,996 (1.8%)

Retired

53,058 (33%)

Unable to Work

9,008 (5.5%)

1Analytic sample size: n = 162,928. Missing values were handled using complete-case analysis; observations with missing data were excluded.

Section 4: Exploratory Data Analysis (EDA)

Histogram of Outcome Variable: Frequent Mental Distress

A histogram was not used because the outcome variable was recoded into a binary variable (≥ 14 days or < 14 days). Histograms are designed for continuous variables, whereas bar plots are more appropriate for categorical or binary variables.


Bar Plot: Association Between Outcome and Primary Exposure

# Bar plot: Mental Distress by Housing Instability
bp_md_hi <- brfss_2024_analysis %>%
  group_by(HousingInstability) %>%
  summarise(
    Pct_MD = mean(MentalDistress == 1, na.rm = TRUE) * 100,
    .groups = "drop"
  )

ggplot(bp_md_hi, aes(x = HousingInstability, y = Pct_MD)) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Frequent Mental Distress by Housing Instability",
    subtitle = paste0("BRFSS 2024 Analytic Sample (n = ", nrow(brfss_2024_analysis), ")"),
    x = "Housing Instability (0 = No, 1 = Yes)",
    y = "Percent with Frequent Mental Distress"
  ) +
  theme_minimal(base_size = 13)


Addtional Exploratory Plot

# Bar plot: Mental Distress by Race/Ethnicity
bp_md_re <- brfss_2024_analysis %>%
  group_by(RaceEthnicity) %>%
  summarise(
    Pct_MD = mean(MentalDistress == 1, na.rm = TRUE) * 100,
    .groups = "drop"
  )

ggplot(bp_md_re, aes(x = RaceEthnicity, y = Pct_MD, fill = RaceEthnicity)) +
  geom_col(show.legend = FALSE) +
  labs(
    title = "Frequent Mental Distress by Race/Ethnicity",
    subtitle = paste0("BRFSS 2024 Analytic Sample (n = ", nrow(brfss_2024_analysis), ")"),
    x = "Race/Ethnicity Group",
    y = "Percent with Frequent Mental Distress"
  ) +
  theme_minimal(base_size = 10) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))


Preliminary Regression Preview

# Simple unadjusted logistic regression
pre_model <- glm(MentalDistress ~ HousingInstability,
                 data = brfss_2024_analysis,
                 family = binomial(link = "logit"))

# Display results
tidy(pre_model, exponentiate = TRUE, conf.int = TRUE) %>%
  kable(
    caption = "Preliminary Logistic Regression: Frequent Mental Distress ~ Housing Instability (Unadjusted)",
    digits = 3,
    col.names = c("Term", "Odds Ratio", "Std. Error", "z-statistic", "p-value", "95% CI Lower", "95% CI Upper")
  )
Preliminary Logistic Regression: Frequent Mental Distress ~ Housing Instability (Unadjusted)
Term Odds Ratio Std. Error z-statistic p-value 95% CI Lower 95% CI Upper
(Intercept) 0.125 0.008 -251.100 0 0.123 0.127
HousingInstabilityYes 4.330 0.019 77.668 0 4.173 4.493