Section 1: Data Loading and Analytical Sample

Loading Packages

# Load required packages
library(haven)
library(tidyverse)
library(dplyr)
library(janitor)
library(gtsummary)
library(ggplot2)
library(plotly)
library(broom)
library(kableExtra)

Dataset Setup

brfss_2024_full <- readRDS("brfss_2024_full.rds")

brfss_2024_clean <- brfss_2024_full %>%
  clean_names()

saveRDS(brfss_2024_clean, "brfss_2024_clean.rds")

Section 2: Variable Selection, Recoding, and Data Cleaning

# Selecting needed variables, renaming variables
brfss_2024_subset <- brfss_2024_clean %>%
  select(
    menthlth,
    sdhbills,
    racegr3,
    age_g,
    sexvar,
    incomg1,
    educag,
    employ1
  ) %>%
  rename(
    MentalDistress = menthlth,
    HousingInstability = sdhbills,
    RaceEthnicity = racegr3,
    Age = age_g,
    Sex = sexvar,
    Income = incomg1,
    Education = educag,
    Employment = employ1
  )

Recoding Variables and Missing Values

brfss_2024_subset <- brfss_2024_subset %>%
  mutate(

# Outcome: Frequent Mental Distress (Binary)
MentalDistress = case_when(
  MentalDistress >= 14 & MentalDistress <= 30 ~ 1,
  MentalDistress < 14 | MentalDistress == 88 ~ 0,
  MentalDistress %in% c(77, 99) ~ NA_real_
),

# Primary Exposure: Housing Instability (Binary)
HousingInstability = case_when(
  HousingInstability == 1 ~ 1, # yes, housing instability present 
  HousingInstability == 2 ~ 0, # no
  HousingInstability %in% c (7,9) ~ NA_real_
),
HousingInstability = factor(HousingInstability,
                            levels = c(0,1),
                            labels = c("No","Yes")),

# Effect Modifier: Race/Ethnicity (Categorical)
RaceEthnicity = case_when(
  RaceEthnicity == 1 ~ "White only, Non-Hispanic",
  RaceEthnicity == 2 ~ "Black only, Non-Hispanic",
  RaceEthnicity == 3 ~ "Other Race only, Non-Hispanic",
  RaceEthnicity == 4 ~ "Multiracial, Non-Hispanic",
  RaceEthnicity == 5 ~ "Hispanic",
  RaceEthnicity == 9 ~ NA_character_
),
RaceEthnicity = factor(RaceEthnicity,
                       levels = c(
                         "White only, Non-Hispanic",
                         "Black only, Non-Hispanic",
                         "Other Race only, Non-Hispanic",
                         "Multiracial, Non-Hispanic",
                         "Hispanic"
                       )),

# Covariates

# Age 
Age = case_when(
  Age == 1 ~ "18-24",
  Age == 2 ~ "25-34",
  Age == 3 ~ "35-44",
  Age == 4 ~ "45-54",
  Age == 5 ~ "55-64",
  Age == 6 ~ "65+"
),
Age = factor(Age,
             levels = c("18-24","25-34","35-44","45-54","55-64","65+")),

# Sex 
Sex = factor(Sex, 
             levels = c(1,2),
             labels = c("Male","Female")),

# Education
Education = case_when(
  Education == 1 ~ 1,
  Education == 2 ~ 2,
  Education == 3 ~ 3,
  Education == 4 ~ 4,
  Education == 9 ~ NA_real_
),
Education = factor(Education,
                   levels = 1:4,
                   labels = c(
                     "Less Than High School",
                     "High School Graduate",
                     "Some College/Technical School",
                     "College Graduate/Technical Graduate"
                   )),

# Income
Income = case_when(
  Income %in% 1:7 ~ Income,
  Income == 9 ~ NA_real_
),
Income = factor(Income,
                levels = 1:7,
                labels = c(
                  "<$15,000",
                  "$15,000 to <$25,000",
                  "$25,000 to <$35,000",
                  "$35,000 to <$50,000",
                  "$50,000 to <$100,000",
                  "$100,000 to <$200,000",
                  "$200,000+"
                )),

# Employment
Employment = case_when(
  Employment %in% 1:8 ~ Employment,
  Employment %in% c(77,99) ~ NA_real_
),
Employment = factor(Employment,
                    levels = 1:8,
                    labels = c(
                      "Employed for Wages",
                      "Self-Employed",
                      "Out of Work 1 Year or More",
                      "Out of Work <1 Year",
                      "Homemaker",
                      "Student",
                      "Retired",
                      "Unable to Work"
                    ))
)

Caluculating Missing Values

# To calculate the missing values 
missing_summary <- brfss_2024_subset %>%
  summarise(across(c(MentalDistress, HousingInstability, RaceEthnicity,
                     Age, Sex, Education, Income, Employment),
                   list(
                     missing_n = ~ sum(is.na(.)),
                     missing_pct = ~ mean(is.na(.)) * 100
                   )))

# View the summary
missing_summary

## # A tibble: 1 × 16
##   MentalDistress_missing_n MentalDistress_missing_pct HousingInstability_missi…¹
##                      <int>                      <dbl>                      <int>
## 1                     8156                       1.78                     256223
## # ℹ abbreviated name: ¹HousingInstability_missing_n
## # ℹ 13 more variables: HousingInstability_missing_pct <dbl>,
## #   RaceEthnicity_missing_n <int>, RaceEthnicity_missing_pct <dbl>,
## #   Age_missing_n <int>, Age_missing_pct <dbl>, Sex_missing_n <int>,
## #   Sex_missing_pct <dbl>, Education_missing_n <int>,
## #   Education_missing_pct <dbl>, Income_missing_n <int>,
## #   Income_missing_pct <dbl>, Employment_missing_n <int>, …

Dropping Missing Values

# Drop any missing values
brfss_2024_analysis <- brfss_2024_subset %>%
  drop_na(MentalDistress, HousingInstability, RaceEthnicity, Age, Sex, Education, Income, Employment)

Frequency of Outcome Variable: Frequent Mental Distress

# Frequncy and percent calculation 
brfss_2024_analysis %>%
  count(MentalDistress) %>%
  mutate(percent = n / sum(n) * 100)

## # A tibble: 2 × 3
##   MentalDistress      n percent
##            <dbl>  <int>   <dbl>
## 1              0 141132    86.6
## 2              1  21796    13.4

Saving Final Analytic Dataset

saveRDS(brfss_2024_analysis, "brfss_2024_analysis.rds")

Section 3: Descriptive Statistics (Table 1)

Table 1: Descriptive Characteristics of the BRFSS 2024 Analytic Sample, n = 162,928

# Read in dataset
brfss_2024_analysis <- readRDS("brfss_2024_analysis.rds")

# Table 1
brfss_2024_analysis %>%
  select(MentalDistress, HousingInstability, RaceEthnicity, Age,
         Sex, Education, Income, Employment) %>%
  
  tbl_summary(
    label = list(
      MentalDistress ~ "Frequent Mental Distress (≥14 days past 30)",
      HousingInstability ~ "Unable to Pay Rent/Mortgage/Utilities (past 12 months)",
      RaceEthnicity ~ "Race/Ethnicity",
      Age ~ "Age Group (years)",
      Sex ~ "Sex",
      Education ~ "Level of Education Completed",
      Income ~ "Household Income",
      Employment ~ "Employment Status"
    ),
    
    statistic = list(
      all_categorical() ~ "{n} ({p}%)"
    ),
    digits = all_continuous() ~ 1,
    missing = "no"
  ) %>%
  
add_n() %>%
# add_overall() %>%
bold_labels() %>%
italicize_levels() %>%
modify_caption("Table 1. Descriptive Characteristics of the BRFSS 2024 Analytic Sample, n = 162,928") %>%
modify_footnote(everything() ~ "Analytic sample size: n = 162,928. Missing values were handled using complete-case analysis; observations with missing data were excluded."
    )%>%
  
  as_flex_table()

Table 1. Descriptive Characteristics of the BRFSS 2024 Analytic Sample, n = 162,928
Characteristic1	N1	N = 162,9281
Frequent Mental Distress (≥14 days past 30)	162,928	21,796 (13%)
Unable to Pay Rent/Mortgage/Utilities (past 12 months)	162,928	15,251 (9.4%)
Race/Ethnicity	162,928
White only, Non-Hispanic		120,228 (74%)
Black only, Non-Hispanic		13,879 (8.5%)
Other Race only, Non-Hispanic		8,563 (5.3%)
Multiracial, Non-Hispanic		3,871 (2.4%)
Hispanic		16,387 (10%)
Age Group (years)	162,928
18-24		7,950 (4.9%)
25-34		16,998 (10%)
35-44		22,464 (14%)
45-54		23,568 (14%)
55-64		29,805 (18%)
65+		62,143 (38%)
Sex	162,928
Male		78,660 (48%)
Female		84,268 (52%)
Level of Education Completed	162,928
Less Than High School		7,704 (4.7%)
High School Graduate		38,546 (24%)
Some College/Technical School		43,753 (27%)
College Graduate/Technical Graduate		72,925 (45%)
Household Income	162,928
<$15,000		8,442 (5.2%)
$15,000 to <$25,000		13,801 (8.5%)
$25,000 to <$35,000		17,601 (11%)
$35,000 to <$50,000		21,884 (13%)
$50,000 to <$100,000		50,495 (31%)
$100,000 to <$200,000		37,516 (23%)
$200,000+		13,189 (8.1%)
Employment Status	162,928
Employed for Wages		71,539 (44%)
Self-Employed		14,494 (8.9%)
Out of Work 1 Year or More		2,699 (1.7%)
Out of Work <1 Year		3,423 (2.1%)
Homemaker		5,711 (3.5%)
Student		2,996 (1.8%)
Retired		53,058 (33%)
Unable to Work		9,008 (5.5%)
1Analytic sample size: n = 162,928. Missing values were handled using complete-case analysis; observations with missing data were excluded.

Section 4: Exploratory Data Analysis (EDA)

Histogram of Outcome Variable: Frequent Mental Distress

A histogram was not used because the outcome variable was recoded into a binary variable (≥ 14 days or < 14 days). Histograms are designed for continuous variables, whereas bar plots are more appropriate for categorical or binary variables.

Bar Plot: Association Between Outcome and Primary Exposure

# Bar plot: Mental Distress by Housing Instability
bp_md_hi <- brfss_2024_analysis %>%
  group_by(HousingInstability) %>%
  summarise(
    Pct_MD = mean(MentalDistress == 1, na.rm = TRUE) * 100,
    .groups = "drop"
  )

ggplot(bp_md_hi, aes(x = HousingInstability, y = Pct_MD)) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Frequent Mental Distress by Housing Instability",
    subtitle = paste0("BRFSS 2024 Analytic Sample (n = ", nrow(brfss_2024_analysis), ")"),
    x = "Housing Instability (0 = No, 1 = Yes)",
    y = "Percent with Frequent Mental Distress"
  ) +
  theme_minimal(base_size = 13)

Addtional Exploratory Plot

# Bar plot: Mental Distress by Race/Ethnicity
bp_md_re <- brfss_2024_analysis %>%
  group_by(RaceEthnicity) %>%
  summarise(
    Pct_MD = mean(MentalDistress == 1, na.rm = TRUE) * 100,
    .groups = "drop"
  )

ggplot(bp_md_re, aes(x = RaceEthnicity, y = Pct_MD, fill = RaceEthnicity)) +
  geom_col(show.legend = FALSE) +
  labs(
    title = "Frequent Mental Distress by Race/Ethnicity",
    subtitle = paste0("BRFSS 2024 Analytic Sample (n = ", nrow(brfss_2024_analysis), ")"),
    x = "Race/Ethnicity Group",
    y = "Percent with Frequent Mental Distress"
  ) +
  theme_minimal(base_size = 10) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Preliminary Regression Preview

# Simple unadjusted logistic regression
pre_model <- glm(MentalDistress ~ HousingInstability,
                 data = brfss_2024_analysis,
                 family = binomial(link = "logit"))

# Display results
tidy(pre_model, exponentiate = TRUE, conf.int = TRUE) %>%
  kable(
    caption = "Preliminary Logistic Regression: Frequent Mental Distress ~ Housing Instability (Unadjusted)",
    digits = 3,
    col.names = c("Term", "Odds Ratio", "Std. Error", "z-statistic", "p-value", "95% CI Lower", "95% CI Upper")
  )

Preliminary Logistic Regression: Frequent Mental Distress ~ Housing Instability (Unadjusted)
Term	Odds Ratio	Std. Error	z-statistic	p-value	95% CI Lower	95% CI Upper
(Intercept)	0.125	0.008	-251.100	0	0.123	0.127
HousingInstabilityYes	4.330	0.019	77.668	0	4.173	4.493

Check In 1

EPI 553 - Final Project

Lauren Martin

`3-10-26`