3-10-26# Load required packages
library(haven)
library(tidyverse)
library(dplyr)
library(janitor)
library(gtsummary)
library(ggplot2)
library(plotly)
library(broom)
library(kableExtra)
brfss_2024_full <- readRDS("brfss_2024_full.rds")
brfss_2024_clean <- brfss_2024_full %>%
clean_names()
saveRDS(brfss_2024_clean, "brfss_2024_clean.rds")
# Selecting needed variables, renaming variables
brfss_2024_subset <- brfss_2024_clean %>%
select(
menthlth,
sdhbills,
racegr3,
age_g,
sexvar,
incomg1,
educag,
employ1
) %>%
rename(
MentalDistress = menthlth,
HousingInstability = sdhbills,
RaceEthnicity = racegr3,
Age = age_g,
Sex = sexvar,
Income = incomg1,
Education = educag,
Employment = employ1
)
brfss_2024_subset <- brfss_2024_subset %>%
mutate(
# Outcome: Frequent Mental Distress (Binary)
MentalDistress = case_when(
MentalDistress >= 14 & MentalDistress <= 30 ~ 1,
MentalDistress < 14 | MentalDistress == 88 ~ 0,
MentalDistress %in% c(77, 99) ~ NA_real_
),
# Primary Exposure: Housing Instability (Binary)
HousingInstability = case_when(
HousingInstability == 1 ~ 1, # yes, housing instability present
HousingInstability == 2 ~ 0, # no
HousingInstability %in% c (7,9) ~ NA_real_
),
HousingInstability = factor(HousingInstability,
levels = c(0,1),
labels = c("No","Yes")),
# Effect Modifier: Race/Ethnicity (Categorical)
RaceEthnicity = case_when(
RaceEthnicity == 1 ~ "White only, Non-Hispanic",
RaceEthnicity == 2 ~ "Black only, Non-Hispanic",
RaceEthnicity == 3 ~ "Other Race only, Non-Hispanic",
RaceEthnicity == 4 ~ "Multiracial, Non-Hispanic",
RaceEthnicity == 5 ~ "Hispanic",
RaceEthnicity == 9 ~ NA_character_
),
RaceEthnicity = factor(RaceEthnicity,
levels = c(
"White only, Non-Hispanic",
"Black only, Non-Hispanic",
"Other Race only, Non-Hispanic",
"Multiracial, Non-Hispanic",
"Hispanic"
)),
# Covariates
# Age
Age = case_when(
Age == 1 ~ "18-24",
Age == 2 ~ "25-34",
Age == 3 ~ "35-44",
Age == 4 ~ "45-54",
Age == 5 ~ "55-64",
Age == 6 ~ "65+"
),
Age = factor(Age,
levels = c("18-24","25-34","35-44","45-54","55-64","65+")),
# Sex
Sex = factor(Sex,
levels = c(1,2),
labels = c("Male","Female")),
# Education
Education = case_when(
Education == 1 ~ 1,
Education == 2 ~ 2,
Education == 3 ~ 3,
Education == 4 ~ 4,
Education == 9 ~ NA_real_
),
Education = factor(Education,
levels = 1:4,
labels = c(
"Less Than High School",
"High School Graduate",
"Some College/Technical School",
"College Graduate/Technical Graduate"
)),
# Income
Income = case_when(
Income %in% 1:7 ~ Income,
Income == 9 ~ NA_real_
),
Income = factor(Income,
levels = 1:7,
labels = c(
"<$15,000",
"$15,000 to <$25,000",
"$25,000 to <$35,000",
"$35,000 to <$50,000",
"$50,000 to <$100,000",
"$100,000 to <$200,000",
"$200,000+"
)),
# Employment
Employment = case_when(
Employment %in% 1:8 ~ Employment,
Employment %in% c(77,99) ~ NA_real_
),
Employment = factor(Employment,
levels = 1:8,
labels = c(
"Employed for Wages",
"Self-Employed",
"Out of Work 1 Year or More",
"Out of Work <1 Year",
"Homemaker",
"Student",
"Retired",
"Unable to Work"
))
)
# To calculate the missing values
missing_summary <- brfss_2024_subset %>%
summarise(across(c(MentalDistress, HousingInstability, RaceEthnicity,
Age, Sex, Education, Income, Employment),
list(
missing_n = ~ sum(is.na(.)),
missing_pct = ~ mean(is.na(.)) * 100
)))
# View the summary
missing_summary
## # A tibble: 1 × 16
## MentalDistress_missing_n MentalDistress_missing_pct HousingInstability_missi…¹
## <int> <dbl> <int>
## 1 8156 1.78 256223
## # ℹ abbreviated name: ¹​HousingInstability_missing_n
## # ℹ 13 more variables: HousingInstability_missing_pct <dbl>,
## # RaceEthnicity_missing_n <int>, RaceEthnicity_missing_pct <dbl>,
## # Age_missing_n <int>, Age_missing_pct <dbl>, Sex_missing_n <int>,
## # Sex_missing_pct <dbl>, Education_missing_n <int>,
## # Education_missing_pct <dbl>, Income_missing_n <int>,
## # Income_missing_pct <dbl>, Employment_missing_n <int>, …
# Drop any missing values
brfss_2024_analysis <- brfss_2024_subset %>%
drop_na(MentalDistress, HousingInstability, RaceEthnicity, Age, Sex, Education, Income, Employment)
# Frequncy and percent calculation
brfss_2024_analysis %>%
count(MentalDistress) %>%
mutate(percent = n / sum(n) * 100)
## # A tibble: 2 × 3
## MentalDistress n percent
## <dbl> <int> <dbl>
## 1 0 141132 86.6
## 2 1 21796 13.4
saveRDS(brfss_2024_analysis, "brfss_2024_analysis.rds")
# Read in dataset
brfss_2024_analysis <- readRDS("brfss_2024_analysis.rds")
# Table 1
brfss_2024_analysis %>%
select(MentalDistress, HousingInstability, RaceEthnicity, Age,
Sex, Education, Income, Employment) %>%
tbl_summary(
label = list(
MentalDistress ~ "Frequent Mental Distress (≥14 days past 30)",
HousingInstability ~ "Unable to Pay Rent/Mortgage/Utilities (past 12 months)",
RaceEthnicity ~ "Race/Ethnicity",
Age ~ "Age Group (years)",
Sex ~ "Sex",
Education ~ "Level of Education Completed",
Income ~ "Household Income",
Employment ~ "Employment Status"
),
statistic = list(
all_categorical() ~ "{n} ({p}%)"
),
digits = all_continuous() ~ 1,
missing = "no"
) %>%
add_n() %>%
# add_overall() %>%
bold_labels() %>%
italicize_levels() %>%
modify_caption("Table 1. Descriptive Characteristics of the BRFSS 2024 Analytic Sample, n = 162,928") %>%
modify_footnote(everything() ~ "Analytic sample size: n = 162,928. Missing values were handled using complete-case analysis; observations with missing data were excluded."
)%>%
as_flex_table()
Characteristic1 | N1 | N = 162,9281 |
|---|---|---|
Frequent Mental Distress (≥14 days past 30) | 162,928 | 21,796 (13%) |
Unable to Pay Rent/Mortgage/Utilities (past 12 months) | 162,928 | 15,251 (9.4%) |
Race/Ethnicity | 162,928 | |
White only, Non-Hispanic | 120,228 (74%) | |
Black only, Non-Hispanic | 13,879 (8.5%) | |
Other Race only, Non-Hispanic | 8,563 (5.3%) | |
Multiracial, Non-Hispanic | 3,871 (2.4%) | |
Hispanic | 16,387 (10%) | |
Age Group (years) | 162,928 | |
18-24 | 7,950 (4.9%) | |
25-34 | 16,998 (10%) | |
35-44 | 22,464 (14%) | |
45-54 | 23,568 (14%) | |
55-64 | 29,805 (18%) | |
65+ | 62,143 (38%) | |
Sex | 162,928 | |
Male | 78,660 (48%) | |
Female | 84,268 (52%) | |
Level of Education Completed | 162,928 | |
Less Than High School | 7,704 (4.7%) | |
High School Graduate | 38,546 (24%) | |
Some College/Technical School | 43,753 (27%) | |
College Graduate/Technical Graduate | 72,925 (45%) | |
Household Income | 162,928 | |
<$15,000 | 8,442 (5.2%) | |
$15,000 to <$25,000 | 13,801 (8.5%) | |
$25,000 to <$35,000 | 17,601 (11%) | |
$35,000 to <$50,000 | 21,884 (13%) | |
$50,000 to <$100,000 | 50,495 (31%) | |
$100,000 to <$200,000 | 37,516 (23%) | |
$200,000+ | 13,189 (8.1%) | |
Employment Status | 162,928 | |
Employed for Wages | 71,539 (44%) | |
Self-Employed | 14,494 (8.9%) | |
Out of Work 1 Year or More | 2,699 (1.7%) | |
Out of Work <1 Year | 3,423 (2.1%) | |
Homemaker | 5,711 (3.5%) | |
Student | 2,996 (1.8%) | |
Retired | 53,058 (33%) | |
Unable to Work | 9,008 (5.5%) | |
1Analytic sample size: n = 162,928. Missing values were handled using complete-case analysis; observations with missing data were excluded. | ||
A histogram was not used because the outcome variable was recoded into a binary variable (≥ 14 days or < 14 days). Histograms are designed for continuous variables, whereas bar plots are more appropriate for categorical or binary variables.
# Bar plot: Mental Distress by Housing Instability
bp_md_hi <- brfss_2024_analysis %>%
group_by(HousingInstability) %>%
summarise(
Pct_MD = mean(MentalDistress == 1, na.rm = TRUE) * 100,
.groups = "drop"
)
ggplot(bp_md_hi, aes(x = HousingInstability, y = Pct_MD)) +
geom_col(fill = "steelblue") +
labs(
title = "Frequent Mental Distress by Housing Instability",
subtitle = paste0("BRFSS 2024 Analytic Sample (n = ", nrow(brfss_2024_analysis), ")"),
x = "Housing Instability (0 = No, 1 = Yes)",
y = "Percent with Frequent Mental Distress"
) +
theme_minimal(base_size = 13)
# Bar plot: Mental Distress by Race/Ethnicity
bp_md_re <- brfss_2024_analysis %>%
group_by(RaceEthnicity) %>%
summarise(
Pct_MD = mean(MentalDistress == 1, na.rm = TRUE) * 100,
.groups = "drop"
)
ggplot(bp_md_re, aes(x = RaceEthnicity, y = Pct_MD, fill = RaceEthnicity)) +
geom_col(show.legend = FALSE) +
labs(
title = "Frequent Mental Distress by Race/Ethnicity",
subtitle = paste0("BRFSS 2024 Analytic Sample (n = ", nrow(brfss_2024_analysis), ")"),
x = "Race/Ethnicity Group",
y = "Percent with Frequent Mental Distress"
) +
theme_minimal(base_size = 10) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Simple unadjusted logistic regression
pre_model <- glm(MentalDistress ~ HousingInstability,
data = brfss_2024_analysis,
family = binomial(link = "logit"))
# Display results
tidy(pre_model, exponentiate = TRUE, conf.int = TRUE) %>%
kable(
caption = "Preliminary Logistic Regression: Frequent Mental Distress ~ Housing Instability (Unadjusted)",
digits = 3,
col.names = c("Term", "Odds Ratio", "Std. Error", "z-statistic", "p-value", "95% CI Lower", "95% CI Upper")
)
| Term | Odds Ratio | Std. Error | z-statistic | p-value | 95% CI Lower | 95% CI Upper |
|---|---|---|---|---|---|---|
| (Intercept) | 0.125 | 0.008 | -251.100 | 0 | 0.123 | 0.127 |
| HousingInstabilityYes | 4.330 | 0.019 | 77.668 | 0 | 4.173 | 4.493 |