This file extends the headline ownership and cost-burden analyses by
introducing race as a second axis of stratification. The substantive
question: are the cohort patterns in m1_pi and m2 uniform across
race groups, or does the affordability–ownership relationship operate
differently for White, Black, Asian/PI, and Other-race households?
The analysis proceeds in four steps: (1) descriptive ownership and
cost-burden rates by RACE_cat × GENERATION; (2) stratified ownership
models (one per race group) for interpretation; (3) a single three-way
FAI × GENERATION × RACE_cat interaction model for formal hypothesis
tests; (4) a parallel cost-burden version; and (5) a faceted
predicted-probability plot.
library(data.table)
library(dplyr)
library(ggplot2)
library(lmtest)
library(sandwich)
in_dir <- "~/Desktop/DA6833/FinancialNihilism"
model_dt <- readRDS(file.path(in_dir, "model_dt.rds"))
setDT(model_dt)
stopifnot(levels(model_dt$GENERATION) ==
c("Boomer", "GenX", "Millennial", "GenZ"))
stopifnot(levels(model_dt$RACE_cat) ==
c("White", "Black", "Asian/PI", "Other"))
cat("Rows loaded :", nrow(model_dt), "\n")
## Rows loaded : 3150831
cat("Years :", paste(sort(unique(model_dt$YEAR)), collapse = ", "), "\n")
## Years : 1980, 1990, 2000, 2010, 2015, 2019, 2022, 2024
own_table <- model_dt[, .(
own_rate = round(weighted.mean(own, w), 3),
n = .N
), by = .(RACE_cat, GENERATION)][
order(RACE_cat, GENERATION)
]
own_table
## RACE_cat GENERATION own_rate n
## <fctr> <fctr> <num> <int>
## 1: White Boomer 0.606 604149
## 2: White GenX 0.612 509733
## 3: White Millennial 0.572 1116390
## 4: White GenZ 0.505 85959
## 5: Black Boomer 0.389 76113
## 6: Black GenX 0.400 71823
## 7: Black Millennial 0.365 139688
## 8: Black GenZ 0.366 11133
## 9: Asian/PI Boomer 0.479 19389
## 10: Asian/PI GenX 0.493 35160
## 11: Asian/PI Millennial 0.494 116982
## 12: Asian/PI GenZ 0.441 10321
## 13: Other Boomer 0.384 24416
## 14: Other GenX 0.409 68270
## 15: Other Millennial 0.449 228989
## 16: Other GenZ 0.441 32316
burden_table <- model_dt[!is.na(cost_burdened), .(
share_burdened = round(weighted.mean(cost_burdened, w), 3),
n = .N
), by = .(RACE_cat, GENERATION)][
order(RACE_cat, GENERATION)
]
burden_table
## RACE_cat GENERATION share_burdened n
## <fctr> <fctr> <num> <int>
## 1: White Boomer 0.245 250991
## 2: White GenX 0.292 202058
## 3: White Millennial 0.334 444915
## 4: White GenZ 0.336 38833
## 5: Black Boomer 0.327 47595
## 6: Black GenX 0.384 43272
## 7: Black Millennial 0.450 84547
## 8: Black GenZ 0.438 6341
## 9: Asian/PI Boomer 0.322 10540
## 10: Asian/PI GenX 0.298 18280
## 11: Asian/PI Millennial 0.308 56204
## 12: Asian/PI GenZ 0.352 5395
## 13: Other Boomer 0.347 15300
## 14: Other GenX 0.378 40576
## 15: Other Millennial 0.427 117316
## 16: Other GenZ 0.402 16688
fai_table <- model_dt[!is.na(FAI_pi), .(
fai_pi_median = round(median(FAI_pi), 3),
n = .N
), by = .(RACE_cat, GENERATION)][
order(RACE_cat, GENERATION)
]
fai_table
## RACE_cat GENERATION fai_pi_median n
## <fctr> <fctr> <num> <int>
## 1: White Boomer -0.357 604149
## 2: White GenX -0.363 509733
## 3: White Millennial -0.266 1116390
## 4: White GenZ -0.175 85959
## 5: Black Boomer -0.275 76113
## 6: Black GenX -0.248 71823
## 7: Black Millennial -0.062 139688
## 8: Black GenZ -0.013 11133
## 9: Asian/PI Boomer -0.249 19389
## 10: Asian/PI GenX -0.306 35160
## 11: Asian/PI Millennial -0.212 116982
## 12: Asian/PI GenZ -0.144 10321
## 13: Other Boomer -0.269 24416
## 14: Other GenX -0.251 68270
## 15: Other Millennial -0.115 228989
## 16: Other GenZ -0.094 32316
Re-fitting m1_pi separately within each RACE_cat. Each stratified
model retains the FAI × GENERATION interaction structure but drops
RACE_cat from the right-hand side. Cluster-robust SEs by YEAR per the
parent specification.
fit_stratified_ownership <- function(race_label) {
dat <- model_dt[RACE_cat == race_label & !is.na(FAI_pi)]
fit <- glm(
own ~ FAI_pi * GENERATION + MARST + HISPAN + EDUC_cat,
data = dat,
weights = w,
family = binomial(link = "logit")
)
vc <- vcovCL(fit, cluster = ~YEAR, type = "HC1")
list(fit = fit, vcov = vc, n = nrow(dat), race = race_label)
}
strat_own <- lapply(levels(model_dt$RACE_cat), fit_stratified_ownership)
names(strat_own) <- levels(model_dt$RACE_cat)
extract_fai_slopes <- function(s) {
cf <- coef(s$fit)
vc <- s$vcov
rows <- c("FAI_pi", "FAI_pi:GENERATIONGenX",
"FAI_pi:GENERATIONMillennial", "FAI_pi:GENERATIONGenZ")
data.table(
race = s$race,
n = s$n,
term = rows,
estimate = cf[rows],
std_error = sqrt(diag(vc))[rows]
)
}
slope_dt <- rbindlist(lapply(strat_own, extract_fai_slopes))
slope_dt[, z := estimate / std_error]
slope_dt[, p := 2 * pnorm(-abs(z))]
slope_dt[, .(race, n, term, estimate = round(estimate, 3),
std_error = round(std_error, 3),
z = round(z, 2),
p = signif(p, 3))]
## race n term estimate std_error z
## <char> <int> <char> <num> <num> <num>
## 1: White 2316231 FAI_pi -0.845 0.143 -5.89
## 2: White 2316231 FAI_pi:GENERATIONGenX 0.107 0.172 0.62
## 3: White 2316231 FAI_pi:GENERATIONMillennial 0.267 0.159 1.67
## 4: White 2316231 FAI_pi:GENERATIONGenZ 0.173 0.144 1.21
## 5: Black 298757 FAI_pi -1.753 0.375 -4.67
## 6: Black 298757 FAI_pi:GENERATIONGenX 0.367 0.406 0.90
## 7: Black 298757 FAI_pi:GENERATIONMillennial 0.736 0.395 1.87
## 8: Black 298757 FAI_pi:GENERATIONGenZ 0.687 0.379 1.81
## 9: Asian/PI 181852 FAI_pi -0.177 0.113 -1.56
## 10: Asian/PI 181852 FAI_pi:GENERATIONGenX 0.101 0.143 0.71
## 11: Asian/PI 181852 FAI_pi:GENERATIONMillennial 0.203 0.114 1.78
## 12: Asian/PI 181852 FAI_pi:GENERATIONGenZ 0.066 0.113 0.59
## 13: Other 353991 FAI_pi -0.748 0.044 -17.12
## 14: Other 353991 FAI_pi:GENERATIONGenX -0.051 0.083 -0.61
## 15: Other 353991 FAI_pi:GENERATIONMillennial 0.291 0.057 5.09
## 16: Other 353991 FAI_pi:GENERATIONGenZ 0.260 0.051 5.12
## p
## <num>
## 1: 3.81e-09
## 2: 5.36e-01
## 3: 9.44e-02
## 4: 2.27e-01
## 5: 3.03e-06
## 6: 3.67e-01
## 7: 6.22e-02
## 8: 6.99e-02
## 9: 1.20e-01
## 10: 4.80e-01
## 11: 7.54e-02
## 12: 5.58e-01
## 13: 9.84e-66
## 14: 5.40e-01
## 15: 3.55e-07
## 16: 2.98e-07
cohort_slope_grid <- function(s) {
cf <- coef(s$fit)
base <- cf["FAI_pi"]
data.table(
race = s$race,
GENERATION = c("Boomer", "GenX", "Millennial", "GenZ"),
fai_slope = c(
base,
base + cf["FAI_pi:GENERATIONGenX"],
base + cf["FAI_pi:GENERATIONMillennial"],
base + cf["FAI_pi:GENERATIONGenZ"]
)
)
}
slope_grid <- rbindlist(lapply(strat_own, cohort_slope_grid))
dcast(slope_grid, race ~ GENERATION, value.var = "fai_slope")
## Key: <race>
## race Boomer GenX GenZ Millennial
## <char> <num> <num> <num> <num>
## 1: Asian/PI -0.1765461 -0.07540767 -0.1101335 0.02647164
## 2: Black -1.7528831 -1.38631983 -1.0654289 -1.01680026
## 3: Other -0.7484163 -0.79901096 -0.4884985 -0.45741326
## 4: White -0.8445244 -0.73789929 -0.6711334 -0.57783852
Single model on the full sample, used for formal hypothesis tests on whether the FAI × GENERATION pattern differs by race. The stratified models in Section 2 are preferred for interpretation.
m_threeway <- glm(
own ~ FAI_pi * GENERATION * RACE_cat + MARST + HISPAN + EDUC_cat,
data = model_dt[!is.na(FAI_pi)],
weights = w,
family = binomial(link = "logit")
)
vc_threeway <- vcovCL(m_threeway, cluster = ~YEAR, type = "HC1")
coef_threeway <- coeftest(m_threeway, vcov. = vc_threeway)
keep_rows <- grep("FAI_pi|GENERATION|RACE_cat",
rownames(coef_threeway), value = TRUE)
coef_threeway[keep_rows, ]
## Estimate Std. Error
## FAI_pi -0.855978016 0.14435227
## GENERATIONGenX 0.155127388 0.11805255
## GENERATIONMillennial 0.179324173 0.10930434
## GENERATIONGenZ 0.032662831 0.07474544
## RACE_catBlack -0.838141231 0.06479362
## RACE_catAsian/PI -0.357081234 0.01510785
## RACE_catOther -0.388121821 0.05031558
## FAI_pi:GENERATIONGenX 0.108376285 0.17273886
## FAI_pi:GENERATIONMillennial 0.274255731 0.16057718
## FAI_pi:GENERATIONGenZ 0.187225887 0.14496744
## FAI_pi:RACE_catBlack -0.812154608 0.23758598
## FAI_pi:RACE_catAsian/PI 0.687196087 0.03838870
## FAI_pi:RACE_catOther 0.093366455 0.09693140
## GENERATIONGenX:RACE_catBlack 0.074609061 0.05601653
## GENERATIONMillennial:RACE_catBlack 0.205050554 0.07100830
## GENERATIONGenZ:RACE_catBlack 0.508740865 0.06963041
## GENERATIONGenX:RACE_catAsian/PI -0.095368473 0.03938321
## GENERATIONMillennial:RACE_catAsian/PI -0.035907258 0.02427966
## GENERATIONGenZ:RACE_catAsian/PI 0.115650013 0.01950351
## GENERATIONGenX:RACE_catOther 0.003652974 0.05770979
## GENERATIONMillennial:RACE_catOther 0.286934906 0.04740081
## GENERATIONGenZ:RACE_catOther 0.518599710 0.04106982
## FAI_pi:GENERATIONGenX:RACE_catBlack 0.236846821 0.25464226
## FAI_pi:GENERATIONMillennial:RACE_catBlack 0.387607480 0.24360658
## FAI_pi:GENERATIONGenZ:RACE_catBlack 0.397390730 0.24672868
## FAI_pi:GENERATIONGenX:RACE_catAsian/PI 0.007208996 0.04245640
## FAI_pi:GENERATIONMillennial:RACE_catAsian/PI -0.059861911 0.08666807
## FAI_pi:GENERATIONGenZ:RACE_catAsian/PI -0.127262816 0.03898802
## FAI_pi:GENERATIONGenX:RACE_catOther -0.155820835 0.10314946
## FAI_pi:GENERATIONMillennial:RACE_catOther 0.019576158 0.10513503
## FAI_pi:GENERATIONGenZ:RACE_catOther 0.063003734 0.09834113
## z value Pr(>|z|)
## FAI_pi -5.92978544 3.033308e-09
## GENERATIONGenX 1.31405372 1.888281e-01
## GENERATIONMillennial 1.64059517 1.008815e-01
## GENERATIONGenZ 0.43698762 6.621203e-01
## RACE_catBlack -12.93555267 2.835765e-38
## RACE_catAsian/PI -23.63547037 1.665149e-123
## RACE_catOther -7.71375105 1.221727e-14
## FAI_pi:GENERATIONGenX 0.62739958 5.303973e-01
## FAI_pi:GENERATIONMillennial 1.70793719 8.764800e-02
## FAI_pi:GENERATIONGenZ 1.29150305 1.965293e-01
## FAI_pi:RACE_catBlack -3.41836089 6.299951e-04
## FAI_pi:RACE_catAsian/PI 17.90099858 1.158264e-71
## FAI_pi:RACE_catOther 0.96322198 3.354361e-01
## GENERATIONGenX:RACE_catBlack 1.33191151 1.828893e-01
## GENERATIONMillennial:RACE_catBlack 2.88769855 3.880716e-03
## GENERATIONGenZ:RACE_catBlack 7.30630288 2.745928e-13
## GENERATIONGenX:RACE_catAsian/PI -2.42155182 1.545440e-02
## GENERATIONMillennial:RACE_catAsian/PI -1.47890304 1.391662e-01
## GENERATIONGenZ:RACE_catAsian/PI 5.92970168 3.034856e-09
## GENERATIONGenX:RACE_catOther 0.06329904 9.495284e-01
## GENERATIONMillennial:RACE_catOther 6.05337620 1.418411e-09
## GENERATIONGenZ:RACE_catOther 12.62727068 1.493697e-36
## FAI_pi:GENERATIONGenX:RACE_catBlack 0.93011593 3.523111e-01
## FAI_pi:GENERATIONMillennial:RACE_catBlack 1.59112072 1.115824e-01
## FAI_pi:GENERATIONGenZ:RACE_catBlack 1.61063855 1.072585e-01
## FAI_pi:GENERATIONGenX:RACE_catAsian/PI 0.16979763 8.651693e-01
## FAI_pi:GENERATIONMillennial:RACE_catAsian/PI -0.69070321 4.897521e-01
## FAI_pi:GENERATIONGenZ:RACE_catAsian/PI -3.26415180 1.097923e-03
## FAI_pi:GENERATIONGenX:RACE_catOther -1.51063162 1.308823e-01
## FAI_pi:GENERATIONMillennial:RACE_catOther 0.18620015 8.522878e-01
## FAI_pi:GENERATIONGenZ:RACE_catOther 0.64066511 5.217403e-01
# Joint Wald tests are run separately by race group rather than as a
# single omnibus test because year-clustered robust covariance with
# 8 survey years has rank ≤ 7, making a 9-restriction joint test
# singular. Separate 3-restriction tests remain identified.
library(car)
run_race_joint_test <- function(race_label) {
pattern <- paste0("^FAI_pi:GENERATION.*:RACE_cat", race_label, "$")
terms <- grep(pattern, names(coef(m_threeway)), value = TRUE)
if (length(terms) == 0) return(NULL)
test <- linearHypothesis(m_threeway, terms,
vcov. = vc_threeway,
test = "Chisq")
data.table(
race = race_label,
n_terms = length(terms),
chi_sq = round(test$Chisq[2], 2),
df = test$Df[2],
p_value = signif(test$`Pr(>Chisq)`[2], 4)
)
}
joint_tests <- rbindlist(lapply(
c("Black", "Asian/PI", "Other"),
run_race_joint_test
))
joint_tests
## race n_terms chi_sq df p_value
## <char> <int> <num> <num> <num>
## 1: Black 3 8.68 3 3.382e-02
## 2: Asian/PI 3 109.01 3 1.792e-23
## 3: Other 3 159.48 3 2.374e-34
Parallel to Section 2 with cost_burdened as the outcome.
fit_stratified_burden <- function(race_label) {
dat <- model_dt[RACE_cat == race_label &
!is.na(FAI_pi) & !is.na(cost_burdened)]
if (nrow(dat) < 1000) {
return(list(fit = NULL, vcov = NULL, n = nrow(dat), race = race_label))
}
fit <- glm(
cost_burdened ~ FAI_pi * GENERATION + MARST + HISPAN + EDUC_cat,
data = dat,
weights = w,
family = binomial(link = "logit")
)
vc <- vcovCL(fit, cluster = ~YEAR, type = "HC1")
list(fit = fit, vcov = vc, n = nrow(dat), race = race_label)
}
strat_burden <- lapply(levels(model_dt$RACE_cat), fit_stratified_burden)
names(strat_burden) <- levels(model_dt$RACE_cat)
cohort_burden_effects <- function(s) {
if (is.null(s$fit)) {
return(data.table(race = s$race, GENERATION = NA, est = NA, n = s$n))
}
cf <- coef(s$fit)
data.table(
race = s$race,
n = s$n,
GENERATION = c("Boomer (ref)", "GenX", "Millennial", "GenZ"),
cohort_logodds = c(
0,
cf["GENERATIONGenX"],
cf["GENERATIONMillennial"],
cf["GENERATIONGenZ"]
)
)
}
burden_cohort_dt <- rbindlist(lapply(strat_burden, cohort_burden_effects))
dcast(burden_cohort_dt, race ~ GENERATION,
value.var = "cohort_logodds")
## Key: <race>
## race Boomer (ref) GenX GenZ Millennial
## <char> <num> <num> <num> <num>
## 1: Asian/PI 0 0.171561540 -0.3103853 -0.09935377
## 2: Black 0 0.212816190 0.1238465 0.24041021
## 3: Other 0 0.007315399 -0.2384769 -0.02370535
## 4: White 0 0.158956619 -0.1299126 0.06234491
fai_grid <- seq(quantile(model_dt$FAI_pi, 0.05, na.rm = TRUE),
quantile(model_dt$FAI_pi, 0.95, na.rm = TRUE),
length.out = 50)
modal <- function(x) names(sort(table(x), decreasing = TRUE))[1]
pred_grid <- expand.grid(
FAI_pi = fai_grid,
GENERATION = levels(model_dt$GENERATION),
RACE_cat = levels(model_dt$RACE_cat),
MARST = modal(model_dt$MARST),
HISPAN = modal(model_dt$HISPAN),
EDUC_cat = modal(model_dt$EDUC_cat)
)
pred_grid$GENERATION <- factor(pred_grid$GENERATION,
levels = levels(model_dt$GENERATION))
pred_grid$RACE_cat <- factor(pred_grid$RACE_cat,
levels = levels(model_dt$RACE_cat))
pred_grid$MARST <- factor(pred_grid$MARST,
levels = levels(model_dt$MARST))
pred_grid$HISPAN <- factor(pred_grid$HISPAN,
levels = levels(model_dt$HISPAN))
pred_grid$EDUC_cat <- factor(pred_grid$EDUC_cat,
levels = levels(model_dt$EDUC_cat))
pred_grid$pred_own <- predict(m_threeway, newdata = pred_grid,
type = "response")
ggplot(pred_grid,
aes(x = FAI_pi, y = pred_own, color = GENERATION)) +
geom_line(size = 1) +
facet_wrap(~ RACE_cat) +
scale_y_continuous(labels = scales::percent, limits = c(0, 1)) +
labs(
title = "Predicted homeownership probability by affordability, generation, and race",
subtitle = "Modal marital status, ethnicity, and education held constant",
x = "Affordability index (z-scored P/I; higher = less attainable)",
y = "Predicted Pr(own home)",
color = "Generation"
) +
theme_minimal()