# Global chunk options and required packages
knitr::opts_chunk$set(echo=TRUE, message=FALSE, warning=FALSE)
# Core data manipulation and survey-weighted estimation
options(repos = c(CRAN = "https://cloud.r-project.org")) # set CRAN mirror for non-interactive knit
install.packages(c("tidyverse","survey","dplyr", "mice"), repos = getOption("repos")) # ensure mirror is used
library(tidyverse)
library(survey)
library(dplyr)
library(mice)
# Handle strata with a single PSU after subsetting (e.g., Texas-only)
options(survey.lonely.psu = "adjust")
# Load BRFSS 2011 data (pre-processed to RDS)
BRFSS2011 <- readRDS("C:/Users/63650/Downloads/BRFSS2011.RDS")
# Restrict sample to Texas respondents (state FIPS = 48)
BRFSS2011_TX <- BRFSS2011 %>% filter(x.state == 48)
# Revised variable list (core predictors + access + diagnostics)
keep_2011 <- c(
# Survey design (required, not predictors)
"x.state","x.psu","x.ststr","x.llcpwt",
# Outcome
"diabete3",
# 1) Demographics
"age",
"sex",
"x.imprace",
# 2) Biological / cardiometabolic risk
"x.bmi5",
"bphigh4",
"toldhi2",
# 3) Behavioral risk
"x.smoker3",
"exerany2",
"drnkany5",
# 4) Socioeconomic position
"educa",
"income2",
"employ",
# 5) Access to care / detection
"hlthpln1",
"checkup1",
"medcost",
# 6) Geography / context (Texas-specific)
"x.region",
"ctycode1",
"persdoc2"
)
# Keep only the analysis variables that actually exist in this year's file
df <- BRFSS2011_TX %>%
select(all_of(intersect(keep_2011, names(BRFSS2011_TX))))
# Recode BRFSS non-substantive response codes to NA
# (DK/Not sure, Refused, and similar special codes)
# Marks entries missing for later handling (e.g., MICE)
# Diabetes source variable: drop DK/refused
df$diabete3[df$diabete3 %in% c(7,9)] <- NA
# Demographics
df$age[df$age %in% c(7, 9)] <- NA
# Restrict to adults
df <- df %>% filter(age >= 18)
# sex by default 1 for male, 2 for female
df$x.imprace[!(df$x.imprace %in% 1:6)] <- NA
# Cardiometabolic risk
df$bphigh4[df$bphigh4 %in% c(2,4,7,9)] <- NA # exclude pregnancy-only, borderline, DK/refused
df$toldhi2[df$toldhi2 %in% c(7,9)] <- NA
# Behavioral risk
df$x.smoker3[df$x.smoker3 == 9] <- NA
df$exerany2[df$exerany2 %in% c(7,9)] <- NA
df$drnkany5[df$drnkany5 %in% c(7,9)] <- NA
# Socioeconomic position
df$educa[df$educa == 9] <- NA
df$income2[df$income2 %in% c(77,99)] <- NA
df$employ[df$employ == 9] <- NA
# Access to care
df$hlthpln1[df$hlthpln1 %in% c(7,9)] <- NA
df$checkup1[df$checkup1 %in% c(7,9)] <- NA # keep 8 = "Never" as a valid category
df$medcost[df$medcost %in% c(7,9)] <- NA
df$persdoc2[df$persdoc2 %in% c(7,9)] <- NA
# Geography
# x.region has no NA
df$ctycode1[df$ctycode1 %in% c(777,999)] <- NA
df$persdoc2[df$persdoc2 %in% c(7,9)] <- NA
# BMI is computed; keep numeric and scale
df$x.bmi5 <- as.numeric(df$x.bmi5)
# Remove impossible values
df$x.bmi5[df$x.bmi5 %in% c(9999, 0)] <- NA
# Plausible analytic range (BMI 12–80)
df$x.bmi5[df$x.bmi5 < 1200 | df$x.bmi5 > 8000] <- NA
# Create scaled BMI in standard units for modeling
df$bmi <- df$x.bmi5 / 100
# Construct binary diagnosed diabetes outcome
# Keep: 1 = Yes, 3 = No
# Exclude: 2 = gestational only, 4 = prediabetes/borderline, plus missing
# Then drop missing outcome (do NOT impute outcome)
df <- df %>%
mutate(diabetes_dx = case_when(
diabete3 == 1 ~ 1,
diabete3 == 3 ~ 0,
TRUE ~ NA_real_
)) %>%
filter(!is.na(diabetes_dx))
# Set variable types for MICE
# Outcome stays as factor (NOT imputed)
# Numeric: age, bmi
# Binary indicators as factors (0/1)
# Ordered: educa, income2, checkup1
# Nominal multi-category: x.imprace, employ, x.smoker3, persdoc2, x.region, ctycode1
df$diabetes_dx <- factor(df$diabetes_dx, levels = c(0,1))
# Continuous
df$age <- as.numeric(df$age)
df$bmi <- as.numeric(df$bmi)
# High blood pressure: 1=Yes, 3=No -> 1/0
df$bphigh_bin <- NA_integer_
df$bphigh_bin[df$bphigh4 == 1] <- 1
df$bphigh_bin[df$bphigh4 == 3] <- 0
# Told cholesterol high: 1=Yes, 2=No -> 1/0
df$toldhi_bin <- NA_integer_
df$toldhi_bin[df$toldhi2 == 1] <- 1
df$toldhi_bin[df$toldhi2 == 2] <- 0
# Exercise past 30 days: 1=Yes, 2=No -> 1/0
df$exer_bin <- NA_integer_
df$exer_bin[df$exerany2 == 1] <- 1
df$exer_bin[df$exerany2 == 2] <- 0
# Any alcohol past 30 days: 1=Yes, 2=No -> 1/0
df$drink_bin <- NA_integer_
df$drink_bin[df$drnkany5 == 1] <- 1
df$drink_bin[df$drnkany5 == 2] <- 0
# Health insurance coverage: 1=Yes, 2=No -> 1/0
df$hlthpln_bin <- NA_integer_
df$hlthpln_bin[df$hlthpln1 == 1] <- 1
df$hlthpln_bin[df$hlthpln1 == 2] <- 0
# Could not see doctor due to cost: 1=Yes, 2=No -> 1/0
df$medcost_bin <- NA_integer_
df$medcost_bin[df$medcost == 1] <- 1
df$medcost_bin[df$medcost == 2] <- 0
# Personal doctor/provider as binary:(1/2)=1, (3)=0 (any provider vs none)
df$persdoc_any_bin <- NA_integer_
df$persdoc_any_bin[df$persdoc2 %in% c(1, 2)] <- 1
df$persdoc_any_bin[df$persdoc2 == 3] <- 0
# Binary 0/1 predictors: factors so mice uses logistic regression
df$bphigh_bin <- factor(df$bphigh_bin, levels = c(0, 1))
df$toldhi_bin <- factor(df$toldhi_bin, levels = c(0, 1))
df$exer_bin <- factor(df$exer_bin, levels = c(0, 1))
df$drink_bin <- factor(df$drink_bin, levels = c(0, 1))
df$hlthpln_bin <- factor(df$hlthpln_bin, levels = c(0, 1))
df$medcost_bin <- factor(df$medcost_bin, levels = c(0, 1))
df$persdoc_any_bin <- factor(df$persdoc_any_bin, levels = c(0, 1))
# Ordered / nominal variables (NOT forced to 1/0)
df$sex <- factor(df$sex, levels = c(1, 2))
df$educa <- ordered(df$educa, levels = 1:6)
df$income2 <- ordered(df$income2, levels = 1:8)
df$employ <- factor(df$employ, levels = 1:8)
df$x.imprace <- factor(df$x.imprace, levels = 1:6)
df$x.smoker3 <- factor(df$x.smoker3, levels = 1:4)
# Checkup timing (8 = "Never" as valid ordered category)
df$checkup1 <- ordered(df$checkup1, levels = c(1, 2, 3, 4, 8))
# Geography IDs (excluded from MICE, keep as factors for later)
df$x.region <- factor(df$x.region)
df$ctycode1 <- factor(df$ctycode1)
# Run MICE (exclude survey design vars + IDs from imputation)
# Let MICE automatically choose an imputation model (logistic, PMM, etc.) based on variable type
meth <- make.method(df)
# Create the matrix that controls which variables can predict missing values in others
pred <- make.predictorMatrix(df)
# DO NOT EVER IMPUTE THE OUTCOME (and do not use it to predict others)
meth["diabetes_dx"] <- ""
pred[, "diabetes_dx"] <- 0
pred["diabetes_dx", ] <- 0
# Force BMI to use predictive mean matching to avoid implausible imputed values
meth["bmi"] <- "pmm"
meth["x.bmi5"] <- ""
# Prevent survey design variables and county identifiers from being imputed
meth[c("x.state","x.psu","x.ststr","x.llcpwt","ctycode1","x.region")] <- ""
# Do not impute original multi-code versions when you are using the 0/1 bins in the model
meth[c("bphigh4","toldhi2","exerany2","drnkany5","hlthpln1","medcost","persdoc2")] <- ""
# Impute the 0/1 binary predictors using logistic regression
meth[c("bphigh_bin","toldhi_bin","exer_bin","drink_bin","hlthpln_bin","medcost_bin","persdoc_any_bin")] <- "logreg"
# Exclude survey design variables and county from predicting other variables
# because they are identifiers, not substantive covariates
pred[, c("x.state","x.psu","x.ststr","x.llcpwt","ctycode1","x.region")] <- 0
# Exclude survey design variables and county from being predicted themselves
pred[c("x.state","x.psu","x.ststr","x.llcpwt","ctycode1","x.region"), ] <- 0
# Also remove the original multi-code versions as predictors/targets since we are imputing the bins instead
pred[, c("bphigh4","toldhi2","exerany2","drnkany5","hlthpln1","medcost","persdoc2","x.bmi5")] <- 0
pred[c("bphigh4","toldhi2","exerany2","drnkany5","hlthpln1","medcost","persdoc2","x.bmi5"), ] <- 0
# Run multiple imputation with a small number of datasets and iterations for feasibility
imp <- mice(df, m = 5, maxit = 2, method = meth, predictorMatrix = pred, seed = 123)
# Extract the completed imputed datasets for analysis
d1 <- complete(imp, 1)
d2 <- complete(imp, 2)
# Survey design using the cleaned (non-imputed) dataset
# Used as a baseline / complete-case comparison
des_cc <- svydesign(
ids = ~x.psu, # primary sampling units (clusters)
strata = ~x.ststr, # stratification variable
weights = ~x.llcpwt,# BRFSS final survey weights
data = df,
nest = TRUE # PSUs are nested within strata
)
# Survey design for imputed dataset 1
# Preserves BRFSS sampling structure after imputation
des1 <- svydesign(
ids = ~x.psu,
strata = ~x.ststr,
weights = ~x.llcpwt,
data = d1,
nest = TRUE
)
# Survey design for imputed dataset 2
# Allows analysis and pooling across multiple imputations
des2 <- svydesign(
ids = ~x.psu,
strata = ~x.ststr,
weights = ~x.llcpwt,
data = d2,
nest = TRUE
)
# BASELINE MODEL (no FE, no interactions)
f_base <- diabetes_dx ~
age + sex + x.imprace +
bmi + bphigh_bin +
x.smoker3 + exer_bin +
educa + income2 +
hlthpln_bin + checkup1
# Complete case
m1_cc_base <- svyglm(f_base, design = des_cc, family = quasibinomial())
# Imputation 1
m1_d1_base <- svyglm(f_base, design = des1, family = quasibinomial())
# Imputation 2
m1_d2_base <- svyglm(f_base, design = des2, family = quasibinomial())
# Used to establish raw associational relationships
broom::tidy(m1_cc_base)
## # A tibble: 31 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -7.56 0.450 -16.8 1.62e-62
## 2 age 0.0377 0.00426 8.83 1.19e-18
## 3 sex2 -0.303 0.112 -2.70 6.87e- 3
## 4 x.imprace2 0.189 0.190 0.996 3.19e- 1
## 5 x.imprace3 1.12 0.472 2.38 1.75e- 2
## 6 x.imprace4 0.903 0.398 2.27 2.34e- 2
## 7 x.imprace5 0.626 0.145 4.32 1.58e- 5
## 8 x.imprace6 -0.641 0.453 -1.42 1.57e- 1
## 9 bmi 0.0831 0.00805 10.3 8.11e-25
## 10 bphigh_bin1 1.14 0.129 8.88 7.75e-19
## # ℹ 21 more rows
broom::tidy(m1_d1_base)
## # A tibble: 31 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -7.42 0.386 -19.2 2.88e-81
## 2 age 0.0391 0.00349 11.2 4.75e-29
## 3 sex2 -0.345 0.0977 -3.53 4.20e- 4
## 4 x.imprace2 0.123 0.165 0.745 4.56e- 1
## 5 x.imprace3 0.874 0.402 2.18 2.95e- 2
## 6 x.imprace4 0.545 0.363 1.50 1.33e- 1
## 7 x.imprace5 0.624 0.125 5.01 5.51e- 7
## 8 x.imprace6 -0.640 0.403 -1.59 1.12e- 1
## 9 bmi 0.0825 0.00725 11.4 7.06e-30
## 10 bphigh_bin1 1.16 0.114 10.2 3.17e-24
## # ℹ 21 more rows
broom::tidy(m1_d2_base)
## # A tibble: 31 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -7.42 0.384 -19.3 2.53e-82
## 2 age 0.0391 0.00347 11.3 1.91e-29
## 3 sex2 -0.342 0.0973 -3.51 4.49e- 4
## 4 x.imprace2 0.102 0.166 0.613 5.40e- 1
## 5 x.imprace3 0.912 0.391 2.33 1.97e- 2
## 6 x.imprace4 0.577 0.360 1.61 1.08e- 1
## 7 x.imprace5 0.625 0.125 5.00 5.73e- 7
## 8 x.imprace6 -0.689 0.400 -1.72 8.49e- 2
## 9 bmi 0.0823 0.00711 11.6 6.97e-31
## 10 bphigh_bin1 1.15 0.112 10.2 2.23e-24
## # ℹ 21 more rows
# MODEL + REGION FIXED EFFECTS
f_fe <- update(f_base, . ~ . + factor(x.region))
m2_cc_fe <- svyglm(f_fe, design = des_cc, family = quasibinomial())
m2_d1_fe <- svyglm(f_fe, design = des1, family = quasibinomial())
m2_d2_fe <- svyglm(f_fe, design = des2, family = quasibinomial())
# Controls for unobserved regional differences within Texas
broom::tidy(m2_cc_fe)
## # A tibble: 44 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -7.64 0.477 -16.0 4.02e-57
## 2 age 0.0378 0.00429 8.82 1.37e-18
## 3 sex2 -0.296 0.112 -2.64 8.29e- 3
## 4 x.imprace2 0.213 0.192 1.11 2.68e- 1
## 5 x.imprace3 1.16 0.500 2.33 2.01e- 2
## 6 x.imprace4 0.874 0.407 2.15 3.18e- 2
## 7 x.imprace5 0.576 0.159 3.63 2.84e- 4
## 8 x.imprace6 -0.615 0.443 -1.39 1.65e- 1
## 9 bmi 0.0823 0.00810 10.2 3.51e-24
## 10 bphigh_bin1 1.16 0.130 8.95 4.24e-19
## # ℹ 34 more rows
broom::tidy(m2_d1_fe)
## # A tibble: 44 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -7.46 0.404 -18.5 2.79e-75
## 2 age 0.0391 0.00349 11.2 5.20e-29
## 3 sex2 -0.343 0.0977 -3.51 4.51e- 4
## 4 x.imprace2 0.142 0.166 0.857 3.91e- 1
## 5 x.imprace3 0.913 0.416 2.20 2.82e- 2
## 6 x.imprace4 0.522 0.367 1.42 1.55e- 1
## 7 x.imprace5 0.579 0.135 4.28 1.89e- 5
## 8 x.imprace6 -0.647 0.395 -1.64 1.01e- 1
## 9 bmi 0.0818 0.00736 11.1 1.40e-28
## 10 bphigh_bin1 1.17 0.115 10.1 4.03e-24
## # ℹ 34 more rows
broom::tidy(m2_d2_fe)
## # A tibble: 44 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -7.46 0.398 -18.7 1.66e-77
## 2 age 0.0391 0.00347 11.3 2.25e-29
## 3 sex2 -0.340 0.0972 -3.50 4.73e- 4
## 4 x.imprace2 0.122 0.167 0.730 4.65e- 1
## 5 x.imprace3 0.950 0.406 2.34 1.92e- 2
## 6 x.imprace4 0.552 0.364 1.52 1.30e- 1
## 7 x.imprace5 0.584 0.136 4.31 1.67e- 5
## 8 x.imprace6 -0.706 0.393 -1.80 7.22e- 2
## 9 bmi 0.0814 0.00719 11.3 1.29e-29
## 10 bphigh_bin1 1.16 0.113 10.2 2.37e-24
## # ℹ 34 more rows
# MODEL + FIXED EFFECTS + INTERACTIONS
f_fe_int <- update(
f_fe,
. ~ . +
bmi:x.smoker3 +
bmi:exer_bin +
income2:hlthpln_bin +
educa:hlthpln_bin
)
m3_cc_fe_int <- svyglm(f_fe_int, design = des_cc, family = quasibinomial())
m3_d1_fe_int <- svyglm(f_fe_int, design = des1, family = quasibinomial())
m3_d2_fe_int <- svyglm(f_fe_int, design = des2, family = quasibinomial())
# Tests whether key relationships differ across risk-factor combinations
broom::tidy(m3_cc_fe_int)
## # A tibble: 60 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -8.26 0.888 -9.30 1.61e-20
## 2 age 0.0376 0.00429 8.75 2.39e-18
## 3 sex2 -0.305 0.112 -2.73 6.32e- 3
## 4 x.imprace2 0.230 0.188 1.22 2.22e- 1
## 5 x.imprace3 1.13 0.506 2.24 2.49e- 2
## 6 x.imprace4 0.876 0.403 2.17 2.98e- 2
## 7 x.imprace5 0.577 0.159 3.63 2.84e- 4
## 8 x.imprace6 -0.565 0.426 -1.32 1.86e- 1
## 9 bmi 0.101 0.0246 4.10 4.08e- 5
## 10 bphigh_bin1 1.16 0.130 8.92 5.52e-19
## # ℹ 50 more rows
broom::tidy(m3_d1_fe_int)
## # A tibble: 60 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -8.11 0.798 -10.2 3.49e-24
## 2 age 0.0391 0.00351 11.2 8.48e-29
## 3 sex2 -0.352 0.0972 -3.62 2.91e- 4
## 4 x.imprace2 0.152 0.166 0.918 3.59e- 1
## 5 x.imprace3 0.888 0.418 2.13 3.35e- 2
## 6 x.imprace4 0.529 0.361 1.47 1.42e- 1
## 7 x.imprace5 0.575 0.135 4.26 2.08e- 5
## 8 x.imprace6 -0.619 0.382 -1.62 1.05e- 1
## 9 bmi 0.102 0.0227 4.49 7.07e- 6
## 10 bphigh_bin1 1.16 0.116 10.1 1.06e-23
## # ℹ 50 more rows
broom::tidy(m3_d2_fe_int)
## # A tibble: 60 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -8.08 0.771 -10.5 1.28e-25
## 2 age 0.0389 0.00348 11.2 6.93e-29
## 3 sex2 -0.348 0.0971 -3.58 3.39e- 4
## 4 x.imprace2 0.138 0.167 0.825 4.09e- 1
## 5 x.imprace3 0.932 0.406 2.29 2.18e- 2
## 6 x.imprace4 0.563 0.361 1.56 1.19e- 1
## 7 x.imprace5 0.586 0.136 4.32 1.57e- 5
## 8 x.imprace6 -0.687 0.386 -1.78 7.52e- 2
## 9 bmi 0.102 0.0220 4.62 3.80e- 6
## 10 bphigh_bin1 1.16 0.114 10.1 4.60e-24
## # ℹ 50 more rows
# MODEL + FIXED EFFECTS + NONLINEAR TERMS
f_fe_nl <- update(
f_fe,
. ~ . +
I(bmi^2) +
I(age^2) +
I(as.numeric(income2)^2)
)
m4_cc_fe_nl <- svyglm(f_fe_nl, design = des_cc, family = quasibinomial())
m4_d1_fe_nl <- svyglm(f_fe_nl, design = des1, family = quasibinomial())
m4_d2_fe_nl <- svyglm(f_fe_nl, design = des2, family = quasibinomial())
# Tests for curvature and threshold effects in continuous predictors
broom::tidy(m4_cc_fe_nl)
## # A tibble: 46 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -13.3 1.14 -11.7 2.90e-31
## 2 age 0.173 0.0276 6.25 4.19e-10
## 3 sex2 -0.284 0.112 -2.54 1.10e- 2
## 4 x.imprace2 0.139 0.191 0.727 4.68e- 1
## 5 x.imprace3 1.18 0.507 2.32 2.03e- 2
## 6 x.imprace4 0.872 0.416 2.10 3.60e- 2
## 7 x.imprace5 0.507 0.158 3.22 1.28e- 3
## 8 x.imprace6 -0.556 0.414 -1.34 1.79e- 1
## 9 bmi 0.224 0.0412 5.43 5.64e- 8
## 10 bphigh_bin1 1.05 0.126 8.37 6.73e-17
## # ℹ 36 more rows
broom::tidy(m4_d1_fe_nl)
## # A tibble: 46 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -13.6 0.970 -14.0 2.24e-44
## 2 age 0.185 0.0225 8.21 2.32e-16
## 3 sex2 -0.301 0.0977 -3.08 2.09e- 3
## 4 x.imprace2 0.0775 0.166 0.467 6.41e- 1
## 5 x.imprace3 0.961 0.415 2.32 2.05e- 2
## 6 x.imprace4 0.454 0.391 1.16 2.46e- 1
## 7 x.imprace5 0.504 0.136 3.71 2.11e- 4
## 8 x.imprace6 -0.630 0.372 -1.69 9.05e- 2
## 9 bmi 0.230 0.0364 6.33 2.55e-10
## 10 bphigh_bin1 1.05 0.112 9.40 6.31e-21
## # ℹ 36 more rows
broom::tidy(m4_d2_fe_nl)
## # A tibble: 46 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -13.2 0.963 -13.7 9.90e-43
## 2 age 0.183 0.0225 8.14 4.26e-16
## 3 sex2 -0.302 0.0970 -3.12 1.83e- 3
## 4 x.imprace2 0.0643 0.167 0.384 7.01e- 1
## 5 x.imprace3 0.972 0.407 2.39 1.69e- 2
## 6 x.imprace4 0.493 0.387 1.28 2.02e- 1
## 7 x.imprace5 0.511 0.136 3.76 1.67e- 4
## 8 x.imprace6 -0.704 0.373 -1.89 5.92e- 2
## 9 bmi 0.211 0.0369 5.73 1.05e- 8
## 10 bphigh_bin1 1.04 0.110 9.43 4.93e-21
## # ℹ 36 more rows
# MODEL + FIXED EFFECTS + INTERACTIONS + NONLINEAR TERMS
f_full <- update(
f_fe,
. ~ . +
bmi:x.smoker3 +
bmi:exer_bin +
income2:hlthpln_bin +
educa:hlthpln_bin +
I(bmi^2) +
I(age^2) +
I(as.numeric(income2)^2)
)
m5_cc_full <- svyglm(f_full, design = des_cc, family = quasibinomial())
m5_d1_full <- svyglm(f_full, design = des1, family = quasibinomial())
m5_d2_full <- svyglm(f_full, design = des2, family = quasibinomial())
# Stress-test specification to assess robustness and overfitting sensitivity
broom::tidy(m5_cc_full)
## # A tibble: 62 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -14.3 1.43 -10.0 1.53e-23
## 2 age 0.172 0.0270 6.36 2.07e-10
## 3 sex2 -0.290 0.111 -2.61 9.03e- 3
## 4 x.imprace2 0.150 0.187 0.799 4.24e- 1
## 5 x.imprace3 1.15 0.513 2.25 2.45e- 2
## 6 x.imprace4 0.867 0.413 2.10 3.59e- 2
## 7 x.imprace5 0.510 0.157 3.25 1.16e- 3
## 8 x.imprace6 -0.508 0.407 -1.25 2.12e- 1
## 9 bmi 0.266 0.0504 5.27 1.36e- 7
## 10 bphigh_bin1 1.05 0.126 8.37 6.54e-17
## # ℹ 52 more rows
broom::tidy(m5_d1_full)
## # A tibble: 62 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -14.2 1.22 -11.6 3.17e-31
## 2 age 0.187 0.0225 8.35 7.70e-17
## 3 sex2 -0.311 0.0971 -3.20 1.38e- 3
## 4 x.imprace2 0.0771 0.165 0.468 6.40e- 1
## 5 x.imprace3 0.946 0.416 2.27 2.31e- 2
## 6 x.imprace4 0.452 0.388 1.16 2.44e- 1
## 7 x.imprace5 0.504 0.134 3.75 1.77e- 4
## 8 x.imprace6 -0.620 0.372 -1.67 9.50e- 2
## 9 bmi 0.250 0.0436 5.74 9.80e- 9
## 10 bphigh_bin1 1.04 0.111 9.33 1.27e-20
## # ℹ 52 more rows
broom::tidy(m5_d2_full)
## # A tibble: 62 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -14.0 1.20 -11.6 6.33e-31
## 2 age 0.185 0.0223 8.29 1.19e-16
## 3 sex2 -0.310 0.0966 -3.20 1.36e- 3
## 4 x.imprace2 0.0740 0.167 0.444 6.57e- 1
## 5 x.imprace3 0.958 0.408 2.35 1.88e- 2
## 6 x.imprace4 0.496 0.386 1.28 2.00e- 1
## 7 x.imprace5 0.514 0.135 3.82 1.37e- 4
## 8 x.imprace6 -0.694 0.375 -1.85 6.46e- 2
## 9 bmi 0.239 0.0448 5.34 9.21e- 8
## 10 bphigh_bin1 1.03 0.110 9.37 8.19e-21
## # ℹ 52 more rows