library(tidyverse)
library(survey)
library(broom)
# Handle strata with a single PSU
options(survey.lonely.psu = "adjust")
# Load one year (edit year + path as needed)
BRFSS2011 <- readRDS("C:/Users/63650/Downloads/BRFSS2011.RDS")
# Texas + valid diabete3 only
# Keep only design vars + diabete3, drop NA and non-eligible codes
tx_diab <- BRFSS2011 %>%
filter(x.state == 48) %>%
select(x.state, x.psu, x.ststr, x.llcpwt, diabete3) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(
diabete3 == 1 ~ 1, # Yes
diabete3 == 3 ~ 0, # No
TRUE ~ NA_real_
)
) %>%
filter(!is.na(diabetes_dx))
# Count of Texas respondents with valid diabetes outcome
n_tx_diab <- nrow(tx_diab)
n_tx_diab
## [1] 14618
# Survey design object for the outcome-only sample
des_tx_diab <- svydesign(
ids = ~x.psu,
strata = ~x.ststr,
weights = ~x.llcpwt,
data = tx_diab,
nest = TRUE
)
# Bivariate regression on AGE
# Make a unique subset with ONLY diabetes + age (drop NA in age)
# Age treated as numeric (continuous)
tx_age <- BRFSS2011 %>%
filter(x.state == 48) %>%
select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, age) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(
diabete3 == 1 ~ 1,
diabete3 == 3 ~ 0,
TRUE ~ NA_real_
),
age = ifelse(age %in% c(7, 9), NA, age),
age = as.numeric(age)
) %>%
filter(!is.na(diabetes_dx), !is.na(age), age >= 18)
# Count for AGE model
n_tx_age <- nrow(tx_age)
n_tx_age
## [1] 14485
# Survey design for AGE subset
des_age <- svydesign(
ids = ~x.psu,
strata = ~x.ststr,
weights = ~x.llcpwt,
data = tx_age,
nest = TRUE
)
# Bivariate survey-weighted logistic regression
m_age <- svyglm(diabetes_dx ~ age, design = des_age, family = quasibinomial())
# Output for appendix
broom::tidy(m_age, conf.int = TRUE)
## # A tibble: 2 × 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -4.48 0.135 -33.1 2.90e-232 -4.74 -4.21
## 2 age 0.0462 0.00208 22.2 1.95e-107 0.0422 0.0503
# Bivariate regression on sex (2011)
tx_sex <- BRFSS2011 %>%
filter(x.state == 48) %>%
select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, sex) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(
diabete3 == 1 ~ 1,
diabete3 == 3 ~ 0,
TRUE ~ NA_real_
),
sex = ifelse(sex %in% c(7, 9), NA, sex),
sex = factor(sex, levels = c(1, 2)) # 1=Male, 2=Female
) %>%
filter(!is.na(diabetes_dx), !is.na(sex))
# Count for SEX model
n_tx_sex <- nrow(tx_sex)
n_tx_sex
## [1] 14618
# Survey design for SEX subset
des_sex <- svydesign(
ids = ~x.psu,
strata = ~x.ststr,
weights = ~x.llcpwt,
data = tx_sex,
nest = TRUE
)
# Bivariate survey-weighted logistic regression (sex as factor)
m_sex <- svyglm(diabetes_dx ~ sex, design = des_sex, family = quasibinomial())
# Output for appendix
broom::tidy(m_sex, conf.int = TRUE)
## # A tibble: 2 × 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -2.11 0.0635 -33.2 8.03e-233 -2.23 -1.98
## 2 sex2 -0.0966 0.0845 -1.14 2.53e- 1 -0.262 0.0690
# BMI (numeric; from _BMI5 / x.bmi5 with 2 implied decimals)
tx_bmi <- BRFSS2011 %>%
filter(x.state == 48) %>%
select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, x.bmi5) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(
diabete3 == 1 ~ 1,
diabete3 == 3 ~ 0,
TRUE ~ NA_real_
),
x.bmi5 = suppressWarnings(as.numeric(x.bmi5)),
x.bmi5 = ifelse(x.bmi5 %in% c(0, 9999), NA, x.bmi5),
x.bmi5 = ifelse(x.bmi5 < 1200 | x.bmi5 > 9999, NA, x.bmi5),
bmi = x.bmi5 / 100
) %>%
filter(!is.na(diabetes_dx), !is.na(bmi))
n_tx_bmi <- nrow(tx_bmi)
n_tx_bmi
## [1] 13704
des_bmi <- svydesign(
ids = ~x.psu,
strata = ~x.ststr,
weights = ~x.llcpwt,
data = tx_bmi,
nest = TRUE
)
m_bmi <- svyglm(diabetes_dx ~ bmi, design = des_bmi, family = quasibinomial())
broom::tidy(m_bmi, conf.int = TRUE)
## # A tibble: 2 × 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -4.63 0.212 -21.9 3.41e-104 -5.05 -4.22
## 2 bmi 0.0847 0.00702 12.1 2.67e- 33 0.0709 0.0984
# Hypertension (binary factor; from BPHIGH4: 1 yes, 3 no)
tx_bphigh <- BRFSS2011 %>%
filter(x.state == 48) %>%
select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, bphigh4) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(
diabete3 == 1 ~ 1,
diabete3 == 3 ~ 0,
TRUE ~ NA_real_
),
bphigh4 = ifelse(bphigh4 %in% c(2, 4, 7, 9), NA, bphigh4),
htn = case_when(
bphigh4 == 1 ~ 1,
bphigh4 == 3 ~ 0,
TRUE ~ NA_real_
),
htn = factor(htn, levels = c(0, 1))
) %>%
filter(!is.na(diabetes_dx), !is.na(htn))
n_tx_bphigh <- nrow(tx_bphigh)
n_tx_bphigh
## [1] 14310
des_bphigh <- svydesign(
ids = ~x.psu,
strata = ~x.ststr,
weights = ~x.llcpwt,
data = tx_bphigh,
nest = TRUE
)
m_bphigh <- svyglm(diabetes_dx ~ htn, design = des_bphigh, family = quasibinomial())
broom::tidy(m_bphigh, conf.int = TRUE)
## # A tibble: 2 × 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -3.10 0.0826 -37.5 2.11e-293 -3.26 -2.93
## 2 htn1 1.93 0.0979 19.7 1.66e- 85 1.74 2.12
# Smoking status (4-level factor; from _SMOKER3 / x.smoker3)
tx_smoke <- BRFSS2011 %>%
filter(x.state == 48) %>%
select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, x.smoker3) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(
diabete3 == 1 ~ 1,
diabete3 == 3 ~ 0,
TRUE ~ NA_real_
),
x.smoker3 = ifelse(x.smoker3 == 9, NA, x.smoker3),
x.smoker3 = factor(x.smoker3, levels = c(1, 2, 3, 4))
) %>%
filter(!is.na(diabetes_dx), !is.na(x.smoker3))
n_tx_smoke <- nrow(tx_smoke)
n_tx_smoke
## [1] 14512
des_smoke <- svydesign(
ids = ~x.psu,
strata = ~x.ststr,
weights = ~x.llcpwt,
data = tx_smoke,
nest = TRUE
)
m_smoke <- svyglm(diabetes_dx ~ x.smoker3, design = des_smoke, family = quasibinomial())
broom::tidy(m_smoke, conf.int = TRUE)
## # A tibble: 4 × 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -2.37 0.141 -16.8 4.36e-63 -2.65 -2.10
## 2 x.smoker32 0.0630 0.261 0.241 8.09e- 1 -0.449 0.575
## 3 x.smoker33 0.654 0.158 4.13 3.62e- 5 0.344 0.964
## 4 x.smoker34 0.0780 0.153 0.511 6.10e- 1 -0.221 0.377
# Exercise past 30 days (binary factor; EXERANY2: 1 yes, 2 no)
tx_exer <- BRFSS2011 %>%
filter(x.state == 48) %>%
select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, exerany2) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(
diabete3 == 1 ~ 1,
diabete3 == 3 ~ 0,
TRUE ~ NA_real_
),
exerany2 = ifelse(exerany2 %in% c(7, 9), NA, exerany2),
exer = case_when(
exerany2 == 1 ~ 1,
exerany2 == 2 ~ 0,
TRUE ~ NA_real_
),
exer = factor(exer, levels = c(0, 1))
) %>%
filter(!is.na(diabetes_dx), !is.na(exer))
n_tx_exer <- nrow(tx_exer)
n_tx_exer
## [1] 13885
des_exer <- svydesign(
ids = ~x.psu,
strata = ~x.ststr,
weights = ~x.llcpwt,
data = tx_exer,
nest = TRUE
)
m_exer <- svyglm(diabetes_dx ~ exer, design = des_exer, family = quasibinomial())
broom::tidy(m_exer, conf.int = TRUE)
## # A tibble: 2 × 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -1.85 0.0737 -25.1 7.21e-136 -1.99 -1.70
## 2 exer1 -0.436 0.0919 -4.75 2.09e- 6 -0.617 -0.256
# Education (ordered factor; EDUCA 1–6; remove 9)
tx_educ <- BRFSS2011 %>%
filter(x.state == 48) %>%
select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, educa) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(
diabete3 == 1 ~ 1,
diabete3 == 3 ~ 0,
TRUE ~ NA_real_
),
educa = ifelse(educa == 9, NA, educa),
educa = ordered(educa, levels = 1:6)
) %>%
filter(!is.na(diabetes_dx), !is.na(educa))
n_tx_educ <- nrow(tx_educ)
n_tx_educ
## [1] 14565
des_educ <- svydesign(
ids = ~x.psu,
strata = ~x.ststr,
weights = ~x.llcpwt,
data = tx_educ,
nest = TRUE
)
m_educ <- svyglm(diabetes_dx ~ educa, design = des_educ, family = quasibinomial())
broom::tidy(m_educ, conf.int = TRUE)
## # A tibble: 6 × 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -1.91 0.0882 -21.6 5.00e-102 -2.08 -1.73
## 2 educa.L -1.21 0.293 -4.12 3.82e- 5 -1.78 -0.633
## 3 educa.Q 0.250 0.272 0.919 3.58e- 1 -0.283 0.783
## 4 educa.C -0.226 0.202 -1.12 2.62e- 1 -0.622 0.169
## 5 educa^4 -0.182 0.140 -1.30 1.93e- 1 -0.456 0.0920
## 6 educa^5 -0.00859 0.117 -0.0735 9.41e- 1 -0.238 0.221
# Income (ordered factor; INCOME2 1–8; remove 77/99)
tx_inc <- BRFSS2011 %>%
filter(x.state == 48) %>%
select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, income2) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(
diabete3 == 1 ~ 1,
diabete3 == 3 ~ 0,
TRUE ~ NA_real_
),
income2 = ifelse(income2 %in% c(77, 99), NA, income2),
income2 = ordered(income2, levels = 1:8)
) %>%
filter(!is.na(diabetes_dx), !is.na(income2))
n_tx_inc <- nrow(tx_inc)
n_tx_inc
## [1] 12507
des_inc <- svydesign(
ids = ~x.psu,
strata = ~x.ststr,
weights = ~x.llcpwt,
data = tx_inc,
nest = TRUE
)
m_inc <- svyglm(diabetes_dx ~ income2, design = des_inc, family = quasibinomial())
broom::tidy(m_inc, conf.int = TRUE)
## # A tibble: 8 × 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -2.04 0.0464 -43.9 0 -2.13 -1.95
## 2 income2.L -0.908 0.128 -7.11 1.21e-12 -1.16 -0.658
## 3 income2.Q -0.270 0.129 -2.09 3.65e- 2 -0.524 -0.0170
## 4 income2.C 0.0627 0.129 0.484 6.28e- 1 -0.191 0.316
## 5 income2^4 -0.282 0.134 -2.11 3.48e- 2 -0.544 -0.0201
## 6 income2^5 -0.0766 0.134 -0.574 5.66e- 1 -0.338 0.185
## 7 income2^6 0.000813 0.133 0.00610 9.95e- 1 -0.260 0.262
## 8 income2^7 0.0367 0.137 0.268 7.88e- 1 -0.231 0.305
# Insurance coverage (binary factor; HLTHPLN1: 1 yes, 2 no)
tx_ins <- BRFSS2011 %>%
filter(x.state == 48) %>%
select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, hlthpln1) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(
diabete3 == 1 ~ 1,
diabete3 == 3 ~ 0,
TRUE ~ NA_real_
),
hlthpln1 = ifelse(hlthpln1 %in% c(7, 9), NA, hlthpln1),
insured = case_when(
hlthpln1 == 1 ~ 1,
hlthpln1 == 2 ~ 0,
TRUE ~ NA_real_
),
insured = factor(insured, levels = c(0, 1))
) %>%
filter(!is.na(diabetes_dx), !is.na(insured))
n_tx_ins <- nrow(tx_ins)
n_tx_ins
## [1] 14568
des_ins <- svydesign(
ids = ~x.psu,
strata = ~x.ststr,
weights = ~x.llcpwt,
data = tx_ins,
nest = TRUE
)
m_ins <- svyglm(diabetes_dx ~ insured, design = des_ins, family = quasibinomial())
broom::tidy(m_ins, conf.int = TRUE)
## # A tibble: 2 × 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -2.48 0.105 -23.5 9.33e-120 -2.68 -2.27
## 2 insured1 0.441 0.115 3.84 1.24e- 4 0.216 0.667
# Routine checkup (ordered factor; CHECKUP1 1,2,3,4,8; remove 7/9)
tx_chk <- BRFSS2011 %>%
filter(x.state == 48) %>%
select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, checkup1) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(
diabete3 == 1 ~ 1,
diabete3 == 3 ~ 0,
TRUE ~ NA_real_
),
checkup1 = ifelse(checkup1 %in% c(7, 9), NA, checkup1),
checkup1 = ordered(checkup1, levels = c(1, 2, 3, 4, 8))
) %>%
filter(!is.na(diabetes_dx), !is.na(checkup1))
n_tx_chk <- nrow(tx_chk)
n_tx_chk
## [1] 14446
des_chk <- svydesign(
ids = ~x.psu,
strata = ~x.ststr,
weights = ~x.llcpwt,
data = tx_chk,
nest = TRUE
)
m_chk <- svyglm(diabetes_dx ~ checkup1, design = des_chk, family = quasibinomial())
broom::tidy(m_chk, conf.int = TRUE)
## # A tibble: 5 × 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -2.76 0.110 -25.0 1.49e-135 -2.97 -2.54
## 2 checkup1.L -1.03 0.305 -3.37 7.57e- 4 -1.62 -0.429
## 3 checkup1.Q 0.399 0.276 1.45 1.48e- 1 -0.141 0.939
## 4 checkup1.C -0.0474 0.205 -0.231 8.17e- 1 -0.449 0.355
## 5 checkup1^4 0.384 0.178 2.16 3.10e- 2 0.0352 0.734