library(tidyverse)
library(survey)
library(broom)
# Handle strata with a single PSU
options(survey.lonely.psu = "adjust")
# Load one year (edit year + path as needed)
BRFSS2011 <- readRDS("C:/Users/63650/Downloads/BRFSS2011.RDS")
# Texas + valid diabete3 only
# Keep only design vars + diabete3, drop NA and non-eligible codes
tx_diab <- BRFSS2011 %>%
filter(x.state == 48) %>%
select(x.state, x.psu, x.ststr, x.llcpwt, diabete3) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(
diabete3 == 1 ~ 1, # Yes
diabete3 == 3 ~ 0, # No
TRUE ~ NA_real_
)
) %>%
filter(!is.na(diabetes_dx))
# Count of Texas respondents with valid diabetes outcome
n_tx_diab <- nrow(tx_diab)
n_tx_diab
# Survey design object for the outcome-only sample
des_tx_diab <- svydesign(
ids = ~x.psu,
strata = ~x.ststr,
weights = ~x.llcpwt,
data = tx_diab,
nest = TRUE
)
# Bivariate regression on AGE
# Make a unique subset with ONLY diabetes + age (drop NA in age)
# Age treated as numeric (continuous)
tx_age <- BRFSS2011 %>%
filter(x.state == 48) %>%
select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, age) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(
diabete3 == 1 ~ 1,
diabete3 == 3 ~ 0,
TRUE ~ NA_real_
),
age = ifelse(age %in% c(7, 9), NA, age),
age = as.numeric(age)
) %>%
filter(!is.na(diabetes_dx), !is.na(age), age >= 18)
# Count for AGE model
n_tx_age <- nrow(tx_age)
n_tx_age
# Survey design for AGE subset
des_age <- svydesign(
ids = ~x.psu,
strata = ~x.ststr,
weights = ~x.llcpwt,
data = tx_age,
nest = TRUE
)
# Bivariate survey-weighted logistic regression
m_age <- svyglm(diabetes_dx ~ age, design = des_age, family = quasibinomial())
# Output for appendix
broom::tidy(m_age, conf.int = TRUE)
# Bivariate regression on sex (2011)
tx_sex <- BRFSS2011 %>%
filter(x.state == 48) %>%
select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, sex) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(
diabete3 == 1 ~ 1,
diabete3 == 3 ~ 0,
TRUE ~ NA_real_
),
sex = ifelse(sex %in% c(7, 9), NA, sex),
sex = factor(sex, levels = c(1, 2)) # 1=Male, 2=Female
) %>%
filter(!is.na(diabetes_dx), !is.na(sex))
# Count for SEX model
n_tx_sex <- nrow(tx_sex)
n_tx_sex
# Survey design for SEX subset
des_sex <- svydesign(
ids = ~x.psu,
strata = ~x.ststr,
weights = ~x.llcpwt,
data = tx_sex,
nest = TRUE
)
# Bivariate survey-weighted logistic regression (sex as factor)
m_sex <- svyglm(diabetes_dx ~ sex, design = des_sex, family = quasibinomial())
# Output for appendix
broom::tidy(m_sex, conf.int = TRUE)
# BMI (numeric; from _BMI5 / x.bmi5 with 2 implied decimals)
tx_bmi <- BRFSS2011 %>%
filter(x.state == 48) %>%
select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, x.bmi5) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(
diabete3 == 1 ~ 1,
diabete3 == 3 ~ 0,
TRUE ~ NA_real_
),
x.bmi5 = suppressWarnings(as.numeric(x.bmi5)),
x.bmi5 = ifelse(x.bmi5 %in% c(0, 9999), NA, x.bmi5),
x.bmi5 = ifelse(x.bmi5 < 1200 | x.bmi5 > 9999, NA, x.bmi5),
bmi = x.bmi5 / 100
) %>%
filter(!is.na(diabetes_dx), !is.na(bmi))
n_tx_bmi <- nrow(tx_bmi)
n_tx_bmi
des_bmi <- svydesign(
ids = ~x.psu,
strata = ~x.ststr,
weights = ~x.llcpwt,
data = tx_bmi,
nest = TRUE
)
m_bmi <- svyglm(diabetes_dx ~ bmi, design = des_bmi, family = quasibinomial())
broom::tidy(m_bmi, conf.int = TRUE)
# Hypertension (binary factor; from BPHIGH4: 1 yes, 3 no)
tx_bphigh <- BRFSS2011 %>%
filter(x.state == 48) %>%
select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, bphigh4) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(
diabete3 == 1 ~ 1,
diabete3 == 3 ~ 0,
TRUE ~ NA_real_
),
bphigh4 = ifelse(bphigh4 %in% c(2, 4, 7, 9), NA, bphigh4),
htn = case_when(
bphigh4 == 1 ~ 1,
bphigh4 == 3 ~ 0,
TRUE ~ NA_real_
),
htn = factor(htn, levels = c(0, 1))
) %>%
filter(!is.na(diabetes_dx), !is.na(htn))
n_tx_bphigh <- nrow(tx_bphigh)
n_tx_bphigh
des_bphigh <- svydesign(
ids = ~x.psu,
strata = ~x.ststr,
weights = ~x.llcpwt,
data = tx_bphigh,
nest = TRUE
)
m_bphigh <- svyglm(diabetes_dx ~ htn, design = des_bphigh, family = quasibinomial())
broom::tidy(m_bphigh, conf.int = TRUE)
# Smoking status (4-level factor; from _SMOKER3 / x.smoker3)
tx_smoke <- BRFSS2011 %>%
filter(x.state == 48) %>%
select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, x.smoker3) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(
diabete3 == 1 ~ 1,
diabete3 == 3 ~ 0,
TRUE ~ NA_real_
),
x.smoker3 = ifelse(x.smoker3 == 9, NA, x.smoker3),
x.smoker3 = factor(x.smoker3, levels = c(1, 2, 3, 4))
) %>%
filter(!is.na(diabetes_dx), !is.na(x.smoker3))
n_tx_smoke <- nrow(tx_smoke)
n_tx_smoke
des_smoke <- svydesign(
ids = ~x.psu,
strata = ~x.ststr,
weights = ~x.llcpwt,
data = tx_smoke,
nest = TRUE
)
m_smoke <- svyglm(diabetes_dx ~ x.smoker3, design = des_smoke, family = quasibinomial())
broom::tidy(m_smoke, conf.int = TRUE)
# Exercise past 30 days (binary factor; EXERANY2: 1 yes, 2 no)
tx_exer <- BRFSS2011 %>%
filter(x.state == 48) %>%
select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, exerany2) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(
diabete3 == 1 ~ 1,
diabete3 == 3 ~ 0,
TRUE ~ NA_real_
),
exerany2 = ifelse(exerany2 %in% c(7, 9), NA, exerany2),
exer = case_when(
exerany2 == 1 ~ 1,
exerany2 == 2 ~ 0,
TRUE ~ NA_real_
),
exer = factor(exer, levels = c(0, 1))
) %>%
filter(!is.na(diabetes_dx), !is.na(exer))
n_tx_exer <- nrow(tx_exer)
n_tx_exer
des_exer <- svydesign(
ids = ~x.psu,
strata = ~x.ststr,
weights = ~x.llcpwt,
data = tx_exer,
nest = TRUE
)
m_exer <- svyglm(diabetes_dx ~ exer, design = des_exer, family = quasibinomial())
broom::tidy(m_exer, conf.int = TRUE)
# Education (ordered factor; EDUCA 1–6; remove 9)
tx_educ <- BRFSS2011 %>%
filter(x.state == 48) %>%
select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, educa) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(
diabete3 == 1 ~ 1,
diabete3 == 3 ~ 0,
TRUE ~ NA_real_
),
educa = ifelse(educa == 9, NA, educa),
educa = ordered(educa, levels = 1:6)
) %>%
filter(!is.na(diabetes_dx), !is.na(educa))
n_tx_educ <- nrow(tx_educ)
n_tx_educ
des_educ <- svydesign(
ids = ~x.psu,
strata = ~x.ststr,
weights = ~x.llcpwt,
data = tx_educ,
nest = TRUE
)
m_educ <- svyglm(diabetes_dx ~ educa, design = des_educ, family = quasibinomial())
broom::tidy(m_educ, conf.int = TRUE)
# Income (ordered factor; INCOME2 1–8; remove 77/99)
tx_inc <- BRFSS2011 %>%
filter(x.state == 48) %>%
select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, income2) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(
diabete3 == 1 ~ 1,
diabete3 == 3 ~ 0,
TRUE ~ NA_real_
),
income2 = ifelse(income2 %in% c(77, 99), NA, income2),
income2 = ordered(income2, levels = 1:8)
) %>%
filter(!is.na(diabetes_dx), !is.na(income2))
n_tx_inc <- nrow(tx_inc)
n_tx_inc
des_inc <- svydesign(
ids = ~x.psu,
strata = ~x.ststr,
weights = ~x.llcpwt,
data = tx_inc,
nest = TRUE
)
m_inc <- svyglm(diabetes_dx ~ income2, design = des_inc, family = quasibinomial())
broom::tidy(m_inc, conf.int = TRUE)
# Insurance coverage (binary factor; HLTHPLN1: 1 yes, 2 no)
tx_ins <- BRFSS2011 %>%
filter(x.state == 48) %>%
select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, hlthpln1) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(
diabete3 == 1 ~ 1,
diabete3 == 3 ~ 0,
TRUE ~ NA_real_
),
hlthpln1 = ifelse(hlthpln1 %in% c(7, 9), NA, hlthpln1),
insured = case_when(
hlthpln1 == 1 ~ 1,
hlthpln1 == 2 ~ 0,
TRUE ~ NA_real_
),
insured = factor(insured, levels = c(0, 1))
) %>%
filter(!is.na(diabetes_dx), !is.na(insured))
n_tx_ins <- nrow(tx_ins)
n_tx_ins
des_ins <- svydesign(
ids = ~x.psu,
strata = ~x.ststr,
weights = ~x.llcpwt,
data = tx_ins,
nest = TRUE
)
m_ins <- svyglm(diabetes_dx ~ insured, design = des_ins, family = quasibinomial())
broom::tidy(m_ins, conf.int = TRUE)
# Routine checkup (ordered factor; CHECKUP1 1,2,3,4,8; remove 7/9)
tx_chk <- BRFSS2011 %>%
filter(x.state == 48) %>%
select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, checkup1) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(
diabete3 == 1 ~ 1,
diabete3 == 3 ~ 0,
TRUE ~ NA_real_
),
checkup1 = ifelse(checkup1 %in% c(7, 9), NA, checkup1),
checkup1 = ordered(checkup1, levels = c(1, 2, 3, 4, 8))
) %>%
filter(!is.na(diabetes_dx), !is.na(checkup1))
n_tx_chk <- nrow(tx_chk)
n_tx_chk
des_chk <- svydesign(
ids = ~x.psu,
strata = ~x.ststr,
weights = ~x.llcpwt,
data = tx_chk,
nest = TRUE
)
m_chk <- svyglm(diabetes_dx ~ checkup1, design = des_chk, family = quasibinomial())
broom::tidy(m_chk, conf.int = TRUE)