library(tidyverse)
library(survey)
library(broom)
# Handle strata with a single PSU
options(survey.lonely.psu = "adjust")

# Load one year (edit year + path as needed)
BRFSS2011 <- readRDS("C:/Users/63650/Downloads/BRFSS2011.RDS")

# Texas + valid diabete3 only
# Keep only design vars + diabete3, drop NA and non-eligible codes

tx_diab <- BRFSS2011 %>%
  filter(x.state == 48) %>%
  select(x.state, x.psu, x.ststr, x.llcpwt, diabete3) %>%
  mutate(
    diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
    diabetes_dx = case_when(
      diabete3 == 1 ~ 1,   # Yes
      diabete3 == 3 ~ 0,   # No
      TRUE ~ NA_real_
    )
  ) %>%
  filter(!is.na(diabetes_dx))

# Count of Texas respondents with valid diabetes outcome
n_tx_diab <- nrow(tx_diab)
n_tx_diab

# Survey design object for the outcome-only sample
des_tx_diab <- svydesign(
  ids = ~x.psu,
  strata = ~x.ststr,
  weights = ~x.llcpwt,
  data = tx_diab,
  nest = TRUE
)

# Bivariate regression on AGE
# Make a unique subset with ONLY diabetes + age (drop NA in age)
# Age treated as numeric (continuous)

tx_age <- BRFSS2011 %>%
  filter(x.state == 48) %>%
  select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, age) %>%
  mutate(
    diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
    diabetes_dx = case_when(
      diabete3 == 1 ~ 1,
      diabete3 == 3 ~ 0,
      TRUE ~ NA_real_
    ),
    age = ifelse(age %in% c(7, 9), NA, age),
    age = as.numeric(age)
  ) %>%
  filter(!is.na(diabetes_dx), !is.na(age), age >= 18)

# Count for AGE model
n_tx_age <- nrow(tx_age)
n_tx_age

# Survey design for AGE subset
des_age <- svydesign(
  ids = ~x.psu,
  strata = ~x.ststr,
  weights = ~x.llcpwt,
  data = tx_age,
  nest = TRUE
)

# Bivariate survey-weighted logistic regression
m_age <- svyglm(diabetes_dx ~ age, design = des_age, family = quasibinomial())

# Output for appendix
broom::tidy(m_age, conf.int = TRUE)

# Bivariate regression on sex (2011)

tx_sex <- BRFSS2011 %>%
  filter(x.state == 48) %>%
  select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, sex) %>%
  mutate(
    diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
    diabetes_dx = case_when(
      diabete3 == 1 ~ 1,
      diabete3 == 3 ~ 0,
      TRUE ~ NA_real_
    ),
    sex = ifelse(sex %in% c(7, 9), NA, sex),
    sex = factor(sex, levels = c(1, 2))   # 1=Male, 2=Female
  ) %>%
  filter(!is.na(diabetes_dx), !is.na(sex))

# Count for SEX model
n_tx_sex <- nrow(tx_sex)
n_tx_sex

# Survey design for SEX subset
des_sex <- svydesign(
  ids = ~x.psu,
  strata = ~x.ststr,
  weights = ~x.llcpwt,
  data = tx_sex,
  nest = TRUE
)

# Bivariate survey-weighted logistic regression (sex as factor)
m_sex <- svyglm(diabetes_dx ~ sex, design = des_sex, family = quasibinomial())

# Output for appendix
broom::tidy(m_sex, conf.int = TRUE)

# BMI (numeric; from _BMI5 / x.bmi5 with 2 implied decimals)
tx_bmi <- BRFSS2011 %>%
  filter(x.state == 48) %>%
  select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, x.bmi5) %>%
  mutate(
    diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
    diabetes_dx = case_when(
      diabete3 == 1 ~ 1,
      diabete3 == 3 ~ 0,
      TRUE ~ NA_real_
    ),
    x.bmi5 = suppressWarnings(as.numeric(x.bmi5)),
    x.bmi5 = ifelse(x.bmi5 %in% c(0, 9999), NA, x.bmi5),
    x.bmi5 = ifelse(x.bmi5 < 1200 | x.bmi5 > 9999, NA, x.bmi5),
    bmi = x.bmi5 / 100
  ) %>%
  filter(!is.na(diabetes_dx), !is.na(bmi))

n_tx_bmi <- nrow(tx_bmi)
n_tx_bmi

des_bmi <- svydesign(
  ids = ~x.psu,
  strata = ~x.ststr,
  weights = ~x.llcpwt,
  data = tx_bmi,
  nest = TRUE
)

m_bmi <- svyglm(diabetes_dx ~ bmi, design = des_bmi, family = quasibinomial())
broom::tidy(m_bmi, conf.int = TRUE)


# Hypertension (binary factor; from BPHIGH4: 1 yes, 3 no)
tx_bphigh <- BRFSS2011 %>%
  filter(x.state == 48) %>%
  select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, bphigh4) %>%
  mutate(
    diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
    diabetes_dx = case_when(
      diabete3 == 1 ~ 1,
      diabete3 == 3 ~ 0,
      TRUE ~ NA_real_
    ),
    bphigh4 = ifelse(bphigh4 %in% c(2, 4, 7, 9), NA, bphigh4),
    htn = case_when(
      bphigh4 == 1 ~ 1,
      bphigh4 == 3 ~ 0,
      TRUE ~ NA_real_
    ),
    htn = factor(htn, levels = c(0, 1))
  ) %>%
  filter(!is.na(diabetes_dx), !is.na(htn))

n_tx_bphigh <- nrow(tx_bphigh)
n_tx_bphigh

des_bphigh <- svydesign(
  ids = ~x.psu,
  strata = ~x.ststr,
  weights = ~x.llcpwt,
  data = tx_bphigh,
  nest = TRUE
)

m_bphigh <- svyglm(diabetes_dx ~ htn, design = des_bphigh, family = quasibinomial())
broom::tidy(m_bphigh, conf.int = TRUE)


# Smoking status (4-level factor; from _SMOKER3 / x.smoker3)
tx_smoke <- BRFSS2011 %>%
  filter(x.state == 48) %>%
  select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, x.smoker3) %>%
  mutate(
    diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
    diabetes_dx = case_when(
      diabete3 == 1 ~ 1,
      diabete3 == 3 ~ 0,
      TRUE ~ NA_real_
    ),
    x.smoker3 = ifelse(x.smoker3 == 9, NA, x.smoker3),
    x.smoker3 = factor(x.smoker3, levels = c(1, 2, 3, 4))
  ) %>%
  filter(!is.na(diabetes_dx), !is.na(x.smoker3))

n_tx_smoke <- nrow(tx_smoke)
n_tx_smoke

des_smoke <- svydesign(
  ids = ~x.psu,
  strata = ~x.ststr,
  weights = ~x.llcpwt,
  data = tx_smoke,
  nest = TRUE
)

m_smoke <- svyglm(diabetes_dx ~ x.smoker3, design = des_smoke, family = quasibinomial())
broom::tidy(m_smoke, conf.int = TRUE)


# Exercise past 30 days (binary factor; EXERANY2: 1 yes, 2 no)
tx_exer <- BRFSS2011 %>%
  filter(x.state == 48) %>%
  select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, exerany2) %>%
  mutate(
    diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
    diabetes_dx = case_when(
      diabete3 == 1 ~ 1,
      diabete3 == 3 ~ 0,
      TRUE ~ NA_real_
    ),
    exerany2 = ifelse(exerany2 %in% c(7, 9), NA, exerany2),
    exer = case_when(
      exerany2 == 1 ~ 1,
      exerany2 == 2 ~ 0,
      TRUE ~ NA_real_
    ),
    exer = factor(exer, levels = c(0, 1))
  ) %>%
  filter(!is.na(diabetes_dx), !is.na(exer))

n_tx_exer <- nrow(tx_exer)
n_tx_exer

des_exer <- svydesign(
  ids = ~x.psu,
  strata = ~x.ststr,
  weights = ~x.llcpwt,
  data = tx_exer,
  nest = TRUE
)

m_exer <- svyglm(diabetes_dx ~ exer, design = des_exer, family = quasibinomial())
broom::tidy(m_exer, conf.int = TRUE)


# Education (ordered factor; EDUCA 1–6; remove 9)
tx_educ <- BRFSS2011 %>%
  filter(x.state == 48) %>%
  select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, educa) %>%
  mutate(
    diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
    diabetes_dx = case_when(
      diabete3 == 1 ~ 1,
      diabete3 == 3 ~ 0,
      TRUE ~ NA_real_
    ),
    educa = ifelse(educa == 9, NA, educa),
    educa = ordered(educa, levels = 1:6)
  ) %>%
  filter(!is.na(diabetes_dx), !is.na(educa))

n_tx_educ <- nrow(tx_educ)
n_tx_educ

des_educ <- svydesign(
  ids = ~x.psu,
  strata = ~x.ststr,
  weights = ~x.llcpwt,
  data = tx_educ,
  nest = TRUE
)

m_educ <- svyglm(diabetes_dx ~ educa, design = des_educ, family = quasibinomial())
broom::tidy(m_educ, conf.int = TRUE)


# Income (ordered factor; INCOME2 1–8; remove 77/99)
tx_inc <- BRFSS2011 %>%
  filter(x.state == 48) %>%
  select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, income2) %>%
  mutate(
    diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
    diabetes_dx = case_when(
      diabete3 == 1 ~ 1,
      diabete3 == 3 ~ 0,
      TRUE ~ NA_real_
    ),
    income2 = ifelse(income2 %in% c(77, 99), NA, income2),
    income2 = ordered(income2, levels = 1:8)
  ) %>%
  filter(!is.na(diabetes_dx), !is.na(income2))

n_tx_inc <- nrow(tx_inc)
n_tx_inc

des_inc <- svydesign(
  ids = ~x.psu,
  strata = ~x.ststr,
  weights = ~x.llcpwt,
  data = tx_inc,
  nest = TRUE
)

m_inc <- svyglm(diabetes_dx ~ income2, design = des_inc, family = quasibinomial())
broom::tidy(m_inc, conf.int = TRUE)


# Insurance coverage (binary factor; HLTHPLN1: 1 yes, 2 no)
tx_ins <- BRFSS2011 %>%
  filter(x.state == 48) %>%
  select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, hlthpln1) %>%
  mutate(
    diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
    diabetes_dx = case_when(
      diabete3 == 1 ~ 1,
      diabete3 == 3 ~ 0,
      TRUE ~ NA_real_
    ),
    hlthpln1 = ifelse(hlthpln1 %in% c(7, 9), NA, hlthpln1),
    insured = case_when(
      hlthpln1 == 1 ~ 1,
      hlthpln1 == 2 ~ 0,
      TRUE ~ NA_real_
    ),
    insured = factor(insured, levels = c(0, 1))
  ) %>%
  filter(!is.na(diabetes_dx), !is.na(insured))

n_tx_ins <- nrow(tx_ins)
n_tx_ins

des_ins <- svydesign(
  ids = ~x.psu,
  strata = ~x.ststr,
  weights = ~x.llcpwt,
  data = tx_ins,
  nest = TRUE
)

m_ins <- svyglm(diabetes_dx ~ insured, design = des_ins, family = quasibinomial())
broom::tidy(m_ins, conf.int = TRUE)


# Routine checkup (ordered factor; CHECKUP1 1,2,3,4,8; remove 7/9)
tx_chk <- BRFSS2011 %>%
  filter(x.state == 48) %>%
  select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, checkup1) %>%
  mutate(
    diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
    diabetes_dx = case_when(
      diabete3 == 1 ~ 1,
      diabete3 == 3 ~ 0,
      TRUE ~ NA_real_
    ),
    checkup1 = ifelse(checkup1 %in% c(7, 9), NA, checkup1),
    checkup1 = ordered(checkup1, levels = c(1, 2, 3, 4, 8))
  ) %>%
  filter(!is.na(diabetes_dx), !is.na(checkup1))

n_tx_chk <- nrow(tx_chk)
n_tx_chk

des_chk <- svydesign(
  ids = ~x.psu,
  strata = ~x.ststr,
  weights = ~x.llcpwt,
  data = tx_chk,
  nest = TRUE
)

m_chk <- svyglm(diabetes_dx ~ checkup1, design = des_chk, family = quasibinomial())
broom::tidy(m_chk, conf.int = TRUE)