library(tidyverse)
library(survey)
library(broom)
# Handle strata with a single PSU
options(survey.lonely.psu = "adjust")

# Load one year (edit year + path as needed)
BRFSS2011 <- readRDS("C:/Users/63650/Downloads/BRFSS2011.RDS")

# Texas + valid diabete3 only
# Keep only design vars + diabete3, drop NA and non-eligible codes

tx_diab <- BRFSS2011 %>%
  filter(x.state == 48) %>%
  select(x.state, x.psu, x.ststr, x.llcpwt, diabete3) %>%
  mutate(
    diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
    diabetes_dx = case_when(
      diabete3 == 1 ~ 1,   # Yes
      diabete3 == 3 ~ 0,   # No
      TRUE ~ NA_real_
    )
  ) %>%
  filter(!is.na(diabetes_dx))

# Count of Texas respondents with valid diabetes outcome
n_tx_diab <- nrow(tx_diab)
n_tx_diab
## [1] 14618
# Survey design object for the outcome-only sample
des_tx_diab <- svydesign(
  ids = ~x.psu,
  strata = ~x.ststr,
  weights = ~x.llcpwt,
  data = tx_diab,
  nest = TRUE
)

# Bivariate regression on AGE
# Make a unique subset with ONLY diabetes + age (drop NA in age)
# Age treated as numeric (continuous)

tx_age <- BRFSS2011 %>%
  filter(x.state == 48) %>%
  select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, age) %>%
  mutate(
    diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
    diabetes_dx = case_when(
      diabete3 == 1 ~ 1,
      diabete3 == 3 ~ 0,
      TRUE ~ NA_real_
    ),
    age = ifelse(age %in% c(7, 9), NA, age),
    age = as.numeric(age)
  ) %>%
  filter(!is.na(diabetes_dx), !is.na(age), age >= 18)

# Count for AGE model
n_tx_age <- nrow(tx_age)
n_tx_age
## [1] 14485
# Survey design for AGE subset
des_age <- svydesign(
  ids = ~x.psu,
  strata = ~x.ststr,
  weights = ~x.llcpwt,
  data = tx_age,
  nest = TRUE
)

# Bivariate survey-weighted logistic regression
m_age <- svyglm(diabetes_dx ~ age, design = des_age, family = quasibinomial())

# Output for appendix
broom::tidy(m_age, conf.int = TRUE)
## # A tibble: 2 × 7
##   term        estimate std.error statistic   p.value conf.low conf.high
##   <chr>          <dbl>     <dbl>     <dbl>     <dbl>    <dbl>     <dbl>
## 1 (Intercept)  -4.48     0.135       -33.1 2.90e-232  -4.74     -4.21  
## 2 age           0.0462   0.00208      22.2 1.95e-107   0.0422    0.0503
# Bivariate regression on sex (2011)

tx_sex <- BRFSS2011 %>%
  filter(x.state == 48) %>%
  select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, sex) %>%
  mutate(
    diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
    diabetes_dx = case_when(
      diabete3 == 1 ~ 1,
      diabete3 == 3 ~ 0,
      TRUE ~ NA_real_
    ),
    sex = ifelse(sex %in% c(7, 9), NA, sex),
    sex = factor(sex, levels = c(1, 2))   # 1=Male, 2=Female
  ) %>%
  filter(!is.na(diabetes_dx), !is.na(sex))

# Count for SEX model
n_tx_sex <- nrow(tx_sex)
n_tx_sex
## [1] 14618
# Survey design for SEX subset
des_sex <- svydesign(
  ids = ~x.psu,
  strata = ~x.ststr,
  weights = ~x.llcpwt,
  data = tx_sex,
  nest = TRUE
)

# Bivariate survey-weighted logistic regression (sex as factor)
m_sex <- svyglm(diabetes_dx ~ sex, design = des_sex, family = quasibinomial())

# Output for appendix
broom::tidy(m_sex, conf.int = TRUE)
## # A tibble: 2 × 7
##   term        estimate std.error statistic   p.value conf.low conf.high
##   <chr>          <dbl>     <dbl>     <dbl>     <dbl>    <dbl>     <dbl>
## 1 (Intercept)  -2.11      0.0635    -33.2  8.03e-233   -2.23    -1.98  
## 2 sex2         -0.0966    0.0845     -1.14 2.53e-  1   -0.262    0.0690
# BMI (numeric; from _BMI5 / x.bmi5 with 2 implied decimals)
tx_bmi <- BRFSS2011 %>%
  filter(x.state == 48) %>%
  select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, x.bmi5) %>%
  mutate(
    diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
    diabetes_dx = case_when(
      diabete3 == 1 ~ 1,
      diabete3 == 3 ~ 0,
      TRUE ~ NA_real_
    ),
    x.bmi5 = suppressWarnings(as.numeric(x.bmi5)),
    x.bmi5 = ifelse(x.bmi5 %in% c(0, 9999), NA, x.bmi5),
    x.bmi5 = ifelse(x.bmi5 < 1200 | x.bmi5 > 9999, NA, x.bmi5),
    bmi = x.bmi5 / 100
  ) %>%
  filter(!is.na(diabetes_dx), !is.na(bmi))

n_tx_bmi <- nrow(tx_bmi)
n_tx_bmi
## [1] 13704
des_bmi <- svydesign(
  ids = ~x.psu,
  strata = ~x.ststr,
  weights = ~x.llcpwt,
  data = tx_bmi,
  nest = TRUE
)

m_bmi <- svyglm(diabetes_dx ~ bmi, design = des_bmi, family = quasibinomial())
broom::tidy(m_bmi, conf.int = TRUE)
## # A tibble: 2 × 7
##   term        estimate std.error statistic   p.value conf.low conf.high
##   <chr>          <dbl>     <dbl>     <dbl>     <dbl>    <dbl>     <dbl>
## 1 (Intercept)  -4.63     0.212       -21.9 3.41e-104  -5.05     -4.22  
## 2 bmi           0.0847   0.00702      12.1 2.67e- 33   0.0709    0.0984
# Hypertension (binary factor; from BPHIGH4: 1 yes, 3 no)
tx_bphigh <- BRFSS2011 %>%
  filter(x.state == 48) %>%
  select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, bphigh4) %>%
  mutate(
    diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
    diabetes_dx = case_when(
      diabete3 == 1 ~ 1,
      diabete3 == 3 ~ 0,
      TRUE ~ NA_real_
    ),
    bphigh4 = ifelse(bphigh4 %in% c(2, 4, 7, 9), NA, bphigh4),
    htn = case_when(
      bphigh4 == 1 ~ 1,
      bphigh4 == 3 ~ 0,
      TRUE ~ NA_real_
    ),
    htn = factor(htn, levels = c(0, 1))
  ) %>%
  filter(!is.na(diabetes_dx), !is.na(htn))

n_tx_bphigh <- nrow(tx_bphigh)
n_tx_bphigh
## [1] 14310
des_bphigh <- svydesign(
  ids = ~x.psu,
  strata = ~x.ststr,
  weights = ~x.llcpwt,
  data = tx_bphigh,
  nest = TRUE
)

m_bphigh <- svyglm(diabetes_dx ~ htn, design = des_bphigh, family = quasibinomial())
broom::tidy(m_bphigh, conf.int = TRUE)
## # A tibble: 2 × 7
##   term        estimate std.error statistic   p.value conf.low conf.high
##   <chr>          <dbl>     <dbl>     <dbl>     <dbl>    <dbl>     <dbl>
## 1 (Intercept)    -3.10    0.0826     -37.5 2.11e-293    -3.26     -2.93
## 2 htn1            1.93    0.0979      19.7 1.66e- 85     1.74      2.12
# Smoking status (4-level factor; from _SMOKER3 / x.smoker3)
tx_smoke <- BRFSS2011 %>%
  filter(x.state == 48) %>%
  select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, x.smoker3) %>%
  mutate(
    diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
    diabetes_dx = case_when(
      diabete3 == 1 ~ 1,
      diabete3 == 3 ~ 0,
      TRUE ~ NA_real_
    ),
    x.smoker3 = ifelse(x.smoker3 == 9, NA, x.smoker3),
    x.smoker3 = factor(x.smoker3, levels = c(1, 2, 3, 4))
  ) %>%
  filter(!is.na(diabetes_dx), !is.na(x.smoker3))

n_tx_smoke <- nrow(tx_smoke)
n_tx_smoke
## [1] 14512
des_smoke <- svydesign(
  ids = ~x.psu,
  strata = ~x.ststr,
  weights = ~x.llcpwt,
  data = tx_smoke,
  nest = TRUE
)

m_smoke <- svyglm(diabetes_dx ~ x.smoker3, design = des_smoke, family = quasibinomial())
broom::tidy(m_smoke, conf.int = TRUE)
## # A tibble: 4 × 7
##   term        estimate std.error statistic  p.value conf.low conf.high
##   <chr>          <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
## 1 (Intercept)  -2.37       0.141   -16.8   4.36e-63   -2.65     -2.10 
## 2 x.smoker32    0.0630     0.261     0.241 8.09e- 1   -0.449     0.575
## 3 x.smoker33    0.654      0.158     4.13  3.62e- 5    0.344     0.964
## 4 x.smoker34    0.0780     0.153     0.511 6.10e- 1   -0.221     0.377
# Exercise past 30 days (binary factor; EXERANY2: 1 yes, 2 no)
tx_exer <- BRFSS2011 %>%
  filter(x.state == 48) %>%
  select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, exerany2) %>%
  mutate(
    diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
    diabetes_dx = case_when(
      diabete3 == 1 ~ 1,
      diabete3 == 3 ~ 0,
      TRUE ~ NA_real_
    ),
    exerany2 = ifelse(exerany2 %in% c(7, 9), NA, exerany2),
    exer = case_when(
      exerany2 == 1 ~ 1,
      exerany2 == 2 ~ 0,
      TRUE ~ NA_real_
    ),
    exer = factor(exer, levels = c(0, 1))
  ) %>%
  filter(!is.na(diabetes_dx), !is.na(exer))

n_tx_exer <- nrow(tx_exer)
n_tx_exer
## [1] 13885
des_exer <- svydesign(
  ids = ~x.psu,
  strata = ~x.ststr,
  weights = ~x.llcpwt,
  data = tx_exer,
  nest = TRUE
)

m_exer <- svyglm(diabetes_dx ~ exer, design = des_exer, family = quasibinomial())
broom::tidy(m_exer, conf.int = TRUE)
## # A tibble: 2 × 7
##   term        estimate std.error statistic   p.value conf.low conf.high
##   <chr>          <dbl>     <dbl>     <dbl>     <dbl>    <dbl>     <dbl>
## 1 (Intercept)   -1.85     0.0737    -25.1  7.21e-136   -1.99     -1.70 
## 2 exer1         -0.436    0.0919     -4.75 2.09e-  6   -0.617    -0.256
# Education (ordered factor; EDUCA 1–6; remove 9)
tx_educ <- BRFSS2011 %>%
  filter(x.state == 48) %>%
  select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, educa) %>%
  mutate(
    diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
    diabetes_dx = case_when(
      diabete3 == 1 ~ 1,
      diabete3 == 3 ~ 0,
      TRUE ~ NA_real_
    ),
    educa = ifelse(educa == 9, NA, educa),
    educa = ordered(educa, levels = 1:6)
  ) %>%
  filter(!is.na(diabetes_dx), !is.na(educa))

n_tx_educ <- nrow(tx_educ)
n_tx_educ
## [1] 14565
des_educ <- svydesign(
  ids = ~x.psu,
  strata = ~x.ststr,
  weights = ~x.llcpwt,
  data = tx_educ,
  nest = TRUE
)

m_educ <- svyglm(diabetes_dx ~ educa, design = des_educ, family = quasibinomial())
broom::tidy(m_educ, conf.int = TRUE)
## # A tibble: 6 × 7
##   term        estimate std.error statistic   p.value conf.low conf.high
##   <chr>          <dbl>     <dbl>     <dbl>     <dbl>    <dbl>     <dbl>
## 1 (Intercept) -1.91       0.0882  -21.6    5.00e-102   -2.08    -1.73  
## 2 educa.L     -1.21       0.293    -4.12   3.82e-  5   -1.78    -0.633 
## 3 educa.Q      0.250      0.272     0.919  3.58e-  1   -0.283    0.783 
## 4 educa.C     -0.226      0.202    -1.12   2.62e-  1   -0.622    0.169 
## 5 educa^4     -0.182      0.140    -1.30   1.93e-  1   -0.456    0.0920
## 6 educa^5     -0.00859    0.117    -0.0735 9.41e-  1   -0.238    0.221
# Income (ordered factor; INCOME2 1–8; remove 77/99)
tx_inc <- BRFSS2011 %>%
  filter(x.state == 48) %>%
  select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, income2) %>%
  mutate(
    diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
    diabetes_dx = case_when(
      diabete3 == 1 ~ 1,
      diabete3 == 3 ~ 0,
      TRUE ~ NA_real_
    ),
    income2 = ifelse(income2 %in% c(77, 99), NA, income2),
    income2 = ordered(income2, levels = 1:8)
  ) %>%
  filter(!is.na(diabetes_dx), !is.na(income2))

n_tx_inc <- nrow(tx_inc)
n_tx_inc
## [1] 12507
des_inc <- svydesign(
  ids = ~x.psu,
  strata = ~x.ststr,
  weights = ~x.llcpwt,
  data = tx_inc,
  nest = TRUE
)

m_inc <- svyglm(diabetes_dx ~ income2, design = des_inc, family = quasibinomial())
broom::tidy(m_inc, conf.int = TRUE)
## # A tibble: 8 × 7
##   term         estimate std.error statistic  p.value conf.low conf.high
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
## 1 (Intercept) -2.04        0.0464 -43.9     0          -2.13    -1.95  
## 2 income2.L   -0.908       0.128   -7.11    1.21e-12   -1.16    -0.658 
## 3 income2.Q   -0.270       0.129   -2.09    3.65e- 2   -0.524   -0.0170
## 4 income2.C    0.0627      0.129    0.484   6.28e- 1   -0.191    0.316 
## 5 income2^4   -0.282       0.134   -2.11    3.48e- 2   -0.544   -0.0201
## 6 income2^5   -0.0766      0.134   -0.574   5.66e- 1   -0.338    0.185 
## 7 income2^6    0.000813    0.133    0.00610 9.95e- 1   -0.260    0.262 
## 8 income2^7    0.0367      0.137    0.268   7.88e- 1   -0.231    0.305
# Insurance coverage (binary factor; HLTHPLN1: 1 yes, 2 no)
tx_ins <- BRFSS2011 %>%
  filter(x.state == 48) %>%
  select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, hlthpln1) %>%
  mutate(
    diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
    diabetes_dx = case_when(
      diabete3 == 1 ~ 1,
      diabete3 == 3 ~ 0,
      TRUE ~ NA_real_
    ),
    hlthpln1 = ifelse(hlthpln1 %in% c(7, 9), NA, hlthpln1),
    insured = case_when(
      hlthpln1 == 1 ~ 1,
      hlthpln1 == 2 ~ 0,
      TRUE ~ NA_real_
    ),
    insured = factor(insured, levels = c(0, 1))
  ) %>%
  filter(!is.na(diabetes_dx), !is.na(insured))

n_tx_ins <- nrow(tx_ins)
n_tx_ins
## [1] 14568
des_ins <- svydesign(
  ids = ~x.psu,
  strata = ~x.ststr,
  weights = ~x.llcpwt,
  data = tx_ins,
  nest = TRUE
)

m_ins <- svyglm(diabetes_dx ~ insured, design = des_ins, family = quasibinomial())
broom::tidy(m_ins, conf.int = TRUE)
## # A tibble: 2 × 7
##   term        estimate std.error statistic   p.value conf.low conf.high
##   <chr>          <dbl>     <dbl>     <dbl>     <dbl>    <dbl>     <dbl>
## 1 (Intercept)   -2.48      0.105    -23.5  9.33e-120   -2.68     -2.27 
## 2 insured1       0.441     0.115      3.84 1.24e-  4    0.216     0.667
# Routine checkup (ordered factor; CHECKUP1 1,2,3,4,8; remove 7/9)
tx_chk <- BRFSS2011 %>%
  filter(x.state == 48) %>%
  select(x.state, x.psu, x.ststr, x.llcpwt, diabete3, checkup1) %>%
  mutate(
    diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
    diabetes_dx = case_when(
      diabete3 == 1 ~ 1,
      diabete3 == 3 ~ 0,
      TRUE ~ NA_real_
    ),
    checkup1 = ifelse(checkup1 %in% c(7, 9), NA, checkup1),
    checkup1 = ordered(checkup1, levels = c(1, 2, 3, 4, 8))
  ) %>%
  filter(!is.na(diabetes_dx), !is.na(checkup1))

n_tx_chk <- nrow(tx_chk)
n_tx_chk
## [1] 14446
des_chk <- svydesign(
  ids = ~x.psu,
  strata = ~x.ststr,
  weights = ~x.llcpwt,
  data = tx_chk,
  nest = TRUE
)

m_chk <- svyglm(diabetes_dx ~ checkup1, design = des_chk, family = quasibinomial())
broom::tidy(m_chk, conf.int = TRUE)
## # A tibble: 5 × 7
##   term        estimate std.error statistic   p.value conf.low conf.high
##   <chr>          <dbl>     <dbl>     <dbl>     <dbl>    <dbl>     <dbl>
## 1 (Intercept)  -2.76       0.110   -25.0   1.49e-135  -2.97      -2.54 
## 2 checkup1.L   -1.03       0.305    -3.37  7.57e-  4  -1.62      -0.429
## 3 checkup1.Q    0.399      0.276     1.45  1.48e-  1  -0.141      0.939
## 4 checkup1.C   -0.0474     0.205    -0.231 8.17e-  1  -0.449      0.355
## 5 checkup1^4    0.384      0.178     2.16  3.10e-  2   0.0352     0.734