library(tidyverse)
library(survey)
library(broom)
library(dplyr)
library(ggplot2)
# Load BRFSS data (pre-processed to RDS)
BRFSS2011 <- readRDS("C:/Users/63650/Downloads/BRFSS2011.RDS")
BRFSS2012 <- readRDS("C:/Users/63650/Downloads/BRFSS2012.RDS")
BRFSS2013 <- readRDS("C:/Users/63650/Downloads/BRFSS2013.RDS")
BRFSS2014 <- readRDS("C:/Users/63650/Downloads/BRFSS2014.RDS")
BRFSS2015 <- readRDS("C:/Users/63650/Downloads/BRFSS2015.RDS")
BRFSS2016 <- readRDS("C:/Users/63650/Downloads/BRFSS2016.RDS")
BRFSS2017 <- readRDS("C:/Users/63650/Downloads/BRFSS2017.RDS")
BRFSS2018 <- readRDS("C:/Users/63650/Downloads/BRFSS2018.RDS")
BRFSS2019 <- readRDS("C:/Users/63650/Downloads/BRFSS2019.RDS")
BRFSS2020 <- readRDS("C:/Users/63650/Downloads/BRFSS2020.RDS")
BRFSS2021 <- readRDS("C:/Users/63650/Downloads/BRFSS2021.RDS")
BRFSS2022 <- readRDS("C:/Users/63650/Downloads/BRFSS2022.RDS")
BRFSS2023 <- readRDS("C:/Users/63650/Downloads/BRFSS2023.RDS")
options(survey.lonely.psu = "adjust")
years <- 2011:2023
# ============================================================
# BRFSS Texas — Bivariate AGE regressions (2011–2023)
# IMPORTANT NOTE:
# 2011–2012 originally report AGE in years.
# To maintain comparability with 2013+, AGE is recoded into
# five-year categorical groups to match x.ageg5yr.
# From 2013 onward, x.ageg5yr is used directly.
# ============================================================
options(survey.lonely.psu = "adjust")
# ----------------
# 2011 (harmonize to _AGEG5YR structure)
# - Force age to numeric before binning
# - Explicitly exclude <18 and missing (so ageg5 matches 1–13 only)
# - Use unordered factor so coefficients are category-specific (not .L/.Q polynomials)
# ----------------
tx_age_2011 <- BRFSS2011 %>%
filter(x.state == 48) %>%
select(x.psu, x.ststr, x.llcpwt, diabete3, age) %>%
mutate(
# Outcome: keep only 1 (Yes) and 3 (No); drop all other / DK / refused
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(
diabete3 == 1 ~ 1,
diabete3 == 3 ~ 0,
TRUE ~ NA_real_
),
# Age: drop DK/refused, ensure numeric
age = ifelse(age %in% c(7, 9), NA, age),
age = as.numeric(age),
# Harmonize to 13-category _AGEG5YR style
ageg5 = case_when(
age >= 18 & age <= 24 ~ "1",
age >= 25 & age <= 29 ~ "2",
age >= 30 & age <= 34 ~ "3",
age >= 35 & age <= 39 ~ "4",
age >= 40 & age <= 44 ~ "5",
age >= 45 & age <= 49 ~ "6",
age >= 50 & age <= 54 ~ "7",
age >= 55 & age <= 59 ~ "8",
age >= 60 & age <= 64 ~ "9",
age >= 65 & age <= 69 ~ "10",
age >= 70 & age <= 74 ~ "11",
age >= 75 & age <= 79 ~ "12",
age >= 80 & age <= 99 ~ "13",
TRUE ~ NA_character_
)
) %>%
filter(!is.na(diabetes_dx), !is.na(ageg5))
# Unordered factor, set baseline explicitly to 18–24
tx_age_2011 <- tx_age_2011 %>%
mutate(ageg5 = relevel(factor(ageg5, levels = as.character(1:13)), ref = "1"))
des_age_2011 <- svydesign(
ids = ~x.psu,
strata = ~x.ststr,
weights = ~x.llcpwt,
data = tx_age_2011,
nest = TRUE
)
out_age_2011 <- broom::tidy(
svyglm(diabetes_dx ~ ageg5, design = des_age_2011, family = quasibinomial()),
conf.int = FALSE
)
# ----------------
# 2012 (same harmonization as 2011)
# ----------------
tx_age_2012 <- BRFSS2012 %>%
filter(x.state == 48) %>%
select(x.psu, x.ststr, x.llcpwt, diabete3, age) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(
diabete3 == 1 ~ 1,
diabete3 == 3 ~ 0,
TRUE ~ NA_real_
),
age = ifelse(age %in% c(7, 9), NA, age),
age = as.numeric(age),
ageg5 = case_when(
age >= 18 & age <= 24 ~ "1",
age >= 25 & age <= 29 ~ "2",
age >= 30 & age <= 34 ~ "3",
age >= 35 & age <= 39 ~ "4",
age >= 40 & age <= 44 ~ "5",
age >= 45 & age <= 49 ~ "6",
age >= 50 & age <= 54 ~ "7",
age >= 55 & age <= 59 ~ "8",
age >= 60 & age <= 64 ~ "9",
age >= 65 & age <= 69 ~ "10",
age >= 70 & age <= 74 ~ "11",
age >= 75 & age <= 79 ~ "12",
age >= 80 & age <= 99 ~ "13",
TRUE ~ NA_character_
)
) %>%
filter(!is.na(diabetes_dx), !is.na(ageg5)) %>%
mutate(ageg5 = relevel(factor(ageg5, levels = as.character(1:13)), ref = "1"))
des_age_2012 <- svydesign(
ids = ~x.psu,
strata = ~x.ststr,
weights = ~x.llcpwt,
data = tx_age_2012,
nest = TRUE
)
out_age_2012 <- broom::tidy(
svyglm(diabetes_dx ~ ageg5, design = des_age_2012, family = quasibinomial()),
conf.int = FALSE
)
# ----------------
# 2013 (use x.ageg5yr directly)
# ----------------
tx_age_2013 <- BRFSS2013 %>%
filter(x.state == 48) %>%
select(x.psu, x.ststr, x.llcpwt, diabete3, x.ageg5yr) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(
diabete3 == 1 ~ 1,
diabete3 == 3 ~ 0,
TRUE ~ NA_real_
),
ageg5 = ifelse(x.ageg5yr == 14, NA, x.ageg5yr),
ageg5 = as.character(ageg5)
) %>%
filter(!is.na(diabetes_dx), !is.na(ageg5)) %>%
mutate(ageg5 = relevel(factor(ageg5, levels = as.character(1:13)), ref = "1"))
des_age_2013 <- svydesign(
ids = ~x.psu,
strata = ~x.ststr,
weights = ~x.llcpwt,
data = tx_age_2013,
nest = TRUE
)
out_age_2013 <- broom::tidy(
svyglm(diabetes_dx ~ ageg5, design = des_age_2013, family = quasibinomial()),
conf.int = FALSE
)
# ----------------
# 2014 (same as 2013)
# ----------------
tx_age_2014 <- BRFSS2014 %>%
filter(x.state == 48) %>%
select(x.psu, x.ststr, x.llcpwt, diabete3, x.ageg5yr) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(
diabete3 == 1 ~ 1,
diabete3 == 3 ~ 0,
TRUE ~ NA_real_
),
ageg5 = ifelse(x.ageg5yr == 14, NA, x.ageg5yr),
ageg5 = as.character(ageg5)
) %>%
filter(!is.na(diabetes_dx), !is.na(ageg5)) %>%
mutate(ageg5 = relevel(factor(ageg5, levels = as.character(1:13)), ref = "1"))
des_age_2014 <- svydesign(ids = ~x.psu, strata = ~x.ststr, weights = ~x.llcpwt,
data = tx_age_2014, nest = TRUE)
out_age_2014 <- broom::tidy(svyglm(diabetes_dx ~ ageg5, design = des_age_2014, family = quasibinomial()),
conf.int = FALSE)
# ----------------
# 2015
# ----------------
tx_age_2015 <- BRFSS2015 %>%
filter(x.state == 48) %>%
select(x.psu, x.ststr, x.llcpwt, diabete3, x.ageg5yr) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(diabete3 == 1 ~ 1, diabete3 == 3 ~ 0, TRUE ~ NA_real_),
ageg5 = ifelse(x.ageg5yr == 14, NA, x.ageg5yr),
ageg5 = as.character(ageg5)
) %>%
filter(!is.na(diabetes_dx), !is.na(ageg5)) %>%
mutate(ageg5 = relevel(factor(ageg5, levels = as.character(1:13)), ref = "1"))
des_age_2015 <- svydesign(ids = ~x.psu, strata = ~x.ststr, weights = ~x.llcpwt,
data = tx_age_2015, nest = TRUE)
out_age_2015 <- broom::tidy(svyglm(diabetes_dx ~ ageg5, design = des_age_2015, family = quasibinomial()),
conf.int = FALSE)
# ----------------
# 2016
# ----------------
tx_age_2016 <- BRFSS2016 %>%
filter(x.state == 48) %>%
select(x.psu, x.ststr, x.llcpwt, diabete3, x.ageg5yr) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(diabete3 == 1 ~ 1, diabete3 == 3 ~ 0, TRUE ~ NA_real_),
ageg5 = ifelse(x.ageg5yr == 14, NA, x.ageg5yr),
ageg5 = as.character(ageg5)
) %>%
filter(!is.na(diabetes_dx), !is.na(ageg5)) %>%
mutate(ageg5 = relevel(factor(ageg5, levels = as.character(1:13)), ref = "1"))
des_age_2016 <- svydesign(ids = ~x.psu, strata = ~x.ststr, weights = ~x.llcpwt,
data = tx_age_2016, nest = TRUE)
out_age_2016 <- broom::tidy(svyglm(diabetes_dx ~ ageg5, design = des_age_2016, family = quasibinomial()),
conf.int = FALSE)
# ----------------
# 2017
# ----------------
tx_age_2017 <- BRFSS2017 %>%
filter(x.state == 48) %>%
select(x.psu, x.ststr, x.llcpwt, diabete3, x.ageg5yr) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(diabete3 == 1 ~ 1, diabete3 == 3 ~ 0, TRUE ~ NA_real_),
ageg5 = ifelse(x.ageg5yr == 14, NA, x.ageg5yr),
ageg5 = as.character(ageg5)
) %>%
filter(!is.na(diabetes_dx), !is.na(ageg5)) %>%
mutate(ageg5 = relevel(factor(ageg5, levels = as.character(1:13)), ref = "1"))
des_age_2017 <- svydesign(ids = ~x.psu, strata = ~x.ststr, weights = ~x.llcpwt,
data = tx_age_2017, nest = TRUE)
out_age_2017 <- broom::tidy(svyglm(diabetes_dx ~ ageg5, design = des_age_2017, family = quasibinomial()),
conf.int = FALSE)
# ----------------
# 2018
# ----------------
tx_age_2018 <- BRFSS2018 %>%
filter(x.state == 48) %>%
select(x.psu, x.ststr, x.llcpwt, diabete3, x.ageg5yr) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(diabete3 == 1 ~ 1, diabete3 == 3 ~ 0, TRUE ~ NA_real_),
ageg5 = ifelse(x.ageg5yr == 14, NA, x.ageg5yr),
ageg5 = as.character(ageg5)
) %>%
filter(!is.na(diabetes_dx), !is.na(ageg5)) %>%
mutate(ageg5 = relevel(factor(ageg5, levels = as.character(1:13)), ref = "1"))
des_age_2018 <- svydesign(ids = ~x.psu, strata = ~x.ststr, weights = ~x.llcpwt,
data = tx_age_2018, nest = TRUE)
out_age_2018 <- broom::tidy(svyglm(diabetes_dx ~ ageg5, design = des_age_2018, family = quasibinomial()),
conf.int = FALSE)
# ----------------
# NOTE:
# Beginning in 2019, BRFSS renames the diabetes variable from DIABETE3 to DIABETE4.
# To preserve consistency across years, DIABETE4 is renamed to DIABETE3
# before any recoding or regression is performed.
# ----------------
tx_age_2019 <- BRFSS2019 %>%
filter(x.state == 48) %>%
select(x.psu, x.ststr, x.llcpwt, diabete4, x.ageg5yr) %>%
rename(diabete3 = diabete4) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(diabete3 == 1 ~ 1, diabete3 == 3 ~ 0, TRUE ~ NA_real_),
ageg5 = ifelse(x.ageg5yr == 14, NA, x.ageg5yr),
ageg5 = as.character(ageg5)
) %>%
filter(!is.na(diabetes_dx), !is.na(ageg5)) %>%
mutate(ageg5 = relevel(factor(ageg5, levels = as.character(1:13)), ref = "1"))
des_age_2019 <- svydesign(ids = ~x.psu, strata = ~x.ststr, weights = ~x.llcpwt,
data = tx_age_2019, nest = TRUE)
out_age_2019 <- broom::tidy(svyglm(diabetes_dx ~ ageg5, design = des_age_2019, family = quasibinomial()),
conf.int = FALSE)
# ----------------
# 2020
# ----------------
tx_age_2020 <- BRFSS2020 %>%
filter(x.state == 48) %>%
select(x.psu, x.ststr, x.llcpwt, diabete4, x.ageg5yr) %>%
rename(diabete3 = diabete4) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(diabete3 == 1 ~ 1, diabete3 == 3 ~ 0, TRUE ~ NA_real_),
ageg5 = ifelse(x.ageg5yr == 14, NA, x.ageg5yr),
ageg5 = as.character(ageg5)
) %>%
filter(!is.na(diabetes_dx), !is.na(ageg5)) %>%
mutate(ageg5 = relevel(factor(ageg5, levels = as.character(1:13)), ref = "1"))
des_age_2020 <- svydesign(ids = ~x.psu, strata = ~x.ststr, weights = ~x.llcpwt,
data = tx_age_2020, nest = TRUE)
out_age_2020 <- broom::tidy(svyglm(diabetes_dx ~ ageg5, design = des_age_2020, family = quasibinomial()),
conf.int = FALSE)
# ----------------
# 2021
# ----------------
tx_age_2021 <- BRFSS2021 %>%
filter(x.state == 48) %>%
select(x.psu, x.ststr, x.llcpwt, diabete4, x.ageg5yr) %>%
rename(diabete3 = diabete4) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(diabete3 == 1 ~ 1, diabete3 == 3 ~ 0, TRUE ~ NA_real_),
ageg5 = ifelse(x.ageg5yr == 14, NA, x.ageg5yr),
ageg5 = as.character(ageg5)
) %>%
filter(!is.na(diabetes_dx), !is.na(ageg5)) %>%
mutate(ageg5 = relevel(factor(ageg5, levels = as.character(1:13)), ref = "1"))
des_age_2021 <- svydesign(ids = ~x.psu, strata = ~x.ststr, weights = ~x.llcpwt,
data = tx_age_2021, nest = TRUE)
out_age_2021 <- broom::tidy(svyglm(diabetes_dx ~ ageg5, design = des_age_2021, family = quasibinomial()),
conf.int = FALSE)
# ----------------
# 2022
# ----------------
tx_age_2022 <- BRFSS2022 %>%
filter(x.state == 48) %>%
select(x.psu, x.ststr, x.llcpwt, diabete4, x.ageg5yr) %>%
rename(diabete3 = diabete4) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(diabete3 == 1 ~ 1, diabete3 == 3 ~ 0, TRUE ~ NA_real_),
ageg5 = ifelse(x.ageg5yr == 14, NA, x.ageg5yr),
ageg5 = as.character(ageg5)
) %>%
filter(!is.na(diabetes_dx), !is.na(ageg5)) %>%
mutate(ageg5 = relevel(factor(ageg5, levels = as.character(1:13)), ref = "1"))
des_age_2022 <- svydesign(ids = ~x.psu, strata = ~x.ststr, weights = ~x.llcpwt,
data = tx_age_2022, nest = TRUE)
out_age_2022 <- broom::tidy(svyglm(diabetes_dx ~ ageg5, design = des_age_2022, family = quasibinomial()),
conf.int = FALSE)
# ----------------
# 2023
# ----------------
tx_age_2023 <- BRFSS2023 %>%
filter(x.state == 48) %>%
select(x.psu, x.ststr, x.llcpwt, diabete4, x.ageg5yr) %>%
rename(diabete3 = diabete4) %>%
mutate(
diabete3 = ifelse(diabete3 %in% c(7, 9), NA, diabete3),
diabetes_dx = case_when(diabete3 == 1 ~ 1, diabete3 == 3 ~ 0, TRUE ~ NA_real_),
ageg5 = ifelse(x.ageg5yr == 14, NA, x.ageg5yr),
ageg5 = as.character(ageg5)
) %>%
filter(!is.na(diabetes_dx), !is.na(ageg5)) %>%
mutate(ageg5 = relevel(factor(ageg5, levels = as.character(1:13)), ref = "1"))
des_age_2023 <- svydesign(ids = ~x.psu, strata = ~x.ststr, weights = ~x.llcpwt,
data = tx_age_2023, nest = TRUE)
out_age_2023 <- broom::tidy(svyglm(diabetes_dx ~ ageg5, design = des_age_2023, family = quasibinomial()),
conf.int = FALSE)
# Pool 2012–2014 (pre-ACA)
pool_2012_2014 <- bind_rows(
tx_age_2012 %>% select(x.psu,x.ststr,x.llcpwt,diabetes_dx,ageg5) %>%
mutate(x.psu=as.numeric(x.psu), x.ststr=as.numeric(x.ststr), x.llcpwt=as.numeric(x.llcpwt),
diabetes_dx=as.numeric(diabetes_dx), ageg5=as.character(ageg5)),
tx_age_2013 %>% select(x.psu,x.ststr,x.llcpwt,diabetes_dx,ageg5) %>%
mutate(x.psu=as.numeric(x.psu), x.ststr=as.numeric(x.ststr), x.llcpwt=as.numeric(x.llcpwt),
diabetes_dx=as.numeric(diabetes_dx), ageg5=as.character(ageg5)),
tx_age_2014 %>% select(x.psu,x.ststr,x.llcpwt,diabetes_dx,ageg5) %>%
mutate(x.psu=as.numeric(x.psu), x.ststr=as.numeric(x.ststr), x.llcpwt=as.numeric(x.llcpwt),
diabetes_dx=as.numeric(diabetes_dx), ageg5=as.character(ageg5))
) %>% mutate(ageg5 = relevel(factor(ageg5, levels = as.character(1:13)), ref = "1"))
des_2012_2014 <- svydesign(ids=~x.psu, strata=~x.ststr, weights=~x.llcpwt, data=pool_2012_2014, nest=TRUE)
out_2012_2014 <- broom::tidy(svyglm(diabetes_dx ~ ageg5, design=des_2012_2014, family=quasibinomial()), conf.int=FALSE)
strong_2012_2014 <- out_2012_2014 %>% filter(term!="(Intercept)") %>% slice_min(p.value, n=1, with_ties=FALSE)
strong_2012_2014
# Pool 2014–2016 (early ACA)
pool_2014_2016 <- bind_rows(
tx_age_2014 %>% select(x.psu,x.ststr,x.llcpwt,diabetes_dx,ageg5) %>%
mutate(x.psu=as.numeric(x.psu), x.ststr=as.numeric(x.ststr), x.llcpwt=as.numeric(x.llcpwt),
diabetes_dx=as.numeric(diabetes_dx), ageg5=as.character(ageg5)),
tx_age_2015 %>% select(x.psu,x.ststr,x.llcpwt,diabetes_dx,ageg5) %>%
mutate(x.psu=as.numeric(x.psu), x.ststr=as.numeric(x.ststr), x.llcpwt=as.numeric(x.llcpwt),
diabetes_dx=as.numeric(diabetes_dx), ageg5=as.character(ageg5)),
tx_age_2016 %>% select(x.psu,x.ststr,x.llcpwt,diabetes_dx,ageg5) %>%
mutate(x.psu=as.numeric(x.psu), x.ststr=as.numeric(x.ststr), x.llcpwt=as.numeric(x.llcpwt),
diabetes_dx=as.numeric(diabetes_dx), ageg5=as.character(ageg5))
) %>% mutate(ageg5 = relevel(factor(ageg5, levels = as.character(1:13)), ref = "1"))
des_2014_2016 <- svydesign(ids=~x.psu, strata=~x.ststr, weights=~x.llcpwt, data=pool_2014_2016, nest=TRUE)
out_2014_2016 <- broom::tidy(svyglm(diabetes_dx ~ ageg5, design=des_2014_2016, family=quasibinomial()), conf.int=FALSE)
strong_2014_2016 <- out_2014_2016 %>% filter(term!="(Intercept)") %>% slice_min(p.value, n=1, with_ties=FALSE)
strong_2014_2016
# Pool 2020–2023 (COVID era)
pool_2020_2023 <- bind_rows(
tx_age_2020 %>% select(x.psu,x.ststr,x.llcpwt,diabetes_dx,ageg5) %>%
mutate(x.psu=as.numeric(x.psu), x.ststr=as.numeric(x.ststr), x.llcpwt=as.numeric(x.llcpwt),
diabetes_dx=as.numeric(diabetes_dx), ageg5=as.character(ageg5)),
tx_age_2021 %>% select(x.psu,x.ststr,x.llcpwt,diabetes_dx,ageg5) %>%
mutate(x.psu=as.numeric(x.psu), x.ststr=as.numeric(x.ststr), x.llcpwt=as.numeric(x.llcpwt),
diabetes_dx=as.numeric(diabetes_dx), ageg5=as.character(ageg5)),
tx_age_2022 %>% select(x.psu,x.ststr,x.llcpwt,diabetes_dx,ageg5) %>%
mutate(x.psu=as.numeric(x.psu), x.ststr=as.numeric(x.ststr), x.llcpwt=as.numeric(x.llcpwt),
diabetes_dx=as.numeric(diabetes_dx), ageg5=as.character(ageg5)),
tx_age_2023 %>% select(x.psu,x.ststr,x.llcpwt,diabetes_dx,ageg5) %>%
mutate(x.psu=as.numeric(x.psu), x.ststr=as.numeric(x.ststr), x.llcpwt=as.numeric(x.llcpwt),
diabetes_dx=as.numeric(diabetes_dx), ageg5=as.character(ageg5))
) %>% mutate(ageg5 = relevel(factor(ageg5, levels = as.character(1:13)), ref = "1"))
des_2020_2023 <- svydesign(ids=~x.psu, strata=~x.ststr, weights=~x.llcpwt, data=pool_2020_2023, nest=TRUE)
out_2020_2023 <- broom::tidy(svyglm(diabetes_dx ~ ageg5, design=des_2020_2023, family=quasibinomial()), conf.int=FALSE)
strong_2020_2023 <- out_2020_2023 %>% filter(term!="(Intercept)") %>% slice_min(p.value, n=1, with_ties=FALSE)
strong_2020_2023
# Combine all years
age_all <- bind_rows(
out_age_2011 %>% mutate(year = 2011),
out_age_2012 %>% mutate(year = 2012),
out_age_2013 %>% mutate(year = 2013),
out_age_2014 %>% mutate(year = 2014),
out_age_2015 %>% mutate(year = 2015),
out_age_2016 %>% mutate(year = 2016),
out_age_2017 %>% mutate(year = 2017),
out_age_2018 %>% mutate(year = 2018),
out_age_2019 %>% mutate(year = 2019),
out_age_2020 %>% mutate(year = 2020),
out_age_2021 %>% mutate(year = 2021),
out_age_2022 %>% mutate(year = 2022),
out_age_2023 %>% mutate(year = 2023)
)
# Keep only age-category coefficients (drop intercept), then pick the single smallest p-value per year
age_strongest <- age_all %>%
filter(term != "(Intercept)") %>%
group_by(year) %>%
slice_min(order_by = p.value, n = 1, with_ties = FALSE) %>%
ungroup()
# Build age_period_table from pooled strongest objects
age_period_table <- bind_rows(
strong_2012_2014 %>% mutate(period = "2012–2014"),
strong_2014_2016 %>% mutate(period = "2014–2016"),
strong_2020_2023 %>% mutate(period = "2020–2023")
)
# Pooled-period points (from age_period_table)
age_pooled_plot <- age_period_table %>%
mutate(
year = c(2013, 2015, 2021.5), # midpoints: 2012–14, 2014–16, 2020–23
label = term
)
# Plot
ggplot(age_strongest, aes(x = year, y = estimate)) +
geom_line() +
geom_point(size = 2.5) +
# Error bars for yearly points
geom_errorbar(
aes(
ymin = estimate - 1.96 * std.error,
ymax = estimate + 1.96 * std.error
),
width = 0.2,
linewidth = 0.6
) +
geom_text(
aes(label = term),
vjust = -1.2,
size = 2.8,
check_overlap = TRUE
) +
geom_vline(xintercept = 2014, linetype = "dashed", color = "#1f77b4", linewidth = 0.8) +
geom_vline(xintercept = 2020, linetype = "dashed", color = "#d62728", linewidth = 0.8) +
annotate(
"text",
x = 2014,
y = max(age_strongest$estimate, na.rm = TRUE),
label = "ACA",
color = "#1f77b4",
angle = 90,
hjust = -1.3,
vjust = -0.6,
size = 4
) +
annotate(
"text",
x = 2020,
y = max(age_strongest$estimate, na.rm = TRUE),
label = "COVID",
color = "#d62728",
angle = 90,
hjust = -0.5,
vjust = -0.6,
size = 4
) +
scale_x_continuous(breaks = 2011:2023) +
labs(
x = "Year",
y = "Log-odds coefficient",
title = "Most Statistically Significant Age-Group Coefficient by Year (Texas BRFSS)",
subtitle = "Dashed lines mark 2014 (ACA-era onset) and 2020 (COVID-era onset)"
) +
theme_minimal() +
theme(
axis.text.x = element_text(size = 12, face = "bold", angle = 45, hjust = 1),
axis.text.y = element_text(size = 12, face = "bold"),
axis.title.x = element_text(size = 13, face = "bold"),
axis.title.y = element_text(size = 13, face = "bold"),
plot.title = element_text(face = "bold")
) +
# Pooled-era strongest coefficients
geom_point(
data = age_pooled_plot,
aes(x = year, y = estimate),
color = "#2E8B57",
size = 2.5
) +
# Error bars for pooled-era points
geom_errorbar(
data = age_pooled_plot,
aes(
x = year,
ymin = estimate - 1.96 * std.error,
ymax = estimate + 1.96 * std.error
),
width = 0.4,
linewidth = 0.8,
color = "#2E8B57"
) +
geom_text(
data = age_pooled_plot,
aes(x = year, y = estimate, label = label, vjust = ifelse(year == 2021.5, -1.2, 1.4)),
color = "#2E8B57",
size = 3
) +
# Green brackets showing pooled-year ranges
annotate(
"segment",
x = c(2012, 2014, 2020),
xend = c(2014, 2016, 2023),
y = min(age_strongest$estimate, na.rm = TRUE) - 0.5,
yend = min(age_strongest$estimate, na.rm = TRUE) - 0.5,
linewidth = 1,
color = "#2E8B57"
) +
annotate(
"segment",
x = c(2012, 2014, 2020),
xend = c(2012, 2014, 2020),
y = min(age_strongest$estimate, na.rm = TRUE) - 0.6,
yend = min(age_strongest$estimate, na.rm = TRUE) - 0.4,
linewidth = 1,
color = "#2E8B57"
) +
annotate(
"segment",
x = c(2014, 2016, 2023),
xend = c(2014, 2016, 2023),
y = min(age_strongest$estimate, na.rm = TRUE) - 0.6,
yend = min(age_strongest$estimate, na.rm = TRUE) - 0.4,
linewidth = 1,
color = "#2E8B57"
)