Cholesterol Trends Among Adults

Using R with NHANES

Author

Cubicle 3253

Trends In Total Cholesterol Among Adults: United States, 1999–2000 to August 2021–August 2023

Trends In HDL Cholesterol Among Adults: United States, 1999–2000 to August 2021–August 2023

Trends In Non-HDL Cholesterol Among Adults: United States, 1999–2000 to August 2021–August 2023

--- title: Cholesterol Trends Among Adults subtitle: Using R with NHANES author: Cubicle 3253 format: html: code-tools: true echo: false embed-resources: true fig-asp: 0.6 fig-dpi: 300 linestretch: 1.1 page-layout: full --- ```{r} #| message: false library(dplyr) library(ggplot2) library(haven) library(survey) library(srvyr) read_nhanes <- function(xpt_name) { xpt_name <- toupper(xpt_name) if(substr(xpt_name, 1, 2) == "P_") { begin_year = "2017" } else { begin_year = switch(substr(xpt_name, nchar(xpt_name) - 1, nchar(xpt_name)), "_L" = "2021", "_J" = "2017", "_I" = "2015", "_H" = "2013", "_G" = "2011", "_F" = "2009", "_E" = "2007", "_D" = "2005", "_C" = "2003", "_B" = "2001", "1999") } xpt_url <- paste0("https://wwwn.cdc.gov/nchs/data/nhanes/public/", begin_year, "/datafiles/", xpt_name, ".xpt") try(read_xpt(xpt_url), silent = TRUE) } plot_overall <- function(q_var, y_label, y_min, y_max) { NHANES |> group_by(SDDSRVYR) |> summarise(q = survey_quantile({{q_var}}, c(0.25, 0.50, 0.75), vartype = NULL)) |> ggplot(aes(x = SDDSRVYR, y = q_q50)) + geom_ribbon(aes(ymin = q_q25, ymax = q_q75, color = "25th to 75th", fill = "25th to 75th")) + geom_line(aes(color = "50th"), linewidth = 2) + geom_point(aes(color = "50th"), size = 2.5) + scale_color_manual(name = "Percentile", breaks = c("50th", "25th to 75th"), values = c("50th" = "#21918c", "25th to 75th" = "#21918c44"), aesthetics = c("color", "fill")) + labs( title = "Percentiles by Survey Period", x = "Survey Period", y = paste(y_label, "Cholesterol (mg/dL)")) + scale_x_continuous( breaks = c(2000, 2002, 2004, 2006, 2008, 2010, 2012, 2014, 2016, 2018.6, 2022.7), labels = c("1999-2000", "2001-2002", "2003-2004", "2005-2006", "2007-2008", "2009-2010", "2011-2012", "2013-2014", "2015-2016", "2017-Mar 2020", "Aug 2021-Aug 2023"), expand = expansion(add = 0.5)) + scale_y_continuous(limits = c(y_min, y_max), expand = expansion(add = 0)) + theme_bw() + theme( text = element_text(size = 8), axis.text.x = element_text(angle = 15, hjust = 1), legend.position = "top") } plot_by <- function(g_var, q_var, y_label, y_min, y_max) { NHANES |> filter(!is.na({{g_var}})) |> group_by({{g_var}}, SDDSRVYR) |> summarise(q = survey_quantile({{q_var}}, 0.50, vartype = NULL)) |> ggplot(aes(x = SDDSRVYR, y = q_q50, color = {{g_var}})) + geom_line(linewidth = 2) + geom_point(size = 2.5) + scale_colour_viridis_d() + labs( title = "Percentile by Survey Period", x = "Survey Period", y = paste(y_label, "Cholesterol (mg/dL)"), color = "50th Percentile") + scale_x_continuous( breaks = c(2000, 2002, 2004, 2006, 2008, 2010, 2012, 2014, 2016, 2018.6, 2022.7), labels = c("1999-2000", "2001-2002", "2003-2004", "2005-2006", "2007-2008", "2009-2010", "2011-2012", "2013-2014", "2015-2016", "2017-Mar 2020", "Aug 2021-Aug 2023"), expand = expansion(add = 0.5)) + scale_y_continuous(limits = c(y_min, y_max), expand = expansion(add = 0)) + theme_bw() + theme( text = element_text(size = 8), axis.text.x = element_text(angle = 15, hjust = 1), legend.position = "top") } ``` ```{r} demo_vars <- c("SEQN", "SDDSRVYR", "RIAGENDR", "RIDAGEYR", "INDFMPIR", "SDMVSTRA", "SDMVPSU", "WTMEC2YR", "WTMECPRP") DEMO_A <- read_nhanes("DEMO") |> select(any_of(demo_vars)) DEMO_B <- read_nhanes("DEMO_B") |> select(any_of(demo_vars)) DEMO_C <- read_nhanes("DEMO_C") |> select(any_of(demo_vars)) DEMO_D <- read_nhanes("DEMO_D") |> select(any_of(demo_vars)) DEMO_E <- read_nhanes("DEMO_E") |> select(any_of(demo_vars)) DEMO_F <- read_nhanes("DEMO_F") |> select(any_of(demo_vars)) DEMO_G <- read_nhanes("DEMO_G") |> select(any_of(demo_vars)) DEMO_H <- read_nhanes("DEMO_H") |> select(any_of(demo_vars)) DEMO_I <- read_nhanes("DEMO_I") |> select(any_of(demo_vars)) P_DEMO <- read_nhanes("P_DEMO") |> select(any_of(demo_vars)) DEMO_L <- read_nhanes("DEMO_L") |> select(any_of(demo_vars)) DEMO <- bind_rows(DEMO_A, DEMO_B, DEMO_C, DEMO_D, DEMO_E, DEMO_F, DEMO_G, DEMO_H, DEMO_I, P_DEMO, DEMO_L) ``` ```{r} TCHOL_A <- read_nhanes("LAB13") |> select(SEQN, LBXTC) TCHOL_B <- read_nhanes("L13_B") |> select(SEQN, LBXTC) TCHOL_C <- read_nhanes("L13_C") |> select(SEQN, LBXTC) TCHOL_D <- read_nhanes("TCHOL_D") |> select(SEQN, LBXTC) TCHOL_E <- read_nhanes("TCHOL_E") |> select(SEQN, LBXTC) TCHOL_F <- read_nhanes("TCHOL_F") |> select(SEQN, LBXTC) TCHOL_G <- read_nhanes("TCHOL_G") |> select(SEQN, LBXTC) TCHOL_H <- read_nhanes("TCHOL_H") |> select(SEQN, LBXTC) TCHOL_I <- read_nhanes("TCHOL_I") |> select(SEQN, LBXTC) P_TCHOL <- read_nhanes("P_TCHOL") |> select(SEQN, LBXTC) TCHOL_L <- read_nhanes("TCHOL_L") |> select(SEQN, LBXTC, WTPH2YR) TCHOL <- bind_rows(TCHOL_A, TCHOL_B, TCHOL_C, TCHOL_D, TCHOL_E, TCHOL_F, TCHOL_G, TCHOL_H, TCHOL_I, P_TCHOL, TCHOL_L) ``` ```{r} HDL_A <- read_nhanes("LAB13") |> select(SEQN, LBDHDD = LBDHDL) HDL_B <- read_nhanes("L13_B") |> select(SEQN, LBDHDD = LBDHDL) HDL_C <- read_nhanes("L13_C") |> select(SEQN, LBDHDD = LBXHDD) HDL_D <- read_nhanes("HDL_D") |> select(SEQN, LBDHDD) HDL_E <- read_nhanes("HDL_E") |> select(SEQN, LBDHDD) HDL_F <- read_nhanes("HDL_F") |> select(SEQN, LBDHDD) HDL_G <- read_nhanes("HDL_G") |> select(SEQN, LBDHDD) HDL_H <- read_nhanes("HDL_H") |> select(SEQN, LBDHDD) HDL_I <- read_nhanes("HDL_I") |> select(SEQN, LBDHDD) P_HDL <- read_nhanes("P_HDL") |> select(SEQN, LBDHDD) HDL_L <- read_nhanes("HDL_L") |> select(SEQN, LBDHDD) HDL <- bind_rows(HDL_A, HDL_B, HDL_C, HDL_D, HDL_E, HDL_F, HDL_G, HDL_H, HDL_I, P_HDL, HDL_L) ``` ```{r} One <- DEMO |> left_join(TCHOL, by = "SEQN") |> left_join(HDL, by = "SEQN") |> mutate( survey = case_match(SDDSRVYR, 1 ~ "1999-2000", 2 ~ "2001-2002", 3 ~ "2003-2004", 4 ~ "2005-2006", 5 ~ "2007-2008", 6 ~ "2009-2010", 7 ~ "2011-2012", 8 ~ "2013-2014", 9 ~ "2015-2016", 66 ~ "2017-Mar 2020", 12 ~ "Aug 2021-Aug 2023"), SDDSRVYR = case_match(SDDSRVYR, 1 ~ 2000, 2 ~ 2002, 3 ~ 2004, 4 ~ 2006, 5 ~ 2008, 6 ~ 2010, 7 ~ 2012, 8 ~ 2014, 9 ~ 2016, 66 ~ 2018.6, 12 ~ 2022.7), RIAGENDR = if_else(RIAGENDR == 1, "Male", "Female"), RIDAGEYR = case_match(RIDAGEYR, 18:34 ~ "18 to 34", 35:49 ~ "35 to 49", 50:64 ~ "50 to 64", 65:85 ~ "65 or more"), INDFMPIR = case_when( INDFMPIR < 1.30 ~ "Less than 1.30", INDFMPIR < 3.50 ~ "1.30 to 3.49", INDFMPIR >= 3.50 ~ "3.50 or more"), INDFMPIR = factor(INDFMPIR, levels = c("Less than 1.30", "1.30 to 3.49", "3.50 or more")), non_hdl = LBXTC - LBDHDD, survey_wt = coalesce(WTMEC2YR, WTMECPRP, WTPH2YR)) ``` ```{r} NHANES <- One |> filter(!is.na(RIDAGEYR), !is.na(non_hdl)) |> as_survey_design(ids = SDMVPSU, strata = SDMVSTRA, weights = survey_wt, nest = TRUE) ``` <h3> Trends In Total Cholesterol Among Adults: United States, 1999–2000 to August 2021–August 2023 </h3> ::: {.panel-tabset} ### Overall ```{r} plot_overall(LBXTC, "Total", 150, 230) ``` ```{r} One |> filter(!is.na(RIDAGEYR), !is.na(non_hdl)) |> ggplot(aes(x = LBXTC, y = survey, fill = survey, weight = survey_wt)) + geom_boxplot() + scale_fill_viridis_d(alpha = 0.6) + labs( title = "Boxplot by Survey Period", x = "Total Cholesterol (mg/dL)", y = "Survey Period") + coord_cartesian(xlim = c(60, 320)) + scale_x_continuous(breaks = seq(60, 320, 20)) + theme_bw() + theme( text = element_text(size = 8), legend.position = "none") ``` ### By Sex ```{r} plot_by(RIAGENDR, LBXTC, "Total", 150, 230) ``` ### By Age Group ```{r} plot_by(RIDAGEYR, LBXTC, "Total", 150, 230) ``` ### By Ratio of Family Income to Poverty ```{r} plot_by(INDFMPIR, LBXTC, "Total", 150, 230) ``` ::: <br> <h3> Trends In HDL Cholesterol Among Adults: United States, 1999–2000 to August 2021–August 2023 </h3> ::: {.panel-tabset} ### Overall ```{r} plot_overall(LBDHDD, "HDL", 30, 70) ``` ```{r} One |> filter(!is.na(RIDAGEYR), !is.na(non_hdl)) |> ggplot(aes(x = LBDHDD, y = survey, fill = survey, weight = survey_wt)) + geom_boxplot() + scale_fill_viridis_d(alpha = 0.6) + labs( title = "Boxplot by Survey Period", x = "HDL Cholesterol (mg/dL)", y = "Survey Period") + coord_cartesian(xlim = c(0, 100)) + scale_x_continuous(breaks = seq(0, 100, 10)) + theme_bw() + theme( text = element_text(size = 8), legend.position = "none") ``` ### By Sex ```{r} plot_by(RIAGENDR, LBDHDD, "HDL", 30, 70) ``` ### By Age Group ```{r} plot_by(RIDAGEYR, LBDHDD, "HDL", 30, 70) ``` ### By Ratio of Family Income to Poverty ```{r} plot_by(INDFMPIR, LBDHDD, "HDL", 30, 70) ``` ::: <br> <h3> Trends In Non-HDL Cholesterol Among Adults: United States, 1999–2000 to August 2021–August 2023 </h3> ::: {.panel-tabset} ### Overall ```{r} plot_overall(non_hdl, "Non-HDL", 100, 180) ``` ```{r} One |> filter(!is.na(RIDAGEYR), !is.na(non_hdl)) |> ggplot(aes(x = non_hdl, y = survey, fill = survey, weight = survey_wt)) + geom_boxplot() + scale_fill_viridis_d(alpha = 0.6) + labs( title = "Boxplot by Survey Period", x = "Non-HDL Cholesterol (mg/dL)", y = "Survey Period") + coord_cartesian(xlim = c(20, 280)) + scale_x_continuous(breaks = seq(20, 280, 20)) + theme_bw() + theme( text = element_text(size = 8), legend.position = "none") ``` ### By Sex ```{r} plot_by(RIAGENDR, non_hdl, "Non-HDL", 100, 180) ``` ### By Age Group ```{r} plot_by(RIDAGEYR, non_hdl, "Non-HDL", 100, 180) ``` ### By Ratio of Family Income to Poverty ```{r} plot_by(INDFMPIR, non_hdl, "Non-HDL", 100, 180) ``` :::