Cholesterol Trends Among Adults
Using R with NHANES
Using R with NHANES
Cubicle 3253
---
title: Cholesterol Trends Among Adults
subtitle: Using R with NHANES
author: Cubicle 3253
format:
html:
code-tools: true
echo: false
embed-resources: true
fig-asp: 0.6
fig-dpi: 300
linestretch: 1.1
page-layout: full
---
```{r}
#| message: false
library(dplyr)
library(ggplot2)
library(haven)
library(survey)
library(srvyr)
read_nhanes <- function(xpt_name) {
xpt_name <- toupper(xpt_name)
if(substr(xpt_name, 1, 2) == "P_") {
begin_year = "2017"
} else {
begin_year = switch(substr(xpt_name, nchar(xpt_name) - 1, nchar(xpt_name)),
"_L" = "2021",
"_J" = "2017",
"_I" = "2015",
"_H" = "2013",
"_G" = "2011",
"_F" = "2009",
"_E" = "2007",
"_D" = "2005",
"_C" = "2003",
"_B" = "2001",
"1999")
}
xpt_url <- paste0("https://wwwn.cdc.gov/nchs/data/nhanes/public/", begin_year, "/datafiles/", xpt_name, ".xpt")
try(read_xpt(xpt_url), silent = TRUE)
}
plot_overall <- function(q_var, y_label, y_min, y_max) {
NHANES |>
group_by(SDDSRVYR) |>
summarise(q = survey_quantile({{q_var}}, c(0.25, 0.50, 0.75), vartype = NULL)) |>
ggplot(aes(x = SDDSRVYR, y = q_q50)) +
geom_ribbon(aes(ymin = q_q25, ymax = q_q75,
color = "25th to 75th", fill = "25th to 75th")) +
geom_line(aes(color = "50th"), linewidth = 2) +
geom_point(aes(color = "50th"), size = 2.5) +
scale_color_manual(name = "Percentile",
breaks = c("50th", "25th to 75th"),
values = c("50th" = "#21918c", "25th to 75th" = "#21918c44"),
aesthetics = c("color", "fill")) +
labs(
title = "Percentiles by Survey Period",
x = "Survey Period",
y = paste(y_label, "Cholesterol (mg/dL)")) +
scale_x_continuous(
breaks = c(2000, 2002, 2004, 2006, 2008, 2010, 2012, 2014, 2016, 2018.6, 2022.7),
labels = c("1999-2000", "2001-2002", "2003-2004", "2005-2006",
"2007-2008", "2009-2010", "2011-2012", "2013-2014", "2015-2016",
"2017-Mar 2020", "Aug 2021-Aug 2023"),
expand = expansion(add = 0.5)) +
scale_y_continuous(limits = c(y_min, y_max), expand = expansion(add = 0)) +
theme_bw() +
theme(
text = element_text(size = 8),
axis.text.x = element_text(angle = 15, hjust = 1),
legend.position = "top")
}
plot_by <- function(g_var, q_var, y_label, y_min, y_max) {
NHANES |>
filter(!is.na({{g_var}})) |>
group_by({{g_var}}, SDDSRVYR) |>
summarise(q = survey_quantile({{q_var}}, 0.50, vartype = NULL)) |>
ggplot(aes(x = SDDSRVYR, y = q_q50, color = {{g_var}})) +
geom_line(linewidth = 2) +
geom_point(size = 2.5) +
scale_colour_viridis_d() +
labs(
title = "Percentile by Survey Period",
x = "Survey Period",
y = paste(y_label, "Cholesterol (mg/dL)"),
color = "50th Percentile") +
scale_x_continuous(
breaks = c(2000, 2002, 2004, 2006, 2008, 2010, 2012, 2014, 2016, 2018.6, 2022.7),
labels = c("1999-2000", "2001-2002", "2003-2004", "2005-2006",
"2007-2008", "2009-2010", "2011-2012", "2013-2014", "2015-2016",
"2017-Mar 2020", "Aug 2021-Aug 2023"),
expand = expansion(add = 0.5)) +
scale_y_continuous(limits = c(y_min, y_max), expand = expansion(add = 0)) +
theme_bw() +
theme(
text = element_text(size = 8),
axis.text.x = element_text(angle = 15, hjust = 1),
legend.position = "top")
}
```
```{r}
demo_vars <- c("SEQN", "SDDSRVYR", "RIAGENDR", "RIDAGEYR", "INDFMPIR",
"SDMVSTRA", "SDMVPSU", "WTMEC2YR", "WTMECPRP")
DEMO_A <- read_nhanes("DEMO") |> select(any_of(demo_vars))
DEMO_B <- read_nhanes("DEMO_B") |> select(any_of(demo_vars))
DEMO_C <- read_nhanes("DEMO_C") |> select(any_of(demo_vars))
DEMO_D <- read_nhanes("DEMO_D") |> select(any_of(demo_vars))
DEMO_E <- read_nhanes("DEMO_E") |> select(any_of(demo_vars))
DEMO_F <- read_nhanes("DEMO_F") |> select(any_of(demo_vars))
DEMO_G <- read_nhanes("DEMO_G") |> select(any_of(demo_vars))
DEMO_H <- read_nhanes("DEMO_H") |> select(any_of(demo_vars))
DEMO_I <- read_nhanes("DEMO_I") |> select(any_of(demo_vars))
P_DEMO <- read_nhanes("P_DEMO") |> select(any_of(demo_vars))
DEMO_L <- read_nhanes("DEMO_L") |> select(any_of(demo_vars))
DEMO <- bind_rows(DEMO_A, DEMO_B, DEMO_C, DEMO_D, DEMO_E, DEMO_F,
DEMO_G, DEMO_H, DEMO_I, P_DEMO, DEMO_L)
```
```{r}
TCHOL_A <- read_nhanes("LAB13") |> select(SEQN, LBXTC)
TCHOL_B <- read_nhanes("L13_B") |> select(SEQN, LBXTC)
TCHOL_C <- read_nhanes("L13_C") |> select(SEQN, LBXTC)
TCHOL_D <- read_nhanes("TCHOL_D") |> select(SEQN, LBXTC)
TCHOL_E <- read_nhanes("TCHOL_E") |> select(SEQN, LBXTC)
TCHOL_F <- read_nhanes("TCHOL_F") |> select(SEQN, LBXTC)
TCHOL_G <- read_nhanes("TCHOL_G") |> select(SEQN, LBXTC)
TCHOL_H <- read_nhanes("TCHOL_H") |> select(SEQN, LBXTC)
TCHOL_I <- read_nhanes("TCHOL_I") |> select(SEQN, LBXTC)
P_TCHOL <- read_nhanes("P_TCHOL") |> select(SEQN, LBXTC)
TCHOL_L <- read_nhanes("TCHOL_L") |> select(SEQN, LBXTC, WTPH2YR)
TCHOL <- bind_rows(TCHOL_A, TCHOL_B, TCHOL_C, TCHOL_D, TCHOL_E, TCHOL_F,
TCHOL_G, TCHOL_H, TCHOL_I, P_TCHOL, TCHOL_L)
```
```{r}
HDL_A <- read_nhanes("LAB13") |> select(SEQN, LBDHDD = LBDHDL)
HDL_B <- read_nhanes("L13_B") |> select(SEQN, LBDHDD = LBDHDL)
HDL_C <- read_nhanes("L13_C") |> select(SEQN, LBDHDD = LBXHDD)
HDL_D <- read_nhanes("HDL_D") |> select(SEQN, LBDHDD)
HDL_E <- read_nhanes("HDL_E") |> select(SEQN, LBDHDD)
HDL_F <- read_nhanes("HDL_F") |> select(SEQN, LBDHDD)
HDL_G <- read_nhanes("HDL_G") |> select(SEQN, LBDHDD)
HDL_H <- read_nhanes("HDL_H") |> select(SEQN, LBDHDD)
HDL_I <- read_nhanes("HDL_I") |> select(SEQN, LBDHDD)
P_HDL <- read_nhanes("P_HDL") |> select(SEQN, LBDHDD)
HDL_L <- read_nhanes("HDL_L") |> select(SEQN, LBDHDD)
HDL <- bind_rows(HDL_A, HDL_B, HDL_C, HDL_D, HDL_E, HDL_F,
HDL_G, HDL_H, HDL_I, P_HDL, HDL_L)
```
```{r}
One <- DEMO |>
left_join(TCHOL, by = "SEQN") |>
left_join(HDL, by = "SEQN") |>
mutate(
survey = case_match(SDDSRVYR,
1 ~ "1999-2000",
2 ~ "2001-2002",
3 ~ "2003-2004",
4 ~ "2005-2006",
5 ~ "2007-2008",
6 ~ "2009-2010",
7 ~ "2011-2012",
8 ~ "2013-2014",
9 ~ "2015-2016",
66 ~ "2017-Mar 2020",
12 ~ "Aug 2021-Aug 2023"),
SDDSRVYR = case_match(SDDSRVYR,
1 ~ 2000,
2 ~ 2002,
3 ~ 2004,
4 ~ 2006,
5 ~ 2008,
6 ~ 2010,
7 ~ 2012,
8 ~ 2014,
9 ~ 2016,
66 ~ 2018.6,
12 ~ 2022.7),
RIAGENDR = if_else(RIAGENDR == 1, "Male", "Female"),
RIDAGEYR = case_match(RIDAGEYR,
18:34 ~ "18 to 34",
35:49 ~ "35 to 49",
50:64 ~ "50 to 64",
65:85 ~ "65 or more"),
INDFMPIR = case_when(
INDFMPIR < 1.30 ~ "Less than 1.30",
INDFMPIR < 3.50 ~ "1.30 to 3.49",
INDFMPIR >= 3.50 ~ "3.50 or more"),
INDFMPIR = factor(INDFMPIR,
levels = c("Less than 1.30", "1.30 to 3.49", "3.50 or more")),
non_hdl = LBXTC - LBDHDD,
survey_wt = coalesce(WTMEC2YR, WTMECPRP, WTPH2YR))
```
```{r}
NHANES <- One |>
filter(!is.na(RIDAGEYR), !is.na(non_hdl)) |>
as_survey_design(ids = SDMVPSU, strata = SDMVSTRA, weights = survey_wt, nest = TRUE)
```
<h3>
Trends In Total Cholesterol Among Adults: United States, 1999–2000 to August 2021–August 2023
</h3>
::: {.panel-tabset}
### Overall
```{r}
plot_overall(LBXTC, "Total", 150, 230)
```
```{r}
One |>
filter(!is.na(RIDAGEYR), !is.na(non_hdl)) |>
ggplot(aes(x = LBXTC, y = survey, fill = survey, weight = survey_wt)) +
geom_boxplot() +
scale_fill_viridis_d(alpha = 0.6) +
labs(
title = "Boxplot by Survey Period",
x = "Total Cholesterol (mg/dL)",
y = "Survey Period") +
coord_cartesian(xlim = c(60, 320)) +
scale_x_continuous(breaks = seq(60, 320, 20)) +
theme_bw() +
theme(
text = element_text(size = 8),
legend.position = "none")
```
### By Sex
```{r}
plot_by(RIAGENDR, LBXTC, "Total", 150, 230)
```
### By Age Group
```{r}
plot_by(RIDAGEYR, LBXTC, "Total", 150, 230)
```
### By Ratio of Family Income to Poverty
```{r}
plot_by(INDFMPIR, LBXTC, "Total", 150, 230)
```
:::
<br>
<h3>
Trends In HDL Cholesterol Among Adults: United States, 1999–2000 to August 2021–August 2023
</h3>
::: {.panel-tabset}
### Overall
```{r}
plot_overall(LBDHDD, "HDL", 30, 70)
```
```{r}
One |>
filter(!is.na(RIDAGEYR), !is.na(non_hdl)) |>
ggplot(aes(x = LBDHDD, y = survey, fill = survey, weight = survey_wt)) +
geom_boxplot() +
scale_fill_viridis_d(alpha = 0.6) +
labs(
title = "Boxplot by Survey Period",
x = "HDL Cholesterol (mg/dL)",
y = "Survey Period") +
coord_cartesian(xlim = c(0, 100)) +
scale_x_continuous(breaks = seq(0, 100, 10)) +
theme_bw() +
theme(
text = element_text(size = 8),
legend.position = "none")
```
### By Sex
```{r}
plot_by(RIAGENDR, LBDHDD, "HDL", 30, 70)
```
### By Age Group
```{r}
plot_by(RIDAGEYR, LBDHDD, "HDL", 30, 70)
```
### By Ratio of Family Income to Poverty
```{r}
plot_by(INDFMPIR, LBDHDD, "HDL", 30, 70)
```
:::
<br>
<h3>
Trends In Non-HDL Cholesterol Among Adults: United States, 1999–2000 to August 2021–August 2023
</h3>
::: {.panel-tabset}
### Overall
```{r}
plot_overall(non_hdl, "Non-HDL", 100, 180)
```
```{r}
One |>
filter(!is.na(RIDAGEYR), !is.na(non_hdl)) |>
ggplot(aes(x = non_hdl, y = survey, fill = survey, weight = survey_wt)) +
geom_boxplot() +
scale_fill_viridis_d(alpha = 0.6) +
labs(
title = "Boxplot by Survey Period",
x = "Non-HDL Cholesterol (mg/dL)",
y = "Survey Period") +
coord_cartesian(xlim = c(20, 280)) +
scale_x_continuous(breaks = seq(20, 280, 20)) +
theme_bw() +
theme(
text = element_text(size = 8),
legend.position = "none")
```
### By Sex
```{r}
plot_by(RIAGENDR, non_hdl, "Non-HDL", 100, 180)
```
### By Age Group
```{r}
plot_by(RIDAGEYR, non_hdl, "Non-HDL", 100, 180)
```
### By Ratio of Family Income to Poverty
```{r}
plot_by(INDFMPIR, non_hdl, "Non-HDL", 100, 180)
```
:::