---
title: Abnormal Cholesterol Among Children & Adolescents
subtitle: Using R with NHANES
author: Cubicle 3253
format:
html:
code-tools: true
echo: false
embed-resources: true
fig-asp: 0.46
fig-dpi: 300
linestretch: 1.1
page-layout: full
---
```{r}
#| message: false
library(dplyr)
library(ggiraph)
library(ggplot2)
library(ggtext)
library(gt)
library(haven)
library(htmltools)
library(purrr)
library(survey)
library(surveytable)
read_nhanes <- function(xpt_name) {
xpt_name <- toupper(xpt_name)
if(substr(xpt_name, 1, 2) == "P_") {
begin_year = "2017"
} else {
begin_year = switch(substr(xpt_name, nchar(xpt_name) - 1, nchar(xpt_name)),
"_L" = "2021",
"_J" = "2017",
"_I" = "2015",
"_H" = "2013",
"_G" = "2011",
"_F" = "2009",
"_E" = "2007",
"_D" = "2005",
"_C" = "2003",
"_B" = "2001",
"1999")
}
xpt_url <- paste0("https://wwwn.cdc.gov/nchs/data/nhanes/public/", begin_year, "/datafiles/", xpt_name, ".xpt")
try(read_xpt(xpt_url), silent = TRUE)
}
do_table <- function(t) {
t |>
gt() |>
fmt_number(columns = 2, decimals = 4) |>
sub_small_vals(
threshold = 0.0001,
small_pattern = "<0.0001") |>
opt_row_striping() |>
tab_style(
style = cell_text(weight = "bold"),
locations = cells_column_labels()) |>
tab_footnote(
footnote = "P-value from t-test.",
locations = cells_column_labels(columns = 2))
}
```
```{r}
DEMO_L <- read_nhanes("DEMO_L") |> select(SEQN, SDDSRVYR, RIAGENDR, RIDEXAGM, SDMVSTRA, SDMVPSU)
TCHOL_L <- read_nhanes("TCHOL_L") |> select(SEQN, LBXTC, WTPH2YR)
HDL_L <- read_nhanes("HDL_L") |> select(SEQN, LBDHDD)
BMX_L <- read_nhanes("BMX_L") |> select(SEQN, BMDBMIC)
One <- DEMO_L |>
left_join(HDL_L, by = "SEQN") |>
left_join(TCHOL_L, by = "SEQN") |>
left_join(BMX_L, by = "SEQN") |>
mutate(
RIAGENDR = if_else(RIAGENDR == 1, "Boys", "Girls"),
RIDEXAGM = case_match(RIDEXAGM,
72:143 ~ "6 to 11",
144:239 ~ "12 to 19"),
RIDEXAGM = factor(RIDEXAGM,
levels = c("6 to 11", "12 to 19")),
tchol = if_else(LBXTC >= 200, TRUE, FALSE),
non_hdl = if_else(LBXTC - LBDHDD >= 145, TRUE, FALSE),
hdl = if_else(LBDHDD < 40, TRUE, FALSE),
any_chol = if_else(tchol | hdl | non_hdl, TRUE, FALSE),
BMDBMIC = case_match(BMDBMIC,
1:2 ~ "Underweight or Normal Weight",
3 ~ "Overweight",
4 ~ "Obese"),
BMDBMIC = factor(BMDBMIC,
levels = c("Underweight or Normal Weight", "Overweight", "Obese")))
```
```{r}
#| output: false
NHANES <- svydesign(
data = subset(One, !is.na(RIDEXAGM) & !is.na(any_chol)),
id = ~SDMVPSU, strata = ~SDMVSTRA, weights = ~WTPH2YR, nest = TRUE)
set_opts(mode = "NCHS", drop_na = TRUE)
set_survey(NHANES)
```
::: {.panel-tabset}
### Figure 1
<h5>
Prevalence of abnormal cholesterol among children and adolescents aged 6-19 years:
United States, August 2021-August 2023
</h5>
```{r}
#| out-width: 100%
t_tchol <- tab("tchol")
t_non_hdl <- tab("non_hdl")
t_hdl <- tab("hdl")
t_any_chol <- tab("any_chol")
g <- bind_rows(t_tchol, t_non_hdl, t_hdl, t_any_chol, .id = "chol") |>
filter(Level == "TRUE") |>
mutate(
chol = case_match(chol,
"1" ~ "High Total Cholesterol",
"2" ~ "High Non-HDL Cholesterol",
"3" ~ "Low HDL Cholesterol",
"4" ~ "Any Abnormal Cholesterol"),
chol = factor(chol,
levels = c("High Total Cholesterol", "High Non-HDL Cholesterol",
"Low HDL Cholesterol", "Any Abnormal Cholesterol"))) |>
ggplot(aes(x = chol, y = Percent, fill = chol,
tooltip = sprintf("%s: %.1f%%", chol, Percent))) +
geom_col_interactive() +
geom_text(aes(label = sprintf("%.1f%%", Percent)), size = 2.5, nudge_y = 0.4) +
scale_fill_viridis_d(begin = 0.2, end = 0.8) +
scale_y_continuous(limits = c(0, 20), expand = expansion(add = 0)) +
theme_classic() +
theme(text = element_text(size = 9),
axis.title.x = element_blank(),
legend.position = "none")
girafe(ggobj = g)
```
<p style="font-size: 9pt;">
NOTES: HDL is high-density lipoprotein. High total cholesterol is at or above 200 mg/dL,
low HDL cholesterol is less than 40 mg/dL, and high non-HDL cholesterol is at or above
145 mg/dL. Any abnormal cholesterol includes at least one measure of high total
cholesterol, low HDL cholesterol, or high non-HDL cholesterol.<br>
SOURCE: CDC/NCHS, National Health and Nutrition Examination Survey, August 2021–August 2023.
</p>
<details>
<summary>
Tables
</summary>
```{r}
#| results: asis
HTML("<hr><h6>High Total Cholesterol</h6>")
t_tchol
HTML("<hr><h6>High Non-HDL Cholesterol</h6>")
t_non_hdl
HTML("<hr><h6>Low HDL Cholesterol</h6>")
t_hdl
HTML("<hr><h6>Any Abnormal Cholesterol</h6>")
t_any_chol
```
</details>
### Figure 2
<h5>
Prevalence of any abnormal cholesterol among children and adolescents aged 6–19 years
by age, sex, and weight status: United States, August 2021–August 2023
</h5>
```{r}
#| out-width: 100%
t_age <- tab_subset("any_chol", "RIDEXAGM")
t_sex <- tab_subset("any_chol", "RIAGENDR")
t_bmi <- tab_subset("any_chol", "BMDBMIC")
t1 <- bind_rows(map(t_age, as_tibble), .id = "group") |>
filter(Level == "TRUE")
t2 <- bind_rows(map(t_sex, as_tibble), .id = "group") |>
filter(Level == "TRUE")
t3 <- bind_rows(map(t_bmi, as_tibble), .id = "group") |>
filter(Level == "TRUE")
g <- bind_rows(t1, t2, t3, .id = "var") |>
mutate(
bar_label = case_when(
group == "Girls" ~ sprintf("%.1f%% <sup>1</sup>", Percent),
group == "Underweight or Normal Weight" ~ sprintf("%.1f%% <sup>2</sup>", Percent),
group == "Overweight" ~ sprintf("%.1f%% <sup>2</sup>", Percent),
.default = sprintf("%.1f%%", Percent))) |>
ggplot(aes(x = Percent, y = group, fill = var,
tooltip = sprintf("%s: %.1f%%", group, Percent))) +
geom_col_interactive() +
geom_richtext(aes(label = bar_label),
fill = NA, label.color = NA, size = 2.5, nudge_x = 1.4) +
scale_fill_viridis_d(begin = 0.3, end = 0.7) +
scale_x_continuous(limits = c(0, 40), expand = expansion(add = 0)) +
scale_y_discrete(limits = c("Obese", "Overweight", "Underweight or Normal Weight", "",
"Boys", "Girls", " ", "12 to 19", "6 to 11")) +
theme_classic() +
theme(text = element_text(size = 9),
axis.title.y = element_blank(),
legend.position = "none")
girafe(ggobj = g)
```
<p style="font-size: 9pt;">
<sup>1</sup>Significantly different from boys (p<0.05).<br>
<sup>2</sup>Significantly different from children/adolescents with obesity (p<0.05).
NOTES: HDL is high-density lipoprotein. High total cholesterol is at or above 200 mg/dL,
low HDL cholesterol is less than 40 mg/dL, and high non-HDL cholesterol is at or above
145 mg/dL. Any abnormal cholesterol includes at least one measure of high total
cholesterol, low HDL cholesterol, or high non-HDL cholesterol.<br>
SOURCE: CDC/NCHS, National Health and Nutrition Examination Survey, August 2021–August 2023.
</p>
<details>
<summary>
Tables & Statistical Testing
</summary>
```{r}
#| results: asis
HTML("<hr><h6>By Age</h6>")
t_age
HTML("<hr><h6>By Sex</h6>")
t_sex
HTML("<hr><h6>By Weight Status</h6>")
t_bmi
HTML("<hr><h6>Statistical Testing</h6>")
p1 <- svyttest(any_chol~RIDEXAGM, NHANES)$p.value
p2 <- svyttest(any_chol~RIAGENDR, NHANES)$p.value
p3 <- svyttest(any_chol~BMDBMIC, subset(NHANES, BMDBMIC != "Obese"))$p.value
p4 <- svyttest(any_chol~BMDBMIC, subset(NHANES, BMDBMIC != "Overweight"))$p.value
p5 <- svyttest(any_chol~BMDBMIC, subset(NHANES, BMDBMIC != "Underweight or Normal Weight"))$p.value
tribble(
~Comparison, ~`p-value`,
"6 to 11 vs. 12 to 19", p1,
"Girls vs Boys", p2,
"Underweight or Normal Weight vs. Overweight", p3,
"Underweight or Normal Weight vs. Obese", p4,
"Overweight vs. Obese", p5) |>
do_table()
```
</details>
### Figure 3
<h5>
Trends in prevalence of abnormal cholesterol among children and adolescents
aged 6–19 years: United States, 2013–2014 to August 2021–August 2023
</h5>
```{r}
DEMO_H <- read_nhanes("DEMO_H") |> select(SEQN, SDDSRVYR, RIDEXAGM, SDMVSTRA, SDMVPSU, WTMEC2YR)
DEMO_I <- read_nhanes("DEMO_I") |> select(SEQN, SDDSRVYR, RIDEXAGM, SDMVSTRA, SDMVPSU, WTMEC2YR)
DEMO_J <- read_nhanes("DEMO_J") |> select(SEQN, SDDSRVYR, RIDEXAGM, SDMVSTRA, SDMVPSU, WTMEC2YR)
DEMO <- bind_rows(DEMO_H, DEMO_I, DEMO_J, DEMO_L)
TCHOL_H <- read_nhanes("TCHOL_H") |> select(SEQN, LBXTC)
TCHOL_I <- read_nhanes("TCHOL_I") |> select(SEQN, LBXTC)
TCHOL_J <- read_nhanes("TCHOL_J") |> select(SEQN, LBXTC)
TCHOL <- bind_rows(TCHOL_H, TCHOL_I, TCHOL_J, TCHOL_L)
HDL_H <- read_nhanes("HDL_H") |> select(SEQN, LBDHDD)
HDL_I <- read_nhanes("HDL_I") |> select(SEQN, LBDHDD)
HDL_J <- read_nhanes("HDL_J") |> select(SEQN, LBDHDD)
HDL <- bind_rows(HDL_H, HDL_I, HDL_J, HDL_L)
One <- DEMO |>
left_join(TCHOL, by = "SEQN") |>
left_join(HDL, by = "SEQN") |>
rename(Survey = SDDSRVYR) |>
mutate(
midpoint = case_match(Survey,
8 ~ 0,
9 ~ 2,
10 ~ 4,
12 ~ 8.7),
Survey = case_match(Survey,
8 ~ "2013-2014",
9 ~ "2015-2016",
10 ~ "2017-2018",
12 ~ "August 2021-August 2023"),
survey_wt = coalesce(WTMEC2YR, WTPH2YR),
tchol = if_else(LBXTC >= 200, TRUE, FALSE),
hdl = if_else(LBDHDD < 40, TRUE, FALSE),
non_hdl = if_else(LBXTC - LBDHDD >= 145, TRUE, FALSE),
any_chol = if_else(tchol | hdl | non_hdl, TRUE, FALSE))
```
```{r}
#| output: false
NHANES <- svydesign(
data = subset(One, RIDEXAGM %in% 72:239 & !is.na(any_chol)),
id = ~SDMVPSU, strata = ~SDMVSTRA, weights = ~survey_wt, nest = TRUE)
set_survey(NHANES, mode = "NCHS")
tab_tchol <- tab_subset("tchol", "Survey")
t_tchol <- bind_rows(map(tab_tchol, as_tibble), .id = "Survey") |>
filter(Level == TRUE) |>
mutate(chol = "High Total Cholesterol") |>
select(chol, Survey, Percent)
tab_hdl <- tab_subset("hdl", "Survey")
t_hdl <- bind_rows(map(tab_hdl, as_tibble), .id = "Survey") |>
filter(Level == TRUE) |>
mutate(chol = "Low HDL Cholesterol") |>
select(chol, Survey, Percent)
tab_non_hdl <- tab_subset("non_hdl", "Survey")
t_non_hdl <- bind_rows(map(tab_non_hdl, as_tibble), .id = "Survey") |>
filter(Level == TRUE) |>
mutate(chol = "High Non-HDL Cholesterol") |>
select(chol, Survey, Percent)
tab_any_chol <- tab_subset("any_chol", "Survey")
t_any_chol <- bind_rows(map(tab_any_chol, as_tibble), .id = "Survey") |>
filter(Level == TRUE) |>
mutate(chol = "Any Cholesterol Abnormality") |>
select(chol, Survey, Percent)
```
```{r}
#| out-width: 100%
g <- bind_rows(t_tchol, t_hdl, t_non_hdl, t_any_chol) |>
mutate(
midpoint = case_match(Survey,
"2013-2014" ~ 0,
"2015-2016" ~ 2,
"2017-2018" ~ 4,
"August 2021-August 2023" ~ 8.7)) |>
ggplot(aes(x = midpoint, y = Percent, color = chol)) +
geom_line(linewidth = 1.5) +
geom_point_interactive(aes(tooltip = sprintf("%s: %.1f%%", chol, Percent)), size = 3) +
geom_richtext(aes(x = 0, y = 22, label = "Any Abnormal Cholesterol <sup>1</sup>"),
hjust = 0, fill = NA, label.color = NA, size = 2.5) +
geom_richtext(aes(x = 0, y = 15, label = "Low HDL Cholesterol <sup>1</sup>"),
hjust = 0, fill = NA, label.color = NA, size = 2.5) +
geom_richtext(aes(x = 0, y = 8.5, label = "High Non-HDL Cholesterol"),
hjust = 0, fill = NA, label.color = NA, size = 2.5) +
geom_richtext(aes(x = 0, y = 5.5, label = "High Total Cholesterol"),
hjust = 0, fill = NA, label.color = NA, size = 2.5) +
scale_color_viridis_d(begin = 0.2, end = 0.8) +
scale_x_continuous(breaks = c(0, 2, 4, 8.7),
labels = c("2013-2014", "2015-2016", "2017-2018", "August 2021-August 2023"),
expand = expansion(mult = c(0.05, 0.10))) +
scale_y_continuous(limits = c(0, 25), expand = expansion(add = 0)) +
theme_classic() +
theme(text = element_text(size = 9),
axis.title.x = element_blank(),
legend.position = "none")
girafe(ggobj = g)
```
<p style="font-size: 9pt;">
<sup>1</sup>Significantly decreasing linear trend (p<0.05).<br>
NOTES: HDL is high-density lipoprotein. High total cholesterol is at or above 200 mg/dL,
low HDL cholesterol is less than 40 mg/dL, and high non-HDL cholesterol is at or above
145 mg/dL. Any abnormal cholesterol includes at least one measure of high total,
low HDL, or high non-HDL cholesterol.<br>
SOURCE: CDC/NCHS, National Health and Nutrition Examination Survey,
2013–2014 to August 2021–August 2023.
</p>
<br>
<details>
<summary>
Tables & Statistical Testing
</summary>
```{r}
#| results: asis
HTML("<hr><h6>High Total Cholesterol</h6>")
tab_tchol
HTML("<hr><h6>High Non-HDL Cholesterol</h6>")
tab_non_hdl
HTML("<hr><h6>Low HDL Cholesterol</h6>")
tab_hdl
HTML("<hr><h6>Any Cholesterol Abnormality</h6>")
tab_any_chol
p1 <- summary(svyglm(tchol~midpoint, NHANES, family = quasipoisson()))$coefficients[[2, 4]]
p2 <- summary(svyglm(non_hdl~midpoint, NHANES, family = quasipoisson()))$coefficients[[2, 4]]
p3 <- summary(svyglm(hdl~midpoint, NHANES, family = quasipoisson()))$coefficients[[2, 4]]
p4 <- summary(svyglm(any_chol~midpoint, NHANES, family = quasipoisson()))$coefficients[[2, 4]]
tribble(
~`Linear Trend`, ~`p-value`,
"High Total Cholesterol", p1,
"High Non-HDL Cholesterol", p2,
"Low HDL Cholesterol", p3,
"Any Cholesterol Abnormality", p4) |>
do_table()
```
</details>
:::