Trends in Type 2 Hypertension Among Adults
Using R with NHANES
Using R with NHANES
Cubicle 3253
---
title: Trends in Type 2 Hypertension Among Adults
subtitle: Using R with NHANES
author: Cubicle 3253
format:
html:
code-tools: true
echo: false
embed-resources: true
fig-asp: 0.6
fig-dpi: 300
grid:
sidebar-width: 200px
body-width: 1500px
margin-width: 100px
linestretch: 1.1
toc: true
toc-location: left
---
```{r}
#| message: false
library(dplyr)
library(ggiraph)
library(ggplot2)
library(haven)
library(plotly)
library(survey)
library(srvyr)
## Source: https://www.cdc.gov/nchs/data/statnt/statnt20.pdf ##
pop_18_34 <- 8001 + 18257 + 17722 + 19511
pop_35_49 <- 22180 + 22479 + 19806
pop_50_64 <- 17224 + 13307 + 10654
pop_65plus <- 9410 + 8726 + 7415 + 4900 + 4259
pop_2000 <- c(pop_18_34 , pop_35_49, pop_50_64, pop_65plus)
read_nhanes <- function(xpt_name)
{
xpt_name <- toupper(xpt_name)
if(substr(xpt_name, 1, 2) == "P_") {
begin_year = "2017"
} else {
begin_year = switch(substr(xpt_name, nchar(xpt_name) - 1, nchar(xpt_name)),
"_L" = "2021",
"_J" = "2017",
"_I" = "2015",
"_H" = "2013",
"_G" = "2011",
"_F" = "2009",
"_E" = "2007",
"_D" = "2005",
"_C" = "2003",
"_B" = "2001",
"1999")
}
xpt_url <- paste0("https://wwwn.cdc.gov/nchs/data/nhanes/public/", begin_year, "/datafiles/", xpt_name, ".xpt")
try(read_xpt(xpt_url), silent = TRUE)
}
do_plot <- function(df, by_var, max_y = 30)
{
g <- ggplot(df, aes(x = midpoint, y = p, color = {{by_var}}, fill = {{by_var}})) +
geom_ribbon(aes(ymin = p_low, ymax = p_upp), color = NA) +
geom_line(linewidth = 1.5) +
geom_point_interactive(
aes(tooltip = sprintf("<b>%s</b><br>%.1f%% (%.1f%% - %.1f%%)", {{by_var}}, p, p_low, p_upp)),
size = 2) +
scale_color_viridis_d(begin = 0.2, end = 0.8) +
scale_fill_viridis_d(begin = 0.2, end = 0.8, alpha = 0.2) +
guides(
color = guide_legend(NULL),
fill = "none") +
labs(
x = "Survey Period",
y = "Percent") +
scale_x_continuous(
breaks = c(2000, 2002, 2004, 2006, 2008, 2010, 2012, 2014, 2016, 2018.6, 2022.7),
labels = c("1999-2000", "2001-2002", "2003-2004", "2005-2006", "2007-2008", "2009-2010",
"2011-2012", "2013-2014", "2015-2016", "2017-Mar 2020", "Aug 2021-Aug 2023"),
expand = expansion(add = 0.5)) +
scale_y_continuous(
limits = c(0, max_y),
breaks = seq(0, max_y, 10),
expand = expansion(add = 0)) +
theme_bw() +
theme(text = element_text(size = 8),
axis.text.x = element_text(angle = 15, hjust = 1),
legend.position = "top")
girafe(ggobj = g)
}
```
```{r}
demo_vars <- c("SEQN", "SDDSRVYR", "RIAGENDR", "RIDAGEYR", "INDFMPIR", "RIDEXPRG",
"SDMVSTRA", "SDMVPSU", "WTMEC2YR", "WTMECPRP")
DEMO_A <- read_nhanes("DEMO") |> select(any_of(demo_vars))
DEMO_B <- read_nhanes("DEMO_B") |> select(any_of(demo_vars))
DEMO_C <- read_nhanes("DEMO_C") |> select(any_of(demo_vars))
DEMO_D <- read_nhanes("DEMO_D") |> select(any_of(demo_vars))
DEMO_E <- read_nhanes("DEMO_E") |> select(any_of(demo_vars))
DEMO_F <- read_nhanes("DEMO_F") |> select(any_of(demo_vars))
DEMO_G <- read_nhanes("DEMO_G") |> select(any_of(demo_vars))
DEMO_H <- read_nhanes("DEMO_H") |> select(any_of(demo_vars))
DEMO_I <- read_nhanes("DEMO_I") |> select(any_of(demo_vars))
P_DEMO <- read_nhanes("P_DEMO") |> select(any_of(demo_vars))
DEMO_L <- read_nhanes("DEMO_L") |> select(any_of(demo_vars))
DEMO <- bind_rows(DEMO_A, DEMO_B, DEMO_C, DEMO_D, DEMO_E, DEMO_F,
DEMO_G, DEMO_H, DEMO_I, P_DEMO, DEMO_L)
```
```{r}
BPX_A <- read_nhanes("BPX") |> select(SEQN, starts_with(c("BPXSY", "BPXDI")))
BPX_B <- read_nhanes("BPX_B") |> select(SEQN, starts_with(c("BPXSY", "BPXDI")))
BPX_C <- read_nhanes("BPX_C") |> select(SEQN, starts_with(c("BPXSY", "BPXDI")))
BPX_D <- read_nhanes("BPX_D") |> select(SEQN, starts_with(c("BPXSY", "BPXDI")))
BPX_E <- read_nhanes("BPX_E") |> select(SEQN, starts_with(c("BPXSY", "BPXDI")))
BPX_F <- read_nhanes("BPX_F") |> select(SEQN, starts_with(c("BPXSY", "BPXDI")))
BPX_G <- read_nhanes("BPX_G") |> select(SEQN, starts_with(c("BPXSY", "BPXDI")))
BPX_H <- read_nhanes("BPX_H") |> select(SEQN, starts_with(c("BPXSY", "BPXDI")))
BPX_I <- read_nhanes("BPX_I") |> select(SEQN, starts_with(c("BPXSY", "BPXDI")))
BPX_1 <- bind_rows(BPX_A, BPX_B, BPX_C, BPX_D, BPX_E, BPX_F, BPX_G, BPX_H, BPX_I) |>
mutate(
across(starts_with("BPXDI"), ~ if_else(.x == 0, NA_real_, .x)),
systolic = rowMeans(cbind(BPXSY1, BPXSY2, BPXSY3, BPXSY4), na.rm = TRUE),
diastolic = rowMeans(cbind(BPXDI1, BPXDI2, BPXDI3, BPXDI4), na.rm = TRUE)) |>
select(SEQN, systolic, diastolic)
```
```{r}
P_BPXO <- read_nhanes("P_BPXO") |> select(SEQN, BPXOSY1:BPXODI3)
BPXO_L <- read_nhanes("BPXO_L") |> select(SEQN, BPXOSY1:BPXODI3)
BPX_2 <- bind_rows(P_BPXO, BPXO_L) |>
mutate(
systolic = rowMeans(cbind(BPXOSY1, BPXOSY2, BPXOSY3), na.rm = TRUE),
diastolic = rowMeans(cbind(BPXODI1, BPXODI2, BPXODI3), na.rm = TRUE)) |>
select(SEQN, systolic, diastolic)
```
```{r}
BPX <- bind_rows(BPX_1, BPX_2) |>
mutate(
stage2 = case_when(
is.na(systolic) | is.na(diastolic) ~ NA,
systolic >= 140 | diastolic >= 90 ~ TRUE,
.default = FALSE))
```
```{r}
One <- DEMO |>
left_join(BPX, by = "SEQN") |>
mutate(
survey = case_match(SDDSRVYR,
1 ~ "1999-2000",
2 ~ "2001-2002",
3 ~ "2003-2004",
4 ~ "2005-2006",
5 ~ "2007-2008",
6 ~ "2009-2010",
7 ~ "2011-2012",
8 ~ "2013-2014",
9 ~ "2015-2016",
66 ~ "2017-Mar 2020",
12 ~ "Aug 2021-Aug 2023"),
midpoint = case_match(SDDSRVYR,
1 ~ 2000,
2 ~ 2002,
3 ~ 2004,
4 ~ 2006,
5 ~ 2008,
6 ~ 2010,
7 ~ 2012,
8 ~ 2014,
9 ~ 2016,
66 ~ 2018.6,
12 ~ 2022.7),
RIAGENDR = if_else(RIAGENDR == 1, "Male", "Female"),
RIDAGEYR = case_match(RIDAGEYR,
18:34 ~ "18 to 34",
35:49 ~ "35 to 49",
50:64 ~ "50 to 64",
65:85 ~ "65 or more"),
INDFMPIR = case_when(
INDFMPIR < 1.30 ~ "Less than 130%",
INDFMPIR < 3.50 ~ "130% to 349%",
INDFMPIR >= 3.50 ~ "350% or more"),
INDFMPIR = factor(INDFMPIR,
levels = c("Less than 130%", "130% to 349%", "350% or more")),
RIDEXPRG = if_else(RIDEXPRG == 1, TRUE, FALSE, FALSE),
survey_wt = coalesce(WTMEC2YR, WTMECPRP))
```
```{r}
NHANES <- One |>
filter(!is.na(RIDAGEYR), !RIDEXPRG, !is.na(stage2)) |>
as_survey_design(id = SDMVPSU, strata = SDMVSTRA, nest = TRUE, weight = survey_wt)
```
```{r}
t1 <- NHANES |>
group_by(midpoint) |>
summarise(
n = n(),
p = survey_mean(stage2, proportion = TRUE, vartype = "ci", prop_method = "beta") * 100) |>
mutate(grp = "Crude")
```
```{r}
NHANES_Adj <- svystandardize(NHANES, by = ~RIDAGEYR, over = ~midpoint,
population = pop_2000)
t2 <- NHANES_Adj |>
group_by(midpoint) |>
summarise(
n = n(),
p = survey_mean(stage2, proportion = TRUE, vartype = "ci", prop_method = "beta") * 100) |>
mutate(grp = "Age-Adjusted")
```
## By Survey
<h4>Crude and Age-Adjusted Prevalence of Stage 2 Hypertension by Survey Period</h4>
```{r}
t <- bind_rows(t1, t2)
do_plot(t, grp)
```
<br>
## By Survey & Sex
<h4>Age-Adjusted Prevalence of Stage 2 Hypertension by Survey Period & Sex</h4>
```{r}
NHANES_Adj <- svystandardize(NHANES, by = ~RIDAGEYR, over = ~RIAGENDR+midpoint,
population = pop_2000)
t <- NHANES_Adj |>
group_by(RIAGENDR, midpoint) |>
summarise(
n = n(),
p = survey_mean(stage2, proportion = TRUE, vartype = "ci", prop_method = "beta") * 100)
```
```{r}
do_plot(t, RIAGENDR)
```
<br>
## By Survey & Age Group
<h4>Prevalence of Stage 2 Hypertension by Survey Period & Age Group</h4>
```{r}
t <- NHANES |>
group_by(RIDAGEYR, midpoint) |>
summarise(
n = n(),
p = survey_mean(stage2, proportion = TRUE, vartype = "ci", prop_method = "beta") * 100)
```
```{r}
do_plot(t, RIDAGEYR, 60)
```
## By Survey & Family Poverty Level
<h4>Age-Adjusted Prevalence of Stage 2 Hypertension by Survey Period & Family Poverty Level</h4>
```{r}
NHANES_Adj <- svystandardize(NHANES, by = ~RIDAGEYR, over = ~INDFMPIR+midpoint,
population = pop_2000, excluding.missing = ~INDFMPIR)
t <- NHANES_Adj |>
group_by(INDFMPIR, midpoint) |>
summarise(
n = n(),
p = survey_mean(stage2, proportion = TRUE, vartype = "ci", prop_method = "beta") * 100)
```
```{r}
do_plot(t, INDFMPIR)
```