---
title: Physical Activity in U.S. 2-11 Year Olds
subtitle: Using R with NHANES
author: Cubicle 3253
format:
html:
code-tools: true
echo: false
embed-resources: true
grid:
sidebar-width: 150px
body-width: 1600px
margin-width: 50px
linestretch: 1.1
toc: true
toc-location: left
---
### Introduction
For the 2009-2010 survey, NHANES added a question about the number of days
in a week respondents, 2 to 11 years of age, were physically active for at least
60 minutes. The question was asked by proxy and was
<i>"During the past 7 days, on how many days was {child's name} physically active
for a total of at least 60 minutes per day? Add up all the time they spent in
any kind of physical activity that increased their heart rate and made them
breathe hard some of the time."</i>
This document examines physical activity, in children 2 to 11 years of age,
using the NHANES 2009-2010 through August 2021-August 2023 surveys.
```{r}
#| message: false
library(apexcharter)
library(broom.helpers)
library(dplyr)
library(gt)
library(haven)
library(survey)
library(srvyr)
library(tidyr)
library(viridisLite)
read_nhanes <- function(xpt_name) {
xpt_name <- toupper(xpt_name)
if(substr(xpt_name, 1, 2) == "P_") {
begin_year = "2017"
} else {
begin_year = switch(substr(xpt_name, nchar(xpt_name) - 1, nchar(xpt_name)),
"_L" = "2021",
"_J" = "2017",
"_I" = "2015",
"_H" = "2013",
"_G" = "2011",
"_F" = "2009",
"_E" = "2007",
"_D" = "2005",
"_C" = "2003",
"_B" = "2001",
"1999")
}
xpt_url <- paste0("https://wwwn.cdc.gov/nchs/data/nhanes/public/", begin_year, "/datafiles/", xpt_name, ".xpt")
try(read_xpt(xpt_url), silent = TRUE)
}
do_plot <- function(df, plot_ht = 600) {
df |>
apex(aes(x = SDDSRVYR, y = p, fill = PAQ706), type = "bar", height = plot_ht) |>
ax_colors(viridis(4, direction = -1)) |>
ax_xaxis(title = list(text = "Percent"), min = 0, max = 100, stepSize = 10) |>
ax_tooltip(y = list(formatter = format_num(".1f", suffix = "%"))) |>
ax_chart(stacked = TRUE) |>
ax_plotOptions(bar = bar_opts(horizontal = TRUE, barHeight = "85%"))
}
do_table <- function(df) {
df |>
select(SDDSRVYR:p) |>
pivot_wider(names_from = PAQ706, values_from = p) |>
gt() |>
cols_label(SDDSRVYR = "Survey Period") |>
tab_spanner(columns = 2:5, label = "Days Physically Active At Least 60 Minutes") |>
fmt_percent(decimals = 1, scale_values = FALSE) |>
cols_align(columns = 1, align = "left") |>
cols_align(columns = 2:5, align = "center") |>
tab_options(table.align = "left",
data_row.padding = px(2),
row_group.padding = px(2),
table.font.size = 16)
}
```
```{r}
demo_vars <- c("SEQN", "RIDAGEYR", "RIAGENDR", "INDFMPIR", "SDDSRVYR",
"SDMVSTRA", "SDMVPSU", "WTINT2YR", "WTINTPRP")
DEMO_F <- read_nhanes("DEMO_F") |> select(any_of(demo_vars))
DEMO_G <- read_nhanes("DEMO_G") |> select(any_of(demo_vars))
DEMO_H <- read_nhanes("DEMO_H") |> select(any_of(demo_vars))
DEMO_I <- read_nhanes("DEMO_I") |> select(any_of(demo_vars))
P_DEMO <- read_nhanes("P_DEMO") |> select(any_of(demo_vars))
DEMO_L <- read_nhanes("DEMO_L") |> select(any_of(demo_vars))
DEMO <- bind_rows(DEMO_F, DEMO_G, DEMO_H, DEMO_I, P_DEMO, DEMO_L)
```
```{r}
PAQ_F <- read_nhanes("PAQ_F") |> select(SEQN, PAQ706)
PAQ_G <- read_nhanes("PAQ_G") |> select(SEQN, PAQ706)
PAQ_H <- read_nhanes("PAQ_H") |> select(SEQN, PAQ706)
PAQ_I <- read_nhanes("PAQ_I") |> select(SEQN, PAQ706)
P_PAQY <- read_nhanes("P_PAQY") |> select(SEQN, PAQ706)
PAQY_L <- read_nhanes("PAQY_L") |> select(SEQN, PAQ706)
PAQ <- bind_rows(PAQ_F, PAQ_G, PAQ_H, PAQ_I, P_PAQY, PAQY_L)
```
```{r}
One <- left_join(DEMO, PAQ, by = "SEQN") |>
mutate(
SDDSRVYR = recode_values(SDDSRVYR,
6 ~ "2009-2010",
7 ~ "2011-2012",
8 ~ "2013-2014",
9 ~ "2015-2016",
66 ~ "2017-March 2020",
12 ~ "August 2021-August 2023"),
RIAGENDR = if_else(RIAGENDR == 1, "Boys", "Girls"),
RIDAGEYR = recode_values(RIDAGEYR,
2:5 ~ "2 to 5 Years",
6:11 ~ "6 to 11 Years"),
INDFMPIR = case_when(
INDFMPIR < 1.30 ~ "Less than 130%",
INDFMPIR < 3.50 ~ "130% to 349%",
INDFMPIR >= 3.50 ~ "350% or more"),
INDFMPIR = factor(INDFMPIR,
levels = c("Less than 130%", "130% to 349%", "350% or more")),
PAQ706 = recode_values(PAQ706,
0:2 ~ "0 to 2",
3:4 ~ "3 to 4",
5:6 ~ "5 to 6",
7 ~ "7"),
survey_wt = coalesce(WTINT2YR, WTINTPRP))
```
```{r}
NHANES <- One |>
filter(!is.na(RIDAGEYR), !is.na(PAQ706)) |>
as_survey_design(id = SDMVPSU, strata = SDMVSTRA, nest = TRUE, weights = survey_wt)
```
### Overall
```{r}
#| message: false
t <- NHANES |>
group_by(SDDSRVYR, PAQ706) |>
summarize(p = survey_prop(proportion = TRUE)) |>
mutate(p = p * 100) |>
select(!p_se) |>
ungroup()
```
<h6>Days Physically Active at least 60 Minutes by Survey Cycle</h6>
```{r}
#| results: asis
t |> do_plot()
```
<details>
<summary>
Data Table
</summary>
```{r}
t |> do_table()
```
</details>
<details>
<summary>
Prevalence Ratios
</summary>
<br>
Prevalence ratios for being physically active for 7 days.
```{r}
svyglm((PAQ706 == "7") ~ SDDSRVYR,
NHANES, family = quasipoisson()) |>
tidy_and_attach(exponentiate = TRUE) |>
tidy_remove_intercept() |>
tidy_add_reference_rows() |>
tidy_add_term_labels() |>
mutate(
variable = "Survey Period",
pr = if_else(!is.na(estimate),
sprintf("%.2f (%.2f, %.2f)", estimate, conf.low, conf.high), "Ref.")) |>
select(variable, label, pr) |>
gt(groupname_col = "variable", rowname_col = "label") |>
tab_stubhead(label = "") |>
cols_label(pr = "Prevalence Ratio (95% CI)") |>
tab_stub_indent(rows = everything(), indent = 3) |>
cols_align(columns = 3, align = "center") |>
tab_options(table.align = "left",
data_row.padding = px(2),
row_group.padding = px(2),
table.font.size = 16)
```
</details>
### By Sex
```{r}
#| message: false
t <- NHANES |>
group_by(RIAGENDR, SDDSRVYR, PAQ706) |>
summarize(p = survey_prop(proportion = TRUE)) |>
mutate(p = p * 100) |>
select(!p_se) |>
ungroup()
```
<h6>Days Physically Active at least 60 Minutes by Sex & Survey Cycle</h6>
```{r}
#| results: asis
t |> do_plot(plot_ht = 300) |>
ax_facet_wrap(vars(RIAGENDR), ncol = 1)
```
<details>
<summary>
Data Table
</summary>
```{r}
t |> filter(RIAGENDR == "Boys") |>
do_table() |>
tab_header(title = "Boys")
t |> filter(RIAGENDR == "Girls") |>
do_table() |>
tab_header(title = "Girls")
```
</details>
<details>
<summary>
Prevalence Ratios
</summary>
<br>
Prevalence ratios for being physically active for 7 days.
```{r}
svyglm((PAQ706 == "7") ~ SDDSRVYR + RIAGENDR,
NHANES, family = quasipoisson()) |>
tidy_and_attach(exponentiate = TRUE) |>
tidy_remove_intercept() |>
tidy_add_reference_rows() |>
tidy_add_term_labels() |>
mutate(
variable = replace_values(variable,
"SDDSRVYR" ~ "Survey Period",
"RIAGENDR" ~ "Sex"),
pr = if_else(!is.na(estimate),
sprintf("%.2f (%.2f, %.2f)", estimate, conf.low, conf.high), "Ref.")) |>
select(variable, label, pr) |>
gt(groupname_col = "variable", rowname_col = "label") |>
tab_stubhead(label = "") |>
cols_label(pr = "Prevalence Ratio (95% CI)") |>
tab_stub_indent(rows = everything(), indent = 3) |>
cols_align(columns = 3, align = "center") |>
tab_options(table.align = "left",
data_row.padding = px(2),
row_group.padding = px(2),
table.font.size = 16)
```
</details>
### By Age Group
```{r}
#| message: false
t <- NHANES |>
group_by(RIDAGEYR, SDDSRVYR, PAQ706) |>
summarize(p = survey_prop(proportion = TRUE)) |>
mutate(p = p * 100) |>
select(!p_se) |>
ungroup()
```
<h6>Days Physically Active at least 60 Minutes by Age Group & Survey Cycle</h6>
```{r}
#| results: asis
t |> do_plot(plot_ht = 300) |>
ax_facet_wrap(vars(RIDAGEYR), ncol = 1)
```
<details>
<summary>
Data Table
</summary>
```{r}
t |> filter(RIDAGEYR == "2 to 5 Years") |>
do_table() |>
tab_header(title = "2 to 5 Years of Age")
t |> filter(RIDAGEYR == "6 to 11 Years") |>
do_table() |>
tab_header(title = "6 to 11 Years of Age")
```
</details>
<details>
<summary>
Prevalence Ratios
</summary>
<br>
Prevalence ratios for being physically active for 7 days.
```{r}
svyglm((PAQ706 == "7") ~ SDDSRVYR + RIDAGEYR,
NHANES, family = quasipoisson()) |>
tidy_and_attach(exponentiate = TRUE) |>
tidy_remove_intercept() |>
tidy_add_reference_rows() |>
tidy_add_term_labels() |>
mutate(
variable = replace_values(variable,
"SDDSRVYR" ~ "Survey Period",
"RIDAGEYR" ~ "Age Group"),
pr = if_else(!is.na(estimate),
sprintf("%.2f (%.2f, %.2f)", estimate, conf.low, conf.high), "Ref.")) |>
select(variable, label, pr) |>
gt(groupname_col = "variable", rowname_col = "label") |>
tab_stubhead(label = "") |>
cols_label(pr = "Prevalence Ratio (95% CI)") |>
tab_stub_indent(rows = everything(), indent = 3) |>
cols_align(columns = 3, align = "center") |>
tab_options(table.align = "left",
data_row.padding = px(2),
row_group.padding = px(2),
table.font.size = 16)
```
</details>
### By Family Income Level
```{r}
#| message: false
t <- NHANES |>
group_by(INDFMPIR, SDDSRVYR, PAQ706) |>
summarize(p = survey_prop(proportion = TRUE)) |>
mutate(p = p * 100) |>
select(!p_se) |>
ungroup()
```
<h6>Days Physically Active at least 60 Minutes by Family Income Level & Survey Cycle</h6>
```{r}
#| results: asis
t |> do_plot(plot_ht = 200) |>
ax_facet_wrap(vars(INDFMPIR), ncol = 1)
```
<details>
<summary>
Data Table
</summary>
```{r}
t |> filter(INDFMPIR == "Less than 130%") |>
do_table() |>
tab_header(title = "Family Income Level: Less than 130%")
t |> filter(INDFMPIR == "130% to 349%") |>
do_table() |>
tab_header(title = "Family Income Level: 130% to 349%")
t |> filter(INDFMPIR == "350% or more") |>
do_table() |>
tab_header(title = "Family Income Level: 350% or more")
```
</details>
<details>
<summary>
Prevalence Ratios
</summary>
<br>
Prevalence ratios for being physically active for 7 days.
```{r}
svyglm((PAQ706 == "7") ~ SDDSRVYR + INDFMPIR,
NHANES, family = quasipoisson()) |>
tidy_and_attach(exponentiate = TRUE) |>
tidy_remove_intercept() |>
tidy_add_reference_rows() |>
tidy_add_term_labels() |>
mutate(
variable = replace_values(variable,
"SDDSRVYR" ~ "Survey Period",
"INDFMPIR" ~ "Family Income Level"),
pr = if_else(!is.na(estimate),
sprintf("%.2f (%.2f, %.2f)", estimate, conf.low, conf.high), "Ref.")) |>
select(variable, label, pr) |>
gt(groupname_col = "variable", rowname_col = "label") |>
tab_stubhead(label = "") |>
cols_label(pr = "Prevalence Ratio (95% CI)") |>
tab_stub_indent(rows = everything(), indent = 3) |>
cols_align(columns = 3, align = "center") |>
tab_options(table.align = "left",
data_row.padding = px(2),
row_group.padding = px(2),
table.font.size = 16)
```
</details>
### Final Model
Prevalence ratios for being physically active for 7 days.
```{r}
svyglm((PAQ706 == "7") ~ SDDSRVYR + RIAGENDR + RIDAGEYR + INDFMPIR,
NHANES, family = quasipoisson()) |>
tidy_and_attach(exponentiate = TRUE) |>
tidy_remove_intercept() |>
tidy_add_reference_rows() |>
tidy_add_term_labels() |>
mutate(
variable = replace_values(variable,
"SDDSRVYR" ~ "Survey Period",
"RIAGENDR" ~ "Sex",
"RIDAGEYR" ~ "Age Group",
"INDFMPIR" ~ "Family Income Level"),
pr = if_else(!is.na(estimate),
sprintf("%.2f (%.2f, %.2f)", estimate, conf.low, conf.high), "Ref.")) |>
select(variable, label, pr) |>
gt(groupname_col = "variable", rowname_col = "label") |>
tab_stubhead(label = "") |>
cols_label(pr = "Prevalence Ratio (95% CI)") |>
tab_stub_indent(rows = everything(), indent = 3) |>
cols_align(columns = 3, align = "center") |>
tab_options(table.align = "left",
data_row.padding = px(2),
row_group.padding = px(2),
table.font.size = 16)
```