---
title: Depression in US Population 18 Years of Age and Older
subtitle: Using R with NHANES
author: Cubicle 3253
format:
html:
code-tools: true
echo: false
embed-resources: true
grid:
sidebar-width: 50px
body-width: 1700px
margin-width: 50px
linestretch: 1.1
---
<p>
Starting with the 2005-2006 survey, depression was measured in the National Health and
Nutrition Examination Survey (NHANES) using the Patient Health Questionnaire (PHQ-9),
a nine-item screening instrument that asks questions about the frequency of depression
symptoms over the past 2 weeks
(<a href="https://wwwn.cdc.gov/nchs/data/nhanes/public/2005/questionnaires/mi_dpq_d.pdf">1</a>).
</p><p>
This document examines depression, in adults, for the available survey periods.
The combined 2017-March 2020 data was used as it's the only public source for the 2019-March 2020 data.
The data was examined by survey period, sex, age group, family income level and self-reported health.
The score, from the PHQ-9, was categorized into the following depressive symptom groups:
None (0 to 4), Mild (5 to 9), Moderate (10 to 14) and Moderately Severe to Severe (15 to 27).
</p>
```{r}
#| message: false
library(apexcharter)
library(broom.helpers)
library(dplyr)
library(haven)
library(gt)
library(RColorBrewer)
library(srvyr)
library(stringr)
library(survey)
library(tidyr)
read_nhanes <- function(xpt_name) {
xpt_name <- toupper(xpt_name)
if(substr(xpt_name, 1, 2) == "P_") {
begin_year = "2017"
} else {
begin_year = switch(substr(xpt_name, nchar(xpt_name) - 1, nchar(xpt_name)),
"_L" = "2021",
"_J" = "2017",
"_I" = "2015",
"_H" = "2013",
"_G" = "2011",
"_F" = "2009",
"_E" = "2007",
"_D" = "2005",
"_C" = "2003",
"_B" = "2001",
"1999")
}
xpt_url <- paste0("https://wwwn.cdc.gov/nchs/data/nhanes/public/", begin_year, "/datafiles/", xpt_name, ".xpt")
try(read_xpt(xpt_url), silent = TRUE)
}
do_summary <- function(grp_var) {
NHANES |>
filter(!is.na({{grp_var}})) |>
group_by({{grp_var}}, dep_symptoms) |>
summarize(
n = n(),
p = survey_prop(vartype = NULL) * 100)
}
do_wider <- function(grp_var, grp_name) {
pivot_wider(t_p, names_from = dep_symptoms, values_from = c(n, p)) |>
mutate(
variable = grp_name,
n = rowSums(across(starts_with("n")))) |>
select(variable, label = {{grp_var}}, n, starts_with("p")) |>
rename_with(~ str_sub(.x, 3), starts_with("p"))
}
do_plot <- function(df, x_var) {
df |>
apex(aes(x = {{x_var}}, y = p, fill = dep_symptoms), type = "bar", height = 480) |>
ax_colors(rev(brewer.pal(4, "RdYlBu"))) |>
ax_xaxis(title = list(text = "Percent", style = list(fontSize = "14px")),
labels = list(style = list(fontSize = "14px")),
min = 0, max = 100, stepSize = 10) |>
ax_yaxis(labels = list(style = list(fontSize = "14px"))) |>
ax_tooltip(y = list(formatter = format_num(".1f", suffix = "%"))) |>
ax_chart(stacked = TRUE) |>
ax_plotOptions(bar = bar_opts(horizontal = TRUE, barHeight = "85%"))
}
```
```{r}
demo_vars <- c("SEQN", "SDDSRVYR", "RIAGENDR", "RIDAGEYR", "INDFMPIR",
"SDMVSTRA", "SDMVPSU", "WTMEC2YR", "WTMECPRP")
DEMO_D <- read_nhanes("DEMO_D") |> select(any_of(demo_vars))
DEMO_E <- read_nhanes("DEMO_E") |> select(any_of(demo_vars))
DEMO_F <- read_nhanes("DEMO_F") |> select(any_of(demo_vars))
DEMO_G <- read_nhanes("DEMO_G") |> select(any_of(demo_vars))
DEMO_H <- read_nhanes("DEMO_H") |> select(any_of(demo_vars))
DEMO_I <- read_nhanes("DEMO_I") |> select(any_of(demo_vars))
P_DEMO <- read_nhanes("P_DEMO") |> select(any_of(demo_vars))
DEMO_L <- read_nhanes("DEMO_L") |> select(any_of(demo_vars))
DEMO <- bind_rows(DEMO_D, DEMO_E, DEMO_F, DEMO_G, DEMO_H, DEMO_I, P_DEMO, DEMO_L)
```
```{r}
HUQ_D <- read_nhanes("HUQ_D") |> select(SEQN, HUQ010)
HUQ_E <- read_nhanes("HUQ_E") |> select(SEQN, HUQ010)
HUQ_F <- read_nhanes("HUQ_F") |> select(SEQN, HUQ010)
HUQ_G <- read_nhanes("HUQ_G") |> select(SEQN, HUQ010)
HUQ_H <- read_nhanes("HUQ_H") |> select(SEQN, HUQ010)
HUQ_I <- read_nhanes("HUQ_I") |> select(SEQN, HUQ010)
P_HUQ <- read_nhanes("P_HUQ") |> select(SEQN, HUQ010)
HUQ_L <- read_nhanes("HUQ_L") |> select(SEQN, HUQ010)
HUQ <- bind_rows(HUQ_D, HUQ_E, HUQ_F, HUQ_G, HUQ_H, HUQ_I, P_HUQ, HUQ_L)
```
```{r}
DPQ_D <- read_nhanes("DPQ_D")
DPQ_E <- read_nhanes("DPQ_E")
DPQ_F <- read_nhanes("DPQ_F")
DPQ_G <- read_nhanes("DPQ_G")
DPQ_H <- read_nhanes("DPQ_H")
DPQ_I <- read_nhanes("DPQ_I")
P_DPQ <- read_nhanes("P_DPQ")
DPQ_L <- read_nhanes("DPQ_L")
DPQ <- bind_rows(DPQ_D, DPQ_E, DPQ_F, DPQ_G, DPQ_H, DPQ_I, P_DPQ, DPQ_L)
```
```{r}
One <- DEMO |>
left_join(HUQ, by = "SEQN") |>
left_join(DPQ, by = "SEQN") |>
mutate(
## See: https://wwwn.cdc.gov/nchs/nhanes/continuousnhanes/overviewbrief.aspx?Cycle=2017-2020 ##
survey_wt = if_else(SDDSRVYR != 66, WTMEC2YR * 2/17.2, WTMECPRP * 3.2/17.2),
SDDSRVYR = recode_values(SDDSRVYR,
4 ~ "2005-2006",
5 ~ "2007-2008",
6 ~ "2009-2010",
7 ~ "2011-2012",
8 ~ "2013-2014",
9 ~ "2015-2016",
66 ~ "2017-March 2020",
12 ~ "August 2021-August 2023"),
RIAGENDR = if_else(RIAGENDR == 1, "Males", "Females"),
RIDAGEYR = recode_values(RIDAGEYR,
18:34 ~ "18 to 34",
35:49 ~ "35 to 49",
50:64 ~ "50 to 64",
65:80 ~ "65 or more"),
INDFMPIR = case_when(
INDFMPIR < 1 ~ "Less than 100%",
INDFMPIR < 2 ~ "100% to 199%",
INDFMPIR < 4 ~ "200% to 399%",
INDFMPIR >= 4 ~ "400% or more"),
INDFMPIR = factor(INDFMPIR,
levels = c("Less than 100%", "100% to 199%", "200% to 399%", "400% or more")),
HUQ010 = recode_values(HUQ010,
1 ~ "Excellent",
2 ~ "Very Good",
3 ~ "Good",
4 ~ "Fair",
5 ~ "Poor"),
HUQ010 = factor(HUQ010,
levels = c("Excellent", "Very Good", "Good", "Fair", "Poor")),
across(DPQ010:DPQ090, ~ if_else(.x >= 7, NA_real_, .x)),
score = rowSums(cbind(DPQ010, DPQ020, DPQ030, DPQ040, DPQ050, DPQ060,
DPQ070, DPQ080, DPQ090)),
dep_symptoms = recode_values(score,
0:4 ~ "None",
5:9 ~ "Mild",
10:14 ~ "Moderate",
15:27 ~ "Moderately Severe to Severe"),
dep_symptoms = factor(dep_symptoms,
levels = c("None", "Mild", "Moderate", "Moderately Severe to Severe")),
depression = if_else(score >= 10, TRUE, FALSE))
```
```{r}
NHANES <- One |>
filter(!is.na(RIDAGEYR), !is.na(dep_symptoms)) |>
as_survey_design(id = SDMVPSU, strata = SDMVSTRA, nest = TRUE, weights = survey_wt)
```
::: {.panel-tabset}
### Survey Period
```{r}
#| results: asis
t_p <- do_summary(SDDSRVYR)
t1 <- do_wider(SDDSRVYR, "Survey Period")
t_p |> do_plot(SDDSRVYR)
```
### Sex
```{r}
#| results: asis
t_p <- do_summary(RIAGENDR)
t2 <- do_wider(RIAGENDR, "Sex")
t_p |> do_plot(RIAGENDR)
```
### Age Group
```{r}
#| results: asis
t_p <- do_summary(RIDAGEYR)
t3 <- do_wider(RIDAGEYR, "Age Group")
t_p |> do_plot(RIDAGEYR)
```
### Family Income Level
```{r}
#| results: asis
t_p <- do_summary(INDFMPIR)
t4 <- do_wider(INDFMPIR, "Family Income Level")
t_p |> do_plot(INDFMPIR)
```
### Self-Reported Health
```{r}
#| results: asis
t_p <- do_summary(HUQ010)
t5 <- do_wider(HUQ010, "Self-Reported Health")
t_p |> do_plot(HUQ010)
```
:::
<details>
<summary>
Data Table
</summary>
```{r}
bind_rows(t1, t2, t3, t4, t5) |>
gt(groupname_col = "variable", rowname_col = "label") |>
tab_stub_indent(rows = everything(), indent = 3) |>
tab_spanner(columns = 4:7, label = "Depressive Symptoms") |>
fmt_integer(columns = 3) |>
fmt_percent(columns = 4:7, decimals = 1, scale_values = FALSE) |>
cols_align(columns = 3:7, align = "center") |>
tab_options(table.align = "left",
data_row.padding = px(2),
row_group.padding = px(2),
table.font.size = 16)
```
</details>
<details>
<summary>
Prevalence Ratios
</summary>
```{r}
svyglm(depression ~ SDDSRVYR + RIAGENDR + RIDAGEYR + INDFMPIR + HUQ010,
NHANES, family = quasipoisson()) |>
tidy_and_attach(exponentiate = TRUE) |>
tidy_remove_intercept() |>
tidy_add_reference_rows() |>
tidy_add_term_labels() |>
mutate(
variable = replace_values(variable,
"SDDSRVYR" ~ "Survey Period",
"RIAGENDR" ~ "Sex",
"RIDAGEYR" ~ "Age Group",
"INDFMPIR" ~ "Family Income Level",
"HUQ010" ~ "Self-Reported Health"),
pr = if_else(!is.na(estimate),
sprintf("%.2f (%.2f, %.2f)", estimate, conf.low, conf.high), "Ref.")) |>
select(variable, label, pr) |>
gt(groupname_col = "variable", rowname_col = "label") |>
tab_stubhead(label = "") |>
cols_label(pr = "Prevalence Ratio (95% CI)") |>
tab_stub_indent(rows = everything(), indent = 3) |>
cols_align(columns = 3, align = "center") |>
tab_footnote("Note: Prevalence ratio for moderate to severe depression (PHQ-9 score of 10 or more)") |>
tab_options(table.align = "left",
data_row.padding = px(2),
row_group.padding = px(2),
table.font.size = 16)
```
</details>