pacman::p_load(gt, haven, patchwork, srvyr, survey, tidyverse,rstatix)
P_DEMO <- read_xpt("https://wwwn.cdc.gov/Nchs/Nhanes/2017-2018/P_DEMO.XPT") %>%
select(SEQN, RIDAGEYR, SDMVPSU, SDMVSTRA, WTMECPRP)
P_BPXO <- read_xpt("https://wwwn.cdc.gov/Nchs/Nhanes/2017-2018/P_BPXO.XPT") %>%
mutate(
Systolic = rowMeans(cbind(BPXOSY1, BPXOSY2, BPXOSY3), na.rm = TRUE),
Diastolic = rowMeans(cbind(BPXODI1, BPXODI2, BPXODI3), na.rm = TRUE)
) %>%
select(SEQN, Systolic, Diastolic)
P_DIQ <- read_xpt("https://wwwn.cdc.gov/Nchs/Nhanes/2017-2018/P_DIQ.XPT") %>%
mutate(
Diabetes = case_when(DIQ010 == 1 ~ "Yes",
DIQ010 %in% 2:3 ~ "No")
) %>%
select(SEQN, Diabetes)
One <- left_join(P_DEMO, P_BPXO, by = "SEQN") %>%
left_join(., P_DIQ, by = "SEQN")
g1 <- ggplot(One %>% filter(RIDAGEYR >= 18, !is.na(Systolic), !is.na(Diabetes), WTMECPRP > 0),
aes(x = Systolic, y = Diabetes)) +
geom_boxplot(fill = "#dee8ff") +
coord_cartesian(xlim = c(80, 200)) +
scale_x_continuous(breaks = seq(80, 200, 10)) +
labs(title = "Unweighted",
x = "Systolic Blood Pressure") +
theme_light() +
theme(plot.title = element_text(hjust = 0.5),
text = element_text(size = 14))
g2 <- ggplot(One %>% filter(RIDAGEYR >= 18, !is.na(Systolic), !is.na(Diabetes), WTMECPRP > 0),
aes(x = Systolic, y = Diabetes, weight = WTMECPRP)) +
geom_boxplot(fill = "#dee8ff") +
coord_cartesian(xlim = c(80, 200)) +
scale_x_continuous(breaks = seq(80, 200, 10)) +
labs(title = "Weighted data represent the noninstitutionalized
US population",
x = "Systolic Blood Pressure") +
theme_light() +
theme(plot.title = element_text(hjust = 0.5),
text = element_text(size = 14))
g1 / g2
### A t test for difference between mean systolic BP by diabetes
status.
test <- One %>% t_test(Systolic ~ Diabetes)
options(scipen=999)
print(test)
## # A tibble: 1 × 10
## .y. group1 group2 n1 n2 statistic df p p.adj p.adj.s…¹
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Systolic No Yes 9144 1203 -19.0 1463. 4.68e-72 4.68e-72 ****
## # … with abbreviated variable name ¹p.adj.signif
g3 <- ggplot(One %>% filter(RIDAGEYR >= 18, !is.na(Diastolic), !is.na(Diabetes), WTMECPRP > 0),
aes(x = Systolic, y = Diabetes)) +
geom_boxplot(fill = "#dee8ff") +
coord_cartesian(xlim = c(40, 200)) +
scale_x_continuous(breaks = seq(20, 200, 10)) +
labs(title = "Unweighted",
x = "Diastolic Blood Pressure") +
theme_light() +
theme(plot.title = element_text(hjust = 0.5),
text = element_text(size = 14))
g4 <- ggplot(One %>% filter(RIDAGEYR >= 18, !is.na(Diastolic), !is.na(Diabetes), WTMECPRP > 0),
aes(x = Systolic, y = Diabetes, weight = WTMECPRP)) +
geom_boxplot(fill = "#dee8ff") +
coord_cartesian(xlim = c(40, 200)) +
scale_x_continuous(breaks = seq(40, 200, 10)) +
labs(title = "Weighted data represent the noninstitutionalized
US population",
x = "Diastolic Blood Pressure") +
theme_light() +
theme(plot.title = element_text(hjust = 0.5),
text = element_text(size = 14))
g3 / g4
### A t test for difference between mean diastolic BP by diabetes
status.
test <- One %>% t_test(Diastolic ~ Diabetes)
options(scipen=999)
print(test)
## # A tibble: 1 × 10
## .y. group1 group2 n1 n2 statistic df p p.adj p.adj…¹
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Diastolic No Yes 9144 1203 -5.96 1555. 3.13e-9 3.13e-9 ****
## # … with abbreviated variable name ¹p.adj.signif