pacman::p_load(gt, haven, srvyr, survey, tidyverse,plotly )

P_DEMO <- read_xpt("https://wwwn.cdc.gov/Nchs/Nhanes/2017-2018/P_DEMO.XPT") %>%
  filter(RIDAGEYR >= 18) %>%
  select(SEQN, RIAGENDR, RIDAGEYR, RIDRETH3, INDFMPIR, SDMVPSU, SDMVSTRA, WTMECPRP,  DMDEDUC2) %>%
  mutate(
    RIAGENDR = case_when(RIAGENDR == 1 ~ "Men",
      RIAGENDR == 2 ~ "Women"),
    RIDAGEYR = case_when(RIDAGEYR %in% 18:29 ~ "18-29",
      RIDAGEYR %in% 30:44 ~ "30-44",
      RIDAGEYR %in% 45:64 ~ "45-64",
      RIDAGEYR %in% 65:79 ~ "65-79",
      RIDAGEYR == 80 ~ "80 or more"),
    RIDRETH3 = case_when(RIDRETH3 %in% 1:2 ~ "Hispanic",
      RIDRETH3 == 3 ~ "Non-Hispanic White",
      RIDRETH3 == 4 ~ "Non-Hisptanic Black",
      RIDRETH3 == 6 ~ "Non-Hispanic Asian",
      RIDRETH3 == 7 ~ "Other Race"),
    INDFMPIR = case_when(INDFMPIR < 1 ~ "<100%",
      INDFMPIR < 2 ~ "100-199%",
      INDFMPIR < 4 ~ "200-399%",
      INDFMPIR >= 4 ~ "≥400%"),
    INDFMPIR = factor(INDFMPIR, levels = c("<100%", "100-199%", "200-399%", "≥400%")),
    DMDEDUC2 = case_when(DMDEDUC2 %in% 1:2 ~ "Less than high school",
      DMDEDUC2 == 3 ~ "High school",
      DMDEDUC2 == 4 ~ "Some college",
      DMDEDUC2 == 5 ~ "College or more")
  )
P_BPXO <- read_xpt("https://wwwn.cdc.gov/Nchs/Nhanes/2017-2018/P_BPXO.XPT") %>%
  mutate(
    systolic = rowMeans(cbind(BPXOSY1, BPXOSY2, BPXOSY3), na.rm = TRUE),
    diastolic = rowMeans(cbind(BPXODI1, BPXODI2, BPXODI3), na.rm = TRUE),
    stage2 = case_when(systolic >= 140 | diastolic >= 90 ~ TRUE,
      !is.na(systolic) & !is.na(diastolic) ~ FALSE)
  ) %>%
  select(SEQN, systolic, diastolic, stage2)

One <- left_join(P_DEMO, P_BPXO, by = "SEQN")
P_BMX <- read_xpt("https://wwwn.cdc.gov/Nchs/Nhanes/2017-2018/P_BMX.XPT") %>%
  select(SEQN, BMXBMI, BMXWAIST)

One <- left_join(One,P_BMX, by = "SEQN")

##Define the survey design.

NHANES <- One %>%
  filter(!is.na(stage2)) %>%
  as_survey_design(id = SDMVPSU, strata = SDMVSTRA, nest = TRUE, weight = WTMECPRP)
c <- svyby(~stage2, "Total", NHANES, unwtd.count, keep.names = FALSE)
p <- svyby(~stage2, "Total", NHANES, svyciprop, vartype = "ci", method = "beta", keep.names = FALSE)
t1 <- inner_join(c, p)
## Joining, by = "by"
c <- svyby(~stage2, ~RIAGENDR, NHANES, unwtd.count, keep.names = FALSE)
p <- svyby(~stage2, ~RIAGENDR, NHANES, svyciprop, vartype = "ci", method = "beta", keep.names = FALSE)
t2 <- inner_join(c, p) %>% rename(by = RIAGENDR)
## Joining, by = "RIAGENDR"
t <- bind_rows(t1, t2) %>%
  mutate(
    by = factor(by, levels = c("Total", "Men", "Women")),
    across(stage2:ci_u, ~ .x * 100)
  ) %>%
  select(-se)

plot <- ggplot(t, aes(x = by, y = stage2, fill = by)) +
  geom_col(alpha = 0.8, color = "#000000") +
  geom_errorbar(aes(ymin = ci_l, ymax = ci_u), size = 1, width = 0.2) +
  scale_fill_brewer(palette = "Blues") +
  guides(fill = "none") +
  labs(title = "Prevalence of Stage 2 Hypertension in Adults",
    subtitle = "Total and By Gender",
    x = element_blank(),
    y = "Prevalence") +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    text = element_text(size = 14))


plot <- ggplotly(plot) 
plot
chisq2 <- chisq.test(One$RIAGENDR,One$stage2)
chisq2
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  One$RIAGENDR and One$stage2
## X-squared = 2.8231, df = 1, p-value = 0.09292
# chisq <- table(One$RIAGENDR,One$stage2)
# print(chisq)
c <- svyby(~stage2, ~RIDAGEYR, NHANES, unwtd.count, keep.names = FALSE)
p <- svyby(~stage2, ~RIDAGEYR, NHANES, svyciprop, vartype = "ci", method = "beta", keep.names = FALSE)
t <- inner_join(c, p) %>% rename(by = RIDAGEYR) %>%
  mutate(across(stage2:ci_u, ~ .x * 100)) %>%
  select(-se)
## Joining, by = "RIDAGEYR"
plot <- ggplot(t, aes(x = by, y = stage2, fill = by)) +
  geom_col(alpha = 0.8, color = "#000000") +
  geom_errorbar(aes(ymin = ci_l, ymax = ci_u), size = 1, width = 0.2) +
  scale_fill_brewer(palette = "Blues") +
  guides(fill = "none") +
  labs(title = "Prevalence of Stage 2 Hypertension in Adults",
    subtitle = "By Age Group",
    x = element_blank(),
    y = "Prevalence") +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    text = element_text(size = 14))

plot <- ggplotly(plot) 
plot
chisq2 <- chisq.test(One$RIDAGEYR,One$stage2)
chisq2
## 
##  Pearson's Chi-squared test
## 
## data:  One$RIDAGEYR and One$stage2
## X-squared = 822.88, df = 4, p-value < 2.2e-16
c <- svyby(~stage2, ~RIDRETH3, NHANES, unwtd.count, keep.names = FALSE)
p <- svyby(~stage2, ~RIDRETH3, NHANES, svyciprop, vartype = "ci", method = "beta", keep.names = FALSE)
t <- inner_join(c, p) %>% rename(by = RIDRETH3) %>%
  filter(by != "Other Race") %>%
  mutate(across(stage2:ci_u, ~ .x * 100)) %>%
  select(-se)
## Joining, by = "RIDRETH3"
plot <- ggplot(t, aes(x = by, y = stage2, fill = by)) +
  geom_col(alpha = 0.8, color = "#000000") +
  geom_errorbar(aes(ymin = ci_l, ymax = ci_u), size = 1, width = 0.2) +
  scale_fill_brewer(palette = "Blues") +
  guides(fill = "none") +
  labs(title = "Prevalence of Stage 2 Hypertension in Adults",
    subtitle = "By Hispanic Status & Race",
    x = element_blank(),
    y = "Prevalence") +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    text = element_text(size = 14, angle=90,hjust=1,vjust=0.5))

plot <- ggplotly(plot) 
plot
chisq2 <- chisq.test(One$RIDRETH3,One$stage2)
chisq2
## 
##  Pearson's Chi-squared test
## 
## data:  One$RIDRETH3 and One$stage2
## X-squared = 127.44, df = 4, p-value < 2.2e-16
c <- svyby(~stage2, ~DMDEDUC2, NHANES, unwtd.count, keep.names = FALSE)
p <- svyby(~stage2, ~DMDEDUC2, NHANES, svyciprop, vartype = "ci", method = "beta", keep.names = FALSE)
t <- inner_join(c, p) %>% rename(by = DMDEDUC2) %>%
  mutate(across(stage2:ci_u, ~ .x * 100)) %>%
  select(-se)
## Joining, by = "DMDEDUC2"
plot <- ggplot(t, aes(x = by, y = stage2, fill = by)) +
  geom_col(alpha = 0.8, color = "#000000") +
  geom_errorbar(aes(ymin = ci_l, ymax = ci_u), size = 1, width = 0.2) +
  scale_fill_brewer(palette = "Blues") +
  guides(fill = "none") +
  labs(title = "Prevalence of Stage 2 Hypertension in Adults",
    subtitle = "By Education",
    x = element_blank(),
    y = "Prevalence") +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    text = element_text(size = 14,angle=90,hjust=1,vjust=0.5))

plot <- ggplotly(plot) 
plot
chisq2 <- chisq.test(One$DMDEDUC2,One$stage2)
chisq2
## 
##  Pearson's Chi-squared test
## 
## data:  One$DMDEDUC2 and One$stage2
## X-squared = 69.619, df = 3, p-value = 5.149e-15