1. Data Cleaning

1-1. Import Packages

library(haven)
library(dplyr)
library(lubridate)
library(kableExtra)
library(DiagrammeR)
library(ggplot2)
library(survey)
library(reshape2)

1-2. Import Datasets

dta_diet <- read_sas("nahsit2005.sas7bdat")
dta_nutri_all <- read_sas("data_SAS/data5.sas7bdat")
dta_demo <- read_sas("data_SAS/data3.sas7bdat")
dta_body_all <- read_sas("data_SAS/data4.sas7bdat")
dta_stra <- read_sas("data_SAS/data6.sas7bdat")

1-3. Fasting Interval Data

dta_fi <- dta_diet %>% select(id, XEATTIME, nn1) %>% 
  mutate(XEATTIME=hm(XEATTIME), 
         intvl=hour(XEATTIME)*60+minute(XEATTIME)) %>% 
  filter(nn1 > 5) %>% 
  group_by(id) %>% 
  summarise(fi=(max(intvl)-min(intvl))/60)

1-4. Metabolic Syndrome Data

dta_body <- dta_body_all %>% mutate(age=as.numeric(age)) %>% filter(age>=19) %>% 
  select(age, sex, glucose, tg, height, weight, hdl, 
         starts_with("sbp"), starts_with("dbp"), ht_know, waist, ewt) %>% 
  mutate_if(is.character, as.numeric) %>% 
  cbind(dta_body_all %>% mutate(age=as.numeric(age)) %>% 
          filter(age>=19) %>% select(id), .) %>% tibble::as_tibble() %>% 
  mutate(gndr=factor(sex, labels=c("Male", "Female")), 
         ht_know=ifelse(ht_know==2 | is.na(ht_know), 0, 1)) %>% 
  select(1:2, gndr, 4:19) %>% arrange(id)

Blood Pressure

  • 收縮壓≧130mmHg或舒張壓≧85mmHg或服用高血壓藥物
tbl_bp <- dta_body %>%left_join(., dta_demo, by="id") %>% 
  select(id, starts_with("sbp"), starts_with("dbp"), ea17d) %>%
  mutate(n_sbp=rowSums(!is.na(select(., starts_with("sbp")))),
         n_dbp=rowSums(!is.na(select(., starts_with("dbp"))))) %>%
  mutate_at(vars(starts_with("sbp"), starts_with("dbp")), 
            list(~ na_if(., 0))) %>%
  mutate(mean_sbp=rowMeans(select(., starts_with("sbp")), na.rm=T),
         mean_dbp=rowMeans(select(., starts_with("dbp")), na.rm=T),
         if_htn=case_when(mean_sbp>=130 | mean_dbp>=85 | ea17d=="1" ~ 1,
                          n_sbp>0 & n_dbp>0 ~ 0)) %>%
  select(id, if_htn)

Triglycerides

  • ≧ 150mg/dL
tbl_tg <- dta_body %>% select(id, tg) %>%
  mutate(if_tg=ifelse(tg>=150, 1, 0)) %>%
  select(id, if_tg)

HDL-cholesterol

  • 男性 < 40mg/dL、女性 < 50mg/dL
tbl_hdl <- dta_body %>% select(id, gndr, hdl) %>% 
  mutate(if_hdl=ifelse(gndr=="Male" & hdl<40 | gndr=="Female" & hdl<50, 1, 0)) %>%
  select(id, if_hdl)

Glucose

  • ≧ 100mg/dL或有在服用糖尿病藥物
tbl_glu <- dta_body %>% left_join(., dta_demo, by="id") %>% 
  select(id, glucose, ea20d) %>%
  mutate(if_glu=ifelse(glucose>=100 | ea20d=="1", 1, 0)) %>%
  select(id, if_glu)

Waist Circumference

  • 男性 ≧ 90cm、女性 ≧ 80cm
tbl_waist <- dta_body %>% select(id, gndr, waist) %>%
  mutate(if_waist=ifelse(gndr=="Male" & waist>=90 | gndr=="Female" & waist>=80, 1, 0)) %>%
  select(id, if_waist)

MetS or not

tbl_mets <- 
  plyr::join_all(list(tbl_bp,tbl_tg,tbl_hdl, tbl_glu, tbl_waist),
                 by="id", type="left") %>% 
  tibble::as_tibble() %>% 
  mutate(n_na=rowSums(is.na(.)) %>% as.integer(), 
         n_con=rowSums(select(., 2:6), na.rm=T) %>% as.integer()) %>%  
  filter(n_na < 3) %>% 
  mutate(mets_l=factor(ifelse(n_con>=3, 1, 2), labels=c("Yes", "No")), 
         mets_s=factor(case_when(n_con>=3 ~ 1, 
                                 n_con<3 & n_na==0 ~ 2), 
                       labels=c("Yes", "No"))) %>% 
  select(id, n_na, mets_l, mets_s) %>% arrange(id)

1-5. BMI data

≧ 24 kg/m2

tbl_bmi <- dta_body %>% select(id, height, weight) %>% 
  mutate(bmi=weight/((height/100)^2), 
         if_owob=factor(ifelse(bmi>=24, 1, 2), labels=c("Yes", "No"))) %>% 
  select(id, if_owob) %>% arrange(id)

dta_all <- inner_join(dta_fi, dta_demo %>% select(id, age) %>% 
             mutate(age=as.numeric(age)), by="id") %>% 
  left_join(., tbl_mets, by="id") %>% 
  left_join(., tbl_bmi, by="id")

1-6. Demograohic data

dta <- inner_join(dta_all, dta_demo %>% select(id, sex, aa02, aa19), by="id") %>% 
  left_join(., dta_diet %>% group_by(id) %>% 
              summarise(stra=unique(stra), qwt=max(qwt)), by="id") %>% 
  mutate(age=as.numeric(age), 
         sex=factor(sex, labels=c("Male", "Female")), 
         edu=factor(case_when(aa02 %in% c(1:5,16) ~ 1, 
                              aa02 %in% c(6,7,9) ~ 2, 
                              aa02 %in% c(8,10,11,13) ~ 3, 
                              aa02 %in% c(12,14) ~ 4, 
                              aa02 == 15 ~ 5), 
                    labels=c("國中以下", "高中以下", "大學以下", "大學", "研究所以上")), 
         aa19=ifelse(as.numeric(aa19)==8888|as.numeric(aa19)==9999, NA, aa19), 
         income_status=factor(aa19, labels=c("足夠且有多餘", "剛好足夠", 
                                             "有些困難", "很困難")), 
         one=1) %>% 
  select(-aa02, -n_na, -aa19)

1-7. Store NHANES Survey Design

nahsit <- svydesign(data=dta, ids=~0, strata=~stra, weights=~qwt, nest=T)

1-8. Grouping

q_wt <- svyquantile(~fi, nahsit, c(.25,.5,.75))
dta <- dta %>% 
  mutate(fi_grp=factor(cut(dta$fi, breaks=c(0, q_wt$fi[,1], 24), include.lowest=T), 
                       labels=c("[0, 10]", "(10, 11.5]", "(11.5, 13.5]", "(13.5, 24]")))
nahsit <- svydesign(data=dta, ids=~0, strata=~stra, weights=~qwt, nest=T)

2. Sample Size

2-1. Flowchart

flowchart <- grViz("digraph flowchart1 {
  graph [overlap=true]
  
  node [shape=rectangle, fixedsize=true, width=5, height=1, fontsize=18]        
  s1 [label='2005-2008 NAHSIT sample (>= 19 y)\n(n=4665)']
  s2 [label='Have dietary recall data\n(n=2908)']
  
  node [shape=rectangle, fixedsize=true, width=4, height=1, fontsize=18]
  s3 [label='Have MetS data\n(n=1664)']
  s4 [label='Have BMI data\n(n=1652)']
  
  s1 -> s2 [label='Missing (n=1757)']
  s2 -> s3 [label='Missing\n(n=1244)']
  s2 -> s4 [label='Missing\n(n=1256)']
  
  }")
flowchart

2-2. Tables

MetS

met_l_per <- svymean(~mets_l, nahsit, na.rm=T) %>% as.data.frame()
met_l_per <- met_l_per*100
met_s_per <- svymean(~mets_s, nahsit, na.rm=T) %>% as.data.frame()
met_s_per <- met_s_per*100
mets_table <- rbind(cbind(met_l_per[1, ], met_l_per[2, ]), cbind(met_s_per[1, ], met_s_per[2, ]))
colnames(mets_table) <- c("Yes(%)", "Yes(SE)", "No(%)", "No(SE)")
rownames(mets_table) <- c("Not Strict (n=1664)", "Strict (n=1640)")
mets_table %>% 
  kbl(digits=2, col.names=rep(c("Percentage (%)", "SE"), 2)) %>% 
  kable_classic_2() %>% 
  add_header_above(header=c(" "=1, "Yes"=2, "No"=2)) %>% 
  kable_styling(full_width=F)
Yes
No
Percentage (%) SE Percentage (%) SE
Not Strict (n=1664) 22.99 1.24 77.01 1.24
Strict (n=1640) 23.24 1.25 76.76 1.25

OWOB

owob_per <- svymean(~if_owob, nahsit, na.rm=T) %>% as.data.frame()
owob_per <- owob_per*100
owob_table <- cbind(owob_per[1,], owob_per[2,])
colnames(owob_table) <- c("Yes(%)", "Yes(SE)", "No(%)", "No(SE)")
rownames(owob_table) <- c("OWOB (n=1652)")
owob_table %>% 
  kbl(digits=2, col.names=rep(c("Percentage", "SE"), 2)) %>% 
  kable_classic_2() %>% 
  add_header_above(header=c(" "=1, "Yes"=2, "No"=2)) %>% 
  kable_styling(full_width=F)
Yes
No
Percentage SE Percentage SE
OWOB (n=1652) 44.27 1.57 55.73 1.57

2-3. Histogram of Fasting Interval

svyhist(~fi, nahsit, breaks=seq(0, 24, 1), 
        xlim=c(0, 25), ylim=c(0, .20), 
        main="Histogram of Fasting Interval", xlab="Fasting Interval (hr(s))")

2-4. Bar Charts

MetS

mets_table[c(1,3)] %>% melt() %>% cbind(grp=rep(c("Not Strict", "Strict"), 2), .) %>% 
  mutate(value=round(value, 2)) %>% 
  ggplot(aes(x=grp, y=value, fill=variable)) + 
  geom_bar(stat="identity", position="dodge") + theme_classic() + 
  labs(x="", y="Percentage", title="MetS") + 
  geom_text(aes(label=value), position=position_dodge(width=.9), vjust=-.5) + 
  theme(legend.title=element_blank()) + ylim(c(0,85)) + 
  scale_fill_grey()

BMI

owob_table[c(1, 3)] %>% melt() %>% mutate(value=round(value, 2)) %>% 
  ggplot(aes(x=variable, y=value)) + 
  geom_bar(stat="identity") + geom_text(aes(label=value), vjust=-.5) + 
  theme_classic() + ylim(c(0,85)) + 
  labs(x="OWOB", y="Percentage", title="OWOB")

3. Table 1

3-1. Categorical Variables

  • p-values were derived from chi-square test.

3-1-1. data

Gender

sex_grp_table <- svyby(~sex, ~fi_grp, nahsit, svymean, na.rm=T)
sex_grp_table <- t(sex_grp_table[2:5]*100)
sex_total_table <- t(svyby(~sex, ~one, nahsit, svymean, na.rm=T)*100)
sex_grp_table <- 
  cbind("Total_p"=sex_total_table[c(2,3)], "Total_se"=sex_total_table[c(4,5)], 
        "G1_p"=sex_grp_table[c(1,2), 1], "G1_se"=sex_grp_table[c(3,4), 1], 
        "G2_p"=sex_grp_table[c(1,2), 2], "G2_se"=sex_grp_table[c(3,4), 2], 
        "G3_p"=sex_grp_table[c(1,2), 3], "G3_se"=sex_grp_table[c(3,4), 3], 
        "G4_p"=sex_grp_table[c(1,2), 4], "G4_se"=sex_grp_table[c(3,4), 4]) %>% 
  tibble::as_tibble() %>% 
  mutate(chr=c("Male", "Female")) %>% select(11, 1:10)

Education Level

edu_grp_table <- svyby(~edu, ~fi_grp, nahsit, svymean, na.rm=T)
edu_grp_table <- t(edu_grp_table[2:11]*100)
edu_total_table <- t(svyby(~edu, ~one, nahsit, svymean, na.rm=T)*100)
edu_grp_table <- 
  cbind("Total_p"=edu_total_table[c(2:6)], "Total_se"=edu_total_table[c(7:11)], 
        "G1_p"=edu_grp_table[c(1:5), 1], "G1_se"=edu_grp_table[c(6:10), 1], 
        "G2_p"=edu_grp_table[c(1:5), 2], "G2_se"=edu_grp_table[c(6:10), 2], 
        "G3_p"=edu_grp_table[c(1:5), 3], "G3_se"=edu_grp_table[c(6:10), 3], 
        "G4_p"=edu_grp_table[c(1:5), 4], "G4_se"=edu_grp_table[c(6:10), 4]) %>% 
  tibble::as_tibble() %>% 
  mutate(chr=dta$edu %>% levels()) %>% select(11, 1:10)

Income Status

income_status_grp_table <- svyby(~income_status, ~fi_grp, nahsit, svymean, na.rm=T)
income_status_grp_table <- t(income_status_grp_table[2:9]*100)
income_status_total_table <- t(svyby(~income_status, ~one, nahsit, svymean, na.rm=T)*100)
income_status_grp_table <- 
  cbind("Total_p"=income_status_total_table[c(2:5)], "Total_se"=income_status_total_table[c(6:9)], 
        "G1_p"=income_status_grp_table[c(1:4), 1], "G1_se"=income_status_grp_table[c(5:8), 1], 
        "G2_p"=income_status_grp_table[c(1:4), 2], "G2_se"=income_status_grp_table[c(5:8), 2], 
        "G3_p"=income_status_grp_table[c(1:4), 3], "G3_se"=income_status_grp_table[c(5:8), 3], 
        "G4_p"=income_status_grp_table[c(1:4), 4], "G4_se"=income_status_grp_table[c(5:8), 4]) %>% 
  tibble::as_tibble() %>% 
  mutate(chr=dta$income_status %>% levels()) %>% select(11, 1:10)

3-1-2. Export table

chr_smry <- rbind(sex_grp_table, edu_grp_table, income_status_grp_table)

sex_chi <- svychisq(~fi_grp+sex, nahsit, na.rm=T)
edu_chi <- svychisq(~fi_grp+edu, nahsit, na.rm=T)
is_chi <- svychisq(~fi_grp+income_status, nahsit, na.rm=T)

chr_smry <- chr_smry %>% 
  mutate("p-value"=c(scales::pvalue(sex_chi$p.value), rep(NA, 1),
                     scales::pvalue(edu_chi$p.value), rep(NA, 4),
                     scales::pvalue(is_chi$p.value), rep(NA, 3)))

opts <- options(knitr.kable.NA="")
chr_smry %>%
  kbl(digits=2,
      col.names=c("Variable", rep(c("Percentage (%)", "se"), 5), "p-value")) %>%
  column_spec(c(3, 5, 7, 9), color="gray") %>%
  add_header_above(header=c(" "=1, "Total"=2, "0 <= x <= 10"=2, "10 < x <= 11.5"=2,
                            "11.5 < x <= 13.5"=2, "13.5 < x <= 24"=2, " "=1)) %>%
  add_header_above(header=c(" "=3, "Fasting Interval (hr(s))"=8, " "=1)) %>%
  pack_rows(index=c("Gender (n=2908)"=2, "Education Level (n=2908)"=5, 
                    "Income Status (n=2817)"=4)) %>%
  kable_classic_2() %>% kable_styling(full_width=F)
Fasting Interval (hr(s))
Total
0 <= x <= 10
10 < x <= 11.5
11.5 < x <= 13.5
13.5 < x <= 24
Variable Percentage (%) se Percentage (%) se Percentage (%) se Percentage (%) se Percentage (%) se p-value
Gender (n=2908)
Male 50.22 1.20 43.64 2.30 46.40 2.30 49.28 2.58 62.35 2.35 <0.001
Female 49.78 1.20 56.36 2.30 53.60 2.30 50.72 2.58 37.65 2.35
Education Level (n=2908)
國中以下 22.30 0.84 22.42 1.70 28.05 1.75 21.04 1.72 17.38 1.53 0.036
高中以下 14.36 0.86 14.67 1.68 13.91 1.65 16.50 2.04 12.66 1.57
大學以下 34.06 1.16 34.04 2.21 31.35 2.22 32.49 2.46 38.22 2.43
大學 25.02 1.10 25.25 2.11 22.46 2.09 24.04 2.30 28.20 2.28
研究所以上 4.25 0.55 3.62 0.99 4.24 1.11 5.93 1.35 3.55 1.00
Income Status (n=2817)
足夠且有多餘 13.13 0.85 12.07 1.57 13.31 1.70 13.12 1.75 14.12 1.81 0.669
剛好足夠 60.38 1.20 58.72 2.32 61.50 2.29 63.60 2.48 58.33 2.47
有些困難 22.15 1.00 25.17 2.02 20.93 1.86 19.57 2.05 22.23 2.07
很困難 4.34 0.49 4.04 0.89 4.26 0.93 3.70 0.79 5.31 1.23

3-2. Continuous Variables

  • p-values were derived from one-way ANOVA.

3-2-1. Age

age_grp_table <- svyby(~age, ~fi_grp, nahsit, svymean, na.rm=T)
age_grp_table <- t(age_grp_table[c(2,3)]) %>% melt() %>% select(3) %>% t()
colnames(age_grp_table) <- c("G1_mean", "G1_se", "G2_mean", "G2_se", 
                             "G3_mean", "G3_se", "G4_mean", "G4_se")
age_grp_table <- tibble::as_tibble(age_grp_table)
age_total_table <- svyby(~age, ~one, nahsit, svymean, na.rm=T)
age_grp_table <- age_grp_table %>% 
  mutate(total_mean=age_total_table[1,2], total_se=age_total_table[1,3]) %>% 
  select(9,10,1:8)
aov_age <- aov(age~fi_grp, data=dta, weights=qwt) %>% summary()
age_grp_table %>% mutate("p-value"=scales::pvalue(aov_age[[1]][["Pr(>F)"]][1]), 
                         var="Age (y)", "Unweighted n"=2908, "Missing"=0) %>% 
  select(12:14, 1:11) %>% 
  kbl(digits=2, 
      col.names=c("Variable", "Unweighted n", "Missing", rep(c("mean", "se"), 5), "p-value")) %>% 
  add_header_above(header=c(" "=3, "Total"=2, "0 <= x <= 10"=2, "10 < x <= 11.5"=2,
                            "11.5 < x <= 13.5"=2, "13.5 < x <= 24"=2, " "=1)) %>%
  add_header_above(header=c(" "=5, "Fasting Interval (hr(s))"=8, " "=1)) %>%
  kable_classic_2() %>% kable_styling(full_width=F)
Fasting Interval (hr(s))
Total
0 <= x <= 10
10 < x <= 11.5
11.5 < x <= 13.5
13.5 < x <= 24
Variable Unweighted n Missing mean se mean se mean se mean se mean se p-value
Age (y) 2908 0 43.77 0.34 41.03 0.64 47.24 0.71 45.24 0.74 42.05 0.63 <0.001