1. Data Cleaning

1-1. Import Packages

library(plyr)
library(dplyr)
library(haven)
library(lubridate)
library(survey)
library(ggplot2)
library(hrbrthemes)
library(reshape2)
library(ggpubr)
library(GGally)
library(psych)
library(networkD3)
library(DiagrammeR)
library(foreign)
library(kableExtra)
library(jtools)

1-2. Import Datasets

檔案皆可在CDC NHANES 網站上取得。

DEMO <- read.xport("nhanes/DEMO_J.XPT") # 人口學問卷
DR1 <- read.xport("nhanes/DR1IFF_J.XPT") # 第一天飲食回憶
DR2 <- read.xport("nhanes/DR2IFF_J.XPT") # 第二天飲食回憶
BPX <- read.xport("nhanes/BPX_J.XPT") # 血壓紀錄
BPQ <- read.xport("nhanes/BPQ_J.XPT") # 血壓問卷
TRIGLY <- read.xport("nhanes/TRIGLY_J.XPT") # 三酸甘油酯紀錄
HDL <- read.xport("nhanes/HDL_J.XPT") # 膽固醇紀錄
DIQ <- read.xport("nhanes/DIQ_J.XPT") # 糖尿病問卷
GLU <- read.xport("nhanes/GLU_J.XPT") # 血糖紀錄
DX <- read.xport("nhanes/DXX_J.XPT") # 身體組成紀錄
BMX <- read.xport("nhanes/BMX_J.XPT") # 身體測量記錄
PAQ <- read.xport("nhanes/PAQ_J.XPT") # 活動問卷
RHQ <- read.xport("nhanes/RHQ_J.XPT") # 生產健康問卷
DRQD1 <- read.xport("nhanes/DR1TOT_J.XPT") # 飲食問卷

1-3. Fasting Interval Data

#計算第一天斷食時長
df_D1FI <- DR1 %>% 
  #選取欲使用的columns
  select(SEQN, DR1_020, DR1IKCAL) %>% 
  #大於5大卡的飲食紀錄才納入時間點的計算
  filter(DR1IKCAL > 5) %>% 
  #將相同的編號(同一個案)聚集
  group_by(SEQN) %>% 
  #計算斷食時間長度(24-(最晚時間-最早時間))
  summarise(d1=24 - as.numeric(max(DR1_020) - min(DR1_020))/3600)

#計算第二天斷食時長
df_D2FI <- DR2 %>% 
  #選取欲使用的columns
  select(SEQN, DR2_020, DR2IKCAL) %>% 
  #大於5大卡的飲食紀錄才納入時間點的計算
  filter(DR2IKCAL > 5) %>% 
  #將相同編號聚集
  group_by(SEQN) %>% 
  #計算斷食時間長度(24-(最晚時間-最早時間))
  summarise(d2=24 - as.numeric(max(DR2_020) - min(DR2_020))/3600)

#合併斷食時長資料
df_fi <- left_join(df_D1FI, df_D2FI, by="SEQN") %>% 
  #計算"一天"或"兩天平均"的斷食時間長度
  mutate(fi=ifelse(is.na(d2), d1, (d1+d2)/2), 
         #增加一個factor column紀錄資料為一天或兩天
         days=factor(ifelse(is.na(d2), 1, 2), 
                     labels=c("1 day", "2 days"))) %>% 
  #一個個案只留下一個時間長度(一天或兩天平均)
  select(-d1, -d2)

#合併斷食時長與權重
data <- df_fi %>% 
  left_join(., 
            DR1 %>% group_by(SEQN) %>% 
              summarise(wt_d1=max(WTDRD1)) %>% 
              select(SEQN, wt_d1), by="SEQN") %>% 
  select(1,4,2,3)

1-4. Metabolic Syndrome Data

Blood Pressure

收縮壓≧130mmHg或舒張壓≧85mmHg或服用高血壓藥物

df_BP <- 
  #合併人口學與血壓資料
  left_join(DEMO, BPX, by="SEQN") %>% 
  #再合併血壓相關問卷資料
  left_join(., BPQ, by="SEQN") %>%
  #選取欲使用columns
  select(SEQN, starts_with("BPXSY"), starts_with("BPXDI"), BPQ050A) %>%
  #計算收縮壓或舒張壓資料的遺漏值數量
  mutate(n_sbp=rowSums(!is.na(select(., starts_with("BPXSY")))),
         n_dbp=rowSums(!is.na(select(., starts_with("BPXDI"))))) %>%
  #將舒張壓為0者改記為遺漏值(NA)
  mutate_at(vars(starts_with("BPXDI")), list(~ na_if(., 0))) %>%
  #平均有收縮壓或舒張壓記錄的資料
  mutate(mean_sbp=rowMeans(select(., starts_with("BPXSY")), na.rm=T),
         mean_dbp=rowMeans(select(., starts_with("BPXDI")), na.rm=T),
         #將收縮壓壓≧130或舒張壓壓≧85或血壓問卷中有服用高血壓藥物者記為1
         #將無遺漏收縮壓或舒張壓遺漏且不符合上述條件者記為0
         #其餘皆記為遺漏值
         htn=case_when(mean_sbp>=130 | mean_dbp>=85 | BPQ050A==1 ~ 1,
                        n_sbp>0 & n_dbp>0 ~ 0)) %>%
  #選取個案編號與判斷結果
  select(SEQN, htn)

Triglycerides

≧1.69 mmol/L

df_TRIGLY <- left_join(DEMO, TRIGLY, by="SEQN") %>% 
  #選取欲使用columns
  select(SEQN, LBDTRSI) %>%
  #將資料數值≧1.69記為1，否則記為0
  mutate(trigly=ifelse(LBDTRSI>=1.69, 1, 0)) %>%
  #選取個案編號及判斷結果
  select(SEQN, trigly)

1-4-3. HDL-cholesterol

男性 < 1.04 mmol/L、女性 < 1.29 mmol/L

df_HDL <- left_join(DEMO, HDL, by="SEQN") %>%
  select(SEQN, RIAGENDR, LBDHDDSI) %>%
  mutate(hdl=ifelse(RIAGENDR==1 & LBDHDDSI<1.04 | RIAGENDR==2 & LBDHDDSI<1.29, 1, 0)) %>%
  select(SEQN, hdl)

Glucose

≧ 6.1 mmol/L或有服用糖尿病藥物

df_GLU <- 
  #合併人口學和血糖資料
  left_join(DEMO, GLU, by="SEQN") %>% 
  #再合併糖尿病問卷資料
  left_join(., DIQ, by="SEQN") %>% 
  #選取欲使用columns
  select(SEQN, LBDGLUSI, DIQ050) %>%
  #若數值≧6.1或問卷中有服用糖尿病藥物記為1，否則記為0
  mutate(glu=ifelse(LBDGLUSI>=6.1 | DIQ050==1, 1, 0)) %>% 
  #選取個案編號與判斷結果
  select(SEQN, glu)

Waist Circumference

男性 ≧ 102 cm、女性 ≧ 88 公分

df_WAIST <- 
  #合併人口學和身體測量資料
  left_join(DEMO, BMX, by="SEQN") %>%
  #選擇欲使用columns
  select(SEQN, RIAGENDR, BMXWAIST) %>% 
  #符合上述條件記為1，否則記為0
  mutate(waist=ifelse(RIAGENDR==1 & BMXWAIST>=102 | RIAGENDR==2 & BMXWAIST>=88, 1, 0)) %>%
  #選取個案編號與判斷結果
  select(SEQN, waist)

MetS or not

df_mets <- 
  #合併人口學和五個判斷標準資料
  join_all(list(DEMO, df_BP, df_TRIGLY, df_HDL, df_GLU, df_WAIST), 
           by="SEQN", type="left") %>% select(1, 47:51) %>% 
         #計算判斷標準資料的遺漏值數目
  mutate(n_na=rowSums(is.na(.)) %>% as.integer(), 
         #計算符合標準(1)的數量
         n_con=rowSums(select(., 2:6), na.rm=T) %>% as.integer()) %>% 
  #選取遺漏值小於3的個案
  filter(n_na < 3) %>% 
         #若符合標準的數量大於3記為1，否則為2，
         #同時將資料改為factor格式
  mutate(mets_l=factor(ifelse(n_con>=3, 1, 2), labels=c("Yes", "No")), 
         #若符合標準的數量大於3記為1，
         #數量小於3且判斷資料無遺漏值記為2，
         #同時將資料改為factor格式
         mets_s=factor(case_when(n_con>=3 ~ 1, 
                                 n_con<3 & n_na==0 ~ 2), labels=c("Yes", "No"))) %>% 
  #選擇個案編號、遺漏值數量與判斷結果
  select(SEQN, n_na, mets_l, mets_s) %>% 
  #將資料格式改為tibble
  tibble::as_tibble()

#與時間長度資料合併
data <- left_join(data, df_mets, by="SEQN")

1-5. BMI data

≧ 25 kg/m²

df_obesity <- 
  #合併人口學和身體測量資料
  left_join(DEMO, BMX, by="SEQN") %>% 
  #數值≧25記為1，否則記為2，同時將資料改為factor格式
  mutate(owob=factor(ifelse(BMXBMI>=25, 1, 2), labels=c("Yes", "No"))) %>% 
  #選擇個案編號與判斷結果
  select(SEQN, owob) %>% 
  #將資料格式改為tibble
  tibble::as_tibble()

#與先前的資料合併
data <- left_join(data, df_obesity, by="SEQN")

1-6. Body Composition Data

Fat Mass Index (FMI, kg/m²)
Lean Mass Index (LMI, kg/m²)

df_bodycom <- 
  #合併人口學、身體組成和身體測量資料
  join_all(list(DEMO, DX, BMX), by="SEQN", type="left") %>% 
  #選擇欲使用columns
  select(SEQN, DXDTOFAT, DXDTOTOT, BMXHT) %>% 
         #將單位改為kg
  mutate(total_fat=DXDTOFAT/1000, 
         lean_mass=(DXDTOTOT - DXDTOFAT)/1000, 
         #計算fat mass index
         fmi=total_fat/((BMXHT/100)^2), 
         #計算lean mass index
         lmi=lean_mass/((BMXHT/100)^2)) %>% 
  #選擇個案編號、FMI和LMI
  select(SEQN, fmi, lmi) %>% 
  #將資料格式改為tibble
  tibble::as_tibble()

#與先前資料合併
data <- left_join(data, df_bodycom, by="SEQN")

1-7. Physical Activity Data

使用CDC NHANES網站所提供的代謝當量分數進行計算

df_pa <- 
  #合併人口學和身體活動問卷資料
  left_join(DEMO, PAQ, by="SEQN") %>%
  #選擇欲使用columns
  select(SEQN, PAD615, PAD630, PAD645, PAD660, PAD675) %>%
  #將紀錄為不知道者(9999)改為遺漏值(NA)
  mutate_at(vars(starts_with("PAD")), list(~na_if(., 9999))) %>% 
  #依照代謝當量表計算各類分數
  mutate(PAD615=PAD615*8, PAD630=PAD630*4, PAD645=PAD645*4,
         PAD660=PAD660*8, PAD675=PAD675*4) %>% 
  #計算總代謝當量
  mutate(pa=rowSums(select(., starts_with("PAD")), na.rm=T)) %>% 
  #將代謝當量為0者改記為遺漏值(NA)
  mutate_at(vars(pa), list(~na_if(., 0))) %>% 
  #選取個案編號與代謝當量分數
  select(SEQN, pa) %>% 
  #將資料格式改為tibble
  tibble::as.tibble()

#與先前資料合併
data <- left_join(data, df_pa, by="SEQN")

1-8. Demographics Data

df_demo <- DEMO %>% 
  #選取欲使用columns
  select(SEQN, RIDAGEYR, RIAGENDR, DMDEDUC2, RIDRETH3, #年齡、性別、教育程度、種族
         SDMVPSU, SDMVSTRA, INDFMPIR) %>% #問卷分析用參數、社經地位
         #改column名稱
  mutate(age=RIDAGEYR, 
         #改column名稱，同使改為factor格式
         gndr=factor(RIAGENDR, labels=c("Male", "Female")), 
         #將教育程度資料中紀錄為拒絕回答(7)與不知道(9)者改記為遺漏值(NA)
         DMDEDUC2=ifelse(DMDEDUC2==7 | DMDEDUC2==9, NA, DMDEDUC2), 
         #將資料改為factor格式
         edu=factor(DMDEDUC2, 
                    labels=c("less than 9th grade", 
                             "9-11th grade (includes 12th with no diploma)", 
                             "high school graduate/GED or equivalent", 
                             "some college or AA degree", 
                             "college graduate or above")), 
         race=factor(RIDRETH3, 
                     labels=c("Mexican American", "Other Hispanic", 
                              "Non-Hispanic White", "Non-Hispanic Black", 
                              "Non-Hispanic Asian", "Other")), 
         #改column名稱
         ses=INDFMPIR, psu=SDMVPSU, stra=SDMVSTRA) %>% 
  #選取調整後的columns
  select(1, 9:15) %>% 
  #將資料格式改為tibble
  tibble::as.tibble()

#與先前資料合併
data <- left_join(data, df_demo, by="SEQN")

1-9. Intake Energy Data

#計算第一天攝入熱量
df_D1EI <- DR1 %>% 
  #選取欲使用的columns
  select(SEQN, DR1IKCAL) %>% 
  #將相同編號聚集
  group_by(SEQN) %>% 
  #計算第一天所攝取總熱量
  summarise(d1_totalEnergyIntake=sum(DR1IKCAL, na.rm=T)) %>% 
  #若攝入熱量為0則改記成為遺漏值(NA)
  mutate(d1_totalEnergyIntake=na_if(d1_totalEnergyIntake,0)) %>% 
  #將資料格式改為tibble
  tibble::as.tibble()

#計算第二天攝入熱量
df_D2EI <- DR2 %>% 
  #選取欲使用的columns
  select(SEQN, DR2IKCAL) %>% 
  #將相同編號聚集
  group_by(SEQN) %>% 
  #計算第二天所攝取總熱量
  summarise(d2_totalEnergyIntake=sum(DR2IKCAL, na.rm=T)) %>% 
  #若攝入熱量為0則改記成遺漏值(NA)
  mutate(d2_totalEnergyIntake=na_if(d2_totalEnergyIntake,0)) %>% 
  #將資料格式改為tibble
  tibble::as.tibble()

df_energy <- 
  #合併兩天資料
  left_join(df_D1EI, df_D2EI, by="SEQN") %>% 
  #計算熱量一天或是兩天平均
  mutate(avg_engy=ifelse(is.na(d2_totalEnergyIntake), d1_totalEnergyIntake, 
                         (d2_totalEnergyIntake+d1_totalEnergyIntake)/2)) %>% 
  #選取個案編號與計算結果
  select(SEQN, avg_engy) %>% 
  #將資料格式改為tibble
  tibble::as.tibble()

#與先前資料合併
data <- left_join(data, df_energy, by="SEQN")

1-10. On Special Diet or not

special_diet <- DRQD1 %>% 
  #選取欲使用columns
  select(SEQN, DRQSDIET) %>% 
         #將遺漏值或紀錄為不知道(9)者都記為遺漏值
  mutate(spcl_dt=ifelse(is.na(DRQSDIET) | DRQSDIET==9, NA, DRQSDIET), 
         #改資料為factor格式
         spcl_dt=factor(spcl_dt, labels=c("Yes", "No"))) %>% 
  #選取個案編號與整理結果
  select(SEQN, spcl_dt) %>% 
  #將資料格式改為tibble
  tibble::as.tibble()

#與先前資料合併
data <- left_join(data, special_diet, by="SEQN")

1-11. Postmenopausal or not

df_menopause <- RHQ %>% 
  #將回答Menopause/Change of life(7)者記為1，其餘為2，並改為factor格式
  mutate(mp=factor(ifelse(RHD043==7, 1, 2), labels=c("Yes", "No"))) %>% 
  #選取個案編號與整理結果
  select(SEQN, mp) %>% 
  #將資料格式改為tibble
  tibble::as.tibble()

#與先前資料合併
data <- left_join(data, df_menopause, by="SEQN")

1-12. Data Adjustment

#將個案編號改為character格式
data <- data %>% mutate(SEQN=as.character(SEQN))
#將整理後資料改存至data_all
data_all <- data

data <- data %>% 
  #選取大於20歲的資料
  filter(age>=20) %>% 
  #新增一個column方便後續分析進行
  mutate(one=1)

1-13. Store NHANES Survey Design

nhanes<- svydesign(data=data, id=~psu, strata=~stra, weights=~wt_d1, nest=T)

1-14. Grouping

#找出加權後的quantile
q_wt <- svyquantile(~fi, nhanes, c(.25,.5,.75))
#輸出table
q_wt$fi %>% kbl(digits=2) %>% kable_classic_2() %>% kable_styling(full_width=F)

	quantile	ci.2.5	ci.97.5	se
0.25	10.75	10.75	11.00	0.06
0.5	12.04	12.00	12.25	0.06
0.75	13.75	13.50	14.00	0.12

#用加權後的斷食時間作為分組依據
data <- data %>% 
  mutate(fi_grp=factor(cut(data$fi, breaks=c(0, q_wt$fi[,1], 24), include.lowest=T), 
                          labels=c("[0, 10.75]", "(10.75, 12.04]", "(12.04, 13.75]", "(13.75, 24]")))
#更新問卷設計的資料
nhanes<- svydesign(data=data, id=~psu, strata=~stra, weights=~wt_d1, nest=T)

2. Sample Size

2-1. Flowchart

flowchart <- grViz("digraph flowchart1 {
  graph [overlap=true]
  
  node [shape=rectangle, fixedsize=true, width=5, height=1, fontsize=18]        
  s1 [label='2017-2018 NHANES sample\n(n=9254)']
  s2 [label='Have at least 1 day of dietary recall data\n(n=7591)']
  s3 [label='At least 20 years old\n(n=4740)']
  
  node [shape=rectangle, fixedsize=true, width=4, height=1, fontsize=18]
  s4 [label='Have MetS data\n(n=4420)']
  s5 [label='Have BMI data\n(n=4689)']
  s6 [label='Have body composition data\n(n=2096)']
  
  s1 -> s2 [label='Missing (n=1663)']
  s2 -> s3 [label='Under 20 years old (n=2851)']
  s3 -> s4 [label='Missing\n(n=320)']
  s3 -> s5 [label='Missing\n(n=51)']
  s3 -> s6 [label='Missing\n(n=2644)']
  
  }")
flowchart

2-2. Tables

MetS

#計算百分比和SE
met_l_per <- svymean(~mets_l, nhanes, na.rm=T) %>% as.data.frame()
met_l_per <- met_l_per*100
met_s_per <- svymean(~mets_s, nhanes, na.rm=T) %>% as.data.frame()
met_s_per <- met_s_per*100
#合併資料
mets_table <- rbind(cbind(met_l_per[1, ], met_l_per[2, ]), cbind(met_s_per[1, ], met_s_per[2, ]))
#重新命名行列
colnames(mets_table) <- c("Yes(%)", "Yes(SE)", "No(%)", "No(SE)")
rownames(mets_table) <- c("Not Strict (n=4420)", "Strict (n=2465)")
#輸出表格
mets_table %>% 
  kbl(digits=2, col.names=rep(c("Percentage (%)", "SE"), 2)) %>% 
  kable_classic_2() %>% 
  add_header_above(header=c(" "=1, "Yes"=2, "No"=2)) %>% 
  kable_styling(full_width=F)

	Yes		No
	Percentage (%)	SE	Percentage (%)	SE
Not Strict (n=4420)	21.45	1.23	78.55	1.23
Strict (n=2465)	41.30	2.26	58.70	2.26

OWOB

#計算百分比和SE
owob_per <- svymean(~owob, nhanes, na.rm=T) %>% as.data.frame()
owob_per <- owob_per*100
#轉置
owob_table <- cbind(owob_per[1,], owob_per[2,])
#重新命名行列
colnames(owob_table) <- c("Yes(%)", "Yes(SE)", "No(%)", "No(SE)")
rownames(owob_table) <- c("OWOB (n=4689)")
#輸出表格
owob_table %>% 
  kbl(digits=2, col.names=rep(c("Percentage", "SE"), 2)) %>% 
  kable_classic_2() %>% 
  add_header_above(header=c(" "=1, "Yes"=2, "No"=2)) %>% 
  kable_styling(full_width=F)

	Yes		No
	Percentage	SE	Percentage	SE
OWOB (n=4689)	74.02	1.57	25.98	1.57

Body Composition

#計算平均
fmi_table <- svymean(~fmi, nhanes, na.rm=T) %>% as.data.frame()
lmi_table <- svymean(~lmi, nhanes, na.rm=T) %>% as.data.frame()
#計算SD、合併資料
bc_table <- rbind(fmi_table[1], lmi_table[1]) %>% 
  cbind(sd=c(svysd(~fmi, nhanes, na.rm=T), svysd(~lmi, nhanes, na.rm=T)))
#改名稱
rownames(bc_table) <- c("Fat Mass Index (kg/m2)", "Lean Mass Index (kg/m2)")
#輸出表格
rbind(fmi_table[1], lmi_table[1]) %>% 
  cbind(sd=c(svysd(~fmi, nhanes, na.rm=T), svysd(~lmi, nhanes, na.rm=T))) %>% 
  tibble::as_tibble() %>% 
  mutate("var"=c("Fat Mass Index (kg/m2)", "Lean Mass Index (kg/m2)"), 
         "Unweighted n"=c(2096, 2096)) %>% select(3,4,1,2) %>% 
  kbl(digits=2) %>% kable_classic_2() %>% kable_styling(full_width=F)

var	Unweighted n	mean	sd
Fat Mass Index (kg/m2)	2096	9.96	4.37
Lean Mass Index (kg/m2)	2096	19.18	3.50

2-3. Histogram of Fasting Interval

        #畫圖資料、問卷設計、區間
svyhist(~fi, nhanes, breaks=seq(0, 24, 1), 
        #x軸、y軸範圍
        xlim=c(0, 25), ylim=c(0, .20), 
        #標題、x軸名稱
        main="Histogram of Fasting Interval", xlab="Fasting Interval (hr(s))")

2-4. Bar Charts & Box Plot

MetS

  #將wide data轉為long data
mets_table[c(1,3)] %>% melt() %>% cbind(grp=rep(c("Not Strict", "Strict"), 2), .) %>% 
  mutate(value=round(value, 2)) %>% 
  #畫圖的資料，依照"是"或"否"具MetS填滿
  ggplot(aes(x=grp, y=value, fill=variable)) + 
  #畫bar chart                #設定圖表主題
  geom_bar(stat="identity", position="dodge") + theme_classic() + 
  #命名y軸、圖表標題
  labs(x="", y="Percentage", title="MetS") + 
  #加上label：資料為value，調整位置、顏色
  geom_text(aes(label=value), position=position_dodge(width=.9), vjust=-.5) + 
  #調整圖例的位置、背景顏色、標題
  theme(legend.position=c(.8,.9), 
        legend.title=element_blank()) + ylim(c(0,85)) + 
  #設定圖表顏色
  scale_fill_grey()

BMI

#wide改long資料                    #小數點兩位
owob_table[c(1, 3)] %>% melt() %>% mutate(value=round(value, 2)) %>% 
  #畫圖資料
  ggplot(aes(x=variable, y=value)) + 
  #畫bar chart                #加上label
  geom_bar(stat="identity") + geom_text(aes(label=value), vjust=-.5) + 
  #設定主題         #y軸範圍
  theme_classic() + ylim(c(0,85)) + 
  #兩軸、圖表名稱
  labs(x="OWOB", y="Percentage", title="OWOB")

Body Composition

#設定1*2格式
par(mfrow = c(1, 2))
#輸出FMI box plot
svyboxplot(~fmi~1, nhanes, all.outliers=T, ylim=c(0, 35), xlab="Fat Mass Index (kg/m2)")
#輸出LMI box plot
svyboxplot(~lmi~1, nhanes, all.outliers=T, ylim=c(0, 35), xlab="Lean Mass Index (kg/m2)")

2-5. 1 or 2 day(s) Fasting Interval

2-5-1. Table

#算百分比和SE
day_table <- svyby(~fi_grp, ~days, nhanes, svymean, na.rm=T)
day_table_p <- day_table[2:5] * 100
day_table_se <- day_table[6:9] * 100
#儲存分組名稱
grp_names <- c("0 <= x <= 10.75", "10.75 < x <= 12.04",
               "12.04 < x <= 13.75", "13.75 < x <= 24")
#改行列名稱
colnames(day_table_p) <- grp_names
colnames(day_table_se) <- grp_names
#算total百分比和SE、轉置
day_table_t <- t(svyby(~days, ~one, nhanes, svymean, na.rm=T) * 100)
#合併資料
opts <- options(knitr.kable.NA="")
day_table <- rbind(day_table_p[1,], day_table_se[1,], 
                   day_table_p[2,], day_table_se[2,]) %>% 
  cbind("var"=c("1 day (%)", "1 day (SE)", "2 days (%)", "2 days (SE)"), 
        "Unweighted n"=c(605, NA, 4135, NA), 
        "total"=c(day_table_t[2], day_table_t[4], day_table_t[3], day_table_t[5]), .) %>% 
  tibble::as_tibble()
#輸出表格
day_table %>% kbl(digits=2) %>% kable_classic_2() %>% 
  row_spec(c(2, 4), color="grey")

var	Unweighted n	total	0 <= x <= 10.75	10.75 < x <= 12.04	12.04 < x <= 13.75	13.75 < x <= 24
1 day (%)	605	12.21	34.20	28.91	12.70	24.19
1 day (SE)		1.07	3.46	2.93	1.52	3.76
2 days (%)	4135	87.79	25.91	22.31	27.79	23.99
2 days (SE)		1.07	0.97	0.96	1.22	1.33

2-5-2. Bar Charts

天數

#選取畫圖用資料                 #小數點兩位
day_table[c(1, 3), c(1, 3)] %>% mutate(total=round(total, 2)) %>% 
  #畫圖資料                     #畫bar chart
  ggplot(aes(x=var, y=total)) + geom_bar(stat="identity") + 
  #加上label                               #設定主題
  geom_text(aes(label=total), vjust=-.5) + theme_classic() + 
  #y軸範圍         #兩軸名稱
  ylim(c(0, 90)) + labs(x="", y="Percentage")

天數X分組

#選取畫圖用資料                   #小數點兩位                                  #wide to long
day_table[c(1, 3), c(1, 4:7)] %>% mutate_if(is.double, list(~round(., 2))) %>% melt() %>% 
  #畫圖資料
  ggplot(aes(x=variable, y=value, fill=var)) + 
  #畫bar chart                                  #設定主題
  geom_bar(stat="identity", position="dodge") + theme_classic() + 
  #兩軸名稱                                          #顏色主題
  labs(x="Fasting Interval Group", y="Percentage") + scale_fill_grey() + 
  #加上label
  geom_text(aes(label=value), position=position_dodge(width=.9), vjust=-.5) + 
  #y軸範圍         #改圖例位置
  ylim(c(0, 40)) + theme(legend.position=c(.9, .9), legend.title=element_blank())

3. Table 1

3-1. Categorical Variables

p-values were derived from chi-square test.

3-1-1. data

Gender

#算分組的百分比和SE
gndr_grp_table <- svyby(~gndr, ~fi_grp, nhanes, svymean, na.rm=T)
gndr_grp_table <- t(gndr_grp_table[2:5]*100)
#算total的百分比和SE
gndr_total_table <- t(svyby(~gndr, ~one, nhanes, svymean, na.rm=T)*100)
#合併資料
gndr_grp_table <- 
  cbind("Total_p"=gndr_total_table[c(2,3)], "Total_se"=gndr_total_table[c(4,5)], 
        "G1_p"=gndr_grp_table[c(1,2), 1], "G1_se"=gndr_grp_table[c(3,4), 1], 
        "G2_p"=gndr_grp_table[c(1,2), 2], "G2_se"=gndr_grp_table[c(3,4), 2], 
        "G3_p"=gndr_grp_table[c(1,2), 3], "G3_se"=gndr_grp_table[c(3,4), 3], 
        "G4_p"=gndr_grp_table[c(1,2), 4], "G4_se"=gndr_grp_table[c(3,4), 4]) %>% 
  tibble::as_tibble() %>% 
  mutate(chr=c("Male", "Female")) %>% select(11, 1:10)

Education Level

edu_grp_table <- svyby(~edu, ~fi_grp, nhanes, svymean, na.rm=T)
edu_grp_table <- t(edu_grp_table[2:11]*100)
edu_total_table <- t(svyby(~edu, ~one, nhanes, svymean, na.rm=T)*100)
edu_grp_table <- 
  cbind("Total_p"=edu_total_table[c(2:6)], "Total_se"=edu_total_table[c(7:11)], 
        "G1_p"=edu_grp_table[c(1:5), 1], "G1_se"=edu_grp_table[c(6:10), 1], 
        "G2_p"=edu_grp_table[c(1:5), 2], "G2_se"=edu_grp_table[c(6:10), 2], 
        "G3_p"=edu_grp_table[c(1:5), 3], "G3_se"=edu_grp_table[c(6:10), 3], 
        "G4_p"=edu_grp_table[c(1:5), 4], "G4_se"=edu_grp_table[c(6:10), 4]) %>% 
  tibble::as_tibble() %>% 
  mutate(chr=data$edu %>% levels()) %>% select(11, 1:10)

Ethnicity

race_grp_table <- svyby(~race, ~fi_grp, nhanes, svymean, na.rm=T)
race_grp_table <- t(race_grp_table[2:13]*100)
race_total_table <- t(svyby(~race, ~one, nhanes, svymean, na.rm=T)*100)
race_grp_table <- 
  cbind("Total_p"=race_total_table[c(2:7)], "Total_se"=race_total_table[c(8:13)], 
        "G1_p"=race_grp_table[c(1:6), 1], "G1_se"=race_grp_table[c(7:12), 1], 
        "G2_p"=race_grp_table[c(1:6), 2], "G2_se"=race_grp_table[c(7:12), 2], 
        "G3_p"=race_grp_table[c(1:6), 3], "G3_se"=race_grp_table[c(7:12), 3], 
        "G4_p"=race_grp_table[c(1:6), 4], "G4_se"=race_grp_table[c(7:12), 4]) %>% 
  tibble::as_tibble() %>% 
  mutate(chr=data$race %>% levels()) %>% select(11, 1:10)

3-1-2. Export table

#合併資料
chr_smry <- rbind(gndr_grp_table, edu_grp_table, race_grp_table)
#卡方檢定
gndr_chi <- svychisq(~fi_grp+gndr, nhanes, na.rm=T)
edu_chi <- svychisq(~fi_grp+edu, nhanes, na.rm=T)
race_chi <- svychisq(~fi_grp+race, nhanes, na.rm=T)
#合併資料
chr_smry <- chr_smry %>%
  mutate("p-value"=c(scales::pvalue(gndr_chi$p.value), rep(NA, 1),
                     scales::pvalue(edu_chi$p.value), rep(NA, 4),
                     scales::pvalue(race_chi$p.value), rep(NA, 5)))
#設定輸出表格中NA顯示為空白
opts <- options(knitr.kable.NA="")
#輸出表格
chr_smry %>%
  kbl(digits=2,
      col.names=c("Variable", rep(c("Percentage (%)", "se"), 5), "p-value")) %>%
  column_spec(c(3, 5, 7, 9), color="gray") %>%
  add_header_above(header=c(" "=1, "Total"=2, "0 <= x <= 10.75"=2, "10.75 < x <= 12.04"=2,
                            "12.04 < x <= 13.75"=2, "13.75 < x <= 24"=2, " "=1)) %>%
  add_header_above(header=c(" "=3, "Fasting Interval (hr(s))"=8, " "=1)) %>%
  pack_rows(index=c("Gender (n=4740)"=2, "Education Level (n=4732)"=5, "Ethnicity (n=4740)"=6)) %>%
  kable_classic_2() %>% kable_styling(full_width=F)

			Fasting Interval (hr(s))
	Total		0 <= x <= 10.75		10.75 < x <= 12.04		12.04 < x <= 13.75		13.75 < x <= 24
Variable	Percentage (%)	se	Percentage (%)	se	Percentage (%)	se	Percentage (%)	se	Percentage (%)	se	p-value
Gender (n=4740)
Male	48.02	1.09	53.63	1.92	44.57	1.84	45.43	1.75	47.83	1.86	0.004
Female	51.98	1.09	46.37	1.92	55.43	1.84	54.57	1.75	52.17	1.86
Education Level (n=4732)
less than 9th grade	3.53	0.54	2.52	0.56	3.60	0.74	4.06	0.69	4.01	0.79	0.014
9-11th grade (includes 12th with no diploma)	7.19	0.54	6.07	0.61	7.23	1.42	6.13	0.76	9.55	1.27
high school graduate/GED or equivalent	28.14	1.47	25.88	2.32	24.63	2.52	27.51	2.46	34.74	1.89
some college or AA degree	30.92	1.24	33.58	2.05	29.84	2.90	29.78	1.80	30.23	2.23
college graduate or above	30.21	2.42	31.95	4.16	34.69	3.29	32.52	3.68	21.46	2.34
Ethnicity (n=4740)
Mexican American	8.93	1.62	6.64	1.22	8.74	1.68	8.78	1.81	11.82	2.51	<0.001
Other Hispanic	6.88	0.78	5.59	1.08	5.77	1.15	9.27	1.21	6.80	1.10
Non-Hispanic White	62.03	2.83	69.26	2.86	64.92	3.33	60.13	3.56	53.19	2.86
Non-Hispanic Black	11.59	1.60	9.22	1.40	10.34	1.36	10.33	1.79	16.80	2.26
Non-Hispanic Asian	5.89	0.89	5.44	1.24	6.25	1.04	5.84	0.88	6.10	0.89
Other	4.69	0.62	3.84	1.01	3.97	0.71	5.65	0.95	5.29	0.81

3-2. Continuous Variables

p-values were derived from one-way ANOVA.

3-2-1. data

Age

#算各組平均和SE
age_grp_table <- svyby(~age, ~fi_grp, nhanes, svymean, na.rm=T)
#wide to long
age_grp_table <- t(age_grp_table[c(2,3)]) %>% melt() %>% select(3) %>% t()
#重新命名
colnames(age_grp_table) <- c("G1_mean", "G1_se", "G2_mean", "G2_se", 
                             "G3_mean", "G3_se", "G4_mean", "G4_se")
#改為tibble格式
age_grp_table <- tibble::as_tibble(age_grp_table)
#算total平均和SE
age_total_table <- svyby(~age, ~one, nhanes, svymean, na.rm=T)
#合併資料
age_grp_table <- age_grp_table %>% 
  mutate(total_mean=age_total_table[1,2], total_se=age_total_table[1,3]) %>% 
  select(9,10,1:8)
#ANOVA
aov_age <- aov(age~fi_grp, data=data, weights=wt_d1) %>% summary()

Physical Activity

pa_grp_table <- svyby(~pa, ~fi_grp, nhanes, svymean, na.rm=T)
pa_grp_table <- t(pa_grp_table[c(2,3)]) %>% melt() %>% select(3) %>% t()
colnames(pa_grp_table) <- c("G1_mean", "G1_se", "G2_mean", "G2_se", 
                            "G3_mean", "G3_se", "G4_mean", "G4_se")
pa_grp_table <- tibble::as_tibble(pa_grp_table)
pa_total_table <- svyby(~pa, ~one, nhanes, svymean, na.rm=T)
pa_grp_table <- pa_grp_table %>% 
  mutate(total_mean=pa_total_table[1,2], total_se=pa_total_table[1,3]) %>% 
  select(9,10,1:8)
aov_pa <- aov(pa~fi_grp, data=data, weights=wt_d1) %>% summary()

Ratio to Family Income to Poverty

ses_grp_table <- svyby(~ses, ~fi_grp, nhanes, svymean, na.rm=T)
ses_grp_table <- t(ses_grp_table[c(2,3)]) %>% melt() %>% select(3) %>% t()
colnames(ses_grp_table) <- c("G1_mean", "G1_se", "G2_mean", "G2_se", 
                             "G3_mean", "G3_se", "G4_mean", "G4_se")
ses_grp_table <- tibble::as_tibble(ses_grp_table)
ses_total_table <- svyby(~ses, ~one, nhanes, svymean, na.rm=T)
ses_grp_table <- ses_grp_table %>% 
  mutate(total_mean=ses_total_table[1,2], total_se=ses_total_table[1,3]) %>% 
  select(9,10,1:8)
aov_ses <- aov(ses~fi_grp, data=data, weights=wt_d1) %>% summary()

Energy Intake

avg_engy_grp_table <- svyby(~avg_engy, ~fi_grp, nhanes, svymean, na.rm=T)
avg_engy_grp_table <- t(avg_engy_grp_table[c(2,3)]) %>% melt() %>% select(3) %>% t()
colnames(avg_engy_grp_table) <- c("G1_mean", "G1_se", "G2_mean", "G2_se", 
                                  "G3_mean", "G3_se", "G4_mean", "G4_se")
avg_engy_grp_table <- tibble::as_tibble(avg_engy_grp_table)
avg_engy_total_table <- svyby(~avg_engy, ~one, nhanes, svymean, na.rm=T)
avg_engy_grp_table <- avg_engy_grp_table %>% 
  mutate(total_mean=avg_engy_total_table[1,2], total_se=avg_engy_total_table[1,3]) %>% 
  select(9,10,1:8)
aov_avg_engy <- aov(avg_engy~fi_grp, data=data, weights=wt_d1) %>% summary()

3-2-2. Export table

#合併資料
con_smry <- rbind(age_grp_table, pa_grp_table, ses_grp_table, avg_engy_grp_table) %>% 
  mutate("p-value"=c(scales::pvalue(aov_age[[1]][["Pr(>F)"]][1]), 
                     scales::pvalue(aov_pa[[1]][["Pr(>F)"]][1]), 
                     scales::pvalue(aov_ses[[1]][["Pr(>F)"]][1]), 
                     scales::pvalue(aov_avg_engy[[1]][["Pr(>F)"]][1])), 
         var=c("Age (y)", "Physical Activity (MET score)", 
               "Ratio of Family Income to Poverty", "Energy Intake (kcal)"), 
         "Unweighted n"=c(4740, 3518, 4183, 4740), 
         "Missing n"=c(0, 1222, 557, 0)) %>% 
  select(12:14, 1:11)
#輸出表格
con_smry %>% 
  kbl(digits=2,
      col.names=c("Variable", "Unweighted n", "Missing", rep(c("mean", "se"), 5), "p-value")) %>%
  column_spec(c(5, 7, 9, 11), color="gray") %>%
  add_header_above(header=c(" "=3, "Total"=2, "0 <= x <= 10.75"=2, "10.75 < x <= 12.04"=2,
                            "12.04 < x <= 13.75"=2, "13.75 < x <= 24"=2, " "=1)) %>%
  add_header_above(header=c(" "=5, "Fasting Interval (hr(s))"=8, " "=1)) %>%
  kable_classic_2() %>% kable_styling(full_width=F)

					Fasting Interval (hr(s))
			Total		0 <= x <= 10.75		10.75 < x <= 12.04		12.04 < x <= 13.75		13.75 < x <= 24
Variable	Unweighted n	Missing	mean	se	mean	se	mean	se	mean	se	mean	se	p-value
Age (y)	4740	0	48.34	0.61	47.81	0.78	50.71	1.26	49.05	0.57	45.88	0.72	<0.001
Physical Activity (MET score)	3518	1222	1346.50	47.48	1537.84	82.04	1244.50	46.82	1240.55	76.11	1331.99	81.93	<0.001
Ratio of Family Income to Poverty	4183	557	3.06	0.06	3.14	0.10	3.13	0.11	3.09	0.09	2.87	0.06	<0.001
Energy Intake (kcal)	4740	0	2079.35	18.19	2412.04	33.89	2115.25	35.03	2011.80	32.45	1744.74	35.15	<0.001

NHANES data

1. Data Cleaning

1-1. Import Packages

1-2. Import Datasets

1-3. Fasting Interval Data

1-4. Metabolic Syndrome Data

Blood Pressure

Triglycerides

1-4-3. HDL-cholesterol

Glucose

Waist Circumference

MetS or not

1-5. BMI data

1-6. Body Composition Data

1-7. Physical Activity Data

1-8. Demographics Data

1-9. Intake Energy Data

1-10. On Special Diet or not

1-11. Postmenopausal or not

1-12. Data Adjustment

1-13. Store NHANES Survey Design

1-14. Grouping

2. Sample Size

2-1. Flowchart

2-2. Tables

MetS

OWOB

Body Composition

2-3. Histogram of Fasting Interval

2-4. Bar Charts & Box Plot

MetS

BMI

Body Composition

2-5. 1 or 2 day(s) Fasting Interval

2-5-1. Table

2-5-2. Bar Charts

天數

天數X分組

3. Table 1

3-1. Categorical Variables

3-1-1. data

Gender

Education Level

Ethnicity

3-1-2. Export table

3-2. Continuous Variables

3-2-1. data

Age

Physical Activity

Ratio to Family Income to Poverty

Energy Intake

3-2-2. Export table