library(plyr)
library(dplyr)
library(haven)
library(lubridate)
library(survey)
library(ggplot2)
library(hrbrthemes)
library(reshape2)
library(ggpubr)
library(GGally)
library(psych)
library(networkD3)
library(DiagrammeR)
library(foreign)
library(kableExtra)
library(jtools)
DEMO <- read.xport("nhanes/DEMO_J.XPT") # 人口學問卷
DR1 <- read.xport("nhanes/DR1IFF_J.XPT") # 第一天飲食回憶
DR2 <- read.xport("nhanes/DR2IFF_J.XPT") # 第二天飲食回憶
BPX <- read.xport("nhanes/BPX_J.XPT") # 血壓紀錄
BPQ <- read.xport("nhanes/BPQ_J.XPT") # 血壓問卷
TRIGLY <- read.xport("nhanes/TRIGLY_J.XPT") # 三酸甘油酯紀錄
HDL <- read.xport("nhanes/HDL_J.XPT") # 膽固醇紀錄
DIQ <- read.xport("nhanes/DIQ_J.XPT") # 糖尿病問卷
GLU <- read.xport("nhanes/GLU_J.XPT") # 血糖紀錄
DX <- read.xport("nhanes/DXX_J.XPT") # 身體組成紀錄
BMX <- read.xport("nhanes/BMX_J.XPT") # 身體測量記錄
PAQ <- read.xport("nhanes/PAQ_J.XPT") # 活動問卷
RHQ <- read.xport("nhanes/RHQ_J.XPT") # 生產健康問卷
DRQD1 <- read.xport("nhanes/DR1TOT_J.XPT") # 飲食問卷
#計算第一天斷食時長
df_D1FI <- DR1 %>%
#選取欲使用的columns
select(SEQN, DR1_020, DR1IKCAL) %>%
#大於5大卡的飲食紀錄才納入時間點的計算
filter(DR1IKCAL > 5) %>%
#將相同的編號(同一個案)聚集
group_by(SEQN) %>%
#計算斷食時間長度(24-(最晚時間-最早時間))
summarise(d1=24 - as.numeric(max(DR1_020) - min(DR1_020))/3600)
#計算第二天斷食時長
df_D2FI <- DR2 %>%
#選取欲使用的columns
select(SEQN, DR2_020, DR2IKCAL) %>%
#大於5大卡的飲食紀錄才納入時間點的計算
filter(DR2IKCAL > 5) %>%
#將相同編號聚集
group_by(SEQN) %>%
#計算斷食時間長度(24-(最晚時間-最早時間))
summarise(d2=24 - as.numeric(max(DR2_020) - min(DR2_020))/3600)
#合併斷食時長資料
df_fi <- left_join(df_D1FI, df_D2FI, by="SEQN") %>%
#計算"一天"或"兩天平均"的斷食時間長度
mutate(fi=ifelse(is.na(d2), d1, (d1+d2)/2),
#增加一個factor column紀錄資料為一天或兩天
days=factor(ifelse(is.na(d2), 1, 2),
labels=c("1 day", "2 days"))) %>%
#一個個案只留下一個時間長度(一天或兩天平均)
select(-d1, -d2)
#合併斷食時長與權重
data <- df_fi %>%
left_join(.,
DR1 %>% group_by(SEQN) %>%
summarise(wt_d1=max(WTDRD1)) %>%
select(SEQN, wt_d1), by="SEQN") %>%
select(1,4,2,3)
df_BP <-
#合併人口學與血壓資料
left_join(DEMO, BPX, by="SEQN") %>%
#再合併血壓相關問卷資料
left_join(., BPQ, by="SEQN") %>%
#選取欲使用columns
select(SEQN, starts_with("BPXSY"), starts_with("BPXDI"), BPQ050A) %>%
#計算收縮壓或舒張壓資料的遺漏值數量
mutate(n_sbp=rowSums(!is.na(select(., starts_with("BPXSY")))),
n_dbp=rowSums(!is.na(select(., starts_with("BPXDI"))))) %>%
#將舒張壓為0者改記為遺漏值(NA)
mutate_at(vars(starts_with("BPXDI")), list(~ na_if(., 0))) %>%
#平均有收縮壓或舒張壓記錄的資料
mutate(mean_sbp=rowMeans(select(., starts_with("BPXSY")), na.rm=T),
mean_dbp=rowMeans(select(., starts_with("BPXDI")), na.rm=T),
#將收縮壓壓≧130或舒張壓壓≧85或血壓問卷中有服用高血壓藥物者記為1
#將無遺漏收縮壓或舒張壓遺漏且不符合上述條件者記為0
#其餘皆記為遺漏值
htn=case_when(mean_sbp>=130 | mean_dbp>=85 | BPQ050A==1 ~ 1,
n_sbp>0 & n_dbp>0 ~ 0)) %>%
#選取個案編號與判斷結果
select(SEQN, htn)
df_TRIGLY <- left_join(DEMO, TRIGLY, by="SEQN") %>%
#選取欲使用columns
select(SEQN, LBDTRSI) %>%
#將資料數值≧1.69記為1,否則記為0
mutate(trigly=ifelse(LBDTRSI>=1.69, 1, 0)) %>%
#選取個案編號及判斷結果
select(SEQN, trigly)
df_HDL <- left_join(DEMO, HDL, by="SEQN") %>%
select(SEQN, RIAGENDR, LBDHDDSI) %>%
mutate(hdl=ifelse(RIAGENDR==1 & LBDHDDSI<1.04 | RIAGENDR==2 & LBDHDDSI<1.29, 1, 0)) %>%
select(SEQN, hdl)
df_GLU <-
#合併人口學和血糖資料
left_join(DEMO, GLU, by="SEQN") %>%
#再合併糖尿病問卷資料
left_join(., DIQ, by="SEQN") %>%
#選取欲使用columns
select(SEQN, LBDGLUSI, DIQ050) %>%
#若數值≧6.1或問卷中有服用糖尿病藥物記為1,否則記為0
mutate(glu=ifelse(LBDGLUSI>=6.1 | DIQ050==1, 1, 0)) %>%
#選取個案編號與判斷結果
select(SEQN, glu)
df_WAIST <-
#合併人口學和身體測量資料
left_join(DEMO, BMX, by="SEQN") %>%
#選擇欲使用columns
select(SEQN, RIAGENDR, BMXWAIST) %>%
#符合上述條件記為1,否則記為0
mutate(waist=ifelse(RIAGENDR==1 & BMXWAIST>=102 | RIAGENDR==2 & BMXWAIST>=88, 1, 0)) %>%
#選取個案編號與判斷結果
select(SEQN, waist)
df_mets <-
#合併人口學和五個判斷標準資料
join_all(list(DEMO, df_BP, df_TRIGLY, df_HDL, df_GLU, df_WAIST),
by="SEQN", type="left") %>% select(1, 47:51) %>%
#計算判斷標準資料的遺漏值數目
mutate(n_na=rowSums(is.na(.)) %>% as.integer(),
#計算符合標準(1)的數量
n_con=rowSums(select(., 2:6), na.rm=T) %>% as.integer()) %>%
#選取遺漏值小於3的個案
filter(n_na < 3) %>%
#若符合標準的數量大於3記為1,否則為2,
#同時將資料改為factor格式
mutate(mets_l=factor(ifelse(n_con>=3, 1, 2), labels=c("Yes", "No")),
#若符合標準的數量大於3記為1,
#數量小於3且判斷資料無遺漏值記為2,
#同時將資料改為factor格式
mets_s=factor(case_when(n_con>=3 ~ 1,
n_con<3 & n_na==0 ~ 2), labels=c("Yes", "No"))) %>%
#選擇個案編號、遺漏值數量與判斷結果
select(SEQN, n_na, mets_l, mets_s) %>%
#將資料格式改為tibble
tibble::as_tibble()
#與時間長度資料合併
data <- left_join(data, df_mets, by="SEQN")
df_obesity <-
#合併人口學和身體測量資料
left_join(DEMO, BMX, by="SEQN") %>%
#數值≧25記為1,否則記為2,同時將資料改為factor格式
mutate(owob=factor(ifelse(BMXBMI>=25, 1, 2), labels=c("Yes", "No"))) %>%
#選擇個案編號與判斷結果
select(SEQN, owob) %>%
#將資料格式改為tibble
tibble::as_tibble()
#與先前的資料合併
data <- left_join(data, df_obesity, by="SEQN")
df_bodycom <-
#合併人口學、身體組成和身體測量資料
join_all(list(DEMO, DX, BMX), by="SEQN", type="left") %>%
#選擇欲使用columns
select(SEQN, DXDTOFAT, DXDTOTOT, BMXHT) %>%
#將單位改為kg
mutate(total_fat=DXDTOFAT/1000,
lean_mass=(DXDTOTOT - DXDTOFAT)/1000,
#計算fat mass index
fmi=total_fat/((BMXHT/100)^2),
#計算lean mass index
lmi=lean_mass/((BMXHT/100)^2)) %>%
#選擇個案編號、FMI和LMI
select(SEQN, fmi, lmi) %>%
#將資料格式改為tibble
tibble::as_tibble()
#與先前資料合併
data <- left_join(data, df_bodycom, by="SEQN")
df_pa <-
#合併人口學和身體活動問卷資料
left_join(DEMO, PAQ, by="SEQN") %>%
#選擇欲使用columns
select(SEQN, PAD615, PAD630, PAD645, PAD660, PAD675) %>%
#將紀錄為不知道者(9999)改為遺漏值(NA)
mutate_at(vars(starts_with("PAD")), list(~na_if(., 9999))) %>%
#依照代謝當量表計算各類分數
mutate(PAD615=PAD615*8, PAD630=PAD630*4, PAD645=PAD645*4,
PAD660=PAD660*8, PAD675=PAD675*4) %>%
#計算總代謝當量
mutate(pa=rowSums(select(., starts_with("PAD")), na.rm=T)) %>%
#將代謝當量為0者改記為遺漏值(NA)
mutate_at(vars(pa), list(~na_if(., 0))) %>%
#選取個案編號與代謝當量分數
select(SEQN, pa) %>%
#將資料格式改為tibble
tibble::as.tibble()
#與先前資料合併
data <- left_join(data, df_pa, by="SEQN")
df_demo <- DEMO %>%
#選取欲使用columns
select(SEQN, RIDAGEYR, RIAGENDR, DMDEDUC2, RIDRETH3, #年齡、性別、教育程度、種族
SDMVPSU, SDMVSTRA, INDFMPIR) %>% #問卷分析用參數、社經地位
#改column名稱
mutate(age=RIDAGEYR,
#改column名稱,同使改為factor格式
gndr=factor(RIAGENDR, labels=c("Male", "Female")),
#將教育程度資料中紀錄為拒絕回答(7)與不知道(9)者改記為遺漏值(NA)
DMDEDUC2=ifelse(DMDEDUC2==7 | DMDEDUC2==9, NA, DMDEDUC2),
#將資料改為factor格式
edu=factor(DMDEDUC2,
labels=c("less than 9th grade",
"9-11th grade (includes 12th with no diploma)",
"high school graduate/GED or equivalent",
"some college or AA degree",
"college graduate or above")),
race=factor(RIDRETH3,
labels=c("Mexican American", "Other Hispanic",
"Non-Hispanic White", "Non-Hispanic Black",
"Non-Hispanic Asian", "Other")),
#改column名稱
ses=INDFMPIR, psu=SDMVPSU, stra=SDMVSTRA) %>%
#選取調整後的columns
select(1, 9:15) %>%
#將資料格式改為tibble
tibble::as.tibble()
#與先前資料合併
data <- left_join(data, df_demo, by="SEQN")
#計算第一天攝入熱量
df_D1EI <- DR1 %>%
#選取欲使用的columns
select(SEQN, DR1IKCAL) %>%
#將相同編號聚集
group_by(SEQN) %>%
#計算第一天所攝取總熱量
summarise(d1_totalEnergyIntake=sum(DR1IKCAL, na.rm=T)) %>%
#若攝入熱量為0則改記成為遺漏值(NA)
mutate(d1_totalEnergyIntake=na_if(d1_totalEnergyIntake,0)) %>%
#將資料格式改為tibble
tibble::as.tibble()
#計算第二天攝入熱量
df_D2EI <- DR2 %>%
#選取欲使用的columns
select(SEQN, DR2IKCAL) %>%
#將相同編號聚集
group_by(SEQN) %>%
#計算第二天所攝取總熱量
summarise(d2_totalEnergyIntake=sum(DR2IKCAL, na.rm=T)) %>%
#若攝入熱量為0則改記成遺漏值(NA)
mutate(d2_totalEnergyIntake=na_if(d2_totalEnergyIntake,0)) %>%
#將資料格式改為tibble
tibble::as.tibble()
df_energy <-
#合併兩天資料
left_join(df_D1EI, df_D2EI, by="SEQN") %>%
#計算熱量一天或是兩天平均
mutate(avg_engy=ifelse(is.na(d2_totalEnergyIntake), d1_totalEnergyIntake,
(d2_totalEnergyIntake+d1_totalEnergyIntake)/2)) %>%
#選取個案編號與計算結果
select(SEQN, avg_engy) %>%
#將資料格式改為tibble
tibble::as.tibble()
#與先前資料合併
data <- left_join(data, df_energy, by="SEQN")
special_diet <- DRQD1 %>%
#選取欲使用columns
select(SEQN, DRQSDIET) %>%
#將遺漏值或紀錄為不知道(9)者都記為遺漏值
mutate(spcl_dt=ifelse(is.na(DRQSDIET) | DRQSDIET==9, NA, DRQSDIET),
#改資料為factor格式
spcl_dt=factor(spcl_dt, labels=c("Yes", "No"))) %>%
#選取個案編號與整理結果
select(SEQN, spcl_dt) %>%
#將資料格式改為tibble
tibble::as.tibble()
#與先前資料合併
data <- left_join(data, special_diet, by="SEQN")
df_menopause <- RHQ %>%
#將回答Menopause/Change of life(7)者記為1,其餘為2,並改為factor格式
mutate(mp=factor(ifelse(RHD043==7, 1, 2), labels=c("Yes", "No"))) %>%
#選取個案編號與整理結果
select(SEQN, mp) %>%
#將資料格式改為tibble
tibble::as.tibble()
#與先前資料合併
data <- left_join(data, df_menopause, by="SEQN")
#將個案編號改為character格式
data <- data %>% mutate(SEQN=as.character(SEQN))
#將整理後資料改存至data_all
data_all <- data
data <- data %>%
#選取大於20歲的資料
filter(age>=20) %>%
#新增一個column方便後續分析進行
mutate(one=1)
nhanes<- svydesign(data=data, id=~psu, strata=~stra, weights=~wt_d1, nest=T)
#找出加權後的quantile
q_wt <- svyquantile(~fi, nhanes, c(.25,.5,.75))
#輸出table
q_wt$fi %>% kbl(digits=2) %>% kable_classic_2() %>% kable_styling(full_width=F)
| quantile | ci.2.5 | ci.97.5 | se | |
|---|---|---|---|---|
| 0.25 | 10.75 | 10.75 | 11.00 | 0.06 |
| 0.5 | 12.04 | 12.00 | 12.25 | 0.06 |
| 0.75 | 13.75 | 13.50 | 14.00 | 0.12 |
#用加權後的斷食時間作為分組依據
data <- data %>%
mutate(fi_grp=factor(cut(data$fi, breaks=c(0, q_wt$fi[,1], 24), include.lowest=T),
labels=c("[0, 10.75]", "(10.75, 12.04]", "(12.04, 13.75]", "(13.75, 24]")))
#更新問卷設計的資料
nhanes<- svydesign(data=data, id=~psu, strata=~stra, weights=~wt_d1, nest=T)
flowchart <- grViz("digraph flowchart1 {
graph [overlap=true]
node [shape=rectangle, fixedsize=true, width=5, height=1, fontsize=18]
s1 [label='2017-2018 NHANES sample\n(n=9254)']
s2 [label='Have at least 1 day of dietary recall data\n(n=7591)']
s3 [label='At least 20 years old\n(n=4740)']
node [shape=rectangle, fixedsize=true, width=4, height=1, fontsize=18]
s4 [label='Have MetS data\n(n=4420)']
s5 [label='Have BMI data\n(n=4689)']
s6 [label='Have body composition data\n(n=2096)']
s1 -> s2 [label='Missing (n=1663)']
s2 -> s3 [label='Under 20 years old (n=2851)']
s3 -> s4 [label='Missing\n(n=320)']
s3 -> s5 [label='Missing\n(n=51)']
s3 -> s6 [label='Missing\n(n=2644)']
}")
flowchart
#計算百分比和SE
met_l_per <- svymean(~mets_l, nhanes, na.rm=T) %>% as.data.frame()
met_l_per <- met_l_per*100
met_s_per <- svymean(~mets_s, nhanes, na.rm=T) %>% as.data.frame()
met_s_per <- met_s_per*100
#合併資料
mets_table <- rbind(cbind(met_l_per[1, ], met_l_per[2, ]), cbind(met_s_per[1, ], met_s_per[2, ]))
#重新命名行列
colnames(mets_table) <- c("Yes(%)", "Yes(SE)", "No(%)", "No(SE)")
rownames(mets_table) <- c("Not Strict (n=4420)", "Strict (n=2465)")
#輸出表格
mets_table %>%
kbl(digits=2, col.names=rep(c("Percentage (%)", "SE"), 2)) %>%
kable_classic_2() %>%
add_header_above(header=c(" "=1, "Yes"=2, "No"=2)) %>%
kable_styling(full_width=F)
|
Yes
|
No
|
|||
|---|---|---|---|---|
| Percentage (%) | SE | Percentage (%) | SE | |
| Not Strict (n=4420) | 21.45 | 1.23 | 78.55 | 1.23 |
| Strict (n=2465) | 41.30 | 2.26 | 58.70 | 2.26 |
#計算百分比和SE
owob_per <- svymean(~owob, nhanes, na.rm=T) %>% as.data.frame()
owob_per <- owob_per*100
#轉置
owob_table <- cbind(owob_per[1,], owob_per[2,])
#重新命名行列
colnames(owob_table) <- c("Yes(%)", "Yes(SE)", "No(%)", "No(SE)")
rownames(owob_table) <- c("OWOB (n=4689)")
#輸出表格
owob_table %>%
kbl(digits=2, col.names=rep(c("Percentage", "SE"), 2)) %>%
kable_classic_2() %>%
add_header_above(header=c(" "=1, "Yes"=2, "No"=2)) %>%
kable_styling(full_width=F)
|
Yes
|
No
|
|||
|---|---|---|---|---|
| Percentage | SE | Percentage | SE | |
| OWOB (n=4689) | 74.02 | 1.57 | 25.98 | 1.57 |
#計算平均
fmi_table <- svymean(~fmi, nhanes, na.rm=T) %>% as.data.frame()
lmi_table <- svymean(~lmi, nhanes, na.rm=T) %>% as.data.frame()
#計算SD、合併資料
bc_table <- rbind(fmi_table[1], lmi_table[1]) %>%
cbind(sd=c(svysd(~fmi, nhanes, na.rm=T), svysd(~lmi, nhanes, na.rm=T)))
#改名稱
rownames(bc_table) <- c("Fat Mass Index (kg/m2)", "Lean Mass Index (kg/m2)")
#輸出表格
rbind(fmi_table[1], lmi_table[1]) %>%
cbind(sd=c(svysd(~fmi, nhanes, na.rm=T), svysd(~lmi, nhanes, na.rm=T))) %>%
tibble::as_tibble() %>%
mutate("var"=c("Fat Mass Index (kg/m2)", "Lean Mass Index (kg/m2)"),
"Unweighted n"=c(2096, 2096)) %>% select(3,4,1,2) %>%
kbl(digits=2) %>% kable_classic_2() %>% kable_styling(full_width=F)
| var | Unweighted n | mean | sd |
|---|---|---|---|
| Fat Mass Index (kg/m2) | 2096 | 9.96 | 4.37 |
| Lean Mass Index (kg/m2) | 2096 | 19.18 | 3.50 |
#畫圖資料、問卷設計、區間
svyhist(~fi, nhanes, breaks=seq(0, 24, 1),
#x軸、y軸範圍
xlim=c(0, 25), ylim=c(0, .20),
#標題、x軸名稱
main="Histogram of Fasting Interval", xlab="Fasting Interval (hr(s))")
#將wide data轉為long data
mets_table[c(1,3)] %>% melt() %>% cbind(grp=rep(c("Not Strict", "Strict"), 2), .) %>%
mutate(value=round(value, 2)) %>%
#畫圖的資料,依照"是"或"否"具MetS填滿
ggplot(aes(x=grp, y=value, fill=variable)) +
#畫bar chart #設定圖表主題
geom_bar(stat="identity", position="dodge") + theme_classic() +
#命名y軸、圖表標題
labs(x="", y="Percentage", title="MetS") +
#加上label:資料為value,調整位置、顏色
geom_text(aes(label=value), position=position_dodge(width=.9), vjust=-.5) +
#調整圖例的位置、背景顏色、標題
theme(legend.position=c(.8,.9),
legend.title=element_blank()) + ylim(c(0,85)) +
#設定圖表顏色
scale_fill_grey()
#wide改long資料 #小數點兩位
owob_table[c(1, 3)] %>% melt() %>% mutate(value=round(value, 2)) %>%
#畫圖資料
ggplot(aes(x=variable, y=value)) +
#畫bar chart #加上label
geom_bar(stat="identity") + geom_text(aes(label=value), vjust=-.5) +
#設定主題 #y軸範圍
theme_classic() + ylim(c(0,85)) +
#兩軸、圖表名稱
labs(x="OWOB", y="Percentage", title="OWOB")
#設定1*2格式
par(mfrow = c(1, 2))
#輸出FMI box plot
svyboxplot(~fmi~1, nhanes, all.outliers=T, ylim=c(0, 35), xlab="Fat Mass Index (kg/m2)")
#輸出LMI box plot
svyboxplot(~lmi~1, nhanes, all.outliers=T, ylim=c(0, 35), xlab="Lean Mass Index (kg/m2)")
#算百分比和SE
day_table <- svyby(~fi_grp, ~days, nhanes, svymean, na.rm=T)
day_table_p <- day_table[2:5] * 100
day_table_se <- day_table[6:9] * 100
#儲存分組名稱
grp_names <- c("0 <= x <= 10.75", "10.75 < x <= 12.04",
"12.04 < x <= 13.75", "13.75 < x <= 24")
#改行列名稱
colnames(day_table_p) <- grp_names
colnames(day_table_se) <- grp_names
#算total百分比和SE、轉置
day_table_t <- t(svyby(~days, ~one, nhanes, svymean, na.rm=T) * 100)
#合併資料
opts <- options(knitr.kable.NA="")
day_table <- rbind(day_table_p[1,], day_table_se[1,],
day_table_p[2,], day_table_se[2,]) %>%
cbind("var"=c("1 day (%)", "1 day (SE)", "2 days (%)", "2 days (SE)"),
"Unweighted n"=c(605, NA, 4135, NA),
"total"=c(day_table_t[2], day_table_t[4], day_table_t[3], day_table_t[5]), .) %>%
tibble::as_tibble()
#輸出表格
day_table %>% kbl(digits=2) %>% kable_classic_2() %>%
row_spec(c(2, 4), color="grey")
| var | Unweighted n | total | 0 <= x <= 10.75 | 10.75 < x <= 12.04 | 12.04 < x <= 13.75 | 13.75 < x <= 24 |
|---|---|---|---|---|---|---|
| 1 day (%) | 605 | 12.21 | 34.20 | 28.91 | 12.70 | 24.19 |
| 1 day (SE) | 1.07 | 3.46 | 2.93 | 1.52 | 3.76 | |
| 2 days (%) | 4135 | 87.79 | 25.91 | 22.31 | 27.79 | 23.99 |
| 2 days (SE) | 1.07 | 0.97 | 0.96 | 1.22 | 1.33 |
#選取畫圖用資料 #小數點兩位
day_table[c(1, 3), c(1, 3)] %>% mutate(total=round(total, 2)) %>%
#畫圖資料 #畫bar chart
ggplot(aes(x=var, y=total)) + geom_bar(stat="identity") +
#加上label #設定主題
geom_text(aes(label=total), vjust=-.5) + theme_classic() +
#y軸範圍 #兩軸名稱
ylim(c(0, 90)) + labs(x="", y="Percentage")
#選取畫圖用資料 #小數點兩位 #wide to long
day_table[c(1, 3), c(1, 4:7)] %>% mutate_if(is.double, list(~round(., 2))) %>% melt() %>%
#畫圖資料
ggplot(aes(x=variable, y=value, fill=var)) +
#畫bar chart #設定主題
geom_bar(stat="identity", position="dodge") + theme_classic() +
#兩軸名稱 #顏色主題
labs(x="Fasting Interval Group", y="Percentage") + scale_fill_grey() +
#加上label
geom_text(aes(label=value), position=position_dodge(width=.9), vjust=-.5) +
#y軸範圍 #改圖例位置
ylim(c(0, 40)) + theme(legend.position=c(.9, .9), legend.title=element_blank())
#算分組的百分比和SE
gndr_grp_table <- svyby(~gndr, ~fi_grp, nhanes, svymean, na.rm=T)
gndr_grp_table <- t(gndr_grp_table[2:5]*100)
#算total的百分比和SE
gndr_total_table <- t(svyby(~gndr, ~one, nhanes, svymean, na.rm=T)*100)
#合併資料
gndr_grp_table <-
cbind("Total_p"=gndr_total_table[c(2,3)], "Total_se"=gndr_total_table[c(4,5)],
"G1_p"=gndr_grp_table[c(1,2), 1], "G1_se"=gndr_grp_table[c(3,4), 1],
"G2_p"=gndr_grp_table[c(1,2), 2], "G2_se"=gndr_grp_table[c(3,4), 2],
"G3_p"=gndr_grp_table[c(1,2), 3], "G3_se"=gndr_grp_table[c(3,4), 3],
"G4_p"=gndr_grp_table[c(1,2), 4], "G4_se"=gndr_grp_table[c(3,4), 4]) %>%
tibble::as_tibble() %>%
mutate(chr=c("Male", "Female")) %>% select(11, 1:10)
edu_grp_table <- svyby(~edu, ~fi_grp, nhanes, svymean, na.rm=T)
edu_grp_table <- t(edu_grp_table[2:11]*100)
edu_total_table <- t(svyby(~edu, ~one, nhanes, svymean, na.rm=T)*100)
edu_grp_table <-
cbind("Total_p"=edu_total_table[c(2:6)], "Total_se"=edu_total_table[c(7:11)],
"G1_p"=edu_grp_table[c(1:5), 1], "G1_se"=edu_grp_table[c(6:10), 1],
"G2_p"=edu_grp_table[c(1:5), 2], "G2_se"=edu_grp_table[c(6:10), 2],
"G3_p"=edu_grp_table[c(1:5), 3], "G3_se"=edu_grp_table[c(6:10), 3],
"G4_p"=edu_grp_table[c(1:5), 4], "G4_se"=edu_grp_table[c(6:10), 4]) %>%
tibble::as_tibble() %>%
mutate(chr=data$edu %>% levels()) %>% select(11, 1:10)
race_grp_table <- svyby(~race, ~fi_grp, nhanes, svymean, na.rm=T)
race_grp_table <- t(race_grp_table[2:13]*100)
race_total_table <- t(svyby(~race, ~one, nhanes, svymean, na.rm=T)*100)
race_grp_table <-
cbind("Total_p"=race_total_table[c(2:7)], "Total_se"=race_total_table[c(8:13)],
"G1_p"=race_grp_table[c(1:6), 1], "G1_se"=race_grp_table[c(7:12), 1],
"G2_p"=race_grp_table[c(1:6), 2], "G2_se"=race_grp_table[c(7:12), 2],
"G3_p"=race_grp_table[c(1:6), 3], "G3_se"=race_grp_table[c(7:12), 3],
"G4_p"=race_grp_table[c(1:6), 4], "G4_se"=race_grp_table[c(7:12), 4]) %>%
tibble::as_tibble() %>%
mutate(chr=data$race %>% levels()) %>% select(11, 1:10)
#合併資料
chr_smry <- rbind(gndr_grp_table, edu_grp_table, race_grp_table)
#卡方檢定
gndr_chi <- svychisq(~fi_grp+gndr, nhanes, na.rm=T)
edu_chi <- svychisq(~fi_grp+edu, nhanes, na.rm=T)
race_chi <- svychisq(~fi_grp+race, nhanes, na.rm=T)
#合併資料
chr_smry <- chr_smry %>%
mutate("p-value"=c(scales::pvalue(gndr_chi$p.value), rep(NA, 1),
scales::pvalue(edu_chi$p.value), rep(NA, 4),
scales::pvalue(race_chi$p.value), rep(NA, 5)))
#設定輸出表格中NA顯示為空白
opts <- options(knitr.kable.NA="")
#輸出表格
chr_smry %>%
kbl(digits=2,
col.names=c("Variable", rep(c("Percentage (%)", "se"), 5), "p-value")) %>%
column_spec(c(3, 5, 7, 9), color="gray") %>%
add_header_above(header=c(" "=1, "Total"=2, "0 <= x <= 10.75"=2, "10.75 < x <= 12.04"=2,
"12.04 < x <= 13.75"=2, "13.75 < x <= 24"=2, " "=1)) %>%
add_header_above(header=c(" "=3, "Fasting Interval (hr(s))"=8, " "=1)) %>%
pack_rows(index=c("Gender (n=4740)"=2, "Education Level (n=4732)"=5, "Ethnicity (n=4740)"=6)) %>%
kable_classic_2() %>% kable_styling(full_width=F)
| Variable | Percentage (%) | se | Percentage (%) | se | Percentage (%) | se | Percentage (%) | se | Percentage (%) | se | p-value |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Gender (n=4740) | |||||||||||
| Male | 48.02 | 1.09 | 53.63 | 1.92 | 44.57 | 1.84 | 45.43 | 1.75 | 47.83 | 1.86 | 0.004 |
| Female | 51.98 | 1.09 | 46.37 | 1.92 | 55.43 | 1.84 | 54.57 | 1.75 | 52.17 | 1.86 | |
| Education Level (n=4732) | |||||||||||
| less than 9th grade | 3.53 | 0.54 | 2.52 | 0.56 | 3.60 | 0.74 | 4.06 | 0.69 | 4.01 | 0.79 | 0.014 |
| 9-11th grade (includes 12th with no diploma) | 7.19 | 0.54 | 6.07 | 0.61 | 7.23 | 1.42 | 6.13 | 0.76 | 9.55 | 1.27 | |
| high school graduate/GED or equivalent | 28.14 | 1.47 | 25.88 | 2.32 | 24.63 | 2.52 | 27.51 | 2.46 | 34.74 | 1.89 | |
| some college or AA degree | 30.92 | 1.24 | 33.58 | 2.05 | 29.84 | 2.90 | 29.78 | 1.80 | 30.23 | 2.23 | |
| college graduate or above | 30.21 | 2.42 | 31.95 | 4.16 | 34.69 | 3.29 | 32.52 | 3.68 | 21.46 | 2.34 | |
| Ethnicity (n=4740) | |||||||||||
| Mexican American | 8.93 | 1.62 | 6.64 | 1.22 | 8.74 | 1.68 | 8.78 | 1.81 | 11.82 | 2.51 | <0.001 |
| Other Hispanic | 6.88 | 0.78 | 5.59 | 1.08 | 5.77 | 1.15 | 9.27 | 1.21 | 6.80 | 1.10 | |
| Non-Hispanic White | 62.03 | 2.83 | 69.26 | 2.86 | 64.92 | 3.33 | 60.13 | 3.56 | 53.19 | 2.86 | |
| Non-Hispanic Black | 11.59 | 1.60 | 9.22 | 1.40 | 10.34 | 1.36 | 10.33 | 1.79 | 16.80 | 2.26 | |
| Non-Hispanic Asian | 5.89 | 0.89 | 5.44 | 1.24 | 6.25 | 1.04 | 5.84 | 0.88 | 6.10 | 0.89 | |
| Other | 4.69 | 0.62 | 3.84 | 1.01 | 3.97 | 0.71 | 5.65 | 0.95 | 5.29 | 0.81 | |
#算各組平均和SE
age_grp_table <- svyby(~age, ~fi_grp, nhanes, svymean, na.rm=T)
#wide to long
age_grp_table <- t(age_grp_table[c(2,3)]) %>% melt() %>% select(3) %>% t()
#重新命名
colnames(age_grp_table) <- c("G1_mean", "G1_se", "G2_mean", "G2_se",
"G3_mean", "G3_se", "G4_mean", "G4_se")
#改為tibble格式
age_grp_table <- tibble::as_tibble(age_grp_table)
#算total平均和SE
age_total_table <- svyby(~age, ~one, nhanes, svymean, na.rm=T)
#合併資料
age_grp_table <- age_grp_table %>%
mutate(total_mean=age_total_table[1,2], total_se=age_total_table[1,3]) %>%
select(9,10,1:8)
#ANOVA
aov_age <- aov(age~fi_grp, data=data, weights=wt_d1) %>% summary()
pa_grp_table <- svyby(~pa, ~fi_grp, nhanes, svymean, na.rm=T)
pa_grp_table <- t(pa_grp_table[c(2,3)]) %>% melt() %>% select(3) %>% t()
colnames(pa_grp_table) <- c("G1_mean", "G1_se", "G2_mean", "G2_se",
"G3_mean", "G3_se", "G4_mean", "G4_se")
pa_grp_table <- tibble::as_tibble(pa_grp_table)
pa_total_table <- svyby(~pa, ~one, nhanes, svymean, na.rm=T)
pa_grp_table <- pa_grp_table %>%
mutate(total_mean=pa_total_table[1,2], total_se=pa_total_table[1,3]) %>%
select(9,10,1:8)
aov_pa <- aov(pa~fi_grp, data=data, weights=wt_d1) %>% summary()
ses_grp_table <- svyby(~ses, ~fi_grp, nhanes, svymean, na.rm=T)
ses_grp_table <- t(ses_grp_table[c(2,3)]) %>% melt() %>% select(3) %>% t()
colnames(ses_grp_table) <- c("G1_mean", "G1_se", "G2_mean", "G2_se",
"G3_mean", "G3_se", "G4_mean", "G4_se")
ses_grp_table <- tibble::as_tibble(ses_grp_table)
ses_total_table <- svyby(~ses, ~one, nhanes, svymean, na.rm=T)
ses_grp_table <- ses_grp_table %>%
mutate(total_mean=ses_total_table[1,2], total_se=ses_total_table[1,3]) %>%
select(9,10,1:8)
aov_ses <- aov(ses~fi_grp, data=data, weights=wt_d1) %>% summary()
avg_engy_grp_table <- svyby(~avg_engy, ~fi_grp, nhanes, svymean, na.rm=T)
avg_engy_grp_table <- t(avg_engy_grp_table[c(2,3)]) %>% melt() %>% select(3) %>% t()
colnames(avg_engy_grp_table) <- c("G1_mean", "G1_se", "G2_mean", "G2_se",
"G3_mean", "G3_se", "G4_mean", "G4_se")
avg_engy_grp_table <- tibble::as_tibble(avg_engy_grp_table)
avg_engy_total_table <- svyby(~avg_engy, ~one, nhanes, svymean, na.rm=T)
avg_engy_grp_table <- avg_engy_grp_table %>%
mutate(total_mean=avg_engy_total_table[1,2], total_se=avg_engy_total_table[1,3]) %>%
select(9,10,1:8)
aov_avg_engy <- aov(avg_engy~fi_grp, data=data, weights=wt_d1) %>% summary()
#合併資料
con_smry <- rbind(age_grp_table, pa_grp_table, ses_grp_table, avg_engy_grp_table) %>%
mutate("p-value"=c(scales::pvalue(aov_age[[1]][["Pr(>F)"]][1]),
scales::pvalue(aov_pa[[1]][["Pr(>F)"]][1]),
scales::pvalue(aov_ses[[1]][["Pr(>F)"]][1]),
scales::pvalue(aov_avg_engy[[1]][["Pr(>F)"]][1])),
var=c("Age (y)", "Physical Activity (MET score)",
"Ratio of Family Income to Poverty", "Energy Intake (kcal)"),
"Unweighted n"=c(4740, 3518, 4183, 4740),
"Missing n"=c(0, 1222, 557, 0)) %>%
select(12:14, 1:11)
#輸出表格
con_smry %>%
kbl(digits=2,
col.names=c("Variable", "Unweighted n", "Missing", rep(c("mean", "se"), 5), "p-value")) %>%
column_spec(c(5, 7, 9, 11), color="gray") %>%
add_header_above(header=c(" "=3, "Total"=2, "0 <= x <= 10.75"=2, "10.75 < x <= 12.04"=2,
"12.04 < x <= 13.75"=2, "13.75 < x <= 24"=2, " "=1)) %>%
add_header_above(header=c(" "=5, "Fasting Interval (hr(s))"=8, " "=1)) %>%
kable_classic_2() %>% kable_styling(full_width=F)
| Variable | Unweighted n | Missing | mean | se | mean | se | mean | se | mean | se | mean | se | p-value |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Age (y) | 4740 | 0 | 48.34 | 0.61 | 47.81 | 0.78 | 50.71 | 1.26 | 49.05 | 0.57 | 45.88 | 0.72 | <0.001 |
| Physical Activity (MET score) | 3518 | 1222 | 1346.50 | 47.48 | 1537.84 | 82.04 | 1244.50 | 46.82 | 1240.55 | 76.11 | 1331.99 | 81.93 | <0.001 |
| Ratio of Family Income to Poverty | 4183 | 557 | 3.06 | 0.06 | 3.14 | 0.10 | 3.13 | 0.11 | 3.09 | 0.09 | 2.87 | 0.06 | <0.001 |
| Energy Intake (kcal) | 4740 | 0 | 2079.35 | 18.19 | 2412.04 | 33.89 | 2115.25 | 35.03 | 2011.80 | 32.45 | 1744.74 | 35.15 | <0.001 |