1) Packages
suppressPackageStartupMessages({
library(dplyr)
library(table1)
})
2) Helpers
# Convert a 0/1 indicator into a two-level factor labelled "No"/"Yes".
# Any value other than 0 or 1 (including NA) becomes NA.
f_yesno <- function(x) {
  indicator_codes <- c(0, 1)
  indicator_names <- c("No", "Yes")
  factor(x, levels = indicator_codes, labels = indicator_names)
}
# Build a factor from coded values, keeping only the codes that actually occur.
#
# x  : vector of codes.
# lv : candidate codes, in the desired display order.
# lb : labels, parallel to `lv`.
#
# Codes listed in `lv` but absent from `x` are dropped from the levels, so no
# empty category is created; values of `x` not listed in `lv` become NA.
safe_factor <- function(x, lv, lb) {
  observed <- unique(x[!is.na(x)])
  kept_levels <- intersect(lv, observed)
  # Look the labels up by position in the full code list.
  kept_labels <- lb[match(kept_levels, lv)]
  factor(x, levels = kept_levels, labels = kept_labels)
}
# Cell renderer passed to table1 as `render.continuous`.
#
# Numeric input is summarised in one string:
#   "mean (SD); median [IQR] = median [Q1–Q3]", one decimal each, NAs removed.
# Anything else is delegated to table1::render.default() with `...` forwarded.
rndr <- function(x, ...) {
  if (!is.numeric(x)) {
    return(table1::render.default(x, ...))
  }
  centre <- mean(x, na.rm = TRUE)
  spread <- sd(x, na.rm = TRUE)
  mid <- stats::median(x, na.rm = TRUE)
  quartiles <- quantile(x, probs = c(0.25, 0.75), na.rm = TRUE)
  mean_sd <- sprintf("%0.1f (%0.1f)", centre, spread)
  median_iqr <- sprintf("%0.1f [%0.1f–%0.1f]", mid, quartiles[[1]], quartiles[[2]])
  paste0(mean_sd, "; median [IQR] = ", median_iqr)
}
# Column-header renderer passed to table1 as `render.strat`:
# formats a stratum as "<label> (n=<n>)". Extra arguments are ignored.
rndr_strat <- function(label, n, ...) {
  count_suffix <- paste0(" (n=", n, ")")
  paste0(label, count_suffix)
}
3) Load data
# Input CSV location comes from the document parameter `input_csv`.
csv_path <- params$input_csv
# Fail early, with the offending path in the message, when the file is absent.
if (!file.exists(csv_path)) {
  stop("Không tìm thấy file CSV: ", csv_path)
}
# check.names = FALSE keeps the column names exactly as written in the file.
df <- read.csv(file = csv_path, check.names = FALSE)
4) Recode & derive analysis variables
# Build the Table 1 analysis data set: rename raw columns to readable names
# and recode the coded variables into labelled factors. The raw columns stay
# in place; the derived ones are added alongside them.
df_t1 <- df %>%
  mutate(
    # Demographics
    age = RIDAGEYR,
    # Bands are (0,39], (39,59], (59,120]. include.lowest = TRUE closes the
    # first band on the left so that an age of exactly 0 lands in "<40"
    # instead of silently becoming NA.
    age_grp = cut(RIDAGEYR, breaks = c(0, 39, 59, 120),
                  right = TRUE, include.lowest = TRUE,
                  labels = c("<40","40–59","≥60")),
    sex = factor(RIAGENDR, levels = c(1,2), labels = c("Male","Female")),
    # Codes are listed in display order; safe_factor() keeps only the codes
    # that occur in the data.
    race = safe_factor(RIDRETH3,
                       lv = c(3,4,1,2,6,7),
                       lb = c("Non-Hispanic White","Non-Hispanic Black",
                              "Mexican American","Other Hispanic",
                              "Non-Hispanic Asian","Other/Multiracial")),
    # Anthropometrics
    bmi = BMXBMI,
    waist = BMXWAIST,
    # Clinical risk factors (0/1 indicators already present in the input)
    hypertension_f = f_yesno(hypertension),
    diabetes_f = f_yesno(diabetes),
    dyslip_f = f_yesno(dyslip),
    # Behaviors
    smoking_f = safe_factor(smoking, lv = c(1,2,3),
                            lb = c("Current","Former","Never")),
    drinking_f = safe_factor(drinking, lv = c(1,2,3),
                             lb = c("Current","Former","Never")),
    PA_f = safe_factor(PA, lv = c(1,2,3),
                       lb = c("Low","Moderate","High")),
    # Labs
    TC = LBXTC,
    HDL = LBDHDD,
    Creatinine = LBXSCR,
    # Self-assignment: no change, but it errors here if the column is missing.
    eGFR = eGFR,
    HbA1c = LBXGH,
    # Use LBXSGL when the input has it, otherwise LBXGLU, otherwise all-missing.
    Glucose = if ("LBXSGL" %in% names(df)) LBXSGL else if ("LBXGLU" %in% names(df)) LBXGLU else NA_real_,
    # Stratification outcome
    CVD_f = f_yesno(CVD)
  )
5) Labels
# Display labels for the clinical and laboratory variables, keyed by column
# name. These are the row headings table1 prints.
core_label <- c(
  age = "Age, years",
  age_grp = "Age group",
  sex = "Sex",
  race = "Race/Ethnicity",
  bmi = "Body Mass Index, kg/m^2",
  waist = "Waist circumference, cm",
  hypertension_f = "Hypertension",
  diabetes_f = "Diabetes",
  dyslip_f = "Dyslipidemia",
  smoking_f = "Smoking status",
  drinking_f = "Drinking status",
  PA_f = "Physical activity",
  TC = "Total cholesterol, mg/dL",
  HDL = "HDL-cholesterol, mg/dL",
  Creatinine = "Serum creatinine, mg/dL",
  eGFR = "eGFR, mL/min/1.73m^2",
  HbA1c = "HbA1c, %",
  Glucose = "Serum glucose, mg/dL"
)
# Deliberately no intersect() here: every one of these columns is required,
# so a missing column should raise an error rather than be skipped.
for (v in names(core_label)) label(df_t1[[v]]) <- core_label[[v]]

# Optional: labels for the nutrition variables.
# Vitamin D is labelled in µg/day: the rendered summary for vitD_avg
# (mean about 4.5) is on the microgram scale, not the IU scale.
# NOTE(review): vitE_avg renders with a median of 0.0 — verify that the source
# column is total vitamin E intake and not an "added" vitamin E variable.
nice_label <- c(
  carb_avg = "Carbohydrate, g/day",
  prot_avg = "Protein, g/day",
  sfat_avg = "Saturated fat, g/day",
  mfat_avg = "Monounsaturated fat, g/day",
  pfat_avg = "Polyunsaturated fat, g/day",
  fibe_avg = "Dietary fiber, g/day",
  chol_avg = "Dietary cholesterol, mg/day",
  sodi_avg = "Sodium, mg/day",
  pota_avg = "Potassium, mg/day",
  magn_avg = "Magnesium, mg/day",
  calc_avg = "Calcium, mg/day",
  phos_avg = "Phosphorus, mg/day",
  iron_avg = "Iron, mg/day",
  zinc_avg = "Zinc, mg/day",
  vitA_avg = "Vitamin A, µg RAE/day",
  vitB1_avg = "Vitamin B1 (Thiamine), mg/day",
  vitB6_avg = "Vitamin B6, mg/day",
  vitB12_avg = "Vitamin B12, µg/day",
  vitB9_folate_avg = "Folate (B9), µg DFE/day",
  vitC_avg = "Vitamin C, mg/day",
  vitD_avg = "Vitamin D, µg/day",
  vitE_avg = "Vitamin E, mg/day"
)
# Nutrition columns are optional, so label only the ones that are present.
for (v in intersect(names(nice_label), names(df_t1))) label(df_t1[[v]]) <- nice_label[[v]]
6) Variable lists
# Row order for the clinical / laboratory table.
vars_clinical_labs <- c(
  # demographics
  "age", "age_grp", "sex", "race",
  # anthropometrics
  "bmi", "waist",
  # clinical risk factors
  "hypertension_f", "diabetes_f", "dyslip_f",
  # behaviours
  "smoking_f", "drinking_f", "PA_f",
  # labs
  "TC", "HDL", "Creatinine", "eGFR", "HbA1c", "Glucose"
)

# Row order for the nutrition-intake table.
vars_nutrition <- c(
  # macronutrients
  "carb_avg", "prot_avg",
  "sfat_avg", "mfat_avg", "pfat_avg",
  "fibe_avg", "chol_avg",
  # minerals
  "sodi_avg", "pota_avg", "magn_avg", "calc_avg", "phos_avg",
  "iron_avg", "zinc_avg",
  # vitamins
  "vitA_avg", "vitB1_avg", "vitB6_avg", "vitB12_avg", "vitB9_folate_avg",
  "vitC_avg", "vitD_avg", "vitE_avg"
)
8) Table 1 — Clinical & Labs (with p-value)
# P-value column for table1's `extra.col` hook.
#
# table1::table1() has no `test` argument: anything passed under that name is
# swallowed by `...` and no p-value column is produced. The supported route is
# `extra.col`, a named list of functions that each receive, per variable, a
# list holding that variable's values for every displayed column.
#
# Continuous variables use Welch's two-sample t-test, categorical variables a
# chi-squared test of independence. Defined inside this chunk so the chunk
# runs on its own.
t1_pvalue <- function(x, ...) {
  # The pooled "Overall" column is not a comparison group; drop it if present.
  is_overall <- tolower(names(x)) %in% "overall"
  if (any(is_overall)) x <- x[!is_overall]
  y <- unlist(x, use.names = FALSE)
  g <- factor(rep(seq_along(x), times = lengths(x)))
  p <- tryCatch(
    if (is.numeric(y)) {
      stats::t.test(y ~ g)$p.value
    } else {
      stats::chisq.test(table(y, g))$p.value
    },
    error = function(e) {
      # Keep the table renderable, but make the failure visible.
      warning("P-value could not be computed: ", conditionMessage(e), call. = FALSE)
      NA_real_
    }
  )
  # Leading "" puts the value on the line below the variable label;
  # "<" is written as an HTML entity because table1 emits HTML.
  c("", sub("<", "&lt;", format.pval(p, digits = 3, eps = 0.001), fixed = TRUE))
}

table1(
  form_clinical_labs,
  data = df_t1,
  overall = "Overall",
  # P-value for CVD = No vs Yes, shown only when the document parameter asks for it.
  extra.col = if (isTRUE(params$show_pvalue)) list(`P-value` = t1_pvalue) else NULL,
  render.continuous = rndr,
  render.strat = rndr_strat,
  topclass = "Rtable1-zebra"
)
|
No (n=20014) |
Yes (n=2502) |
Overall (n=22516) |
| Age, years |
47.6 (17.2); median [IQR] = 47.0 [33.0–61.0] |
66.4 (12.6); median [IQR] = 69.0 [59.0–78.0] |
49.7 (17.8); median [IQR] = 50.0 [34.0–64.0] |
| Age group |
|
|
|
| <40 |
7436 (37.2%) |
101 (4.0%) |
7537 (33.5%) |
| 40–59 |
6833 (34.1%) |
529 (21.1%) |
7362 (32.7%) |
| ≥60 |
5745 (28.7%) |
1872 (74.8%) |
7617 (33.8%) |
| Sex |
|
|
|
| Male |
9500 (47.5%) |
1391 (55.6%) |
10891 (48.4%) |
| Female |
10514 (52.5%) |
1111 (44.4%) |
11625 (51.6%) |
| Race/Ethnicity |
|
|
|
| Non-Hispanic White |
7066 (35.3%) |
1204 (48.1%) |
8270 (36.7%) |
| Non-Hispanic Black |
4506 (22.5%) |
606 (24.2%) |
5112 (22.7%) |
| Mexican American |
2803 (14.0%) |
220 (8.8%) |
3023 (13.4%) |
| Other Hispanic |
2142 (10.7%) |
218 (8.7%) |
2360 (10.5%) |
| Non-Hispanic Asian |
2799 (14.0%) |
150 (6.0%) |
2949 (13.1%) |
| Other/Multiracial |
698 (3.5%) |
104 (4.2%) |
802 (3.6%) |
| Body Mass Index, kg/m^2 |
29.2 (7.1); median [IQR] = 28.0 [24.2–32.7] |
30.4 (7.4); median [IQR] = 29.1 [25.4–34.0] |
29.3 (7.2); median [IQR] = 28.1 [24.3–32.9] |
| Waist circumference, cm |
99.1 (16.8); median [IQR] = 97.4 [87.1–108.5] |
105.9 (16.5); median [IQR] = 104.3 [94.4–115.5] |
99.8 (16.9); median [IQR] = 98.2 [87.8–109.5] |
| Hypertension |
|
|
|
| No |
11828 (59.1%) |
462 (18.5%) |
12290 (54.6%) |
| Yes |
8186 (40.9%) |
2040 (81.5%) |
10226 (45.4%) |
| Diabetes |
|
|
|
| No |
15395 (76.9%) |
1159 (46.3%) |
16554 (73.5%) |
| Yes |
4619 (23.1%) |
1343 (53.7%) |
5962 (26.5%) |
| Dyslipidemia |
|
|
|
| No |
3134 (15.7%) |
152 (6.1%) |
3286 (14.6%) |
| Yes |
16880 (84.3%) |
2350 (93.9%) |
19230 (85.4%) |
| Smoking status |
|
|
|
| Current |
3820 (19.1%) |
540 (21.6%) |
4360 (19.4%) |
| Former |
4271 (21.3%) |
951 (38.0%) |
5222 (23.2%) |
| Never |
11923 (59.6%) |
1011 (40.4%) |
12934 (57.4%) |
| Drinking status |
|
|
|
| Current |
13955 (69.7%) |
1713 (68.5%) |
15668 (69.6%) |
| Former |
2669 (13.3%) |
374 (14.9%) |
3043 (13.5%) |
| Never |
3390 (16.9%) |
415 (16.6%) |
3805 (16.9%) |
| Physical activity |
|
|
|
| Low |
4845 (24.2%) |
1100 (44.0%) |
5945 (26.4%) |
| Moderate |
7413 (37.0%) |
917 (36.7%) |
8330 (37.0%) |
| High |
7756 (38.8%) |
485 (19.4%) |
8241 (36.6%) |
| Total cholesterol, mg/dL |
191.6 (41.1); median [IQR] = 189.0 [163.0–215.0] |
177.5 (43.1); median [IQR] = 173.0 [146.0–205.0] |
190.0 (41.5); median [IQR] = 187.0 [162.0–215.0] |
| HDL-cholesterol, mg/dL |
53.4 (16.0); median [IQR] = 51.0 [42.0–62.0] |
50.5 (15.9); median [IQR] = 47.0 [40.0–59.0] |
53.1 (16.0); median [IQR] = 50.5 [42.0–62.0] |
| Serum creatinine, mg/dL |
0.9 (0.5); median [IQR] = 0.8 [0.7–1.0] |
1.1 (0.8); median [IQR] = 1.0 [0.8–1.2] |
0.9 (0.5); median [IQR] = 0.8 [0.7–1.0] |
| eGFR, mL/min/1.73m^2 |
95.9 (22.8); median [IQR] = 98.8 [82.4–112.6] |
73.8 (24.5); median [IQR] = 75.5 [57.1–92.7] |
93.4 (24.0); median [IQR] = 96.7 [78.9–111.1] |
| HbA1c, % |
5.8 (1.1); median [IQR] = 5.5 [5.2–5.9] |
6.3 (1.4); median [IQR] = 5.9 [5.5–6.6] |
5.8 (1.1); median [IQR] = 5.5 [5.3–5.9] |
| Serum glucose, mg/dL |
102.5 (38.3); median [IQR] = 93.0 [86.0–104.0] |
117.9 (53.0); median [IQR] = 101.0 [90.0–123.0] |
104.2 (40.5); median [IQR] = 94.0 [86.0–105.0] |
9) (Optional) Table 1 — Nutrition intake (with p-value)
# P-value column for table1's `extra.col` hook.
#
# table1::table1() has no `test` argument: anything passed under that name is
# swallowed by `...` and no p-value column is produced. The supported route is
# `extra.col`, a named list of functions that each receive, per variable, a
# list holding that variable's values for every displayed column.
#
# Continuous variables use Welch's two-sample t-test, categorical variables a
# chi-squared test of independence. Defined inside this chunk so the chunk
# runs on its own.
t1_pvalue <- function(x, ...) {
  # The pooled "Overall" column is not a comparison group; drop it if present.
  is_overall <- tolower(names(x)) %in% "overall"
  if (any(is_overall)) x <- x[!is_overall]
  y <- unlist(x, use.names = FALSE)
  g <- factor(rep(seq_along(x), times = lengths(x)))
  p <- tryCatch(
    if (is.numeric(y)) {
      stats::t.test(y ~ g)$p.value
    } else {
      stats::chisq.test(table(y, g))$p.value
    },
    error = function(e) {
      # Keep the table renderable, but make the failure visible.
      warning("P-value could not be computed: ", conditionMessage(e), call. = FALSE)
      NA_real_
    }
  )
  # Leading "" puts the value on the line below the variable label;
  # "<" is written as an HTML entity because table1 emits HTML.
  c("", sub("<", "&lt;", format.pval(p, digits = 3, eps = 0.001), fixed = TRUE))
}

table1(
  form_nutrition,
  data = df_t1,
  overall = "Overall",
  # P-value column is shown only when the document parameter asks for it.
  extra.col = if (isTRUE(params$show_pvalue)) list(`P-value` = t1_pvalue) else NULL,
  render.continuous = rndr,
  render.strat = rndr_strat,
  topclass = "Rtable1-zebra"
)
|
No (n=20014) |
Yes (n=2502) |
Overall (n=22516) |
| Carbohydrate, g/day |
247.1 (106.7); median [IQR] = 231.8 [174.2–301.6] |
224.0 (102.6); median [IQR] = 211.4 [156.6–273.3] |
244.5 (106.5); median [IQR] = 229.1 [172.0–298.4] |
| Protein, g/day |
81.0 (35.8); median [IQR] = 75.5 [57.0–98.0] |
71.7 (31.4); median [IQR] = 67.4 [50.4–87.9] |
79.9 (35.5); median [IQR] = 74.5 [56.2–96.9] |
| Saturated fat, g/day |
25.3 (13.9); median [IQR] = 22.8 [15.7–31.9] |
23.6 (13.0); median [IQR] = 21.5 [14.5–29.8] |
25.1 (13.9); median [IQR] = 22.6 [15.6–31.7] |
| Monounsaturated fat, g/day |
27.9 (14.5); median [IQR] = 25.4 [18.0–34.8] |
25.6 (13.5); median [IQR] = 23.4 [16.3–32.2] |
27.6 (14.4); median [IQR] = 25.2 [17.8–34.4] |
| Polyunsaturated fat, g/day |
18.7 (10.6); median [IQR] = 16.7 [11.6–23.6] |
17.0 (9.8); median [IQR] = 15.2 [10.1–21.8] |
18.5 (10.5); median [IQR] = 16.6 [11.4–23.4] |
| Dietary fiber, g/day |
17.2 (9.5); median [IQR] = 15.4 [10.6–21.7] |
15.4 (8.5); median [IQR] = 13.9 [9.6–19.8] |
17.0 (9.4); median [IQR] = 15.2 [10.5–21.5] |
| Dietary cholesterol, mg/day |
295.8 (198.3); median [IQR] = 251.5 [157.0–385.5] |
278.1 (186.0); median [IQR] = 234.5 [145.6–366.5] |
293.8 (197.0); median [IQR] = 250.0 [155.5–383.0] |
| Sodium, mg/day |
3439.2 (1529.3); median [IQR] = 3200.5 [2395.0–4177.4] |
3087.3 (1378.2); median [IQR] = 2895.2 [2138.1–3770.9] |
3400.1 (1517.3); median [IQR] = 3163.5 [2366.4–4131.6] |
| Potassium, mg/day |
2582.7 (1072.8); median [IQR] = 2428.2 [1863.6–3128.5] |
2426.5 (1026.1); median [IQR] = 2306.2 [1749.9–2982.2] |
2565.4 (1068.8); median [IQR] = 2413.0 [1849.0–3111.5] |
| Magnesium, mg/day |
295.5 (131.5); median [IQR] = 273.0 [207.0–357.5] |
266.4 (120.6); median [IQR] = 249.5 [185.5–327.0] |
292.3 (130.6); median [IQR] = 270.0 [204.0–354.0] |
| Calcium, mg/day |
911.2 (487.8); median [IQR] = 825.0 [579.6–1129.5] |
831.7 (448.1); median [IQR] = 758.0 [514.2–1051.4] |
902.4 (484.2); median [IQR] = 817.5 [571.0–1122.0] |
| Phosphorus, mg/day |
1339.9 (573.3); median [IQR] = 1251.0 [961.5–1613.0] |
1205.8 (520.3); median [IQR] = 1133.5 [858.0–1473.8] |
1325.0 (569.2); median [IQR] = 1236.5 [948.5–1599.6] |
| Iron, mg/day |
14.2 (7.2); median [IQR] = 12.9 [9.5–17.3] |
13.5 (7.0); median [IQR] = 12.2 [8.9–16.5] |
14.1 (7.2); median [IQR] = 12.8 [9.4–17.2] |
| Zinc, mg/day |
10.8 (5.9); median [IQR] = 9.7 [7.1–13.1] |
9.9 (5.1); median [IQR] = 9.1 [6.5–12.2] |
10.7 (5.8); median [IQR] = 9.6 [7.0–13.0] |
| Vitamin A, µg RAE/day |
390.5 (369.5); median [IQR] = 316.0 [180.0–502.0] |
411.0 (470.5); median [IQR] = 326.2 [186.1–509.4] |
392.8 (382.1); median [IQR] = 317.0 [180.5–503.0] |
| Vitamin B1 (Thiamine), mg/day |
1.6 (0.8); median [IQR] = 1.4 [1.1–1.9] |
1.5 (0.7); median [IQR] = 1.3 [1.0–1.8] |
1.6 (0.8); median [IQR] = 1.4 [1.1–1.9] |
| Vitamin B6, mg/day |
2.1 (1.4); median [IQR] = 1.8 [1.3–2.5] |
1.9 (1.4); median [IQR] = 1.6 [1.2–2.2] |
2.1 (1.4); median [IQR] = 1.8 [1.3–2.5] |
| Vitamin B12, µg/day |
4.8 (4.5); median [IQR] = 3.8 [2.4–5.9] |
4.5 (5.3); median [IQR] = 3.6 [2.2–5.5] |
4.7 (4.6); median [IQR] = 3.8 [2.4–5.8] |
| Folate (B9), µg DFE/day |
392.2 (216.6); median [IQR] = 350.5 [251.5–483.0] |
350.9 (191.6); median [IQR] = 309.2 [224.6–437.5] |
387.6 (214.3); median [IQR] = 346.0 [248.5–477.1] |
| Vitamin C, mg/day |
82.8 (78.7); median [IQR] = 61.4 [28.5–113.1] |
76.1 (72.5); median [IQR] = 55.4 [25.1–103.8] |
82.1 (78.0); median [IQR] = 60.7 [28.1–112.0] |
| Vitamin D, IU/day |
4.5 (4.7); median [IQR] = 3.4 [1.6–5.8] |
4.7 (4.7); median [IQR] = 3.5 [1.9–5.9] |
4.5 (4.7); median [IQR] = 3.4 [1.7–5.8] |
| Vitamin E, mg/day |
0.7 (3.0); median [IQR] = 0.0 [0.0–0.0] |
0.7 (2.8); median [IQR] = 0.0 [0.0–0.0] |
0.7 (2.9); median [IQR] = 0.0 [0.0–0.0] |
10) Render từ Console (không chạy trong Rmd)
# Ví dụ chạy ở Console:
# rmarkdown::render("CVD---Table1.Rmd",
# output_format = "word_document",
# params = list(
# input_csv = "/Users/thien/Desktop/NHANES_data/Dr Nhat Minh/imputed_df_CVD.csv",
# show_pvalue = TRUE
# ),
# output_file = "Table1_CVD.docx"
# )
# Optional HTTPS proxy.
# The proxy URL (which may embed credentials) is read from the
# TABLE1_HTTPS_PROXY environment variable instead of being written into the
# source, and nothing is changed when that variable is unset or empty.
# Example, outside R:  export TABLE1_HTTPS_PROXY="http://<user>:<password>@<host>:<port>"
proxy_url <- Sys.getenv("TABLE1_HTTPS_PROXY", unset = "")
if (nzchar(proxy_url)) {
  # Set both spellings so whichever one is looked up is found.
  Sys.setenv(https_proxy = proxy_url, HTTPS_PROXY = proxy_url)
}