0) Setup

1) Packages


suppressPackageStartupMessages({
  library(dplyr)
  library(table1)
})

2) Helpers


# Turn a 0/1 indicator into a two-level factor labelled "No"/"Yes".
# Any value other than 0 or 1 (including NA) becomes NA.
f_yesno <- function(x) {
  factor(
    x,
    levels = c(0, 1),
    labels = c("No", "Yes")
  )
}

# Build a factor from coded values, keeping only the codes that actually
# occur in `x` so that no empty level is created.
#   x  : vector of codes
#   lv : candidate codes, in the desired level order
#   lb : display labels, parallel to `lv`
# Values of `x` that are not listed in `lv` become NA.
safe_factor <- function(x, lv, lb) {
  observed <- x[!is.na(x)]
  # Position of every distinct candidate code that is present in the data.
  keep <- which(!duplicated(lv) & lv %in% observed)
  factor(x, levels = lv[keep], labels = lb[keep])
}

# Renderer for continuous variables: returns one string of the form
# "mean (SD); median [IQR] = median [Q1–Q3]", each number with one decimal.
# Missing values are ignored. Non-numeric input is handed to table1's
# default renderer together with any extra arguments.
rndr <- function(x, ...) {
  if (!is.numeric(x)) {
    return(table1::render.default(x, ...))
  }

  quartiles <- quantile(x, c(0.25, 0.75), na.rm = TRUE)
  mean_sd <- sprintf(
    "%0.1f (%0.1f)",
    mean(x, na.rm = TRUE),
    sd(x, na.rm = TRUE)
  )
  median_iqr <- sprintf(
    "%0.1f [%0.1f–%0.1f]",
    stats::median(x, na.rm = TRUE),
    quartiles[[1]],
    quartiles[[2]]
  )
  paste0(mean_sd, "; median [IQR] = ", median_iqr)
}

# Column header for a stratum: "<label> (n=<count>)".
# Extra arguments are accepted and ignored.
rndr_strat <- function(label, n, ...) {
  count_suffix <- paste0(" (n=", n, ")")
  paste0(label, count_suffix)
}

3) Load data


# The CSV location is supplied through the R Markdown `params` list.
csv_path <- params$input_csv

# Stop with an explicit message when the file is not there.
if (!file.exists(csv_path)) {
  stop("Không tìm thấy file CSV: ", csv_path)
}

# check.names = FALSE keeps the column headers exactly as written in the file.
df <- utils::read.csv(csv_path, check.names = FALSE)

4) Recode & derive analysis variables


# Derive the analysis variables used by the tables from the raw columns.
# All original columns of `df` are kept; new columns are appended in the
# order listed here.
df_t1 <- dplyr::mutate(
  df,

  # --- Demographics ---
  age = RIDAGEYR,
  # Right-closed bins: (0, 39], (39, 59], (59, 120].
  age_grp = cut(
    RIDAGEYR,
    breaks = c(0, 39, 59, 120),
    right  = TRUE,
    labels = c("<40", "40–59", "≥60")
  ),
  sex = factor(
    RIAGENDR,
    levels = c(1, 2),
    labels = c("Male", "Female")
  ),
  # Level order below is the display order in the table.
  race = safe_factor(
    RIDRETH3,
    lv = c(3, 4, 1, 2, 6, 7),
    lb = c(
      "Non-Hispanic White", "Non-Hispanic Black",
      "Mexican American", "Other Hispanic",
      "Non-Hispanic Asian", "Other/Multiracial"
    )
  ),

  # --- Anthropometrics ---
  bmi   = BMXBMI,
  waist = BMXWAIST,

  # --- Clinical risk factors (0/1 columns already present in the data) ---
  hypertension_f = f_yesno(hypertension),
  diabetes_f     = f_yesno(diabetes),
  dyslip_f       = f_yesno(dyslip),

  # --- Behaviors ---
  smoking_f = safe_factor(
    smoking,
    lv = c(1, 2, 3),
    lb = c("Current", "Former", "Never")
  ),
  drinking_f = safe_factor(
    drinking,
    lv = c(1, 2, 3),
    lb = c("Current", "Former", "Never")
  ),
  PA_f = safe_factor(
    PA,
    lv = c(1, 2, 3),
    lb = c("Low", "Moderate", "High")
  ),

  # --- Labs ---
  TC         = LBXTC,
  HDL        = LBDHDD,
  Creatinine = LBXSCR,
  eGFR       = eGFR,
  HbA1c      = LBXGH,
  # Use LBXSGL when that column exists, otherwise LBXGLU, otherwise NA.
  Glucose = if ("LBXSGL" %in% names(df)) {
    LBXSGL
  } else if ("LBXGLU" %in% names(df)) {
    LBXGLU
  } else {
    NA_real_
  },

  # --- Stratification outcome ---
  CVD_f = f_yesno(CVD)
)

5) Labels


# Display labels shown in the row headers of the tables.

# Demographics and anthropometrics
label(df_t1$age)      <- "Age, years"
label(df_t1$age_grp)  <- "Age group"
label(df_t1$sex)      <- "Sex"
label(df_t1$race)     <- "Race/Ethnicity"
label(df_t1$bmi)      <- "Body Mass Index, kg/m^2"
label(df_t1$waist)    <- "Waist circumference, cm"

# Clinical risk factors and behaviors
label(df_t1$hypertension_f) <- "Hypertension"
label(df_t1$diabetes_f)     <- "Diabetes"
label(df_t1$dyslip_f)       <- "Dyslipidemia"
label(df_t1$smoking_f)      <- "Smoking status"
label(df_t1$drinking_f)     <- "Drinking status"
label(df_t1$PA_f)           <- "Physical activity"

# Laboratory measurements
label(df_t1$TC)        <- "Total cholesterol, mg/dL"
label(df_t1$HDL)       <- "HDL-cholesterol, mg/dL"
label(df_t1$Creatinine) <- "Serum creatinine, mg/dL"
label(df_t1$eGFR)      <- "eGFR, mL/min/1.73m^2"
label(df_t1$HbA1c)     <- "HbA1c, %"
label(df_t1$Glucose)   <- "Serum glucose, mg/dL"

# Optional: labels for the nutrition variables (applied only to columns
# that exist in df_t1).
# Vitamin D is labelled in micrograms: the values in this data set
# (mean about 4.5 per day) match NHANES dietary vitamin D, which is
# reported in mcg, not IU.
# NOTE(review): vitE_avg has a median of 0.0 in the rendered table, which
# looks more like "added vitamin E" than total vitamin E -- confirm which
# source column it was derived from.
nice_label <- c(
  carb_avg="Carbohydrate, g/day", prot_avg="Protein, g/day",
  sfat_avg="Saturated fat, g/day", mfat_avg="Monounsaturated fat, g/day", pfat_avg="Polyunsaturated fat, g/day",
  fibe_avg="Dietary fiber, g/day", chol_avg="Dietary cholesterol, mg/day",
  sodi_avg="Sodium, mg/day", pota_avg="Potassium, mg/day", magn_avg="Magnesium, mg/day",
  calc_avg="Calcium, mg/day", phos_avg="Phosphorus, mg/day",
  iron_avg="Iron, mg/day", zinc_avg="Zinc, mg/day",
  vitA_avg="Vitamin A, µg RAE/day", vitB1_avg="Vitamin B1 (Thiamine), mg/day",
  vitB6_avg="Vitamin B6, mg/day", vitB12_avg="Vitamin B12, µg/day",
  vitB9_folate_avg="Folate (B9), µg DFE/day", vitC_avg="Vitamin C, mg/day",
  vitD_avg="Vitamin D, µg/day", vitE_avg="Vitamin E, mg/day"
)
for (v in intersect(names(nice_label), names(df_t1))) {
  label(df_t1[[v]]) <- nice_label[[v]]
}

6) Variable lists


# Row variables of the clinical/laboratory table, in display order.
vars_clinical_labs <- c(
  # demographics
  "age", "age_grp", "sex", "race",
  # anthropometrics
  "bmi", "waist",
  # clinical risk factors
  "hypertension_f", "diabetes_f", "dyslip_f",
  # behaviors
  "smoking_f", "drinking_f", "PA_f",
  # labs
  "TC", "HDL", "Creatinine", "eGFR", "HbA1c", "Glucose"
)

# Row variables of the nutrition table: every column shares the "_avg"
# suffix, so only the stems are listed.
vars_nutrition <- paste0(
  c(
    "carb", "prot",
    "sfat", "mfat", "pfat",
    "fibe", "chol",
    "sodi", "pota", "magn", "calc", "phos",
    "iron", "zinc",
    "vitA", "vitB1", "vitB6", "vitB12", "vitB9_folate",
    "vitC", "vitD", "vitE"
  ),
  "_avg"
)

7) Formulas


# One-sided formulas "~ v1 + v2 + ... | CVD_f": the listed variables become
# table rows and CVD_f defines the column strata.
form_clinical_labs <- stats::as.formula(
  sprintf("~ %s | CVD_f", paste(vars_clinical_labs, collapse = " + "))
)
form_nutrition <- stats::as.formula(
  sprintf("~ %s | CVD_f", paste(vars_nutrition, collapse = " + "))
)

8) Table 1 — Clinical & Labs (with p-value)


# P-value column for table1's `extra.col` argument.
# table1() has no `test` argument (an unknown argument is silently passed on
# to the render functions), so the comparison between CVD = No and CVD = Yes
# has to be supplied as an extra column.
#   x   : list with one data vector per table column
#   ... : further arguments supplied by table1 (unused)
# Returns a single string, so the p-value sits on the variable's label row.
# An empty string (plus a warning) is returned when the test cannot be computed.
t1_pvalue <- function(x, ...) {
  # Compare the strata only; the pooled "Overall" column is not a group.
  if (!is.null(names(x))) {
    x <- x[names(x) != "Overall"]
  }
  y <- unlist(x, use.names = FALSE)
  g <- factor(rep(seq_along(x), times = lengths(x)))

  p <- tryCatch(
    if (is.numeric(y)) {
      # Welch t-test for two groups, Welch one-way test for more.
      if (nlevels(g) == 2L) {
        stats::t.test(y ~ g)$p.value
      } else {
        stats::oneway.test(y ~ g)$p.value
      }
    } else {
      # Chi-squared test of independence for categorical variables.
      stats::chisq.test(table(y, g))$p.value
    },
    error = function(e) {
      warning("p-value not computed: ", conditionMessage(e), call. = FALSE)
      NA_real_
    }
  )
  if (is.na(p)) {
    return("")
  }
  # The table is HTML, so "<" must be written as an entity.
  sub("<", "&lt;", format.pval(p, digits = 3, eps = 0.001), fixed = TRUE)
}

table1(
  form_clinical_labs,
  data = df_t1,
  overall = "Overall",
  render.continuous = rndr,
  render.strat = rndr_strat,
  topclass = "Rtable1-zebra",
  # P-value column (CVD = No vs Yes), only when requested via params.
  extra.col = if (isTRUE(params$show_pvalue)) list(`P-value` = t1_pvalue) else NULL
)
No (n=20014) Yes (n=2502) Overall (n=22516)
Age, years 47.6 (17.2); median [IQR] = 47.0 [33.0–61.0] 66.4 (12.6); median [IQR] = 69.0 [59.0–78.0] 49.7 (17.8); median [IQR] = 50.0 [34.0–64.0]
Age group
<40 7436 (37.2%) 101 (4.0%) 7537 (33.5%)
40–59 6833 (34.1%) 529 (21.1%) 7362 (32.7%)
≥60 5745 (28.7%) 1872 (74.8%) 7617 (33.8%)
Sex
Male 9500 (47.5%) 1391 (55.6%) 10891 (48.4%)
Female 10514 (52.5%) 1111 (44.4%) 11625 (51.6%)
Race/Ethnicity
Non-Hispanic White 7066 (35.3%) 1204 (48.1%) 8270 (36.7%)
Non-Hispanic Black 4506 (22.5%) 606 (24.2%) 5112 (22.7%)
Mexican American 2803 (14.0%) 220 (8.8%) 3023 (13.4%)
Other Hispanic 2142 (10.7%) 218 (8.7%) 2360 (10.5%)
Non-Hispanic Asian 2799 (14.0%) 150 (6.0%) 2949 (13.1%)
Other/Multiracial 698 (3.5%) 104 (4.2%) 802 (3.6%)
Body Mass Index, kg/m^2 29.2 (7.1); median [IQR] = 28.0 [24.2–32.7] 30.4 (7.4); median [IQR] = 29.1 [25.4–34.0] 29.3 (7.2); median [IQR] = 28.1 [24.3–32.9]
Waist circumference, cm 99.1 (16.8); median [IQR] = 97.4 [87.1–108.5] 105.9 (16.5); median [IQR] = 104.3 [94.4–115.5] 99.8 (16.9); median [IQR] = 98.2 [87.8–109.5]
Hypertension
No 11828 (59.1%) 462 (18.5%) 12290 (54.6%)
Yes 8186 (40.9%) 2040 (81.5%) 10226 (45.4%)
Diabetes
No 15395 (76.9%) 1159 (46.3%) 16554 (73.5%)
Yes 4619 (23.1%) 1343 (53.7%) 5962 (26.5%)
Dyslipidemia
No 3134 (15.7%) 152 (6.1%) 3286 (14.6%)
Yes 16880 (84.3%) 2350 (93.9%) 19230 (85.4%)
Smoking status
Current 3820 (19.1%) 540 (21.6%) 4360 (19.4%)
Former 4271 (21.3%) 951 (38.0%) 5222 (23.2%)
Never 11923 (59.6%) 1011 (40.4%) 12934 (57.4%)
Drinking status
Current 13955 (69.7%) 1713 (68.5%) 15668 (69.6%)
Former 2669 (13.3%) 374 (14.9%) 3043 (13.5%)
Never 3390 (16.9%) 415 (16.6%) 3805 (16.9%)
Physical activity
Low 4845 (24.2%) 1100 (44.0%) 5945 (26.4%)
Moderate 7413 (37.0%) 917 (36.7%) 8330 (37.0%)
High 7756 (38.8%) 485 (19.4%) 8241 (36.6%)
Total cholesterol, mg/dL 191.6 (41.1); median [IQR] = 189.0 [163.0–215.0] 177.5 (43.1); median [IQR] = 173.0 [146.0–205.0] 190.0 (41.5); median [IQR] = 187.0 [162.0–215.0]
HDL-cholesterol, mg/dL 53.4 (16.0); median [IQR] = 51.0 [42.0–62.0] 50.5 (15.9); median [IQR] = 47.0 [40.0–59.0] 53.1 (16.0); median [IQR] = 50.5 [42.0–62.0]
Serum creatinine, mg/dL 0.9 (0.5); median [IQR] = 0.8 [0.7–1.0] 1.1 (0.8); median [IQR] = 1.0 [0.8–1.2] 0.9 (0.5); median [IQR] = 0.8 [0.7–1.0]
eGFR, mL/min/1.73m^2 95.9 (22.8); median [IQR] = 98.8 [82.4–112.6] 73.8 (24.5); median [IQR] = 75.5 [57.1–92.7] 93.4 (24.0); median [IQR] = 96.7 [78.9–111.1]
HbA1c, % 5.8 (1.1); median [IQR] = 5.5 [5.2–5.9] 6.3 (1.4); median [IQR] = 5.9 [5.5–6.6] 5.8 (1.1); median [IQR] = 5.5 [5.3–5.9]
Serum glucose, mg/dL 102.5 (38.3); median [IQR] = 93.0 [86.0–104.0] 117.9 (53.0); median [IQR] = 101.0 [90.0–123.0] 104.2 (40.5); median [IQR] = 94.0 [86.0–105.0]

9) (Optional) Table 1 — Nutrition intake (with p-value)


# P-value column for table1's `extra.col` argument.
# table1() has no `test` argument (an unknown argument is silently passed on
# to the render functions), so the comparison between CVD = No and CVD = Yes
# has to be supplied as an extra column.
#   x   : list with one data vector per table column
#   ... : further arguments supplied by table1 (unused)
# Returns a single string, so the p-value sits on the variable's label row.
# An empty string (plus a warning) is returned when the test cannot be computed.
t1_pvalue <- function(x, ...) {
  # Compare the strata only; the pooled "Overall" column is not a group.
  if (!is.null(names(x))) {
    x <- x[names(x) != "Overall"]
  }
  y <- unlist(x, use.names = FALSE)
  g <- factor(rep(seq_along(x), times = lengths(x)))

  p <- tryCatch(
    if (is.numeric(y)) {
      # Welch t-test for two groups, Welch one-way test for more.
      if (nlevels(g) == 2L) {
        stats::t.test(y ~ g)$p.value
      } else {
        stats::oneway.test(y ~ g)$p.value
      }
    } else {
      # Chi-squared test of independence for categorical variables.
      stats::chisq.test(table(y, g))$p.value
    },
    error = function(e) {
      warning("p-value not computed: ", conditionMessage(e), call. = FALSE)
      NA_real_
    }
  )
  if (is.na(p)) {
    return("")
  }
  # The table is HTML, so "<" must be written as an entity.
  sub("<", "&lt;", format.pval(p, digits = 3, eps = 0.001), fixed = TRUE)
}

table1(
  form_nutrition,
  data = df_t1,
  overall = "Overall",
  render.continuous = rndr,
  render.strat = rndr_strat,
  topclass = "Rtable1-zebra",
  # P-value column (CVD = No vs Yes), only when requested via params.
  extra.col = if (isTRUE(params$show_pvalue)) list(`P-value` = t1_pvalue) else NULL
)
No (n=20014) Yes (n=2502) Overall (n=22516)
Carbohydrate, g/day 247.1 (106.7); median [IQR] = 231.8 [174.2–301.6] 224.0 (102.6); median [IQR] = 211.4 [156.6–273.3] 244.5 (106.5); median [IQR] = 229.1 [172.0–298.4]
Protein, g/day 81.0 (35.8); median [IQR] = 75.5 [57.0–98.0] 71.7 (31.4); median [IQR] = 67.4 [50.4–87.9] 79.9 (35.5); median [IQR] = 74.5 [56.2–96.9]
Saturated fat, g/day 25.3 (13.9); median [IQR] = 22.8 [15.7–31.9] 23.6 (13.0); median [IQR] = 21.5 [14.5–29.8] 25.1 (13.9); median [IQR] = 22.6 [15.6–31.7]
Monounsaturated fat, g/day 27.9 (14.5); median [IQR] = 25.4 [18.0–34.8] 25.6 (13.5); median [IQR] = 23.4 [16.3–32.2] 27.6 (14.4); median [IQR] = 25.2 [17.8–34.4]
Polyunsaturated fat, g/day 18.7 (10.6); median [IQR] = 16.7 [11.6–23.6] 17.0 (9.8); median [IQR] = 15.2 [10.1–21.8] 18.5 (10.5); median [IQR] = 16.6 [11.4–23.4]
Dietary fiber, g/day 17.2 (9.5); median [IQR] = 15.4 [10.6–21.7] 15.4 (8.5); median [IQR] = 13.9 [9.6–19.8] 17.0 (9.4); median [IQR] = 15.2 [10.5–21.5]
Dietary cholesterol, mg/day 295.8 (198.3); median [IQR] = 251.5 [157.0–385.5] 278.1 (186.0); median [IQR] = 234.5 [145.6–366.5] 293.8 (197.0); median [IQR] = 250.0 [155.5–383.0]
Sodium, mg/day 3439.2 (1529.3); median [IQR] = 3200.5 [2395.0–4177.4] 3087.3 (1378.2); median [IQR] = 2895.2 [2138.1–3770.9] 3400.1 (1517.3); median [IQR] = 3163.5 [2366.4–4131.6]
Potassium, mg/day 2582.7 (1072.8); median [IQR] = 2428.2 [1863.6–3128.5] 2426.5 (1026.1); median [IQR] = 2306.2 [1749.9–2982.2] 2565.4 (1068.8); median [IQR] = 2413.0 [1849.0–3111.5]
Magnesium, mg/day 295.5 (131.5); median [IQR] = 273.0 [207.0–357.5] 266.4 (120.6); median [IQR] = 249.5 [185.5–327.0] 292.3 (130.6); median [IQR] = 270.0 [204.0–354.0]
Calcium, mg/day 911.2 (487.8); median [IQR] = 825.0 [579.6–1129.5] 831.7 (448.1); median [IQR] = 758.0 [514.2–1051.4] 902.4 (484.2); median [IQR] = 817.5 [571.0–1122.0]
Phosphorus, mg/day 1339.9 (573.3); median [IQR] = 1251.0 [961.5–1613.0] 1205.8 (520.3); median [IQR] = 1133.5 [858.0–1473.8] 1325.0 (569.2); median [IQR] = 1236.5 [948.5–1599.6]
Iron, mg/day 14.2 (7.2); median [IQR] = 12.9 [9.5–17.3] 13.5 (7.0); median [IQR] = 12.2 [8.9–16.5] 14.1 (7.2); median [IQR] = 12.8 [9.4–17.2]
Zinc, mg/day 10.8 (5.9); median [IQR] = 9.7 [7.1–13.1] 9.9 (5.1); median [IQR] = 9.1 [6.5–12.2] 10.7 (5.8); median [IQR] = 9.6 [7.0–13.0]
Vitamin A, µg RAE/day 390.5 (369.5); median [IQR] = 316.0 [180.0–502.0] 411.0 (470.5); median [IQR] = 326.2 [186.1–509.4] 392.8 (382.1); median [IQR] = 317.0 [180.5–503.0]
Vitamin B1 (Thiamine), mg/day 1.6 (0.8); median [IQR] = 1.4 [1.1–1.9] 1.5 (0.7); median [IQR] = 1.3 [1.0–1.8] 1.6 (0.8); median [IQR] = 1.4 [1.1–1.9]
Vitamin B6, mg/day 2.1 (1.4); median [IQR] = 1.8 [1.3–2.5] 1.9 (1.4); median [IQR] = 1.6 [1.2–2.2] 2.1 (1.4); median [IQR] = 1.8 [1.3–2.5]
Vitamin B12, µg/day 4.8 (4.5); median [IQR] = 3.8 [2.4–5.9] 4.5 (5.3); median [IQR] = 3.6 [2.2–5.5] 4.7 (4.6); median [IQR] = 3.8 [2.4–5.8]
Folate (B9), µg DFE/day 392.2 (216.6); median [IQR] = 350.5 [251.5–483.0] 350.9 (191.6); median [IQR] = 309.2 [224.6–437.5] 387.6 (214.3); median [IQR] = 346.0 [248.5–477.1]
Vitamin C, mg/day 82.8 (78.7); median [IQR] = 61.4 [28.5–113.1] 76.1 (72.5); median [IQR] = 55.4 [25.1–103.8] 82.1 (78.0); median [IQR] = 60.7 [28.1–112.0]
Vitamin D, IU/day 4.5 (4.7); median [IQR] = 3.4 [1.6–5.8] 4.7 (4.7); median [IQR] = 3.5 [1.9–5.9] 4.5 (4.7); median [IQR] = 3.4 [1.7–5.8]
Vitamin E, mg/day 0.7 (3.0); median [IQR] = 0.0 [0.0–0.0] 0.7 (2.8); median [IQR] = 0.0 [0.0–0.0] 0.7 (2.9); median [IQR] = 0.0 [0.0–0.0]

10) Render từ Console (không chạy trong Rmd)


# Example: run this from the Console (not from inside the Rmd):
# rmarkdown::render("CVD---Table1.Rmd",
#   output_format = "word_document",
#   params = list(
#     input_csv   = "/Users/thien/Desktop/NHANES_data/Dr Nhat Minh/imputed_df_CVD.csv",
#     show_pvalue = TRUE
#   ),
#   output_file = "Table1_CVD.docx"
# )

# Point the session at the HTTPS proxy; both the lower-case and the
# upper-case variable are set.
# NOTE(review): the URL looks like a template (user:pass@proxy.company.com).
# Credentials should not be kept inline in a script -- confirm whether these
# lines are meant to run at all, or belong in the user's environment file.
Sys.setenv(
  https_proxy = "http://user:pass@proxy.company.com:8080",
  HTTPS_PROXY = "http://user:pass@proxy.company.com:8080"
)