## Sequence.no Blood.Pressure HsCRP Age Gender Race Poverty.Ratio
## 1 130378 Yes 1.78 43 Male Non-Hispanic Asian 5.00
## 2 130379 Yes 2.03 66 Male Non-Hispanic White 5.00
## 3 130380 No 5.62 44 Female Other Hispanic 1.41
## 4 130386 No 1.05 34 Male Mexican American 1.33
## 5 130387 Yes 3.96 68 Female Non-Hispanic White 1.32
## 6 130390 No 11.20 31 Female Non-Hispanic White 2.16
## BMI Waist Diabetes Kidney.Disease Smoking Cholesterol HDLC HbA1c
## 1 27.0 98.3 No No Yes 6.83 45 5.6
## 2 33.5 114.7 No No Yes 5.53 60 5.6
## 3 29.7 93.5 Yes No No 4.84 49 6.2
## 4 30.2 106.1 No No Yes 4.73 46 5.1
## 5 42.6 122.0 No No No 5.25 42 5.9
## 6 46.0 131.0 No No No 4.11 39 5.9
library(dplyr)
library(gtsummary)
library(gt)
library(tableone)
library(kableExtra)
# --- prepare data & variable lists -----------------------------------------
# Variable roles, stored as character vectors so later steps can select
# columns by name.
exposure <- "Blood.Pressure"
outcome <- "HsCRP" # kept for reference; it is also shown as a table row
covariates <- c(
  "Age", "Gender", "Race", "Poverty.Ratio", "BMI", "Waist",
  "Diabetes", "Kidney.Disease", "Smoking", "Cholesterol", "HDLC", "HbA1c"
)
# Start from the already-loaded `merged_data` and coerce the exposure and the
# categorical covariates to factors.
merged_data2 <- merged_data %>%
  mutate(
    # Explicit levels fix the order as No, Yes; any other value becomes NA.
    Blood.Pressure = factor(Blood.Pressure, levels = c("No", "Yes")),
    # Remaining categorical columns keep whatever levels occur in the data.
    across(
      all_of(c("Gender", "Race", "Diabetes", "Kidney.Disease", "Smoking")),
      factor
    )
  )
# A) Pretty summary table with p-values and overall column (gtsummary -> gt)
# Builds `tbl`: one row per variable in `outcome` + `covariates`, one column
# per level of the exposure, plus Overall, N and p-value columns.
tbl <- merged_data2 %>%
  dplyr::select(all_of(c(exposure, outcome, covariates))) %>%
  tbl_summary(
    by = !!rlang::sym(exposure), # stratify by Blood.Pressure
    include = all_of(c(outcome, covariates)), # HsCRP + covariates as rows
    missing = "no", # do not add separate rows for missing values
    statistic = list(
      all_continuous() ~ "{mean} ({sd})", # mean (sd) for continuous
      all_categorical() ~ "{n} ({p}%)" # count (percent) for categorical
    ),
    digits = all_continuous() ~ 2
  ) %>%
  add_overall() %>% # include Overall column
  add_n() %>% # add an N column with the non-missing count per variable
  add_p(test = list(
    # Tests are requested explicitly rather than left to add_p()'s own choice.
    # NOTE(review): the chi-squared test warns for Diabetes and Smoking, whose
    # "Don't know" / "Refused" levels are very sparse.
    all_continuous() ~ "t.test",
    all_categorical() ~ "chisq.test"
  )) %>%
  modify_header(label = "**Characteristic**") %>%
  bold_labels() %>%
  # No space before the closing `**`: with a trailing space the markers are
  # not parsed as markdown bold and the asterisks are shown literally.
  modify_caption("**Table 1. Baseline / Demographic Characteristics**")
## 31 missing rows in the "Blood.Pressure" column have been removed.
## The following warnings were returned during `modify_caption()`:
## ! For variable `Diabetes` (`Blood.Pressure`) and "statistic", "p.value", and
## "parameter" statistics: Chi-squared approximation may be incorrect
## ! For variable `Smoking` (`Blood.Pressure`) and "statistic", "p.value", and
## "parameter" statistics: Chi-squared approximation may be incorrect
| Characteristic | N | Overall N = 25,766¹ | No N = 16,791¹ | Yes N = 8,975¹ | p-value² |
|---|---|---|---|---|---|
| HsCRP | 25,766 | 4.00 (8.11) | 3.36 (6.39) | 5.19 (10.50) | <0.001 |
| Age | 25,766 | 48.44 (19.32) | 42.07 (18.46) | 60.36 (14.68) | <0.001 |
| Gender | 25,766 | | | | 0.029 |
| Female | | 13,495 (52%) | 8,878 (53%) | 4,617 (51%) | |
| Male | | 12,271 (48%) | 7,913 (47%) | 4,358 (49%) | |
| Race | 25,766 | | | | <0.001 |
| Mexican American | | 3,367 (13%) | 2,508 (15%) | 859 (9.6%) | |
| Non-Hispanic Asian | | 2,781 (11%) | 2,034 (12%) | 747 (8.3%) | |
| Non-Hispanic Black | | 5,250 (20%) | 2,925 (17%) | 2,325 (26%) | |
| Non-Hispanic White | | 10,232 (40%) | 6,541 (39%) | 3,691 (41%) | |
| Other Hispanic | | 2,782 (11%) | 1,885 (11%) | 897 (10.0%) | |
| Other Race - Including Multi-Racial | | 1,354 (5.3%) | 898 (5.3%) | 456 (5.1%) | |
| Poverty.Ratio | 22,556 | 2.60 (1.63) | 2.63 (1.65) | 2.55 (1.61) | 0.002 |
| BMI | 25,397 | 29.52 (7.39) | 28.35 (7.01) | 31.72 (7.59) | <0.001 |
| Waist | 24,522 | 99.54 (17.35) | 95.82 (16.67) | 106.67 (16.37) | <0.001 |
| Diabetes | 25,766 | | | | <0.001 |
| Borderline | | 732 (2.8%) | 322 (1.9%) | 410 (4.6%) | |
| Don't know | | 14 (<0.1%) | 11 (<0.1%) | 3 (<0.1%) | |
| No | | 21,450 (83%) | 15,382 (92%) | 6,068 (68%) | |
| Yes | | 3,570 (14%) | 1,076 (6.4%) | 2,494 (28%) | |
| Kidney.Disease | 23,469 | | | | <0.001 |
| Don't know | | 39 (0.2%) | 16 (0.1%) | 23 (0.3%) | |
| No | | 22,489 (96%) | 14,339 (98%) | 8,150 (92%) | |
| Yes | | 941 (4.0%) | 224 (1.5%) | 717 (8.1%) | |
| Smoking | 24,541 | | | | <0.001 |
| Don't know | | 14 (<0.1%) | 7 (<0.1%) | 7 (<0.1%) | |
| No | | 14,640 (60%) | 10,019 (64%) | 4,621 (52%) | |
| Refused | | 6 (<0.1%) | 2 (<0.1%) | 4 (<0.1%) | |
| Yes | | 9,881 (40%) | 5,579 (36%) | 4,302 (48%) | |
| Cholesterol | 25,651 | 4.79 (1.08) | 4.79 (1.06) | 4.77 (1.11) | 0.093 |
| HDLC | 25,651 | 53.69 (15.74) | 54.16 (15.46) | 52.83 (16.23) | <0.001 |
| HbA1c | 25,743 | 5.79 (1.10) | 5.58 (0.93) | 6.17 (1.27) | <0.001 |
| ¹ Mean (SD); n (%) | | | | | |
| ² Welch Two Sample t-test; Pearson’s Chi-squared test | | | | | |
# B) TableOne with Standardized Mean Differences (SMD) for quick balance check
# Covariates that tableone should summarise as categorical.
catVars <- c("Gender", "Race", "Diabetes", "Kidney.Disease", "Smoking")
# Build the TableOne object, stratified by the exposure.
tab1 <- CreateTableOne(
  data = merged_data2,
  strata = exposure,
  vars = c(outcome, covariates),
  factorVars = catVars,
  includeNA = FALSE
)
# print() writes the table to the console (output below); its return value is
# kept so the same table can be rendered with kable later.
tab1_print <- print(tab1, smd = TRUE, showAllLevels = TRUE)
## Stratified by Blood.Pressure
## level No
## n 16791
## HsCRP (mean (SD)) 3.36 (6.39)
## Age (mean (SD)) 42.07 (18.46)
## Gender (%) Female 8878 (52.9)
## Male 7913 (47.1)
## Race (%) Mexican American 2508 (14.9)
## Non-Hispanic Asian 2034 (12.1)
## Non-Hispanic Black 2925 (17.4)
## Non-Hispanic White 6541 (39.0)
## Other Hispanic 1885 (11.2)
## Other Race - Including Multi-Racial 898 ( 5.3)
## Poverty.Ratio (mean (SD)) 2.63 (1.65)
## BMI (mean (SD)) 28.35 (7.01)
## Waist (mean (SD)) 95.82 (16.67)
## Diabetes (%) Borderline 322 ( 1.9)
## Don't know 11 ( 0.1)
## No 15382 (91.6)
## Yes 1076 ( 6.4)
## Kidney.Disease (%) Don't know 16 ( 0.1)
## No 14339 (98.4)
## Yes 224 ( 1.5)
## Smoking (%) Don't know 7 ( 0.0)
## No 10019 (64.2)
## Refused 2 ( 0.0)
## Yes 5579 (35.7)
## Cholesterol (mean (SD)) 4.79 (1.06)
## HDLC (mean (SD)) 54.16 (15.46)
## HbA1c (mean (SD)) 5.58 (0.93)
## Stratified by Blood.Pressure
## Yes p test SMD
## n 8975
## HsCRP (mean (SD)) 5.19 (10.50) <0.001 0.211
## Age (mean (SD)) 60.36 (14.68) <0.001 1.097
## Gender (%) 4617 (51.4) 0.029 0.029
## 4358 (48.6)
## Race (%) 859 ( 9.6) <0.001 0.274
## 747 ( 8.3)
## 2325 (25.9)
## 3691 (41.1)
## 897 (10.0)
## 456 ( 5.1)
## Poverty.Ratio (mean (SD)) 2.55 (1.61) 0.002 0.044
## BMI (mean (SD)) 31.72 (7.59) <0.001 0.461
## Waist (mean (SD)) 106.67 (16.37) <0.001 0.657
## Diabetes (%) 410 ( 4.6) <0.001 0.630
## 3 ( 0.0)
## 6068 (67.6)
## 2494 (27.8)
## Kidney.Disease (%) 23 ( 0.3) <0.001 0.311
## 8150 (91.7)
## 717 ( 8.1)
## Smoking (%) 7 ( 0.1) <0.001 0.255
## 4621 (51.7)
## 4 ( 0.0)
## 4302 (48.2)
## Cholesterol (mean (SD)) 4.77 (1.11) 0.089 0.022
## HDLC (mean (SD)) 52.83 (16.23) <0.001 0.084
## HbA1c (mean (SD)) 6.17 (1.27) <0.001 0.525
# Convert the printed TableOne matrix to a data.frame for kable.
# The matrix row names are not unique: every additional level of a categorical
# variable has the row name "". Converting with as.data.frame() and then
# moving the row names into a column would first run them through
# make.names(unique = TRUE), turning "Gender (%)" into "Gender...." and the
# blank names into "X", "X.1", ... Copying the original row names into a
# regular `Variable` column keeps the labels exactly as tableone printed them.
tab1_df <- data.frame(
  Variable = rownames(tab1_print),
  tab1_print,
  row.names = NULL, # plain 1..n row names; the labels live in `Variable`
  check.names = FALSE, # keep the matrix column names untouched
  stringsAsFactors = FALSE
)
# Locate the SMD column by name. Depending on the tableone version it may be
# called "SMD" or something like "Std. Mean Diff", so match loosely and take
# the first hit.
smd_col <- grep("Std.*Mean|SMD|std", names(tab1_df), ignore.case = TRUE, value = TRUE)[1]
# grep(...)[1] is NA when nothing matched; use NULL as the "not found" marker.
if (is.na(smd_col)) smd_col <- NULL
# Render the full TableOne data frame as a styled kable table.
kable_styling(
  kable(
    tab1_df,
    caption = "Table 2. TableOne output including standardized mean differences (SMD)"
  ),
  full_width = FALSE,
  bootstrap_options = c("striped", "hover", "condensed")
)
| Variable | level | No | Yes | p | test | SMD |
|---|---|---|---|---|---|---|
| n | | 16791 | 8975 | | | |
| HsCRP..mean..SD.. | | 3.36 (6.39) | 5.19 (10.50) | <0.001 | | 0.211 |
| Age..mean..SD.. | | 42.07 (18.46) | 60.36 (14.68) | <0.001 | | 1.097 |
| Gender.... | Female | 8878 (52.9) | 4617 (51.4) | 0.029 | | 0.029 |
| X | Male | 7913 (47.1) | 4358 (48.6) | | | |
| Race.... | Mexican American | 2508 (14.9) | 859 ( 9.6) | <0.001 | | 0.274 |
| X.1 | Non-Hispanic Asian | 2034 (12.1) | 747 ( 8.3) | | | |
| X.2 | Non-Hispanic Black | 2925 (17.4) | 2325 (25.9) | | | |
| X.3 | Non-Hispanic White | 6541 (39.0) | 3691 (41.1) | | | |
| X.4 | Other Hispanic | 1885 (11.2) | 897 (10.0) | | | |
| X.5 | Other Race - Including Multi-Racial | 898 ( 5.3) | 456 ( 5.1) | | | |
| Poverty.Ratio..mean..SD.. | | 2.63 (1.65) | 2.55 (1.61) | 0.002 | | 0.044 |
| BMI..mean..SD.. | | 28.35 (7.01) | 31.72 (7.59) | <0.001 | | 0.461 |
| Waist..mean..SD.. | | 95.82 (16.67) | 106.67 (16.37) | <0.001 | | 0.657 |
| Diabetes.... | Borderline | 322 ( 1.9) | 410 ( 4.6) | <0.001 | | 0.630 |
| X.6 | Don't know | 11 ( 0.1) | 3 ( 0.0) | | | |
| X.7 | No | 15382 (91.6) | 6068 (67.6) | | | |
| X.8 | Yes | 1076 ( 6.4) | 2494 (27.8) | | | |
| Kidney.Disease.... | Don't know | 16 ( 0.1) | 23 ( 0.3) | <0.001 | | 0.311 |
| X.9 | No | 14339 (98.4) | 8150 (91.7) | | | |
| X.10 | Yes | 224 ( 1.5) | 717 ( 8.1) | | | |
| Smoking.... | Don't know | 7 ( 0.0) | 7 ( 0.1) | <0.001 | | 0.255 |
| X.11 | No | 10019 (64.2) | 4621 (51.7) | | | |
| X.12 | Refused | 2 ( 0.0) | 4 ( 0.0) | | | |
| X.13 | Yes | 5579 (35.7) | 4302 (48.2) | | | |
| Cholesterol..mean..SD.. | | 4.79 (1.06) | 4.77 (1.11) | 0.089 | | 0.022 |
| HDLC..mean..SD.. | | 54.16 (15.46) | 52.83 (16.23) | <0.001 | | 0.084 |
| HbA1c..mean..SD.. | | 5.58 (0.93) | 6.17 (1.27) | <0.001 | | 0.525 |
# Optionally show only the key columns if an SMD column was found.
# `level` is kept (when present) because the rows for the levels of a
# categorical variable carry no usable label in `Variable`; without it those
# rows cannot be told apart.
if (!is.null(smd_col)) {
  tab1_df %>%
    dplyr::select(
      Variable,
      any_of("level"), # tolerate a table printed without showAllLevels
      starts_with("No"),
      starts_with("Yes"),
      all_of(smd_col)
    ) %>%
    kable(caption = "Table 2. Key columns with SMD (No vs Yes)") %>%
    kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover", "condensed"))
}
| Variable | level | No | Yes | SMD |
|---|---|---|---|---|
| n | | 16791 | 8975 | |
| HsCRP..mean..SD.. | | 3.36 (6.39) | 5.19 (10.50) | 0.211 |
| Age..mean..SD.. | | 42.07 (18.46) | 60.36 (14.68) | 1.097 |
| Gender.... | Female | 8878 (52.9) | 4617 (51.4) | 0.029 |
| X | Male | 7913 (47.1) | 4358 (48.6) | |
| Race.... | Mexican American | 2508 (14.9) | 859 ( 9.6) | 0.274 |
| X.1 | Non-Hispanic Asian | 2034 (12.1) | 747 ( 8.3) | |
| X.2 | Non-Hispanic Black | 2925 (17.4) | 2325 (25.9) | |
| X.3 | Non-Hispanic White | 6541 (39.0) | 3691 (41.1) | |
| X.4 | Other Hispanic | 1885 (11.2) | 897 (10.0) | |
| X.5 | Other Race - Including Multi-Racial | 898 ( 5.3) | 456 ( 5.1) | |
| Poverty.Ratio..mean..SD.. | | 2.63 (1.65) | 2.55 (1.61) | 0.044 |
| BMI..mean..SD.. | | 28.35 (7.01) | 31.72 (7.59) | 0.461 |
| Waist..mean..SD.. | | 95.82 (16.67) | 106.67 (16.37) | 0.657 |
| Diabetes.... | Borderline | 322 ( 1.9) | 410 ( 4.6) | 0.630 |
| X.6 | Don't know | 11 ( 0.1) | 3 ( 0.0) | |
| X.7 | No | 15382 (91.6) | 6068 (67.6) | |
| X.8 | Yes | 1076 ( 6.4) | 2494 (27.8) | |
| Kidney.Disease.... | Don't know | 16 ( 0.1) | 23 ( 0.3) | 0.311 |
| X.9 | No | 14339 (98.4) | 8150 (91.7) | |
| X.10 | Yes | 224 ( 1.5) | 717 ( 8.1) | |
| Smoking.... | Don't know | 7 ( 0.0) | 7 ( 0.1) | 0.255 |
| X.11 | No | 10019 (64.2) | 4621 (51.7) | |
| X.12 | Refused | 2 ( 0.0) | 4 ( 0.0) | |
| X.13 | Yes | 5579 (35.7) | 4302 (48.2) | |
| Cholesterol..mean..SD.. | | 4.79 (1.06) | 4.77 (1.11) | 0.022 |
| HDLC..mean..SD.. | | 54.16 (15.46) | 52.83 (16.23) | 0.084 |
| HbA1c..mean..SD.. | | 5.58 (0.93) | 6.17 (1.27) | 0.525 |