library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(gtsummary)
library(gt)
# Read the merged analysis dataset (CSV in the current working directory).
merged_data <- read.csv(file = "merged2_data.csv")

# Preview the first six rows to confirm the import looks as expected.
head(merged_data, n = 6L)
##   Sequence.no Blood.Pressure HsCRP Age Gender               Race Poverty.Ratio
## 1      130378            Yes  1.78  43   Male Non-Hispanic Asian          5.00
## 2      130379            Yes  2.03  66   Male Non-Hispanic White          5.00
## 3      130380             No  5.62  44 Female     Other Hispanic          1.41
## 4      130386             No  1.05  34   Male   Mexican American          1.33
## 5      130387            Yes  3.96  68 Female Non-Hispanic White          1.32
## 6      130390             No 11.20  31 Female Non-Hispanic White          2.16
##    BMI Waist Diabetes Kidney.Disease Smoking Cholesterol HDLC HbA1c
## 1 27.0  98.3       No             No     Yes        6.83   45   5.6
## 2 33.5 114.7       No             No     Yes        5.53   60   5.6
## 3 29.7  93.5      Yes             No      No        4.84   49   6.2
## 4 30.2 106.1       No             No     Yes        4.73   46   5.1
## 5 42.6 122.0       No             No      No        5.25   42   5.9
## 6 46.0 131.0       No             No      No        4.11   39   5.9
# Convert categorical variables into factors.
#
# "Don't know" and "Refused" are non-response codes rather than substantive
# categories. Kept as factor levels they add near-empty rows to the summary
# table, enter the percentage denominators, and produce sparse cells that
# make the chi-squared approximation unreliable. They are therefore recoded
# to NA first, so they are reported as missing ("Unknown") instead.
# This mirrors Blood.Pressure, where restricting the levels to "No"/"Yes"
# already turns any other value into NA.
merged_data <- merged_data %>%
  mutate(
    across(
      c(Diabetes, Kidney.Disease, Smoking),
      \(x) replace(x, x %in% c("Don't know", "Refused"), NA)
    ),
    # Explicit level order so "No" is the reference / first column.
    Blood.Pressure = factor(Blood.Pressure, levels = c("No", "Yes")),
    Gender = factor(Gender),
    Race = factor(Race),
    # factor() drops the now-unused non-response levels.
    Diabetes = factor(Diabetes),
    Kidney.Disease = factor(Kidney.Disease),
    Smoking = factor(Smoking)
  )
# Build Table 1: baseline characteristics stratified by Blood.Pressure.
# Continuous variables are summarised as mean ± SD (2 decimals) and compared
# with a t-test; categorical variables as n (%) and compared with a
# chi-squared test. P-values are formatted to 3 digits.
# The specification lists are kept inside local() so that only
# `demographic_table` is created in the calling environment.
demographic_table <- local({
  # Columns shown in the table (grouping column first).
  table_columns <- c(
    "Blood.Pressure", "HsCRP", "Age", "Gender", "Race", "Poverty.Ratio",
    "BMI", "Waist", "Diabetes", "Kidney.Disease", "Smoking",
    "Cholesterol", "HDLC", "HbA1c"
  )

  # Summary statistic template per variable type.
  summary_stats <- list(
    all_continuous() ~ "{mean} ± {sd}",
    all_categorical() ~ "{n} ({p}%)"
  )

  # Display labels for each row of the table.
  row_labels <- list(
    HsCRP ~ "HsCRP (mg/L)",
    Age ~ "Age (years)",
    Gender ~ "Gender",
    Race ~ "Race/Ethnicity",
    Poverty.Ratio ~ "Poverty Ratio",
    BMI ~ "Body Mass Index",
    Waist ~ "Waist (cm)",
    Diabetes ~ "Diabetes Status",
    Kidney.Disease ~ "Kidney Disease",
    Smoking ~ "Smoking Status",
    Cholesterol ~ "Cholesterol (mmol/L)",
    HDLC ~ "HDL-C (mg/dL)",
    HbA1c ~ "HbA1c (%)"
  )

  # Between-group test per variable type.
  group_tests <- list(
    all_continuous() ~ "t.test",
    all_categorical() ~ "chisq.test"
  )

  # P-value formatter used by add_p().
  format_p <- function(x) style_pvalue(x, digits = 3)

  # Single pipe chain, as in a plain top-level pipeline.
  merged_data %>%
    select(all_of(table_columns)) %>%
    tbl_summary(
      by = Blood.Pressure,
      statistic = summary_stats,
      digits = all_continuous() ~ 2,
      label = row_labels
    ) %>%
    add_p(test = group_tests, pvalue_fun = format_p) %>%
    modify_header(label ~ "**Variables**") %>%
    modify_caption("**Table 1. Baseline Demographic and Clinical Characteristics by Hypertension Status**") %>%
    bold_labels()
})
## 31 missing rows in the "Blood.Pressure" column have been removed.
## The following warnings were returned during `modify_caption()`:
## ! For variable `Diabetes` (`Blood.Pressure`) and "statistic", "p.value", and
##   "parameter" statistics: Chi-squared approximation may be incorrect
## ! For variable `Smoking` (`Blood.Pressure`) and "statistic", "p.value", and
##   "parameter" statistics: Chi-squared approximation may be incorrect
# Convert the gtsummary object to a gt table for display, setting the table
# font to the Google font "Poppins" followed by gt's default font stack.
gt::opt_table_font(
  as_gt(demographic_table),
  font = list(gt::google_font("Poppins"), gt::default_fonts())
)
Table 1. Baseline Demographic and Clinical Characteristics by Hypertension Status
Variables | No, N = 16,791¹ | Yes, N = 8,975¹ | p-value²
HsCRP (mg/L) 3.36 ± 6.39 5.19 ± 10.50 <0.001
Age (years) 42.07 ± 18.46 60.36 ± 14.68 <0.001
Gender

0.029
    Female 8,878 (53%) 4,617 (51%)
    Male 7,913 (47%) 4,358 (49%)
Race/Ethnicity

<0.001
    Mexican American 2,508 (15%) 859 (9.6%)
    Non-Hispanic Asian 2,034 (12%) 747 (8.3%)
    Non-Hispanic Black 2,925 (17%) 2,325 (26%)
    Non-Hispanic White 6,541 (39%) 3,691 (41%)
    Other Hispanic 1,885 (11%) 897 (10.0%)
    Other Race - Including Multi-Racial 898 (5.3%) 456 (5.1%)
Poverty Ratio 2.63 ± 1.65 2.55 ± 1.61 0.002
    Unknown 2,049 1,161
Body Mass Index 28.35 ± 7.01 31.72 ± 7.59 <0.001
    Unknown 186 183
Waist (cm) 95.82 ± 16.67 106.67 ± 16.37 <0.001
    Unknown 683 561
Diabetes Status

<0.001
    Borderline 322 (1.9%) 410 (4.6%)
    Don't know 11 (<0.1%) 3 (<0.1%)
    No 15,382 (92%) 6,068 (68%)
    Yes 1,076 (6.4%) 2,494 (28%)
Kidney Disease

<0.001
    Don't know 16 (0.1%) 23 (0.3%)
    No 14,339 (98%) 8,150 (92%)
    Yes 224 (1.5%) 717 (8.1%)
    Unknown 2,212 85
Smoking Status

<0.001
    Don't know 7 (<0.1%) 7 (<0.1%)
    No 10,019 (64%) 4,621 (52%)
    Refused 2 (<0.1%) 4 (<0.1%)
    Yes 5,579 (36%) 4,302 (48%)
    Unknown 1,184 41
Cholesterol (mmol/L) 4.79 ± 1.06 4.77 ± 1.11 0.093
    Unknown 73 42
HDL-C (mg/dL) 54.16 ± 15.46 52.83 ± 16.23 <0.001
    Unknown 73 42
HbA1c (%) 5.58 ± 0.93 6.17 ± 1.27 <0.001
    Unknown 14 9
¹ Mean ± SD; n (%)
² Welch Two Sample t-test; Pearson’s Chi-squared test