Demographic table

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(gtsummary)
library(gt)

# Read the merged analysis dataset (CSV in the working directory)

merged_data <- read.csv(file = "merged2_data.csv")

# Preview the first six rows to sanity-check column names and types

head(merged_data, n = 6L)

##   Sequence.no Blood.Pressure HsCRP Age Gender               Race Poverty.Ratio
## 1      130378            Yes  1.78  43   Male Non-Hispanic Asian          5.00
## 2      130379            Yes  2.03  66   Male Non-Hispanic White          5.00
## 3      130380             No  5.62  44 Female     Other Hispanic          1.41
## 4      130386             No  1.05  34   Male   Mexican American          1.33
## 5      130387            Yes  3.96  68 Female Non-Hispanic White          1.32
## 6      130390             No 11.20  31 Female Non-Hispanic White          2.16
##    BMI Waist Diabetes Kidney.Disease Smoking Cholesterol HDLC HbA1c
## 1 27.0  98.3       No             No     Yes        6.83   45   5.6
## 2 33.5 114.7       No             No     Yes        5.53   60   5.6
## 3 29.7  93.5      Yes             No      No        4.84   49   6.2
## 4 30.2 106.1       No             No     Yes        4.73   46   5.1
## 5 42.6 122.0       No             No      No        5.25   42   5.9
## 6 46.0 131.0       No             No      No        4.11   39   5.9

# Convert variables into factors where needed
#
# "Don't know" and "Refused" are survey non-responses, not clinical
# categories. Kept as factor levels they create near-empty cells that make
# the chi-squared approximation unreliable, so they are recoded to NA for the
# questionnaire items before the factors are built. Substantive answers
# (e.g. "Borderline") are left untouched.

merged_data <- merged_data %>%
  mutate(
    # Only "No"/"Yes" are valid levels; any other value becomes NA.
    Blood.Pressure = factor(Blood.Pressure, levels = c("No", "Yes")),
    Gender = factor(Gender),
    Race = factor(Race),
    across(
      c(Diabetes, Kidney.Disease, Smoking),
      function(x) factor(replace(x, x %in% c("Don't know", "Refused"), NA))
    )
  )

# Build Table 1: characteristics summarised by blood-pressure group.
# Continuous variables are shown as mean ± SD and compared with a t-test;
# categorical variables are shown as n (%) and compared with a chi-squared
# test. P-values are formatted to three digits.
demographic_table <- merged_data %>%
  select(
    all_of(c(
      "Blood.Pressure", "HsCRP", "Age", "Gender", "Race", "Poverty.Ratio",
      "BMI", "Waist", "Diabetes", "Kidney.Disease", "Smoking", "Cholesterol",
      "HDLC", "HbA1c"
    ))
  ) %>%
  tbl_summary(
    by = Blood.Pressure,
    label = list(
      HsCRP = "HsCRP (mg/L)",
      Age = "Age (years)",
      Gender = "Gender",
      Race = "Race/Ethnicity",
      Poverty.Ratio = "Poverty Ratio",
      BMI = "Body Mass Index",
      Waist = "Waist (cm)",
      Diabetes = "Diabetes Status",
      Kidney.Disease = "Kidney Disease",
      Smoking = "Smoking Status",
      Cholesterol = "Cholesterol (mmol/L)",
      HDLC = "HDL-C (mg/dL)",
      HbA1c = "HbA1c (%)"
    ),
    statistic = list(
      all_continuous() ~ "{mean} ± {sd}",
      all_categorical() ~ "{n} ({p}%)"
    ),
    digits = list(all_continuous() ~ 2)
  ) %>%
  add_p(
    test = list(
      all_continuous() ~ "t.test",
      all_categorical() ~ "chisq.test"
    ),
    pvalue_fun = function(p) style_pvalue(p, digits = 3)
  ) %>%
  modify_header(label = "**Variables**") %>%
  modify_caption(
    "**Table 1. Baseline Demographic and Clinical Characteristics by Hypertension Status**"
  ) %>%
  bold_labels()

## 31 missing rows in the "Blood.Pressure" column have been removed.
## The following warnings were returned during `modify_caption()`:
## ! For variable `Diabetes` (`Blood.Pressure`) and "statistic", "p.value", and
##   "parameter" statistics: Chi-squared approximation may be incorrect
## ! For variable `Smoking` (`Blood.Pressure`) and "statistic", "p.value", and
##   "parameter" statistics: Chi-squared approximation may be incorrect

# Convert the summary to a gt table and display it in the Poppins Google
# font, with gt's default font stack as the fallback
gt::opt_table_font(
  as_gt(demographic_table),
  font = list(gt::google_font(name = "Poppins"), gt::default_fonts())
)

**Table 1. Baseline Demographic and Clinical Characteristics by Hypertension Status**
Variables	No N = 16,791¹	Yes N = 8,975¹	p-value²
HsCRP (mg/L)	3.36 ± 6.39	5.19 ± 10.50	<0.001
Age (years)	42.07 ± 18.46	60.36 ± 14.68	<0.001
Gender			0.029
Female	8,878 (53%)	4,617 (51%)
Male	7,913 (47%)	4,358 (49%)
Race/Ethnicity			<0.001
Mexican American	2,508 (15%)	859 (9.6%)
Non-Hispanic Asian	2,034 (12%)	747 (8.3%)
Non-Hispanic Black	2,925 (17%)	2,325 (26%)
Non-Hispanic White	6,541 (39%)	3,691 (41%)
Other Hispanic	1,885 (11%)	897 (10.0%)
Other Race - Including Multi-Racial	898 (5.3%)	456 (5.1%)
Poverty Ratio	2.63 ± 1.65	2.55 ± 1.61	0.002
Unknown	2,049	1,161
Body Mass Index	28.35 ± 7.01	31.72 ± 7.59	<0.001
Unknown	186	183
Waist (cm)	95.82 ± 16.67	106.67 ± 16.37	<0.001
Unknown	683	561
Diabetes Status			<0.001
Borderline	322 (1.9%)	410 (4.6%)
Don't know	11 (<0.1%)	3 (<0.1%)
No	15,382 (92%)	6,068 (68%)
Yes	1,076 (6.4%)	2,494 (28%)
Kidney Disease			<0.001
Don't know	16 (0.1%)	23 (0.3%)
No	14,339 (98%)	8,150 (92%)
Yes	224 (1.5%)	717 (8.1%)
Unknown	2,212	85
Smoking Status			<0.001
Don't know	7 (<0.1%)	7 (<0.1%)
No	10,019 (64%)	4,621 (52%)
Refused	2 (<0.1%)	4 (<0.1%)
Yes	5,579 (36%)	4,302 (48%)
Unknown	1,184	41
Cholesterol (mmol/L)	4.79 ± 1.06	4.77 ± 1.11	0.093
Unknown	73	42
HDL-C (mg/dL)	54.16 ± 15.46	52.83 ± 16.23	<0.001
Unknown	73	42
HbA1c (%)	5.58 ± 0.93	6.17 ± 1.27	<0.001
Unknown	14	9
¹ Mean ± SD; n (%)
² Welch Two Sample t-test; Pearson’s Chi-squared test

Demographic table

Sadek Ahmed

2025-10-17 21:51:51.384589