## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the merged dataset from CSV into a data frame
merged_data <- read.csv(file = "merged2_data.csv")
# Preview the leading rows to confirm the import
head(x = merged_data, n = 6L)
## Sequence.no Blood.Pressure HsCRP Age Gender Race Poverty.Ratio
## 1 130378 Yes 1.78 43 Male Non-Hispanic Asian 5.00
## 2 130379 Yes 2.03 66 Male Non-Hispanic White 5.00
## 3 130380 No 5.62 44 Female Other Hispanic 1.41
## 4 130386 No 1.05 34 Male Mexican American 1.33
## 5 130387 Yes 3.96 68 Female Non-Hispanic White 1.32
## 6 130390 No 11.20 31 Female Non-Hispanic White 2.16
## BMI Waist Diabetes Kidney.Disease Smoking Cholesterol HDLC HbA1c
## 1 27.0 98.3 No No Yes 6.83 45 5.6
## 2 33.5 114.7 No No Yes 5.53 60 5.6
## 3 29.7 93.5 Yes No No 4.84 49 6.2
## 4 30.2 106.1 No No Yes 4.73 46 5.1
## 5 42.6 122.0 No No No 5.25 42 5.9
## 6 46.0 131.0 No No No 4.11 39 5.9
# Convert the categorical variables into factors.
#
# "Don't know" and "Refused" are survey non-response codes rather than
# clinical categories. If they are kept as factor levels they are summarised
# as separate groups and form near-empty cells in the cross-tabulation with
# Blood.Pressure, which is what makes chisq.test() warn that the
# "Chi-squared approximation may be incorrect". They are therefore recoded to
# NA before the factors are built, so they are counted as missing instead.
merged_data <- merged_data %>%
  mutate(
    # Recode non-response answers to NA (works for character or factor input)
    across(
      c(Diabetes, Kidney.Disease, Smoking),
      function(x) replace(x, x %in% c("Don't know", "Refused"), NA)
    ),
    # Explicit levels make "No" the first level; any value other than
    # "No"/"Yes" becomes NA
    Blood.Pressure = factor(Blood.Pressure, levels = c("No", "Yes")),
    Gender = factor(Gender),
    Race = factor(Race),
    # factor() builds the level set from the remaining non-missing values,
    # so the recoded non-response answers no longer appear as levels
    Diabetes = factor(Diabetes),
    Kidney.Disease = factor(Kidney.Disease),
    Smoking = factor(Smoking)
  )
# Build Table 1: descriptive statistics for every covariate, stratified by
# Blood.Pressure, with a between-group p-value for each variable
demographic_table <- merged_data %>%
  # Keep only the stratifying variable and the covariates to be summarised
  select(all_of(c(
    "Blood.Pressure", "HsCRP", "Age", "Gender", "Race", "Poverty.Ratio",
    "BMI", "Waist", "Diabetes", "Kidney.Disease", "Smoking", "Cholesterol",
    "HDLC", "HbA1c"
  ))) %>%
  tbl_summary(
    by = Blood.Pressure,
    # Continuous variables: mean ± SD; categorical variables: count (percent)
    statistic = list(
      all_continuous() ~ "{mean} ± {sd}",
      all_categorical() ~ "{n} ({p}%)"
    ),
    # Two decimal places for every continuous statistic
    digits = list(all_continuous() ~ 2),
    # Display labels shown in the first column of the table
    label = list(
      HsCRP = "HsCRP (mg/L)",
      Age = "Age (years)",
      Gender = "Gender",
      Race = "Race/Ethnicity",
      Poverty.Ratio = "Poverty Ratio",
      BMI = "Body Mass Index",
      Waist = "Waist (cm)",
      Diabetes = "Diabetes Status",
      Kidney.Disease = "Kidney Disease",
      Smoking = "Smoking Status",
      Cholesterol = "Cholesterol (mmol/L)",
      HDLC = "HDL-C (mg/dL)",
      HbA1c = "HbA1c (%)"
    )
  ) %>%
  # t-test for continuous variables, chi-squared test for categorical ones;
  # p-values are formatted to three digits
  add_p(
    test = list(
      all_continuous() ~ "t.test",
      all_categorical() ~ "chisq.test"
    ),
    pvalue_fun = function(p_values) style_pvalue(p_values, digits = 3)
  ) %>%
  modify_header(label = "**Variables**") %>%
  modify_caption(
    "**Table 1. Baseline Demographic and Clinical Characteristics by Hypertension Status**"
  ) %>%
  bold_labels()
## 31 missing rows in the "Blood.Pressure" column have been removed.
## The following warnings were returned during `modify_caption()`:
## ! For variable `Diabetes` (`Blood.Pressure`) and "statistic", "p.value", and
## "parameter" statistics: Chi-squared approximation may be incorrect
## ! For variable `Smoking` (`Blood.Pressure`) and "statistic", "p.value", and
## "parameter" statistics: Chi-squared approximation may be incorrect
# Convert the summary to a gt table and print it, requesting the Poppins
# Google font followed by gt's default fonts
gt::opt_table_font(
  as_gt(demographic_table),
  font = list(gt::google_font("Poppins"), gt::default_fonts())
)
Variables | No (N = 16,791)¹ | Yes (N = 8,975)¹ | p-value² |
---|---|---|---|
HsCRP (mg/L) | 3.36 ± 6.39 | 5.19 ± 10.50 | <0.001 |
Age (years) | 42.07 ± 18.46 | 60.36 ± 14.68 | <0.001 |
Gender | | | 0.029 |
Female | 8,878 (53%) | 4,617 (51%) | |
Male | 7,913 (47%) | 4,358 (49%) | |
Race/Ethnicity | | | <0.001 |
Mexican American | 2,508 (15%) | 859 (9.6%) | |
Non-Hispanic Asian | 2,034 (12%) | 747 (8.3%) | |
Non-Hispanic Black | 2,925 (17%) | 2,325 (26%) | |
Non-Hispanic White | 6,541 (39%) | 3,691 (41%) | |
Other Hispanic | 1,885 (11%) | 897 (10.0%) | |
Other Race - Including Multi-Racial | 898 (5.3%) | 456 (5.1%) | |
Poverty Ratio | 2.63 ± 1.65 | 2.55 ± 1.61 | 0.002 |
Unknown | 2,049 | 1,161 | |
Body Mass Index | 28.35 ± 7.01 | 31.72 ± 7.59 | <0.001 |
Unknown | 186 | 183 | |
Waist (cm) | 95.82 ± 16.67 | 106.67 ± 16.37 | <0.001 |
Unknown | 683 | 561 | |
Diabetes Status | | | <0.001 |
Borderline | 322 (1.9%) | 410 (4.6%) | |
Don't know | 11 (<0.1%) | 3 (<0.1%) | |
No | 15,382 (92%) | 6,068 (68%) | |
Yes | 1,076 (6.4%) | 2,494 (28%) | |
Kidney Disease | | | <0.001 |
Don't know | 16 (0.1%) | 23 (0.3%) | |
No | 14,339 (98%) | 8,150 (92%) | |
Yes | 224 (1.5%) | 717 (8.1%) | |
Unknown | 2,212 | 85 | |
Smoking Status | | | <0.001 |
Don't know | 7 (<0.1%) | 7 (<0.1%) | |
No | 10,019 (64%) | 4,621 (52%) | |
Refused | 2 (<0.1%) | 4 (<0.1%) | |
Yes | 5,579 (36%) | 4,302 (48%) | |
Unknown | 1,184 | 41 | |
Cholesterol (mmol/L) | 4.79 ± 1.06 | 4.77 ± 1.11 | 0.093 |
Unknown | 73 | 42 | |
HDL-C (mg/dL) | 54.16 ± 15.46 | 52.83 ± 16.23 | <0.001 |
Unknown | 73 | 42 | |
HbA1c (%) | 5.58 ± 0.93 | 6.17 ± 1.27 | <0.001 |
Unknown | 14 | 9 | |
¹ Mean ± SD; n (%) | | | |
² Welch Two Sample t-test; Pearson’s Chi-squared test | | | |