#install.packages("conflicted")
#install.packages("dplyr")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(gtsummary)
library(gt)
library(readxl)
library(conflicted)
library(dplyr)
setwd(dir = "/Users/lorandacalderonzamora/Downloads/")
data <- read_excel("PULL DE BASE DE DATOS PARA ANALISIS DESCRIPTIVO.xlsx")
## New names:
## • `` -> `...1`
knitr::opts_knit$set(root.dir = "/Users/lorandacalderonzamora/Downloads")
class(data)
## [1] "tbl_df" "tbl" "data.frame"
names(data)
## [1] "...1" "ID_GROUP" "ID_SAMPLE"
## [4] "SEX" "AGE" "WEIGHT"
## [7] "Waist_circunference" "waist_hip_ratio" "IMC"
## [10] "BODY_FAT" "MUSCLE_MASS" "VISCERAL_FAT"
## [13] "GLUCOSE" "UREA" "CREATININE"
## [16] "CHOLESTEROL" "TRIGLUCERIDES" "HDL"
## [19] "VLDL" "LDL" "HBA1c"
## [22] "EGFR"
data <- as.data.frame(data)
data <- data %>%
mutate(
Groups = dplyr::recode(ID_GROUP, `0` = "CONTROL", `1` = "T2DM", `2` = "DN" ),
Sex = dplyr::recode(SEX, `0` = "Male", `1` = "Female"),
Age = as.numeric(AGE), # Crear o convertir Age a numérico
BMI = as.numeric(IMC),
Glucose = as.numeric(GLUCOSE),
Triglicerides = as.numeric(TRIGLUCERIDES),
HBA1c = as.numeric(HBA1c),
Urea = as.numeric(UREA),
HDL = as.numeric(HDL),
eGFR = as.numeric(EGFR),
WC = as.numeric(Waist_circunference),
WHR = as.numeric(waist_hip_ratio),
Muscle_mass = as.numeric(MUSCLE_MASS),
Creatinine = as.numeric(CREATININE),
VLDL = as.numeric(VLDL),
Visceral_fat = as.numeric(VISCERAL_FAT),
Cholesterol = as.numeric(CHOLESTEROL),
LDL = as.numeric(LDL)
)
## Warning: There were 5 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `WC = as.numeric(Waist_circunference)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 4 remaining warnings.
class(data)
## [1] "data.frame"
view(data)
str(data)
## 'data.frame': 170 obs. of 36 variables:
## $ ...1 : num 1 2 3 4 5 6 7 8 9 10 ...
## $ ID_GROUP : chr "CONTROL" "CONTROL" "CONTROL" "CONTROL" ...
## $ ID_SAMPLE : chr "AJ204" "AJ210" "AJ215" "AJ216" ...
## $ SEX : chr "F" "F" "F" "F" ...
## $ AGE : num 59 53 57 58 34 49 33 33 44 37 ...
## $ WEIGHT : chr "50.8" "48.1" "56.8" "57.9" ...
## $ Waist_circunference: chr "75" "69" "79" "80.5" ...
## $ waist_hip_ratio : chr "0.81967213114754101" "0.7752808988764045" "0.86813186813186816" "0.82818930041152261" ...
## $ IMC : chr "20.6" "20.8" "26.3" "24.2" ...
## $ BODY_FAT : chr "35.4" "29.3" "35.9" "34.5" ...
## $ MUSCLE_MASS : chr "31.1" "32.200000000000003" "34.5" "35.700000000000003" ...
## $ VISCERAL_FAT : chr "6.5" "5" "8" "7.5" ...
## $ GLUCOSE : chr "92" "84" "87" "82" ...
## $ UREA : chr "31.3" "34.799999999999997" "40.5" "31" ...
## $ CREATININE : chr "0.7" "0.9" "0.6" "0.5" ...
## $ CHOLESTEROL : chr "175" "191" "209" "237" ...
## $ TRIGLUCERIDES : chr "81" "99" "88" "117" ...
## $ HDL : num 74 66 53 39 44 63 61 64 47 61 ...
## $ VLDL : num 16.2 19.8 17.6 23.4 23 15.2 13 10.4 31.6 22 ...
## $ LDL : num 84.8 105.2 138.4 174.6 98 ...
## $ HBA1c : num 6.6 5.3 5.9 5.4 4.8 5.6 5.7 4.7 5.3 5.3 ...
## $ EGFR : num 101 73 102 107 111 ...
## $ Groups : chr "CONTROL" "CONTROL" "CONTROL" "CONTROL" ...
## $ Sex : chr "F" "F" "F" "F" ...
## $ Age : num 59 53 57 58 34 49 33 33 44 37 ...
## $ BMI : num 20.6 20.8 26.3 24.2 25.6 25.6 23.8 22.6 24.5 26.7 ...
## $ Glucose : num 92 84 87 82 82 90 84 82 86 87 ...
## $ Triglicerides : num 81 99 88 117 115 76 65 52 158 110 ...
## $ Urea : num 31.3 34.8 40.5 31 36.5 27.4 32.5 24.1 29.9 19.7 ...
## $ eGFR : num 101 73 102 107 111 ...
## $ WC : num 75 69 79 80.5 85.6 83 87.5 73.2 76 89 ...
## $ WHR : num 0.82 0.775 0.868 0.828 0.771 ...
## $ Muscle_mass : num 31.1 32.2 34.5 35.7 61.4 38.7 57.8 36.1 33.4 55 ...
## $ Creatinine : num 0.7 0.9 0.6 0.5 0.9 1 0.9 0.5 0.7 0.7 ...
## $ Visceral_fat : num 6.5 5 8 7.5 8.5 7 6.5 2.5 6 5 ...
## $ Cholesterol : num 175 191 209 237 165 267 155 153 179 221 ...
data <- as.data.frame(data)
selected_data <- data %>%
dplyr::select(Groups, Sex, Age, BMI, Glucose, Triglicerides, HBA1c, Urea, HDL, eGFR, WC, WHR, Muscle_mass, Creatinine,
VLDL, Visceral_fat, Cholesterol, LDL
)
summary_table <- selected_data %>%
tbl_summary(
by = Groups, # Agrupar por la variable 'Groups' creada en mutate
missing = "no",
type = list(
Age ~ "continuous",
Sex ~ "categorical",
BMI ~ "continuous",
Glucose ~ "continuous",
Triglicerides ~ "continuous",
HBA1c ~ "continuous",
Urea ~ "continuous",
HDL ~ "continuous",
eGFR ~ "continuous",
WC ~ "continuous",
WHR ~ "continuous",
Muscle_mass ~ "continuous",
Creatinine ~ "continuous",
VLDL ~ "continuous",
Visceral_fat ~ "continuous",
Cholesterol ~ "continuous",
LDL ~ "continuous"
),
statistic = list(
all_continuous() ~ "{median} ({p25} - {p75})",
all_categorical() ~ "{n} ({p}%)"
)
) %>%
add_overall() %>%
add_p(
test = list(
all_categorical() ~ "fisher.test",
all_continuous() ~ "kruskal.test"
),
test.args = list(
all_categorical() ~ list(simulate.p.value = TRUE)
)
) %>%
modify_header(label = "**Variable**") %>%
modify_caption("**Table: Clinical and biochemical characteristics by Groups**") %>%
as_gt() %>%
gt::tab_style(
style = gt::cell_text(weight = "bold"),
locations = gt::cells_column_labels()
)
summary_table
Variable | Overall N = 1701 |
CONTROL N = 571 |
DN N = 431 |
T2DM N = 701 |
p-value2 |
---|---|---|---|---|---|
Sex | 0.021 | ||||
    F | 115 (68%) | 44 (77%) | 22 (51%) | 49 (70%) | |
    M | 55 (32%) | 13 (23%) | 21 (49%) | 21 (30%) | |
Age | 60 (51 - 66) | 57 (39 - 65) | 65 (59 - 69) | 58 (52 - 65) | <0.001 |
BMI | 27.3 (24.8 - 31.9) | 24.5 (23.3 - 25.5) | 30.0 (26.5 - 32.9) | 30.6 (27.8 - 34.3) | <0.001 |
Glucose | 96 (83 - 139) | 83 (80 - 90) | 112 (95 - 165) | 109 (94 - 167) | <0.001 |
Triglicerides | 133 (87 - 177) | 88 (70 - 134) | 166 (124 - 237) | 142 (104 - 210) | <0.001 |
HBA1c | 6.00 (5.30 - 7.10) | 5.20 (4.90 - 5.80) | 6.31 (5.80 - 7.90) | 6.70 (5.70 - 8.21) | <0.001 |
Urea | 30 (24 - 37) | 30 (24 - 34) | 35 (27 - 58) | 28 (24 - 35) | 0.004 |
HDL | 50 (40 - 59) | 50 (45 - 61) | 53 (40 - 60) | 47 (37 - 55) | 0.033 |
eGFR | 90 (75 - 102) | 93 (81 - 104) | 70 (55 - 79) | 98 (90 - 105) | <0.001 |
WC | 91 (80 - 103) | 78 (73 - 85) | 96 (88 - 110) | 98 (90 - 108) | <0.001 |
WHR | 0.89 (0.82 - 0.96) | 0.81 (0.76 - 0.87) | 0.91 (0.88 - 0.99) | 0.92 (0.86 - 0.97) | <0.001 |
Muscle_mass | 43 (38 - 53) | 38 (36 - 47) | 47 (41 - 56) | 44 (40 - 54) | <0.001 |
Creatinine | 0.80 (0.70 - 0.91) | 0.75 (0.70 - 0.90) | 0.96 (0.83 - 1.34) | 0.70 (0.60 - 0.82) | <0.001 |
VLDL | 27 (18 - 37) | 18 (14 - 27) | 33 (25 - 49) | 28 (21 - 42) | <0.001 |
Visceral_fat | 10.5 (8.0 - 14.5) | 8.0 (6.0 - 9.0) | 14.0 (10.5 - 19.0) | 11.8 (9.5 - 14.8) | <0.001 |
Cholesterol | 191 (165 - 229) | 197 (173 - 233) | 203 (170 - 245) | 187 (152 - 222) | 0.14 |
LDL | 115 (83 - 146) | 123 (98 - 146) | 108 (77 - 158) | 111 (81 - 142) | 0.12 |
1 n (%); Median (Q1 - Q3) | |||||
2 Fisher’s Exact Test for Count Data with simulated p-value (based on 2000 replicates); Kruskal-Wallis rank sum test |
library(ggpubr)
library(cowplot)
library(ggplot2)
# Convertir las variables necesarias a numéricas
data <- data %>%
mutate(
Age = as.numeric(Age),
BMI = as.numeric(BMI),
Glucose = as.numeric(Glucose),
Triglicerides = as.numeric(Triglicerides),
HBA1c = as.numeric(HBA1c),
Urea = as.numeric(Urea),
HDL = as.numeric(HDL),
eGFR = as.numeric(eGFR),
WC = as.numeric(WC),
WHR = as.numeric(WHR),
Muscle_mass = as.numeric(Muscle_mass),
Creatinine = as.numeric(Creatinine),
VLDL = as.numeric(VLDL),
Visceral_fat = as.numeric(Visceral_fat),
Cholesterol = as.numeric(Cholesterol),
LDL = as.numeric(LDL)
)
# Colores personalizados para tres grupos
custom_colors <- c("CONTROL" = "#1f77b4", "T2DM" = "#ff7f0e", "DN" = "#2ca02c")
# Crear un tema personalizado
theme_custom <- theme_pubr() +
theme(
axis.title.x = element_blank(),
axis.text = element_text(size = 10),
legend.position = "none"
)
# Función para crear gráficos de boxplot para cada variable
create_boxplot <- function(data, variable, var_label, y_label, y_lim) {
ggplot(data, aes(x = Groups, y = !!sym(variable), fill = Groups)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.2, alpha = 0.5, color = "black") +
stat_compare_means(aes(label = ..p.signif..), method = "kruskal.test", label.x.npc = "center", label.y = y_lim - 0.1*y_lim) +
labs(title = var_label, y = y_label, fill = " ") +
scale_x_discrete(labels = c("CONTROL", "T2DM", "DN")) +
scale_fill_manual(values = custom_colors) +
ylim(0, y_lim) +
theme_custom
}
# Crear gráficos para cada variable
p1 <- create_boxplot(data, "Age", " ", "Age (years)", 100)
p2 <- create_boxplot(data, "BMI", "", "BMI (kg/m²)", 50)
p3 <- create_boxplot(data, "Glucose", "", "Glucose (mg/dL)", 400)
p4 <- create_boxplot(data, "Triglicerides", " ", "Triglycerides (mg/dL)", 300)
p5 <- create_boxplot(data, "HBA1c", " ", "HbA1c (%)", 20)
p6 <- create_boxplot(data, "Urea", "", "Urea (mg/dL)", 150)
p7 <- create_boxplot(data, "HDL", "", "HDL-C (mg/dL)", 100)
p8 <- create_boxplot(data, "eGFR", "", "eGFR (mL/min/1.73 m²)", 150)
p9 <- create_boxplot(data, "WHR", "", "WHR", 2.0)
p10 <- create_boxplot(data, "Muscle_mass", "", "Muscle Mass (kg)", 50)
p11 <- create_boxplot(data, "Creatinine", "", "Creatinine (mg/dL)", 5)
p12 <- create_boxplot(data, "VLDL", "", "VLDL-C (mg/dL)", 100)
p13 <- create_boxplot(data, "Visceral_fat", "", "Visceral Fat", 50)
p14 <- create_boxplot(data, "Cholesterol", "", "Cholesterol (mg/dL)", 400)
p15 <- create_boxplot(data, "LDL", "", "LDL-C (mg/dL)", 300)
p16 <- create_boxplot(data, "WC", "", "Waist circumference", 150)
# Mostrar ejemplos de gráficos
print(p1)
print(p2)
print(p3)
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
print(p4)
## Warning: Removed 10 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 10 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).
print(p5)
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
print(p6)
## Warning: Removed 8 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 8 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_point()`).
print(p7)
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
print(p8)
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
print(p9)
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
print(p10)
## Warning: Removed 66 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 66 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).
print(p11)
## Warning: Removed 10 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 10 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).
print(p12)
## Warning: Removed 6 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 6 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_point()`).
print(p13)
## Warning: Removed 6 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 6 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_point()`).
print(p14)
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
print(p15)
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
print(p16)
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).