#install.packages("conflicted")
#install.packages("dplyr")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(gtsummary)
library(gt)
library(readxl)
library(conflicted)
library(dplyr)
setwd(dir = "/Users/lorandacalderonzamora/Downloads/")
data <- read_excel("PULL DE BASE DE DATOS PARA ANALISIS DESCRIPTIVO.xlsx")
## New names:
## • `` -> `...1`
knitr::opts_knit$set(root.dir = "/Users/lorandacalderonzamora/Downloads")
class(data)
## [1] "tbl_df"     "tbl"        "data.frame"
names(data)
##  [1] "...1"                "ID_GROUP"            "ID_SAMPLE"          
##  [4] "SEX"                 "AGE"                 "WEIGHT"             
##  [7] "Waist_circunference" "waist_hip_ratio"     "IMC"                
## [10] "BODY_FAT"            "MUSCLE_MASS"         "VISCERAL_FAT"       
## [13] "GLUCOSE"             "UREA"                "CREATININE"         
## [16] "CHOLESTEROL"         "TRIGLUCERIDES"       "HDL"                
## [19] "VLDL"                "LDL"                 "HBA1c"              
## [22] "EGFR"
data <- as.data.frame(data)

data <- data %>%
  mutate(
    Groups = dplyr::recode(ID_GROUP, `0` = "CONTROL", `1` = "T2DM", `2` = "DN" ),
    Sex = dplyr::recode(SEX, `0` = "Male", `1` = "Female"),
    Age = as.numeric(AGE),  # Crear o convertir Age a numérico
    BMI = as.numeric(IMC),
    Glucose = as.numeric(GLUCOSE),
    Triglicerides = as.numeric(TRIGLUCERIDES),
    HBA1c = as.numeric(HBA1c),
    Urea = as.numeric(UREA),
    HDL = as.numeric(HDL),
    eGFR = as.numeric(EGFR),
    WC = as.numeric(Waist_circunference),
    WHR = as.numeric(waist_hip_ratio),
    Muscle_mass = as.numeric(MUSCLE_MASS),
    Creatinine = as.numeric(CREATININE),
    VLDL = as.numeric(VLDL),
    Visceral_fat = as.numeric(VISCERAL_FAT),
    Cholesterol = as.numeric(CHOLESTEROL),
    LDL = as.numeric(LDL)
  )
## Warning: There were 5 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `WC = as.numeric(Waist_circunference)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 4 remaining warnings.
class(data)
## [1] "data.frame"
view(data)
str(data)
## 'data.frame':    170 obs. of  36 variables:
##  $ ...1               : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ ID_GROUP           : chr  "CONTROL" "CONTROL" "CONTROL" "CONTROL" ...
##  $ ID_SAMPLE          : chr  "AJ204" "AJ210" "AJ215" "AJ216" ...
##  $ SEX                : chr  "F" "F" "F" "F" ...
##  $ AGE                : num  59 53 57 58 34 49 33 33 44 37 ...
##  $ WEIGHT             : chr  "50.8" "48.1" "56.8" "57.9" ...
##  $ Waist_circunference: chr  "75" "69" "79" "80.5" ...
##  $ waist_hip_ratio    : chr  "0.81967213114754101" "0.7752808988764045" "0.86813186813186816" "0.82818930041152261" ...
##  $ IMC                : chr  "20.6" "20.8" "26.3" "24.2" ...
##  $ BODY_FAT           : chr  "35.4" "29.3" "35.9" "34.5" ...
##  $ MUSCLE_MASS        : chr  "31.1" "32.200000000000003" "34.5" "35.700000000000003" ...
##  $ VISCERAL_FAT       : chr  "6.5" "5" "8" "7.5" ...
##  $ GLUCOSE            : chr  "92" "84" "87" "82" ...
##  $ UREA               : chr  "31.3" "34.799999999999997" "40.5" "31" ...
##  $ CREATININE         : chr  "0.7" "0.9" "0.6" "0.5" ...
##  $ CHOLESTEROL        : chr  "175" "191" "209" "237" ...
##  $ TRIGLUCERIDES      : chr  "81" "99" "88" "117" ...
##  $ HDL                : num  74 66 53 39 44 63 61 64 47 61 ...
##  $ VLDL               : num  16.2 19.8 17.6 23.4 23 15.2 13 10.4 31.6 22 ...
##  $ LDL                : num  84.8 105.2 138.4 174.6 98 ...
##  $ HBA1c              : num  6.6 5.3 5.9 5.4 4.8 5.6 5.7 4.7 5.3 5.3 ...
##  $ EGFR               : num  101 73 102 107 111 ...
##  $ Groups             : chr  "CONTROL" "CONTROL" "CONTROL" "CONTROL" ...
##  $ Sex                : chr  "F" "F" "F" "F" ...
##  $ Age                : num  59 53 57 58 34 49 33 33 44 37 ...
##  $ BMI                : num  20.6 20.8 26.3 24.2 25.6 25.6 23.8 22.6 24.5 26.7 ...
##  $ Glucose            : num  92 84 87 82 82 90 84 82 86 87 ...
##  $ Triglicerides      : num  81 99 88 117 115 76 65 52 158 110 ...
##  $ Urea               : num  31.3 34.8 40.5 31 36.5 27.4 32.5 24.1 29.9 19.7 ...
##  $ eGFR               : num  101 73 102 107 111 ...
##  $ WC                 : num  75 69 79 80.5 85.6 83 87.5 73.2 76 89 ...
##  $ WHR                : num  0.82 0.775 0.868 0.828 0.771 ...
##  $ Muscle_mass        : num  31.1 32.2 34.5 35.7 61.4 38.7 57.8 36.1 33.4 55 ...
##  $ Creatinine         : num  0.7 0.9 0.6 0.5 0.9 1 0.9 0.5 0.7 0.7 ...
##  $ Visceral_fat       : num  6.5 5 8 7.5 8.5 7 6.5 2.5 6 5 ...
##  $ Cholesterol        : num  175 191 209 237 165 267 155 153 179 221 ...
data <- as.data.frame(data)

selected_data <- data %>% 
  dplyr::select(Groups, Sex, Age, BMI, Glucose, Triglicerides, HBA1c, Urea, HDL, eGFR, WC, WHR, Muscle_mass, Creatinine, 
    VLDL, Visceral_fat, Cholesterol, LDL
  )
summary_table <- selected_data %>% 
  tbl_summary(
    by = Groups,  # Agrupar por la variable 'Groups' creada en mutate
    missing = "no",  
    type = list(
      Age ~ "continuous",        
      Sex ~ "categorical",       
      BMI ~ "continuous",
      Glucose ~ "continuous",
      Triglicerides ~ "continuous",
      HBA1c ~ "continuous",
      Urea ~ "continuous",
      HDL ~ "continuous",
      eGFR ~ "continuous",
      WC ~ "continuous",
      WHR ~ "continuous",
      Muscle_mass ~ "continuous",
      Creatinine ~ "continuous",
      VLDL ~ "continuous",
      Visceral_fat ~ "continuous",
      Cholesterol ~ "continuous",
      LDL ~ "continuous"
    ),
    statistic = list(   
      all_continuous() ~ "{median} ({p25} - {p75})",  
      all_categorical() ~ "{n} ({p}%)"  
    )
  ) %>%
  add_overall() %>%  
  add_p(
    test = list(
      all_categorical() ~ "fisher.test",  
      all_continuous() ~ "kruskal.test"
    ),
    test.args = list(
      all_categorical() ~ list(simulate.p.value = TRUE)
    )
  ) %>%
  modify_header(label = "**Variable**") %>%  
  modify_caption("**Table: Clinical and biochemical characteristics by Groups**") %>%
  as_gt() %>%  
  gt::tab_style(
    style = gt::cell_text(weight = "bold"),
    locations = gt::cells_column_labels()
  )

summary_table
Table: Clinical and biochemical characteristics by Groups
Variable Overall
N = 170
1
CONTROL
N = 57
1
DN
N = 43
1
T2DM
N = 70
1
p-value2
Sex



0.021
    F 115 (68%) 44 (77%) 22 (51%) 49 (70%)
    M 55 (32%) 13 (23%) 21 (49%) 21 (30%)
Age 60 (51 - 66) 57 (39 - 65) 65 (59 - 69) 58 (52 - 65) <0.001
BMI 27.3 (24.8 - 31.9) 24.5 (23.3 - 25.5) 30.0 (26.5 - 32.9) 30.6 (27.8 - 34.3) <0.001
Glucose 96 (83 - 139) 83 (80 - 90) 112 (95 - 165) 109 (94 - 167) <0.001
Triglicerides 133 (87 - 177) 88 (70 - 134) 166 (124 - 237) 142 (104 - 210) <0.001
HBA1c 6.00 (5.30 - 7.10) 5.20 (4.90 - 5.80) 6.31 (5.80 - 7.90) 6.70 (5.70 - 8.21) <0.001
Urea 30 (24 - 37) 30 (24 - 34) 35 (27 - 58) 28 (24 - 35) 0.004
HDL 50 (40 - 59) 50 (45 - 61) 53 (40 - 60) 47 (37 - 55) 0.033
eGFR 90 (75 - 102) 93 (81 - 104) 70 (55 - 79) 98 (90 - 105) <0.001
WC 91 (80 - 103) 78 (73 - 85) 96 (88 - 110) 98 (90 - 108) <0.001
WHR 0.89 (0.82 - 0.96) 0.81 (0.76 - 0.87) 0.91 (0.88 - 0.99) 0.92 (0.86 - 0.97) <0.001
Muscle_mass 43 (38 - 53) 38 (36 - 47) 47 (41 - 56) 44 (40 - 54) <0.001
Creatinine 0.80 (0.70 - 0.91) 0.75 (0.70 - 0.90) 0.96 (0.83 - 1.34) 0.70 (0.60 - 0.82) <0.001
VLDL 27 (18 - 37) 18 (14 - 27) 33 (25 - 49) 28 (21 - 42) <0.001
Visceral_fat 10.5 (8.0 - 14.5) 8.0 (6.0 - 9.0) 14.0 (10.5 - 19.0) 11.8 (9.5 - 14.8) <0.001
Cholesterol 191 (165 - 229) 197 (173 - 233) 203 (170 - 245) 187 (152 - 222) 0.14
LDL 115 (83 - 146) 123 (98 - 146) 108 (77 - 158) 111 (81 - 142) 0.12
1 n (%); Median (Q1 - Q3)
2 Fisher’s Exact Test for Count Data with simulated p-value (based on 2000 replicates); Kruskal-Wallis rank sum test
library(ggpubr)
library(cowplot)
library(ggplot2)
# Convertir las variables necesarias a numéricas
data <- data %>% 
  mutate(
    Age = as.numeric(Age),
    BMI = as.numeric(BMI),
    Glucose = as.numeric(Glucose),
    Triglicerides = as.numeric(Triglicerides),
    HBA1c = as.numeric(HBA1c),
    Urea = as.numeric(Urea),
    HDL = as.numeric(HDL),
    eGFR = as.numeric(eGFR),
    WC = as.numeric(WC),
    WHR = as.numeric(WHR),
    Muscle_mass = as.numeric(Muscle_mass),
    Creatinine = as.numeric(Creatinine),
    VLDL = as.numeric(VLDL),
    Visceral_fat = as.numeric(Visceral_fat),
    Cholesterol = as.numeric(Cholesterol),
    LDL = as.numeric(LDL)
  )

# Colores personalizados para tres grupos
custom_colors <- c("CONTROL" = "#1f77b4", "T2DM" = "#ff7f0e", "DN" = "#2ca02c")

# Crear un tema personalizado
theme_custom <- theme_pubr() + 
  theme(
    axis.title.x = element_blank(),
    axis.text = element_text(size = 10),
    legend.position = "none"
  )

# Función para crear gráficos de boxplot para cada variable
create_boxplot <- function(data, variable, var_label, y_label, y_lim) {
  ggplot(data, aes(x = Groups, y = !!sym(variable), fill = Groups)) +
    geom_boxplot(outlier.shape = NA) +
    geom_jitter(width = 0.2, alpha = 0.5, color = "black") +
    stat_compare_means(aes(label = ..p.signif..), method = "kruskal.test", label.x.npc = "center", label.y = y_lim - 0.1*y_lim) +
    labs(title = var_label, y = y_label, fill = " ") +
    scale_x_discrete(labels = c("CONTROL", "T2DM", "DN")) +
    scale_fill_manual(values = custom_colors) +
    ylim(0, y_lim) +
    theme_custom
}

# Crear gráficos para cada variable
p1 <- create_boxplot(data, "Age", " ", "Age (years)", 100)
p2 <- create_boxplot(data, "BMI", "", "BMI (kg/m²)", 50)
p3 <- create_boxplot(data, "Glucose", "", "Glucose (mg/dL)", 400)
p4 <- create_boxplot(data, "Triglicerides", " ", "Triglycerides (mg/dL)", 300)
p5 <- create_boxplot(data, "HBA1c", " ", "HbA1c (%)", 20)
p6 <- create_boxplot(data, "Urea", "", "Urea (mg/dL)", 150)
p7 <- create_boxplot(data, "HDL", "", "HDL-C (mg/dL)", 100)
p8 <- create_boxplot(data, "eGFR", "", "eGFR (mL/min/1.73 m²)", 150)
p9 <- create_boxplot(data, "WHR", "", "WHR", 2.0)
p10 <- create_boxplot(data, "Muscle_mass", "", "Muscle Mass (kg)", 50)
p11 <- create_boxplot(data, "Creatinine", "", "Creatinine (mg/dL)", 5)
p12 <- create_boxplot(data, "VLDL", "", "VLDL-C (mg/dL)", 100)
p13 <- create_boxplot(data, "Visceral_fat", "", "Visceral Fat", 50)
p14 <- create_boxplot(data, "Cholesterol", "", "Cholesterol (mg/dL)", 400)
p15 <- create_boxplot(data, "LDL", "", "LDL-C (mg/dL)", 300)
p16 <- create_boxplot(data, "WC", "", "Waist circumference", 150)

# Mostrar ejemplos de gráficos
print(p1)

print(p2)

print(p3)
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).

print(p4)
## Warning: Removed 10 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 10 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).

print(p5)
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

print(p6)
## Warning: Removed 8 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 8 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_point()`).

print(p7)
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

print(p8)
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).

print(p9)
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

print(p10)
## Warning: Removed 66 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 66 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

print(p11)
## Warning: Removed 10 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 10 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).

print(p12)
## Warning: Removed 6 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 6 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_point()`).

print(p13)
## Warning: Removed 6 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 6 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_point()`).

print(p14)
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).

print(p15)
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

print(p16)
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).