IzamarAnalisys

#install.packages("conflicted")
#install.packages("dplyr")

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(gtsummary)
library(gt)
library(readxl)
library(conflicted)
library(dplyr)

# Read the study workbook through an explicit path instead of calling setwd(),
# so loading the data does not change the session's working directory as a
# side effect.
data <- read_excel(
  file.path(
    "/Users/lorandacalderonzamora/Downloads",
    "PULL DE BASE DE DATOS PARA ANALISIS DESCRIPTIVO.xlsx"
  )
)

## New names:
## • `` -> `...1`

# Set knitr's `root.dir` option to the folder that holds the workbook.
# NOTE(review): presumably meant to make later chunks resolve relative paths
# from this folder -- confirm it is set early enough to take effect.
knitr::opts_knit$set(root.dir = "/Users/lorandacalderonzamora/Downloads")
# Show the class vector of the imported object.
class(data)

## [1] "tbl_df"     "tbl"        "data.frame"

# List the column names of the imported sheet.
names(data)

##  [1] "...1"                "ID_GROUP"            "ID_SAMPLE"          
##  [4] "SEX"                 "AGE"                 "WEIGHT"             
##  [7] "Waist_circunference" "waist_hip_ratio"     "IMC"                
## [10] "BODY_FAT"            "MUSCLE_MASS"         "VISCERAL_FAT"       
## [13] "GLUCOSE"             "UREA"                "CREATININE"         
## [16] "CHOLESTEROL"         "TRIGLUCERIDES"       "HDL"                
## [19] "VLDL"                "LDL"                 "HBA1c"              
## [22] "EGFR"

# Work with a plain data.frame from here on.
data <- as.data.frame(data)

# Readable group and sex labels: the codes 0/1/2 (groups) and 0/1 (sex) are
# mapped to text labels.
data[["Groups"]] <- dplyr::recode(
  data[["ID_GROUP"]],
  `0` = "CONTROL", `1` = "T2DM", `2` = "DN"
)
data[["Sex"]] <- dplyr::recode(data[["SEX"]], `0` = "Male", `1` = "Female")

# Analysis column (name) -> source column (value). Each source column is
# coerced with as.numeric(); entries that cannot be parsed become NA with a
# coercion warning. Columns whose analysis name equals the source name are
# converted in place, the others are appended in the order listed here.
numeric_sources <- c(
  Age = "AGE",
  BMI = "IMC",
  Glucose = "GLUCOSE",
  Triglicerides = "TRIGLUCERIDES",
  HBA1c = "HBA1c",
  Urea = "UREA",
  HDL = "HDL",
  eGFR = "EGFR",
  WC = "Waist_circunference",
  WHR = "waist_hip_ratio",
  Muscle_mass = "MUSCLE_MASS",
  Creatinine = "CREATININE",
  VLDL = "VLDL",
  Visceral_fat = "VISCERAL_FAT",
  Cholesterol = "CHOLESTEROL",
  LDL = "LDL"
)
data[names(numeric_sources)] <- lapply(
  unname(numeric_sources),
  function(source_column) as.numeric(data[[source_column]])
)

## Warning: There were 5 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `WC = as.numeric(Waist_circunference)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 4 remaining warnings.

# Show the class of `data` at this point in the script.
class(data)

## [1] "data.frame"

# Open `data` in the data viewer, then print a compact listing of every
# column with its type and first values.
view(data)
str(data)

## 'data.frame':    170 obs. of  36 variables:
##  $ ...1               : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ ID_GROUP           : chr  "CONTROL" "CONTROL" "CONTROL" "CONTROL" ...
##  $ ID_SAMPLE          : chr  "AJ204" "AJ210" "AJ215" "AJ216" ...
##  $ SEX                : chr  "F" "F" "F" "F" ...
##  $ AGE                : num  59 53 57 58 34 49 33 33 44 37 ...
##  $ WEIGHT             : chr  "50.8" "48.1" "56.8" "57.9" ...
##  $ Waist_circunference: chr  "75" "69" "79" "80.5" ...
##  $ waist_hip_ratio    : chr  "0.81967213114754101" "0.7752808988764045" "0.86813186813186816" "0.82818930041152261" ...
##  $ IMC                : chr  "20.6" "20.8" "26.3" "24.2" ...
##  $ BODY_FAT           : chr  "35.4" "29.3" "35.9" "34.5" ...
##  $ MUSCLE_MASS        : chr  "31.1" "32.200000000000003" "34.5" "35.700000000000003" ...
##  $ VISCERAL_FAT       : chr  "6.5" "5" "8" "7.5" ...
##  $ GLUCOSE            : chr  "92" "84" "87" "82" ...
##  $ UREA               : chr  "31.3" "34.799999999999997" "40.5" "31" ...
##  $ CREATININE         : chr  "0.7" "0.9" "0.6" "0.5" ...
##  $ CHOLESTEROL        : chr  "175" "191" "209" "237" ...
##  $ TRIGLUCERIDES      : chr  "81" "99" "88" "117" ...
##  $ HDL                : num  74 66 53 39 44 63 61 64 47 61 ...
##  $ VLDL               : num  16.2 19.8 17.6 23.4 23 15.2 13 10.4 31.6 22 ...
##  $ LDL                : num  84.8 105.2 138.4 174.6 98 ...
##  $ HBA1c              : num  6.6 5.3 5.9 5.4 4.8 5.6 5.7 4.7 5.3 5.3 ...
##  $ EGFR               : num  101 73 102 107 111 ...
##  $ Groups             : chr  "CONTROL" "CONTROL" "CONTROL" "CONTROL" ...
##  $ Sex                : chr  "F" "F" "F" "F" ...
##  $ Age                : num  59 53 57 58 34 49 33 33 44 37 ...
##  $ BMI                : num  20.6 20.8 26.3 24.2 25.6 25.6 23.8 22.6 24.5 26.7 ...
##  $ Glucose            : num  92 84 87 82 82 90 84 82 86 87 ...
##  $ Triglicerides      : num  81 99 88 117 115 76 65 52 158 110 ...
##  $ Urea               : num  31.3 34.8 40.5 31 36.5 27.4 32.5 24.1 29.9 19.7 ...
##  $ eGFR               : num  101 73 102 107 111 ...
##  $ WC                 : num  75 69 79 80.5 85.6 83 87.5 73.2 76 89 ...
##  $ WHR                : num  0.82 0.775 0.868 0.828 0.771 ...
##  $ Muscle_mass        : num  31.1 32.2 34.5 35.7 61.4 38.7 57.8 36.1 33.4 55 ...
##  $ Creatinine         : num  0.7 0.9 0.6 0.5 0.9 1 0.9 0.5 0.7 0.7 ...
##  $ Visceral_fat       : num  6.5 5 8 7.5 8.5 7 6.5 2.5 6 5 ...
##  $ Cholesterol        : num  175 191 209 237 165 267 155 153 179 221 ...

# Coerce to a plain data.frame before subsetting.
data <- as.data.frame(data)

# Keep the grouping variable, sex, and the numeric measurements that go into
# the descriptive table, in the order they should appear there.
selected_data <- data[
  ,
  c(
    "Groups", "Sex", "Age", "BMI", "Glucose", "Triglicerides", "HBA1c",
    "Urea", "HDL", "eGFR", "WC", "WHR", "Muscle_mass", "Creatinine",
    "VLDL", "Visceral_fat", "Cholesterol", "LDL"
  ),
  drop = FALSE
]

# Descriptive table stratified by `Groups`:
#   * Sex is summarised as n (%), every other variable as median (Q1 - Q3);
#   * missing-value rows are not shown;
#   * an overall column is added next to the per-group columns;
#   * p-values come from Fisher's exact test with a simulated p-value
#     (categorical) and the Kruskal-Wallis test (continuous).
# The result is converted to a gt table with bold column labels.
summary_table <- selected_data %>%
  gtsummary::tbl_summary(
    by = Groups,
    type = list(
      Sex ~ "categorical",
      c(
        Age, BMI, Glucose, Triglicerides, HBA1c, Urea, HDL, eGFR, WC, WHR,
        Muscle_mass, Creatinine, VLDL, Visceral_fat, Cholesterol, LDL
      ) ~ "continuous"
    ),
    statistic = list(
      all_categorical() ~ "{n} ({p}%)",
      all_continuous() ~ "{median} ({p25} - {p75})"
    ),
    missing = "no"
  ) %>%
  gtsummary::add_overall() %>%
  gtsummary::add_p(
    test = list(
      all_continuous() ~ "kruskal.test",
      all_categorical() ~ "fisher.test"
    ),
    test.args = list(
      all_categorical() ~ list(simulate.p.value = TRUE)
    )
  ) %>%
  gtsummary::modify_header(label = "**Variable**") %>%
  gtsummary::modify_caption(
    "**Table: Clinical and biochemical characteristics by Groups**"
  ) %>%
  gtsummary::as_gt() %>%
  gt::tab_style(
    locations = gt::cells_column_labels(),
    style = gt::cell_text(weight = "bold")
  )

# Render the finished table.
summary_table

**Table: Clinical and biochemical characteristics by Groups**
Variable	Overall N = 170¹	CONTROL N = 57¹	DN N = 43¹	T2DM N = 70¹	p-value²
Sex					0.021
F	115 (68%)	44 (77%)	22 (51%)	49 (70%)
M	55 (32%)	13 (23%)	21 (49%)	21 (30%)
Age	60 (51 - 66)	57 (39 - 65)	65 (59 - 69)	58 (52 - 65)	<0.001
BMI	27.3 (24.8 - 31.9)	24.5 (23.3 - 25.5)	30.0 (26.5 - 32.9)	30.6 (27.8 - 34.3)	<0.001
Glucose	96 (83 - 139)	83 (80 - 90)	112 (95 - 165)	109 (94 - 167)	<0.001
Triglicerides	133 (87 - 177)	88 (70 - 134)	166 (124 - 237)	142 (104 - 210)	<0.001
HBA1c	6.00 (5.30 - 7.10)	5.20 (4.90 - 5.80)	6.31 (5.80 - 7.90)	6.70 (5.70 - 8.21)	<0.001
Urea	30 (24 - 37)	30 (24 - 34)	35 (27 - 58)	28 (24 - 35)	0.004
HDL	50 (40 - 59)	50 (45 - 61)	53 (40 - 60)	47 (37 - 55)	0.033
eGFR	90 (75 - 102)	93 (81 - 104)	70 (55 - 79)	98 (90 - 105)	<0.001
WC	91 (80 - 103)	78 (73 - 85)	96 (88 - 110)	98 (90 - 108)	<0.001
WHR	0.89 (0.82 - 0.96)	0.81 (0.76 - 0.87)	0.91 (0.88 - 0.99)	0.92 (0.86 - 0.97)	<0.001
Muscle_mass	43 (38 - 53)	38 (36 - 47)	47 (41 - 56)	44 (40 - 54)	<0.001
Creatinine	0.80 (0.70 - 0.91)	0.75 (0.70 - 0.90)	0.96 (0.83 - 1.34)	0.70 (0.60 - 0.82)	<0.001
VLDL	27 (18 - 37)	18 (14 - 27)	33 (25 - 49)	28 (21 - 42)	<0.001
Visceral_fat	10.5 (8.0 - 14.5)	8.0 (6.0 - 9.0)	14.0 (10.5 - 19.0)	11.8 (9.5 - 14.8)	<0.001
Cholesterol	191 (165 - 229)	197 (173 - 233)	203 (170 - 245)	187 (152 - 222)	0.14
LDL	115 (83 - 146)	123 (98 - 146)	108 (77 - 158)	111 (81 - 142)	0.12
¹ n (%); Median (Q1 - Q3)
² Fisher’s Exact Test for Count Data with simulated p-value (based on 2000 replicates); Kruskal-Wallis rank sum test

library(ggpubr)
library(cowplot)
library(ggplot2)

# Make sure every measurement used in the plots is stored as numeric.
# Each listed column is converted in place; no columns are added or moved.
data <- data %>%
  mutate(
    dplyr::across(
      dplyr::all_of(
        c(
          "Age", "BMI", "Glucose", "Triglicerides", "HBA1c", "Urea", "HDL",
          "eGFR", "WC", "WHR", "Muscle_mass", "Creatinine", "VLDL",
          "Visceral_fat", "Cholesterol", "LDL"
        )
      ),
      as.numeric
    )
  )

# Fill colour for each of the three study groups, keyed by group label.
custom_colors <- c(
  CONTROL = "#1f77b4",
  T2DM = "#ff7f0e",
  DN = "#2ca02c"
)

# Shared plot theme: ggpubr's publication theme with the legend hidden,
# the x-axis title removed, and 10-point axis text.
theme_custom <- ggpubr::theme_pubr() +
  ggplot2::theme(
    legend.position = "none",
    axis.text = ggplot2::element_text(size = 10),
    axis.title.x = ggplot2::element_blank()
  )

# Build a boxplot of one variable across the study groups.
#
# Arguments:
#   data      data frame with a `Groups` column and the column named in
#             `variable`.
#   variable  name of the column to plot on the y axis, as a string.
#   var_label plot title.
#   y_label   y-axis title.
#   y_lim     upper bound of the visible y range (the lower bound is 0); the
#             significance label is drawn at 90% of this value.
#
# Returns a ggplot object. Relies on `custom_colors` and `theme_custom` being
# defined in the calling environment.
create_boxplot <- function(data, variable, var_label, y_label, y_lim) {
  # Display order of the groups on the x axis.
  group_order <- c("CONTROL", "T2DM", "DN")

  ggplot(data, aes(x = Groups, y = .data[[variable]], fill = Groups)) +
    geom_boxplot(outlier.shape = NA) +
    geom_jitter(width = 0.2, alpha = 0.5, color = "black") +
    stat_compare_means(
      aes(label = after_stat(p.signif)),
      method = "kruskal.test",
      label.x.npc = "center",
      label.y = y_lim - 0.1 * y_lim
    ) +
    labs(title = var_label, y = y_label, fill = " ") +
    # Order the axis with `limits` rather than overwriting tick text with
    # `labels`: a character `Groups` column is laid out alphabetically
    # (CONTROL, DN, T2DM), so positional labels would swap the DN and T2DM
    # tick names.
    scale_x_discrete(limits = group_order) +
    scale_fill_manual(values = custom_colors) +
    # Zoom the panel instead of setting scale limits: ylim() discards
    # out-of-range rows before the boxplot and test statistics are computed,
    # whereas coord_cartesian() only crops what is drawn.
    coord_cartesian(ylim = c(0, y_lim)) +
    theme_custom
}

# Build one boxplot per variable. The arguments after the column name are the
# plot title, the y-axis title, and the upper limit of the y axis.
p1 <- create_boxplot(data, "Age", " ", "Age (years)", 100)
p2 <- create_boxplot(data, "BMI", "", "BMI (kg/m²)", 50)
p3 <- create_boxplot(data, "Glucose", "", "Glucose (mg/dL)", 400)
p4 <- create_boxplot(data, "Triglicerides", " ", "Triglycerides (mg/dL)", 300)
p5 <- create_boxplot(data, "HBA1c", " ", "HbA1c (%)", 20)
p6 <- create_boxplot(data, "Urea", "", "Urea (mg/dL)", 150)
p7 <- create_boxplot(data, "HDL", "", "HDL-C (mg/dL)", 100)
p8 <- create_boxplot(data, "eGFR", "", "eGFR (mL/min/1.73 m²)", 150)
p9 <- create_boxplot(data, "WHR", "", "WHR", 2.0)
# Upper limit raised from 50 to 100: with a limit of 50 a large share of the
# muscle-mass values fell outside the axis range (66 rows were removed when
# the plot was rendered), which distorted the boxes and the group comparison.
p10 <- create_boxplot(data, "Muscle_mass", "", "Muscle Mass (kg)", 100)
p11 <- create_boxplot(data, "Creatinine", "", "Creatinine (mg/dL)", 5)
p12 <- create_boxplot(data, "VLDL", "", "VLDL-C (mg/dL)", 100)
p13 <- create_boxplot(data, "Visceral_fat", "", "Visceral Fat", 50)
p14 <- create_boxplot(data, "Cholesterol", "", "Cholesterol (mg/dL)", 400)
p15 <- create_boxplot(data, "LDL", "", "LDL-C (mg/dL)", 300)
p16 <- create_boxplot(data, "WC", "", "Waist circumference", 150)

# Render the plots one by one, starting with age, BMI, and glucose.
print(p1)

print(p2)

print(p3)

## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Render the triglycerides boxplot.
print(p4)

## Warning: Removed 10 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 10 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Render the HbA1c boxplot.
print(p5)

## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Render the urea boxplot.
print(p6)

## Warning: Removed 8 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 8 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Render the HDL boxplot.
print(p7)

## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Render the eGFR boxplot.
print(p8)

## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Render the waist-hip ratio (WHR) boxplot.
print(p9)

## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Render the muscle-mass boxplot.
print(p10)

## Warning: Removed 66 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 66 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Render the creatinine boxplot.
print(p11)

## Warning: Removed 10 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 10 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Render the VLDL boxplot.
print(p12)

## Warning: Removed 6 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 6 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Render the visceral-fat boxplot.
print(p13)

## Warning: Removed 6 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 6 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Render the cholesterol boxplot.
print(p14)

## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Render the LDL boxplot.
print(p15)

## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Render the waist-circumference (WC) boxplot.
print(p16)

## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

IzamarAnalisys

Loranda_Calderon

2024-09-19