Analysis_KatiaAvina

#install.packages("conflicted")
#install.packages("dplyr")

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(gtsummary)
library(gt)
library(readxl)
library(conflicted)
library(dplyr)

setwd(dir = "/Users/lorandacalderonzamora/Downloads/")
data <- read_excel("Reorganized_BASE_miRNAs.xlsx")
class(data)

## [1] "tbl_df"     "tbl"        "data.frame"

names(data)

##  [1] "Patient_ID"                "miRNA_Group_4"            
##  [3] "miRNA_Group_2"             "DAI"                      
##  [5] "Leptin_ng_ml"              "Adiponectin"              
##  [7] "Adiponectin_Leptin_Index"  "Sex"                      
##  [9] "IR"                        "Age"                      
## [11] "Hypertension"              "Dyslipidemia"             
## [13] "Liver_Disease"             "Obesity"                  
## [15] "Ghrelin"                   "AIC_BI"                   
## [17] "AEC_BI"                    "ACT_BI"                   
## [19] "MLG_BI"                    "AEC_ACT_BI"               
## [21] "AEC_ACT_Torso_BI"          "Cell_Mass_BI"             
## [23] "Bone_Minerals_BI"          "AGV_BI"                   
## [25] "Basal_Metabolic_Rate_BI"   "AF50khz_BD_BI"            
## [27] "DII"                       "Total_Cholesterol"        
## [29] "Triglycerides"             "LDL_C"                    
## [31] "HDL_C"                     "HDL2"                     
## [33] "HDL3"                      "HDL2B"                    
## [35] "HDL2A"                     "HDL3A"                    
## [37] "HDL3B"                     "HDL3C"                    
## [39] "Apo_B"                     "Apo_A"                    
## [41] "Glucose"                   "Insulin_2"                
## [43] "HOMA_IR"                   "C_Peptide"                
## [45] "GIP"                       "GLP_1"                    
## [47] "Glucagon"                  "ALT"                      
## [49] "AST"                       "ALP"                      
## [51] "GGT"                       "Creatinine"               
## [53] "Uric_Acid"                 "CRP"                      
## [55] "IL_1B"                     "IL_6"                     
## [57] "IL_10"                     "TNF_a"                    
## [59] "MCP_1"                     "MCP_1_ng_ml"              
## [61] "PAI_1"                     "PAI_1_ng_ml"              
## [63] "Resistin"                  "Resistin_ng_ml"           
## [65] "Vaspin"                    "Alcohol"                  
## [67] "Smoking"                   "Sedentarism"              
## [69] "BMI"                       "WHR"                      
## [71] "Weight"                    "Height"                   
## [73] "Arm_Circumference_BI"      "Fat_BI"                   
## [75] "Lean_Mass_BI"              "Muscle_Mass_BI"           
## [77] "Fat_Percentage_BI"         "Lean_Mass_Percentage_BI"  
## [79] "Muscle_Mass_Percentage_BI" "hsa-miR-370-3p22"         
## [81] "hsa-miR-376c-3p20"         "hsa-miR-769-5p86"         
## [83] "hsa-miR-3913-5p87"         "hsa-miR-338-5p01"         
## [85] "hsa-miR-203a-3p64"         "hsa-miR-16-5p69"          
## [87] "hsa-miR-15a-5p68"          "hsa-let-7a-5p62"          
## [89] "hsa-miR-451047"

data <- as.data.frame(data)
data <- data %>%
  mutate(
    Insulin_resistance = dplyr::recode(miRNA_Group_2, `0` = "Insulin sensitivity", `1` = "Insulin resistance"),
    Sex = dplyr::recode(Sex, `0` = "Male", `1` = "Female"),
    Hypertension = dplyr::recode(Hypertension, `0` = "No", `1` = "Yes"),
    Dyslipidemia = dplyr::recode(Dyslipidemia, `0` = "No", `1` = "Yes"),
    Liver_Disease = dplyr::recode(Liver_Disease, `0` = "No", `1` = "Yes"),
    Obesity = dplyr::recode(Obesity, `0` = "No", `1` = "Yes"),
    Smoking = dplyr::recode(Smoking, 'no' = "Non-smoker", 'yes' = "Smoker"),
    Alcohol = dplyr::recode(Alcohol, 'no' = "Non-drinker", 'yes' = "Drinker"),
    Sedentarism = dplyr::recode(Sedentarism, `0` = "Active", `1` = "Sedentary")
  )

str(data)

## 'data.frame':    19 obs. of  90 variables:
##  $ Patient_ID               : chr  "137MP" "139MP" "140MP" "142MP" ...
##  $ miRNA_Group_4            : num  0 0 0 1 2 3 1 3 2 1 ...
##  $ miRNA_Group_2            : num  0 0 0 1 0 1 1 1 0 1 ...
##  $ DAI                      : num  0.88 2.51 0.52 0.8 0.82 2.47 1.52 1.31 1.77 1.12 ...
##  $ Leptin_ng_ml             : num  1.959 1.025 0.793 4.262 1.143 ...
##  $ Adiponectin              : num  10.07 5.48 22.84 8.55 18.58 ...
##  $ Adiponectin_Leptin_Index : num  5.14 5.35 28.79 2.01 16.26 ...
##  $ Sex                      : chr  "Male" "Male" "Male" "Male" ...
##  $ IR                       : num  0 0 0 1 0 1 1 1 0 1 ...
##  $ Age                      : num  61 75 74 59 65 70 66 73 60 55 ...
##  $ Hypertension             : chr  "No" "No" "Yes" "No" ...
##  $ Dyslipidemia             : chr  "No" "Yes" "No" "No" ...
##  $ Liver_Disease            : chr  "No" "No" "No" "No" ...
##  $ Obesity                  : chr  "No" "No" "No" "No" ...
##  $ Ghrelin                  : num  185 227 233 274 224 ...
##  $ AIC_BI                   : chr  "18.7" "18.600000000000001" "18.399999999999999" "14.2" ...
##  $ AEC_BI                   : chr  "12" "11.8" "12.4" "8.6999999999999993" ...
##  $ ACT_BI                   : chr  "30.7" "30.4" "30.8" "22.9" ...
##  $ MLG_BI                   : chr  "41.7" "41.5" "42" "31.3" ...
##  $ AEC_ACT_BI               : chr  "0.39" "0.38900000000000001" "0.40200000000000002" "0.38" ...
##  $ AEC_ACT_Torso_BI         : chr  "0.39" "0.39" "0.40200000000000002" "0.38100000000000001" ...
##  $ Cell_Mass_BI             : chr  "26.8" "26.6" "26.4" "20.3" ...
##  $ Bone_Minerals_BI         : chr  "2.4" "2.58" "2.63" "1.91" ...
##  $ AGV_BI                   : chr  "139.69999999999999" "121.1" "129.80000000000001" "110.4" ...
##  $ Basal_Metabolic_Rate_BI  : chr  "1271" "1267" "1276" "1047" ...
##  $ AF50khz_BD_BI            : chr  "5.0999999999999996" "4.8" "3.9" "4.7" ...
##  $ DII                      : chr  "-4.0599999999999996" "-1.87" "-3.06" "NA" ...
##  $ Total_Cholesterol        : num  217 201 197 198 166 ...
##  $ Triglycerides            : num  92.5 166.6 96 90 98.8 ...
##  $ LDL_C                    : num  149.7 138 101.7 129.7 89.2 ...
##  $ HDL_C                    : num  52.6 36 80.2 53.4 60.6 38 39.1 33.6 34.5 32.2 ...
##  $ HDL2                     : chr  "21.36" "17.77" "NA" "NA" ...
##  $ HDL3                     : chr  "78.64" "82.23" "NA" "NA" ...
##  $ HDL2B                    : chr  "5.54" "5.88" "NA" "NA" ...
##  $ HDL2A                    : chr  "15.82" "11.9" "NA" "NA" ...
##  $ HDL3A                    : chr  "21.9" "22.55" "NA" "NA" ...
##  $ HDL3B                    : chr  "23.62" "27.35" "NA" "NA" ...
##  $ HDL3C                    : chr  "33.11" "32.32" "NA" "NA" ...
##  $ Apo_B                    : num  120.9 126.8 89.1 110.3 81.3 ...
##  $ Apo_A                    : num  155 136 190 159 172 ...
##  $ Glucose                  : num  82 99.5 89.9 103.4 92.6 ...
##  $ Insulin_2                : num  7.45 7.22 3.93 8.28 6.59 ...
##  $ HOMA_IR                  : num  1.508 1.775 0.872 2.113 1.507 ...
##  $ C_Peptide                : num  2013 2049 1748 2456 2045 ...
##  $ GIP                      : num  47.3 91.4 119.2 1433.4 123.9 ...
##  $ GLP_1                    : num  442 410 376 312 348 ...
##  $ Glucagon                 : num  2901 3635 2962 2954 2715 ...
##  $ ALT                      : num  22.7 23.8 14.8 27.1 11.2 30.6 55.7 10.5 29 20.1 ...
##  $ AST                      : num  21 27 22.1 24.8 16 20.3 38.4 18.5 20.9 17.4 ...
##  $ ALP                      : num  116.6 98.1 84 131.8 69.1 ...
##  $ GGT                      : num  47.1 12.2 15.3 25.3 21.4 27.2 66.3 20.2 20.5 26.1 ...
##  $ Creatinine               : num  0.6 0.7 0.8 0.6 0.9 0.6 0.9 0.5 0.6 0.7 ...
##  $ Uric_Acid                : num  4.1 6 4 3.4 5.4 5 7.4 7 6 4.9 ...
##  $ CRP                      : num  0.77 1.55 2.04 2.11 0.56 6.06 0.32 6.36 0.96 7.25 ...
##  $ IL_1B                    : num  1.61 1.77 1.43 1.49 1.04 1.07 0.59 1.91 1.74 1.66 ...
##  $ IL_6                     : num  3.46 5.61 2.41 3.95 2.57 ...
##  $ IL_10                    : num  3.42 4.92 4.3 3.81 3.47 2.03 2.46 4.6 7.04 4.4 ...
##  $ TNF_a                    : num  157122 170708 243543 159033 70673 ...
##  $ MCP_1                    : num  14 11.9 13.1 14.2 17.1 ...
##  $ MCP_1_ng_ml              : num  0.014 0.0119 0.0131 0.0142 0.0171 ...
##  $ PAI_1                    : num  5036 7558 4570 4674 5689 ...
##  $ PAI_1_ng_ml              : num  5.04 7.56 4.57 4.67 5.69 ...
##  $ Resistin                 : num  4036 3162 8519 2553 5772 ...
##  $ Resistin_ng_ml           : num  4.04 3.16 8.52 2.55 5.77 ...
##  $ Vaspin                   : num  6830 9594 6454 27712 6085 ...
##  $ Alcohol                  : chr  "si" "si" "si" "si" ...
##  $ Smoking                  : chr  "Non-smoker" "Non-smoker" "Non-smoker" "Non-smoker" ...
##  $ Sedentarism              : chr  "Active" "Sedentary" "Sedentary" "Active" ...
##  $ BMI                      : num  24.8 23.4 23.6 24.5 25.8 ...
##  $ WHR                      : num  87.5 91 73 81.8 90 ...
##  $ Weight                   : num  67.2 63.5 64.9 51.4 65.3 79.2 66.5 62 67.5 55 ...
##  $ Height                   : num  165 165 166 145 159 ...
##  $ Arm_Circumference_BI     : chr  "30.9" "30.4" "29" "29.3" ...
##  $ Fat_BI                   : chr  "25.5" "22" "22.9" "20.100000000000001" ...
##  $ Lean_Mass_BI             : chr  "39.299999999999997" "38.9" "39.4" "29.4" ...
##  $ Muscle_Mass_BI           : chr  "22.4" "22.2" "22.1" "16.5" ...
##  $ Fat_Percentage_BI        : chr  "37.9" "34.6" "35.4" "39" ...
##  $ Lean_Mass_Percentage_BI  : chr  "58.482142857142797" "61.259842519685002" "60.708782742681002" "57.198443579766497" ...
##  $ Muscle_Mass_Percentage_BI: chr  "33.3333333333333" "34.960629921259802" "34.052388289676401" "32.101167315175097" ...
##  $ hsa-miR-370-3p22         : num  12.96 15.8 1.37 13.56 8.53 ...
##  $ hsa-miR-376c-3p20        : num  10.49 4.31 6.83 9.69 48.35 ...
##  $ hsa-miR-769-5p86         : num  1093 1107 760 2254 930 ...
##  $ hsa-miR-3913-5p87        : num  14.82 23.7 23.68 34.88 5.69 ...
##  $ hsa-miR-338-5p01         : num  6.79 3.59 2.73 7.75 11.38 ...
##  $ hsa-miR-203a-3p64        : num  16.1 207.5 27.3 73.6 11.4 ...
##  $ hsa-miR-16-5p69          : num  7572 6214 10803 5459 17800 ...
##  $ hsa-miR-15a-5p68         : num  1162 995 1144 690 1996 ...
##  $ hsa-let-7a-5p62          : num  45143 70585 34691 44336 49640 ...
##  $ hsa-miR-451047           : num  2.47 3.59 1.37 1.94 11.38 ...
##  $ Insulin_resistance       : chr  "Insulin sensitivity" "Insulin sensitivity" "Insulin sensitivity" "Insulin resistance" ...

#Convertir variables para que sean leidas correctamente 
data <- data %>%
  mutate(
    Glucose = as.numeric(as.character(Glucose)),
    Insulin_2 = as.numeric(as.character(Insulin_2)),
    HOMA_IR = as.numeric(as.character(HOMA_IR)),
    Glucagon = as.numeric(as.character(Glucagon)),
    GIP = as.numeric(as.character(GIP)),
    GLP_1 = as.numeric(as.character(GLP_1)),
    C_Peptide = as.numeric(as.character(C_Peptide)),
    Total_Cholesterol = as.numeric(as.character(Total_Cholesterol)),
    Triglycerides = as.numeric(as.character(Triglycerides)),
    HDL_C = as.numeric(as.character(HDL_C)),
    LDL_C = as.numeric(as.character(LDL_C)),
    Apo_A = as.numeric(as.character(Apo_A)),
    Apo_B = as.numeric(as.character(Apo_B)),
    Creatinine = as.numeric(as.character(Creatinine)),
    Uric_Acid = as.numeric(as.character(Uric_Acid)),
    ALT = as.numeric(as.character(ALT)),
    AST = as.numeric(as.character(AST)),
    GGT = as.numeric(as.character(GGT)),
    ALP = as.numeric(as.character(ALP)),
    Age = as.numeric(Age),
    BMI = as.numeric(BMI),
    Resistin = as.numeric(Resistin_ng_ml),
    `hsa-miR-370-3p22` = as.numeric(`hsa-miR-370-3p22`),
    `hsa-miR-376c-3p20` = as.numeric(`hsa-miR-376c-3p20`),
    `hsa-miR-769-5p86` = as.numeric(`hsa-miR-769-5p86`),
    `hsa-miR-3913-5p87` = as.numeric(`hsa-miR-3913-5p87`),
    `hsa-miR-338-5p01` = as.numeric(`hsa-miR-338-5p01`),
    `hsa-miR-203a-3p64` = as.numeric(`hsa-miR-203a-3p64`),
    `hsa-miR-16-5p69` = as.numeric(`hsa-miR-16-5p69`),
    `hsa-miR-15a-5p68` = as.numeric(`hsa-miR-15a-5p68`),
    `hsa-let-7a-5p62` = as.numeric(`hsa-let-7a-5p62`),
    `hsa-miR-451047` = as.numeric(`hsa-miR-451047`)
  )

# Seleccionar solo las variables relevantes para el análisis
selected_data <- data %>%
  select(
  Insulin_resistance, Age, Sex, Smoking, Alcohol, Sedentarism, BMI, 
     Hypertension, Dyslipidemia, Obesity 
  )

# Crear la tabla de resumen
summary_table <- selected_data %>%
  tbl_summary(
    by =   Insulin_resistance, 
    missing = "no", 
    type = list(
      Age ~ "continuous", 
      Sex ~ "categorical",
      Smoking ~ "categorical",
      Alcohol ~ "categorical",
      Sedentarism ~ "categorical",
      Hypertension ~ "categorical",
      Dyslipidemia ~ "categorical",
      Obesity ~ "categorical",
      BMI ~ "continuous"
    ), 
    statistic = list(
      Age ~ "{mean} ({sd})",  
      Sex ~ "{n} / {N} ({p}%)", 
      Smoking ~ "{n} / {N} ({p}%)", 
      Alcohol ~ "{n} / {N} ({p}%)", 
      Sedentarism ~ "{n} / {N} ({p}%)",
      Hypertension ~ "{n} / {N} ({p}%)",
      Dyslipidemia ~ "{n} / {N} ({p}%)",
      Obesity ~ "{n} / {N} ({p}%)",
      BMI ~ "{mean} ({sd})" 
    ) 
  ) %>%
  add_overall() %>% 
  add_p() %>% 
  modify_header(label = "**Variable**") %>% 
  modify_caption("**Table: Sociodemographic and lifestyle characteristics by insulin resistance status**") %>%
  as_gt() %>% 
  gt::tab_style(
    style = gt::cell_text(weight = "bold"),
    locations = gt::cells_column_labels()
  )

## The following warnings were returned during `as_gt()`:
## ! For variable `Age` (`Insulin_resistance`) and "estimate", "statistic",
##   "p.value", "conf.low", and "conf.high" statistics: cannot compute exact
##   p-value with ties
## ! For variable `Age` (`Insulin_resistance`) and "estimate", "statistic",
##   "p.value", "conf.low", and "conf.high" statistics: cannot compute exact
##   confidence intervals with ties

summary_table

**Table: Sociodemographic and lifestyle characteristics by insulin resistance status**
Variable	Overall N = 19¹	Insulin resistance N = 10¹	Insulin sensitivity N = 9¹	p-value²
Age	59 (10)	58 (9)	60 (10)	0.6
Sex				>0.9
Female	5 / 19 (26%)	3 / 10 (30%)	2 / 9 (22%)
Male	14 / 19 (74%)	7 / 10 (70%)	7 / 9 (78%)
Smoking				0.5
Non-smoker	17 / 19 (89%)	8 / 10 (80%)	9 / 9 (100%)
si	2 / 19 (11%)	2 / 10 (20%)	0 / 9 (0%)
Alcohol				0.6
Non-drinker	3 / 19 (16%)	1 / 10 (10%)	2 / 9 (22%)
si	16 / 19 (84%)	9 / 10 (90%)	7 / 9 (78%)
Sedentarism				>0.9
Active	9 / 19 (47%)	5 / 10 (50%)	4 / 9 (44%)
Sedentary	10 / 19 (53%)	5 / 10 (50%)	5 / 9 (56%)
BMI	26.9 (3.9)	28.6 (4.4)	25.1 (2.4)	0.079
Hypertension				>0.9
No	16 / 19 (84%)	8 / 10 (80%)	8 / 9 (89%)
Yes	3 / 19 (16%)	2 / 10 (20%)	1 / 9 (11%)
Dyslipidemia				0.4
No	10 / 19 (53%)	4 / 10 (40%)	6 / 9 (67%)
Yes	9 / 19 (47%)	6 / 10 (60%)	3 / 9 (33%)
Obesity				0.087
No	15 / 19 (79%)	6 / 10 (60%)	9 / 9 (100%)
Yes	4 / 19 (21%)	4 / 10 (40%)	0 / 9 (0%)
¹ Mean (SD); n / N (%)
² Wilcoxon rank sum test; Fisher’s exact test; Wilcoxon rank sum exact test

selected_data <- data %>%
  select(
  Insulin_resistance, Glucose, Insulin_2, HOMA_IR, Glucagon, GIP, GLP_1, C_Peptide, Total_Cholesterol, Triglycerides, HDL_C, LDL_C, Apo_A, Apo_B, Creatinine, Uric_Acid, ALT, AST, GGT, ALP, Leptin_ng_ml, Adiponectin, Adiponectin_Leptin_Index, Ghrelin
  )

summary_table <- selected_data %>%
  tbl_summary(
    by = Insulin_resistance, 
    missing = "no", 
    type = list(
      Glucose ~ "continuous", 
      Insulin_2 ~ "continuous",
      HOMA_IR ~ "continuous",
      Glucagon ~ "continuous",
      GIP ~ "continuous",
      GLP_1 ~ "continuous",
      C_Peptide ~ "continuous",
      Total_Cholesterol ~ "continuous",
      Triglycerides ~ "continuous",
      HDL_C ~ "continuous",
      LDL_C ~ "continuous",
      Apo_A ~ "continuous",
      Apo_B ~ "continuous",
      Leptin_ng_ml ~ "continuous", 
      Adiponectin ~ "continuous",
      Adiponectin_Leptin_Index ~ "continuous",
      Ghrelin ~ "continuous",
      Creatinine ~ "continuous",
      Uric_Acid ~ "continuous",
      ALT ~ "continuous",
      AST ~ "continuous",
      GGT ~ "continuous",
      ALP ~ "continuous"
    ), 
    statistic = list(
      Glucose ~ "{mean} ({sd})", 
      Insulin_2 ~ "{mean} ({sd})",
      HOMA_IR ~ "{mean} ({sd})", 
      Glucagon ~ "{mean} ({sd})", 
      GIP ~ "{mean} ({sd})", 
      GLP_1 ~ "{mean} ({sd})", 
      C_Peptide ~ "{mean} ({sd})", 
      Total_Cholesterol ~ "{mean} ({sd})", 
      Triglycerides ~ "{mean} ({sd})",
      HDL_C ~ "{mean} ({sd})",
      LDL_C ~ "{mean} ({sd})",
      Apo_A ~ "{mean} ({sd})",
      Apo_B ~ "{mean} ({sd})",
      Leptin_ng_ml ~ "{mean} ({sd})", 
      Adiponectin ~ "{mean} ({sd})",
      Adiponectin_Leptin_Index ~ "{mean} ({sd})", 
      Ghrelin ~ "{mean} ({sd})",
      Creatinine ~ "{mean} ({sd})",
      Uric_Acid ~ "{mean} ({sd})",
      ALT ~ "{mean} ({sd})",
      AST ~ "{mean} ({sd})",
      GGT ~ "{mean} ({sd})",
      ALP ~ "{mean} ({sd})"
    ) 
  ) %>%
  add_overall() %>% 
  add_p() %>% 
  modify_header(label = "**Variable**") %>% 
  modify_caption("**Table: Clinical characteristics by insulin resistance status**") %>%
  as_gt() %>% 
  gt::tab_style(
    style = gt::cell_text(weight = "bold"),
    locations = gt::cells_column_labels()
  )

## The following warnings were returned during `as_gt()`:
## ! For variable `ALP` (`Insulin_resistance`) and "estimate", "statistic",
##   "p.value", "conf.low", and "conf.high" statistics: cannot compute exact
##   p-value with ties
## ! For variable `ALP` (`Insulin_resistance`) and "estimate", "statistic",
##   "p.value", "conf.low", and "conf.high" statistics: cannot compute exact
##   confidence intervals with ties
## ! For variable `Creatinine` (`Insulin_resistance`) and "estimate", "statistic",
##   "p.value", "conf.low", and "conf.high" statistics: cannot compute exact
##   p-value with ties
## ! For variable `Creatinine` (`Insulin_resistance`) and "estimate", "statistic",
##   "p.value", "conf.low", and "conf.high" statistics: cannot compute exact
##   confidence intervals with ties
## ! For variable `GLP_1` (`Insulin_resistance`) and "estimate", "statistic",
##   "p.value", "conf.low", and "conf.high" statistics: cannot compute exact
##   p-value with ties
## ! For variable `GLP_1` (`Insulin_resistance`) and "estimate", "statistic",
##   "p.value", "conf.low", and "conf.high" statistics: cannot compute exact
##   confidence intervals with ties
## ! For variable `Uric_Acid` (`Insulin_resistance`) and "estimate", "statistic",
##   "p.value", "conf.low", and "conf.high" statistics: cannot compute exact
##   p-value with ties
## ! For variable `Uric_Acid` (`Insulin_resistance`) and "estimate", "statistic",
##   "p.value", "conf.low", and "conf.high" statistics: cannot compute exact
##   confidence intervals with ties

summary_table

**Table: Clinical characteristics by insulin resistance status**
Variable	Overall N = 19¹	Insulin resistance N = 10¹	Insulin sensitivity N = 9¹	p-value²
Glucose	100 (9)	103 (9)	95 (8)	0.11
Insulin_2	10.4 (6.3)	13.9 (6.9)	6.5 (1.5)	<0.001
HOMA_IR	2.59 (1.72)	3.56 (1.91)	1.52 (0.35)	<0.001
Glucagon	3,054 (415)	3,128 (220)	2,972 (565)	0.4
GIP	218 (326)	259 (416)	173 (201)	0.5
GLP_1	374 (77)	391 (87)	356 (64)	0.6
C_Peptide	2,108 (245)	2,289 (163)	1,907 (135)	<0.001
Total_Cholesterol	183 (42)	189 (53)	177 (27)	0.7
Triglycerides	159 (80)	188 (93)	126 (49)	0.11
HDL_C	44 (13)	40 (9)	48 (16)	0.2
LDL_C	114 (35)	119 (42)	108 (26)	0.6
Apo_A	146 (26)	149 (26)	143 (27)	0.7
Apo_B	112 (29)	119 (35)	104 (19)	0.3
Creatinine	0.77 (0.20)	0.76 (0.17)	0.79 (0.24)	>0.9
Uric_Acid	5.51 (1.41)	5.64 (1.31)	5.36 (1.59)	0.8
ALT	28 (13)	31 (15)	23 (10)	0.3
AST	24 (8)	25 (10)	23 (4)	>0.9
GGT	35 (17)	39 (20)	31 (14)	0.5
ALP	95 (17)	97 (19)	93 (15)	0.7
Leptin_ng_ml	2.51 (1.66)	3.66 (1.44)	1.23 (0.61)	<0.001
Adiponectin	9.7 (4.5)	8.0 (2.1)	11.5 (5.7)	0.2
Adiponectin_Leptin_Index	7 (9)	2 (1)	13 (10)	<0.001
Ghrelin	277 (169)	258 (88)	297 (234)	>0.9
¹ Mean (SD)
² Wilcoxon rank sum exact test; Wilcoxon rank sum test

selected_data <- data %>%
  select(
    Insulin_resistance, CRP, IL_1B, IL_6, IL_10, TNF_a, PAI_1, MCP_1, Resistin, Vaspin
  )

summary_table <- selected_data %>%
  tbl_summary(
    by = Insulin_resistance, 
    missing = "no", 
    type = list(
      CRP ~ "continuous",
      IL_1B ~ "continuous",
      IL_6 ~ "continuous",
      IL_10 ~ "continuous",
      TNF_a ~ "continuous",
      PAI_1 ~ "continuous",
      MCP_1 ~ "continuous",
      Resistin ~ "continuous",
      Vaspin ~ "continuous"
    ), 
    statistic = list(
      CRP ~ "{mean} ({sd})", 
      IL_1B ~ "{mean} ({sd})", 
      IL_6 ~ "{mean} ({sd})", 
      IL_10 ~ "{mean} ({sd})", 
      TNF_a ~ "{mean} ({sd})",
      PAI_1 ~ "{mean} ({sd})",
      MCP_1 ~ "{mean} ({sd})",
      Resistin ~ "{mean} ({sd})",
      Vaspin ~ "{mean} ({sd})"
    ) 
  ) %>%
  add_overall() %>% 
  add_p() %>% 
  modify_header(label = "**Variable**") %>% 
  modify_caption("**Table: Inflammatory markers and immune function characteristics by insulin resistance status**") %>%
  as_gt() %>% 
  gt::tab_style(
    style = gt::cell_text(weight = "bold"),
    locations = gt::cells_column_labels()
  )

## The following warnings were returned during `as_gt()`:
## ! For variable `IL_10` (`Insulin_resistance`) and "estimate", "statistic",
##   "p.value", "conf.low", and "conf.high" statistics: cannot compute exact
##   p-value with ties
## ! For variable `IL_10` (`Insulin_resistance`) and "estimate", "statistic",
##   "p.value", "conf.low", and "conf.high" statistics: cannot compute exact
##   confidence intervals with ties
## ! For variable `IL_1B` (`Insulin_resistance`) and "estimate", "statistic",
##   "p.value", "conf.low", and "conf.high" statistics: cannot compute exact
##   p-value with ties
## ! For variable `IL_1B` (`Insulin_resistance`) and "estimate", "statistic",
##   "p.value", "conf.low", and "conf.high" statistics: cannot compute exact
##   confidence intervals with ties
## ! For variable `IL_6` (`Insulin_resistance`) and "estimate", "statistic",
##   "p.value", "conf.low", and "conf.high" statistics: cannot compute exact
##   p-value with ties
## ! For variable `IL_6` (`Insulin_resistance`) and "estimate", "statistic",
##   "p.value", "conf.low", and "conf.high" statistics: cannot compute exact
##   confidence intervals with ties

summary_table

**Table: Inflammatory markers and immune function characteristics by insulin resistance status**
Variable	Overall N = 19¹	Insulin resistance N = 10¹	Insulin sensitivity N = 9¹	p-value²
CRP	2.90 (2.64)	3.54 (2.73)	2.19 (2.49)	0.2
IL_1B	1.31 (0.38)	1.28 (0.42)	1.33 (0.35)	>0.9
IL_6	4.50 (3.09)	5.08 (3.70)	3.85 (2.28)	0.3
IL_10	3.61 (1.22)	3.27 (0.92)	3.98 (1.45)	0.3
TNF_a	126,513 (74,386)	125,203 (84,729)	127,968 (66,098)	0.7
PAI_1	7,385 (2,423)	8,639 (2,590)	5,991 (1,228)	0.028
MCP_1	15.3 (4.7)	17.9 (4.3)	12.4 (3.5)	0.013
Resistin	6.3 (4.2)	6.5 (4.3)	6.0 (4.4)	0.8
Vaspin	8,308 (5,761)	8,712 (6,770)	7,860 (4,761)	0.8
¹ Mean (SD)
² Wilcoxon rank sum exact test; Wilcoxon rank sum test

#install.packages(c("ggpubr", "cowplot"))

library(ggpubr)
library(cowplot)
library(ggplot2)

data <- data %>%
  mutate(
    Glucose = as.numeric(as.character(Glucose )),
    Insulin_2 = as.numeric(as.character(Insulin_2)),
    HOMA_IR = as.numeric(as.character(HOMA_IR)),
    Glucagon = as.numeric(as.character(Glucagon)),
    GIP = as.numeric(as.character(GIP)),
    GLP_1 = as.numeric(as.character(GLP_1)),
    C_Peptide = as.numeric(as.character(C_Peptide)),
    Total_Cholesterol = as.numeric(as.character(Total_Cholesterol)),
    Triglycerides = as.numeric(as.character(Triglycerides)),
    HDL_C = as.numeric(as.character(HDL_C)),
    LDL_C = as.numeric(as.character(LDL_C)),
    Apo_A = as.numeric(as.character(Apo_A)),
    Apo_B = as.numeric(as.character(Apo_B)),
    Creatinine = as.numeric(as.character(Creatinine)),
    Uric_Acid = as.numeric(as.character(Uric_Acid)),
    ALT = as.numeric(as.character(ALT)),
    AST = as.numeric(as.character(AST)),
    GGT = as.numeric(as.character(GGT)),
    ALP = as.numeric(as.character(ALP)),
    Age = as.numeric(Age),
    BMI = as.numeric(BMI)
  )
 custom_colors <- c("Insulin sensitivity" = "#1f77b4", "Insulin resistance" = "#ff7f0e")

theme_custom <- theme_pubr() + 
  theme(axis.title.x = element_blank(), 
        axis.text = element_text(size = 10), 
        legend.position = "none")

# Función para crear gráficos de boxplot para cada variable
create_boxplot <- function(data, variable, var_label, y_label, y_lim) {
  ggplot(data, aes(x = Insulin_resistance, y = !!sym(variable), fill = Insulin_resistance)) +
    geom_boxplot(outlier.shape = NA) +
    geom_jitter(width = 0.2, alpha = 0.5, color = "black") +
    stat_compare_means(aes(label = ..p.signif..), method = "t.test", label.x = 1.5, label.y = y_lim - 0.1*y_lim) +
    labs(title = var_label, y = y_label, fill = " ") +
    scale_x_discrete(labels = c("Insulin sensitivity", "Insulin resistance")) +
    scale_fill_manual(values = custom_colors) +
    ylim(0, y_lim) +  # Ajustar la escala del eje y según la variable
    theme_custom
}
# Crear gráficos para cada variable
p1 <- create_boxplot(data, "Glucose", "", "Glucose (mg/dL)", 200)
p2 <- create_boxplot(data, "Insulin_2", "", "Insulin (μU/mL)", 50)
p3 <- create_boxplot(data, "HOMA_IR", "", "HOMA-IR", 15)
p4 <- create_boxplot(data, "Glucagon", "", "Glucagon (pg/mL)", 4000)
p5 <- create_boxplot(data, "GIP", "", "GIP (pg/mL)", 2000)
p6 <- create_boxplot(data, "GLP_1", "", "GLP-1 (pg/mL)", 600)
p7 <- create_boxplot(data, "C_Peptide", "", "C-Peptide (pmol/L)", 3000)
p8 <- create_boxplot(data, "Total_Cholesterol", "", "Total Cholesterol (mg/dL)", 300)
p9 <- create_boxplot(data, "Triglycerides", "", "Triglycerides (mg/dL)", 500)
p10 <- create_boxplot(data, "HDL_C", "", "HDL Cholesterol (mg/dL)", 100)
p11 <- create_boxplot(data, "LDL_C", "", "LDL Cholesterol (mg/dL)", 200)
p12 <- create_boxplot(data, "Apo_A", "", "Apo A (mg/dL)", 200)
p13 <- create_boxplot(data, "Apo_B", "", "Apo B (mg/dL)", 200)
p14 <- create_boxplot(data, "Leptin_ng_ml", "", "Leptin (ng/mL)", 100)
p15 <- create_boxplot(data, "Adiponectin", "", "Adiponectin (μg/mL)", 40)
p16 <- create_boxplot(data, "Adiponectin_Leptin_Index", "", "Adiponectin/Leptin Index", 50)
p17 <- create_boxplot(data, "Ghrelin", "G", "Ghrelin (pg/mL)", 1000)
p18 <- create_boxplot(data, "Creatinine", "", "Creatinine (mg/dL)", 2)
p19 <- create_boxplot(data, "Uric_Acid", "", "Uric Acid (mg/dL)", 10)
p20 <- create_boxplot(data, "ALT", "", "ALT (U/L)", 100)
p21 <- create_boxplot(data, "AST", "", "AST (U/L)", 100)
p22 <- create_boxplot(data, "GGT", "", "GGT (U/L)", 100)
p23 <- create_boxplot(data, "ALP", "", "ALP (U/L)", 150)
p24 <- create_boxplot(data, "CRP", "", "CRP (mg/L)", 10)
p25 <- create_boxplot(data, "IL_1B", "", "IL-1B (pg/mL)", 10)
p26 <- create_boxplot(data, "IL_6", "", "IL-6 (pg/mL)", 10)
p27 <- create_boxplot(data, "IL_10", "", "IL-10 (pg/mL)", 10)
p28 <- create_boxplot(data, "TNF_a", "", "TNF-α (pg/mL)", 500000)
p29 <- create_boxplot(data, "PAI_1", "", "PAI-1 (ng/mL)", 20000)
p30 <- create_boxplot(data, "MCP_1", "", "MCP-1 (pg/mL)", 100)
p31 <- create_boxplot(data, "Resistin", "", "Resistin (ng/mL)", 100)
p32 <- create_boxplot(data, "Vaspin", "", "Vaspin (ng/mL)", 30000)

print(p1)

print(p2)

print(p3)

print(p4)

print(p5)

print(p6)

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

print(p7)

print(p8)

print(p9)

print(p10)

print(p11)

print(p12)

print(p13)

print(p14)

print(p15)

print(p16)

print(p17)

print(p18)

print(p19)

print(p20)

print(p21)

print(p22)

print(p23)

print(p24)

print(p25)

print(p26)

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

print(p27)

print(p28)

print(p29)

print(p30)

print(p31)

print(p32)

custom_colors <- c("Insulin sensitivity" = "#1f77b4", "Insulin resistance" = "#ff7f0e")

theme_custom <- theme_pubr() + 
  theme(axis.title.x = element_blank(), 
        axis.text = element_text(size = 10), 
        legend.position = "none")

# Función para crear gráficos de boxplot para cada variable
create_boxplot <- function(data, variable, var_label, y_label, y_lim) {
  ggplot(data, aes(x = Insulin_resistance, y = !!sym(variable), fill = Insulin_resistance)) +
    geom_boxplot(outlier.shape = NA) +
    geom_jitter(width = 0.2, alpha = 0.5, color = "black") +
    stat_compare_means(aes(label = ..p.signif..), method = "t.test", label.x = 1.5, label.y = y_lim - 0.1 * y_lim) +
    labs(title = var_label, y = y_label, fill = " ") +
    scale_x_discrete(labels = c("Insulin sensitivity", "Insulin resistance")) +
    scale_fill_manual(values = custom_colors) +
    ylim(0, y_lim) +  # Ajustar la escala del eje y según la variable
    theme_custom
}

p1 <- create_boxplot(data, "hsa-miR-370-3p22", "", "hsa-miR-370-3p22 (normalized counts)", 500)
p2 <- create_boxplot(data, "hsa-miR-376c-3p20", "", "hsa-miR-376c-3p20 (normalized counts)", 500)
p3 <- create_boxplot(data, "hsa-miR-769-5p86", "", "hsa-miR-769-5p86 (normalized counts)", 15000)
p4 <- create_boxplot(data, "hsa-miR-3913-5p87", "", "hsa-miR-3913-5p87 (normalized counts)", 500)
p5 <- create_boxplot(data, "hsa-miR-338-5p01", "", "hsa-miR-338-5p01 (normalized counts)", 500)
p6 <- create_boxplot(data, "hsa-miR-203a-3p64", "", "hsa-miR-203a-3p64 (normalized counts)", 500)
p7 <- create_boxplot(data, "hsa-miR-16-5p69", "", "hsa-miR-16-5p69 (normalized counts)", 150000)
p8 <- create_boxplot(data, "hsa-miR-15a-5p68", "", "hsa-miR-15a-5p68 (normalized counts)", 15000)
p9 <- create_boxplot(data, "hsa-let-7a-5p62", "", "hsa-let-7a-5p62 (normalized counts)", 600000)
p10 <- create_boxplot(data, "hsa-miR-451047", "", "hsa-miR-451047 (normalized counts)", 100)

print(p1)

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

print(p2)

## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).

print(p3)

## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).

print(p4)

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

print(p5)

## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).

print(p6)

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

print(p7)

print(p8)

print(p9)

print(p10)

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

data <- data %>%
  mutate(
    Arm_Circumference_BI = as.numeric(as.character(Arm_Circumference_BI)),
    Fat_BI = as.numeric(as.character(Fat_BI)),
    Lean_Mass_BI = as.numeric(as.character(Lean_Mass_BI)),
    Muscle_Mass_BI = as.numeric(as.character(Muscle_Mass_BI)),
    Fat_Percentage_BI = as.numeric(as.character(Fat_Percentage_BI)),
    Lean_Mass_Percentage_BI = as.numeric(as.character(Lean_Mass_Percentage_BI)),
    Muscle_Mass_Percentage_BI = as.numeric(as.character(Muscle_Mass_Percentage_BI))
  )

## Warning: There were 7 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `Arm_Circumference_BI =
##   as.numeric(as.character(Arm_Circumference_BI))`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 6 remaining warnings.

selected_data <- data %>%
  select(
    Insulin_resistance, BMI, Weight, Height, WHR, Arm_Circumference_BI, Fat_BI, Lean_Mass_BI, Muscle_Mass_BI, Fat_Percentage_BI, Lean_Mass_Percentage_BI, Muscle_Mass_Percentage_BI
  )

summary_table <- selected_data %>%
  tbl_summary(
    by = Insulin_resistance, 
    missing = "no", 
    type = list(
      BMI ~ "continuous",
      Weight ~ "continuous",
      Height ~ "continuous",
      WHR ~ "continuous",
      Arm_Circumference_BI ~ "continuous",
      Fat_BI ~ "continuous",
      Lean_Mass_BI ~ "continuous",
      Muscle_Mass_BI ~ "continuous",
      Fat_Percentage_BI ~ "continuous",
      Lean_Mass_Percentage_BI ~ "continuous",
      Muscle_Mass_Percentage_BI ~ "continuous"
    ), 
    statistic = list(
      BMI ~ "{mean} ({sd})", 
      Weight ~ "{mean} ({sd})", 
      Height ~ "{mean} ({sd})", 
      WHR ~ "{mean} ({sd})", 
      Arm_Circumference_BI ~ "{mean} ({sd})",
      Fat_BI ~ "{mean} ({sd})",
      Lean_Mass_BI ~ "{mean} ({sd})",
      Muscle_Mass_BI ~ "{mean} ({sd})",
      Fat_Percentage_BI ~ "{mean} ({sd})",
      Lean_Mass_Percentage_BI ~ "{mean} ({sd})",
      Muscle_Mass_Percentage_BI ~ "{mean} ({sd})"
    ) 
  ) %>%
  add_overall() %>% 
  add_p() %>% 
  modify_header(label = "**Variable**") %>% 
  modify_caption("**Table: Anthropometric measurements by insulin resistance status**") %>%
  as_gt() %>% 
  gt::tab_style(
    style = gt::cell_text(weight = "bold"),
    locations = gt::cells_column_labels()
  )

## The following warnings were returned during `as_gt()`:
## ! For variable `Arm_Circumference_BI` (`Insulin_resistance`) and "estimate",
##   "statistic", "p.value", "conf.low", and "conf.high" statistics: cannot
##   compute exact p-value with ties
## ! For variable `Arm_Circumference_BI` (`Insulin_resistance`) and "estimate",
##   "statistic", "p.value", "conf.low", and "conf.high" statistics: cannot
##   compute exact confidence intervals with ties
## ! For variable `Muscle_Mass_BI` (`Insulin_resistance`) and "estimate",
##   "statistic", "p.value", "conf.low", and "conf.high" statistics: cannot
##   compute exact p-value with ties
## ! For variable `Muscle_Mass_BI` (`Insulin_resistance`) and "estimate",
##   "statistic", "p.value", "conf.low", and "conf.high" statistics: cannot
##   compute exact confidence intervals with ties
## ! For variable `WHR` (`Insulin_resistance`) and "estimate", "statistic",
##   "p.value", "conf.low", and "conf.high" statistics: cannot compute exact
##   p-value with ties
## ! For variable `WHR` (`Insulin_resistance`) and "estimate", "statistic",
##   "p.value", "conf.low", and "conf.high" statistics: cannot compute exact
##   confidence intervals with ties

summary_table

**Table: Anthropometric measurements by insulin resistance status**
Variable	Overall N = 19¹	Insulin resistance N = 10¹	Insulin sensitivity N = 9¹	p-value²
BMI	26.9 (3.9)	28.6 (4.4)	25.1 (2.4)	0.079
Weight	68 (13)	69 (17)	67 (5)	>0.9
Height	159 (11)	155 (12)	163 (9)	0.13
WHR	91 (10)	95 (11)	87 (6)	0.10
Arm_Circumference_BI	33.03 (3.25)	34.81 (3.49)	31.24 (1.75)	0.015
Fat_BI	25.6 (7.1)	29.0 (7.1)	22.2 (5.6)	0.063
Lean_Mass_BI	42 (11)	41 (13)	42 (8)	0.7
Muscle_Mass_BI	24.1 (6.7)	24.1 (8.3)	24.1 (5.3)	0.7
Fat_Percentage_BI	37 (9)	40 (9)	34 (9)	0.2
Lean_Mass_Percentage_BI	60 (8)	58 (8)	62 (8)	0.5
Muscle_Mass_Percentage_BI	34.8 (5.4)	33.6 (5.4)	36.0 (5.5)	0.5
¹ Mean (SD)
² Wilcoxon rank sum exact test; Wilcoxon rank sum test

data <- data %>%
  mutate(
    Arm_Circumference_BI = as.numeric(as.character(Arm_Circumference_BI)),
    Fat_BI = as.numeric(as.character(Fat_BI)),
    Lean_Mass_BI = as.numeric(as.character(Lean_Mass_BI)),
    Muscle_Mass_BI = as.numeric(as.character(Muscle_Mass_BI)),
    Fat_Percentage_BI = as.numeric(as.character(Fat_Percentage_BI)),
    Lean_Mass_Percentage_BI = as.numeric(as.character(Lean_Mass_Percentage_BI)),
    Muscle_Mass_Percentage_BI = as.numeric(as.character(Muscle_Mass_Percentage_BI))
  )
 custom_colors <- c("Insulin sensitivity" = "#1f77b4", "Insulin resistance" = "#ff7f0e")

theme_custom <- theme_pubr() + 
  theme(axis.title.x = element_blank(), 
        axis.text = element_text(size = 10), 
        legend.position = "none")

create_boxplot <- function(data, variable, var_label, y_label, y_lim) {
  ggplot(data, aes(x = Insulin_resistance, y = !!sym(variable), fill = Insulin_resistance)) +
    geom_boxplot(outlier.shape = NA) +
    geom_jitter(width = 0.2, alpha = 0.5, color = "black") +
    stat_compare_means(aes(label = ..p.signif..), method = "t.test", label.x = 1.5, label.y = y_lim - 0.1*y_lim) +
    labs(title = var_label, y = y_label, fill = " ") +
    scale_x_discrete(labels = c("Insulin sensitivity", "Insulin resistance")) +
    scale_fill_manual(values = custom_colors) +
    ylim(0, y_lim) +  # Ajustar la escala del eje y según la variable
    theme_custom
}
p1 <- create_boxplot(data, "BMI", "", "BMI (kg/m²)", 50)
p2 <- create_boxplot(data, "Weight", "", "Weight (kg)", 150)
p3 <- create_boxplot(data, "Height", "", "Height (cm)", 200)
p4 <- create_boxplot(data, "WHR", "", "Waist-to-Hip Ratio", 1.5)
p5 <- create_boxplot(data, "Arm_Circumference_BI", "", "Arm Circumference (cm)", 50)
p6 <- create_boxplot(data, "Fat_BI", "", "Fat Mass (kg)", 100)
p7 <- create_boxplot(data, "Lean_Mass_BI", "", "Lean Mass (kg)", 100)
p8 <- create_boxplot(data, "Muscle_Mass_BI", "", "Muscle Mass (kg)", 100)
p9 <- create_boxplot(data, "Fat_Percentage_BI", "", "Fat Percentage (%)", 100)
p10 <- create_boxplot(data, "Lean_Mass_Percentage_BI", "", "Lean Mass Percentage (%)", 100)
p11 <- create_boxplot(data, "Muscle_Mass_Percentage_BI", "", "Muscle Mass Percentage (%)", 100)

print(p1)

print(p2)

print(p3)

print(p4)

## Warning: Removed 19 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 19 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Computation failed in `stat_compare_means()`.
## Caused by error:
## ! argument "x" is missing, with no default

## Warning in min(x): no non-missing arguments to min; returning Inf

## Warning in max(x): no non-missing arguments to max; returning -Inf

## Warning in min(d[d > tolerance]): no non-missing arguments to min; returning
## Inf

## Warning in min(x): no non-missing arguments to min; returning Inf

## Warning in max(x): no non-missing arguments to max; returning -Inf

## Warning in stats::runif(length(x), -amount, amount): NAs produced

## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_point()`).

print(p5)

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

print(p6)

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

print(p7)

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

print(p8)

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

print(p9)

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

print(p10)

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

print(p11)

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

#install.packages("factoextra")

library(factoextra)

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

# PCA para reducir la dimensión de las variables 
data <- data %>%
  mutate_at(vars(
    Glucose, Insulin_2, HOMA_IR, Glucagon, GIP, GLP_1, C_Peptide, 
    Total_Cholesterol, Triglycerides, HDL_C, LDL_C, Apo_A, Apo_B, 
    Creatinine, Uric_Acid, ALT, AST, GGT, ALP, 
    BMI, Weight, Height, WHR, Arm_Circumference_BI, Fat_BI, 
    Lean_Mass_BI, Muscle_Mass_BI, Fat_Percentage_BI, 
    Lean_Mass_Percentage_BI, Muscle_Mass_Percentage_BI, 
    CRP, IL_1B, IL_6, IL_10, TNF_a, PAI_1, MCP_1, Resistin, Vaspin), 
    as.numeric)

data_clean <- data %>%
  drop_na()
clinical_vars <- data_clean %>%
  select(Glucose, Insulin_2, HOMA_IR, Glucagon, GIP, GLP_1, C_Peptide, 
         Total_Cholesterol, Triglycerides, HDL_C, LDL_C, Apo_A, Apo_B, 
         Creatinine, Uric_Acid, ALT, AST, GGT, ALP)


anthropometric_vars <- data_clean %>%
  select(BMI, Weight, Height, WHR, Arm_Circumference_BI, Fat_BI, 
         Lean_Mass_BI, Muscle_Mass_BI, Fat_Percentage_BI, 
         Lean_Mass_Percentage_BI, Muscle_Mass_Percentage_BI)


inflammatory_vars <- data_clean %>%
  select(CRP, IL_1B, IL_6, IL_10, TNF_a, PAI_1, MCP_1, Resistin, Vaspin)


pca_clinical <- prcomp(clinical_vars, scale. = TRUE)
fviz_pca_var(pca_clinical, col.var = "contrib", repel = TRUE) + 
  labs(title = "PCA - Clinical Variables")

pca_anthropometric <- prcomp(anthropometric_vars, scale. = TRUE)
fviz_pca_var(pca_anthropometric, col.var = "contrib", repel = TRUE) + 
  labs(title = "PCA - Anthropometric Variables")

pca_inflammatory <- prcomp(inflammatory_vars, scale. = TRUE)
fviz_pca_var(pca_inflammatory, col.var = "contrib", repel = TRUE) + 
  labs(title = "PCA - Inflammatory Variables")

# Extraer las contribuciones de las variables en el primer componente
contrib_clinical <- get_pca_var(pca_clinical)$contrib
contrib_anthropometric <- get_pca_var(pca_anthropometric)$contrib
contrib_inflammatory <- get_pca_var(pca_inflammatory)$contrib

# Mostrar las contribuciones más significativas
print("Contribuciones significativas - Variables clínicas")

## [1] "Contribuciones significativas - Variables clínicas"

print(contrib_clinical)

##                         Dim.1      Dim.2        Dim.3       Dim.4        Dim.5
## Glucose            0.77669712 14.5456407 3.892845e-04  0.46085941 2.390421e-01
## Insulin_2         12.08644555  1.8069554 9.792895e-01  0.84409050 5.121913e+00
## HOMA_IR           11.46860544  2.7480306 8.563561e-01  0.79565219 5.331350e+00
## Glucagon           0.21989407  2.7739022 1.561723e+01  5.28836048 6.260512e-01
## GIP                0.05366267  0.1120226 9.353045e+00 25.79667919 1.357399e-03
## GLP_1              0.68263678  6.5155558 5.169339e+00 11.23200899 1.217692e+01
## C_Peptide          6.51429709  1.7372950 8.930233e+00  0.19735808 4.832149e+00
## Total_Cholesterol  7.50058943 13.6504187 2.115962e+00  1.28599526 7.916913e-01
## Triglycerides      8.54319132  0.2837430 2.352128e-01  8.30126252 1.148088e+01
## HDL_C              0.95647339 20.7714161 3.849401e-02  0.48111974 1.154606e+01
## LDL_C              8.43026579  7.4588210 4.621821e+00  0.50683336 1.754428e+00
## Apo_A              0.41958483 16.0832097 3.246179e-01  0.07203933 2.277051e+01
## Apo_B             11.39408078  3.8933828 1.117428e+00  1.56086984 5.415562e+00
## Creatinine         1.39821653  0.5116297 2.153387e+01  0.54059569 1.381175e+01
## Uric_Acid          2.09374952  4.1108205 1.350545e+01  0.44180812 5.814114e-01
## ALT                9.96440849  1.5836763 1.864740e+00  6.65664145 4.470664e-02
## AST                7.69485766  0.2046502 4.167069e+00  3.88548310 2.061951e+00
## GGT                9.27444163  0.6845802 1.704437e+00  1.89161686 1.412074e+00
## ALP                0.52790192  0.5242495 7.865016e+00 29.76072588 2.004833e-04
##                          Dim.6       Dim.7       Dim.8        Dim.9
## Glucose           36.448656340  2.63291657  1.58106903  0.008327375
## Insulin_2          0.005220409  0.34356379  0.23988610 13.742327489
## HOMA_IR            0.349877077  0.81936039  0.16958888 15.127880221
## Glucagon           7.621676595 24.53979899  4.15750697 10.639823042
## GIP               13.947104957  9.18117475  0.00175932  0.451276239
## GLP_1              6.081542828  0.07727763  3.01834025 21.954140762
## C_Peptide          2.283900666  2.96642727  0.74717895 14.866593471
## Total_Cholesterol  0.094682391  0.04157343  0.49013593  1.768490387
## Triglycerides      5.912221081  1.44346357  4.15155663  2.139770669
## HDL_C              0.041654269  5.53986474  0.58130076  1.829189151
## LDL_C              0.325957076  1.31614339  3.86149033  7.962154947
## Apo_A              3.321763855  0.76910624  0.81285563  0.011212960
## Apo_B              0.059710386  0.14954781  3.27074709  0.557596725
## Creatinine         0.768279416  1.56933810  1.69623041  0.005068993
## Uric_Acid          0.371164570  5.07291123 46.00367454  5.971284135
## ALT                3.350598155  1.45930470  7.84555996  2.110510541
## AST                4.704662310 21.69374179 10.09077416  0.029704425
## GGT               11.823642415 13.53325314  1.83081729  0.060223918
## ALP                2.487685204  6.85123247  9.44952778  0.764424549
##                         Dim.10       Dim.11     Dim.12       Dim.13
## Glucose           2.133081e+01  0.297544684  0.1081837 11.421850145
## Insulin_2         8.393718e-01  2.080012854  2.6493258  1.028689190
## HOMA_IR           6.527779e-04  2.475657055  3.4839458  0.252389091
## Glucagon          1.614339e+00  3.030447181 15.5138113  1.421811054
## GIP               7.408678e-01 16.228928043  4.0978266 15.049278651
## GLP_1             7.834551e+00  2.603600027  3.7630702 15.917291472
## C_Peptide         2.405522e+01  1.273023717  0.1209428 14.421234875
## Total_Cholesterol 3.078153e+00  1.332770295  0.1452860  0.001059504
## Triglycerides     1.934076e+00  1.692670579  0.8094084 19.399561489
## HDL_C             3.668402e+00 10.811108841  9.3068810  9.461471190
## LDL_C             3.983115e+00  0.384632434  1.1945952  0.095574919
## Apo_A             2.235464e+00 29.246420922  0.2164751  3.337296752
## Apo_B             2.455449e+00  0.510668625  2.2858479  1.328138933
## Creatinine        6.007625e+00  0.079485240 15.3973713  3.904261533
## Uric_Acid         2.347609e+00  0.009405086  5.0842973  0.074086275
## ALT               4.120823e+00  6.222654952  5.8057446  0.014110340
## AST               1.857061e+00  0.630746875 13.3553434  0.393286481
## GGT               5.872241e+00 10.492286322  8.1231088  0.861723131
## ALP               6.024174e+00 10.597936267  8.5385351  1.616884978
##                         Dim.14      Dim.15       Dim.16       Dim.17
## Glucose            0.099764783  7.90682013  0.768170587 3.598897e-02
## Insulin_2          0.164580832  7.94009056  0.243815232 6.775170e+00
## HOMA_IR            0.362378870  4.91994968  2.418861045 5.659492e+00
## Glucagon           0.083129917  6.08711000  0.001517696 6.932416e-01
## GIP                0.508219927  0.76513209  2.946383645 7.121974e-01
## GLP_1              0.004260783  0.68283988  0.590529630 1.447991e+00
## C_Peptide          4.000298522  2.32041761  9.982330677 6.856087e-01
## Total_Cholesterol  0.049674516  0.18035029  0.041343436 9.535719e+00
## Triglycerides      0.102594402  8.22249730 17.790510404 3.032162e+00
## HDL_C              5.872793854  3.96689438  9.405842307 1.923898e-02
## LDL_C              0.690282773  1.73697266  9.342538935 1.085644e+01
## Apo_A              0.157731387  5.60258680 11.447909325 2.948095e+00
## Apo_B              0.073706333  0.01125151  2.327918250 5.515372e+01
## Creatinine        19.081335703  4.46189075  8.434301265 7.986496e-01
## Uric_Acid          7.049694127  6.09435238  1.170374694 8.664746e-04
## ALT               43.836775665  4.20324427  0.762897134 1.501273e-01
## AST               15.471775429  1.19926694 11.447103518 1.088123e+00
## GGT                0.358975590 31.80124521  0.031635754 1.977862e-01
## ALP                2.032026588  1.89708756 10.846016467 2.093788e-01
##                         Dim.18
## Glucose           1.058604e+00
## Insulin_2         3.410863e+01
## HOMA_IR           3.383529e+01
## Glucagon          5.520914e-02
## GIP               4.266632e-02
## GLP_1             1.966165e-01
## C_Peptide         5.058239e-02
## Total_Cholesterol 2.043512e+01
## Triglycerides     5.295806e-01
## HDL_C             8.841999e-01
## LDL_C             1.916811e+00
## Apo_A             1.768003e-01
## Apo_B             6.632696e+00
## Creatinine        8.501754e-05
## Uric_Acid         1.311730e-02
## ALT               2.719408e-03
## AST               1.924827e-02
## GGT               3.655130e-02
## ALP               5.473681e-03

print("Contribuciones significativas - Variables antropométricas")

## [1] "Contribuciones significativas - Variables antropométricas"

print(contrib_anthropometric)

##                                 Dim.1       Dim.2     Dim.3       Dim.4
## BMI                        1.70821217 17.54217873 17.167621 21.21104468
## Weight                     4.48251378 15.05269400 15.130189  2.74660973
## Height                    15.58586404  0.09809484  1.273223 42.57472042
## WHR                        0.42616125 18.47026308 16.588642  0.03915646
## Arm_Circumference_BI       0.03005978 20.53372648 12.945179  4.58888290
## Fat_BI                     5.25899485 13.89383211  6.070417 17.06632732
## Lean_Mass_BI              13.58742439  4.61673308  1.639327  0.04168809
## Muscle_Mass_BI            13.59123465  4.70765406  1.151761  0.25505532
## Fat_Percentage_BI         15.09357918  2.12864020  4.562062  4.83456549
## Lean_Mass_Percentage_BI   14.63398802  2.00240917 12.347319  2.91541811
## Muscle_Mass_Percentage_BI 15.60196789  0.95377425 11.124260  3.72653150
##                                  Dim.5     Dim.6      Dim.7        Dim.8
## BMI                       4.029972e-01  3.050259 31.6955130  5.515348783
## Weight                    5.135106e-04  1.663216 11.7518344  3.549662261
## Height                    2.892643e-01 25.317902 13.3227782  0.267260979
## WHR                       6.209723e+01  1.383918  0.9719584  0.007088808
## Arm_Circumference_BI      9.957818e+00 30.784436  7.9936379  9.875349648
## Fat_BI                    1.218039e+01  7.302572  0.3033940 22.712705191
## Lean_Mass_BI              2.138148e+00 13.703353  1.2210888 16.508805493
## Muscle_Mass_BI            1.186410e+00  6.893031  4.9634803  1.612837949
## Fat_Percentage_BI         4.503889e+00  2.920276  4.9079490 10.946252682
## Lean_Mass_Percentage_BI   5.244270e+00  5.658495 21.9197034  0.231813009
## Muscle_Mass_Percentage_BI 1.999075e+00  1.322542  0.9486626 28.772875197
##                                  Dim.9       Dim.10       Dim.11
## BMI                        1.416457461  0.273307968 1.706020e-02
## Weight                     3.737736745 32.089422801 9.795608e+00
## Height                     0.830997882  0.405435408 3.445901e-02
## WHR                        0.014305610  0.001138472 1.389283e-04
## Arm_Circumference_BI       3.281265820  0.009472543 1.718035e-04
## Fat_BI                     4.185644106  8.152536091 2.873192e+00
## Lean_Mass_BI               0.003208223  1.426553874 4.511367e+01
## Muscle_Mass_BI            16.519473817 33.432721884 1.568634e+01
## Fat_Percentage_BI         29.570450278 15.330509218 5.201827e+00
## Lean_Mass_Percentage_BI   16.319801911  1.493672336 1.723311e+01
## Muscle_Mass_Percentage_BI 24.120658146  7.385229404 4.044424e+00

print("Contribuciones significativas - Variables inflamatorias")

## [1] "Contribuciones significativas - Variables inflamatorias"

print(contrib_inflammatory)

##                Dim.1       Dim.2       Dim.3        Dim.4     Dim.5      Dim.6
## CRP       2.65167546 25.13535550  0.48529426  0.002278190 32.522351 18.8835121
## IL_1B    27.54889479  0.01544771  0.08381446  0.985713923  3.471677  1.9132063
## IL_6     16.48252490 15.31897015  0.11359764  1.371064035  5.486749  1.1351918
## IL_10    17.04476240 10.61073249  0.06224741  0.036215805 20.198296  6.6021704
## TNF_a    15.79533174  1.09974599 16.47705269 17.373881843 10.011527  0.4973376
## PAI_1    13.95367223  3.44323637 12.26275926 11.100678148  8.877874 36.3790355
## MCP_1     0.09289895 14.06715437 51.43313901  0.003169943  2.692519 20.2098685
## Resistin  1.60200721 22.76337815 17.21108056  1.828561517  3.934144 11.1576891
## Vaspin    4.82823232  7.54597927  1.87101473 67.298436595 12.804864  3.2219887
##                Dim.7       Dim.8      Dim.9
## CRP       2.90846536  3.19468191 14.2163863
## IL_1B     0.06599334 59.21698262  6.6982700
## IL_6     10.75212563  2.02905973 47.3107168
## IL_10     0.01914710 31.73767142 13.6887572
## TNF_a    31.20601398  0.08596509  7.4531441
## PAI_1    12.83812639  0.67808273  0.4665351
## MCP_1     3.37461285  0.23078673  7.8958511
## Resistin 38.05271340  1.56789284  1.8825335
## Vaspin    0.78280194  1.25887693  0.3878060

library(ggplot2)
library(corrplot)

## corrplot 0.94 loaded

library(reshape2)

selected_vars <- data_clean %>%
  select(Insulin_2, HOMA_IR, Total_Cholesterol, Triglycerides, Apo_B,  # Clínicas
         Height, Fat_Percentage_BI, Muscle_Mass_Percentage_BI, Lean_Mass_BI, BMI,  # Antropométricas
         IL_1B, IL_6, CRP, TNF_a, PAI_1)  # Inflamatorias

# Calcular la matriz de correlación de Pearson
cor_matrix <- cor(selected_vars, method = "pearson", use = "complete.obs")
print(cor_matrix)

##                             Insulin_2     HOMA_IR Total_Cholesterol
## Insulin_2                  1.00000000  0.99320748        0.43965506
## HOMA_IR                    0.99320748  1.00000000        0.39280851
## Total_Cholesterol          0.43965506  0.39280851        1.00000000
## Triglycerides              0.53859263  0.52370442        0.61775183
## Apo_B                      0.59845828  0.56268900        0.91343052
## Height                     0.20313086  0.20153159        0.28707553
## Fat_Percentage_BI          0.01002150  0.02506200       -0.09317478
## Muscle_Mass_Percentage_BI  0.06039840  0.06570824       -0.02783750
## Lean_Mass_BI               0.56112852  0.57578062        0.28699401
## BMI                        0.64993630  0.66693418        0.23361641
## IL_1B                     -0.33073190 -0.31319967       -0.13574561
## IL_6                      -0.08026782 -0.04596279       -0.42019533
## CRP                        0.21882517  0.30127499       -0.25715225
## TNF_a                     -0.31716166 -0.27830900       -0.29344973
## PAI_1                      0.74909939  0.72467876        0.36593828
##                           Triglycerides       Apo_B      Height
## Insulin_2                    0.53859263  0.59845828  0.20313086
## HOMA_IR                      0.52370442  0.56268900  0.20153159
## Total_Cholesterol            0.61775183  0.91343052  0.28707553
## Triglycerides                1.00000000  0.76614387  0.23075883
## Apo_B                        0.76614387  1.00000000  0.26568309
## Height                       0.23075883  0.26568309  1.00000000
## Fat_Percentage_BI           -0.13338699 -0.09231182 -0.84550389
## Muscle_Mass_Percentage_BI    0.12238050  0.01295374  0.84093383
## Lean_Mass_BI                 0.35714507  0.39212097  0.86668261
## BMI                          0.33626177  0.40643000 -0.27786606
## IL_1B                       -0.29294837 -0.20847191 -0.38964925
## IL_6                        -0.19698430 -0.28064484 -0.39732188
## CRP                          0.08533578 -0.07082323  0.00581612
## TNF_a                       -0.44921705 -0.42928887 -0.13148370
## PAI_1                        0.65775658  0.53690375  0.16912183
##                           Fat_Percentage_BI Muscle_Mass_Percentage_BI
## Insulin_2                        0.01002150                0.06039840
## HOMA_IR                          0.02506200                0.06570824
## Total_Cholesterol               -0.09317478               -0.02783750
## Triglycerides                   -0.13338699                0.12238050
## Apo_B                           -0.09231182                0.01295374
## Height                          -0.84550389                0.84093383
## Fat_Percentage_BI                1.00000000               -0.92084965
## Muscle_Mass_Percentage_BI       -0.92084965                1.00000000
## Lean_Mass_BI                    -0.69165224                0.72784323
## BMI                              0.50618228               -0.51924169
## IL_1B                            0.41635510               -0.39872745
## IL_6                             0.35055307               -0.26975540
## CRP                              0.12564072                0.05693904
## TNF_a                            0.23665109               -0.01926692
## PAI_1                           -0.02806754                0.06841327
##                           Lean_Mass_BI         BMI       IL_1B        IL_6
## Insulin_2                    0.5611285  0.64993630 -0.33073190 -0.08026782
## HOMA_IR                      0.5757806  0.66693418 -0.31319967 -0.04596279
## Total_Cholesterol            0.2869940  0.23361641 -0.13574561 -0.42019533
## Triglycerides                0.3571451  0.33626177 -0.29294837 -0.19698430
## Apo_B                        0.3921210  0.40643000 -0.20847191 -0.28064484
## Height                       0.8666826 -0.27786606 -0.38964925 -0.39732188
## Fat_Percentage_BI           -0.6916522  0.50618228  0.41635510  0.35055307
## Muscle_Mass_Percentage_BI    0.7278432 -0.51924169 -0.39872745 -0.26975540
## Lean_Mass_BI                 1.0000000  0.14827306 -0.40283897 -0.19711086
## BMI                          0.1482731  1.00000000  0.03226654  0.29440551
## IL_1B                       -0.4028390  0.03226654  1.00000000  0.67339372
## IL_6                        -0.1971109  0.29440551  0.67339372  1.00000000
## CRP                          0.2637706  0.35906780  0.20324175  0.64046095
## TNF_a                       -0.2583524 -0.34407315  0.56724912  0.25983851
## PAI_1                        0.3273461  0.39889798 -0.52447928 -0.24496845
##                                    CRP       TNF_a        PAI_1
## Insulin_2                  0.218825166 -0.31716166  0.749099388
## HOMA_IR                    0.301274990 -0.27830900  0.724678757
## Total_Cholesterol         -0.257152247 -0.29344973  0.365938281
## Triglycerides              0.085335782 -0.44921705  0.657756577
## Apo_B                     -0.070823230 -0.42928887  0.536903753
## Height                     0.005816120 -0.13148370  0.169121830
## Fat_Percentage_BI          0.125640719  0.23665109 -0.028067539
## Muscle_Mass_Percentage_BI  0.056939039 -0.01926692  0.068413265
## Lean_Mass_BI               0.263770621 -0.25835236  0.327346106
## BMI                        0.359067797 -0.34407315  0.398897979
## IL_1B                      0.203241752  0.56724912 -0.524479280
## IL_6                       0.640460947  0.25983851 -0.244968446
## CRP                        1.000000000  0.12810197  0.009407812
## TNF_a                      0.128101966  1.00000000 -0.448337233
## PAI_1                      0.009407812 -0.44833723  1.000000000

corrplot(cor_matrix, method = "circle", type = "upper", tl.col = "black", tl.srt = 45, title = "Pearson Correlation Matrix")

melted_cor_matrix <- melt(cor_matrix)

# Crear el gráfico de correlación (heatmap)
ggplot(data = melted_cor_matrix, aes(x=Var1, y=Var2, fill=value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(low = "blue", high = "red", mid = "white", 
                       midpoint = 0, limit = c(-1,1), space = "Lab", 
                       name="Pearson\nCorrelation") +
  theme_minimal() + 
  theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 9, hjust = 1)) +
  coord_fixed() +
  labs(title = "Pearson Correlation Heatmap", x = "", y = "") +
  theme(plot.title = element_text(hjust = 0.5))

selected_vars <- data_clean %>%
  select("hsa-miR-370-3p22", "hsa-miR-376c-3p20", "hsa-miR-769-5p86", "hsa-miR-3913-5p87", "hsa-miR-338-5p01", "hsa-miR-203a-3p64", "hsa-miR-16-5p69", "hsa-miR-15a-5p68", "hsa-let-7a-5p62", "hsa-miR-451047")  # miRNAs

# Calcular la matriz de correlación de Pearson
cor_matrix <- cor(selected_vars, method = "pearson", use = "complete.obs")
print(cor_matrix)

##                   hsa-miR-370-3p22 hsa-miR-376c-3p20 hsa-miR-769-5p86
## hsa-miR-370-3p22        1.00000000         0.4947731        0.8998516
## hsa-miR-376c-3p20       0.49477306         1.0000000        0.5336335
## hsa-miR-769-5p86        0.89985158         0.5336335        1.0000000
## hsa-miR-3913-5p87       0.97719530         0.6099285        0.8582944
## hsa-miR-338-5p01        0.02292609         0.8191406        0.2029753
## hsa-miR-203a-3p64      -0.24658214        -0.2626302       -0.3174541
## hsa-miR-16-5p69        -0.19241559        -0.1986226       -0.2813044
## hsa-miR-15a-5p68       -0.18714437        -0.2016896       -0.2724654
## hsa-let-7a-5p62        -0.16263059        -0.2371070       -0.2221143
## hsa-miR-451047         -0.12618130        -0.1922451       -0.1782774
##                   hsa-miR-3913-5p87 hsa-miR-338-5p01 hsa-miR-203a-3p64
## hsa-miR-370-3p22          0.9771953       0.02292609       -0.24658214
## hsa-miR-376c-3p20         0.6099285       0.81914062       -0.26263015
## hsa-miR-769-5p86          0.8582944       0.20297533       -0.31745411
## hsa-miR-3913-5p87         1.0000000       0.16618025       -0.24211395
## hsa-miR-338-5p01          0.1661802       1.00000000       -0.21544154
## hsa-miR-203a-3p64        -0.2421140      -0.21544154        1.00000000
## hsa-miR-16-5p69          -0.2108774      -0.17242871       -0.14976968
## hsa-miR-15a-5p68         -0.2050342      -0.18088519       -0.09664505
## hsa-let-7a-5p62          -0.2279390      -0.21721258       -0.06635874
## hsa-miR-451047           -0.1914747      -0.17898116       -0.10221485
##                   hsa-miR-16-5p69 hsa-miR-15a-5p68 hsa-let-7a-5p62
## hsa-miR-370-3p22      -0.19241559      -0.18714437     -0.16263059
## hsa-miR-376c-3p20     -0.19862261      -0.20168963     -0.23710704
## hsa-miR-769-5p86      -0.28130438      -0.27246542     -0.22211430
## hsa-miR-3913-5p87     -0.21087735      -0.20503415     -0.22793897
## hsa-miR-338-5p01      -0.17242871      -0.18088519     -0.21721258
## hsa-miR-203a-3p64     -0.14976968      -0.09664505     -0.06635874
## hsa-miR-16-5p69        1.00000000       0.98460430     -0.04070848
## hsa-miR-15a-5p68       0.98460430       1.00000000     -0.06917140
## hsa-let-7a-5p62       -0.04070848      -0.06917140      1.00000000
## hsa-miR-451047        -0.08014196      -0.11042910      0.99249034
##                   hsa-miR-451047
## hsa-miR-370-3p22     -0.12618130
## hsa-miR-376c-3p20    -0.19224508
## hsa-miR-769-5p86     -0.17827745
## hsa-miR-3913-5p87    -0.19147468
## hsa-miR-338-5p01     -0.17898116
## hsa-miR-203a-3p64    -0.10221485
## hsa-miR-16-5p69      -0.08014196
## hsa-miR-15a-5p68     -0.11042910
## hsa-let-7a-5p62       0.99249034
## hsa-miR-451047        1.00000000

corrplot(cor_matrix, method = "circle", type = "upper", tl.col = "black", tl.srt = 45, title = "Pearson Correlation Matrix")

melted_cor_matrix <- melt(cor_matrix)

# Crear el gráfico de correlación (heatmap)
ggplot(data = melted_cor_matrix, aes(x=Var1, y=Var2, fill=value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(low = "blue", high = "red", mid = "white", 
                       midpoint = 0, limit = c(-1,1), space = "Lab", 
                       name="Pearson\nCorrelation") +
  theme_minimal() + 
  theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 9, hjust = 1)) +
  coord_fixed() +
  labs(title = "Pearson Correlation Heatmap", x = "", y = "") +
  theme(plot.title = element_text(hjust = 0.5))

library(ggplot2)
library(ggpubr)
#install.packages("ggpmisc")
library(ggpmisc)

## Loading required package: ggpp

## Registered S3 methods overwritten by 'ggpp':
##   method                  from   
##   heightDetails.titleGrob ggplot2
##   widthDetails.titleGrob  ggplot2

## 
## Attaching package: 'ggpp'

## The following objects are masked from 'package:ggpubr':
## 
##     as_npc, as_npcx, as_npcy

## The following object is masked from 'package:ggplot2':
## 
##     annotate

library(Hmisc)
library(broom)

create_regression_plot <- function(data, x_var, y_var, x_label, y_label) {
  ggplot(data, aes_string(x = x_var, y = y_var)) +
    geom_point(size = 2, alpha = 0.7) +
    geom_smooth(method = "lm", se = TRUE, color = "blue") +  
    stat_fit_glance(method = "lm",
                    method.args = list(formula = y ~ x),
                    aes(label = paste("R² =", signif(..r.squared.., 3), ", p =", signif(..p.value.., 3))),
                    label.x = 3, label.y = max(data[[y_var]], na.rm = TRUE) * 0.95) +  # Mostrar R² y p-valor
    labs(x = x_label, y = y_label, title = paste("Regression between", x_label, "and", y_label)) +
    theme_minimal()
}

# Seleccionar las variables más significativas para la regresión
selected_vars_regression <- data_clean %>%
  select(Insulin_2, HOMA_IR, BMI, IL_6, PAI_1, Apo_B, Total_Cholesterol, Lean_Mass_BI, "hsa-miR-3913-5p87")

# Crear los gráficos de regresión para las correlaciones significativas
plot1 <- create_regression_plot(selected_vars_regression, "Insulin_2", "HOMA_IR", "Insulin (μU/mL)", "HOMA-IR")

## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

plot2 <- create_regression_plot(selected_vars_regression, "Insulin_2", "BMI", "Insulin (μU/mL)", "BMI (kg/m²)")
plot3 <- create_regression_plot(selected_vars_regression, "Insulin_2", "IL_6", "Insulin (μU/mL)", "IL-6 (pg/mL)")
plot4 <- create_regression_plot(selected_vars_regression, "Insulin_2", "PAI_1", "Insulin (μU/mL)", "PAI-1 (ng/mL)")
plot5 <- create_regression_plot(selected_vars_regression, "HOMA_IR", "BMI", "HOMA-IR", "BMI (kg/m²)")
plot6 <- create_regression_plot(selected_vars_regression, "HOMA_IR", "IL_6", "HOMA-IR", "IL-6 (pg/mL)")
plot7 <- create_regression_plot(selected_vars_regression, "HOMA_IR", "PAI_1", "HOMA-IR", "PAI-1 (ng/mL)")
plot8 <- create_regression_plot(selected_vars_regression, "Apo_B", "Total_Cholesterol", "Apo B (mg/dL)", "Total Cholesterol (mg/dL)")
plot9 <- create_regression_plot(selected_vars_regression, "Lean_Mass_BI", "Total_Cholesterol", "Lean Mass (kg)", "Total Cholesterol (mg/dL)")
plot10 <- create_regression_plot(selected_vars_regression, "`hsa-miR-3913-5p87`", "Insulin_2", "hsa-miR-3913-5p87", "Insulin (μU/mL)")
plot11 <- create_regression_plot(selected_vars_regression, "`hsa-miR-3913-5p87`", "HOMA_IR", "hsa-miR-3913-5p87", "HOMA-IR")
plot12 <- create_regression_plot(selected_vars_regression, "`hsa-miR-3913-5p87`", "IL_6", "hsa-miR-3913-5p87", "IL_6")
plot13 <- create_regression_plot(selected_vars_regression, "`hsa-miR-3913-5p87`", "Apo_B", "hsa-miR-3913-5p87", "Apo_B")
plot14 <- create_regression_plot(selected_vars_regression, "`hsa-miR-3913-5p87`", "Total_Cholesterol", "hsa-miR-3913-5p87", "Total_Cholesterol")

print(plot1)

## Warning: The dot-dot notation (`..r.squared..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(r.squared)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## `geom_smooth()` using formula = 'y ~ x'

print(plot2)

## `geom_smooth()` using formula = 'y ~ x'

print(plot3)

## `geom_smooth()` using formula = 'y ~ x'

print(plot4)

## `geom_smooth()` using formula = 'y ~ x'

print(plot5)

## `geom_smooth()` using formula = 'y ~ x'

print(plot6)

## `geom_smooth()` using formula = 'y ~ x'

print(plot7)

## `geom_smooth()` using formula = 'y ~ x'

print(plot8)

## `geom_smooth()` using formula = 'y ~ x'

print(plot9)

## `geom_smooth()` using formula = 'y ~ x'

print(plot10)

## `geom_smooth()` using formula = 'y ~ x'

print(plot11)

## `geom_smooth()` using formula = 'y ~ x'

print(plot12)

## `geom_smooth()` using formula = 'y ~ x'

print(plot13)

## `geom_smooth()` using formula = 'y ~ x'

print(plot14)

## `geom_smooth()` using formula = 'y ~ x'

Analysis_KatiaAvina

Loranda Calderon

2024-09-19