LauraAnalisys

#install.packages("conflicted")
#install.packages("dplyr")

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(gtsummary)
library(gt)
library(readxl)
library(conflicted)
library(dplyr)

setwd(dir = "/Users/lorandacalderonzamora/Downloads/")
data <- read_excel("ARTICULO-RAGE-DM2.xlsx")
class(data)

## [1] "tbl_df"     "tbl"        "data.frame"

names(data)

##  [1] "IDGRUPOS"      "SEXO"          "EDAD"          "IMC"          
##  [5] "TFG"           "GLUCOSA"       "UREA"          "BUN"          
##  [9] "BUNCREA"       "CREATININA"    "COLESTEROL"    "HDL"          
## [13] "LDL"           "VLDL"          "TRIGLICERIDOS" "HbA1C"        
## [17] "FUMA"          "SXMET"         "RAGE624"       "RAGE625"      
## [21] "RAGE600"       "ALCOHOL"       "ACTFISICA"     "SISTOLE"      
## [25] "DIASTOLE"      "HTA"

# Asegúrate de que `data` sea un data frame
data <- as.data.frame(data)
# Recodificar las variables de interés
data <- data %>%
  mutate(
    Renal_Dysfunction = dplyr::recode(IDGRUPOS, `0` = "No Renal Dysfunction", `1` = "Renal Dysfunction"),
    Sex = dplyr::recode(SEXO, `0` = "Male", `1` = "Female"),
    Alcohol = dplyr::recode(ALCOHOL, `0` = "No", `1` = "Yes"),
    Physical_Activity = dplyr::recode(ACTFISICA, `0` = "No", `1` = "Yes"),
    Hypertension = dplyr::recode(HTA, `0` = "No", `1` = "Yes"),
    Metabolic_Syndrome = dplyr::recode(SXMET, `0` = "No", `1` = "Yes"),
    Smoking = dplyr::recode(FUMA, `0`= "No", `1` = "Yes")
  )

class(data)

## [1] "data.frame"

View(data)

# Verificar si las columnas existen y tienen datos
data <- as.data.frame(data)

selected_data <- data %>%
  dplyr::select(
    Renal_Dysfunction, Sex, `Age (years)` = EDAD, IMC, Smoking, Alcohol, Physical_Activity, Hypertension, Metabolic_Syndrome
  )

# Asegúrate de que IMC es numérico
selected_data$IMC <- as.numeric(selected_data$IMC)

## Warning: NAs introduced by coercion

# Crear la tabla de resumen
summary_table <- selected_data %>%
  tbl_summary(
    by = Renal_Dysfunction, # Agrupar por la variable Renal_Dysfunction
    missing = "no", # Excluir valores faltantes de la tabla
    type = list(IMC ~ "continuous"), # Especificar que IMC es una variable continua
    statistic = list(IMC ~ "{mean} ({sd})", Age = "{median} ({p25}, {p75})") # Mostrar la media y desviación estándar para IMC
  ) %>%
  add_overall() %>% # Añadir una columna con el resumen general
  add_p() %>% # Añadir valores p para comparar grupos
  modify_header(label = "**Variable**") %>% # Cambiar el encabezado
  modify_caption("**Table: Demographic and Clinical Characteristics by Renal Dysfunction Status**") %>%
  as_gt() %>% # Convertir la tabla a formato gt
  gt::tab_style(
    style = gt::cell_text(weight = "bold"),
    locations = gt::cells_column_labels()
  )

summary_table

**Table: Demographic and Clinical Characteristics by Renal Dysfunction Status**
Variable	Overall N = 435¹	No Renal Dysfunction N = 217¹	Renal Dysfunction N = 218¹	p-value²
Sex				0.086
Female	257 (59%)	137 (63%)	120 (55%)
Male	178 (41%)	80 (37%)	98 (45%)
Age (years)	61 (54, 67)	57 (51, 64)	64 (59, 70)	<0.001
IMC	31.3 (15.9)	31.0 (5.8)	31.5 (21.7)	0.2
Smoking				0.6
NA	2 (0.5%)	0 (0%)	2 (0.9%)
No	359 (83%)	180 (83%)	179 (82%)
Yes	74 (17%)	37 (17%)	37 (17%)
Alcohol	129 (30%)	79 (36%)	50 (23%)	0.002
Physical_Activity				0.002
NA	8 (1.8%)	1 (0.5%)	7 (3.2%)
No	239 (55%)	135 (62%)	104 (48%)
Yes	188 (43%)	81 (37%)	107 (49%)
Hypertension				<0.001
NA	4 (0.9%)	0 (0%)	4 (1.8%)
No	123 (28%)	77 (35%)	46 (21%)
Yes	308 (71%)	140 (65%)	168 (77%)
Metabolic_Syndrome	316 (73%)	162 (75%)	154 (71%)	0.3
¹ n (%); Median (Q1, Q3); Mean (SD)
² Pearson’s Chi-squared test; Wilcoxon rank sum test; Fisher’s exact test

# Seleccionar solo las variables deseadas y asegurarse de que son numéricas
selected_data <- data %>%
  mutate(
    TFG = as.numeric(TFG),
    UREA = as.numeric(UREA),
    CREATININA = as.numeric(CREATININA),
    BUN = as.numeric(BUN),
    BUNCREA = as.numeric(BUNCREA),
    GLUCOSA = as.numeric(GLUCOSA),
    HbA1C = as.numeric(HbA1C),
    TRIGLICERIDOS = as.numeric(TRIGLICERIDOS),
    COLESTEROL = as.numeric(COLESTEROL),
    HDL = as.numeric(HDL),
    LDL = as.numeric(LDL),
    VLDL = as.numeric(VLDL)
  ) %>%
  # Seleccionar solo las variables de interés
  select(TFG, UREA, CREATININA, BUN, BUNCREA, GLUCOSA, HbA1C, TRIGLICERIDOS, COLESTEROL, HDL, LDL, VLDL, Renal_Dysfunction)

## Warning: There were 3 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `TFG = as.numeric(TFG)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 2 remaining warnings.

# Convertir Renal_Dysfunction a factor si no lo es
selected_data$Renal_Dysfunction <- as.factor(selected_data$Renal_Dysfunction)

# Crear la tabla de resumen
summary_table <- selected_data %>%
  tbl_summary(
    by = Renal_Dysfunction,  # Agrupar por la variable 'Renal_Dysfunction'
    missing = "no", # Excluir valores faltantes
    type = all_continuous() ~ "continuous", # Especificar que todas las variables son continuas
    statistic = all_continuous() ~ "{mean} ({sd})" # Mostrar la media y desviación estándar
  ) %>%
  add_overall() %>%  # Añadir una columna con el resumen general
  add_p() %>% # Añadir columna con valores p
  modify_header(label = "**Variable**") %>%
  as_gt() %>%
  gt::tab_header(
    title = gt::md("**Table: Clinical and Biochemical Parameters**")
  )

# Mostrar la tabla
summary_table

Variable	Overall N = 435¹	No Renal Dysfunction N = 217¹	Renal Dysfunction N = 218¹	p-value²
Table: Clinical and Biochemical Parameters
TFG	83 (37)	110 (21)	56 (29)	<0.001
UREA	52 (39)	36 (14)	68 (49)	<0.001
CREATININA	1.48 (2.23)	0.59 (0.18)	2.36 (2.88)	<0.001
BUN	24 (18)	17 (6)	32 (23)	<0.001
BUNCREA	27 (23)	34 (30)	19 (9)	<0.001
GLUCOSA	178 (95)	199 (104)	158 (80)	<0.001
HbA1C	7.73 (1.87)	8.13 (1.95)	7.33 (1.71)	<0.001
TRIGLICERIDOS	160 (87)	162 (83)	157 (91)	0.2
COLESTEROL	196 (82)	203 (99)	189 (59)	0.4
HDL	44 (19)	41 (15)	47 (21)	<0.001
LDL	121 (80)	130 (99)	112 (54)	0.090
VLDL	32 (17)	32 (17)	32 (18)	0.2
¹ Mean (SD)
² Wilcoxon rank sum test

#install.packages(c("ggpubr", "cowplot"))

library(ggpubr)
library(cowplot)
library(ggplot2)

data <- data %>%
  mutate(
    TFG = as.numeric(TFG),
    UREA = as.numeric(UREA),
    CREATININA = as.numeric(CREATININA),
    BUN = as.numeric(BUN),
    BUNCREA = as.numeric(BUNCREA),
    GLUCOSA = as.numeric(GLUCOSA),
    HbA1C = as.numeric(HbA1C),
    TRIGLICERIDOS = as.numeric(TRIGLICERIDOS),
    COLESTEROL = as.numeric(COLESTEROL),
    HDL = as.numeric(HDL),
    LDL = as.numeric(LDL),
    VLDL = as.numeric(VLDL)
  )

## Warning: There were 3 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `TFG = as.numeric(TFG)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 2 remaining warnings.

 custom_colors <- c("No Renal Dysfunction" = "#1f77b4", "Renal Dysfunction" = "#ff7f0e")

# Crear un tema personalizado
theme_custom <- theme_pubr() + 
  theme(axis.title.x = element_blank(), 
        axis.text = element_text(size = 10), 
        legend.position = "none")

# Función para crear gráficos de boxplot para cada variable
create_boxplot <- function(data, variable, var_label, y_label, y_lim) {
  ggplot(data, aes(x = Renal_Dysfunction, y = !!sym(variable), fill = Renal_Dysfunction)) +
    geom_boxplot(outlier.shape = NA) +
    geom_jitter(width = 0.2, alpha = 0.5, color = "black") +
    stat_compare_means(aes(label = ..p.signif..), method = "t.test", label.x = 1.5, label.y = y_lim - 0.1*y_lim) +
    labs(title = var_label, y = y_label, fill = " ") +
    scale_x_discrete(labels = c("No Renal Dysfunction", "Renal Dysfunction")) +
    scale_fill_manual(values = custom_colors) +
    ylim(0, y_lim) +  # Ajustar la escala del eje y según la variable
    theme_custom
}

# Crear gráficos para cada variable
p1 <- create_boxplot(data, "TFG", " ", "GFR (mL/min/1.73 m²)", 150)
p2 <- create_boxplot(data, "UREA", "", "Urea (mg/dL)", 100)
p3 <- create_boxplot(data, "CREATININA", "", "Creatinine (mg/dL)", 5)
p4 <- create_boxplot(data, "BUN", " ", "BUN (mg/dL)", 40)
p5 <- create_boxplot(data, "BUNCREA", " ", "BUN/Creatinine Ratio", 50)
p6 <- create_boxplot(data, "GLUCOSA", "", "Glucose (mg/dL)", 400)
p7 <- create_boxplot(data, "HbA1C", "", "HbA1c (%)", 20)
p8 <- create_boxplot(data, "TRIGLICERIDOS", "", "Triglycerides (mg/dL)", 500)
p9 <- create_boxplot(data, "COLESTEROL", "", "Cholesterol (mg/dL)", 400)
p10 <- create_boxplot(data, "HDL", "", "HDL-C (mg/dL)", 100)
p11 <- create_boxplot(data, "LDL", "", "LDL-C (mg/dL)", 300)
p12 <- create_boxplot(data, "VLDL", "", "VLDL-C (mg/dL)", 100)

# Mostrar un gráfico de ejemplo
print(p1)

## Warning: Removed 10 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 10 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).

print(p2)

## Warning: Removed 44 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 44 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 44 rows containing missing values or values outside the scale range
## (`geom_point()`).

print(p3)

## Warning: Removed 37 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 37 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 37 rows containing missing values or values outside the scale range
## (`geom_point()`).

print(p4)

## Warning: Removed 53 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 53 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 53 rows containing missing values or values outside the scale range
## (`geom_point()`).

print(p5)

## Warning: Removed 31 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 31 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 31 rows containing missing values or values outside the scale range
## (`geom_point()`).

print(p6)

## Warning: Removed 18 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 18 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 18 rows containing missing values or values outside the scale range
## (`geom_point()`).

print(p7)

## Warning: Removed 13 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 13 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 13 rows containing missing values or values outside the scale range
## (`geom_point()`).

print(p8)

## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

print(p9)

## Warning: Removed 6 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 6 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_point()`).

print(p10)

## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).

print(p11)

## Warning: Removed 11 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 11 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 11 rows containing missing values or values outside the scale range
## (`geom_point()`).

print(p12)

## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

#install.packages("https://cran.r-project.org/bin/macosx/big-sur-arm64/contrib/4.3/rms_6.8-1.tgz", repos = NULL, type = "binary")
library(rms)

## Loading required package: Hmisc

## 
## Attaching package: 'Hmisc'

## The following object is masked from 'package:gt':
## 
##     html

## The following objects are masked from 'package:dplyr':
## 
##     src, summarize

## The following objects are masked from 'package:base':
## 
##     format.pval, units

library(SNPassoc)

## Registered S3 method overwritten by 'SNPassoc':
##   method            from       
##   summary.haplo.glm haplo.stats

str(data)

## 'data.frame':    435 obs. of  33 variables:
##  $ IDGRUPOS          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ SEXO              : num  1 1 1 0 1 1 1 1 1 1 ...
##  $ EDAD              : num  65 74 67 66 66 64 62 65 60 67 ...
##  $ IMC               : chr  "40.6" "33.799999999999997" "22.5" "25.5" ...
##  $ TFG               : num  95.7 92.3 94.3 98.3 95 ...
##  $ GLUCOSA           : num  94 106 168 140 94 ...
##  $ UREA              : num  24.3 34.8 25.6 32.3 28.7 25.5 24.6 38.2 17.3 33.5 ...
##  $ BUN               : num  11.3 16.2 11.9 15.1 13.4 ...
##  $ BUNCREA           : num  18.9 23.2 19.9 21.5 22.3 ...
##  $ CREATININA        : num  0.6 0.7 0.6 0.7 0.6 0.5 0.5 0.7 0.68 0.69 ...
##  $ COLESTEROL        : num  210 222 123 99 145 ...
##  $ HDL               : num  47 53 26 29 44 39 50 43 26.4 43.9 ...
##  $ LDL               : num  131 142.4 79.2 52.6 71.6 ...
##  $ VLDL              : num  32 26.6 17.8 17.4 29.4 ...
##  $ TRIGLICERIDOS     : num  160 133 89 87 147 ...
##  $ HbA1C             : num  7.1 6.8 9.5 7.4 6.9 5.5 5.3 5.7 7.6 11.8 ...
##  $ FUMA              : chr  "0" "0" "0" "0" ...
##  $ SXMET             : num  1 1 0 1 0 1 0 0 0 0 ...
##  $ RAGE624           : chr  "A/A" "A/T" "A/T" "A/A" ...
##  $ RAGE625           : chr  "A/A" "A/A" "A/A" "A/A" ...
##  $ RAGE600           : chr  "C/C" "C/C" "C/C" "C/C" ...
##  $ ALCOHOL           : num  0 0 0 1 0 1 0 0 1 0 ...
##  $ ACTFISICA         : chr  "0" "1" "0" "0" ...
##  $ SISTOLE           : chr  "142" "122" "149" "145" ...
##  $ DIASTOLE          : chr  "87" "68" "92" "83" ...
##  $ HTA               : chr  "1" "1" "1" "1" ...
##  $ Renal_Dysfunction : chr  "No Renal Dysfunction" "No Renal Dysfunction" "No Renal Dysfunction" "No Renal Dysfunction" ...
##  $ Sex               : chr  "Female" "Female" "Female" "Male" ...
##  $ Alcohol           : chr  "No" "No" "No" "Yes" ...
##  $ Physical_Activity : chr  "No" "Yes" "No" "No" ...
##  $ Hypertension      : chr  "Yes" "Yes" "Yes" "Yes" ...
##  $ Metabolic_Syndrome: chr  "Yes" "Yes" "No" "Yes" ...
##  $ Smoking           : chr  "No" "No" "No" "No" ...

data$RAGE624 <- as.factor(data$RAGE624)
data$RAGE625 <- as.factor(data$RAGE625)
data$RAGE600 <- as.factor(data$RAGE600)
data$Renal_Dysfunction <- factor(data$Renal_Dysfunction)
data$Sex <- factor(data$Sex, levels = c("Female", "Male"))
data$Alcohol <- factor(data$Alcohol, levels = c("No", "Yes"))
data$Physical_Activity <- factor(data$Physical_Activity, levels = c("No", "Yes"))
data$Hypertension <- factor(data$Hypertension, levels = c("No", "Yes"))
data$Metabolic_Syndrome <- factor(data$Metabolic_Syndrome, levels = c("No", "Yes"))
data$Smoking <- factor(data$Smoking, levels = c("No", "Yes"))

snp_indices <- match(c("RAGE624", "RAGE625", "RAGE600"), colnames(data))
data <- data[complete.cases(data[, snp_indices]), ]
SNPs <- setupSNP(data, colSNPs = snp_indices, sep = "/")

plot(SNPs$RAGE624)

plot(SNPs$RAGE625)

plot(SNPs$RAGE600)

plot(SNPs$RAGE624, type = pie)

plot(SNPs$RAGE625, type = pie)

plot(SNPs$RAGE600, type = pie)

names(SNPs)

##  [1] "IDGRUPOS"           "SEXO"               "EDAD"              
##  [4] "IMC"                "TFG"                "GLUCOSA"           
##  [7] "UREA"               "BUN"                "BUNCREA"           
## [10] "CREATININA"         "COLESTEROL"         "HDL"               
## [13] "LDL"                "VLDL"               "TRIGLICERIDOS"     
## [16] "HbA1C"              "FUMA"               "SXMET"             
## [19] "ALCOHOL"            "ACTFISICA"          "SISTOLE"           
## [22] "DIASTOLE"           "HTA"                "Renal_Dysfunction" 
## [25] "Sex"                "Alcohol"            "Physical_Activity" 
## [28] "Hypertension"       "Metabolic_Syndrome" "Smoking"           
## [31] "RAGE624"            "RAGE625"            "RAGE600"

#Varaibles categoricas
data$RAGE624[data$RAGE624 == "NA"] <- NA
data$RAGE625[data$RAGE625 == "NA"] <- NA
data$RAGE600[data$RAGE600 == "NA"] <- NA
data$Renal_Dysfunction[data$Renal_Dysfunction == "NA"] <- NA
data$Smoking[data$Smoking == "NA"] <- NA
data$Alcohol[data$Alcohol == "NA"] <- NA

# Eliminar los niveles no utilizados (como "NA")
data$RAGE624 <- droplevels(data$RAGE624)
data$RAGE625 <- droplevels(data$RAGE625)
data$RAGE600 <- droplevels(data$RAGE600)
data$Renal_Dysfunction <- droplevels(data$Renal_Dysfunction)
data$Smoking <- droplevels(data$Smoking)
data$Alcohol <- droplevels(data$Alcohol)

# Verificar la estructura de los factores
str(data$RAGE624)

##  Factor w/ 3 levels "A/A","A/T","T/T": 1 2 2 1 3 2 1 2 2 1 ...

str(data$RAGE625)

##  Factor w/ 3 levels "A/A","A/G","G/G": 1 1 1 1 1 2 1 1 1 1 ...

str(data$RAGE600)

##  Factor w/ 3 levels "C/C","C/T","T/T": 1 1 1 1 1 1 1 1 1 1 ...

str(data$Renal_Dysfunction)

##  Factor w/ 2 levels "No Renal Dysfunction",..: 1 1 1 1 1 1 1 1 1 1 ...

str(data$Smoking)

##  Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...

str(data$Alcohol)

##  Factor w/ 2 levels "No","Yes": 1 1 1 2 1 2 1 1 2 1 ...

sum(is.na(data$RAGE624))

## [1] 13

sum(is.na(data$RAGE625))

## [1] 19

sum(is.na(data$RAGE600))

## [1] 25

sum(is.na(data$Renal_Dysfunction))

## [1] 0

sum(is.na(data$Smoking))

## [1] 2

sum(is.na(data$Alcohol))

## [1] 0

dim(data)

## [1] 435  33

str(data)

## 'data.frame':    435 obs. of  33 variables:
##  $ IDGRUPOS          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ SEXO              : num  1 1 1 0 1 1 1 1 1 1 ...
##  $ EDAD              : num  65 74 67 66 66 64 62 65 60 67 ...
##  $ IMC               : chr  "40.6" "33.799999999999997" "22.5" "25.5" ...
##  $ TFG               : num  95.7 92.3 94.3 98.3 95 ...
##  $ GLUCOSA           : num  94 106 168 140 94 ...
##  $ UREA              : num  24.3 34.8 25.6 32.3 28.7 25.5 24.6 38.2 17.3 33.5 ...
##  $ BUN               : num  11.3 16.2 11.9 15.1 13.4 ...
##  $ BUNCREA           : num  18.9 23.2 19.9 21.5 22.3 ...
##  $ CREATININA        : num  0.6 0.7 0.6 0.7 0.6 0.5 0.5 0.7 0.68 0.69 ...
##  $ COLESTEROL        : num  210 222 123 99 145 ...
##  $ HDL               : num  47 53 26 29 44 39 50 43 26.4 43.9 ...
##  $ LDL               : num  131 142.4 79.2 52.6 71.6 ...
##  $ VLDL              : num  32 26.6 17.8 17.4 29.4 ...
##  $ TRIGLICERIDOS     : num  160 133 89 87 147 ...
##  $ HbA1C             : num  7.1 6.8 9.5 7.4 6.9 5.5 5.3 5.7 7.6 11.8 ...
##  $ FUMA              : chr  "0" "0" "0" "0" ...
##  $ SXMET             : num  1 1 0 1 0 1 0 0 0 0 ...
##  $ RAGE624           : Factor w/ 3 levels "A/A","A/T","T/T": 1 2 2 1 3 2 1 2 2 1 ...
##  $ RAGE625           : Factor w/ 3 levels "A/A","A/G","G/G": 1 1 1 1 1 2 1 1 1 1 ...
##  $ RAGE600           : Factor w/ 3 levels "C/C","C/T","T/T": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ALCOHOL           : num  0 0 0 1 0 1 0 0 1 0 ...
##  $ ACTFISICA         : chr  "0" "1" "0" "0" ...
##  $ SISTOLE           : chr  "142" "122" "149" "145" ...
##  $ DIASTOLE          : chr  "87" "68" "92" "83" ...
##  $ HTA               : chr  "1" "1" "1" "1" ...
##  $ Renal_Dysfunction : Factor w/ 2 levels "No Renal Dysfunction",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Sex               : Factor w/ 2 levels "Female","Male": 1 1 1 2 1 1 1 1 1 1 ...
##  $ Alcohol           : Factor w/ 2 levels "No","Yes": 1 1 1 2 1 2 1 1 2 1 ...
##  $ Physical_Activity : Factor w/ 2 levels "No","Yes": 1 2 1 1 2 1 2 1 1 1 ...
##  $ Hypertension      : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 1 1 1 1 ...
##  $ Metabolic_Syndrome: Factor w/ 2 levels "No","Yes": 2 2 1 2 1 2 1 1 1 1 ...
##  $ Smoking           : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...

SNPs <- setupSNP(data, colSNPs = match(c("RAGE624", "RAGE625", "RAGE600"), colnames(data)), sep = "/")

#Perform association analysis for each SNP
association(Renal_Dysfunction ~ RAGE624, data = SNPs)

## 
## SNP: RAGE624  adjusted by: 
##              No Renal Dysfunction    % Renal Dysfunction    %   OR lower upper
## Codominant                                                                    
## A/A                            80 37.7                94 44.8 1.00            
## A/T                           115 54.2                97 46.2 0.72  0.48  1.07
## T/T                            17  8.0                19  9.0 0.95  0.46  1.95
## Dominant                                                                      
## A/A                            80 37.7                94 44.8 1.00            
## A/T-T/T                       132 62.3               116 55.2 0.75  0.51  1.10
## Recessive                                                                     
## A/A-A/T                       195 92.0               191 91.0 1.00            
## T/T                            17  8.0                19  9.0 1.14  0.58  2.26
## Overdominant                                                                  
## A/A-T/T                        97 45.8               113 53.8 1.00            
## A/T                           115 54.2                97 46.2 0.72  0.49  1.06
## log-Additive                                                                  
## 0,1,2                         212 50.2               210 49.8 0.86  0.63  1.16
##              p-value   AIC
## Codominant                
## A/A          0.25164 588.2
## A/T                       
## T/T                       
## Dominant                  
## A/A          0.14246 586.9
## A/T-T/T                   
## Recessive                 
## A/A-A/T      0.70518 588.9
## T/T                       
## Overdominant              
## A/A-T/T      0.09781 586.3
## A/T                       
## log-Additive              
## 0,1,2        0.32414 588.0

association(Renal_Dysfunction ~ RAGE625, data = SNPs)

## 
## SNP: RAGE625  adjusted by: 
##              No Renal Dysfunction    % Renal Dysfunction    %   OR lower upper
## Codominant                                                                    
## A/A                           134 64.4               142 68.3 1.00            
## A/G                            56 26.9                52 25.0 0.88  0.56  1.37
## G/G                            18  8.7                14  6.7 0.73  0.35  1.53
## Dominant                                                                      
## A/A                           134 64.4               142 68.3 1.00            
## A/G-G/G                        74 35.6                66 31.7 0.84  0.56  1.26
## Recessive                                                                     
## A/A-A/G                       190 91.3               194 93.3 1.00            
## G/G                            18  8.7                14  6.7 0.76  0.37  1.58
## Overdominant                                                                  
## A/A-G/G                       152 73.1               156 75.0 1.00            
## A/G                            56 26.9                52 25.0 0.90  0.58  1.40
## log-Additive                                                                  
## 0,1,2                         208 50.0               208 50.0 0.86  0.64  1.17
##              p-value   AIC
## Codominant                
## A/A           0.6436 581.8
## A/G                       
## G/G                       
## Dominant                  
## A/A           0.4064 580.0
## A/G-G/G                   
## Recessive                 
## A/A-A/G       0.4612 580.2
## G/G                       
## Overdominant              
## A/A-G/G       0.6546 580.5
## A/G                       
## log-Additive              
## 0,1,2         0.3497 579.8

association(Renal_Dysfunction ~ RAGE600, data = SNPs)

## 
## SNP: RAGE600  adjusted by: 
##              No Renal Dysfunction    % Renal Dysfunction    %   OR lower upper
## Codominant                                                                    
## C/C                           174 84.5               183 89.7 1.00            
## C/T                            23 11.2                16  7.8 0.66  0.34  1.29
## T/T                             9  4.4                 5  2.5 0.53  0.17  1.61
## Dominant                                                                      
## C/C                           174 84.5               183 89.7 1.00            
## C/T-T/T                        32 15.5                21 10.3 0.62  0.35  1.12
## Recessive                                                                     
## C/C-C/T                       197 95.6               199 97.5 1.00            
## T/T                             9  4.4                 5  2.5 0.55  0.18  1.67
## Overdominant                                                                  
## C/C-T/T                       183 88.8               188 92.2 1.00            
## C/T                            23 11.2                16  7.8 0.68  0.35  1.32
## log-Additive                                                                  
## 0,1,2                         206 50.2               204 49.8 0.70  0.45  1.09
##              p-value   AIC
## Codominant                
## C/C           0.2672 571.7
## C/T                       
## T/T                       
## Dominant                  
## C/C           0.1126 569.9
## C/T-T/T                   
## Recessive                 
## C/C-C/T       0.2816 571.2
## T/T                       
## Overdominant              
## C/C-T/T       0.2504 571.1
## C/T                       
## log-Additive              
## 0,1,2         0.1074 569.8

# Reemplazar "NA" por NA en las variables numéricas
data$IMC[data$IMC == "NA"] <- NA
data$GLUCOSA[data$GLUCOSA == "NA"] <- NA
data$HbA1C[data$HbA1C == "NA"] <- NA

# Convertir columnas a numéricas
data$IMC <- as.numeric(as.character(data$IMC))
data$GLUCOSA <- as.numeric(as.character(data$GLUCOSA))
data$HbA1C <- as.numeric(as.character(data$HbA1C))

# Verificar qué filas tienen valores no numéricos en cada columna
str(data$IMC)

##  num [1:435] 40.6 33.8 22.5 25.5 24.4 31 27.9 27.4 26.1 24.8 ...

str(data$GLUCOSA)

##  num [1:435] 94 106 168 140 94 ...

str(data$HbA1C)

##  num [1:435] 7.1 6.8 9.5 7.4 6.9 5.5 5.3 5.7 7.6 11.8 ...

# Verificar cuántos NA hay en cada columna
sum(is.na(data$IMC))

## [1] 7

sum(is.na(data$GLUCOSA))

## [1] 1

sum(is.na(data$HbA1C))

## [1] 13

#install.packages("car")
library(car)

## Loading required package: carData

# Identificar las variables colineales
alias(lm(IMC ~ RAGE624 + RAGE625 + RAGE600 + GLUCOSA + HbA1C + 
         Sex + EDAD + UREA + BUN + BUNCREA + CREATININA + COLESTEROL + HDL + LDL + VLDL + 
         TRIGLICERIDOS + Alcohol + Physical_Activity + Hypertension + 
         Metabolic_Syndrome + Smoking, data = data))

## Model :
## IMC ~ RAGE624 + RAGE625 + RAGE600 + GLUCOSA + HbA1C + Sex + EDAD + 
##     UREA + BUN + BUNCREA + CREATININA + COLESTEROL + HDL + LDL + 
##     VLDL + TRIGLICERIDOS + Alcohol + Physical_Activity + Hypertension + 
##     Metabolic_Syndrome + Smoking

# Ajustar el modelo sin las variables colineales (ej. eliminando VLDL)
#Si el resultado de VIF extremadamente alto (mayores de 10) se deben eliminar del modelo
modelo_vif <- lm(IMC ~ RAGE624 + RAGE625 + RAGE600 + GLUCOSA + HbA1C + 
                 Sex + EDAD + UREA + BUN + BUNCREA + CREATININA + COLESTEROL + HDL + LDL + 
                 TRIGLICERIDOS + Alcohol + Physical_Activity + Hypertension + 
                 Metabolic_Syndrome + Smoking, data = data)
car::vif(modelo_vif)

##                          GVIF Df GVIF^(1/(2*Df))
## RAGE624              1.143950  2        1.034193
## RAGE625              1.261052  2        1.059701
## RAGE600              1.244668  2        1.056242
## GLUCOSA              1.805214  1        1.343583
## HbA1C                1.699503  1        1.303650
## Sex                  1.243185  1        1.114982
## EDAD                 1.313350  1        1.146015
## UREA               889.056250  1       29.817046
## BUN                904.666055  1       30.077667
## BUNCREA              1.553039  1        1.246210
## CREATININA           4.299430  1        2.073507
## COLESTEROL         302.770679  1       17.400307
## HDL                 11.782347  1        3.432542
## LDL                292.039712  1       17.089169
## TRIGLICERIDOS       14.923733  1        3.863125
## Alcohol              1.104113  1        1.050768
## Physical_Activity    1.102385  1        1.049945
## Hypertension         1.408572  1        1.186833
## Metabolic_Syndrome   1.440063  1        1.200026
## Smoking              1.106460  1        1.051884

# Modelo nulo 
modelo_nulo <- glm(Renal_Dysfunction ~ 1, data = data, family = binomial, na.action = na.exclude)

# Modelo completo 
modelo_completo <- glm(Renal_Dysfunction ~ RAGE624 + RAGE625 + RAGE600 + GLUCOSA + HbA1C + 
                       Sex + EDAD + CREATININA + HDL + Alcohol + Physical_Activity + 
                       Hypertension + Metabolic_Syndrome + Smoking,
                       data = data, family = binomial, na.action = na.exclude)

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

OR <- exp(coef(modelo_completo))
CI <- exp(confint(modelo_completo))

## Waiting for profiling to be done...

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

p_values <- summary(modelo_completo)$coefficients[, 4]
OR_completo <- cbind(OR, CI, p_values)
print(OR_completo)

##                                 OR        2.5 %       97.5 %     p_values
## (Intercept)           2.972046e-26 9.783181e-39 6.843469e-18 9.906762e-07
## RAGE624A/T            2.859967e-01 4.856785e-02 1.318830e+00 1.276636e-01
## RAGE624T/T            3.129414e+00 9.899068e-02 6.737206e+01 4.784303e-01
## RAGE625A/G            1.723430e+00 2.653582e-01 1.182592e+01 5.655929e-01
## RAGE625G/G            2.849784e-01 8.791180e-03 1.090587e+01 4.848850e-01
## RAGE600C/T            3.454271e+00 4.760231e-01 2.952137e+01 2.278095e-01
## RAGE600T/T            2.421563e-01 6.114366e-06 4.133697e+03 9.523641e-01
## GLUCOSA               1.004487e+00 9.937222e-01 1.016056e+00 4.147866e-01
## HbA1C                 7.866421e-01 4.299093e-01 1.319531e+00 3.901660e-01
## SexMale               3.966918e-04 6.291856e-06 7.657044e-03 9.864037e-06
## EDAD                  1.456307e+00 1.277406e+00 1.759164e+00 2.681540e-06
## CREATININA            3.863772e+21 5.049601e+14 8.210687e+31 4.918100e-07
## HDL                   1.020537e+00 9.627918e-01 1.085281e+00 4.985750e-01
## AlcoholYes            3.894235e+00 7.285602e-01 2.551642e+01 1.249508e-01
## Physical_ActivityYes  1.044713e+00 2.010186e-01 5.057610e+00 9.565082e-01
## HypertensionYes       4.706162e-01 5.853386e-02 3.402803e+00 4.574523e-01
## Metabolic_SyndromeYes 2.112725e+00 2.399846e-01 2.140624e+01 5.042935e-01
## SmokingYes            8.495121e-01 1.116317e-01 6.420907e+00 8.721074e-01

summary(modelo_completo)

## 
## Call:
## glm(formula = Renal_Dysfunction ~ RAGE624 + RAGE625 + RAGE600 + 
##     GLUCOSA + HbA1C + Sex + EDAD + CREATININA + HDL + Alcohol + 
##     Physical_Activity + Hypertension + Metabolic_Syndrome + Smoking, 
##     family = binomial, data = data, na.action = na.exclude)
## 
## Coefficients:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           -58.777962  12.011481  -4.893 9.91e-07 ***
## RAGE624A/T             -1.251775   0.821709  -1.523    0.128    
## RAGE624T/T              1.140846   1.609479   0.709    0.478    
## RAGE625A/G              0.544317   0.947373   0.575    0.566    
## RAGE625G/G             -1.255342   1.797281  -0.698    0.485    
## RAGE600C/T              1.239611   1.027852   1.206    0.228    
## RAGE600T/T             -1.418172  23.739770  -0.060    0.952    
## GLUCOSA                 0.004477   0.005489   0.815    0.415    
## HbA1C                  -0.239982   0.279271  -0.859    0.390    
## SexMale                -7.832351   1.771972  -4.420 9.86e-06 ***
## EDAD                    0.375904   0.080085   4.694 2.68e-06 ***
## CREATININA             49.705931   9.882915   5.029 4.92e-07 ***
## HDL                     0.020329   0.030040   0.677    0.499    
## AlcoholYes              1.359497   0.886058   1.534    0.125    
## Physical_ActivityYes    0.043742   0.802083   0.055    0.957    
## HypertensionYes        -0.753712   1.014351  -0.743    0.457    
## Metabolic_SyndromeYes   0.747979   1.120149   0.668    0.504    
## SmokingYes             -0.163093   1.013112  -0.161    0.872    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 515.314  on 371  degrees of freedom
## Residual deviance:  58.573  on 354  degrees of freedom
##   (63 observations deleted due to missingness)
## AIC: 94.573
## 
## Number of Fisher Scoring iterations: 13

#Se reliazo el analisis por clasificaciones de herencia (recesiva, dominante y aditiva) para el RAGE600 porque fue el que salio significativo
data$RAGE600_recesivo <- ifelse(data$RAGE600 == "T/T", 1, 0)

modelo_recesivo_RAGE600 <- glm(Renal_Dysfunction ~ RAGE600_recesivo + GLUCOSA + HbA1C + 
                               Sex + EDAD + CREATININA + HDL + Alcohol + Physical_Activity + 
                               Hypertension + Metabolic_Syndrome + Smoking,
                               data = data, family = binomial, na.action = na.exclude)

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

summary(modelo_recesivo_RAGE600)

## 
## Call:
## glm(formula = Renal_Dysfunction ~ RAGE600_recesivo + GLUCOSA + 
##     HbA1C + Sex + EDAD + CREATININA + HDL + Alcohol + Physical_Activity + 
##     Hypertension + Metabolic_Syndrome + Smoking, family = binomial, 
##     data = data, na.action = na.exclude)
## 
## Coefficients:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           -56.098169  10.320765  -5.435 5.47e-08 ***
## RAGE600_recesivo        0.947286   6.085391   0.156   0.8763    
## GLUCOSA                 0.003675   0.004872   0.754   0.4507    
## HbA1C                  -0.150671   0.227105  -0.663   0.5070    
## SexMale                -7.347348   1.641857  -4.475 7.64e-06 ***
## EDAD                    0.358672   0.068469   5.239 1.62e-07 ***
## CREATININA             46.161902   8.439030   5.470 4.50e-08 ***
## HDL                     0.019306   0.024350   0.793   0.4279    
## AlcoholYes              1.600715   0.827693   1.934   0.0531 .  
## Physical_ActivityYes    0.240050   0.721354   0.333   0.7393    
## HypertensionYes        -0.306667   0.925772  -0.331   0.7405    
## Metabolic_SyndromeYes   0.396152   0.894111   0.443   0.6577    
## SmokingYes             -0.306855   0.990452  -0.310   0.7567    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 535.016  on 385  degrees of freedom
## Residual deviance:  64.856  on 373  degrees of freedom
##   (49 observations deleted due to missingness)
## AIC: 90.856
## 
## Number of Fisher Scoring iterations: 12

# Modelo dominante para RAGE600: agrupar C/C vs C/T y T/T
data$RAGE600_dominante <- ifelse(data$RAGE600 == "C/C", 0, 1)

modelo_dominante_RAGE600 <- glm(Renal_Dysfunction ~ RAGE600_dominante + GLUCOSA + HbA1C + 
                                Sex + EDAD + CREATININA + HDL + Alcohol + Physical_Activity + 
                                Hypertension + Metabolic_Syndrome + Smoking,
                                data = data, family = binomial, na.action = na.exclude)

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

summary(modelo_dominante_RAGE600)

## 
## Call:
## glm(formula = Renal_Dysfunction ~ RAGE600_dominante + GLUCOSA + 
##     HbA1C + Sex + EDAD + CREATININA + HDL + Alcohol + Physical_Activity + 
##     Hypertension + Metabolic_Syndrome + Smoking, family = binomial, 
##     data = data, na.action = na.exclude)
## 
## Coefficients:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           -56.286640  10.420771  -5.401 6.61e-08 ***
## RAGE600_dominante       1.231020   1.004151   1.226   0.2202    
## GLUCOSA                 0.003702   0.004920   0.752   0.4518    
## HbA1C                  -0.129895   0.230476  -0.564   0.5730    
## SexMale                -7.472919   1.667263  -4.482 7.39e-06 ***
## EDAD                    0.353649   0.067788   5.217 1.82e-07 ***
## CREATININA             46.605314   8.597387   5.421 5.93e-08 ***
## HDL                     0.018900   0.024599   0.768   0.4423    
## AlcoholYes              1.566438   0.844928   1.854   0.0637 .  
## Physical_ActivityYes   -0.026516   0.784222  -0.034   0.9730    
## HypertensionYes        -0.488811   0.939500  -0.520   0.6029    
## Metabolic_SyndromeYes   0.585157   0.932735   0.627   0.5304    
## SmokingYes             -0.344431   1.016006  -0.339   0.7346    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 535.016  on 385  degrees of freedom
## Residual deviance:  63.283  on 373  degrees of freedom
##   (49 observations deleted due to missingness)
## AIC: 89.283
## 
## Number of Fisher Scoring iterations: 12

data$RAGE600_aditivo <- factor(data$RAGE600, levels = c("C/C", "C/T", "T/T"))


modelo_aditivo_RAGE600 <- glm(Renal_Dysfunction ~ RAGE600_aditivo + GLUCOSA + HbA1C + 
                              Sex + EDAD + CREATININA + HDL + Alcohol + Physical_Activity + 
                              Hypertension + Metabolic_Syndrome + Smoking,
                              data = data, family = binomial, na.action = na.exclude)

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

summary(modelo_aditivo_RAGE600)

## 
## Call:
## glm(formula = Renal_Dysfunction ~ RAGE600_aditivo + GLUCOSA + 
##     HbA1C + Sex + EDAD + CREATININA + HDL + Alcohol + Physical_Activity + 
##     Hypertension + Metabolic_Syndrome + Smoking, family = binomial, 
##     data = data, na.action = na.exclude)
## 
## Coefficients:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           -56.287835  10.420148  -5.402 6.60e-08 ***
## RAGE600_aditivoC/T      1.242163   1.022184   1.215   0.2243    
## RAGE600_aditivoT/T      0.884744   5.855126   0.151   0.8799    
## GLUCOSA                 0.003697   0.004920   0.752   0.4523    
## HbA1C                  -0.128788   0.231497  -0.556   0.5780    
## SexMale                -7.479302   1.669408  -4.480 7.46e-06 ***
## EDAD                    0.353463   0.067846   5.210 1.89e-07 ***
## CREATININA             46.618270   8.596997   5.423 5.87e-08 ***
## HDL                     0.018878   0.024601   0.767   0.4429    
## AlcoholYes              1.564820   0.845571   1.851   0.0642 .  
## Physical_ActivityYes   -0.032334   0.790741  -0.041   0.9674    
## HypertensionYes        -0.487613   0.939262  -0.519   0.6037    
## Metabolic_SyndromeYes   0.585040   0.932267   0.628   0.5303    
## SmokingYes             -0.346485   1.016937  -0.341   0.7333    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 535.016  on 385  degrees of freedom
## Residual deviance:  63.279  on 372  degrees of freedom
##   (49 observations deleted due to missingness)
## AIC: 91.279
## 
## Number of Fisher Scoring iterations: 12

library(broom)
tidy(modelo_aditivo_RAGE600)

## # A tibble: 14 × 5
##    term                   estimate std.error statistic      p.value
##    <chr>                     <dbl>     <dbl>     <dbl>        <dbl>
##  1 (Intercept)           -56.3      10.4       -5.40   0.0000000660
##  2 RAGE600_aditivoC/T      1.24      1.02       1.22   0.224       
##  3 RAGE600_aditivoT/T      0.885     5.86       0.151  0.880       
##  4 GLUCOSA                 0.00370   0.00492    0.752  0.452       
##  5 HbA1C                  -0.129     0.231     -0.556  0.578       
##  6 SexMale                -7.48      1.67      -4.48   0.00000746  
##  7 EDAD                    0.353     0.0678     5.21   0.000000189 
##  8 CREATININA             46.6       8.60       5.42   0.0000000587
##  9 HDL                     0.0189    0.0246     0.767  0.443       
## 10 AlcoholYes              1.56      0.846      1.85   0.0642      
## 11 Physical_ActivityYes   -0.0323    0.791     -0.0409 0.967       
## 12 HypertensionYes        -0.488     0.939     -0.519  0.604       
## 13 Metabolic_SyndromeYes   0.585     0.932      0.628  0.530       
## 14 SmokingYes             -0.346     1.02      -0.341  0.733

tidy(modelo_dominante_RAGE600)

## # A tibble: 13 × 5
##    term                   estimate std.error statistic      p.value
##    <chr>                     <dbl>     <dbl>     <dbl>        <dbl>
##  1 (Intercept)           -56.3      10.4       -5.40   0.0000000661
##  2 RAGE600_dominante       1.23      1.00       1.23   0.220       
##  3 GLUCOSA                 0.00370   0.00492    0.752  0.452       
##  4 HbA1C                  -0.130     0.230     -0.564  0.573       
##  5 SexMale                -7.47      1.67      -4.48   0.00000739  
##  6 EDAD                    0.354     0.0678     5.22   0.000000182 
##  7 CREATININA             46.6       8.60       5.42   0.0000000593
##  8 HDL                     0.0189    0.0246     0.768  0.442       
##  9 AlcoholYes              1.57      0.845      1.85   0.0637      
## 10 Physical_ActivityYes   -0.0265    0.784     -0.0338 0.973       
## 11 HypertensionYes        -0.489     0.939     -0.520  0.603       
## 12 Metabolic_SyndromeYes   0.585     0.933      0.627  0.530       
## 13 SmokingYes             -0.344     1.02      -0.339  0.735

tidy(modelo_recesivo_RAGE600)

## # A tibble: 13 × 5
##    term                   estimate std.error statistic      p.value
##    <chr>                     <dbl>     <dbl>     <dbl>        <dbl>
##  1 (Intercept)           -56.1      10.3        -5.44  0.0000000547
##  2 RAGE600_recesivo        0.947     6.09        0.156 0.876       
##  3 GLUCOSA                 0.00368   0.00487     0.754 0.451       
##  4 HbA1C                  -0.151     0.227      -0.663 0.507       
##  5 SexMale                -7.35      1.64       -4.48  0.00000764  
##  6 EDAD                    0.359     0.0685      5.24  0.000000162 
##  7 CREATININA             46.2       8.44        5.47  0.0000000450
##  8 HDL                     0.0193    0.0243      0.793 0.428       
##  9 AlcoholYes              1.60      0.828       1.93  0.0531      
## 10 Physical_ActivityYes    0.240     0.721       0.333 0.739       
## 11 HypertensionYes        -0.307     0.926      -0.331 0.740       
## 12 Metabolic_SyndromeYes   0.396     0.894       0.443 0.658       
## 13 SmokingYes             -0.307     0.990      -0.310 0.757

AIC(modelo_recesivo_RAGE600, modelo_dominante_RAGE600, modelo_aditivo_RAGE600)

##                          df      AIC
## modelo_recesivo_RAGE600  13 90.85568
## modelo_dominante_RAGE600 13 89.28253
## modelo_aditivo_RAGE600   14 91.27889

data$RAGE600 <- factor(data$RAGE600, levels = c("C/C", "C/T", "T/T"))

modelo_simple_RAGE600 <- glm(Renal_Dysfunction ~ RAGE600, data = data, family = binomial, na.action = na.exclude)

OR_simple <- exp(coef(modelo_simple_RAGE600))
CI_simple <- exp(confint(modelo_simple_RAGE600))

## Waiting for profiling to be done...

OR_simple_completo <- c(1, OR_simple)
names(OR_simple_completo)[1] <- "RAGE600C/C"

CI_simple_completo <- rbind(c(1, 1), CI_simple)
rownames(CI_simple_completo)[1] <- "RAGE600C/C"

OR_simple_completo

##  RAGE600C/C (Intercept)  RAGE600C/T  RAGE600T/T 
##   1.0000000   1.0517241   0.6614398   0.5282332

CI_simple_completo

##                 2.5 %   97.5 %
## RAGE600C/C  1.0000000 1.000000
## (Intercept) 0.8546139 1.294766
## RAGE600C/T  0.3327802 1.285589
## RAGE600T/T  0.1596413 1.560152

data$RAGE624 <- factor(data$RAGE624, levels = c("A/A", "A/T", "T/T"))

modelo_simple_RAGE624 <- glm(Renal_Dysfunction ~ RAGE624, data = data, family = binomial, na.action = na.exclude)

OR_simple_RAGE624 <- exp(coef(modelo_simple_RAGE624))
CI_simple_RAGE624 <- exp(confint(modelo_simple_RAGE624))

## Waiting for profiling to be done...

OR_simple_RAGE624_completo <- c(1, OR_simple_RAGE624)
names(OR_simple_RAGE624_completo)[1] <- "RAGE624A/A"

CI_simple_RAGE624_completo <- rbind(c(1, 1), CI_simple_RAGE624)
rownames(CI_simple_RAGE624_completo)[1] <- "RAGE624A/A"

OR_simple_RAGE624_completo

##  RAGE624A/A (Intercept)  RAGE624A/T  RAGE624T/T 
##   1.0000000   1.1750000   0.7178538   0.9511890

CI_simple_RAGE624_completo

##                 2.5 %   97.5 %
## RAGE624A/A  1.0000000 1.000000
## (Intercept) 0.8726444 1.585907
## RAGE624A/T  0.4793512 1.072612
## RAGE624T/T  0.4628013 1.967292

data$RAGE625 <- factor(data$RAGE625, levels = c("A/A", "A/G", "G/G"))


modelo_simple_RAGE625 <- glm(Renal_Dysfunction ~ RAGE625, data = data, family = binomial, na.action = na.exclude)

OR_simple_RAGE625 <- exp(coef(modelo_simple_RAGE625))
CI_simple_RAGE625 <- exp(confint(modelo_simple_RAGE625))

## Waiting for profiling to be done...

OR_simple_RAGE625_completo <- c(1, OR_simple_RAGE625)
names(OR_simple_RAGE625_completo)[1] <- "RAGE625A/A"

CI_simple_RAGE625_completo <- rbind(c(1, 1), CI_simple_RAGE625)
rownames(CI_simple_RAGE625_completo)[1] <- "RAGE625A/A"

OR_simple_RAGE625_completo

##  RAGE625A/A (Intercept)  RAGE625A/G  RAGE625G/G 
##   1.0000000   1.0597015   0.8762575   0.7339593

CI_simple_RAGE625_completo

##                 2.5 %   97.5 %
## RAGE625A/A  1.0000000 1.000000
## (Intercept) 0.8368844 1.342568
## RAGE625A/G  0.5606131 1.367672
## RAGE625G/G  0.3457577 1.529281

table(data$RAGE625)

## 
## A/A A/G G/G 
## 276 108  32

table(data$RAGE624)

## 
## A/A A/T T/T 
## 174 212  36

table(data$RAGE600)

## 
## C/C C/T T/T 
## 357  39  14

LauraAnalisys

Loranda_Calderon

2024-09-19