install.packages(c("tidyverse","gtsummary", "readxl", "ggpubr","cowplot"))

## Installing packages into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)

# Cargar las librerías necesarias
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(gtsummary)
library(readxl)

library(readxl)
X16AGOSTOFINAL_DR_FIDEL <- read_excel("16AGOSTOFINAL -DR.FIDEL.xlsx")

## Warning: Expecting numeric in Y432 / R432C25: got a date

## Warning: Expecting numeric in Y433 / R433C25: got a date

## New names:
## • `` -> `...6`

#View(X16AGOSTOFINAL_DR_FIDEL)
data<-X16AGOSTOFINAL_DR_FIDEL

names(data)

##  [1] "Registro"              "NOmbre"                "code"                 
##  [4] "Renal"                 "Sexo"                  "...6"                 
##  [7] "Edad"                  "Peso (kg)"             "Talla (cm)"           
## [10] "Cintura (cm)"          "Cadera (cm)"           "IC/C"                 
## [13] "IMC"                   "TFG"                   "Categoria calculadora"
## [16] "Tabla KDIGO"           "Glucosa"               "Urea"                 
## [19] "BUN"                   "BUN/CREA"              "Creatinina"           
## [22] "Colesterol"            "HDL"                   "LDL"                  
## [25] "VLDL"                  "Trigliceridos"         "HBA1c"                
## [28] "Fuma"                  "ALCOHOL"               "ACTIVIDAD FISICA"     
## [31] "TA actual"             "DIASTOLE"              "SISTOLE"              
## [34] "HTA"                   "SXMET"                 "RAGE 624"             
## [37] "RAGE625"               "RAGE600"

# Ensure `data` is a data frame
data <- as.data.frame(data)

# Recodificar las variables de interés incluyendo "Renal"
data <- data %>%
  mutate(
    Renal_Dysfunction = recode(Renal, `0` = "No Renal Dysfunction", `1` = "Renal Dysfunction"),
    Sex = recode(Sexo, "M" = "Male", "F" = "Female"),
    Alcohol = recode(ALCOHOL, "SI" = "Yes", "NO" = "No"),
    Physical_Activity = recode(`ACTIVIDAD FISICA`, "SI" = "Yes", "NO" = "No"),
    Hypertension = recode(HTA, "SI" = "Yes", "NO" = "No"),
    Metabolic_Syndrome = recode(SXMET, "SI" = "Yes", "NO" = "No"),
    Smoking = recode(Fuma, "SI" = "Yes", "NO" = "No")
  )



#Variebles filtradas:


# Corregir la variable "Alcohol", reemplazando "N0" por "No"
data <- data %>%
  mutate(Alcohol = recode(Alcohol, "N0" = "No"))
# Filtrar y eliminar los registros con valores "-" y "/"
data <- data %>%
  filter(Physical_Activity != "-", Physical_Activity != "/")
data <- data %>%
  mutate(
    BP_Category = if_else(`TA actual` > "120/80", "Above 120/80", "Below or Equal 120/80")
  )


names(data)

##  [1] "Registro"              "NOmbre"                "code"                 
##  [4] "Renal"                 "Sexo"                  "...6"                 
##  [7] "Edad"                  "Peso (kg)"             "Talla (cm)"           
## [10] "Cintura (cm)"          "Cadera (cm)"           "IC/C"                 
## [13] "IMC"                   "TFG"                   "Categoria calculadora"
## [16] "Tabla KDIGO"           "Glucosa"               "Urea"                 
## [19] "BUN"                   "BUN/CREA"              "Creatinina"           
## [22] "Colesterol"            "HDL"                   "LDL"                  
## [25] "VLDL"                  "Trigliceridos"         "HBA1c"                
## [28] "Fuma"                  "ALCOHOL"               "ACTIVIDAD FISICA"     
## [31] "TA actual"             "DIASTOLE"              "SISTOLE"              
## [34] "HTA"                   "SXMET"                 "RAGE 624"             
## [37] "RAGE625"               "RAGE600"               "Renal_Dysfunction"    
## [40] "Sex"                   "Alcohol"               "Physical_Activity"    
## [43] "Hypertension"          "Metabolic_Syndrome"    "Smoking"              
## [46] "BP_Category"

# Crear una tabla de resumen con las variables especificadas, agrupando por disfunción renal
summary_table <- data %>%
  select(
    Renal_Dysfunction, Sex, `Age (years)` = Edad, `Weight (kg)` = `Peso (kg)`, `Height (cm)`= `Talla (cm)`, IMC, 
    `Waist Circumference (cm)`= `Cintura (cm)`, `Hip Circumference (cm)` = `Cadera (cm)`,
    IC_C = `IC/C`, Smoking, Alcohol, Physical_Activity, 
    BP_Category = `BP_Category`, Systole = SISTOLE, Diastole = DIASTOLE, 
    Hypertension, Metabolic_Syndrome
  ) %>%
  tbl_summary(
    by = Renal_Dysfunction, # Agrupar por la variable dependiente "Renal_Dysfunction"
    missing = "no" # Excluye valores faltantes
  ) %>% add_overall() %>% 
  add_p() %>%
  modify_header(label = "**Variable**") %>%
  modify_caption("**Table: Demographic and Clinical Characteristics by Renal Dysfunction Status**") %>%
  as_gt() %>%
  gt::tab_style(
    style = gt::cell_text(weight = "bold"),
    locations = gt::cells_column_labels()
  )

summary_table

**Table: Demographic and Clinical Characteristics by Renal Dysfunction Status**
Variable	Overall N = 427¹	No Renal Dysfunction N = 208¹	Renal Dysfunction N = 219¹	p-value²
Sex				0.054
Female	253 (59%)	133 (64%)	120 (55%)
Male	174 (41%)	75 (36%)	99 (45%)
Age (years)	61 (54, 67)	57 (51, 64)	64 (59, 70)	<0.001
Weight (kg)	78 (68, 91)	79 (70, 94)	78 (65, 89)	0.016
Height (cm)	150 (2, 162)	158 (2, 167)	2 (2, 157)	<0.001
IMC	29.9 (26.7, 34.1)	30.1 (27.0, 34.6)	29.9 (26.5, 33.0)	0.2
Waist Circumference (cm)	101 (91, 110)	101 (94, 110)	100 (91, 109)	0.4
Hip Circumference (cm)	105 (99, 111)	106 (99, 112)	104 (98, 111)	0.2
IC_C	0.95 (0.89, 1.02)	0.95 (0.89, 1.02)	0.95 (0.90, 1.02)	0.9
Smoking	75 (18%)	36 (17%)	39 (18%)	0.9
Alcohol	130 (30%)	75 (36%)	55 (25%)	0.014
Physical_Activity	239 (56%)	129 (62%)	110 (50%)	0.014
BP_Category				0.004
Above 120/80	223 (70%)	104 (63%)	119 (77%)
Below or Equal 120/80	97 (30%)	62 (37%)	35 (23%)
Systole	80 (70, 84)	80 (70, 82)	80 (70, 86)	>0.9
Diastole	130 (120, 144)	130 (120, 139)	138 (122, 152)	<0.001
Hypertension	303 (72%)	134 (64%)	169 (79%)	0.001
Metabolic_Syndrome	307 (72%)	154 (74%)	153 (70%)	0.3
¹ n (%); Median (Q1, Q3)
² Pearson’s Chi-squared test; Wilcoxon rank sum test

# Convert relevant columns to numeric, removing any non-numeric characters
# Convertir las variables necesarias a numéricas y recodificar "Renal"
data <- data %>%
  mutate(
    Cholesterol = as.numeric(gsub("[^0-9.]", "", Colesterol)),
    HDL = as.numeric(gsub("[^0-9.]", "", HDL)),
    LDL = as.numeric(gsub("[^0-9.]", "", LDL)),
    VLDL = as.numeric(gsub("[^0-9.]", "", VLDL)),
    Triglycerides = as.numeric(gsub("[^0-9.]", "", Trigliceridos)),
    Renal_Dysfunction = recode(Renal, `0` = "No", `1` = "Yes")
  )

library(ggpubr)
library(cowplot)

## 
## Attaching package: 'cowplot'

## The following object is masked from 'package:ggpubr':
## 
##     get_legend

## The following object is masked from 'package:lubridate':
## 
##     stamp

# Crear un tema común para asegurarse de que todos los gráficos tengan el mismo tamaño de ejes
theme_custom <- theme_pubr() + 
  theme(axis.title.x = element_blank(), axis.title.y = element_blank(),
        axis.text = element_text(size = 10), legend.position = "none")

# Crear las gráficas de boxplot para cada variable del perfil lipídico, agrupadas por Renal Dysfunction
p1 <- ggplot(data, aes(x = Renal_Dysfunction, y = Cholesterol, fill = Renal_Dysfunction)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.5) +
  stat_compare_means(aes(label = ..p.signif..), label.x = 1.5, label.y = 900) +
  labs(title = "Cholesterol", y = "Cholesterol (mg/dL)") +
  scale_x_discrete(labels = c("No", "Yes")) +
  ylim(0, 1000) +  # Asegurar la misma escala para el eje y
  theme_custom

p2 <- ggplot(data, aes(x = Renal_Dysfunction, y = HDL, fill = Renal_Dysfunction)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.5) +
  stat_compare_means(aes(label = ..p.signif..), label.x = 1.5, label.y = 250) +
  labs(title = "HDLs", y = "HDL (mg/dL)") +
  scale_x_discrete(labels = c("No", "Yes")) +
  ylim(0, 300) +  # Asegurar la misma escala para el eje y
  theme_custom

p3 <- ggplot(data, aes(x = Renal_Dysfunction, y = LDL, fill = Renal_Dysfunction)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.5) +
  stat_compare_means(aes(label = ..p.signif..), label.x = 1.5, label.y = 900) +
  labs(title = "LDL", y = "LDL (mg/dL)") +
  scale_x_discrete(labels = c("No", "Yes")) +
  ylim(0, 1000) +  # Asegurar la misma escala para el eje y
  theme_custom

p4 <- ggplot(data, aes(x = Renal_Dysfunction, y = VLDL, fill = Renal_Dysfunction)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.5) +
  stat_compare_means(aes(label = ..p.signif..), label.x = 1.5, label.y = 900) +
  labs(title = "VLDL", y = "VLDL (mg/dL)") +
  scale_x_discrete(labels = c("No", "Yes")) +
  ylim(0, 1000) +  # Asegurar la misma escala para el eje y
  theme_custom

p5 <- ggplot(data, aes(x = Renal_Dysfunction, y = Triglycerides, fill = Renal_Dysfunction)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.5) +
  stat_compare_means(aes(label = ..p.signif..), label.x = 1.5, label.y = 900) +
  labs(title = "Triglycerides", y = "Triglycerides (mg/dL)") +
  scale_x_discrete(labels = c("No", "Yes")) +
  ylim(0, 1000) +  # Asegurar la misma escala para el eje y
  theme_custom

# Crear la leyenda personalizada con el título "Renal Dysfunction"
legend <- get_legend(
  ggplot(data, aes(x = Renal_Dysfunction, y = Triglycerides, fill = Renal_Dysfunction)) +
    geom_boxplot() +
    labs(fill = "Renal Dysfunction") +  # Cambiar la etiqueta de la leyenda aquí
    theme(legend.position = "bottom")
)

## Warning in get_plot_component(plot, "guide-box"): Multiple components found;
## returning the first one. To return all, use `return_all = TRUE`.

# Ajustar las proporciones relativas de los gráficos para garantizar el mismo tamaño de las celdas
top_row <- plot_grid(p1, p2, labels = c('A', 'B'), label_size = 12, ncol = 2, rel_widths = c(1, 1))

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

bottom_row <- plot_grid(p3, p4, p5, labels = c('C', 'D', 'E'), label_size = 12, ncol = 3, rel_widths = c(1, 1, 1))

## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Combinar ambas filas y agregar la leyenda
combined_plot <- plot_grid(top_row, bottom_row, legend, ncol = 1, rel_heights = c(1, 1, 0.2))

# Mostrar el gráfico
print(combined_plot)

# Crear un tema común para asegurarse de que todos los gráficos tengan el mismo tamaño de ejes
theme_custom <- theme_pubr() + 
  theme(axis.title.x = element_blank(), axis.title.y = element_blank(),
        axis.text = element_text(size = 10), legend.position = "none")

# Definir las posiciones de la significancia para los gráficos (ajustadas para cada escala de y)
significance_y_position_large <- 900
significance_y_position_small <- 110
significance_y_position_hba1c <- 13

# Crear las gráficas de boxplot para cada variable, agrupadas por Renal Dysfunction
p1 <- ggplot(data, aes(x = Renal_Dysfunction, y = TFG, fill = Renal_Dysfunction)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.5) +
  stat_compare_means(aes(label = ..p.signif..), label.x = 1.5, label.y = significance_y_position_large) +
  labs(title = "TFG", y = "TFG (mL/min/1.73m²)") +
  scale_x_discrete(labels = c("No", "Yes")) +
  ylim(0, 1000) +
  theme_custom

p2 <- ggplot(data, aes(x = Renal_Dysfunction, y = Glucosa, fill = Renal_Dysfunction)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.5) +
  stat_compare_means(aes(label = ..p.signif..), label.x = 1.5, label.y = significance_y_position_large) +
  labs(title = "Glucosa", y = "Glucosa (mg/dL)") +
  scale_x_discrete(labels = c("No", "Yes")) +
  ylim(0, 1000) +
  theme_custom

p3 <- ggplot(data, aes(x = Renal_Dysfunction, y = Urea, fill = Renal_Dysfunction)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.5) +
  stat_compare_means(aes(label = ..p.signif..), label.x = 1.5, label.y = significance_y_position_large) +
  labs(title = "Urea", y = "Urea (mg/dL)") +
  scale_x_discrete(labels = c("No", "Yes")) +
  ylim(0, 1000) +
  theme_custom

p4 <- ggplot(data, aes(x = Renal_Dysfunction, y = BUN, fill = Renal_Dysfunction)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.5) +
  stat_compare_means(aes(label = ..p.signif..), label.x = 1.5, label.y = significance_y_position_large) +
  labs(title = "BUN", y = "BUN (mg/dL)") +
  scale_x_discrete(labels = c("No", "Yes")) +
  ylim(0, 1000) +
  theme_custom

p5 <- ggplot(data, aes(x = Renal_Dysfunction, y = `BUN/CREA`, fill = Renal_Dysfunction)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.5) +
  stat_compare_means(aes(label = ..p.signif..), label.x = 1.5, label.y = significance_y_position_small) +
  labs(title = "BUN/CREA", y = "BUN/CREA") +
  scale_x_discrete(labels = c("No", "Yes")) +
  ylim(0, 150) +
  theme_custom

p6 <- ggplot(data, aes(x = Renal_Dysfunction, y = Creatinina, fill = Renal_Dysfunction)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.5) +
  stat_compare_means(aes(label = ..p.signif..), label.x = 1.5, label.y = significance_y_position_small) +
  labs(title = "Creatinina", y = "Creatinina (mg/dL)") +
  scale_x_discrete(labels = c("No", "Yes")) +
  ylim(0, 150) +
  theme_custom

p7 <- ggplot(data, aes(x = Renal_Dysfunction, y = HBA1c, fill = Renal_Dysfunction)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.5) +
  stat_compare_means(aes(label = ..p.signif..), label.x = 1.5, label.y = significance_y_position_hba1c) +
  labs(title = "HBA1c", y = "HBA1c (%)") +
  scale_x_discrete(labels = c("No", "Yes")) +
  ylim(0, 15) +
  theme_custom

# Crear la leyenda personalizada con el título "Renal Dysfunction"
legend <- get_legend(
  ggplot(data, aes(x = Renal_Dysfunction, y = TFG, fill = Renal_Dysfunction)) +
    geom_boxplot() +
    labs(fill = "Renal Dysfunction") +  # Cambiar la etiqueta de la leyenda aquí
    theme(legend.position = "bottom")
)

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning in get_plot_component(plot, "guide-box"): Multiple components found;
## returning the first one. To return all, use `return_all = TRUE`.

# Ajustar las proporciones relativas de los gráficos para garantizar el mismo tamaño de las celdas
top_row <- plot_grid(p1, p2, p3, labels = c('A', 'B', 'C'), label_size = 12, ncol = 3, rel_widths = c(1, 1, 1))

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

middle_row <- plot_grid(p4, p5, p6, labels = c('D', 'E', 'F'), label_size = 12, ncol = 3, rel_widths = c(1, 1, 1))

## Warning: Removed 6 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 6 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_point()`).

bottom_row <- plot_grid(p7, NULL, NULL, labels = c('G', '', ''), label_size = 12, ncol = 3, rel_widths = c(1, 1, 1))

## Warning: Removed 14 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 14 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Combinar todas las filas y agregar la leyenda
combined_plot <- plot_grid(top_row, middle_row, bottom_row, legend, ncol = 1, rel_heights = c(1, 1, 1, 0.2))

# Mostrar el gráfico
print(combined_plot)

# Install and load SNPassoc
#install.packages("SNPassoc")
library(SNPassoc)

## Registered S3 method overwritten by 'SNPassoc':
##   method            from       
##   summary.haplo.glm haplo.stats

# Assuming your data is in a data frame called 'genotype_data'
# and you have loaded the data into R, with the genotype columns and Renal_Dysfunction status
# Example data loading
# genotype_data <- read.csv("your_data_file.csv")
str(data)

## 'data.frame':    427 obs. of  48 variables:
##  $ Registro             : chr  "AM208" "AM212" "AM218" "AM222" ...
##  $ NOmbre               : chr  "Nava Chaides Ana Margarita" "Ochoa MoreNO Ma Belem" "Corral Ramirez HoNOria" "Ernesto Alfonso Perez Lerma" ...
##  $ code                 : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ Renal                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Sexo                 : chr  "F" "F" "F" "M" ...
##  $ ...6                 : chr  "1" "1" "1" "0" ...
##  $ Edad                 : num  65 74 67 66 66 64 62 65 60 67 ...
##  $ Peso (kg)            : num  94.9 77.1 54 77.1 67.2 76.5 73.2 80.2 68.4 65.9 ...
##  $ Talla (cm)           : num  153 151 154 174 160 ...
##  $ Cintura (cm)         : num  110.1 111 77.2 93 85 ...
##  $ Cadera (cm)          : num  135.1 109.5 85 100 98.5 ...
##  $ IC/C                 : num  0.815 1.014 0.908 0.93 0.863 ...
##  $ IMC                  : num  40.6 33.8 22.5 25.5 24.4 31 27.9 27.4 26.1 24.8 ...
##  $ TFG                  : num  95.7 92.3 94.3 98.3 95 ...
##  $ Categoria calculadora: chr  "Función renal NOrmal" "Función renal NOrmal" "Función renal NOrmal" "Función renal NOrmal" ...
##  $ Tabla KDIGO          : chr  "NO" "NO" "NO" "NO" ...
##  $ Glucosa              : num  94 106 168 140 94 ...
##  $ Urea                 : num  24.3 34.8 25.6 32.3 28.7 25.5 24.6 38.2 17.3 33.5 ...
##  $ BUN                  : num  11.3 16.2 11.9 15.1 13.4 ...
##  $ BUN/CREA             : num  18.9 23.2 19.9 21.5 22.3 ...
##  $ Creatinina           : num  0.6 0.7 0.6 0.7 0.6 0.5 0.5 0.7 0.68 0.69 ...
##  $ Colesterol           : chr  "210" "222" "123" "99" ...
##  $ HDL                  : num  47 53 26 29 44 39 50 43 26.4 43.9 ...
##  $ LDL                  : num  131 142.4 79.2 52.6 71.6 ...
##  $ VLDL                 : num  32 26.6 17.8 17.4 29.4 ...
##  $ Trigliceridos        : chr  "160" "133" "89" "87" ...
##  $ HBA1c                : num  7.1 6.8 9.5 7.4 6.9 5.5 5.3 5.7 7.6 11.8 ...
##  $ Fuma                 : chr  "NO" "NO" "NO" "NO" ...
##  $ ALCOHOL              : chr  "NO" "NO" "NO" "SI" ...
##  $ ACTIVIDAD FISICA     : chr  "SI" "NO" "SI" "SI" ...
##  $ TA actual            : chr  "142/87" "122/68" "149/92" "145/83" ...
##  $ DIASTOLE             : num  142 122 149 145 115 145 132 118 NA NA ...
##  $ SISTOLE              : num  87 68 92 83 68 81 76 75 NA NA ...
##  $ HTA                  : chr  "SI" "SI" "SI" "SI" ...
##  $ SXMET                : chr  "SI" "SI" "NO" "SI" ...
##  $ RAGE 624             : chr  "A/A" "A/T" "A/T" "A/A" ...
##  $ RAGE625              : chr  "A/A" "A/A" "A/A" "A/A" ...
##  $ RAGE600              : chr  "C/C" "C/C" "C/C" "C/C" ...
##  $ Renal_Dysfunction    : chr  "No" "No" "No" "No" ...
##  $ Sex                  : chr  "Female" "Female" "Female" "Male" ...
##  $ Alcohol              : chr  "No" "No" "No" "Yes" ...
##  $ Physical_Activity    : chr  "Yes" "No" "Yes" "Yes" ...
##  $ Hypertension         : chr  "Yes" "Yes" "Yes" "Yes" ...
##  $ Metabolic_Syndrome   : chr  "Yes" "Yes" "No" "Yes" ...
##  $ Smoking              : chr  "No" "No" "No" "No" ...
##  $ BP_Category          : chr  "Above 120/80" "Above 120/80" "Above 120/80" "Above 120/80" ...
##  $ Cholesterol          : num  210 222 123 99 145 ...
##  $ Triglycerides        : num  160 133 89 87 147 ...

# Convert the columns to factors
data$`RAGE 624` <- as.factor(data$`RAGE 624`)
data$RAGE625 <- as.factor(data$RAGE625)
data$RAGE600 <- as.factor(data$RAGE600)
data$Renal_Dysfunction <- factor(data$Renal_Dysfunction, levels = c("No", "Yes"))

# Hay Nas, en los SNP hay que eliminarlos
 #Remove NAs
data <- na.omit(data)

# Alternatively, if using column indices
# You can also identify the indices of these columns manually if needed:
# For example:
snp_indices <- match(c("RAGE 624", "RAGE625", "RAGE600"), colnames(data))

# Then use these indices in setupSNP
SNPs <- setupSNP(data, colSNPs = snp_indices, sep = "/")

Visualize SNP summary

plot(SNPs$`RAGE.624`)

plot(SNPs$RAGE600)

plot(SNPs$RAGE625)

# You can also change the plot type to pie chart
plot(SNPs$RAGE.624, type = pie)

plot(SNPs$RAGE600, type = pie)

plot(SNPs$RAGE625, type = pie)

Perform association analysis for each SNP

RAGE 624

association(Renal_Dysfunction ~ RAGE.624, data = SNPs)

## 
## SNP: RAGE.624  adjusted by: 
##               No    % Yes    %   OR lower upper p-value   AIC
## Codominant                                                   
## A/A           54 38.6  50 43.9 1.00              0.5723 354.3
## A/T           78 55.7  56 49.1 0.78  0.46  1.30              
## T/T            8  5.7   8  7.0 1.08  0.38  3.09              
## Dominant                                                     
## A/A           54 38.6  50 43.9 1.00              0.3941 352.7
## A/T-T/T       86 61.4  64 56.1 0.80  0.49  1.33              
## Recessive                                                    
## A/A-A/T      132 94.3 106 93.0 1.00              0.6714 353.3
## T/T            8  5.7   8  7.0 1.25  0.45  3.43              
## Overdominant                                                 
## A/A-T/T       62 44.3  58 50.9 1.00              0.2952 352.4
## A/T           78 55.7  56 49.1 0.77  0.47  1.26              
## log-Additive                                                 
## 0,1,2        140 55.1 114 44.9 0.89  0.59  1.36  0.5944 353.2

RAGE 625

# Perform association analysis for one SNP
association(Renal_Dysfunction ~ RAGE625, data = SNPs)

## 
## SNP: RAGE625  adjusted by: 
##               No    % Yes    %   OR lower upper p-value   AIC
## Codominant                                                   
## A/A           94 67.1  88 77.2 1.00             0.14437 351.6
## A/G           36 25.7  18 15.8 0.53  0.28  1.01              
## G/G           10  7.1   8  7.0 0.85  0.32  2.26              
## Dominant                                                     
## A/A           94 67.1  88 77.2 1.00             0.07541 350.3
## A/G-G/G       46 32.9  26 22.8 0.60  0.34  1.06              
## Recessive                                                    
## A/A-A/G      130 92.9 106 93.0 1.00             0.96911 353.5
## G/G           10  7.1   8  7.0 0.98  0.37  2.57              
## Overdominant                                                 
## A/A-G/G      104 74.3  96 84.2 1.00             0.05217 349.7
## A/G           36 25.7  18 15.8 0.54  0.29  1.02              
## log-Additive                                                 
## 0,1,2        140 55.1 114 44.9 0.76  0.50  1.15 0.18215 351.7

RAGE 600

# Perform association analysis for one SNP
association(Renal_Dysfunction ~ RAGE600, data = SNPs)

## 
## SNP: RAGE600  adjusted by: 
##               No    % Yes    %   OR lower upper p-value   AIC
## Codominant                                                   
## C/C          121 86.4  95 83.3 1.00              0.7411 354.9
## C/T           13  9.3  14 12.3 1.37  0.62  3.06              
## T/T            6  4.3   5  4.4 1.06  0.31  3.58              
## Dominant                                                     
## C/C          121 86.4  95 83.3 1.00              0.4925 353.0
## C/T-T/T       19 13.6  19 16.7 1.27  0.64  2.54              
## Recessive                                                    
## C/C-C/T      134 95.7 109 95.6 1.00              0.9689 353.5
## T/T            6  4.3   5  4.4 1.02  0.30  3.45              
## Overdominant                                                 
## C/C-T/T      127 90.7 100 87.7 1.00              0.4424 352.9
## C/T           13  9.3  14 12.3 1.37  0.62  3.04              
## log-Additive                                                 
## 0,1,2        140 55.1 114 44.9 1.14  0.69  1.88  0.6076 353.2

library(broom)

# Supongamos que tus datos están en un dataframe llamado `data`
# y que `RAGE624`, `RAGE625`, y `RAGE600` son variables en el dataframe
# con los genotipos categorizados como "GG", "GT", "TT".
# También asumo que tienes una variable llamada `Renal_Dysfunction` que indica "No" (Control) y "Yes" (Cases)

# Reestructuramos los datos para prepararlos para la tabla
data_long <- data %>%
  pivot_longer(cols = c(`RAGE 624`, RAGE625, RAGE600), names_to = "Genotype", values_to = "Value")

# Ahora creamos la tabla
table1 <- data_long %>%
  group_by(Genotype, Value, Renal_Dysfunction) %>%
  summarize(n = n(), .groups = "drop") %>%  # Añadido .groups = "drop" para evitar la advertencia
  pivot_wider(names_from = Renal_Dysfunction, values_from = n) %>%
  mutate(
    OR = map2(No, Yes, ~broom::tidy(fisher.test(matrix(c(.x, sum(.x, .y) - .x, .y, sum(.x, .y) - .y), nrow = 2)))$estimate),
    CI = map2(No, Yes, ~broom::tidy(fisher.test(matrix(c(.x, sum(.x, .y) - .x, .y, sum(.x, .y) - .y), nrow = 2)))$conf.low),
    CI_upper = map2(No, Yes, ~broom::tidy(fisher.test(matrix(c(.x, sum(.x, .y) - .x, .y, sum(.x, .y) - .y), nrow = 2)))$conf.high),
    P_value = map2(No, Yes, ~broom::tidy(fisher.test(matrix(c(.x, sum(.x, .y) - .x, .y, sum(.x, .y) - .y), nrow = 2)))$p.value)
  ) %>%
  unnest(c(OR, CI, CI_upper, P_value)) %>%
  mutate(OR_CI = paste0(OR, " (", CI, "-", CI_upper, ")")) %>%
  select(Genotype, Value, No, Yes, OR_CI, P_value)

# Mostrar la tabla
print(table1)

## # A tibble: 9 × 6
##   Genotype Value    No   Yes OR_CI                                       P_value
##   <chr>    <fct> <int> <int> <chr>                                         <dbl>
## 1 RAGE 624 A/A      54    50 1.16553194137239 (0.653118447334434-2.0837… 6.77e-1
## 2 RAGE 624 A/T      78    56 1.93518530449083 (1.16039783828394-3.24632… 1.02e-2
## 3 RAGE 624 T/T       8     8 1 (0.202690860106555-4.93362157264663)      1   e+0
## 4 RAGE600  T/T       6     5 1.41616878357433 (0.201946442814773-10.465… 1   e+0
## 5 RAGE600  C/C     121    95 1.62047333730265 (1.08987350254629-2.41528… 1.60e-2
## 6 RAGE600  C/T      13    14 0.864619774264054 (0.260006596799489-2.852… 1   e+0
## 7 RAGE625  A/A      94    88 1.14059575273659 (0.740743570893102-1.7578… 6.00e-1
## 8 RAGE625  A/G      36    18 3.94482767133564 (1.67432685133207-9.64125… 9.78e-4
## 9 RAGE625  G/G      10     8 1.54304784588958 (0.348842898178861-7.0843… 7.40e-1

HWE SNPs

summary(SNPs, print=FALSE)

##          alleles major.allele.freq HWE      missing (%)
## RAGE.624 A/T     67.3              0.001647 0          
## RAGE625  A/G     82.3              0.000059 0          
## RAGE600  C/T     90.4              0.000001 0

Adjusted

association(Renal_Dysfunction ~ RAGE600 +Sex+ Smoking + Alcohol + Hypertension , SNPs)

## Warning in terms.formula(formula, data = data): 'varlist' has changed (from
## nvar=5) to new 6 after EncodeVars() -- should no longer happen!
## Warning in terms.formula(formula, data = data): 'varlist' has changed (from
## nvar=5) to new 6 after EncodeVars() -- should no longer happen!
## Warning in terms.formula(formula, data = data): 'varlist' has changed (from
## nvar=5) to new 6 after EncodeVars() -- should no longer happen!
## Warning in terms.formula(formula, data = data): 'varlist' has changed (from
## nvar=5) to new 6 after EncodeVars() -- should no longer happen!

## 
## SNP: RAGE600  adjusted by: Sex Smoking Alcohol Hypertension 
##               No    % Yes    %   OR lower upper p-value   AIC
## Codominant                                                   
## C/C          121 86.4  95 83.3 1.00              0.7006 346.4
## C/T           13  9.3  14 12.3 1.39  0.61  3.18              
## T/T            6  4.3   5  4.4 0.84  0.24  2.94              
## Dominant                                                     
## C/C          121 86.4  95 83.3 1.00              0.6163 344.9
## C/T-T/T       19 13.6  19 16.7 1.20  0.59  2.46              
## Recessive                                                    
## C/C-C/T      134 95.7 109 95.6 1.00              0.7383 345.0
## T/T            6  4.3   5  4.4 0.81  0.23  2.80              
## Overdominant                                                 
## C/C-T/T      127 90.7 100 87.7 1.00              0.4234 344.5
## C/T           13  9.3  14 12.3 1.40  0.61  3.20              
## log-Additive                                                 
## 0,1,2        140 55.1 114 44.9 1.06  0.63  1.78  0.8241 345.1

#CONSTRUCCION DE MODELOS DE REGRESIÓN

GMDR Analysis

# Example of fitting a logistic regression model with interaction terms
fit <- glm(Renal_Dysfunction ~ `RAGE 624` * Smoking + RAGE625 * Alcohol + `RAGE600` * Smoking, data = data, family = binomial)

# Display the summary of the model
summary(fit)

## 
## Call:
## glm(formula = Renal_Dysfunction ~ `RAGE 624` * Smoking + RAGE625 * 
##     Alcohol + RAGE600 * Smoking, family = binomial, data = data)
## 
## Coefficients:
##                            Estimate Std. Error z value Pr(>|z|)  
## (Intercept)                 0.12778    0.26551   0.481   0.6303  
## `RAGE 624`A/T              -0.17054    0.29590  -0.576   0.5644  
## `RAGE 624`T/T               0.06785    0.60550   0.112   0.9108  
## SmokingYes                 -0.06934    0.58284  -0.119   0.9053  
## RAGE625A/G                 -0.72992    0.43431  -1.681   0.0928 .
## RAGE625G/G                 -0.70655    0.75448  -0.936   0.3490  
## AlcoholYes                 -0.24637    0.32704  -0.753   0.4512  
## RAGE600C/T                  0.42077    0.46585   0.903   0.3664  
## RAGE600T/T                 -0.25886    0.70162  -0.369   0.7122  
## `RAGE 624`A/T:SmokingYes   -0.37962    0.77627  -0.489   0.6248  
## `RAGE 624`T/T:SmokingYes  -15.44598 1029.12179  -0.015   0.9880  
## RAGE625A/G:AlcoholYes       0.25968    0.67510   0.385   0.7005  
## RAGE625G/G:AlcoholYes       0.99723    1.06591   0.936   0.3495  
## SmokingYes:RAGE600C/T      -0.32471    1.16103  -0.280   0.7797  
## SmokingYes:RAGE600T/T      31.10032 1782.49091   0.017   0.9861  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 349.45  on 253  degrees of freedom
## Residual deviance: 337.33  on 239  degrees of freedom
## AIC: 367.33
## 
## Number of Fisher Scoring iterations: 14

# Example: Convert SNPs and outcome to the appropriate format
data$`RAGE 624` <- as.factor(data$`RAGE 624`)
data$`RAGE625` <- as.factor(data$`RAGE625`)
data$`RAGE600` <- as.factor(data$`RAGE600`)
data$Renal_Dysfunction <- as.numeric(data$Renal_Dysfunction) - 1  # Convert Yes/No to 1/0

# Load necessary libraries
library(broom)
library(knitr)
library(kableExtra)

## 
## Attaching package: 'kableExtra'

## The following object is masked from 'package:dplyr':
## 
##     group_rows

# Fit the model
fit <- glm(formula = Renal_Dysfunction ~ `RAGE 624` * Smoking + RAGE625 * Alcohol + RAGE600 * Smoking, 
           family = binomial, data = data)

# Tidy the model output using broom
tidy_fit <- tidy(fit)

# Format the p-values
tidy_fit <- tidy_fit %>%
  mutate(p.value = ifelse(p.value < 0.001, "<0.001", round(p.value, 4)))

# Rename columns for clarity
tidy_fit <- tidy_fit %>%
  rename(
    Term = term,
    Estimate = estimate,
    `Std. Error` = std.error,
    `Z value` = statistic,
    `P-value` = p.value
  )

# Create the table using kable and kableExtra
table <- tidy_fit %>%
  kable("html", escape = FALSE, align = "c", caption = "Logistic Regression Results") %>%
  kable_styling(full_width = F, bootstrap_options = c("striped", "hover", "condensed")) %>%
  column_spec(1, bold = T) %>%
  add_header_above(c(" " = 1, "Logistic Regression Model" = 4))

# Print the table
table

Logistic Regression Results
	Logistic Regression Model
Term	Estimate	Std. Error	Z value	P-value
(Intercept)	0.1277765	0.2655147	0.4812407	0.6303
`RAGE 624`A/T	-0.1705449	0.2959000	-0.5763601	0.5644
`RAGE 624`T/T	0.0678493	0.6055031	0.1120544	0.9108
SmokingYes	-0.0693445	0.5828440	-0.1189762	0.9053
RAGE625A/G	-0.7299224	0.4343058	-1.6806648	0.0928
RAGE625G/G	-0.7065462	0.7544832	-0.9364637	0.3490
AlcoholYes	-0.2463719	0.3270359	-0.7533481	0.4512
RAGE600C/T	0.4207702	0.4658480	0.9032350	0.3664
RAGE600T/T	-0.2588598	0.7016192	-0.3689462	0.7122
`RAGE 624`A/T:SmokingYes	-0.3796185	0.7762695	-0.4890293	0.6248
`RAGE 624`T/T:SmokingYes	-15.4459775	1029.1217914	-0.0150089	0.9880
RAGE625A/G:AlcoholYes	0.2596830	0.6750952	0.3846613	0.7005
RAGE625G/G:AlcoholYes	0.9972271	1.0659109	0.9355633	0.3495
SmokingYes:RAGE600C/T	-0.3247134	1.1610279	-0.2796775	0.7797
SmokingYes:RAGE600T/T	31.1003153	1782.4909102	0.0174477	0.9861

análisisLaura

2024-08-18

Visualize SNP summary

Perform association analysis for each SNP

RAGE 624

RAGE 625

RAGE 600

HWE SNPs

Adjusted

GMDR Analysis