install.packages(c("tidyverse","gtsummary", "readxl", "ggpubr","cowplot"))
## Installing packages into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
# Cargar las librerías necesarias
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(gtsummary)
library(readxl)
library(readxl)
X16AGOSTOFINAL_DR_FIDEL <- read_excel("16AGOSTOFINAL -DR.FIDEL.xlsx")
## Warning: Expecting numeric in Y432 / R432C25: got a date
## Warning: Expecting numeric in Y433 / R433C25: got a date
## New names:
## • `` -> `...6`
#View(X16AGOSTOFINAL_DR_FIDEL)
data<-X16AGOSTOFINAL_DR_FIDEL
names(data)
## [1] "Registro" "NOmbre" "code"
## [4] "Renal" "Sexo" "...6"
## [7] "Edad" "Peso (kg)" "Talla (cm)"
## [10] "Cintura (cm)" "Cadera (cm)" "IC/C"
## [13] "IMC" "TFG" "Categoria calculadora"
## [16] "Tabla KDIGO" "Glucosa" "Urea"
## [19] "BUN" "BUN/CREA" "Creatinina"
## [22] "Colesterol" "HDL" "LDL"
## [25] "VLDL" "Trigliceridos" "HBA1c"
## [28] "Fuma" "ALCOHOL" "ACTIVIDAD FISICA"
## [31] "TA actual" "DIASTOLE" "SISTOLE"
## [34] "HTA" "SXMET" "RAGE 624"
## [37] "RAGE625" "RAGE600"
# Ensure `data` is a data frame
data <- as.data.frame(data)
# Recodificar las variables de interés incluyendo "Renal"
data <- data %>%
mutate(
Renal_Dysfunction = recode(Renal, `0` = "No Renal Dysfunction", `1` = "Renal Dysfunction"),
Sex = recode(Sexo, "M" = "Male", "F" = "Female"),
Alcohol = recode(ALCOHOL, "SI" = "Yes", "NO" = "No"),
Physical_Activity = recode(`ACTIVIDAD FISICA`, "SI" = "Yes", "NO" = "No"),
Hypertension = recode(HTA, "SI" = "Yes", "NO" = "No"),
Metabolic_Syndrome = recode(SXMET, "SI" = "Yes", "NO" = "No"),
Smoking = recode(Fuma, "SI" = "Yes", "NO" = "No")
)
#Variebles filtradas:
# Corregir la variable "Alcohol", reemplazando "N0" por "No"
data <- data %>%
mutate(Alcohol = recode(Alcohol, "N0" = "No"))
# Filtrar y eliminar los registros con valores "-" y "/"
data <- data %>%
filter(Physical_Activity != "-", Physical_Activity != "/")
data <- data %>%
mutate(
BP_Category = if_else(`TA actual` > "120/80", "Above 120/80", "Below or Equal 120/80")
)
names(data)
## [1] "Registro" "NOmbre" "code"
## [4] "Renal" "Sexo" "...6"
## [7] "Edad" "Peso (kg)" "Talla (cm)"
## [10] "Cintura (cm)" "Cadera (cm)" "IC/C"
## [13] "IMC" "TFG" "Categoria calculadora"
## [16] "Tabla KDIGO" "Glucosa" "Urea"
## [19] "BUN" "BUN/CREA" "Creatinina"
## [22] "Colesterol" "HDL" "LDL"
## [25] "VLDL" "Trigliceridos" "HBA1c"
## [28] "Fuma" "ALCOHOL" "ACTIVIDAD FISICA"
## [31] "TA actual" "DIASTOLE" "SISTOLE"
## [34] "HTA" "SXMET" "RAGE 624"
## [37] "RAGE625" "RAGE600" "Renal_Dysfunction"
## [40] "Sex" "Alcohol" "Physical_Activity"
## [43] "Hypertension" "Metabolic_Syndrome" "Smoking"
## [46] "BP_Category"
# Crear una tabla de resumen con las variables especificadas, agrupando por disfunción renal
summary_table <- data %>%
select(
Renal_Dysfunction, Sex, `Age (years)` = Edad, `Weight (kg)` = `Peso (kg)`, `Height (cm)`= `Talla (cm)`, IMC,
`Waist Circumference (cm)`= `Cintura (cm)`, `Hip Circumference (cm)` = `Cadera (cm)`,
IC_C = `IC/C`, Smoking, Alcohol, Physical_Activity,
BP_Category = `BP_Category`, Systole = SISTOLE, Diastole = DIASTOLE,
Hypertension, Metabolic_Syndrome
) %>%
tbl_summary(
by = Renal_Dysfunction, # Agrupar por la variable dependiente "Renal_Dysfunction"
missing = "no" # Excluye valores faltantes
) %>% add_overall() %>%
add_p() %>%
modify_header(label = "**Variable**") %>%
modify_caption("**Table: Demographic and Clinical Characteristics by Renal Dysfunction Status**") %>%
as_gt() %>%
gt::tab_style(
style = gt::cell_text(weight = "bold"),
locations = gt::cells_column_labels()
)
summary_table
| Variable | Overall N = 4271 |
No Renal Dysfunction N = 2081 |
Renal Dysfunction N = 2191 |
p-value2 |
|---|---|---|---|---|
| Sex | 0.054 | |||
| Female | 253 (59%) | 133 (64%) | 120 (55%) | |
| Male | 174 (41%) | 75 (36%) | 99 (45%) | |
| Age (years) | 61 (54, 67) | 57 (51, 64) | 64 (59, 70) | <0.001 |
| Weight (kg) | 78 (68, 91) | 79 (70, 94) | 78 (65, 89) | 0.016 |
| Height (cm) | 150 (2, 162) | 158 (2, 167) | 2 (2, 157) | <0.001 |
| IMC | 29.9 (26.7, 34.1) | 30.1 (27.0, 34.6) | 29.9 (26.5, 33.0) | 0.2 |
| Waist Circumference (cm) | 101 (91, 110) | 101 (94, 110) | 100 (91, 109) | 0.4 |
| Hip Circumference (cm) | 105 (99, 111) | 106 (99, 112) | 104 (98, 111) | 0.2 |
| IC_C | 0.95 (0.89, 1.02) | 0.95 (0.89, 1.02) | 0.95 (0.90, 1.02) | 0.9 |
| Smoking | 75 (18%) | 36 (17%) | 39 (18%) | 0.9 |
| Alcohol | 130 (30%) | 75 (36%) | 55 (25%) | 0.014 |
| Physical_Activity | 239 (56%) | 129 (62%) | 110 (50%) | 0.014 |
| BP_Category | 0.004 | |||
| Above 120/80 | 223 (70%) | 104 (63%) | 119 (77%) | |
| Below or Equal 120/80 | 97 (30%) | 62 (37%) | 35 (23%) | |
| Systole | 80 (70, 84) | 80 (70, 82) | 80 (70, 86) | >0.9 |
| Diastole | 130 (120, 144) | 130 (120, 139) | 138 (122, 152) | <0.001 |
| Hypertension | 303 (72%) | 134 (64%) | 169 (79%) | 0.001 |
| Metabolic_Syndrome | 307 (72%) | 154 (74%) | 153 (70%) | 0.3 |
| 1 n (%); Median (Q1, Q3) | ||||
| 2 Pearson’s Chi-squared test; Wilcoxon rank sum test | ||||
# Convert relevant columns to numeric, removing any non-numeric characters
# Convertir las variables necesarias a numéricas y recodificar "Renal"
data <- data %>%
mutate(
Cholesterol = as.numeric(gsub("[^0-9.]", "", Colesterol)),
HDL = as.numeric(gsub("[^0-9.]", "", HDL)),
LDL = as.numeric(gsub("[^0-9.]", "", LDL)),
VLDL = as.numeric(gsub("[^0-9.]", "", VLDL)),
Triglycerides = as.numeric(gsub("[^0-9.]", "", Trigliceridos)),
Renal_Dysfunction = recode(Renal, `0` = "No", `1` = "Yes")
)
library(ggpubr)
library(cowplot)
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggpubr':
##
## get_legend
## The following object is masked from 'package:lubridate':
##
## stamp
# Crear un tema común para asegurarse de que todos los gráficos tengan el mismo tamaño de ejes
theme_custom <- theme_pubr() +
theme(axis.title.x = element_blank(), axis.title.y = element_blank(),
axis.text = element_text(size = 10), legend.position = "none")
# Crear las gráficas de boxplot para cada variable del perfil lipídico, agrupadas por Renal Dysfunction
p1 <- ggplot(data, aes(x = Renal_Dysfunction, y = Cholesterol, fill = Renal_Dysfunction)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.2, alpha = 0.5) +
stat_compare_means(aes(label = ..p.signif..), label.x = 1.5, label.y = 900) +
labs(title = "Cholesterol", y = "Cholesterol (mg/dL)") +
scale_x_discrete(labels = c("No", "Yes")) +
ylim(0, 1000) + # Asegurar la misma escala para el eje y
theme_custom
p2 <- ggplot(data, aes(x = Renal_Dysfunction, y = HDL, fill = Renal_Dysfunction)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.2, alpha = 0.5) +
stat_compare_means(aes(label = ..p.signif..), label.x = 1.5, label.y = 250) +
labs(title = "HDLs", y = "HDL (mg/dL)") +
scale_x_discrete(labels = c("No", "Yes")) +
ylim(0, 300) + # Asegurar la misma escala para el eje y
theme_custom
p3 <- ggplot(data, aes(x = Renal_Dysfunction, y = LDL, fill = Renal_Dysfunction)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.2, alpha = 0.5) +
stat_compare_means(aes(label = ..p.signif..), label.x = 1.5, label.y = 900) +
labs(title = "LDL", y = "LDL (mg/dL)") +
scale_x_discrete(labels = c("No", "Yes")) +
ylim(0, 1000) + # Asegurar la misma escala para el eje y
theme_custom
p4 <- ggplot(data, aes(x = Renal_Dysfunction, y = VLDL, fill = Renal_Dysfunction)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.2, alpha = 0.5) +
stat_compare_means(aes(label = ..p.signif..), label.x = 1.5, label.y = 900) +
labs(title = "VLDL", y = "VLDL (mg/dL)") +
scale_x_discrete(labels = c("No", "Yes")) +
ylim(0, 1000) + # Asegurar la misma escala para el eje y
theme_custom
p5 <- ggplot(data, aes(x = Renal_Dysfunction, y = Triglycerides, fill = Renal_Dysfunction)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.2, alpha = 0.5) +
stat_compare_means(aes(label = ..p.signif..), label.x = 1.5, label.y = 900) +
labs(title = "Triglycerides", y = "Triglycerides (mg/dL)") +
scale_x_discrete(labels = c("No", "Yes")) +
ylim(0, 1000) + # Asegurar la misma escala para el eje y
theme_custom
# Crear la leyenda personalizada con el título "Renal Dysfunction"
legend <- get_legend(
ggplot(data, aes(x = Renal_Dysfunction, y = Triglycerides, fill = Renal_Dysfunction)) +
geom_boxplot() +
labs(fill = "Renal Dysfunction") + # Cambiar la etiqueta de la leyenda aquí
theme(legend.position = "bottom")
)
## Warning in get_plot_component(plot, "guide-box"): Multiple components found;
## returning the first one. To return all, use `return_all = TRUE`.
# Ajustar las proporciones relativas de los gráficos para garantizar el mismo tamaño de las celdas
top_row <- plot_grid(p1, p2, labels = c('A', 'B'), label_size = 12, ncol = 2, rel_widths = c(1, 1))
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
bottom_row <- plot_grid(p3, p4, p5, labels = c('C', 'D', 'E'), label_size = 12, ncol = 3, rel_widths = c(1, 1, 1))
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Combinar ambas filas y agregar la leyenda
combined_plot <- plot_grid(top_row, bottom_row, legend, ncol = 1, rel_heights = c(1, 1, 0.2))
# Mostrar el gráfico
print(combined_plot)
# Crear un tema común para asegurarse de que todos los gráficos tengan el mismo tamaño de ejes
theme_custom <- theme_pubr() +
theme(axis.title.x = element_blank(), axis.title.y = element_blank(),
axis.text = element_text(size = 10), legend.position = "none")
# Definir las posiciones de la significancia para los gráficos (ajustadas para cada escala de y)
significance_y_position_large <- 900
significance_y_position_small <- 110
significance_y_position_hba1c <- 13
# Crear las gráficas de boxplot para cada variable, agrupadas por Renal Dysfunction
p1 <- ggplot(data, aes(x = Renal_Dysfunction, y = TFG, fill = Renal_Dysfunction)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.2, alpha = 0.5) +
stat_compare_means(aes(label = ..p.signif..), label.x = 1.5, label.y = significance_y_position_large) +
labs(title = "TFG", y = "TFG (mL/min/1.73m²)") +
scale_x_discrete(labels = c("No", "Yes")) +
ylim(0, 1000) +
theme_custom
p2 <- ggplot(data, aes(x = Renal_Dysfunction, y = Glucosa, fill = Renal_Dysfunction)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.2, alpha = 0.5) +
stat_compare_means(aes(label = ..p.signif..), label.x = 1.5, label.y = significance_y_position_large) +
labs(title = "Glucosa", y = "Glucosa (mg/dL)") +
scale_x_discrete(labels = c("No", "Yes")) +
ylim(0, 1000) +
theme_custom
p3 <- ggplot(data, aes(x = Renal_Dysfunction, y = Urea, fill = Renal_Dysfunction)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.2, alpha = 0.5) +
stat_compare_means(aes(label = ..p.signif..), label.x = 1.5, label.y = significance_y_position_large) +
labs(title = "Urea", y = "Urea (mg/dL)") +
scale_x_discrete(labels = c("No", "Yes")) +
ylim(0, 1000) +
theme_custom
p4 <- ggplot(data, aes(x = Renal_Dysfunction, y = BUN, fill = Renal_Dysfunction)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.2, alpha = 0.5) +
stat_compare_means(aes(label = ..p.signif..), label.x = 1.5, label.y = significance_y_position_large) +
labs(title = "BUN", y = "BUN (mg/dL)") +
scale_x_discrete(labels = c("No", "Yes")) +
ylim(0, 1000) +
theme_custom
p5 <- ggplot(data, aes(x = Renal_Dysfunction, y = `BUN/CREA`, fill = Renal_Dysfunction)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.2, alpha = 0.5) +
stat_compare_means(aes(label = ..p.signif..), label.x = 1.5, label.y = significance_y_position_small) +
labs(title = "BUN/CREA", y = "BUN/CREA") +
scale_x_discrete(labels = c("No", "Yes")) +
ylim(0, 150) +
theme_custom
p6 <- ggplot(data, aes(x = Renal_Dysfunction, y = Creatinina, fill = Renal_Dysfunction)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.2, alpha = 0.5) +
stat_compare_means(aes(label = ..p.signif..), label.x = 1.5, label.y = significance_y_position_small) +
labs(title = "Creatinina", y = "Creatinina (mg/dL)") +
scale_x_discrete(labels = c("No", "Yes")) +
ylim(0, 150) +
theme_custom
p7 <- ggplot(data, aes(x = Renal_Dysfunction, y = HBA1c, fill = Renal_Dysfunction)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.2, alpha = 0.5) +
stat_compare_means(aes(label = ..p.signif..), label.x = 1.5, label.y = significance_y_position_hba1c) +
labs(title = "HBA1c", y = "HBA1c (%)") +
scale_x_discrete(labels = c("No", "Yes")) +
ylim(0, 15) +
theme_custom
# Crear la leyenda personalizada con el título "Renal Dysfunction"
legend <- get_legend(
ggplot(data, aes(x = Renal_Dysfunction, y = TFG, fill = Renal_Dysfunction)) +
geom_boxplot() +
labs(fill = "Renal Dysfunction") + # Cambiar la etiqueta de la leyenda aquí
theme(legend.position = "bottom")
)
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning in get_plot_component(plot, "guide-box"): Multiple components found;
## returning the first one. To return all, use `return_all = TRUE`.
# Ajustar las proporciones relativas de los gráficos para garantizar el mismo tamaño de las celdas
top_row <- plot_grid(p1, p2, p3, labels = c('A', 'B', 'C'), label_size = 12, ncol = 3, rel_widths = c(1, 1, 1))
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
middle_row <- plot_grid(p4, p5, p6, labels = c('D', 'E', 'F'), label_size = 12, ncol = 3, rel_widths = c(1, 1, 1))
## Warning: Removed 6 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 6 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_point()`).
bottom_row <- plot_grid(p7, NULL, NULL, labels = c('G', '', ''), label_size = 12, ncol = 3, rel_widths = c(1, 1, 1))
## Warning: Removed 14 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 14 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Combinar todas las filas y agregar la leyenda
combined_plot <- plot_grid(top_row, middle_row, bottom_row, legend, ncol = 1, rel_heights = c(1, 1, 1, 0.2))
# Mostrar el gráfico
print(combined_plot)
# Install and load SNPassoc
#install.packages("SNPassoc")
library(SNPassoc)
## Registered S3 method overwritten by 'SNPassoc':
## method from
## summary.haplo.glm haplo.stats
# Assuming your data is in a data frame called 'genotype_data'
# and you have loaded the data into R, with the genotype columns and Renal_Dysfunction status
# Example data loading
# genotype_data <- read.csv("your_data_file.csv")
str(data)
## 'data.frame': 427 obs. of 48 variables:
## $ Registro : chr "AM208" "AM212" "AM218" "AM222" ...
## $ NOmbre : chr "Nava Chaides Ana Margarita" "Ochoa MoreNO Ma Belem" "Corral Ramirez HoNOria" "Ernesto Alfonso Perez Lerma" ...
## $ code : num 1 2 3 4 5 6 7 8 9 10 ...
## $ Renal : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Sexo : chr "F" "F" "F" "M" ...
## $ ...6 : chr "1" "1" "1" "0" ...
## $ Edad : num 65 74 67 66 66 64 62 65 60 67 ...
## $ Peso (kg) : num 94.9 77.1 54 77.1 67.2 76.5 73.2 80.2 68.4 65.9 ...
## $ Talla (cm) : num 153 151 154 174 160 ...
## $ Cintura (cm) : num 110.1 111 77.2 93 85 ...
## $ Cadera (cm) : num 135.1 109.5 85 100 98.5 ...
## $ IC/C : num 0.815 1.014 0.908 0.93 0.863 ...
## $ IMC : num 40.6 33.8 22.5 25.5 24.4 31 27.9 27.4 26.1 24.8 ...
## $ TFG : num 95.7 92.3 94.3 98.3 95 ...
## $ Categoria calculadora: chr "Función renal NOrmal" "Función renal NOrmal" "Función renal NOrmal" "Función renal NOrmal" ...
## $ Tabla KDIGO : chr "NO" "NO" "NO" "NO" ...
## $ Glucosa : num 94 106 168 140 94 ...
## $ Urea : num 24.3 34.8 25.6 32.3 28.7 25.5 24.6 38.2 17.3 33.5 ...
## $ BUN : num 11.3 16.2 11.9 15.1 13.4 ...
## $ BUN/CREA : num 18.9 23.2 19.9 21.5 22.3 ...
## $ Creatinina : num 0.6 0.7 0.6 0.7 0.6 0.5 0.5 0.7 0.68 0.69 ...
## $ Colesterol : chr "210" "222" "123" "99" ...
## $ HDL : num 47 53 26 29 44 39 50 43 26.4 43.9 ...
## $ LDL : num 131 142.4 79.2 52.6 71.6 ...
## $ VLDL : num 32 26.6 17.8 17.4 29.4 ...
## $ Trigliceridos : chr "160" "133" "89" "87" ...
## $ HBA1c : num 7.1 6.8 9.5 7.4 6.9 5.5 5.3 5.7 7.6 11.8 ...
## $ Fuma : chr "NO" "NO" "NO" "NO" ...
## $ ALCOHOL : chr "NO" "NO" "NO" "SI" ...
## $ ACTIVIDAD FISICA : chr "SI" "NO" "SI" "SI" ...
## $ TA actual : chr "142/87" "122/68" "149/92" "145/83" ...
## $ DIASTOLE : num 142 122 149 145 115 145 132 118 NA NA ...
## $ SISTOLE : num 87 68 92 83 68 81 76 75 NA NA ...
## $ HTA : chr "SI" "SI" "SI" "SI" ...
## $ SXMET : chr "SI" "SI" "NO" "SI" ...
## $ RAGE 624 : chr "A/A" "A/T" "A/T" "A/A" ...
## $ RAGE625 : chr "A/A" "A/A" "A/A" "A/A" ...
## $ RAGE600 : chr "C/C" "C/C" "C/C" "C/C" ...
## $ Renal_Dysfunction : chr "No" "No" "No" "No" ...
## $ Sex : chr "Female" "Female" "Female" "Male" ...
## $ Alcohol : chr "No" "No" "No" "Yes" ...
## $ Physical_Activity : chr "Yes" "No" "Yes" "Yes" ...
## $ Hypertension : chr "Yes" "Yes" "Yes" "Yes" ...
## $ Metabolic_Syndrome : chr "Yes" "Yes" "No" "Yes" ...
## $ Smoking : chr "No" "No" "No" "No" ...
## $ BP_Category : chr "Above 120/80" "Above 120/80" "Above 120/80" "Above 120/80" ...
## $ Cholesterol : num 210 222 123 99 145 ...
## $ Triglycerides : num 160 133 89 87 147 ...
# Convert the columns to factors
data$`RAGE 624` <- as.factor(data$`RAGE 624`)
data$RAGE625 <- as.factor(data$RAGE625)
data$RAGE600 <- as.factor(data$RAGE600)
data$Renal_Dysfunction <- factor(data$Renal_Dysfunction, levels = c("No", "Yes"))
# Hay Nas, en los SNP hay que eliminarlos
#Remove NAs
data <- na.omit(data)
# Alternatively, if using column indices
# You can also identify the indices of these columns manually if needed:
# For example:
snp_indices <- match(c("RAGE 624", "RAGE625", "RAGE600"), colnames(data))
# Then use these indices in setupSNP
SNPs <- setupSNP(data, colSNPs = snp_indices, sep = "/")
plot(SNPs$`RAGE.624`)
plot(SNPs$RAGE600)
plot(SNPs$RAGE625)
# You can also change the plot type to pie chart
plot(SNPs$RAGE.624, type = pie)
plot(SNPs$RAGE600, type = pie)
plot(SNPs$RAGE625, type = pie)
association(Renal_Dysfunction ~ RAGE.624, data = SNPs)
##
## SNP: RAGE.624 adjusted by:
## No % Yes % OR lower upper p-value AIC
## Codominant
## A/A 54 38.6 50 43.9 1.00 0.5723 354.3
## A/T 78 55.7 56 49.1 0.78 0.46 1.30
## T/T 8 5.7 8 7.0 1.08 0.38 3.09
## Dominant
## A/A 54 38.6 50 43.9 1.00 0.3941 352.7
## A/T-T/T 86 61.4 64 56.1 0.80 0.49 1.33
## Recessive
## A/A-A/T 132 94.3 106 93.0 1.00 0.6714 353.3
## T/T 8 5.7 8 7.0 1.25 0.45 3.43
## Overdominant
## A/A-T/T 62 44.3 58 50.9 1.00 0.2952 352.4
## A/T 78 55.7 56 49.1 0.77 0.47 1.26
## log-Additive
## 0,1,2 140 55.1 114 44.9 0.89 0.59 1.36 0.5944 353.2
# Perform association analysis for one SNP
association(Renal_Dysfunction ~ RAGE625, data = SNPs)
##
## SNP: RAGE625 adjusted by:
## No % Yes % OR lower upper p-value AIC
## Codominant
## A/A 94 67.1 88 77.2 1.00 0.14437 351.6
## A/G 36 25.7 18 15.8 0.53 0.28 1.01
## G/G 10 7.1 8 7.0 0.85 0.32 2.26
## Dominant
## A/A 94 67.1 88 77.2 1.00 0.07541 350.3
## A/G-G/G 46 32.9 26 22.8 0.60 0.34 1.06
## Recessive
## A/A-A/G 130 92.9 106 93.0 1.00 0.96911 353.5
## G/G 10 7.1 8 7.0 0.98 0.37 2.57
## Overdominant
## A/A-G/G 104 74.3 96 84.2 1.00 0.05217 349.7
## A/G 36 25.7 18 15.8 0.54 0.29 1.02
## log-Additive
## 0,1,2 140 55.1 114 44.9 0.76 0.50 1.15 0.18215 351.7
# Perform association analysis for one SNP
association(Renal_Dysfunction ~ RAGE600, data = SNPs)
##
## SNP: RAGE600 adjusted by:
## No % Yes % OR lower upper p-value AIC
## Codominant
## C/C 121 86.4 95 83.3 1.00 0.7411 354.9
## C/T 13 9.3 14 12.3 1.37 0.62 3.06
## T/T 6 4.3 5 4.4 1.06 0.31 3.58
## Dominant
## C/C 121 86.4 95 83.3 1.00 0.4925 353.0
## C/T-T/T 19 13.6 19 16.7 1.27 0.64 2.54
## Recessive
## C/C-C/T 134 95.7 109 95.6 1.00 0.9689 353.5
## T/T 6 4.3 5 4.4 1.02 0.30 3.45
## Overdominant
## C/C-T/T 127 90.7 100 87.7 1.00 0.4424 352.9
## C/T 13 9.3 14 12.3 1.37 0.62 3.04
## log-Additive
## 0,1,2 140 55.1 114 44.9 1.14 0.69 1.88 0.6076 353.2
library(broom)
# Supongamos que tus datos están en un dataframe llamado `data`
# y que `RAGE624`, `RAGE625`, y `RAGE600` son variables en el dataframe
# con los genotipos categorizados como "GG", "GT", "TT".
# También asumo que tienes una variable llamada `Renal_Dysfunction` que indica "No" (Control) y "Yes" (Cases)
# Reestructuramos los datos para prepararlos para la tabla
data_long <- data %>%
pivot_longer(cols = c(`RAGE 624`, RAGE625, RAGE600), names_to = "Genotype", values_to = "Value")
# Ahora creamos la tabla
table1 <- data_long %>%
group_by(Genotype, Value, Renal_Dysfunction) %>%
summarize(n = n(), .groups = "drop") %>% # Añadido .groups = "drop" para evitar la advertencia
pivot_wider(names_from = Renal_Dysfunction, values_from = n) %>%
mutate(
OR = map2(No, Yes, ~broom::tidy(fisher.test(matrix(c(.x, sum(.x, .y) - .x, .y, sum(.x, .y) - .y), nrow = 2)))$estimate),
CI = map2(No, Yes, ~broom::tidy(fisher.test(matrix(c(.x, sum(.x, .y) - .x, .y, sum(.x, .y) - .y), nrow = 2)))$conf.low),
CI_upper = map2(No, Yes, ~broom::tidy(fisher.test(matrix(c(.x, sum(.x, .y) - .x, .y, sum(.x, .y) - .y), nrow = 2)))$conf.high),
P_value = map2(No, Yes, ~broom::tidy(fisher.test(matrix(c(.x, sum(.x, .y) - .x, .y, sum(.x, .y) - .y), nrow = 2)))$p.value)
) %>%
unnest(c(OR, CI, CI_upper, P_value)) %>%
mutate(OR_CI = paste0(OR, " (", CI, "-", CI_upper, ")")) %>%
select(Genotype, Value, No, Yes, OR_CI, P_value)
# Mostrar la tabla
print(table1)
## # A tibble: 9 × 6
## Genotype Value No Yes OR_CI P_value
## <chr> <fct> <int> <int> <chr> <dbl>
## 1 RAGE 624 A/A 54 50 1.16553194137239 (0.653118447334434-2.0837… 6.77e-1
## 2 RAGE 624 A/T 78 56 1.93518530449083 (1.16039783828394-3.24632… 1.02e-2
## 3 RAGE 624 T/T 8 8 1 (0.202690860106555-4.93362157264663) 1 e+0
## 4 RAGE600 T/T 6 5 1.41616878357433 (0.201946442814773-10.465… 1 e+0
## 5 RAGE600 C/C 121 95 1.62047333730265 (1.08987350254629-2.41528… 1.60e-2
## 6 RAGE600 C/T 13 14 0.864619774264054 (0.260006596799489-2.852… 1 e+0
## 7 RAGE625 A/A 94 88 1.14059575273659 (0.740743570893102-1.7578… 6.00e-1
## 8 RAGE625 A/G 36 18 3.94482767133564 (1.67432685133207-9.64125… 9.78e-4
## 9 RAGE625 G/G 10 8 1.54304784588958 (0.348842898178861-7.0843… 7.40e-1
summary(SNPs, print=FALSE)
## alleles major.allele.freq HWE missing (%)
## RAGE.624 A/T 67.3 0.001647 0
## RAGE625 A/G 82.3 0.000059 0
## RAGE600 C/T 90.4 0.000001 0
association(Renal_Dysfunction ~ RAGE600 +Sex+ Smoking + Alcohol + Hypertension , SNPs)
## Warning in terms.formula(formula, data = data): 'varlist' has changed (from
## nvar=5) to new 6 after EncodeVars() -- should no longer happen!
## Warning in terms.formula(formula, data = data): 'varlist' has changed (from
## nvar=5) to new 6 after EncodeVars() -- should no longer happen!
## Warning in terms.formula(formula, data = data): 'varlist' has changed (from
## nvar=5) to new 6 after EncodeVars() -- should no longer happen!
## Warning in terms.formula(formula, data = data): 'varlist' has changed (from
## nvar=5) to new 6 after EncodeVars() -- should no longer happen!
##
## SNP: RAGE600 adjusted by: Sex Smoking Alcohol Hypertension
## No % Yes % OR lower upper p-value AIC
## Codominant
## C/C 121 86.4 95 83.3 1.00 0.7006 346.4
## C/T 13 9.3 14 12.3 1.39 0.61 3.18
## T/T 6 4.3 5 4.4 0.84 0.24 2.94
## Dominant
## C/C 121 86.4 95 83.3 1.00 0.6163 344.9
## C/T-T/T 19 13.6 19 16.7 1.20 0.59 2.46
## Recessive
## C/C-C/T 134 95.7 109 95.6 1.00 0.7383 345.0
## T/T 6 4.3 5 4.4 0.81 0.23 2.80
## Overdominant
## C/C-T/T 127 90.7 100 87.7 1.00 0.4234 344.5
## C/T 13 9.3 14 12.3 1.40 0.61 3.20
## log-Additive
## 0,1,2 140 55.1 114 44.9 1.06 0.63 1.78 0.8241 345.1
#CONSTRUCCION DE MODELOS DE REGRESIÓN
# Example of fitting a logistic regression model with interaction terms
fit <- glm(Renal_Dysfunction ~ `RAGE 624` * Smoking + RAGE625 * Alcohol + `RAGE600` * Smoking, data = data, family = binomial)
# Display the summary of the model
summary(fit)
##
## Call:
## glm(formula = Renal_Dysfunction ~ `RAGE 624` * Smoking + RAGE625 *
## Alcohol + RAGE600 * Smoking, family = binomial, data = data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.12778 0.26551 0.481 0.6303
## `RAGE 624`A/T -0.17054 0.29590 -0.576 0.5644
## `RAGE 624`T/T 0.06785 0.60550 0.112 0.9108
## SmokingYes -0.06934 0.58284 -0.119 0.9053
## RAGE625A/G -0.72992 0.43431 -1.681 0.0928 .
## RAGE625G/G -0.70655 0.75448 -0.936 0.3490
## AlcoholYes -0.24637 0.32704 -0.753 0.4512
## RAGE600C/T 0.42077 0.46585 0.903 0.3664
## RAGE600T/T -0.25886 0.70162 -0.369 0.7122
## `RAGE 624`A/T:SmokingYes -0.37962 0.77627 -0.489 0.6248
## `RAGE 624`T/T:SmokingYes -15.44598 1029.12179 -0.015 0.9880
## RAGE625A/G:AlcoholYes 0.25968 0.67510 0.385 0.7005
## RAGE625G/G:AlcoholYes 0.99723 1.06591 0.936 0.3495
## SmokingYes:RAGE600C/T -0.32471 1.16103 -0.280 0.7797
## SmokingYes:RAGE600T/T 31.10032 1782.49091 0.017 0.9861
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 349.45 on 253 degrees of freedom
## Residual deviance: 337.33 on 239 degrees of freedom
## AIC: 367.33
##
## Number of Fisher Scoring iterations: 14
# Example: Convert SNPs and outcome to the appropriate format
data$`RAGE 624` <- as.factor(data$`RAGE 624`)
data$`RAGE625` <- as.factor(data$`RAGE625`)
data$`RAGE600` <- as.factor(data$`RAGE600`)
data$Renal_Dysfunction <- as.numeric(data$Renal_Dysfunction) - 1 # Convert Yes/No to 1/0
# Load necessary libraries
library(broom)
library(knitr)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
# Fit the model
fit <- glm(formula = Renal_Dysfunction ~ `RAGE 624` * Smoking + RAGE625 * Alcohol + RAGE600 * Smoking,
family = binomial, data = data)
# Tidy the model output using broom
tidy_fit <- tidy(fit)
# Format the p-values
tidy_fit <- tidy_fit %>%
mutate(p.value = ifelse(p.value < 0.001, "<0.001", round(p.value, 4)))
# Rename columns for clarity
tidy_fit <- tidy_fit %>%
rename(
Term = term,
Estimate = estimate,
`Std. Error` = std.error,
`Z value` = statistic,
`P-value` = p.value
)
# Create the table using kable and kableExtra
table <- tidy_fit %>%
kable("html", escape = FALSE, align = "c", caption = "Logistic Regression Results") %>%
kable_styling(full_width = F, bootstrap_options = c("striped", "hover", "condensed")) %>%
column_spec(1, bold = T) %>%
add_header_above(c(" " = 1, "Logistic Regression Model" = 4))
# Print the table
table
| Term | Estimate | Std. Error | Z value | P-value |
|---|---|---|---|---|
| (Intercept) | 0.1277765 | 0.2655147 | 0.4812407 | 0.6303 |
RAGE 624A/T
|
-0.1705449 | 0.2959000 | -0.5763601 | 0.5644 |
RAGE 624T/T
|
0.0678493 | 0.6055031 | 0.1120544 | 0.9108 |
| SmokingYes | -0.0693445 | 0.5828440 | -0.1189762 | 0.9053 |
| RAGE625A/G | -0.7299224 | 0.4343058 | -1.6806648 | 0.0928 |
| RAGE625G/G | -0.7065462 | 0.7544832 | -0.9364637 | 0.3490 |
| AlcoholYes | -0.2463719 | 0.3270359 | -0.7533481 | 0.4512 |
| RAGE600C/T | 0.4207702 | 0.4658480 | 0.9032350 | 0.3664 |
| RAGE600T/T | -0.2588598 | 0.7016192 | -0.3689462 | 0.7122 |
RAGE 624A/T:SmokingYes
|
-0.3796185 | 0.7762695 | -0.4890293 | 0.6248 |
RAGE 624T/T:SmokingYes
|
-15.4459775 | 1029.1217914 | -0.0150089 | 0.9880 |
| RAGE625A/G:AlcoholYes | 0.2596830 | 0.6750952 | 0.3846613 | 0.7005 |
| RAGE625G/G:AlcoholYes | 0.9972271 | 1.0659109 | 0.9355633 | 0.3495 |
| SmokingYes:RAGE600C/T | -0.3247134 | 1.1610279 | -0.2796775 | 0.7797 |
| SmokingYes:RAGE600T/T | 31.1003153 | 1782.4909102 | 0.0174477 | 0.9861 |