#install.packages("conflicted")
#install.packages("dplyr")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(gtsummary)
library(gt)
library(readxl)
library(conflicted)
library(dplyr)
setwd(dir = "/Users/lorandacalderonzamora/Downloads/")
data <- read_excel("ARTICULO-RAGE-DM2.xlsx")
knitr::opts_knit$set(root.dir = "/Users/lorandacalderonzamora/Downloads")
class(data)
## [1] "tbl_df" "tbl" "data.frame"
names(data)
## [1] "IDGRUPOS" "SEXO" "EDAD" "IMC"
## [5] "TFG" "GLUCOSA" "UREA" "BUN"
## [9] "BUNCREA" "CREATININA" "COLESTEROL" "HDL"
## [13] "LDL" "VLDL" "TRIGLICERIDOS" "HbA1C"
## [17] "FUMA" "SXMET" "RAGE624" "RAGE625"
## [21] "RAGE600" "ALCOHOL" "ACTFISICA" "SISTOLE"
## [25] "DIASTOLE" "HTA"
data <- as.data.frame(data)
data <- data %>%
mutate(
Renal_Dysfunction = dplyr::recode(IDGRUPOS, `0` = "No Renal Dysfunction", `1` = "Renal Dysfunction"),
Sex = dplyr::recode(SEXO, `0` = "Male", `1` = "Female"),
Alcohol = dplyr::recode(ALCOHOL, `0` = "No", `1` = "Yes"),
Physical_Activity = dplyr::recode(ACTFISICA, `0` = "No", `1` = "Yes"),
Hypertension = dplyr::recode(HTA, `0` = "No", `1` = "Yes"),
Metabolic_Syndrome = dplyr::recode(SXMET, `0` = "No", `1` = "Yes"),
Smoking = dplyr::recode(FUMA, `0`= "No", `1` = "Yes"),
Age = as.numeric(EDAD), # Crear o convertir Age a numérico
BMI = as.numeric(IMC)
)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `BMI = as.numeric(IMC)`.
## Caused by warning:
## ! NAs introduced by coercion
class(data)
## [1] "data.frame"
view(data)
str(data)
## 'data.frame': 435 obs. of 35 variables:
## $ IDGRUPOS : num 0 0 0 0 0 0 0 0 0 0 ...
## $ SEXO : num 1 1 1 0 1 1 1 1 1 1 ...
## $ EDAD : num 65 74 67 66 66 64 62 65 60 67 ...
## $ IMC : chr "40.6" "33.799999999999997" "22.5" "25.5" ...
## $ TFG : chr "95.65" "92.35" "94.32" "98.34" ...
## $ GLUCOSA : chr "94" "106" "168" "140" ...
## $ UREA : num 24.3 34.8 25.6 32.3 28.7 25.5 24.6 38.2 17.3 33.5 ...
## $ BUN : num 11.3 16.2 11.9 15.1 13.4 ...
## $ BUNCREA : num 18.9 23.2 19.9 21.5 22.3 ...
## $ CREATININA : num 0.6 0.7 0.6 0.7 0.6 0.5 0.5 0.7 0.68 0.69 ...
## $ COLESTEROL : num 210 222 123 99 145 ...
## $ HDL : num 47 53 26 29 44 39 50 43 26.4 43.9 ...
## $ LDL : num 131 142.4 79.2 52.6 71.6 ...
## $ VLDL : num 32 26.6 17.8 17.4 29.4 ...
## $ TRIGLICERIDOS : num 160 133 89 87 147 ...
## $ HbA1C : chr "7.1" "6.8" "9.5" "7.4" ...
## $ FUMA : chr "0" "0" "0" "0" ...
## $ SXMET : num 1 1 0 1 0 1 0 0 0 0 ...
## $ RAGE624 : chr "A/A" "A/T" "A/T" "A/A" ...
## $ RAGE625 : chr "A/A" "A/A" "A/A" "A/A" ...
## $ RAGE600 : chr "C/C" "C/C" "C/C" "C/C" ...
## $ ALCOHOL : num 0 0 0 1 0 1 0 0 1 0 ...
## $ ACTFISICA : chr "0" "1" "0" "0" ...
## $ SISTOLE : chr "142" "122" "149" "145" ...
## $ DIASTOLE : chr "87" "68" "92" "83" ...
## $ HTA : chr "1" "1" "1" "1" ...
## $ Renal_Dysfunction : chr "No Renal Dysfunction" "No Renal Dysfunction" "No Renal Dysfunction" "No Renal Dysfunction" ...
## $ Sex : chr "Female" "Female" "Female" "Male" ...
## $ Alcohol : chr "No" "No" "No" "Yes" ...
## $ Physical_Activity : chr "No" "Yes" "No" "No" ...
## $ Hypertension : chr "Yes" "Yes" "Yes" "Yes" ...
## $ Metabolic_Syndrome: chr "Yes" "Yes" "No" "Yes" ...
## $ Smoking : chr "No" "No" "No" "No" ...
## $ Age : num 65 74 67 66 66 64 62 65 60 67 ...
## $ BMI : num 40.6 33.8 22.5 25.5 24.4 31 27.9 27.4 26.1 24.8 ...
data <- as.data.frame(data)
selected_data <- data %>%
dplyr::select(
Renal_Dysfunction, Sex, Age, BMI, Smoking, Alcohol, Physical_Activity, Hypertension, Metabolic_Syndrome
)
summary_table <- selected_data %>%
tbl_summary(
by = Renal_Dysfunction,
missing = "no",
type = list(
Age ~ "continuous",
Sex ~ "categorical",
Smoking ~ "categorical",
Alcohol ~ "categorical",
Physical_Activity ~ "categorical",
Hypertension ~ "categorical",
Metabolic_Syndrome ~ "categorical",
BMI ~ "continuous"
),
statistic = list(
Age ~ "{median} ({p25} - {p75})",
BMI ~ "{median} ({p25} - {p75})",
all_categorical() ~ "{n} ({p}%)"
)
) %>%
add_overall() %>%
add_p(
test = all_categorical() ~ "fisher.test",
test.args = all_categorical() ~ list(simulate.p.value = TRUE)
) %>%
modify_header(label = "**Variable**") %>%
modify_caption("**Table: Sociodemographic and lifestyle characteristics by Renal Dysfunction**") %>%
as_gt() %>%
gt::tab_style(
style = gt::cell_text(weight = "bold"),
locations = gt::cells_column_labels()
)
summary_table
| Variable | Overall N = 4351 |
No Renal Dysfunction N = 2171 |
Renal Dysfunction N = 2181 |
p-value2 |
|---|---|---|---|---|
| Sex | 0.10 | |||
| Female | 257 (59%) | 137 (63%) | 120 (55%) | |
| Male | 178 (41%) | 80 (37%) | 98 (45%) | |
| Age | 61 (54 - 67) | 57 (51 - 64) | 64 (59 - 70) | <0.001 |
| BMI | 29.9 (26.7 - 34.1) | 30.1 (27.1 - 34.4) | 29.9 (26.4 - 33.1) | 0.2 |
| Smoking | 0.6 | |||
| NA | 2 (0.5%) | 0 (0%) | 2 (0.9%) | |
| No | 359 (83%) | 180 (83%) | 179 (82%) | |
| Yes | 74 (17%) | 37 (17%) | 37 (17%) | |
| Alcohol | 0.002 | |||
| No | 306 (70%) | 138 (64%) | 168 (77%) | |
| Yes | 129 (30%) | 79 (36%) | 50 (23%) | |
| Physical_Activity | 0.002 | |||
| NA | 8 (1.8%) | 1 (0.5%) | 7 (3.2%) | |
| No | 239 (55%) | 135 (62%) | 104 (48%) | |
| Yes | 188 (43%) | 81 (37%) | 107 (49%) | |
| Hypertension | 0.002 | |||
| NA | 4 (0.9%) | 0 (0%) | 4 (1.8%) | |
| No | 123 (28%) | 77 (35%) | 46 (21%) | |
| Yes | 308 (71%) | 140 (65%) | 168 (77%) | |
| Metabolic_Syndrome | 0.4 | |||
| No | 119 (27%) | 55 (25%) | 64 (29%) | |
| Yes | 316 (73%) | 162 (75%) | 154 (71%) | |
| 1 n (%); Median (Q1 - Q3) | ||||
| 2 Fisher’s exact test; Wilcoxon rank sum test; Fisher’s Exact Test for Count Data with simulated p-value (based on 2000 replicates) | ||||
# Seleccionar solo las variables deseadas y asegurarse de que son numéricas
selected_data <- data %>%
mutate(
TFG = as.numeric(TFG),
UREA = as.numeric(UREA),
CREATININA = as.numeric(CREATININA),
BUN = as.numeric(BUN),
BUNCREA = as.numeric(BUNCREA),
GLUCOSA = as.numeric(GLUCOSA),
HbA1C = as.numeric(HbA1C),
TRIGLICERIDOS = as.numeric(TRIGLICERIDOS),
COLESTEROL = as.numeric(COLESTEROL),
HDL = as.numeric(HDL),
LDL = as.numeric(LDL),
VLDL = as.numeric(VLDL)
) %>%
# Seleccionar solo las variables de interés
select(TFG, UREA, CREATININA, BUN, BUNCREA, GLUCOSA, HbA1C, TRIGLICERIDOS, COLESTEROL, HDL, LDL, VLDL, Renal_Dysfunction)
## Warning: There were 3 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `TFG = as.numeric(TFG)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 2 remaining warnings.
# Convertir Renal_Dysfunction a factor si no lo es
selected_data$Renal_Dysfunction <- as.factor(selected_data$Renal_Dysfunction)
# Crear la tabla de resumen
summary_table <- selected_data %>%
tbl_summary(
by = Renal_Dysfunction, # Agrupar por la variable 'Renal_Dysfunction'
missing = "no", # Excluir valores faltantes
type = all_continuous() ~ "continuous", # Especificar que todas las variables son continuas
statistic = all_continuous() ~ "{mean} ({p25}, {p75})" # Mostrar la media y desviación estándar
) %>%
add_overall() %>% # Añadir una columna con el resumen general
add_p() %>% # Añadir columna con valores p
modify_header(label = "**Variable**") %>%
as_gt() %>%
gt::tab_header(
title = gt::md("**Table: Clinical and Biochemical Parameters**")
)
# Mostrar la tabla
summary_table
| Table: Clinical and Biochemical Parameters | ||||
| Variable | Overall N = 4351 |
No Renal Dysfunction N = 2171 |
Renal Dysfunction N = 2181 |
p-value2 |
|---|---|---|---|---|
| TFG | 83 (65, 103) | 110 (97, 113) | 56 (38, 79) | <0.001 |
| UREA | 52 (30, 58) | 36 (26, 43) | 68 (36, 83) | <0.001 |
| CREATININA | 1.48 (0.60, 1.03) | 0.59 (0.50, 0.70) | 2.36 (0.86, 1.65) | <0.001 |
| BUN | 24 (14, 26) | 17 (12, 20) | 32 (17, 39) | <0.001 |
| BUNCREA | 27 (16, 29) | 34 (20, 36) | 19 (13, 24) | <0.001 |
| GLUCOSA | 178 (108, 225) | 199 (121, 256) | 158 (101, 188) | <0.001 |
| HbA1C | 7.73 (6.30, 8.90) | 8.13 (6.70, 9.30) | 7.33 (6.00, 8.47) | <0.001 |
| TRIGLICERIDOS | 160 (100, 191) | 162 (106, 187) | 157 (95, 197) | 0.2 |
| COLESTEROL | 196 (148, 226) | 203 (152, 223) | 189 (146, 227) | 0.4 |
| HDL | 44 (33, 53) | 41 (30, 50) | 47 (35, 56) | <0.001 |
| LDL | 121 (78, 148) | 130 (87, 149) | 112 (69, 146) | 0.090 |
| VLDL | 32 (20, 38) | 32 (21, 37) | 32 (19, 41) | 0.2 |
| 1 Mean (Q1, Q3) | ||||
| 2 Wilcoxon rank sum test | ||||
#install.packages(c("ggpubr", "cowplot"))
library(ggpubr)
library(cowplot)
library(ggplot2)
data <- data %>%
mutate(
TFG = as.numeric(TFG),
UREA = as.numeric(UREA),
CREATININA = as.numeric(CREATININA),
BUN = as.numeric(BUN),
BUNCREA = as.numeric(BUNCREA),
GLUCOSA = as.numeric(GLUCOSA),
HbA1C = as.numeric(HbA1C),
TRIGLICERIDOS = as.numeric(TRIGLICERIDOS),
COLESTEROL = as.numeric(COLESTEROL),
HDL = as.numeric(HDL),
LDL = as.numeric(LDL),
VLDL = as.numeric(VLDL)
)
## Warning: There were 3 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `TFG = as.numeric(TFG)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 2 remaining warnings.
custom_colors <- c("No Renal Dysfunction" = "#1f77b4", "Renal Dysfunction" = "#ff7f0e")
# Crear un tema personalizado
theme_custom <- theme_pubr() +
theme(axis.title.x = element_blank(),
axis.text = element_text(size = 10),
legend.position = "none")
# Función para crear gráficos de boxplot para cada variable
create_boxplot <- function(data, variable, var_label, y_label, y_lim) {
ggplot(data, aes(x = Renal_Dysfunction, y = !!sym(variable), fill = Renal_Dysfunction)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.2, alpha = 0.5, color = "black") +
stat_compare_means(aes(label = ..p.signif..), method = "t.test", label.x = 1.5, label.y = y_lim - 0.1*y_lim) +
labs(title = var_label, y = y_label, fill = " ") +
scale_x_discrete(labels = c("No Renal Dysfunction", "Renal Dysfunction")) +
scale_fill_manual(values = custom_colors) +
ylim(0, y_lim) + # Ajustar la escala del eje y según la variable
theme_custom
}
# Crear gráficos para cada variable
p1 <- create_boxplot(data, "TFG", " ", "GFR (mL/min/1.73 m²)", 150)
p2 <- create_boxplot(data, "UREA", "", "Urea (mg/dL)", 100)
p3 <- create_boxplot(data, "CREATININA", "", "Creatinine (mg/dL)", 5)
p4 <- create_boxplot(data, "BUN", " ", "BUN (mg/dL)", 40)
p5 <- create_boxplot(data, "BUNCREA", " ", "BUN/Creatinine Ratio", 50)
p6 <- create_boxplot(data, "GLUCOSA", "", "Glucose (mg/dL)", 400)
p7 <- create_boxplot(data, "HbA1C", "", "HbA1c (%)", 20)
p8 <- create_boxplot(data, "TRIGLICERIDOS", "", "Triglycerides (mg/dL)", 500)
p9 <- create_boxplot(data, "COLESTEROL", "", "Cholesterol (mg/dL)", 400)
p10 <- create_boxplot(data, "HDL", "", "HDL-C (mg/dL)", 100)
p11 <- create_boxplot(data, "LDL", "", "LDL-C (mg/dL)", 300)
p12 <- create_boxplot(data, "VLDL", "", "VLDL-C (mg/dL)", 100)
# Mostrar un gráfico de ejemplo
print(p1)
## Warning: Removed 10 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 10 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).
print(p2)
## Warning: Removed 44 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 44 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 44 rows containing missing values or values outside the scale range
## (`geom_point()`).
print(p3)
## Warning: Removed 37 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 37 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 37 rows containing missing values or values outside the scale range
## (`geom_point()`).
print(p4)
## Warning: Removed 53 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 53 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 53 rows containing missing values or values outside the scale range
## (`geom_point()`).
print(p5)
## Warning: Removed 31 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 31 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 31 rows containing missing values or values outside the scale range
## (`geom_point()`).
print(p6)
## Warning: Removed 18 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 18 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 18 rows containing missing values or values outside the scale range
## (`geom_point()`).
print(p7)
## Warning: Removed 13 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 13 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 13 rows containing missing values or values outside the scale range
## (`geom_point()`).
print(p8)
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
print(p9)
## Warning: Removed 6 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 6 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_point()`).
print(p10)
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
print(p11)
## Warning: Removed 11 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 11 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 11 rows containing missing values or values outside the scale range
## (`geom_point()`).
print(p12)
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
#install.packages("https://cran.r-project.org/bin/macosx/big-sur-arm64/contrib/4.3/rms_6.8-1.tgz", repos = NULL, type = "binary")
library(rms)
## Loading required package: Hmisc
##
## Attaching package: 'Hmisc'
## The following object is masked from 'package:gt':
##
## html
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
library(SNPassoc)
## Registered S3 method overwritten by 'SNPassoc':
## method from
## summary.haplo.glm haplo.stats
str(data)
## 'data.frame': 435 obs. of 35 variables:
## $ IDGRUPOS : num 0 0 0 0 0 0 0 0 0 0 ...
## $ SEXO : num 1 1 1 0 1 1 1 1 1 1 ...
## $ EDAD : num 65 74 67 66 66 64 62 65 60 67 ...
## $ IMC : chr "40.6" "33.799999999999997" "22.5" "25.5" ...
## $ TFG : num 95.7 92.3 94.3 98.3 95 ...
## $ GLUCOSA : num 94 106 168 140 94 ...
## $ UREA : num 24.3 34.8 25.6 32.3 28.7 25.5 24.6 38.2 17.3 33.5 ...
## $ BUN : num 11.3 16.2 11.9 15.1 13.4 ...
## $ BUNCREA : num 18.9 23.2 19.9 21.5 22.3 ...
## $ CREATININA : num 0.6 0.7 0.6 0.7 0.6 0.5 0.5 0.7 0.68 0.69 ...
## $ COLESTEROL : num 210 222 123 99 145 ...
## $ HDL : num 47 53 26 29 44 39 50 43 26.4 43.9 ...
## $ LDL : num 131 142.4 79.2 52.6 71.6 ...
## $ VLDL : num 32 26.6 17.8 17.4 29.4 ...
## $ TRIGLICERIDOS : num 160 133 89 87 147 ...
## $ HbA1C : num 7.1 6.8 9.5 7.4 6.9 5.5 5.3 5.7 7.6 11.8 ...
## $ FUMA : chr "0" "0" "0" "0" ...
## $ SXMET : num 1 1 0 1 0 1 0 0 0 0 ...
## $ RAGE624 : chr "A/A" "A/T" "A/T" "A/A" ...
## $ RAGE625 : chr "A/A" "A/A" "A/A" "A/A" ...
## $ RAGE600 : chr "C/C" "C/C" "C/C" "C/C" ...
## $ ALCOHOL : num 0 0 0 1 0 1 0 0 1 0 ...
## $ ACTFISICA : chr "0" "1" "0" "0" ...
## $ SISTOLE : chr "142" "122" "149" "145" ...
## $ DIASTOLE : chr "87" "68" "92" "83" ...
## $ HTA : chr "1" "1" "1" "1" ...
## $ Renal_Dysfunction : chr "No Renal Dysfunction" "No Renal Dysfunction" "No Renal Dysfunction" "No Renal Dysfunction" ...
## $ Sex : chr "Female" "Female" "Female" "Male" ...
## $ Alcohol : chr "No" "No" "No" "Yes" ...
## $ Physical_Activity : chr "No" "Yes" "No" "No" ...
## $ Hypertension : chr "Yes" "Yes" "Yes" "Yes" ...
## $ Metabolic_Syndrome: chr "Yes" "Yes" "No" "Yes" ...
## $ Smoking : chr "No" "No" "No" "No" ...
## $ Age : num 65 74 67 66 66 64 62 65 60 67 ...
## $ BMI : num 40.6 33.8 22.5 25.5 24.4 31 27.9 27.4 26.1 24.8 ...
data$RAGE624 <- as.factor(data$RAGE624)
data$RAGE625 <- as.factor(data$RAGE625)
data$RAGE600 <- as.factor(data$RAGE600)
data$Renal_Dysfunction <- factor(data$Renal_Dysfunction)
data$Sex <- factor(data$Sex, levels = c("Female", "Male"))
data$Alcohol <- factor(data$Alcohol, levels = c("No", "Yes"))
data$Physical_Activity <- factor(data$Physical_Activity, levels = c("No", "Yes"))
data$Hypertension <- factor(data$Hypertension, levels = c("No", "Yes"))
data$Metabolic_Syndrome <- factor(data$Metabolic_Syndrome, levels = c("No", "Yes"))
data$Smoking <- factor(data$Smoking, levels = c("No", "Yes"))
snp_indices <- match(c("RAGE624", "RAGE625", "RAGE600"), colnames(data))
data <- data[complete.cases(data[, snp_indices]), ]
SNPs <- setupSNP(data, colSNPs = snp_indices, sep = "/")
plot(SNPs$RAGE624)
plot(SNPs$RAGE625)
plot(SNPs$RAGE600)
plot(SNPs$RAGE624, type = pie)
plot(SNPs$RAGE625, type = pie)
plot(SNPs$RAGE600, type = pie)
names(SNPs)
## [1] "IDGRUPOS" "SEXO" "EDAD"
## [4] "IMC" "TFG" "GLUCOSA"
## [7] "UREA" "BUN" "BUNCREA"
## [10] "CREATININA" "COLESTEROL" "HDL"
## [13] "LDL" "VLDL" "TRIGLICERIDOS"
## [16] "HbA1C" "FUMA" "SXMET"
## [19] "ALCOHOL" "ACTFISICA" "SISTOLE"
## [22] "DIASTOLE" "HTA" "Renal_Dysfunction"
## [25] "Sex" "Alcohol" "Physical_Activity"
## [28] "Hypertension" "Metabolic_Syndrome" "Smoking"
## [31] "Age" "BMI" "RAGE624"
## [34] "RAGE625" "RAGE600"
#Varaibles categoricas
data$RAGE624[data$RAGE624 == "NA"] <- NA
data$RAGE625[data$RAGE625 == "NA"] <- NA
data$RAGE600[data$RAGE600 == "NA"] <- NA
data$Renal_Dysfunction[data$Renal_Dysfunction == "NA"] <- NA
data$Smoking[data$Smoking == "NA"] <- NA
data$Alcohol[data$Alcohol == "NA"] <- NA
# Eliminar los niveles no utilizados (como "NA")
data$RAGE624 <- droplevels(data$RAGE624)
data$RAGE625 <- droplevels(data$RAGE625)
data$RAGE600 <- droplevels(data$RAGE600)
data$Renal_Dysfunction <- droplevels(data$Renal_Dysfunction)
data$Smoking <- droplevels(data$Smoking)
data$Alcohol <- droplevels(data$Alcohol)
# Verificar la estructura de los factores
str(data$RAGE624)
## Factor w/ 3 levels "A/A","A/T","T/T": 1 2 2 1 3 2 1 2 2 1 ...
str(data$RAGE625)
## Factor w/ 3 levels "A/A","A/G","G/G": 1 1 1 1 1 2 1 1 1 1 ...
str(data$RAGE600)
## Factor w/ 3 levels "C/C","C/T","T/T": 1 1 1 1 1 1 1 1 1 1 ...
str(data$Renal_Dysfunction)
## Factor w/ 2 levels "No Renal Dysfunction",..: 1 1 1 1 1 1 1 1 1 1 ...
str(data$Smoking)
## Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
str(data$Alcohol)
## Factor w/ 2 levels "No","Yes": 1 1 1 2 1 2 1 1 2 1 ...
sum(is.na(data$RAGE624))
## [1] 13
sum(is.na(data$RAGE625))
## [1] 19
sum(is.na(data$RAGE600))
## [1] 25
sum(is.na(data$Renal_Dysfunction))
## [1] 0
sum(is.na(data$Smoking))
## [1] 2
sum(is.na(data$Alcohol))
## [1] 0
dim(data)
## [1] 435 35
str(data)
## 'data.frame': 435 obs. of 35 variables:
## $ IDGRUPOS : num 0 0 0 0 0 0 0 0 0 0 ...
## $ SEXO : num 1 1 1 0 1 1 1 1 1 1 ...
## $ EDAD : num 65 74 67 66 66 64 62 65 60 67 ...
## $ IMC : chr "40.6" "33.799999999999997" "22.5" "25.5" ...
## $ TFG : num 95.7 92.3 94.3 98.3 95 ...
## $ GLUCOSA : num 94 106 168 140 94 ...
## $ UREA : num 24.3 34.8 25.6 32.3 28.7 25.5 24.6 38.2 17.3 33.5 ...
## $ BUN : num 11.3 16.2 11.9 15.1 13.4 ...
## $ BUNCREA : num 18.9 23.2 19.9 21.5 22.3 ...
## $ CREATININA : num 0.6 0.7 0.6 0.7 0.6 0.5 0.5 0.7 0.68 0.69 ...
## $ COLESTEROL : num 210 222 123 99 145 ...
## $ HDL : num 47 53 26 29 44 39 50 43 26.4 43.9 ...
## $ LDL : num 131 142.4 79.2 52.6 71.6 ...
## $ VLDL : num 32 26.6 17.8 17.4 29.4 ...
## $ TRIGLICERIDOS : num 160 133 89 87 147 ...
## $ HbA1C : num 7.1 6.8 9.5 7.4 6.9 5.5 5.3 5.7 7.6 11.8 ...
## $ FUMA : chr "0" "0" "0" "0" ...
## $ SXMET : num 1 1 0 1 0 1 0 0 0 0 ...
## $ RAGE624 : Factor w/ 3 levels "A/A","A/T","T/T": 1 2 2 1 3 2 1 2 2 1 ...
## $ RAGE625 : Factor w/ 3 levels "A/A","A/G","G/G": 1 1 1 1 1 2 1 1 1 1 ...
## $ RAGE600 : Factor w/ 3 levels "C/C","C/T","T/T": 1 1 1 1 1 1 1 1 1 1 ...
## $ ALCOHOL : num 0 0 0 1 0 1 0 0 1 0 ...
## $ ACTFISICA : chr "0" "1" "0" "0" ...
## $ SISTOLE : chr "142" "122" "149" "145" ...
## $ DIASTOLE : chr "87" "68" "92" "83" ...
## $ HTA : chr "1" "1" "1" "1" ...
## $ Renal_Dysfunction : Factor w/ 2 levels "No Renal Dysfunction",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Sex : Factor w/ 2 levels "Female","Male": 1 1 1 2 1 1 1 1 1 1 ...
## $ Alcohol : Factor w/ 2 levels "No","Yes": 1 1 1 2 1 2 1 1 2 1 ...
## $ Physical_Activity : Factor w/ 2 levels "No","Yes": 1 2 1 1 2 1 2 1 1 1 ...
## $ Hypertension : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 1 1 1 1 ...
## $ Metabolic_Syndrome: Factor w/ 2 levels "No","Yes": 2 2 1 2 1 2 1 1 1 1 ...
## $ Smoking : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ Age : num 65 74 67 66 66 64 62 65 60 67 ...
## $ BMI : num 40.6 33.8 22.5 25.5 24.4 31 27.9 27.4 26.1 24.8 ...
SNPs <- setupSNP(data, colSNPs = match(c("RAGE624", "RAGE625", "RAGE600"), colnames(data)), sep = "/")
#Perform association analysis for each SNP
association(Renal_Dysfunction ~ RAGE624, data = SNPs)
##
## SNP: RAGE624 adjusted by:
## No Renal Dysfunction % Renal Dysfunction % OR lower upper
## Codominant
## A/A 80 37.7 94 44.8 1.00
## A/T 115 54.2 97 46.2 0.72 0.48 1.07
## T/T 17 8.0 19 9.0 0.95 0.46 1.95
## Dominant
## A/A 80 37.7 94 44.8 1.00
## A/T-T/T 132 62.3 116 55.2 0.75 0.51 1.10
## Recessive
## A/A-A/T 195 92.0 191 91.0 1.00
## T/T 17 8.0 19 9.0 1.14 0.58 2.26
## Overdominant
## A/A-T/T 97 45.8 113 53.8 1.00
## A/T 115 54.2 97 46.2 0.72 0.49 1.06
## log-Additive
## 0,1,2 212 50.2 210 49.8 0.86 0.63 1.16
## p-value AIC
## Codominant
## A/A 0.25164 588.2
## A/T
## T/T
## Dominant
## A/A 0.14246 586.9
## A/T-T/T
## Recessive
## A/A-A/T 0.70518 588.9
## T/T
## Overdominant
## A/A-T/T 0.09781 586.3
## A/T
## log-Additive
## 0,1,2 0.32414 588.0
association(Renal_Dysfunction ~ RAGE625, data = SNPs)
##
## SNP: RAGE625 adjusted by:
## No Renal Dysfunction % Renal Dysfunction % OR lower upper
## Codominant
## A/A 134 64.4 142 68.3 1.00
## A/G 56 26.9 52 25.0 0.88 0.56 1.37
## G/G 18 8.7 14 6.7 0.73 0.35 1.53
## Dominant
## A/A 134 64.4 142 68.3 1.00
## A/G-G/G 74 35.6 66 31.7 0.84 0.56 1.26
## Recessive
## A/A-A/G 190 91.3 194 93.3 1.00
## G/G 18 8.7 14 6.7 0.76 0.37 1.58
## Overdominant
## A/A-G/G 152 73.1 156 75.0 1.00
## A/G 56 26.9 52 25.0 0.90 0.58 1.40
## log-Additive
## 0,1,2 208 50.0 208 50.0 0.86 0.64 1.17
## p-value AIC
## Codominant
## A/A 0.6436 581.8
## A/G
## G/G
## Dominant
## A/A 0.4064 580.0
## A/G-G/G
## Recessive
## A/A-A/G 0.4612 580.2
## G/G
## Overdominant
## A/A-G/G 0.6546 580.5
## A/G
## log-Additive
## 0,1,2 0.3497 579.8
association(Renal_Dysfunction ~ RAGE600, data = SNPs)
##
## SNP: RAGE600 adjusted by:
## No Renal Dysfunction % Renal Dysfunction % OR lower upper
## Codominant
## C/C 174 84.5 183 89.7 1.00
## C/T 23 11.2 16 7.8 0.66 0.34 1.29
## T/T 9 4.4 5 2.5 0.53 0.17 1.61
## Dominant
## C/C 174 84.5 183 89.7 1.00
## C/T-T/T 32 15.5 21 10.3 0.62 0.35 1.12
## Recessive
## C/C-C/T 197 95.6 199 97.5 1.00
## T/T 9 4.4 5 2.5 0.55 0.18 1.67
## Overdominant
## C/C-T/T 183 88.8 188 92.2 1.00
## C/T 23 11.2 16 7.8 0.68 0.35 1.32
## log-Additive
## 0,1,2 206 50.2 204 49.8 0.70 0.45 1.09
## p-value AIC
## Codominant
## C/C 0.2672 571.7
## C/T
## T/T
## Dominant
## C/C 0.1126 569.9
## C/T-T/T
## Recessive
## C/C-C/T 0.2816 571.2
## T/T
## Overdominant
## C/C-T/T 0.2504 571.1
## C/T
## log-Additive
## 0,1,2 0.1074 569.8
# Reemplazar "NA" por NA en las variables numéricas
data$IMC[data$IMC == "NA"] <- NA
data$GLUCOSA[data$GLUCOSA == "NA"] <- NA
data$HbA1C[data$HbA1C == "NA"] <- NA
# Convertir columnas a numéricas
data$IMC <- as.numeric(as.character(data$IMC))
data$GLUCOSA <- as.numeric(as.character(data$GLUCOSA))
data$HbA1C <- as.numeric(as.character(data$HbA1C))
# Verificar qué filas tienen valores no numéricos en cada columna
str(data$IMC)
## num [1:435] 40.6 33.8 22.5 25.5 24.4 31 27.9 27.4 26.1 24.8 ...
str(data$GLUCOSA)
## num [1:435] 94 106 168 140 94 ...
str(data$HbA1C)
## num [1:435] 7.1 6.8 9.5 7.4 6.9 5.5 5.3 5.7 7.6 11.8 ...
# Verificar cuántos NA hay en cada columna
sum(is.na(data$IMC))
## [1] 7
sum(is.na(data$GLUCOSA))
## [1] 1
sum(is.na(data$HbA1C))
## [1] 13
#install.packages("car")
library(car)
## Loading required package: carData
# Identificar las variables colineales
alias(lm(IMC ~ RAGE624 + RAGE625 + RAGE600 + GLUCOSA + HbA1C +
Sex + EDAD + UREA + BUN + BUNCREA + CREATININA + COLESTEROL + HDL + LDL + VLDL +
TRIGLICERIDOS + Alcohol + Physical_Activity + Hypertension +
Metabolic_Syndrome + Smoking, data = data))
## Model :
## IMC ~ RAGE624 + RAGE625 + RAGE600 + GLUCOSA + HbA1C + Sex + EDAD +
## UREA + BUN + BUNCREA + CREATININA + COLESTEROL + HDL + LDL +
## VLDL + TRIGLICERIDOS + Alcohol + Physical_Activity + Hypertension +
## Metabolic_Syndrome + Smoking
# Ajustar el modelo sin las variables colineales (ej. eliminando VLDL)
#Si el resultado de VIF extremadamente alto (mayores de 10) se deben eliminar del modelo
modelo_vif <- lm(IMC ~ RAGE624 + RAGE625 + RAGE600 + GLUCOSA + HbA1C +
Sex + EDAD + UREA + BUN + BUNCREA + CREATININA + COLESTEROL + HDL + LDL +
TRIGLICERIDOS + Alcohol + Physical_Activity + Hypertension +
Metabolic_Syndrome + Smoking, data = data)
car::vif(modelo_vif)
## GVIF Df GVIF^(1/(2*Df))
## RAGE624 1.143950 2 1.034193
## RAGE625 1.261052 2 1.059701
## RAGE600 1.244668 2 1.056242
## GLUCOSA 1.805214 1 1.343583
## HbA1C 1.699503 1 1.303650
## Sex 1.243185 1 1.114982
## EDAD 1.313350 1 1.146015
## UREA 889.056250 1 29.817046
## BUN 904.666055 1 30.077667
## BUNCREA 1.553039 1 1.246210
## CREATININA 4.299430 1 2.073507
## COLESTEROL 302.770679 1 17.400307
## HDL 11.782347 1 3.432542
## LDL 292.039712 1 17.089169
## TRIGLICERIDOS 14.923733 1 3.863125
## Alcohol 1.104113 1 1.050768
## Physical_Activity 1.102385 1 1.049945
## Hypertension 1.408572 1 1.186833
## Metabolic_Syndrome 1.440063 1 1.200026
## Smoking 1.106460 1 1.051884
# Modelo nulo
modelo_nulo <- glm(Renal_Dysfunction ~ 1, data = data, family = binomial, na.action = na.exclude)
# Modelo completo
modelo_completo <- glm(Renal_Dysfunction ~ RAGE624 + RAGE625 + RAGE600 + GLUCOSA + HbA1C +
Sex + EDAD + CREATININA + HDL + Alcohol + Physical_Activity +
Hypertension + Metabolic_Syndrome + Smoking,
data = data, family = binomial, na.action = na.exclude)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
OR <- exp(coef(modelo_completo))
CI <- exp(confint(modelo_completo))
## Waiting for profiling to be done...
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
p_values <- summary(modelo_completo)$coefficients[, 4]
OR_completo <- cbind(OR, CI, p_values)
print(OR_completo)
## OR 2.5 % 97.5 % p_values
## (Intercept) 2.972046e-26 9.783181e-39 6.843469e-18 9.906762e-07
## RAGE624A/T 2.859967e-01 4.856785e-02 1.318830e+00 1.276636e-01
## RAGE624T/T 3.129414e+00 9.899068e-02 6.737206e+01 4.784303e-01
## RAGE625A/G 1.723430e+00 2.653582e-01 1.182592e+01 5.655929e-01
## RAGE625G/G 2.849784e-01 8.791180e-03 1.090587e+01 4.848850e-01
## RAGE600C/T 3.454271e+00 4.760231e-01 2.952137e+01 2.278095e-01
## RAGE600T/T 2.421563e-01 6.114366e-06 4.133697e+03 9.523641e-01
## GLUCOSA 1.004487e+00 9.937222e-01 1.016056e+00 4.147866e-01
## HbA1C 7.866421e-01 4.299093e-01 1.319531e+00 3.901660e-01
## SexMale 3.966918e-04 6.291856e-06 7.657044e-03 9.864037e-06
## EDAD 1.456307e+00 1.277406e+00 1.759164e+00 2.681540e-06
## CREATININA 3.863772e+21 5.049601e+14 8.210687e+31 4.918100e-07
## HDL 1.020537e+00 9.627918e-01 1.085281e+00 4.985750e-01
## AlcoholYes 3.894235e+00 7.285602e-01 2.551642e+01 1.249508e-01
## Physical_ActivityYes 1.044713e+00 2.010186e-01 5.057610e+00 9.565082e-01
## HypertensionYes 4.706162e-01 5.853386e-02 3.402803e+00 4.574523e-01
## Metabolic_SyndromeYes 2.112725e+00 2.399846e-01 2.140624e+01 5.042935e-01
## SmokingYes 8.495121e-01 1.116317e-01 6.420907e+00 8.721074e-01
summary(modelo_completo)
##
## Call:
## glm(formula = Renal_Dysfunction ~ RAGE624 + RAGE625 + RAGE600 +
## GLUCOSA + HbA1C + Sex + EDAD + CREATININA + HDL + Alcohol +
## Physical_Activity + Hypertension + Metabolic_Syndrome + Smoking,
## family = binomial, data = data, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -58.777962 12.011481 -4.893 9.91e-07 ***
## RAGE624A/T -1.251775 0.821709 -1.523 0.128
## RAGE624T/T 1.140846 1.609479 0.709 0.478
## RAGE625A/G 0.544317 0.947373 0.575 0.566
## RAGE625G/G -1.255342 1.797281 -0.698 0.485
## RAGE600C/T 1.239611 1.027852 1.206 0.228
## RAGE600T/T -1.418172 23.739770 -0.060 0.952
## GLUCOSA 0.004477 0.005489 0.815 0.415
## HbA1C -0.239982 0.279271 -0.859 0.390
## SexMale -7.832351 1.771972 -4.420 9.86e-06 ***
## EDAD 0.375904 0.080085 4.694 2.68e-06 ***
## CREATININA 49.705931 9.882915 5.029 4.92e-07 ***
## HDL 0.020329 0.030040 0.677 0.499
## AlcoholYes 1.359497 0.886058 1.534 0.125
## Physical_ActivityYes 0.043742 0.802083 0.055 0.957
## HypertensionYes -0.753712 1.014351 -0.743 0.457
## Metabolic_SyndromeYes 0.747979 1.120149 0.668 0.504
## SmokingYes -0.163093 1.013112 -0.161 0.872
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 515.314 on 371 degrees of freedom
## Residual deviance: 58.573 on 354 degrees of freedom
## (63 observations deleted due to missingness)
## AIC: 94.573
##
## Number of Fisher Scoring iterations: 13
#Se reliazo el analisis por clasificaciones de herencia (recesiva, dominante y aditiva) para el RAGE600 porque fue el que salio significativo
data$RAGE600_recesivo <- ifelse(data$RAGE600 == "T/T", 1, 0)
modelo_recesivo_RAGE600 <- glm(Renal_Dysfunction ~ RAGE600_recesivo + GLUCOSA + HbA1C +
Sex + EDAD + CREATININA + HDL + Alcohol + Physical_Activity +
Hypertension + Metabolic_Syndrome + Smoking,
data = data, family = binomial, na.action = na.exclude)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(modelo_recesivo_RAGE600)
##
## Call:
## glm(formula = Renal_Dysfunction ~ RAGE600_recesivo + GLUCOSA +
## HbA1C + Sex + EDAD + CREATININA + HDL + Alcohol + Physical_Activity +
## Hypertension + Metabolic_Syndrome + Smoking, family = binomial,
## data = data, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -56.098169 10.320765 -5.435 5.47e-08 ***
## RAGE600_recesivo 0.947286 6.085391 0.156 0.8763
## GLUCOSA 0.003675 0.004872 0.754 0.4507
## HbA1C -0.150671 0.227105 -0.663 0.5070
## SexMale -7.347348 1.641857 -4.475 7.64e-06 ***
## EDAD 0.358672 0.068469 5.239 1.62e-07 ***
## CREATININA 46.161902 8.439030 5.470 4.50e-08 ***
## HDL 0.019306 0.024350 0.793 0.4279
## AlcoholYes 1.600715 0.827693 1.934 0.0531 .
## Physical_ActivityYes 0.240050 0.721354 0.333 0.7393
## HypertensionYes -0.306667 0.925772 -0.331 0.7405
## Metabolic_SyndromeYes 0.396152 0.894111 0.443 0.6577
## SmokingYes -0.306855 0.990452 -0.310 0.7567
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 535.016 on 385 degrees of freedom
## Residual deviance: 64.856 on 373 degrees of freedom
## (49 observations deleted due to missingness)
## AIC: 90.856
##
## Number of Fisher Scoring iterations: 12
# Modelo dominante para RAGE600: agrupar C/C vs C/T y T/T
data$RAGE600_dominante <- ifelse(data$RAGE600 == "C/C", 0, 1)
modelo_dominante_RAGE600 <- glm(Renal_Dysfunction ~ RAGE600_dominante + GLUCOSA + HbA1C +
Sex + EDAD + CREATININA + HDL + Alcohol + Physical_Activity +
Hypertension + Metabolic_Syndrome + Smoking,
data = data, family = binomial, na.action = na.exclude)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(modelo_dominante_RAGE600)
##
## Call:
## glm(formula = Renal_Dysfunction ~ RAGE600_dominante + GLUCOSA +
## HbA1C + Sex + EDAD + CREATININA + HDL + Alcohol + Physical_Activity +
## Hypertension + Metabolic_Syndrome + Smoking, family = binomial,
## data = data, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -56.286640 10.420771 -5.401 6.61e-08 ***
## RAGE600_dominante 1.231020 1.004151 1.226 0.2202
## GLUCOSA 0.003702 0.004920 0.752 0.4518
## HbA1C -0.129895 0.230476 -0.564 0.5730
## SexMale -7.472919 1.667263 -4.482 7.39e-06 ***
## EDAD 0.353649 0.067788 5.217 1.82e-07 ***
## CREATININA 46.605314 8.597387 5.421 5.93e-08 ***
## HDL 0.018900 0.024599 0.768 0.4423
## AlcoholYes 1.566438 0.844928 1.854 0.0637 .
## Physical_ActivityYes -0.026516 0.784222 -0.034 0.9730
## HypertensionYes -0.488811 0.939500 -0.520 0.6029
## Metabolic_SyndromeYes 0.585157 0.932735 0.627 0.5304
## SmokingYes -0.344431 1.016006 -0.339 0.7346
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 535.016 on 385 degrees of freedom
## Residual deviance: 63.283 on 373 degrees of freedom
## (49 observations deleted due to missingness)
## AIC: 89.283
##
## Number of Fisher Scoring iterations: 12
data$RAGE600_aditivo <- factor(data$RAGE600, levels = c("C/C", "C/T", "T/T"))
modelo_aditivo_RAGE600 <- glm(Renal_Dysfunction ~ RAGE600_aditivo + GLUCOSA + HbA1C +
Sex + EDAD + CREATININA + HDL + Alcohol + Physical_Activity +
Hypertension + Metabolic_Syndrome + Smoking,
data = data, family = binomial, na.action = na.exclude)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(modelo_aditivo_RAGE600)
##
## Call:
## glm(formula = Renal_Dysfunction ~ RAGE600_aditivo + GLUCOSA +
## HbA1C + Sex + EDAD + CREATININA + HDL + Alcohol + Physical_Activity +
## Hypertension + Metabolic_Syndrome + Smoking, family = binomial,
## data = data, na.action = na.exclude)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -56.287835 10.420148 -5.402 6.60e-08 ***
## RAGE600_aditivoC/T 1.242163 1.022184 1.215 0.2243
## RAGE600_aditivoT/T 0.884744 5.855126 0.151 0.8799
## GLUCOSA 0.003697 0.004920 0.752 0.4523
## HbA1C -0.128788 0.231497 -0.556 0.5780
## SexMale -7.479302 1.669408 -4.480 7.46e-06 ***
## EDAD 0.353463 0.067846 5.210 1.89e-07 ***
## CREATININA 46.618270 8.596997 5.423 5.87e-08 ***
## HDL 0.018878 0.024601 0.767 0.4429
## AlcoholYes 1.564820 0.845571 1.851 0.0642 .
## Physical_ActivityYes -0.032334 0.790741 -0.041 0.9674
## HypertensionYes -0.487613 0.939262 -0.519 0.6037
## Metabolic_SyndromeYes 0.585040 0.932267 0.628 0.5303
## SmokingYes -0.346485 1.016937 -0.341 0.7333
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 535.016 on 385 degrees of freedom
## Residual deviance: 63.279 on 372 degrees of freedom
## (49 observations deleted due to missingness)
## AIC: 91.279
##
## Number of Fisher Scoring iterations: 12
library(broom)
tidy(modelo_aditivo_RAGE600)
## # A tibble: 14 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -56.3 10.4 -5.40 0.0000000660
## 2 RAGE600_aditivoC/T 1.24 1.02 1.22 0.224
## 3 RAGE600_aditivoT/T 0.885 5.86 0.151 0.880
## 4 GLUCOSA 0.00370 0.00492 0.752 0.452
## 5 HbA1C -0.129 0.231 -0.556 0.578
## 6 SexMale -7.48 1.67 -4.48 0.00000746
## 7 EDAD 0.353 0.0678 5.21 0.000000189
## 8 CREATININA 46.6 8.60 5.42 0.0000000587
## 9 HDL 0.0189 0.0246 0.767 0.443
## 10 AlcoholYes 1.56 0.846 1.85 0.0642
## 11 Physical_ActivityYes -0.0323 0.791 -0.0409 0.967
## 12 HypertensionYes -0.488 0.939 -0.519 0.604
## 13 Metabolic_SyndromeYes 0.585 0.932 0.628 0.530
## 14 SmokingYes -0.346 1.02 -0.341 0.733
tidy(modelo_dominante_RAGE600)
## # A tibble: 13 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -56.3 10.4 -5.40 0.0000000661
## 2 RAGE600_dominante 1.23 1.00 1.23 0.220
## 3 GLUCOSA 0.00370 0.00492 0.752 0.452
## 4 HbA1C -0.130 0.230 -0.564 0.573
## 5 SexMale -7.47 1.67 -4.48 0.00000739
## 6 EDAD 0.354 0.0678 5.22 0.000000182
## 7 CREATININA 46.6 8.60 5.42 0.0000000593
## 8 HDL 0.0189 0.0246 0.768 0.442
## 9 AlcoholYes 1.57 0.845 1.85 0.0637
## 10 Physical_ActivityYes -0.0265 0.784 -0.0338 0.973
## 11 HypertensionYes -0.489 0.939 -0.520 0.603
## 12 Metabolic_SyndromeYes 0.585 0.933 0.627 0.530
## 13 SmokingYes -0.344 1.02 -0.339 0.735
tidy(modelo_recesivo_RAGE600)
## # A tibble: 13 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -56.1 10.3 -5.44 0.0000000547
## 2 RAGE600_recesivo 0.947 6.09 0.156 0.876
## 3 GLUCOSA 0.00368 0.00487 0.754 0.451
## 4 HbA1C -0.151 0.227 -0.663 0.507
## 5 SexMale -7.35 1.64 -4.48 0.00000764
## 6 EDAD 0.359 0.0685 5.24 0.000000162
## 7 CREATININA 46.2 8.44 5.47 0.0000000450
## 8 HDL 0.0193 0.0243 0.793 0.428
## 9 AlcoholYes 1.60 0.828 1.93 0.0531
## 10 Physical_ActivityYes 0.240 0.721 0.333 0.739
## 11 HypertensionYes -0.307 0.926 -0.331 0.740
## 12 Metabolic_SyndromeYes 0.396 0.894 0.443 0.658
## 13 SmokingYes -0.307 0.990 -0.310 0.757
AIC(modelo_recesivo_RAGE600, modelo_dominante_RAGE600, modelo_aditivo_RAGE600)
## df AIC
## modelo_recesivo_RAGE600 13 90.85568
## modelo_dominante_RAGE600 13 89.28253
## modelo_aditivo_RAGE600 14 91.27889
data$RAGE600 <- factor(data$RAGE600, levels = c("C/C", "C/T", "T/T"))
modelo_simple_RAGE600 <- glm(Renal_Dysfunction ~ RAGE600, data = data, family = binomial, na.action = na.exclude)
OR_simple <- exp(coef(modelo_simple_RAGE600))
CI_simple <- exp(confint(modelo_simple_RAGE600))
## Waiting for profiling to be done...
OR_simple_completo <- c(1, OR_simple)
names(OR_simple_completo)[1] <- "RAGE600C/C"
CI_simple_completo <- rbind(c(1, 1), CI_simple)
rownames(CI_simple_completo)[1] <- "RAGE600C/C"
OR_simple_completo
## RAGE600C/C (Intercept) RAGE600C/T RAGE600T/T
## 1.0000000 1.0517241 0.6614398 0.5282332
CI_simple_completo
## 2.5 % 97.5 %
## RAGE600C/C 1.0000000 1.000000
## (Intercept) 0.8546139 1.294766
## RAGE600C/T 0.3327802 1.285589
## RAGE600T/T 0.1596413 1.560152
data$RAGE624 <- factor(data$RAGE624, levels = c("A/A", "A/T", "T/T"))
modelo_simple_RAGE624 <- glm(Renal_Dysfunction ~ RAGE624, data = data, family = binomial, na.action = na.exclude)
OR_simple_RAGE624 <- exp(coef(modelo_simple_RAGE624))
CI_simple_RAGE624 <- exp(confint(modelo_simple_RAGE624))
## Waiting for profiling to be done...
OR_simple_RAGE624_completo <- c(1, OR_simple_RAGE624)
names(OR_simple_RAGE624_completo)[1] <- "RAGE624A/A"
CI_simple_RAGE624_completo <- rbind(c(1, 1), CI_simple_RAGE624)
rownames(CI_simple_RAGE624_completo)[1] <- "RAGE624A/A"
OR_simple_RAGE624_completo
## RAGE624A/A (Intercept) RAGE624A/T RAGE624T/T
## 1.0000000 1.1750000 0.7178538 0.9511890
CI_simple_RAGE624_completo
## 2.5 % 97.5 %
## RAGE624A/A 1.0000000 1.000000
## (Intercept) 0.8726444 1.585907
## RAGE624A/T 0.4793512 1.072612
## RAGE624T/T 0.4628013 1.967292
data$RAGE625 <- factor(data$RAGE625, levels = c("A/A", "A/G", "G/G"))
modelo_simple_RAGE625 <- glm(Renal_Dysfunction ~ RAGE625, data = data, family = binomial, na.action = na.exclude)
OR_simple_RAGE625 <- exp(coef(modelo_simple_RAGE625))
CI_simple_RAGE625 <- exp(confint(modelo_simple_RAGE625))
## Waiting for profiling to be done...
OR_simple_RAGE625_completo <- c(1, OR_simple_RAGE625)
names(OR_simple_RAGE625_completo)[1] <- "RAGE625A/A"
CI_simple_RAGE625_completo <- rbind(c(1, 1), CI_simple_RAGE625)
rownames(CI_simple_RAGE625_completo)[1] <- "RAGE625A/A"
OR_simple_RAGE625_completo
## RAGE625A/A (Intercept) RAGE625A/G RAGE625G/G
## 1.0000000 1.0597015 0.8762575 0.7339593
CI_simple_RAGE625_completo
## 2.5 % 97.5 %
## RAGE625A/A 1.0000000 1.000000
## (Intercept) 0.8368844 1.342568
## RAGE625A/G 0.5606131 1.367672
## RAGE625G/G 0.3457577 1.529281
table(data$RAGE625)
##
## A/A A/G G/G
## 276 108 32
table(data$RAGE624)
##
## A/A A/T T/T
## 174 212 36
table(data$RAGE600)
##
## C/C C/T T/T
## 357 39 14