library(readxl)
setwd(dir = "/Users/lorandacalderonzamora/Downloads/")
data <- read_excel("BASEHAPLOTIPO04_cleaned.xlsx")
## Warning: Expecting numeric in M379 / R379C13: got a date
## Warning: Expecting numeric in M380 / R380C13: got a date
class(data)
## [1] "tbl_df" "tbl" "data.frame"
names(data)
## [1] "SEXO" "EDAD" "IMC" "TFG"
## [5] "GLUCOSA" "UREA" "BUN" "BUNCREA"
## [9] "CREATININA" "COLESTEROL" "HDL" "LDL"
## [13] "VLDL" "TRIGLICERIDOS" "HbA1C" "FUMA"
## [17] "ALCOHOL" "ACTFISICA" "HTA" "SXMET"
## [21] "RAGE 624" "RAGE625" "RAGE600"
head(data)
## # A tibble: 6 × 23
## SEXO EDAD IMC TFG GLUCOSA UREA BUN BUNCREA CREATININA COLESTEROL
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 65 40.6 95.6 94 24.3 11.3 18.9 0.6 210
## 2 1 74 33.8 92.4 106 34.8 16.2 23.2 0.7 222
## 3 1 67 22.5 94.3 168 25.6 11.9 19.9 0.6 123
## 4 0 66 25.5 98.3 140 32.3 15.1 21.5 0.7 99
## 5 1 66 24.4 95.0 94 28.7 13.4 22.3 0.6 145
## 6 1 64 31 102. 118 25.5 11.9 23.8 0.5 171
## # ℹ 13 more variables: HDL <dbl>, LDL <dbl>, VLDL <dbl>, TRIGLICERIDOS <dbl>,
## # HbA1C <dbl>, FUMA <dbl>, ALCOHOL <dbl>, ACTFISICA <dbl>, HTA <dbl>,
## # SXMET <dbl>, `RAGE 624` <dbl>, RAGE625 <dbl>, RAGE600 <dbl>
library(rms)
## Loading required package: Hmisc
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
library(haplo.stats)
## Loading required package: arsenal
##
## Attaching package: 'arsenal'
## The following object is masked from 'package:Hmisc':
##
## %nin%
str(data)
## tibble [382 × 23] (S3: tbl_df/tbl/data.frame)
## $ SEXO : num [1:382] 1 1 1 0 1 1 1 1 1 1 ...
## $ EDAD : num [1:382] 65 74 67 66 66 64 62 65 60 67 ...
## $ IMC : num [1:382] 40.6 33.8 22.5 25.5 24.4 31 27.9 27.4 26.1 24.8 ...
## $ TFG : num [1:382] 95.7 92.3 94.3 98.3 95 ...
## $ GLUCOSA : num [1:382] 94 106 168 140 94 ...
## $ UREA : num [1:382] 24.3 34.8 25.6 32.3 28.7 25.5 24.6 38.2 17.3 33.5 ...
## $ BUN : num [1:382] 11.3 16.2 11.9 15.1 13.4 ...
## $ BUNCREA : num [1:382] 18.9 23.2 19.9 21.5 22.3 ...
## $ CREATININA : num [1:382] 0.6 0.7 0.6 0.7 0.6 0.5 0.5 0.7 0.68 0.69 ...
## $ COLESTEROL : num [1:382] 210 222 123 99 145 ...
## $ HDL : num [1:382] 47 53 26 29 44 39 50 43 26.4 43.9 ...
## $ LDL : num [1:382] 131 142.4 79.2 52.6 71.6 ...
## $ VLDL : num [1:382] 32 26.6 17.8 17.4 29.4 ...
## $ TRIGLICERIDOS: num [1:382] 160 133 89 87 147 ...
## $ HbA1C : num [1:382] 7.1 6.8 9.5 7.4 6.9 5.5 5.3 5.7 7.6 11.8 ...
## $ FUMA : num [1:382] 0 0 0 0 0 0 0 0 0 0 ...
## $ ALCOHOL : num [1:382] 0 0 0 1 0 1 0 0 1 0 ...
## $ ACTFISICA : num [1:382] 1 0 1 1 0 1 0 1 1 1 ...
## $ HTA : num [1:382] 1 1 1 1 1 1 0 0 0 0 ...
## $ SXMET : num [1:382] 1 1 0 1 0 1 0 0 0 0 ...
## $ RAGE 624 : num [1:382] 1 2 2 1 3 2 1 2 2 1 ...
## $ RAGE625 : num [1:382] 1 1 1 1 1 2 1 1 1 1 ...
## $ RAGE600 : num [1:382] 1 1 1 1 1 1 1 1 1 1 ...
# Crear data.frame de genotipos con dos alelos
geno <- data.frame(
RAGE624.a1 = data$`RAGE 624`, RAGE624.a2 = data$`RAGE 624`,
RAGE625.a1 = data$RAGE625, RAGE625.a2 = data$RAGE625,
RAGE600.a1 = data$RAGE600, RAGE600.a2 = data$RAGE600
)
# Etiquetas para los loci (SNPs)
label <- c("RAGE624", "RAGE625", "RAGE600")
# Remover filas con datos faltantes o ceros
keep <- !apply(is.na(geno) | geno == 0, 1, any)
data <- data[keep, ]
geno <- geno[keep, ]
# Definir la variable respuesta
y <- data$TFG # Variable continua
# Configuración de genotipos
geno <- setupGeno(geno, miss.val = c(0, NA))
# Crear el data.frame para el análisis
my.data <- data.frame(geno = geno, Sex = as.factor(data$SEXO), HBA1c = data$HbA1C, y = y)
# Realizar el análisis de haplotipos
fit.gaus <- haplo.glm(y ~ Sex + HBA1c + geno, family = gaussian,
na.action = "na.geno.keep",
allele.lev = attributes(geno)$unique.alleles,
data = my.data, locus.label = label,
control = haplo.glm.control(haplo.freq.min = 0.02))
# Resumen del modelo
summary(fit.gaus)
##
## Call:
## haplo.glm(formula = y ~ Sex + HBA1c + geno, family = gaussian,
## data = my.data, na.action = "na.geno.keep", locus.label = label,
## control = haplo.glm.control(haplo.freq.min = 0.02), allele.lev = attributes(geno)$unique.alleles)
##
## Coefficients:
## coef se t.stat pval
## (Intercept) 23.9084 8.7274 2.7395 0.006
## Sex1 9.5017 3.7308 2.5468 0.011
## HBA1c 6.4450 0.9692 6.6500 0.000
## geno.1 -0.5992 2.5345 -0.2364 0.813
## geno.2 5.6455 5.2233 1.0808 0.280
## geno.4 2.7329 3.4751 0.7864 0.432
## geno.6 -3.2162 5.6312 -0.5711 0.568
## geno.11 1.1491 3.1021 0.3704 0.711
## geno.13 11.5301 6.5195 1.7686 0.078
## geno.16 1.6691 4.5168 0.3695 0.712
## geno.rare 5.2972 3.1265 1.6943 0.091
##
## (Dispersion parameter for gaussian family taken to be 1273.201)
##
## Null deviance: 550318 on 381 degrees of freedom
## Residual deviance: 472358 on 371 degrees of freedom
## AIC: 3827.9
##
## Number of Fisher Scoring iterations: 2
##
##
## Haplotypes:
## RAGE624 RAGE625 RAGE600 hap.freq
## geno.1 1 1 1 0.23298
## geno.2 1 1 2 0.03403
## geno.4 1 2 1 0.08901
## geno.6 1 3 1 0.02880
## geno.11 2 2 1 0.12042
## geno.13 2 3 1 0.02094
## geno.16 3 1 1 0.04712
## geno.rare * * * 0.11780
## haplo.base 2 1 1 0.30890
# Calcular las frecuencias haplotípicas
haplos <- haplo.em(geno)
# Mostrar los haplotipos y sus frecuencias
print(haplos$haplotype) # Muestra las combinaciones de alelos en cada haplotipo
## loc-1 loc-2 loc-3
## 1 1 1 1
## 2 1 1 2
## 3 1 1 3
## 4 1 2 1
## 5 1 2 2
## 6 1 3 1
## 7 1 3 2
## 8 2 1 1
## 9 2 1 2
## 10 2 1 3
## 11 2 2 1
## 12 2 2 2
## 13 2 3 1
## 14 2 3 2
## 15 2 3 3
## 16 3 1 1
## 17 3 1 2
## 18 3 1 3
## 19 3 2 1
## 20 3 2 2
## 21 3 3 1
## 22 3 3 3
print(haplos$hap.prob) # Muestra las frecuencias haplotípicas de cada haplotipo
## [1] 0.232984293 0.034031414 0.002617801 0.089005236 0.010471204 0.028795812
## [7] 0.005235602 0.308900524 0.015706806 0.010471204 0.120418848 0.013089005
## [13] 0.020942408 0.005235602 0.010471204 0.047120419 0.007853403 0.005235602
## [19] 0.018324607 0.005235602 0.005235602 0.002617801