library(readxl)
setwd(dir = "/Users/lorandacalderonzamora/Downloads/")
data <- read_excel("BASEHAPLOTIPO04_cleaned.xlsx")
## Warning: Expecting numeric in M379 / R379C13: got a date
## Warning: Expecting numeric in M380 / R380C13: got a date
class(data)
## [1] "tbl_df" "tbl" "data.frame"
names(data)
## [1] "SEXO" "EDAD" "IMC" "TFG"
## [5] "GLUCOSA" "UREA" "BUN" "BUNCREA"
## [9] "CREATININA" "COLESTEROL" "HDL" "LDL"
## [13] "VLDL" "TRIGLICERIDOS" "HbA1C" "FUMA"
## [17] "ALCOHOL" "ACTFISICA" "HTA" "SXMET"
## [21] "RAGE 624" "RAGE625" "RAGE600"
head(data)
## # A tibble: 6 × 23
## SEXO EDAD IMC TFG GLUCOSA UREA BUN BUNCREA CREATININA COLESTEROL
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 65 40.6 95.6 94 24.3 11.3 18.9 0.6 210
## 2 1 74 33.8 92.4 106 34.8 16.2 23.2 0.7 222
## 3 1 67 22.5 94.3 168 25.6 11.9 19.9 0.6 123
## 4 0 66 25.5 98.3 140 32.3 15.1 21.5 0.7 99
## 5 1 66 24.4 95.0 94 28.7 13.4 22.3 0.6 145
## 6 1 64 31 102. 118 25.5 11.9 23.8 0.5 171
## # ℹ 13 more variables: HDL <dbl>, LDL <dbl>, VLDL <dbl>, TRIGLICERIDOS <dbl>,
## # HbA1C <dbl>, FUMA <dbl>, ALCOHOL <dbl>, ACTFISICA <dbl>, HTA <dbl>,
## # SXMET <dbl>, `RAGE 624` <dbl>, RAGE625 <dbl>, RAGE600 <dbl>
library(rms)
## Loading required package: Hmisc
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
library(haplo.stats)
## Loading required package: arsenal
##
## Attaching package: 'arsenal'
## The following object is masked from 'package:Hmisc':
##
## %nin%
str(data)
## tibble [382 × 23] (S3: tbl_df/tbl/data.frame)
## $ SEXO : num [1:382] 1 1 1 0 1 1 1 1 1 1 ...
## $ EDAD : num [1:382] 65 74 67 66 66 64 62 65 60 67 ...
## $ IMC : num [1:382] 40.6 33.8 22.5 25.5 24.4 31 27.9 27.4 26.1 24.8 ...
## $ TFG : num [1:382] 95.7 92.3 94.3 98.3 95 ...
## $ GLUCOSA : num [1:382] 94 106 168 140 94 ...
## $ UREA : num [1:382] 24.3 34.8 25.6 32.3 28.7 25.5 24.6 38.2 17.3 33.5 ...
## $ BUN : num [1:382] 11.3 16.2 11.9 15.1 13.4 ...
## $ BUNCREA : num [1:382] 18.9 23.2 19.9 21.5 22.3 ...
## $ CREATININA : num [1:382] 0.6 0.7 0.6 0.7 0.6 0.5 0.5 0.7 0.68 0.69 ...
## $ COLESTEROL : num [1:382] 210 222 123 99 145 ...
## $ HDL : num [1:382] 47 53 26 29 44 39 50 43 26.4 43.9 ...
## $ LDL : num [1:382] 131 142.4 79.2 52.6 71.6 ...
## $ VLDL : num [1:382] 32 26.6 17.8 17.4 29.4 ...
## $ TRIGLICERIDOS: num [1:382] 160 133 89 87 147 ...
## $ HbA1C : num [1:382] 7.1 6.8 9.5 7.4 6.9 5.5 5.3 5.7 7.6 11.8 ...
## $ FUMA : num [1:382] 0 0 0 0 0 0 0 0 0 0 ...
## $ ALCOHOL : num [1:382] 0 0 0 1 0 1 0 0 1 0 ...
## $ ACTFISICA : num [1:382] 1 0 1 1 0 1 0 1 1 1 ...
## $ HTA : num [1:382] 1 1 1 1 1 1 0 0 0 0 ...
## $ SXMET : num [1:382] 1 1 0 1 0 1 0 0 0 0 ...
## $ RAGE 624 : num [1:382] 1 2 2 1 3 2 1 2 2 1 ...
## $ RAGE625 : num [1:382] 1 1 1 1 1 2 1 1 1 1 ...
## $ RAGE600 : num [1:382] 1 1 1 1 1 1 1 1 1 1 ...
# Duplicar las columnas de genotipos para reflejar dos alelos
geno <- data.frame(
RAGE624.a1 = data$`RAGE 624`, RAGE624.a2 = data$`RAGE 624`,
RAGE625.a1 = data$RAGE625, RAGE625.a2 = data$RAGE625,
RAGE600.a1 = data$RAGE600, RAGE600.a2 = data$RAGE600
)
# Etiquetas para los loci (SNPs)
label <- c("RAGE 624", "RAGE625", "RAGE600")
keep <- !apply(is.na(geno) | geno == 0, 1, any)
data <- data[keep, ]
geno <- geno[keep, ]
# Variable de respuesta
y <- data$TFG
geno <- setupGeno(geno, miss.val = c(0, NA))
# Preparar los datos en un data.frame para el análisis
my.data <- data.frame(geno = geno,
SEXO = data$SEXO, EDAD = data$EDAD, IMC = data$IMC,
GLUCOSA = data$GLUCOSA, UREA = data$UREA, BUN = data$BUN,
BUNCREA = data$BUNCREA, CREATININA = data$CREATININA,
COLESTEROL = data$COLESTEROL, HDL = data$HDL, LDL = data$LDL,
VLDL = data$VLDL, TRIGLICERIDOS = data$TRIGLICERIDOS,
HbA1C = data$HbA1C, FUMA = data$FUMA, ALCOHOL = data$ALCOHOL,
ACTFISICA = data$ACTFISICA, HTA = data$HTA, y = y)
# Ejecutar el modelo
fit.gaus <- haplo.glm(y ~ SEXO + EDAD + IMC + GLUCOSA + UREA + BUN + BUNCREA + CREATININA +
COLESTEROL + HDL + LDL + VLDL + TRIGLICERIDOS + HbA1C + FUMA + ALCOHOL +
ACTFISICA + HTA + geno,
family = gaussian,
na.action = "na.geno.keep",
allele.lev = attributes(geno)$unique.alleles,
data = my.data, locus.label = label,
control = haplo.glm.control(haplo.freq.min = 0.02))
summary(fit.gaus)
##
## Call:
## haplo.glm(formula = y ~ SEXO + EDAD + IMC + GLUCOSA + UREA +
## BUN + BUNCREA + CREATININA + COLESTEROL + HDL + LDL + VLDL +
## TRIGLICERIDOS + HbA1C + FUMA + ALCOHOL + ACTFISICA + HTA +
## geno, family = gaussian, data = my.data, na.action = "na.geno.keep",
## locus.label = label, control = haplo.glm.control(haplo.freq.min = 0.02),
## allele.lev = attributes(geno)$unique.alleles)
##
## Coefficients:
## coef se t.stat pval
## (Intercept) 1.528e+02 1.004e-01 1.521e+03 0.000
## SEXO -4.920e+00 1.577e+00 -3.120e+00 0.002
## EDAD -9.108e-01 6.352e-02 -1.434e+01 0.000
## IMC 6.843e-02 1.198e-01 5.713e-01 0.568
## GLUCOSA 9.804e-03 1.084e-02 9.047e-01 0.366
## UREA -4.239e-01 5.473e-01 -7.745e-01 0.439
## BUN 1.085e-02 1.185e+00 9.158e-03 0.993
## BUNCREA 7.236e-01 3.878e-02 1.866e+01 0.000
## CREATININA -3.406e+00 6.740e-01 -5.053e+00 0.000
## COLESTEROL 6.575e-02 1.580e-01 4.161e-01 0.678
## HDL -1.609e-01 1.384e-01 -1.162e+00 0.246
## LDL -5.556e-02 1.581e-01 -3.514e-01 0.725
## VLDL 7.424e-05 2.419e-04 3.069e-01 0.759
## TRIGLICERIDOS -1.171e-02 3.302e-02 -3.546e-01 0.723
## HbA1C -7.159e-01 4.728e-01 -1.514e+00 0.131
## FUMA -1.144e+00 6.919e-01 -1.653e+00 0.099
## ALCOHOL 1.561e+00 1.561e+00 1.000e+00 0.318
## ACTFISICA 2.905e+00 1.534e+00 1.893e+00 0.059
## HTA -2.691e+00 1.721e+00 -1.564e+00 0.119
## geno.1 -6.269e-01 9.094e-01 -6.894e-01 0.491
## geno.2 1.555e+00 5.609e-01 2.772e+00 0.006
## geno.4 1.803e-01 1.316e+00 1.370e-01 0.891
## geno.6 -5.448e+00 2.834e-01 -1.923e+01 0.000
## geno.11 -1.261e+00 1.165e+00 -1.082e+00 0.280
## geno.13 -1.614e+00 2.068e-01 -7.803e+00 0.000
## geno.16 1.530e+00 9.080e-01 1.685e+00 0.093
## geno.rare 1.406e+00 1.199e+00 1.173e+00 0.242
##
## (Dispersion parameter for gaussian family taken to be 225.3871)
##
## Null deviance: 551004 on 381 degrees of freedom
## Residual deviance: 80012 on 355 degrees of freedom
## AIC: 3181.7
##
## Number of Fisher Scoring iterations: 2
##
##
## Haplotypes:
## RAGE.624 RAGE625 RAGE600 hap.freq
## geno.1 1 1 1 0.23298
## geno.2 1 1 2 0.03403
## geno.4 1 2 1 0.08901
## geno.6 1 3 1 0.02880
## geno.11 2 2 1 0.12042
## geno.13 2 3 1 0.02094
## geno.16 3 1 1 0.04712
## geno.rare * * * 0.11780
## haplo.base 2 1 1 0.30890
# Calcular las frecuencias haplotípicas
haplos <- haplo.em(geno)
# Mostrar los haplotipos y sus frecuencias
print(haplos$haplotype) # Muestra las combinaciones de alelos en cada haplotipo
## loc-1 loc-2 loc-3
## 1 1 1 1
## 2 1 1 2
## 3 1 1 3
## 4 1 2 1
## 5 1 2 2
## 6 1 3 1
## 7 1 3 2
## 8 2 1 1
## 9 2 1 2
## 10 2 1 3
## 11 2 2 1
## 12 2 2 2
## 13 2 3 1
## 14 2 3 2
## 15 2 3 3
## 16 3 1 1
## 17 3 1 2
## 18 3 1 3
## 19 3 2 1
## 20 3 2 2
## 21 3 3 1
## 22 3 3 3
print(haplos$hap.prob) # Muestra las frecuencias haplotípicas de cada haplotipo
## [1] 0.232984293 0.034031414 0.002617801 0.089005236 0.010471204 0.028795812
## [7] 0.005235602 0.308900524 0.015706806 0.010471204 0.120418848 0.013089005
## [13] 0.020942408 0.005235602 0.010471204 0.047120419 0.007853403 0.005235602
## [19] 0.018324607 0.005235602 0.005235602 0.002617801