library(readxl)
setwd(dir = "/Users/lorandacalderonzamora/Downloads/")
data <- read_excel("BASEHAPLOTIPO04_cleaned.xlsx")
## Warning: Expecting numeric in M379 / R379C13: got a date
## Warning: Expecting numeric in M380 / R380C13: got a date
class(data)
## [1] "tbl_df" "tbl" "data.frame"
names(data)
## [1] "SEXO" "EDAD" "IMC" "TFG"
## [5] "GLUCOSA" "UREA" "BUN" "BUNCREA"
## [9] "CREATININA" "COLESTEROL" "HDL" "LDL"
## [13] "VLDL" "TRIGLICERIDOS" "HbA1C" "FUMA"
## [17] "ALCOHOL" "ACTFISICA" "HTA" "SXMET"
## [21] "RAGE 624" "RAGE625" "RAGE600"
head(data)
## # A tibble: 6 × 23
## SEXO EDAD IMC TFG GLUCOSA UREA BUN BUNCREA CREATININA COLESTEROL
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 65 40.6 95.6 94 24.3 11.3 18.9 0.6 210
## 2 1 74 33.8 92.4 106 34.8 16.2 23.2 0.7 222
## 3 1 67 22.5 94.3 168 25.6 11.9 19.9 0.6 123
## 4 0 66 25.5 98.3 140 32.3 15.1 21.5 0.7 99
## 5 1 66 24.4 95.0 94 28.7 13.4 22.3 0.6 145
## 6 1 64 31 102. 118 25.5 11.9 23.8 0.5 171
## # ℹ 13 more variables: HDL <dbl>, LDL <dbl>, VLDL <dbl>, TRIGLICERIDOS <dbl>,
## # HbA1C <dbl>, FUMA <dbl>, ALCOHOL <dbl>, ACTFISICA <dbl>, HTA <dbl>,
## # SXMET <dbl>, `RAGE 624` <dbl>, RAGE625 <dbl>, RAGE600 <dbl>
library(rms)
## Loading required package: Hmisc
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
library(haplo.stats)
## Loading required package: arsenal
##
## Attaching package: 'arsenal'
## The following object is masked from 'package:Hmisc':
##
## %nin%
str(data)
## tibble [382 × 23] (S3: tbl_df/tbl/data.frame)
## $ SEXO : num [1:382] 1 1 1 0 1 1 1 1 1 1 ...
## $ EDAD : num [1:382] 65 74 67 66 66 64 62 65 60 67 ...
## $ IMC : num [1:382] 40.6 33.8 22.5 25.5 24.4 31 27.9 27.4 26.1 24.8 ...
## $ TFG : num [1:382] 95.7 92.3 94.3 98.3 95 ...
## $ GLUCOSA : num [1:382] 94 106 168 140 94 ...
## $ UREA : num [1:382] 24.3 34.8 25.6 32.3 28.7 25.5 24.6 38.2 17.3 33.5 ...
## $ BUN : num [1:382] 11.3 16.2 11.9 15.1 13.4 ...
## $ BUNCREA : num [1:382] 18.9 23.2 19.9 21.5 22.3 ...
## $ CREATININA : num [1:382] 0.6 0.7 0.6 0.7 0.6 0.5 0.5 0.7 0.68 0.69 ...
## $ COLESTEROL : num [1:382] 210 222 123 99 145 ...
## $ HDL : num [1:382] 47 53 26 29 44 39 50 43 26.4 43.9 ...
## $ LDL : num [1:382] 131 142.4 79.2 52.6 71.6 ...
## $ VLDL : num [1:382] 32 26.6 17.8 17.4 29.4 ...
## $ TRIGLICERIDOS: num [1:382] 160 133 89 87 147 ...
## $ HbA1C : num [1:382] 7.1 6.8 9.5 7.4 6.9 5.5 5.3 5.7 7.6 11.8 ...
## $ FUMA : num [1:382] 0 0 0 0 0 0 0 0 0 0 ...
## $ ALCOHOL : num [1:382] 0 0 0 1 0 1 0 0 1 0 ...
## $ ACTFISICA : num [1:382] 1 0 1 1 0 1 0 1 1 1 ...
## $ HTA : num [1:382] 1 1 1 1 1 1 0 0 0 0 ...
## $ SXMET : num [1:382] 1 1 0 1 0 1 0 0 0 0 ...
## $ RAGE 624 : num [1:382] 1 2 2 1 3 2 1 2 2 1 ...
## $ RAGE625 : num [1:382] 1 1 1 1 1 2 1 1 1 1 ...
## $ RAGE600 : num [1:382] 1 1 1 1 1 1 1 1 1 1 ...
# Duplicar las columnas de genotipos para reflejar dos alelos
geno <- data.frame(
RAGE624.a1 = data$`RAGE 624`, RAGE624.a2 = data$`RAGE 624`,
RAGE625.a1 = data$RAGE625, RAGE625.a2 = data$RAGE625,
RAGE600.a1 = data$RAGE600, RAGE600.a2 = data$RAGE600
)
# Etiquetas para los loci (SNPs)
label <- c("RAGE 624", "RAGE625", "RAGE600")
keep <- !apply(is.na(geno) | geno == 0, 1, any)
data <- data[keep, ]
geno <- geno[keep, ]
# Variable de respuesta
y <- data$TFG
geno <- setupGeno(geno, miss.val = c(0, NA))
# Preparar los datos en un data.frame para el análisis
my.data <- data.frame(geno = geno,
SEXO = data$SEXO, EDAD = data$EDAD, IMC = data$IMC,
GLUCOSA = data$GLUCOSA, UREA = data$UREA, BUN = data$BUN,
BUNCREA = data$BUNCREA, CREATININA = data$CREATININA,
COLESTEROL = data$COLESTEROL, HDL = data$HDL, LDL = data$LDL,
VLDL = data$VLDL, TRIGLICERIDOS = data$TRIGLICERIDOS,
HbA1C = data$HbA1C, FUMA = data$FUMA, ALCOHOL = data$ALCOHOL,
ACTFISICA = data$ACTFISICA, HTA = data$HTA, y = y)
# Ejecutar el modelo
fit.gaus <- haplo.glm(y ~ SEXO + EDAD + IMC + GLUCOSA + UREA + BUN + BUNCREA + CREATININA +
COLESTEROL + HDL + LDL + VLDL + TRIGLICERIDOS + HbA1C + FUMA + ALCOHOL +
ACTFISICA + HTA + geno,
family = gaussian,
na.action = "na.geno.keep",
allele.lev = attributes(geno)$unique.alleles,
data = my.data, locus.label = label,
control = haplo.glm.control(haplo.freq.min = 0.02))
summary(fit.gaus)
##
## Call:
## haplo.glm(formula = y ~ SEXO + EDAD + IMC + GLUCOSA + UREA +
## BUN + BUNCREA + CREATININA + COLESTEROL + HDL + LDL + VLDL +
## TRIGLICERIDOS + HbA1C + FUMA + ALCOHOL + ACTFISICA + HTA +
## geno, family = gaussian, data = my.data, na.action = "na.geno.keep",
## locus.label = label, control = haplo.glm.control(haplo.freq.min = 0.02),
## allele.lev = attributes(geno)$unique.alleles)
##
## Coefficients:
## coef se t.stat pval
## (Intercept) 1.530e+02 9.996e-02 1.530e+03 0.000
## SEXO -4.687e+00 1.574e+00 -2.977e+00 0.003
## EDAD -9.129e-01 6.343e-02 -1.439e+01 0.000
## IMC 6.782e-02 1.195e-01 5.676e-01 0.571
## GLUCOSA 9.883e-03 1.082e-02 9.134e-01 0.362
## UREA -4.253e-01 5.464e-01 -7.782e-01 0.437
## BUN 1.622e-02 1.183e+00 1.371e-02 0.989
## BUNCREA 7.228e-01 3.872e-02 1.867e+01 0.000
## CREATININA -3.412e+00 6.730e-01 -5.069e+00 0.000
## COLESTEROL 6.335e-02 1.578e-01 4.015e-01 0.688
## HDL -1.602e-01 1.382e-01 -1.159e+00 0.247
## LDL -5.342e-02 1.578e-01 -3.385e-01 0.735
## VLDL 7.717e-05 2.415e-04 3.196e-01 0.749
## TRIGLICERIDOS -1.122e-02 3.296e-02 -3.404e-01 0.734
## HbA1C -7.317e-01 4.720e-01 -1.550e+00 0.122
## FUMA -1.092e+00 6.861e-01 -1.591e+00 0.113
## ALCOHOL 1.608e+00 1.560e+00 1.031e+00 0.303
## ACTFISICA 2.894e+00 1.532e+00 1.889e+00 0.060
## HTA -2.733e+00 1.718e+00 -1.591e+00 0.113
## geno.1 -5.945e-01 9.079e-01 -6.548e-01 0.513
## geno.2 1.586e+00 5.684e-01 2.790e+00 0.006
## geno.4 2.126e-01 1.313e+00 1.619e-01 0.872
## geno.6 -5.402e+00 2.848e-01 -1.897e+01 0.000
## geno.11 -1.230e+00 1.164e+00 -1.057e+00 0.291
## geno.13 -1.587e+00 2.061e-01 -7.701e+00 0.000
## geno.16 1.559e+00 9.104e-01 1.713e+00 0.088
## geno.rare 1.434e+00 1.197e+00 1.197e+00 0.232
##
## (Dispersion parameter for gaussian family taken to be 224.6813)
##
## Null deviance: 550318 on 381 degrees of freedom
## Residual deviance: 79762 on 355 degrees of freedom
## AIC: 3180.5
##
## Number of Fisher Scoring iterations: 2
##
##
## Haplotypes:
## RAGE.624 RAGE625 RAGE600 hap.freq
## geno.1 1 1 1 0.23298
## geno.2 1 1 2 0.03403
## geno.4 1 2 1 0.08901
## geno.6 1 3 1 0.02880
## geno.11 2 2 1 0.12042
## geno.13 2 3 1 0.02094
## geno.16 3 1 1 0.04712
## geno.rare * * * 0.11780
## haplo.base 2 1 1 0.30890
# Calcular las frecuencias haplotípicas
haplos <- haplo.em(geno)
# Mostrar los haplotipos y sus frecuencias
print(haplos$haplotype) # Muestra las combinaciones de alelos en cada haplotipo
## loc-1 loc-2 loc-3
## 1 1 1 1
## 2 1 1 2
## 3 1 1 3
## 4 1 2 1
## 5 1 2 2
## 6 1 3 1
## 7 1 3 2
## 8 2 1 1
## 9 2 1 2
## 10 2 1 3
## 11 2 2 1
## 12 2 2 2
## 13 2 3 1
## 14 2 3 2
## 15 2 3 3
## 16 3 1 1
## 17 3 1 2
## 18 3 1 3
## 19 3 2 1
## 20 3 2 2
## 21 3 3 1
## 22 3 3 3
print(haplos$hap.prob) # Muestra las frecuencias haplotípicas de cada haplotipo
## [1] 0.232984293 0.034031414 0.002617801 0.089005236 0.010471204 0.028795812
## [7] 0.005235602 0.308900524 0.015706806 0.010471204 0.120418848 0.013089005
## [13] 0.020942408 0.005235602 0.010471204 0.047120419 0.007853403 0.005235602
## [19] 0.018324607 0.005235602 0.005235602 0.002617801