library(readxl)
setwd(dir = "/Users/lorandacalderonzamora/Downloads/")
data <- read_excel("datos_haplotipos.xlsx")
class(data)
## [1] "tbl_df" "tbl" "data.frame"
names(data)
## [1] "RAGE624" "RAGE625" "RAGE600" "TFG"
## [5] "HBA1c" "Sex" "Disfuncion_renal"
head(data)
## # A tibble: 6 × 7
## RAGE624 RAGE625 RAGE600 TFG HBA1c Sex Disfuncion_renal
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 1 1 95.6 7.1 0 0
## 2 2 1 1 92.4 6.8 0 0
## 3 2 1 1 94.3 9.5 0 0
## 4 1 1 1 98.3 7.4 1 0
## 5 3 1 1 95.0 6.9 0 0
## 6 2 2 1 102. 5.5 0 0
library(rms)
## Loading required package: Hmisc
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
library(haplo.stats)
## Loading required package: arsenal
##
## Attaching package: 'arsenal'
## The following object is masked from 'package:Hmisc':
##
## %nin%
# Duplicar las columnas de genotipos para reflejar dos alelos (haplotipos diploides)
geno <- data.frame(
RAGE624.a1 = data$RAGE624, RAGE624.a2 = data$RAGE624,
RAGE625.a1 = data$RAGE625, RAGE625.a2 = data$RAGE625,
RAGE600.a1 = data$RAGE600, RAGE600.a2 = data$RAGE600
)
# Etiquetas para los loci (SNPs)
label <- c("RAGE624", "RAGE625", "RAGE600")
# Remover filas con datos faltantes o ceros
keep <- !apply(is.na(geno) | geno == 0, 1, any)
data <- data[keep, ]
geno <- geno[keep, ]
# Definir la variable respuesta
y <- data$TFG # Variable continua para evitar advertencias
geno <- setupGeno(geno, miss.val = c(0, NA))
my.data <- data.frame(geno = geno, Sex = data$Sex, HBA1c = data$HBA1c, y = y)
# Realizar el análisis de haplotipos con haplo.glm
fit.gaus <- haplo.glm(y ~ Sex + HBA1c + geno, family = gaussian,
na.action = "na.geno.keep",
allele.lev = attributes(geno)$unique.alleles,
data = my.data, locus.label = label,
control = haplo.glm.control(haplo.freq.min = 0.02))
summary(fit.gaus)
##
## Call:
## haplo.glm(formula = y ~ Sex + HBA1c + geno, family = gaussian,
## data = my.data, na.action = "na.geno.keep", locus.label = label,
## control = haplo.glm.control(haplo.freq.min = 0.02), allele.lev = attributes(geno)$unique.alleles)
##
## Coefficients:
## coef se t.stat pval
## (Intercept) 39.0472 8.6689 4.5043 0.000
## Sex -8.9212 3.7507 -2.3785 0.018
## HBA1c 5.7648 0.9938 5.8006 0.000
## geno.1 -1.0887 2.4502 -0.4443 0.657
## geno.2 5.2599 5.0543 1.0407 0.299
## geno.4 2.4258 3.3597 0.7220 0.471
## geno.9 0.6136 2.9768 0.2061 0.837
## geno.11 1.3542 4.3688 0.3100 0.757
## geno.rare 3.7093 3.3207 1.1170 0.265
##
## (Dispersion parameter for gaussian family taken to be 1192.314)
##
## Null deviance: 466767 on 354 degrees of freedom
## Residual deviance: 412541 on 346 degrees of freedom
## AIC: 3533
##
## Number of Fisher Scoring iterations: 2
##
##
## Haplotypes:
## RAGE624 RAGE625 RAGE600 hap.freq
## geno.1 1 1 1 0.25070
## geno.2 1 1 2 0.03662
## geno.4 1 2 1 0.09577
## geno.9 2 2 1 0.13239
## geno.11 3 1 1 0.05070
## geno.rare * * * 0.09859
## haplo.base 2 1 1 0.33521
# Calcular las frecuencias haplotípicas usando haplo.em
haplos <- haplo.em(geno)
# Mostrar los haplotipos y sus frecuencias
print(haplos$haplotype) # Muestra las combinaciones de alelos en cada haplotipo
## loc-1 loc-2 loc-3
## 1 1 1 1
## 2 1 1 2
## 3 1 1 3
## 4 1 2 1
## 5 1 2 2
## 6 2 1 1
## 7 2 1 2
## 8 2 1 3
## 9 2 2 1
## 10 2 2 2
## 11 3 1 1
## 12 3 1 2
## 13 3 1 3
## 14 3 2 1
## 15 3 2 2
print(haplos$hap.prob) # Muestra las frecuencias haplotípicas de cada haplotipo
## [1] 0.250704225 0.036619718 0.002816901 0.095774648 0.014084507 0.335211268
## [7] 0.016901408 0.011267606 0.132394366 0.014084507 0.050704225 0.008450704
## [13] 0.005633803 0.019718310 0.005633803