library(readxl)
setwd(dir = "/Users/lorandacalderonzamora/Downloads/")
data <- read_excel("datos_haplotipos.xlsx")
class(data)
## [1] "tbl_df"     "tbl"        "data.frame"
names(data)
## [1] "RAGE624"          "RAGE625"          "RAGE600"          "TFG"             
## [5] "HBA1c"            "Sex"              "Disfuncion_renal"
head(data)
## # A tibble: 6 × 7
##   RAGE624 RAGE625 RAGE600   TFG HBA1c   Sex Disfuncion_renal
##     <dbl>   <dbl>   <dbl> <dbl> <dbl> <dbl>            <dbl>
## 1       1       1       1  95.6   7.1     0                0
## 2       2       1       1  92.4   6.8     0                0
## 3       2       1       1  94.3   9.5     0                0
## 4       1       1       1  98.3   7.4     1                0
## 5       3       1       1  95.0   6.9     0                0
## 6       2       2       1 102.    5.5     0                0
library(rms)
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(haplo.stats)
## Loading required package: arsenal
## 
## Attaching package: 'arsenal'
## The following object is masked from 'package:Hmisc':
## 
##     %nin%
# Duplicar las columnas de genotipos para reflejar dos alelos (haplotipos diploides)
geno <- data.frame(
  RAGE624.a1 = data$RAGE624, RAGE624.a2 = data$RAGE624,
  RAGE625.a1 = data$RAGE625, RAGE625.a2 = data$RAGE625,
  RAGE600.a1 = data$RAGE600, RAGE600.a2 = data$RAGE600
)

# Etiquetas para los loci (SNPs)
label <- c("RAGE624", "RAGE625", "RAGE600")

# Remover filas con datos faltantes o ceros
keep <- !apply(is.na(geno) | geno == 0, 1, any)
data <- data[keep, ]
geno <- geno[keep, ]

# Definir la variable respuesta 
y <- data$TFG  # Variable continua para evitar advertencias 
geno <- setupGeno(geno, miss.val = c(0, NA))
my.data <- data.frame(geno = geno, Sex = data$Sex, HBA1c = data$HBA1c, y = y)

# Realizar el análisis de haplotipos con haplo.glm
fit.gaus <- haplo.glm(y ~ Sex + HBA1c + geno, family = gaussian,
                      na.action = "na.geno.keep",
                      allele.lev = attributes(geno)$unique.alleles,
                      data = my.data, locus.label = label,
                      control = haplo.glm.control(haplo.freq.min = 0.02))

summary(fit.gaus)
## 
## Call:
## haplo.glm(formula = y ~ Sex + HBA1c + geno, family = gaussian, 
##     data = my.data, na.action = "na.geno.keep", locus.label = label, 
##     control = haplo.glm.control(haplo.freq.min = 0.02), allele.lev = attributes(geno)$unique.alleles)
## 
## Coefficients:
##                coef      se  t.stat  pval
## (Intercept) 39.0472  8.6689  4.5043 0.000
## Sex         -8.9212  3.7507 -2.3785 0.018
## HBA1c        5.7648  0.9938  5.8006 0.000
## geno.1      -1.0887  2.4502 -0.4443 0.657
## geno.2       5.2599  5.0543  1.0407 0.299
## geno.4       2.4258  3.3597  0.7220 0.471
## geno.9       0.6136  2.9768  0.2061 0.837
## geno.11      1.3542  4.3688  0.3100 0.757
## geno.rare    3.7093  3.3207  1.1170 0.265
## 
## (Dispersion parameter for gaussian family taken to be 1192.314)
## 
##     Null deviance: 466767  on 354  degrees of freedom
## Residual deviance: 412541  on 346  degrees of freedom
## AIC: 3533
## 
## Number of Fisher Scoring iterations: 2
## 
## 
## Haplotypes:
##            RAGE624 RAGE625 RAGE600 hap.freq
## geno.1           1       1       1  0.25070
## geno.2           1       1       2  0.03662
## geno.4           1       2       1  0.09577
## geno.9           2       2       1  0.13239
## geno.11          3       1       1  0.05070
## geno.rare        *       *       *  0.09859
## haplo.base       2       1       1  0.33521
# Calcular las frecuencias haplotípicas usando haplo.em
haplos <- haplo.em(geno)

# Mostrar los haplotipos y sus frecuencias
print(haplos$haplotype)  # Muestra las combinaciones de alelos en cada haplotipo
##    loc-1 loc-2 loc-3
## 1      1     1     1
## 2      1     1     2
## 3      1     1     3
## 4      1     2     1
## 5      1     2     2
## 6      2     1     1
## 7      2     1     2
## 8      2     1     3
## 9      2     2     1
## 10     2     2     2
## 11     3     1     1
## 12     3     1     2
## 13     3     1     3
## 14     3     2     1
## 15     3     2     2
print(haplos$hap.prob)   # Muestra las frecuencias haplotípicas de cada haplotipo
##  [1] 0.250704225 0.036619718 0.002816901 0.095774648 0.014084507 0.335211268
##  [7] 0.016901408 0.011267606 0.132394366 0.014084507 0.050704225 0.008450704
## [13] 0.005633803 0.019718310 0.005633803