library(readxl)
setwd(dir = "/Users/lorandacalderonzamora/Downloads/")
data <- read_excel("BASEHAPLOTIPO04_cleaned.xlsx")
## Warning: Expecting numeric in M379 / R379C13: got a date
## Warning: Expecting numeric in M380 / R380C13: got a date
class(data)
## [1] "tbl_df"     "tbl"        "data.frame"
names(data)
##  [1] "SEXO"          "EDAD"          "IMC"           "TFG"          
##  [5] "GLUCOSA"       "UREA"          "BUN"           "BUNCREA"      
##  [9] "CREATININA"    "COLESTEROL"    "HDL"           "LDL"          
## [13] "VLDL"          "TRIGLICERIDOS" "HbA1C"         "FUMA"         
## [17] "ALCOHOL"       "ACTFISICA"     "HTA"           "SXMET"        
## [21] "RAGE 624"      "RAGE625"       "RAGE600"
head(data)
## # A tibble: 6 × 23
##    SEXO  EDAD   IMC   TFG GLUCOSA  UREA   BUN BUNCREA CREATININA COLESTEROL
##   <dbl> <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl>   <dbl>      <dbl>      <dbl>
## 1     1    65  40.6  95.6      94  24.3  11.3    18.9        0.6        210
## 2     1    74  33.8  92.4     106  34.8  16.2    23.2        0.7        222
## 3     1    67  22.5  94.3     168  25.6  11.9    19.9        0.6        123
## 4     0    66  25.5  98.3     140  32.3  15.1    21.5        0.7         99
## 5     1    66  24.4  95.0      94  28.7  13.4    22.3        0.6        145
## 6     1    64  31   102.      118  25.5  11.9    23.8        0.5        171
## # ℹ 13 more variables: HDL <dbl>, LDL <dbl>, VLDL <dbl>, TRIGLICERIDOS <dbl>,
## #   HbA1C <dbl>, FUMA <dbl>, ALCOHOL <dbl>, ACTFISICA <dbl>, HTA <dbl>,
## #   SXMET <dbl>, `RAGE 624` <dbl>, RAGE625 <dbl>, RAGE600 <dbl>
library(rms)
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(haplo.stats)
## Loading required package: arsenal
## 
## Attaching package: 'arsenal'
## The following object is masked from 'package:Hmisc':
## 
##     %nin%
str(data)
## tibble [382 × 23] (S3: tbl_df/tbl/data.frame)
##  $ SEXO         : num [1:382] 1 1 1 0 1 1 1 1 1 1 ...
##  $ EDAD         : num [1:382] 65 74 67 66 66 64 62 65 60 67 ...
##  $ IMC          : num [1:382] 40.6 33.8 22.5 25.5 24.4 31 27.9 27.4 26.1 24.8 ...
##  $ TFG          : num [1:382] 95.7 92.3 94.3 98.3 95 ...
##  $ GLUCOSA      : num [1:382] 94 106 168 140 94 ...
##  $ UREA         : num [1:382] 24.3 34.8 25.6 32.3 28.7 25.5 24.6 38.2 17.3 33.5 ...
##  $ BUN          : num [1:382] 11.3 16.2 11.9 15.1 13.4 ...
##  $ BUNCREA      : num [1:382] 18.9 23.2 19.9 21.5 22.3 ...
##  $ CREATININA   : num [1:382] 0.6 0.7 0.6 0.7 0.6 0.5 0.5 0.7 0.68 0.69 ...
##  $ COLESTEROL   : num [1:382] 210 222 123 99 145 ...
##  $ HDL          : num [1:382] 47 53 26 29 44 39 50 43 26.4 43.9 ...
##  $ LDL          : num [1:382] 131 142.4 79.2 52.6 71.6 ...
##  $ VLDL         : num [1:382] 32 26.6 17.8 17.4 29.4 ...
##  $ TRIGLICERIDOS: num [1:382] 160 133 89 87 147 ...
##  $ HbA1C        : num [1:382] 7.1 6.8 9.5 7.4 6.9 5.5 5.3 5.7 7.6 11.8 ...
##  $ FUMA         : num [1:382] 0 0 0 0 0 0 0 0 0 0 ...
##  $ ALCOHOL      : num [1:382] 0 0 0 1 0 1 0 0 1 0 ...
##  $ ACTFISICA    : num [1:382] 1 0 1 1 0 1 0 1 1 1 ...
##  $ HTA          : num [1:382] 1 1 1 1 1 1 0 0 0 0 ...
##  $ SXMET        : num [1:382] 1 1 0 1 0 1 0 0 0 0 ...
##  $ RAGE 624     : num [1:382] 1 2 2 1 3 2 1 2 2 1 ...
##  $ RAGE625      : num [1:382] 1 1 1 1 1 2 1 1 1 1 ...
##  $ RAGE600      : num [1:382] 1 1 1 1 1 1 1 1 1 1 ...
# Crear data.frame de genotipos con dos alelos
geno <- data.frame(
  RAGE624.a1 = data$`RAGE 624`, RAGE624.a2 = data$`RAGE 624`,
  RAGE625.a1 = data$RAGE625, RAGE625.a2 = data$RAGE625,
  RAGE600.a1 = data$RAGE600, RAGE600.a2 = data$RAGE600
)

# Etiquetas para los loci (SNPs)
label <- c("RAGE624", "RAGE625", "RAGE600")

# Remover filas con datos faltantes o ceros
keep <- !apply(is.na(geno) | geno == 0, 1, any)
data <- data[keep, ]
geno <- geno[keep, ]

# Definir la variable respuesta
y <- data$TFG  # Variable continua

# Configuración de genotipos
geno <- setupGeno(geno, miss.val = c(0, NA))

# Crear el data.frame para el análisis
my.data <- data.frame(geno = geno, Sex = as.factor(data$SEXO), HBA1c = data$HbA1C, y = y)

# Realizar el análisis de haplotipos
fit.gaus <- haplo.glm(y ~ Sex + HBA1c + geno, family = gaussian,
                      na.action = "na.geno.keep",
                      allele.lev = attributes(geno)$unique.alleles,
                      data = my.data, locus.label = label,
                      control = haplo.glm.control(haplo.freq.min = 0.02))

# Resumen del modelo
summary(fit.gaus)
## 
## Call:
## haplo.glm(formula = y ~ Sex + HBA1c + geno, family = gaussian, 
##     data = my.data, na.action = "na.geno.keep", locus.label = label, 
##     control = haplo.glm.control(haplo.freq.min = 0.02), allele.lev = attributes(geno)$unique.alleles)
## 
## Coefficients:
##                coef      se  t.stat  pval
## (Intercept) 23.9084  8.7274  2.7395 0.006
## Sex1         9.5017  3.7308  2.5468 0.011
## HBA1c        6.4450  0.9692  6.6500 0.000
## geno.1      -0.5992  2.5345 -0.2364 0.813
## geno.2       5.6455  5.2233  1.0808 0.280
## geno.4       2.7329  3.4751  0.7864 0.432
## geno.6      -3.2162  5.6312 -0.5711 0.568
## geno.11      1.1491  3.1021  0.3704 0.711
## geno.13     11.5301  6.5195  1.7686 0.078
## geno.16      1.6691  4.5168  0.3695 0.712
## geno.rare    5.2972  3.1265  1.6943 0.091
## 
## (Dispersion parameter for gaussian family taken to be 1273.201)
## 
##     Null deviance: 550318  on 381  degrees of freedom
## Residual deviance: 472358  on 371  degrees of freedom
## AIC: 3827.9
## 
## Number of Fisher Scoring iterations: 2
## 
## 
## Haplotypes:
##            RAGE624 RAGE625 RAGE600 hap.freq
## geno.1           1       1       1  0.23298
## geno.2           1       1       2  0.03403
## geno.4           1       2       1  0.08901
## geno.6           1       3       1  0.02880
## geno.11          2       2       1  0.12042
## geno.13          2       3       1  0.02094
## geno.16          3       1       1  0.04712
## geno.rare        *       *       *  0.11780
## haplo.base       2       1       1  0.30890
# Calcular las frecuencias haplotípicas
haplos <- haplo.em(geno)

# Mostrar los haplotipos y sus frecuencias
print(haplos$haplotype)  # Muestra las combinaciones de alelos en cada haplotipo
##    loc-1 loc-2 loc-3
## 1      1     1     1
## 2      1     1     2
## 3      1     1     3
## 4      1     2     1
## 5      1     2     2
## 6      1     3     1
## 7      1     3     2
## 8      2     1     1
## 9      2     1     2
## 10     2     1     3
## 11     2     2     1
## 12     2     2     2
## 13     2     3     1
## 14     2     3     2
## 15     2     3     3
## 16     3     1     1
## 17     3     1     2
## 18     3     1     3
## 19     3     2     1
## 20     3     2     2
## 21     3     3     1
## 22     3     3     3
print(haplos$hap.prob)   # Muestra las frecuencias haplotípicas de cada haplotipo
##  [1] 0.232984293 0.034031414 0.002617801 0.089005236 0.010471204 0.028795812
##  [7] 0.005235602 0.308900524 0.015706806 0.010471204 0.120418848 0.013089005
## [13] 0.020942408 0.005235602 0.010471204 0.047120419 0.007853403 0.005235602
## [19] 0.018324607 0.005235602 0.005235602 0.002617801