library(readxl)
setwd(dir = "/Users/lorandacalderonzamora/Downloads/")
data <- read_excel("BASEHAPLOTIPO04_cleaned.xlsx")
## Warning: Expecting numeric in M379 / R379C13: got a date
## Warning: Expecting numeric in M380 / R380C13: got a date
class(data)
## [1] "tbl_df"     "tbl"        "data.frame"
names(data)
##  [1] "SEXO"          "EDAD"          "IMC"           "TFG"          
##  [5] "GLUCOSA"       "UREA"          "BUN"           "BUNCREA"      
##  [9] "CREATININA"    "COLESTEROL"    "HDL"           "LDL"          
## [13] "VLDL"          "TRIGLICERIDOS" "HbA1C"         "FUMA"         
## [17] "ALCOHOL"       "ACTFISICA"     "HTA"           "SXMET"        
## [21] "RAGE 624"      "RAGE625"       "RAGE600"
head(data)
## # A tibble: 6 × 23
##    SEXO  EDAD   IMC   TFG GLUCOSA  UREA   BUN BUNCREA CREATININA COLESTEROL
##   <dbl> <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl>   <dbl>      <dbl>      <dbl>
## 1     1    65  40.6  95.6      94  24.3  11.3    18.9        0.6        210
## 2     1    74  33.8  92.4     106  34.8  16.2    23.2        0.7        222
## 3     1    67  22.5  94.3     168  25.6  11.9    19.9        0.6        123
## 4     0    66  25.5  98.3     140  32.3  15.1    21.5        0.7         99
## 5     1    66  24.4  95.0      94  28.7  13.4    22.3        0.6        145
## 6     1    64  31   102.      118  25.5  11.9    23.8        0.5        171
## # ℹ 13 more variables: HDL <dbl>, LDL <dbl>, VLDL <dbl>, TRIGLICERIDOS <dbl>,
## #   HbA1C <dbl>, FUMA <dbl>, ALCOHOL <dbl>, ACTFISICA <dbl>, HTA <dbl>,
## #   SXMET <dbl>, `RAGE 624` <dbl>, RAGE625 <dbl>, RAGE600 <dbl>
library(rms)
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(haplo.stats)
## Loading required package: arsenal
## 
## Attaching package: 'arsenal'
## The following object is masked from 'package:Hmisc':
## 
##     %nin%
str(data)
## tibble [382 × 23] (S3: tbl_df/tbl/data.frame)
##  $ SEXO         : num [1:382] 1 1 1 0 1 1 1 1 1 1 ...
##  $ EDAD         : num [1:382] 65 74 67 66 66 64 62 65 60 67 ...
##  $ IMC          : num [1:382] 40.6 33.8 22.5 25.5 24.4 31 27.9 27.4 26.1 24.8 ...
##  $ TFG          : num [1:382] 95.7 92.3 94.3 98.3 95 ...
##  $ GLUCOSA      : num [1:382] 94 106 168 140 94 ...
##  $ UREA         : num [1:382] 24.3 34.8 25.6 32.3 28.7 25.5 24.6 38.2 17.3 33.5 ...
##  $ BUN          : num [1:382] 11.3 16.2 11.9 15.1 13.4 ...
##  $ BUNCREA      : num [1:382] 18.9 23.2 19.9 21.5 22.3 ...
##  $ CREATININA   : num [1:382] 0.6 0.7 0.6 0.7 0.6 0.5 0.5 0.7 0.68 0.69 ...
##  $ COLESTEROL   : num [1:382] 210 222 123 99 145 ...
##  $ HDL          : num [1:382] 47 53 26 29 44 39 50 43 26.4 43.9 ...
##  $ LDL          : num [1:382] 131 142.4 79.2 52.6 71.6 ...
##  $ VLDL         : num [1:382] 32 26.6 17.8 17.4 29.4 ...
##  $ TRIGLICERIDOS: num [1:382] 160 133 89 87 147 ...
##  $ HbA1C        : num [1:382] 7.1 6.8 9.5 7.4 6.9 5.5 5.3 5.7 7.6 11.8 ...
##  $ FUMA         : num [1:382] 0 0 0 0 0 0 0 0 0 0 ...
##  $ ALCOHOL      : num [1:382] 0 0 0 1 0 1 0 0 1 0 ...
##  $ ACTFISICA    : num [1:382] 1 0 1 1 0 1 0 1 1 1 ...
##  $ HTA          : num [1:382] 1 1 1 1 1 1 0 0 0 0 ...
##  $ SXMET        : num [1:382] 1 1 0 1 0 1 0 0 0 0 ...
##  $ RAGE 624     : num [1:382] 1 2 2 1 3 2 1 2 2 1 ...
##  $ RAGE625      : num [1:382] 1 1 1 1 1 2 1 1 1 1 ...
##  $ RAGE600      : num [1:382] 1 1 1 1 1 1 1 1 1 1 ...
# Duplicar las columnas de genotipos para reflejar dos alelos 
geno <- data.frame(
  RAGE624.a1 = data$`RAGE 624`, RAGE624.a2 = data$`RAGE 624`,
  RAGE625.a1 = data$RAGE625, RAGE625.a2 = data$RAGE625,
  RAGE600.a1 = data$RAGE600, RAGE600.a2 = data$RAGE600
)

# Etiquetas para los loci (SNPs)
label <- c("RAGE 624", "RAGE625", "RAGE600")

keep <- !apply(is.na(geno) | geno == 0, 1, any)
data <- data[keep, ]
geno <- geno[keep, ]

# Variable de respuesta 
y <- data$TFG 
geno <- setupGeno(geno, miss.val = c(0, NA))

# Preparar los datos en un data.frame para el análisis
my.data <- data.frame(geno = geno,
                      SEXO = data$SEXO, EDAD = data$EDAD, IMC = data$IMC,
                      GLUCOSA = data$GLUCOSA, UREA = data$UREA, BUN = data$BUN,
                      BUNCREA = data$BUNCREA, CREATININA = data$CREATININA,
                      COLESTEROL = data$COLESTEROL, HDL = data$HDL, LDL = data$LDL,
                      VLDL = data$VLDL, TRIGLICERIDOS = data$TRIGLICERIDOS,
                      HbA1C = data$HbA1C, FUMA = data$FUMA, ALCOHOL = data$ALCOHOL,
                      ACTFISICA = data$ACTFISICA, HTA = data$HTA, y = y)


# Ejecutar el modelo
fit.gaus <- haplo.glm(y ~ SEXO + EDAD + IMC + GLUCOSA + UREA + BUN + BUNCREA + CREATININA +
                      COLESTEROL + HDL + LDL + VLDL + TRIGLICERIDOS + HbA1C + FUMA + ALCOHOL +
                      ACTFISICA + HTA + geno,
                      family = gaussian,
                      na.action = "na.geno.keep",
                      allele.lev = attributes(geno)$unique.alleles,  
                      data = my.data, locus.label = label,
                      control = haplo.glm.control(haplo.freq.min = 0.02))

summary(fit.gaus)
## 
## Call:
## haplo.glm(formula = y ~ SEXO + EDAD + IMC + GLUCOSA + UREA + 
##     BUN + BUNCREA + CREATININA + COLESTEROL + HDL + LDL + VLDL + 
##     TRIGLICERIDOS + HbA1C + FUMA + ALCOHOL + ACTFISICA + HTA + 
##     geno, family = gaussian, data = my.data, na.action = "na.geno.keep", 
##     locus.label = label, control = haplo.glm.control(haplo.freq.min = 0.02), 
##     allele.lev = attributes(geno)$unique.alleles)
## 
## Coefficients:
##                     coef         se     t.stat  pval
## (Intercept)    1.530e+02  9.996e-02  1.530e+03 0.000
## SEXO          -4.687e+00  1.574e+00 -2.977e+00 0.003
## EDAD          -9.129e-01  6.343e-02 -1.439e+01 0.000
## IMC            6.782e-02  1.195e-01  5.676e-01 0.571
## GLUCOSA        9.883e-03  1.082e-02  9.134e-01 0.362
## UREA          -4.253e-01  5.464e-01 -7.782e-01 0.437
## BUN            1.622e-02  1.183e+00  1.371e-02 0.989
## BUNCREA        7.228e-01  3.872e-02  1.867e+01 0.000
## CREATININA    -3.412e+00  6.730e-01 -5.069e+00 0.000
## COLESTEROL     6.335e-02  1.578e-01  4.015e-01 0.688
## HDL           -1.602e-01  1.382e-01 -1.159e+00 0.247
## LDL           -5.342e-02  1.578e-01 -3.385e-01 0.735
## VLDL           7.717e-05  2.415e-04  3.196e-01 0.749
## TRIGLICERIDOS -1.122e-02  3.296e-02 -3.404e-01 0.734
## HbA1C         -7.317e-01  4.720e-01 -1.550e+00 0.122
## FUMA          -1.092e+00  6.861e-01 -1.591e+00 0.113
## ALCOHOL        1.608e+00  1.560e+00  1.031e+00 0.303
## ACTFISICA      2.894e+00  1.532e+00  1.889e+00 0.060
## HTA           -2.733e+00  1.718e+00 -1.591e+00 0.113
## geno.1        -5.945e-01  9.079e-01 -6.548e-01 0.513
## geno.2         1.586e+00  5.684e-01  2.790e+00 0.006
## geno.4         2.126e-01  1.313e+00  1.619e-01 0.872
## geno.6        -5.402e+00  2.848e-01 -1.897e+01 0.000
## geno.11       -1.230e+00  1.164e+00 -1.057e+00 0.291
## geno.13       -1.587e+00  2.061e-01 -7.701e+00 0.000
## geno.16        1.559e+00  9.104e-01  1.713e+00 0.088
## geno.rare      1.434e+00  1.197e+00  1.197e+00 0.232
## 
## (Dispersion parameter for gaussian family taken to be 224.6813)
## 
##     Null deviance: 550318  on 381  degrees of freedom
## Residual deviance:  79762  on 355  degrees of freedom
## AIC: 3180.5
## 
## Number of Fisher Scoring iterations: 2
## 
## 
## Haplotypes:
##            RAGE.624 RAGE625 RAGE600 hap.freq
## geno.1            1       1       1  0.23298
## geno.2            1       1       2  0.03403
## geno.4            1       2       1  0.08901
## geno.6            1       3       1  0.02880
## geno.11           2       2       1  0.12042
## geno.13           2       3       1  0.02094
## geno.16           3       1       1  0.04712
## geno.rare         *       *       *  0.11780
## haplo.base        2       1       1  0.30890
# Calcular las frecuencias haplotípicas
haplos <- haplo.em(geno)

# Mostrar los haplotipos y sus frecuencias
print(haplos$haplotype)  # Muestra las combinaciones de alelos en cada haplotipo
##    loc-1 loc-2 loc-3
## 1      1     1     1
## 2      1     1     2
## 3      1     1     3
## 4      1     2     1
## 5      1     2     2
## 6      1     3     1
## 7      1     3     2
## 8      2     1     1
## 9      2     1     2
## 10     2     1     3
## 11     2     2     1
## 12     2     2     2
## 13     2     3     1
## 14     2     3     2
## 15     2     3     3
## 16     3     1     1
## 17     3     1     2
## 18     3     1     3
## 19     3     2     1
## 20     3     2     2
## 21     3     3     1
## 22     3     3     3
print(haplos$hap.prob)   # Muestra las frecuencias haplotípicas de cada haplotipo
##  [1] 0.232984293 0.034031414 0.002617801 0.089005236 0.010471204 0.028795812
##  [7] 0.005235602 0.308900524 0.015706806 0.010471204 0.120418848 0.013089005
## [13] 0.020942408 0.005235602 0.010471204 0.047120419 0.007853403 0.005235602
## [19] 0.018324607 0.005235602 0.005235602 0.002617801