library(readxl)
setwd(dir = "/Users/lorandacalderonzamora/Downloads/")
data <- read_excel("BASEHAPLOTIPO04_cleaned.xlsx")
## Warning: Expecting numeric in M379 / R379C13: got a date
## Warning: Expecting numeric in M380 / R380C13: got a date
class(data)
## [1] "tbl_df"     "tbl"        "data.frame"
names(data)
##  [1] "SEXO"          "EDAD"          "IMC"           "TFG"          
##  [5] "GLUCOSA"       "UREA"          "BUN"           "BUNCREA"      
##  [9] "CREATININA"    "COLESTEROL"    "HDL"           "LDL"          
## [13] "VLDL"          "TRIGLICERIDOS" "HbA1C"         "FUMA"         
## [17] "ALCOHOL"       "ACTFISICA"     "HTA"           "SXMET"        
## [21] "RAGE 624"      "RAGE625"       "RAGE600"
head(data)
## # A tibble: 6 × 23
##    SEXO  EDAD   IMC   TFG GLUCOSA  UREA   BUN BUNCREA CREATININA COLESTEROL
##   <dbl> <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl>   <dbl>      <dbl>      <dbl>
## 1     1    65  40.6  95.6      94  24.3  11.3    18.9        0.6        210
## 2     1    74  33.8  92.4     106  34.8  16.2    23.2        0.7        222
## 3     1    67  22.5  94.3     168  25.6  11.9    19.9        0.6        123
## 4     0    66  25.5  98.3     140  32.3  15.1    21.5        0.7         99
## 5     1    66  24.4  95.0      94  28.7  13.4    22.3        0.6        145
## 6     1    64  31   102.      118  25.5  11.9    23.8        0.5        171
## # ℹ 13 more variables: HDL <dbl>, LDL <dbl>, VLDL <dbl>, TRIGLICERIDOS <dbl>,
## #   HbA1C <dbl>, FUMA <dbl>, ALCOHOL <dbl>, ACTFISICA <dbl>, HTA <dbl>,
## #   SXMET <dbl>, `RAGE 624` <dbl>, RAGE625 <dbl>, RAGE600 <dbl>
library(rms)
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(haplo.stats)
## Loading required package: arsenal
## 
## Attaching package: 'arsenal'
## The following object is masked from 'package:Hmisc':
## 
##     %nin%
str(data)
## tibble [382 × 23] (S3: tbl_df/tbl/data.frame)
##  $ SEXO         : num [1:382] 1 1 1 0 1 1 1 1 1 1 ...
##  $ EDAD         : num [1:382] 65 74 67 66 66 64 62 65 60 67 ...
##  $ IMC          : num [1:382] 40.6 33.8 22.5 25.5 24.4 31 27.9 27.4 26.1 24.8 ...
##  $ TFG          : num [1:382] 95.7 92.3 94.3 98.3 95 ...
##  $ GLUCOSA      : num [1:382] 94 106 168 140 94 ...
##  $ UREA         : num [1:382] 24.3 34.8 25.6 32.3 28.7 25.5 24.6 38.2 17.3 33.5 ...
##  $ BUN          : num [1:382] 11.3 16.2 11.9 15.1 13.4 ...
##  $ BUNCREA      : num [1:382] 18.9 23.2 19.9 21.5 22.3 ...
##  $ CREATININA   : num [1:382] 0.6 0.7 0.6 0.7 0.6 0.5 0.5 0.7 0.68 0.69 ...
##  $ COLESTEROL   : num [1:382] 210 222 123 99 145 ...
##  $ HDL          : num [1:382] 47 53 26 29 44 39 50 43 26.4 43.9 ...
##  $ LDL          : num [1:382] 131 142.4 79.2 52.6 71.6 ...
##  $ VLDL         : num [1:382] 32 26.6 17.8 17.4 29.4 ...
##  $ TRIGLICERIDOS: num [1:382] 160 133 89 87 147 ...
##  $ HbA1C        : num [1:382] 7.1 6.8 9.5 7.4 6.9 5.5 5.3 5.7 7.6 11.8 ...
##  $ FUMA         : num [1:382] 0 0 0 0 0 0 0 0 0 0 ...
##  $ ALCOHOL      : num [1:382] 0 0 0 1 0 1 0 0 1 0 ...
##  $ ACTFISICA    : num [1:382] 1 0 1 1 0 1 0 1 1 1 ...
##  $ HTA          : num [1:382] 1 1 1 1 1 1 0 0 0 0 ...
##  $ SXMET        : num [1:382] 1 1 0 1 0 1 0 0 0 0 ...
##  $ RAGE 624     : num [1:382] 1 2 2 1 3 2 1 2 2 1 ...
##  $ RAGE625      : num [1:382] 1 1 1 1 1 2 1 1 1 1 ...
##  $ RAGE600      : num [1:382] 1 1 1 1 1 1 1 1 1 1 ...
# Duplicar las columnas de genotipos para reflejar dos alelos 
geno <- data.frame(
  RAGE624.a1 = data$`RAGE 624`, RAGE624.a2 = data$`RAGE 624`,
  RAGE625.a1 = data$RAGE625, RAGE625.a2 = data$RAGE625,
  RAGE600.a1 = data$RAGE600, RAGE600.a2 = data$RAGE600
)

# Etiquetas para los loci (SNPs)
label <- c("RAGE 624", "RAGE625", "RAGE600")

keep <- !apply(is.na(geno) | geno == 0, 1, any)
data <- data[keep, ]
geno <- geno[keep, ]

# Variable de respuesta 
y <- data$TFG 
geno <- setupGeno(geno, miss.val = c(0, NA))

# Preparar los datos en un data.frame para el análisis
my.data <- data.frame(geno = geno,
                      SEXO = data$SEXO, EDAD = data$EDAD, IMC = data$IMC,
                      GLUCOSA = data$GLUCOSA, UREA = data$UREA, BUN = data$BUN,
                      BUNCREA = data$BUNCREA, CREATININA = data$CREATININA,
                      COLESTEROL = data$COLESTEROL, HDL = data$HDL, LDL = data$LDL,
                      VLDL = data$VLDL, TRIGLICERIDOS = data$TRIGLICERIDOS,
                      HbA1C = data$HbA1C, FUMA = data$FUMA, ALCOHOL = data$ALCOHOL,
                      ACTFISICA = data$ACTFISICA, HTA = data$HTA, y = y)


# Ejecutar el modelo
fit.gaus <- haplo.glm(y ~ SEXO + EDAD + IMC + GLUCOSA + UREA + BUN + BUNCREA + CREATININA +
                      COLESTEROL + HDL + LDL + VLDL + TRIGLICERIDOS + HbA1C + FUMA + ALCOHOL +
                      ACTFISICA + HTA + geno,
                      family = gaussian,
                      na.action = "na.geno.keep",
                      allele.lev = attributes(geno)$unique.alleles,  
                      data = my.data, locus.label = label,
                      control = haplo.glm.control(haplo.freq.min = 0.02))

summary(fit.gaus)
## 
## Call:
## haplo.glm(formula = y ~ SEXO + EDAD + IMC + GLUCOSA + UREA + 
##     BUN + BUNCREA + CREATININA + COLESTEROL + HDL + LDL + VLDL + 
##     TRIGLICERIDOS + HbA1C + FUMA + ALCOHOL + ACTFISICA + HTA + 
##     geno, family = gaussian, data = my.data, na.action = "na.geno.keep", 
##     locus.label = label, control = haplo.glm.control(haplo.freq.min = 0.02), 
##     allele.lev = attributes(geno)$unique.alleles)
## 
## Coefficients:
##                     coef         se     t.stat  pval
## (Intercept)    1.528e+02  1.004e-01  1.521e+03 0.000
## SEXO          -4.920e+00  1.577e+00 -3.120e+00 0.002
## EDAD          -9.108e-01  6.352e-02 -1.434e+01 0.000
## IMC            6.843e-02  1.198e-01  5.713e-01 0.568
## GLUCOSA        9.804e-03  1.084e-02  9.047e-01 0.366
## UREA          -4.239e-01  5.473e-01 -7.745e-01 0.439
## BUN            1.085e-02  1.185e+00  9.158e-03 0.993
## BUNCREA        7.236e-01  3.878e-02  1.866e+01 0.000
## CREATININA    -3.406e+00  6.740e-01 -5.053e+00 0.000
## COLESTEROL     6.575e-02  1.580e-01  4.161e-01 0.678
## HDL           -1.609e-01  1.384e-01 -1.162e+00 0.246
## LDL           -5.556e-02  1.581e-01 -3.514e-01 0.725
## VLDL           7.424e-05  2.419e-04  3.069e-01 0.759
## TRIGLICERIDOS -1.171e-02  3.302e-02 -3.546e-01 0.723
## HbA1C         -7.159e-01  4.728e-01 -1.514e+00 0.131
## FUMA          -1.144e+00  6.919e-01 -1.653e+00 0.099
## ALCOHOL        1.561e+00  1.561e+00  1.000e+00 0.318
## ACTFISICA      2.905e+00  1.534e+00  1.893e+00 0.059
## HTA           -2.691e+00  1.721e+00 -1.564e+00 0.119
## geno.1        -6.269e-01  9.094e-01 -6.894e-01 0.491
## geno.2         1.555e+00  5.609e-01  2.772e+00 0.006
## geno.4         1.803e-01  1.316e+00  1.370e-01 0.891
## geno.6        -5.448e+00  2.834e-01 -1.923e+01 0.000
## geno.11       -1.261e+00  1.165e+00 -1.082e+00 0.280
## geno.13       -1.614e+00  2.068e-01 -7.803e+00 0.000
## geno.16        1.530e+00  9.080e-01  1.685e+00 0.093
## geno.rare      1.406e+00  1.199e+00  1.173e+00 0.242
## 
## (Dispersion parameter for gaussian family taken to be 225.3871)
## 
##     Null deviance: 551004  on 381  degrees of freedom
## Residual deviance:  80012  on 355  degrees of freedom
## AIC: 3181.7
## 
## Number of Fisher Scoring iterations: 2
## 
## 
## Haplotypes:
##            RAGE.624 RAGE625 RAGE600 hap.freq
## geno.1            1       1       1  0.23298
## geno.2            1       1       2  0.03403
## geno.4            1       2       1  0.08901
## geno.6            1       3       1  0.02880
## geno.11           2       2       1  0.12042
## geno.13           2       3       1  0.02094
## geno.16           3       1       1  0.04712
## geno.rare         *       *       *  0.11780
## haplo.base        2       1       1  0.30890
# Calcular las frecuencias haplotípicas
haplos <- haplo.em(geno)

# Mostrar los haplotipos y sus frecuencias
print(haplos$haplotype)  # Muestra las combinaciones de alelos en cada haplotipo
##    loc-1 loc-2 loc-3
## 1      1     1     1
## 2      1     1     2
## 3      1     1     3
## 4      1     2     1
## 5      1     2     2
## 6      1     3     1
## 7      1     3     2
## 8      2     1     1
## 9      2     1     2
## 10     2     1     3
## 11     2     2     1
## 12     2     2     2
## 13     2     3     1
## 14     2     3     2
## 15     2     3     3
## 16     3     1     1
## 17     3     1     2
## 18     3     1     3
## 19     3     2     1
## 20     3     2     2
## 21     3     3     1
## 22     3     3     3
print(haplos$hap.prob)   # Muestra las frecuencias haplotípicas de cada haplotipo
##  [1] 0.232984293 0.034031414 0.002617801 0.089005236 0.010471204 0.028795812
##  [7] 0.005235602 0.308900524 0.015706806 0.010471204 0.120418848 0.013089005
## [13] 0.020942408 0.005235602 0.010471204 0.047120419 0.007853403 0.005235602
## [19] 0.018324607 0.005235602 0.005235602 0.002617801