library(readxl)
setwd(dir = "/Users/lorandacalderonzamora/Downloads/")
data <- read_excel("BASEHAPLOTIPO04_cleaned.xlsx")
## Warning: Expecting numeric in M379 / R379C13: got a date
## Warning: Expecting numeric in M380 / R380C13: got a date
class(data)
## [1] "tbl_df"     "tbl"        "data.frame"
names(data)
##  [1] "SEXO"          "EDAD"          "IMC"           "TFG"          
##  [5] "GLUCOSA"       "UREA"          "BUN"           "BUNCREA"      
##  [9] "CREATININA"    "COLESTEROL"    "HDL"           "LDL"          
## [13] "VLDL"          "TRIGLICERIDOS" "HbA1C"         "FUMA"         
## [17] "ALCOHOL"       "ACTFISICA"     "HTA"           "SXMET"        
## [21] "RAGE 624"      "RAGE625"       "RAGE600"
head(data)
## # A tibble: 6 × 23
##    SEXO  EDAD   IMC   TFG GLUCOSA  UREA   BUN BUNCREA CREATININA COLESTEROL
##   <dbl> <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl>   <dbl>      <dbl>      <dbl>
## 1     1    65  40.6  95.6      94  24.3  11.3    18.9        0.6        210
## 2     1    74  33.8  92.4     106  34.8  16.2    23.2        0.7        222
## 3     1    67  22.5  94.3     168  25.6  11.9    19.9        0.6        123
## 4     0    66  25.5  98.3     140  32.3  15.1    21.5        0.7         99
## 5     1    66  24.4  95.0      94  28.7  13.4    22.3        0.6        145
## 6     1    64  31   102.      118  25.5  11.9    23.8        0.5        171
## # ℹ 13 more variables: HDL <dbl>, LDL <dbl>, VLDL <dbl>, TRIGLICERIDOS <dbl>,
## #   HbA1C <dbl>, FUMA <dbl>, ALCOHOL <dbl>, ACTFISICA <dbl>, HTA <dbl>,
## #   SXMET <dbl>, `RAGE 624` <dbl>, RAGE625 <dbl>, RAGE600 <dbl>
library(rms)
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(haplo.stats)
## Loading required package: arsenal
## 
## Attaching package: 'arsenal'
## The following object is masked from 'package:Hmisc':
## 
##     %nin%
str(data)
## tibble [382 × 23] (S3: tbl_df/tbl/data.frame)
##  $ SEXO         : num [1:382] 1 1 1 0 1 1 1 1 1 1 ...
##  $ EDAD         : num [1:382] 65 74 67 66 66 64 62 65 60 67 ...
##  $ IMC          : num [1:382] 40.6 33.8 22.5 25.5 24.4 31 27.9 27.4 26.1 24.8 ...
##  $ TFG          : num [1:382] 95.7 92.3 94.3 98.3 95 ...
##  $ GLUCOSA      : num [1:382] 94 106 168 140 94 ...
##  $ UREA         : num [1:382] 24.3 34.8 25.6 32.3 28.7 25.5 24.6 38.2 17.3 33.5 ...
##  $ BUN          : num [1:382] 11.3 16.2 11.9 15.1 13.4 ...
##  $ BUNCREA      : num [1:382] 18.9 23.2 19.9 21.5 22.3 ...
##  $ CREATININA   : num [1:382] 0.6 0.7 0.6 0.7 0.6 0.5 0.5 0.7 0.68 0.69 ...
##  $ COLESTEROL   : num [1:382] 210 222 123 99 145 ...
##  $ HDL          : num [1:382] 47 53 26 29 44 39 50 43 26.4 43.9 ...
##  $ LDL          : num [1:382] 131 142.4 79.2 52.6 71.6 ...
##  $ VLDL         : num [1:382] 32 26.6 17.8 17.4 29.4 ...
##  $ TRIGLICERIDOS: num [1:382] 160 133 89 87 147 ...
##  $ HbA1C        : num [1:382] 7.1 6.8 9.5 7.4 6.9 5.5 5.3 5.7 7.6 11.8 ...
##  $ FUMA         : num [1:382] 0 0 0 0 0 0 0 0 0 0 ...
##  $ ALCOHOL      : num [1:382] 0 0 0 1 0 1 0 0 1 0 ...
##  $ ACTFISICA    : num [1:382] 1 0 1 1 0 1 0 1 1 1 ...
##  $ HTA          : num [1:382] 1 1 1 1 1 1 0 0 0 0 ...
##  $ SXMET        : num [1:382] 1 1 0 1 0 1 0 0 0 0 ...
##  $ RAGE 624     : num [1:382] 1 2 2 1 3 2 1 2 2 1 ...
##  $ RAGE625      : num [1:382] 1 1 1 1 1 2 1 1 1 1 ...
##  $ RAGE600      : num [1:382] 1 1 1 1 1 1 1 1 1 1 ...
# Duplicar las columnas de genotipos para reflejar dos alelos 
geno <- data.frame(
  RAGE624.a1 = data$`RAGE 624`, RAGE624.a2 = data$`RAGE 624`,
  RAGE625.a1 = data$RAGE625, RAGE625.a2 = data$RAGE625,
  RAGE600.a1 = data$RAGE600, RAGE600.a2 = data$RAGE600
)

# Etiquetas para los loci (SNPs)
label <- c("RAGE 624", "RAGE625", "RAGE600")

keep <- !apply(is.na(geno) | geno == 0, 1, any)
data <- data[keep, ]
geno <- geno[keep, ]

# Variable de respuesta 
y <- data$TFG 
geno <- setupGeno(geno, miss.val = c(0, NA))

# Preparar los datos en un data.frame para el análisis
my.data <- data.frame(geno = geno,
                      SEXO = data$SEXO, EDAD = data$EDAD, IMC = data$IMC,
                      GLUCOSA = data$GLUCOSA, UREA = data$UREA, BUN = data$BUN,
                      BUNCREA = data$BUNCREA, CREATININA = data$CREATININA,
                      COLESTEROL = data$COLESTEROL, HDL = data$HDL, LDL = data$LDL,
                      VLDL = data$VLDL, TRIGLICERIDOS = data$TRIGLICERIDOS,
                      HbA1C = data$HbA1C, FUMA = data$FUMA, ALCOHOL = data$ALCOHOL,
                      ACTFISICA = data$ACTFISICA, HTA = data$HTA, y = y)


# Ejecutar el modelo
fit.gaus <- haplo.glm(y ~ SEXO + EDAD + IMC + GLUCOSA + UREA + BUN + BUNCREA + CREATININA +
                      COLESTEROL + HDL + LDL + VLDL + TRIGLICERIDOS + HbA1C + FUMA + ALCOHOL +
                      ACTFISICA + HTA + geno,
                      family = gaussian,
                      na.action = "na.geno.keep",
                      allele.lev = attributes(geno)$unique.alleles,  
                      data = my.data, locus.label = label,
                      control = haplo.glm.control(haplo.freq.min = 0.02))

summary(fit.gaus)
## 
## Call:
## haplo.glm(formula = y ~ SEXO + EDAD + IMC + GLUCOSA + UREA + 
##     BUN + BUNCREA + CREATININA + COLESTEROL + HDL + LDL + VLDL + 
##     TRIGLICERIDOS + HbA1C + FUMA + ALCOHOL + ACTFISICA + HTA + 
##     geno, family = gaussian, data = my.data, na.action = "na.geno.keep", 
##     locus.label = label, control = haplo.glm.control(haplo.freq.min = 0.02), 
##     allele.lev = attributes(geno)$unique.alleles)
## 
## Coefficients:
##                     coef         se     t.stat  pval
## (Intercept)    1.530e+02  9.996e-02  1.530e+03 0.000
## SEXO          -4.687e+00  1.574e+00 -2.977e+00 0.003
## EDAD          -9.129e-01  6.343e-02 -1.439e+01 0.000
## IMC            6.782e-02  1.195e-01  5.676e-01 0.571
## GLUCOSA        9.883e-03  1.082e-02  9.134e-01 0.362
## UREA          -4.253e-01  5.464e-01 -7.782e-01 0.437
## BUN            1.622e-02  1.183e+00  1.371e-02 0.989
## BUNCREA        7.228e-01  3.872e-02  1.867e+01 0.000
## CREATININA    -3.412e+00  6.730e-01 -5.069e+00 0.000
## COLESTEROL     6.335e-02  1.578e-01  4.015e-01 0.688
## HDL           -1.602e-01  1.382e-01 -1.159e+00 0.247
## LDL           -5.342e-02  1.578e-01 -3.385e-01 0.735
## VLDL           7.717e-05  2.415e-04  3.196e-01 0.749
## TRIGLICERIDOS -1.122e-02  3.296e-02 -3.404e-01 0.734
## HbA1C         -7.317e-01  4.720e-01 -1.550e+00 0.122
## FUMA          -1.092e+00  6.861e-01 -1.591e+00 0.113
## ALCOHOL        1.608e+00  1.560e+00  1.031e+00 0.303
## ACTFISICA      2.894e+00  1.532e+00  1.889e+00 0.060
## HTA           -2.733e+00  1.718e+00 -1.591e+00 0.113
## geno.1        -5.945e-01  9.079e-01 -6.548e-01 0.513
## geno.2         1.586e+00  5.684e-01  2.790e+00 0.006
## geno.4         2.126e-01  1.313e+00  1.619e-01 0.872
## geno.6        -5.402e+00  2.848e-01 -1.897e+01 0.000
## geno.11       -1.230e+00  1.164e+00 -1.057e+00 0.291
## geno.13       -1.587e+00  2.061e-01 -7.701e+00 0.000
## geno.16        1.559e+00  9.104e-01  1.713e+00 0.088
## geno.rare      1.434e+00  1.197e+00  1.197e+00 0.232
## 
## (Dispersion parameter for gaussian family taken to be 224.6813)
## 
##     Null deviance: 550318  on 381  degrees of freedom
## Residual deviance:  79762  on 355  degrees of freedom
## AIC: 3180.5
## 
## Number of Fisher Scoring iterations: 2
## 
## 
## Haplotypes:
##            RAGE.624 RAGE625 RAGE600 hap.freq
## geno.1            1       1       1  0.23298
## geno.2            1       1       2  0.03403
## geno.4            1       2       1  0.08901
## geno.6            1       3       1  0.02880
## geno.11           2       2       1  0.12042
## geno.13           2       3       1  0.02094
## geno.16           3       1       1  0.04712
## geno.rare         *       *       *  0.11780
## haplo.base        2       1       1  0.30890
# Calcular las frecuencias haplotípicas
haplos <- haplo.em(geno)

# Mostrar los haplotipos y sus frecuencias
print(haplos$haplotype)  # Muestra las combinaciones de alelos en cada haplotipo
##    loc-1 loc-2 loc-3
## 1      1     1     1
## 2      1     1     2
## 3      1     1     3
## 4      1     2     1
## 5      1     2     2
## 6      1     3     1
## 7      1     3     2
## 8      2     1     1
## 9      2     1     2
## 10     2     1     3
## 11     2     2     1
## 12     2     2     2
## 13     2     3     1
## 14     2     3     2
## 15     2     3     3
## 16     3     1     1
## 17     3     1     2
## 18     3     1     3
## 19     3     2     1
## 20     3     2     2
## 21     3     3     1
## 22     3     3     3
print(haplos$hap.prob)   # Muestra las frecuencias haplotípicas de cada haplotipo
##  [1] 0.232984293 0.034031414 0.002617801 0.089005236 0.010471204 0.028795812
##  [7] 0.005235602 0.308900524 0.015706806 0.010471204 0.120418848 0.013089005
## [13] 0.020942408 0.005235602 0.010471204 0.047120419 0.007853403 0.005235602
## [19] 0.018324607 0.005235602 0.005235602 0.002617801
# Extraer los valores de OR manualmente
coefs <- coef(fit.gaus)
try_confint <- try(confint(fit.gaus), silent = TRUE)
## Waiting for profiling to be done...
if (inherits(try_confint, "try-error")) {
  se <- summary(fit.gaus)$coef[, 2]  
  CI_lower <- coefs - 1.96 * se
  CI_upper <- coefs + 1.96 * se
} else {
  CI <- confint(fit.gaus)
  CI_lower <- CI[, 1]
  CI_upper <- CI[, 2]
}

# Calcular OR 
if (fit.gaus$family$family == "binomial") {
  OR <- exp(coefs)
  CI_lower <- exp(CI_lower)
  CI_upper <- exp(CI_upper)
} else {
  OR <- coefs  
}

p_values <- summary(fit.gaus)$coef[, 4]

haplotype_results <- data.frame(
  Haplotype = rownames(summary(fit.gaus)$coef),
  OR = OR,
  CI_lower = CI_lower,
  CI_upper = CI_upper,
  p_value = p_values
)

haplotype_results <- haplotype_results[order(haplotype_results$p_value), ]
print(haplotype_results)
##                   Haplotype            OR      CI_lower      CI_upper
## (Intercept)     (Intercept)  1.529685e+02  1.527726e+02 153.164406428
## EDAD                   EDAD -9.128810e-01 -1.037197e+00  -0.788564592
## BUNCREA             BUNCREA  7.227616e-01  6.468754e-01   0.798647800
## geno.6               geno.6 -5.402046e+00 -5.960336e+00  -4.843755217
## geno.13             geno.13 -1.587351e+00 -1.991373e+00  -1.183329187
## CREATININA       CREATININA -3.411801e+00 -4.730958e+00  -2.092643985
## SEXO                   SEXO -4.687189e+00 -7.773065e+00  -1.601313711
## geno.2               geno.2  1.585819e+00  4.717615e-01   2.699876809
## ACTFISICA         ACTFISICA  2.894135e+00 -1.080708e-01   5.896340091
## geno.16             geno.16  1.559450e+00 -2.250102e-01   3.343910284
## FUMA                   FUMA -1.091626e+00 -2.436449e+00   0.253196051
## HTA                     HTA -2.732538e+00 -6.099765e+00   0.634689949
## HbA1C                 HbA1C -7.317370e-01 -1.656919e+00   0.193445058
## geno.rare         geno.rare  1.433784e+00 -9.129800e-01   3.780547481
## HDL                     HDL -1.601796e-01 -4.310463e-01   0.110687133
## geno.11             geno.11 -1.229685e+00 -3.510529e+00   1.051159805
## ALCOHOL             ALCOHOL  1.607682e+00 -1.449198e+00   4.664560691
## GLUCOSA             GLUCOSA  9.882823e-03 -1.132317e-02   0.031088820
## UREA                   UREA -4.252532e-01 -1.496268e+00   0.645761305
## geno.1               geno.1 -5.944661e-01 -2.373977e+00   1.185045121
## IMC                     IMC  6.782319e-02 -1.663787e-01   0.302025063
## COLESTEROL       COLESTEROL  6.334967e-02 -2.458696e-01   0.372568891
## TRIGLICERIDOS TRIGLICERIDOS -1.122117e-02 -7.583247e-02   0.053390131
## LDL                     LDL -5.342021e-02 -3.627794e-01   0.255939034
## VLDL                   VLDL  7.717476e-05 -3.961775e-04   0.000550527
## geno.4               geno.4  2.125876e-01 -2.361613e+00   2.786788311
## BUN                     BUN  1.621914e-02 -2.302597e+00   2.335035254
##                    p_value
## (Intercept)   0.000000e+00
## EDAD          0.000000e+00
## BUNCREA       0.000000e+00
## geno.6        0.000000e+00
## geno.13       1.361133e-13
## CREATININA    6.442627e-07
## SEXO          3.109863e-03
## geno.2        5.555551e-03
## ACTFISICA     5.964681e-02
## geno.16       8.761230e-02
## FUMA          1.125038e-01
## HTA           1.125990e-01
## HbA1C         1.219878e-01
## geno.rare     2.319163e-01
## HDL           2.472090e-01
## geno.11       2.913644e-01
## ALCOHOL       3.033326e-01
## GLUCOSA       3.616329e-01
## UREA          4.369514e-01
## geno.1        5.130460e-01
## IMC           5.706640e-01
## COLESTEROL    6.882607e-01
## TRIGLICERIDOS 7.337590e-01
## LDL           7.352218e-01
## VLDL          7.494929e-01
## geno.4        8.715046e-01
## BUN           9.890695e-01