true

Integrantes:

Eisik, Magali

Gomez, Sergio

Querci, Marcia

Valenti, Florencia

Resultados del Protocolo ALLIC BFM 2012 para el tratamiento de la Leucemia Linfoblástica Aguda en Niños en Argentina

Objetivo General: Comparar la enfermedad residual mínima (ERM) al día 15 del tratamiento y evaluar la sobrevida libre de eventos según sus niveles.

library(readxl)
library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.5     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.0.2     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(dplyr)
library(ggplot2)
library(survival)
library(DT)
library(kableExtra)

## 
## Attaching package: 'kableExtra'

## The following object is masked from 'package:dplyr':
## 
##     group_rows

library(arsenal)

# Ask the user to pick the Excel workbook through a file dialog, then load it.
# NOTE(review): file.choose() depends on someone selecting the file by hand;
# a fixed relative path would make the report reproducible - confirm with authors.
path_datos<-file.choose()
datos<-read_excel(path_datos)


# List the column names of the imported dataset.
names(datos)

##  [1] "Sexo"        "Down"        "Blancos"     "Blastos"     "MO"         
##  [6] "SNC"         "Ploidia"     "Estirpe"     "RTA_PRED"    "CAT_ERM"    
## [11] "TiempoEFS"   "EFSstatus"   "Edad"        "MLL"         "TIEMPOSG"   
## [16] "SGSTATUS"    "Edad_cat"    "Blancos_cat" "TEL"

# Render the first rows of the dataset as a styled table.
kable_styling(kbl(head(datos)))

Sexo	Blancos	Blastos	MO	SNC	Ploidia	Estirpe	RTA_PRED	CAT_ERM	TiempoEFS	EFSstatus	Edad	TIEMPOSG	SGSTATUS	Edad_cat	Blancos_cat	TEL
1	3	2	90	1	1	B	1	NA	6	0	2	81	0	1	1	0
1	36	48	50	1	1	T	0	3	1	0	14	16	1	2	2	NA
0	10	95	98	1	2	B	0	3	5	0	4	67	0	1	1	0
1	8	98	98	1	2	B	1	1	10	0	3	131	0	1	1	0
0	12	22	80	1	2	B	1	1	1	1	1	28	1	1	1	NA
1	37	62	95	3	2	T	1	1	5	0	15	69	0	2	2	0

# Number of variables (columns) in the dataset.
ncol(datos)

## [1] 19

# Number of records (rows) in the dataset.
nrow(datos)

## [1] 2096

# Per-column summary: quantiles and NA counts for numeric columns,
# length/class for character columns.
summary(datos)

##       Sexo             Down            Blancos          Blastos         
##  Min.   :0.0000   Min.   :0.00000   Min.   :   0.00   Length:2096       
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:   4.00   Class :character  
##  Median :1.0000   Median :0.00000   Median :   9.00   Mode  :character  
##  Mean   :0.5592   Mean   :0.02099   Mean   :  42.17                     
##  3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:  33.00                     
##  Max.   :1.0000   Max.   :1.00000   Max.   :1165.00                     
##                                                                         
##        MO              SNC           Ploidia        Estirpe         
##  Min.   : 10.00   Min.   :1.000   Min.   :1.000   Length:2096       
##  1st Qu.: 84.00   1st Qu.:1.000   1st Qu.:2.000   Class :character  
##  Median : 90.00   Median :1.000   Median :2.000   Mode  :character  
##  Mean   : 86.79   Mean   :1.088   Mean   :2.689                     
##  3rd Qu.: 96.00   3rd Qu.:1.000   3rd Qu.:4.000                     
##  Max.   :100.00   Max.   :3.000   Max.   :5.000                     
##                                                                     
##     RTA_PRED         CAT_ERM        TiempoEFS        EFSstatus     
##  Min.   :0.0000   Min.   :1.000   Min.   : 0.000   Min.   :0.0000  
##  1st Qu.:1.0000   1st Qu.:1.000   1st Qu.: 1.000   1st Qu.:0.0000  
##  Median :1.0000   Median :2.000   Median : 3.000   Median :0.0000  
##  Mean   :0.8895   Mean   :1.784   Mean   : 3.605   Mean   :0.2557  
##  3rd Qu.:1.0000   3rd Qu.:2.000   3rd Qu.: 6.000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :3.000   Max.   :11.000   Max.   :1.0000  
##  NA's   :5        NA's   :354                                      
##       Edad             MLL            TIEMPOSG         SGSTATUS     
##  Min.   : 0.000   Min.   :0.0000   Min.   :  0.00   Min.   :0.0000  
##  1st Qu.: 3.000   1st Qu.:0.0000   1st Qu.: 23.00   1st Qu.:0.0000  
##  Median : 5.000   Median :0.0000   Median : 47.00   Median :0.0000  
##  Mean   : 6.548   Mean   :0.0115   Mean   : 50.81   Mean   :0.2233  
##  3rd Qu.:10.000   3rd Qu.:0.0000   3rd Qu.: 77.00   3rd Qu.:0.0000  
##  Max.   :24.000   Max.   :1.0000   Max.   :133.00   Max.   :1.0000  
##                   NA's   :9                                         
##     Edad_cat      Blancos_cat         TEL       
##  Min.   :1.000   Min.   :1.000   Min.   :0.000  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:0.000  
##  Median :1.000   Median :1.000   Median :0.000  
##  Mean   :1.463   Mean   :1.342   Mean   :0.131  
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:0.000  
##  Max.   :2.000   Max.   :2.000   Max.   :1.000  
##                                  NA's   :401

Análisis exploratorio de datos

## Warning: NAs introducidos por coerción

## tableby Object
## 
## Function Call:
## tableby(formula = CAT_ERM ~ Sexo + Down + MLL + RTA_PRED + Estirpe + 
##     Edad_cat + TEL + Blancos_cat + MO + SNC + MO + Ploidia + 
##     Blastos, data = datos)
## 
## Variable(s):
## CAT_ERM ~ Sexo, Down, MLL, RTA_PRED, Estirpe, Edad_cat, TEL, Blancos_cat, MO, SNC, Ploidia, Blastos

## 
## 
## |             |    1 (N=656)     |    2 (N=806)     |    3 (N=280)     |  Total (N=1742)  | p value|
## |:------------|:----------------:|:----------------:|:----------------:|:----------------:|-------:|
## |Sexo         |                  |                  |                  |                  |   0.040|
## |-  0         |   317 (48.3%)    |   343 (42.6%)    |   115 (41.1%)    |   775 (44.5%)    |        |
## |-  1         |   339 (51.7%)    |   463 (57.4%)    |   165 (58.9%)    |   967 (55.5%)    |        |
## |Down         |                  |                  |                  |                  |   0.530|
## |-  0         |   645 (98.3%)    |   794 (98.5%)    |   273 (97.5%)    |   1712 (98.3%)   |        |
## |-  1         |    11 (1.7%)     |    12 (1.5%)     |     7 (2.5%)     |    30 (1.7%)     |        |
## |MLL          |                  |                  |                  |                  |   0.479|
## |-  N-Miss    |        4         |        4         |        0         |        8         |        |
## |-  0         |   642 (98.5%)    |   794 (99.0%)    |   278 (99.3%)    |   1714 (98.8%)   |        |
## |-  1         |    10 (1.5%)     |     8 (1.0%)     |     2 (0.7%)     |    20 (1.2%)     |        |
## |RTA_PRED     |                  |                  |                  |                  | < 0.001|
## |-  0         |    22 (3.4%)     |    71 (8.8%)     |   100 (35.7%)    |   193 (11.1%)    |        |
## |-  1         |   634 (96.6%)    |   735 (91.2%)    |   180 (64.3%)    |   1549 (88.9%)   |        |
## |Estirpe      |                  |                  |                  |                  | < 0.001|
## |-  B         |   598 (91.2%)    |   730 (90.6%)    |   217 (77.5%)    |   1545 (88.7%)   |        |
## |-  T         |    58 (8.8%)     |    76 (9.4%)     |    63 (22.5%)    |   197 (11.3%)    |        |
## |Edad_cat     |                  |                  |                  |                  | < 0.001|
## |-  1         |   351 (53.5%)    |   454 (56.3%)    |   116 (41.4%)    |   921 (52.9%)    |        |
## |-  2         |   305 (46.5%)    |   352 (43.7%)    |   164 (58.6%)    |   821 (47.1%)    |        |
## |TEL          |                  |                  |                  |                  |   0.062|
## |-  N-Miss    |       109        |       101        |        40        |       250        |        |
## |-  0         |   476 (87.0%)    |   596 (84.5%)    |   217 (90.4%)    |   1289 (86.4%)   |        |
## |-  1         |    71 (13.0%)    |   109 (15.5%)    |    23 (9.6%)     |   203 (13.6%)    |        |
## |Blancos_cat  |                  |                  |                  |                  | < 0.001|
## |-  1         |   470 (71.6%)    |   542 (67.2%)    |   134 (47.9%)    |   1146 (65.8%)   |        |
## |-  2         |   186 (28.4%)    |   264 (32.8%)    |   146 (52.1%)    |   596 (34.2%)    |        |
## |MO           |                  |                  |                  |                  |   0.004|
## |-  Mean (SD) | 85.294 (16.320)  | 87.618 (13.769)  | 88.114 (15.429)  | 86.823 (15.081)  |        |
## |-  Range     | 10.000 - 100.000 | 25.000 - 100.000 | 14.000 - 100.000 | 10.000 - 100.000 |        |
## |SNC          |                  |                  |                  |                  |   0.254|
## |-  1         |   625 (95.3%)    |   769 (95.4%)    |   258 (92.1%)    |   1652 (94.8%)   |        |
## |-  2         |     7 (1.1%)     |     9 (1.1%)     |     4 (1.4%)     |    20 (1.1%)     |        |
## |-  3         |    24 (3.7%)     |    28 (3.5%)     |    18 (6.4%)     |    70 (4.0%)     |        |
## |Ploidia      |                  |                  |                  |                  | < 0.001|
## |-  1         |    98 (14.9%)    |   169 (21.0%)    |    53 (18.9%)    |   320 (18.4%)    |        |
## |-  2         |   340 (51.8%)    |   313 (38.8%)    |   105 (37.5%)    |   758 (43.5%)    |        |
## |-  4         |   197 (30.0%)    |   303 (37.6%)    |   117 (41.8%)    |   617 (35.4%)    |        |
## |-  5         |    21 (3.2%)     |    21 (2.6%)     |     5 (1.8%)     |    47 (2.7%)     |        |
## |Blastos      |                  |                  |                  |                  | < 0.001|
## |-  N-Miss    |        7         |        6         |        0         |        13        |        |
## |-  Mean (SD) | 41.757 (34.808)  | 49.990 (34.967)  | 58.454 (36.262)  | 48.270 (35.583)  |        |
## |-  Range     | 0.000 - 100.000  | 0.000 - 100.000  | 0.000 - 100.000  | 0.000 - 100.000  |        |

## tableby Object
## 
## Function Call:
## tableby(formula = CAT_ERM ~ Sexo + Down + MLL + RTA_PRED + Estirpe + 
##     Edad + TEL + Blancos + MO + SNC + MO + Ploidia + Blastos, 
##     data = datos)
## 
## Variable(s):
## CAT_ERM ~ Sexo, Down, MLL, RTA_PRED, Estirpe, Edad, TEL, Blancos, MO, SNC, Ploidia, Blastos

## 
## 
## |             |    1 (N=656)     |    2 (N=806)     |    3 (N=280)     |  Total (N=1742)  | p value|
## |:------------|:----------------:|:----------------:|:----------------:|:----------------:|-------:|
## |Sexo         |                  |                  |                  |                  |   0.040|
## |-  0         |   317 (48.3%)    |   343 (42.6%)    |   115 (41.1%)    |   775 (44.5%)    |        |
## |-  1         |   339 (51.7%)    |   463 (57.4%)    |   165 (58.9%)    |   967 (55.5%)    |        |
## |Down         |                  |                  |                  |                  |   0.530|
## |-  0         |   645 (98.3%)    |   794 (98.5%)    |   273 (97.5%)    |   1712 (98.3%)   |        |
## |-  1         |    11 (1.7%)     |    12 (1.5%)     |     7 (2.5%)     |    30 (1.7%)     |        |
## |MLL          |                  |                  |                  |                  |   0.479|
## |-  N-Miss    |        4         |        4         |        0         |        8         |        |
## |-  0         |   642 (98.5%)    |   794 (99.0%)    |   278 (99.3%)    |   1714 (98.8%)   |        |
## |-  1         |    10 (1.5%)     |     8 (1.0%)     |     2 (0.7%)     |    20 (1.2%)     |        |
## |RTA_PRED     |                  |                  |                  |                  | < 0.001|
## |-  0         |    22 (3.4%)     |    71 (8.8%)     |   100 (35.7%)    |   193 (11.1%)    |        |
## |-  1         |   634 (96.6%)    |   735 (91.2%)    |   180 (64.3%)    |   1549 (88.9%)   |        |
## |Estirpe      |                  |                  |                  |                  | < 0.001|
## |-  B         |   598 (91.2%)    |   730 (90.6%)    |   217 (77.5%)    |   1545 (88.7%)   |        |
## |-  T         |    58 (8.8%)     |    76 (9.4%)     |    63 (22.5%)    |   197 (11.3%)    |        |
## |Edad         |                  |                  |                  |                  | < 0.001|
## |-  Mean (SD) |  6.552 (4.476)   |  6.347 (4.495)   |  7.821 (4.760)   |  6.661 (4.558)   |        |
## |-  Range     |  0.000 - 19.000  |  0.000 - 24.000  |  0.000 - 22.000  |  0.000 - 24.000  |        |
## |TEL          |                  |                  |                  |                  |   0.062|
## |-  N-Miss    |       109        |       101        |        40        |       250        |        |
## |-  0         |   476 (87.0%)    |   596 (84.5%)    |   217 (90.4%)    |   1289 (86.4%)   |        |
## |-  1         |    71 (13.0%)    |   109 (15.5%)    |    23 (9.6%)     |   203 (13.6%)    |        |
## |Blancos      |                  |                  |                  |                  | < 0.001|
## |-  Mean (SD) | 27.142 (55.788)  | 36.916 (84.408)  | 80.611 (141.502) | 40.258 (89.475)  |        |
## |-  Range     | 0.000 - 529.000  | 0.000 - 1165.000 | 0.000 - 957.000  | 0.000 - 1165.000 |        |
## |MO           |                  |                  |                  |                  |   0.004|
## |-  Mean (SD) | 85.294 (16.320)  | 87.618 (13.769)  | 88.114 (15.429)  | 86.823 (15.081)  |        |
## |-  Range     | 10.000 - 100.000 | 25.000 - 100.000 | 14.000 - 100.000 | 10.000 - 100.000 |        |
## |SNC          |                  |                  |                  |                  |   0.254|
## |-  1         |   625 (95.3%)    |   769 (95.4%)    |   258 (92.1%)    |   1652 (94.8%)   |        |
## |-  2         |     7 (1.1%)     |     9 (1.1%)     |     4 (1.4%)     |    20 (1.1%)     |        |
## |-  3         |    24 (3.7%)     |    28 (3.5%)     |    18 (6.4%)     |    70 (4.0%)     |        |
## |Ploidia      |                  |                  |                  |                  | < 0.001|
## |-  1         |    98 (14.9%)    |   169 (21.0%)    |    53 (18.9%)    |   320 (18.4%)    |        |
## |-  2         |   340 (51.8%)    |   313 (38.8%)    |   105 (37.5%)    |   758 (43.5%)    |        |
## |-  4         |   197 (30.0%)    |   303 (37.6%)    |   117 (41.8%)    |   617 (35.4%)    |        |
## |-  5         |    21 (3.2%)     |    21 (2.6%)     |     5 (1.8%)     |    47 (2.7%)     |        |
## |Blastos      |                  |                  |                  |                  | < 0.001|
## |-  N-Miss    |        7         |        6         |        0         |        13        |        |
## |-  Mean (SD) | 41.757 (34.808)  | 49.990 (34.967)  | 58.454 (36.262)  | 48.270 (35.583)  |        |
## |-  Range     | 0.000 - 100.000  | 0.000 - 100.000  | 0.000 - 100.000  | 0.000 - 100.000  |        |

##                         
##                          Overall             
##   n                      2096                
##   Edad (median [IQR])     5.00 [3.00, 10.00] 
##   MO (median [IQR])      90.00 [84.00, 96.00]
##   Blancos (median [IQR])  9.00 [4.00, 33.00] 
##   Blastos (median [IQR]) 50.00 [12.00, 82.50]

Pendiente: revisar la tabla (tableby) incluyendo los datos faltantes de CAT_ERM.

## Análisis de supuestos y tests

Variables categóricas

# Cross-tabulate the ERM category (rows) against the `Down` indicator (columns)
# and show every cell as a proportion of the grand total.
tabla1 <- table(datos[["CAT_ERM"]], datos[["Down"]])
prop.table(tabla1)

##    
##               0           1
##   1 0.370264064 0.006314581
##   2 0.455797933 0.006888634
##   3 0.156716418 0.004018370

# Mosaic plot of the ERM-by-Down table, then Pearson's chi-squared test.
plot(
  tabla1,
  main = "Categorias de ERM vs Down",
  col = c("red", "blue")
)

chisq.test(tabla1)

## Warning in chisq.test(tabla1): Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  tabla1
## X-squared = 1.2682, df = 2, p-value = 0.5304

# The chi-squared assumptions are not met, so Fisher's exact test is used instead.
fisher.test(tabla1)

## 
##  Fisher's Exact Test for Count Data
## 
## data:  tabla1
## p-value = 0.479
## alternative hypothesis: two.sided

# Contingency table of `Ploidia` by ERM category, its mosaic plot,
# and Pearson's chi-squared test of independence.
tabla2 <- table(datos[["Ploidia"]], datos[["CAT_ERM"]])
plot(
  tabla2,
  main = "Categorias de ERM vs Ploidia",
  col = c("red", "blue", "yellow")
)

chisq.test(tabla2)

## 
##  Pearson's Chi-squared test
## 
## data:  tabla2
## X-squared = 35.184, df = 6, p-value = 3.97e-06

# Contingency table of `RTA_PRED` by ERM category, its mosaic plot,
# and Pearson's chi-squared test of independence.
tabla3 <- table(datos[["RTA_PRED"]], datos[["CAT_ERM"]])
plot(
  tabla3,
  main = "Categorias de ERM vs Rta predinisona",
  col = c("red", "blue", "yellow")
)

chisq.test(tabla3)

## 
##  Pearson's Chi-squared test
## 
## data:  tabla3
## X-squared = 216.44, df = 2, p-value < 2.2e-16

# Contingency table of `Sexo` by ERM category, its mosaic plot,
# and Pearson's chi-squared test of independence.
tabla4 <- table(datos[["Sexo"]], datos[["CAT_ERM"]])
plot(
  tabla4,
  main = "Categorias de ERM vs Sexo",
  col = c("red", "blue", "yellow")
)

chisq.test(tabla4)

## 
##  Pearson's Chi-squared test
## 
## data:  tabla4
## X-squared = 6.4488, df = 2, p-value = 0.03978

# Contingency table of `SNC` by ERM category and its mosaic plot.
tabla5 <- table(datos$SNC, datos$CAT_ERM)
plot(tabla5, col = c("red", "blue", "yellow"), main = "Categorias de ERM vs SNC")

# The chi-squared assumptions are not met (sparse SNC categories), so Fisher's
# exact test is used instead.
# The authors noted that a plain fisher.test(tabla5) stops with an error and
# returns nothing - presumably the exact algorithm runs out of workspace on a
# 3 x 3 table of this size (TODO confirm). The p-value is therefore estimated by
# Monte Carlo simulation, which does not depend on the workspace limit.
fisher.test(tabla5, simulate.p.value = TRUE, B = 10000)

# Contingency table of `MLL` by ERM category, its mosaic plot,
# and Pearson's chi-squared test of independence.
tabla6 <- table(datos[["MLL"]], datos[["CAT_ERM"]])
plot(
  tabla6,
  main = "Categorias de ERM vs MLL",
  col = c("red", "blue", "yellow")
)

chisq.test(tabla6)

## Warning in chisq.test(tabla6): Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  tabla6
## X-squared = 1.4718, df = 2, p-value = 0.4791

# The chi-squared assumptions are not met, so Fisher's exact test is used instead.
fisher.test(tabla6)

## 
##  Fisher's Exact Test for Count Data
## 
## data:  tabla6
## p-value = 0.5936
## alternative hypothesis: two.sided

# Contingency table of `TEL` by ERM category, its mosaic plot,
# and Pearson's chi-squared test of independence.
tabla7 <- table(datos$TEL, datos$CAT_ERM)
# Plot tabla7 (TEL); the earlier version drew tabla6 (MLL) under the TEL title.
plot(tabla7, col = c("red", "blue", "yellow"), main = "Categorias de ERM vs TEL")

chisq.test(tabla7)

## 
##  Pearson's Chi-squared test
## 
## data:  tabla7
## X-squared = 5.5501, df = 2, p-value = 0.06235

# Contingency table of `Estirpe` by ERM category, its mosaic plot,
# and Pearson's chi-squared test of independence.
tabla8 <- table(datos[["Estirpe"]], datos[["CAT_ERM"]])
plot(
  tabla8,
  main = "Categorias de ERM vs Estirpe",
  col = c("red", "blue", "yellow")
)

chisq.test(tabla8)

## 
##  Pearson's Chi-squared test
## 
## data:  tabla8
## X-squared = 41.784, df = 2, p-value = 8.448e-10

# Contingency table of `Edad_cat` by ERM category, its mosaic plot,
# and Pearson's chi-squared test of independence.
tabla9 <- table(datos[["Edad_cat"]], datos[["CAT_ERM"]])
plot(
  tabla9,
  main = "Categorias de ERM vs Edad",
  col = c("red", "blue", "yellow")
)

chisq.test(tabla9)

## 
##  Pearson's Chi-squared test
## 
## data:  tabla9
## X-squared = 18.683, df = 2, p-value = 8.769e-05

# Contingency table of `Blancos_cat` by ERM category, its mosaic plot,
# and Pearson's chi-squared test of independence.
tabla10 <- table(datos[["Blancos_cat"]], datos[["CAT_ERM"]])
plot(
  tabla10,
  main = "Categorias de ERM vs Globulos Blancos",
  col = c("red", "blue", "yellow")
)

chisq.test(tabla10)

## 
##  Pearson's Chi-squared test
## 
## data:  tabla10
## X-squared = 50.76, df = 2, p-value = 9.495e-12

Variables continuas

# Normality checks
# Edad: histogram followed by a Shapiro-Wilk normality test.
hist(datos$Edad)

shapiro.test(datos$Edad)

## 
##  Shapiro-Wilk normality test
## 
## data:  datos$Edad
## W = 0.90726, p-value < 2.2e-16

# Residual analysis
# Linear model of Edad on the ERM category.
modedad <- lm(Edad ~ CAT_ERM, data = datos)
# Build a table with the raw residuals, the standardized residuals and the
# fitted values.
residuos <- resid(modedad)
r_estandarizados <- rstandard(modedad)
predichos <- fitted(modedad)
# data.frame() binds the three vectors as columns. as.data.frame() would read
# the 2nd and 3rd arguments as `row.names` and `optional`, keeping only the
# first vector and emitting warnings.
data_residuos <- data.frame(residuos, r_estandarizados, predichos)

## Warning in as.data.frame.numeric(residuos, r_estandarizados, predichos):
## 'row.names' is not a character vector of length 1742 -- omitting it. Will be an
## error!

## Warning in if (!optional) names(value) <- nm: la condición tiene longitud > 1 y
## sólo el primer elemento será usado

# Shapiro-Wilk normality test on the model residuals.
shapiro.test(residuos)

## 
##  Shapiro-Wilk normality test
## 
## data:  residuos
## W = 0.91802, p-value < 2.2e-16

# Normal Q-Q plot of the residuals with its reference line.
qqnorm(residuos)
qqline(residuos)

# The residuals are not normally distributed.

# Graphical check of residual normality: histogram of the residuals.
ggplot(data = data_residuos) +
  geom_histogram(aes(x = residuos))

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Normal Q-Q plot of the residuals (same two calls as earlier in this section).
qqnorm(residuos)
qqline(residuos)

# Standardized residuals plotted against the fitted values.
ggplot(data_residuos)+
  geom_point(
    mapping = aes(x=predichos, y=r_estandarizados))

# Blastos: histogram followed by a Shapiro-Wilk normality test.
hist(datos$Blastos)

shapiro.test(datos$Blastos)

## 
##  Shapiro-Wilk normality test
## 
## data:  datos$Blastos
## W = 0.89253, p-value < 2.2e-16

# Residual analysis
# Linear model of Blastos on the ERM category.
modblast <- lm(Blastos ~ CAT_ERM, data = datos)
# Build a table with the raw residuals, the standardized residuals and the
# fitted values.
residuos1 <- resid(modblast)
r_estandarizados1 <- rstandard(modblast)
predichos1 <- fitted(modblast)
# Use this model's own residuals (residuos1, not residuos from the Edad model)
# and data.frame() so that all three vectors become columns; as.data.frame()
# would read the 2nd and 3rd arguments as `row.names` and `optional`.
data_residuos1 <- data.frame(residuos1, r_estandarizados1, predichos1)

## Warning in as.data.frame.numeric(residuos, r_estandarizados1, predichos1):
## 'row.names' is not a character vector of length 1742 -- omitting it. Will be an
## error!

## Warning in as.data.frame.numeric(residuos, r_estandarizados1, predichos1): la
## condición tiene longitud > 1 y sólo el primer elemento será usado

# Shapiro-Wilk normality test on the residuals of the Blastos model.
shapiro.test(residuos1)

## 
##  Shapiro-Wilk normality test
## 
## data:  residuos1
## W = 0.91901, p-value < 2.2e-16

# The residuals are not normally distributed.


# Normal Q-Q plot of the Blastos-model residuals with its reference line.
qqnorm(residuos1)
qqline(residuos1)

# Blancos: histogram followed by a Shapiro-Wilk normality test.
hist(datos$Blancos)

shapiro.test(datos$Blancos)

## 
##  Shapiro-Wilk normality test
## 
## data:  datos$Blancos
## W = 0.44578, p-value < 2.2e-16

# Residual analysis
# Linear model of Blancos on the ERM category.
modblanc <- lm(Blancos ~ CAT_ERM, data = datos)
# Build a table with the raw residuals, the standardized residuals and the
# fitted values.
residuos2 <- resid(modblanc)
r_estandarizados2 <- rstandard(modblanc)
predichos2 <- fitted(modblanc)
# data.frame() binds the three vectors as columns. as.data.frame() would read
# the 2nd and 3rd arguments as `row.names` and `optional`, keeping only the
# first vector and emitting warnings.
data_residuos2 <- data.frame(residuos2, r_estandarizados2, predichos2)

## Warning in as.data.frame.numeric(residuos2, r_estandarizados2, predichos2):
## 'row.names' is not a character vector of length 1742 -- omitting it. Will be an
## error!

## Warning in as.data.frame.numeric(residuos2, r_estandarizados2, predichos2): la
## condición tiene longitud > 1 y sólo el primer elemento será usado

# Shapiro-Wilk normality test on the residuals of the Blancos model.
shapiro.test(residuos2)

## 
##  Shapiro-Wilk normality test
## 
## data:  residuos2
## W = 0.52884, p-value < 2.2e-16

# Normal Q-Q plot of the Blancos-model residuals with its reference line.
qqnorm(residuos2)
qqline(residuos2)

# The residuals are not normally distributed.

# Graphical check of residual normality: histogram of the residuals.
ggplot(data = data_residuos2) +
  geom_histogram(aes(x = residuos2))

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Normal Q-Q plot of the Blancos-model residuals (same two calls as earlier
# in this section).
qqnorm(residuos2)
qqline(residuos2)

# Standardized residuals plotted against the fitted values.
ggplot(data_residuos2)+
  geom_point(
    mapping = aes(x=predichos2, y=r_estandarizados2))

# MO: histogram followed by a Shapiro-Wilk normality test.
hist(datos$MO)

shapiro.test(datos$MO)

## 
##  Shapiro-Wilk normality test
## 
## data:  datos$MO
## W = 0.74845, p-value < 2.2e-16

# Residual analysis
# Linear model of MO on the ERM category.
modMO <- lm(MO ~ CAT_ERM, data = datos)
# Build a table with the raw residuals, the standardized residuals and the
# fitted values.
residuos3 <- resid(modMO)
r_estandarizados3 <- rstandard(modMO)
predichos3 <- fitted(modMO)
# data.frame() binds the three vectors as columns. as.data.frame() would read
# the 2nd and 3rd arguments as `row.names` and `optional`, keeping only the
# first vector and emitting warnings.
data_residuos3 <- data.frame(residuos3, r_estandarizados3, predichos3)

## Warning in as.data.frame.numeric(residuos3, r_estandarizados3, predichos3):
## 'row.names' is not a character vector of length 1742 -- omitting it. Will be an
## error!

## Warning in as.data.frame.numeric(residuos3, r_estandarizados3, predichos3): la
## condición tiene longitud > 1 y sólo el primer elemento será usado

# Shapiro-Wilk normality test on the residuals of the MO model.
shapiro.test(residuos3)

## 
##  Shapiro-Wilk normality test
## 
## data:  residuos3
## W = 0.75815, p-value < 2.2e-16

# The residuals are not normally distributed.
# Normal Q-Q plot of the MO-model residuals with its reference line.
qqnorm(residuos3)
qqline(residuos3)

# Graphical check of residual normality: histogram of the residuals.
ggplot(data = data_residuos3) +
  geom_histogram(aes(x = residuos3))

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Normal Q-Q plot of the MO-model residuals with its reference line.
qqnorm(residuos3)
qqline(residuos3)

# Standardized residuals plotted against the fitted values.
ggplot(data = data_residuos3) +
  geom_point(aes(x = predichos3, y = r_estandarizados3))

#Evaluacion de homocedasticidad
library(car)

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

## The following object is masked from 'package:purrr':
## 
##     some

# Edad: Levene's test for homogeneity of variance across ERM categories.
leveneTest(Edad ~ CAT_ERM, data = datos)

## Levene's Test for Homogeneity of Variance (center = median)
##         Df F value  Pr(>F)  
## group    2  4.1252 0.01632 *
##       1739                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# p-value < 0.05: there is evidence that the homogeneity-of-variance
# assumption is violated (Edad).

# Test

# Blastos: Levene's test for homogeneity of variance across ERM categories.
leveneTest(Blastos ~ CAT_ERM, data = datos)

## Levene's Test for Homogeneity of Variance (center = median)
##         Df F value Pr(>F)
## group    2  0.0868 0.9168
##       1726

# p-value > 0.05: no evidence that the homogeneity-of-variance assumption
# is violated (Blastos).

# Test
# Blancos: Levene's test for homogeneity of variance across ERM categories.
leveneTest(Blancos ~ CAT_ERM, data = datos)

## Levene's Test for Homogeneity of Variance (center = median)
##         Df F value    Pr(>F)    
## group    2  34.944 1.321e-15 ***
##       1739                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# p-value < 0.05: there is evidence that the homogeneity-of-variance
# assumption is violated (Blancos).

# Test
# MO: Levene's test for homogeneity of variance across ERM categories.
leveneTest(MO ~ CAT_ERM, data = datos)

## Levene's Test for Homogeneity of Variance (center = median)
##         Df F value  Pr(>F)  
## group    2  3.4411 0.03225 *
##       1739                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# p-value < 0.05: there is evidence that the homogeneity-of-variance
# assumption is violated (MO).

# Test

Análisis e imputación de datos faltantes

require(dplyr)
library(car)
library(lattice)
library(mice)

## 
## Attaching package: 'mice'

## The following object is masked from 'package:stats':
## 
##     filter

## The following objects are masked from 'package:base':
## 
##     cbind, rbind

library(rlist)
library(naniar)
library(glmmTMB)

## Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
## TMB was built with Matrix version 1.5.1
## Current Matrix version is 1.4.0
## Please re-install 'TMB' from source using install.packages('TMB', type = 'source') or ask CRAN for a binary version of 'TMB' matching CRAN's 'Matrix' package

library(VIM)

## Loading required package: colorspace

## Loading required package: grid

## VIM is ready to use.

## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues

## 
## Attaching package: 'VIM'

## The following object is masked from 'package:datasets':
## 
##     sleep

require(ggplot2)
library(visdat)
library(naniar)
library(rpart)
library(rpart.plot)
require(arsenal)


library(visdat)
# Missingness map with columns ordered by amount of missing data.
# `sort_miss` is spelled out in full instead of relying on partial matching
# of `sort`.
vis_miss(datos, sort_miss = TRUE)

# Missingness map with rows clustered by missingness pattern.
vis_miss(datos, cluster = TRUE)

# Overview of column types together with missing cells.
vis_dat(datos)

# Number of missing values in each column.
colSums(is.na(datos))

##        Sexo        Down     Blancos     Blastos          MO         SNC 
##           0           0           0          13           0           0 
##     Ploidia     Estirpe    RTA_PRED     CAT_ERM   TiempoEFS   EFSstatus 
##           0           0           5         354           0           0 
##        Edad         MLL    TIEMPOSG    SGSTATUS    Edad_cat Blancos_cat 
##           0           9           0           0           0           0 
##         TEL 
##         401

# Total number of missing cells in the dataset.
n_miss(datos)

## [1] 782

# Total number of non-missing cells in the dataset.
n_complete(datos)

## [1] 39042

# Proportion (0-1) of cells that are missing.
prop_miss(datos)

## [1] 0.0196364

# Percentage (0-100) of cells that are missing.
pct_miss(datos)

## [1] 1.96364

# Proportion (0-1) of cells that are present.
prop_complete(datos)

## [1] 0.9803636

# Percentage (0-100) of cells that are present.
pct_complete(datos)

## [1] 98.03636

# Residual-disease category (CAT_ERM), categorical variable:
# proportion of missing values.
prop_miss(datos$CAT_ERM)

## [1] 0.1688931

# Percentage of CAT_ERM values that are present.
pct_complete(datos$CAT_ERM)

## [1] 83.11069

# TEL, categorical variable: proportion of missing values.
prop_miss(datos$TEL)

## [1] 0.1913168

# Percentage of TEL values that are present.
pct_complete(datos$TEL)

## [1] 80.86832

# MLL, categorical variable: proportion of missing values.
prop_miss(datos$MLL)

## [1] 0.004293893

# Percentage of MLL values that are present.
pct_complete(datos$MLL)

## [1] 99.57061

# Prednisone response (RTA_PRED), categorical variable:
# proportion of missing values.
prop_miss(datos$RTA_PRED)

## [1] 0.002385496

# Percentage of RTA_PRED values that are present.
pct_complete(datos$RTA_PRED)

## [1] 99.76145

# Blastos, quantitative variable: proportion of missing values.
prop_miss(datos$Blastos)

## [1] 0.00620229

# Percentage of Blastos values that are present.
pct_complete(datos$Blastos)

## [1] 99.37977

# Missing-value count and percentage for each variable.
miss_var_summary(datos)

## # A tibble: 19 x 3
##    variable    n_miss pct_miss
##    <chr>        <int>    <dbl>
##  1 TEL            401   19.1  
##  2 CAT_ERM        354   16.9  
##  3 Blastos         13    0.620
##  4 MLL              9    0.429
##  5 RTA_PRED         5    0.239
##  6 Sexo             0    0    
##  7 Down             0    0    
##  8 Blancos          0    0    
##  9 MO               0    0    
## 10 SNC              0    0    
## 11 Ploidia          0    0    
## 12 Estirpe          0    0    
## 13 TiempoEFS        0    0    
## 14 EFSstatus        0    0    
## 15 Edad             0    0    
## 16 TIEMPOSG         0    0    
## 17 SGSTATUS         0    0    
## 18 Edad_cat         0    0    
## 19 Blancos_cat      0    0

# Missing-value count and percentage for each row (case).
miss_case_summary(datos)

## # A tibble: 2,096 x 3
##     case n_miss pct_miss
##    <int>  <int>    <dbl>
##  1   498      3     15.8
##  2   555      3     15.8
##  3  1350      3     15.8
##  4     9      2     10.5
##  5   313      2     10.5
##  6   376      2     10.5
##  7   408      2     10.5
##  8   411      2     10.5
##  9   412      2     10.5
## 10   419      2     10.5
## # ... with 2,086 more rows

# Missingness map of the whole dataset.
vis_miss(datos)

# Same map with columns sorted by missingness and rows clustered.
# TRUE is written out because `T` is an ordinary variable that can be reassigned.
vis_miss(datos, sort_miss = TRUE, cluster = TRUE)

# Number of missing values per variable.
gg_miss_var(datos)

## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

# Missing values per variable, one panel per CAT_ERM level.
gg_miss_var(datos, facet = CAT_ERM)

## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

# Missing values per case (row).
gg_miss_case(datos)

# Missing values per case, one panel per CAT_ERM level.
gg_miss_case(datos, facet = CAT_ERM)

# Faceted by TEL

gg_miss_var(datos, facet = TEL)

## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

# Missing values per case, one panel per TEL level.
gg_miss_case(datos, facet = TEL)

# Faceted by MLL
gg_miss_var(datos, facet = MLL)

## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

# Missing values per case, one panel per MLL level.
gg_miss_case(datos, facet = MLL)

# Faceted by prednisone response (RTA_PRED)
gg_miss_var(datos, facet = RTA_PRED)

## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

gg_miss_case(datos, facet = RTA_PRED)

# Co-occurrence of missingness across variables: default view first, then
# allowing up to 20 sets and no cap on the number of intersections.
gg_miss_upset(datos)

gg_miss_upset(datos, nset = 20, nintersects = NA)

# Tabulate the distinct missing-data patterns.
md.pattern(datos, rotate.names = TRUE)

##      Sexo Down Blancos MO SNC Ploidia Estirpe TiempoEFS EFSstatus Edad TIEMPOSG
## 1472    1    1       1  1   1       1       1         1         1    1        1
## 249     1    1       1  1   1       1       1         1         1    1        1
## 200     1    1       1  1   1       1       1         1         1    1        1
## 148     1    1       1  1   1       1       1         1         1    1        1
## 12      1    1       1  1   1       1       1         1         1    1        1
## 1       1    1       1  1   1       1       1         1         1    1        1
## 8       1    1       1  1   1       1       1         1         1    1        1
## 1       1    1       1  1   1       1       1         1         1    1        1
## 3       1    1       1  1   1       1       1         1         1    1        1
## 2       1    1       1  1   1       1       1         1         1    1        1
##         0    0       0  0   0       0       0         0         0    0        0
##      SGSTATUS Edad_cat Blancos_cat RTA_PRED MLL Blastos CAT_ERM TEL    
## 1472        1        1           1        1   1       1       1   1   0
## 249         1        1           1        1   1       1       1   0   1
## 200         1        1           1        1   1       1       0   1   1
## 148         1        1           1        1   1       1       0   0   2
## 12          1        1           1        1   1       0       1   1   1
## 1           1        1           1        1   1       0       1   0   2
## 8           1        1           1        1   0       1       1   1   1
## 1           1        1           1        1   0       1       0   0   3
## 3           1        1           1        0   1       1       0   1   2
## 2           1        1           1        0   1       1       0   0   3
##             0        0           0        5   9      13     354 401 782

# Proportion of missing values per variable plus the combination pattern,
# with variables sorted by missingness.
aggr(
  datos,
  col = c("navyblue", "red"),
  numbers = TRUE,
  sortVars = TRUE,
  labels = names(datos),
  cex.axis = 0.7,
  gap = 3,
  ylab = c("Histograma de missing data", "Patron")
)

## Warning in plot.aggr(res, ...): not enough horizontal space to display
## frequencies

## 
##  Variables sorted by number of missings: 
##     Variable       Count
##          TEL 0.191316794
##      CAT_ERM 0.168893130
##      Blastos 0.006202290
##          MLL 0.004293893
##     RTA_PRED 0.002385496
##         Sexo 0.000000000
##         Down 0.000000000
##      Blancos 0.000000000
##           MO 0.000000000
##          SNC 0.000000000
##      Ploidia 0.000000000
##      Estirpe 0.000000000
##    TiempoEFS 0.000000000
##    EFSstatus 0.000000000
##         Edad 0.000000000
##     TIEMPOSG 0.000000000
##     SGSTATUS 0.000000000
##     Edad_cat 0.000000000
##  Blancos_cat 0.000000000

# Same aggregation plot, this time with varheight = TRUE.
aggr(
  datos,
  col = c("navyblue", "red"),
  varheight = TRUE,
  numbers = TRUE,
  sortVars = TRUE,
  labels = names(datos),
  cex.axis = 0.7,
  gap = 3,
  ylab = c("Histograma de missing data", "Patron")
)

## 
##  Variables sorted by number of missings: 
##     Variable       Count
##          TEL 0.191316794
##      CAT_ERM 0.168893130
##      Blastos 0.006202290
##          MLL 0.004293893
##     RTA_PRED 0.002385496
##         Sexo 0.000000000
##         Down 0.000000000
##      Blancos 0.000000000
##           MO 0.000000000
##          SNC 0.000000000
##      Ploidia 0.000000000
##      Estirpe 0.000000000
##    TiempoEFS 0.000000000
##    EFSstatus 0.000000000
##         Edad 0.000000000
##     TIEMPOSG 0.000000000
##     SGSTATUS 0.000000000
##     Edad_cat 0.000000000
##  Blancos_cat 0.000000000

# Shadow matrix: one <variable>_NA factor per column, flagging each cell as
# NA or !NA.
as_shadow(datos)

## # A tibble: 2,096 x 19
##    Sexo_NA Down_NA Blancos_NA Blastos_NA MO_NA SNC_NA Ploidia_NA Estirpe_NA
##    <fct>   <fct>   <fct>      <fct>      <fct> <fct>  <fct>      <fct>     
##  1 !NA     !NA     !NA        !NA        !NA   !NA    !NA        !NA       
##  2 !NA     !NA     !NA        !NA        !NA   !NA    !NA        !NA       
##  3 !NA     !NA     !NA        !NA        !NA   !NA    !NA        !NA       
##  4 !NA     !NA     !NA        !NA        !NA   !NA    !NA        !NA       
##  5 !NA     !NA     !NA        !NA        !NA   !NA    !NA        !NA       
##  6 !NA     !NA     !NA        !NA        !NA   !NA    !NA        !NA       
##  7 !NA     !NA     !NA        !NA        !NA   !NA    !NA        !NA       
##  8 !NA     !NA     !NA        !NA        !NA   !NA    !NA        !NA       
##  9 !NA     !NA     !NA        !NA        !NA   !NA    !NA        !NA       
## 10 !NA     !NA     !NA        !NA        !NA   !NA    !NA        !NA       
## # ... with 2,086 more rows, and 11 more variables: RTA_PRED_NA <fct>,
## #   CAT_ERM_NA <fct>, TiempoEFS_NA <fct>, EFSstatus_NA <fct>, Edad_NA <fct>,
## #   MLL_NA <fct>, TIEMPOSG_NA <fct>, SGSTATUS_NA <fct>, Edad_cat_NA <fct>,
## #   Blancos_cat_NA <fct>, TEL_NA <fct>

# Original columns bound side by side with their *_NA shadow columns.
nabular(datos) %>%
  glimpse()

## Rows: 2,096
## Columns: 38
## $ Sexo           <fct> 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1~
## $ Down           <fct> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ Blancos        <dbl> 3, 36, 10, 8, 12, 37, 203, 3, 1, 3, 13, 30, 52, 130, 6,~
## $ Blastos        <dbl> 2, 48, 95, 98, 22, 62, 93, 21, 0, 8, 27, 90, 84, 100, 0~
## $ MO             <dbl> 90, 50, 98, 98, 80, 95, 80, 85, 87, 54, 95, 92, 99, 100~
## $ SNC            <fct> 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 1, 3, 1~
## $ Ploidia        <fct> 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2~
## $ Estirpe        <fct> B, T, B, B, B, T, T, B, B, B, T, T, T, T, T, T, T, T, T~
## $ RTA_PRED       <fct> 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
## $ CAT_ERM        <fct> NA, 3, 3, 1, 1, 1, 3, 1, NA, NA, NA, 1, 2, 2, 2, NA, 1,~
## $ TiempoEFS      <dbl> 6, 1, 5, 10, 1, 5, 0, 7, 5, 2, 5, 1, 5, 1, 3, 2, 5, 5, ~
## $ EFSstatus      <dbl> 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0~
## $ Edad           <dbl> 2, 14, 4, 3, 1, 15, 13, 6, 8, 12, 5, 10, 3, 6, 14, 10, ~
## $ MLL            <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ TIEMPOSG       <dbl> 81, 16, 67, 131, 28, 69, 37, 86, 67, 36, 61, 23, 62, 26~
## $ SGSTATUS       <dbl> 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0~
## $ Edad_cat       <fct> 1, 2, 1, 1, 1, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 2~
## $ Blancos_cat    <fct> 1, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 2~
## $ TEL            <fct> 0, NA, 0, 0, NA, 0, 0, NA, NA, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ Sexo_NA        <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ Down_NA        <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ Blancos_NA     <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ Blastos_NA     <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ MO_NA          <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ SNC_NA         <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ Ploidia_NA     <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ Estirpe_NA     <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ RTA_PRED_NA    <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ CAT_ERM_NA     <fct> NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, NA, NA, NA, !NA,~
## $ TiempoEFS_NA   <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ EFSstatus_NA   <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ Edad_NA        <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ MLL_NA         <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ TIEMPOSG_NA    <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ SGSTATUS_NA    <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ Edad_cat_NA    <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ Blancos_cat_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ TEL_NA         <fct> !NA, NA, !NA, !NA, NA, !NA, !NA, NA, NA, !NA, !NA, !NA,~

# Fraction of rows that would be lost under a complete-case analysis.
n_inicial <- nrow(datos)
datos_prueba <- na.omit(datos)
n_final <- nrow(datos_prueba)
(n_inicial - n_final) / n_inicial

## [1] 0.2977099

# Multiple imputation by chained equations: 10 imputed data sets, 5 iterations.
# NOTE(review): no `seed` is passed; unless set.seed() was called earlier, the
# imputations (and every pooled estimate below) change between runs.
datos_imputados <- mice(
  datos,
  m = 10,
  defaultMethod = c("pmm", "logreg", "polyreg", "polr"),
  maxit = 5,
  printFlag = FALSE
)

# Compare the densities of observed and imputed values.
densityplot(datos_imputados)

# Fit the same logistic model on each imputed data set.
# glm.mids() is deprecated (mice itself warns "Use with(imp, glm(yourmodel)"),
# so the fit goes through with(); the result can still be passed to pool().
# RTA_PRED appeared twice in the original formula; the duplicate is dropped.
# NOTE(review): CAT_ERM has three levels; with family = binomial a factor
# response is treated as first level vs. all others - confirm this is intended.
modelo1 <- with(
  datos_imputados,
  glm(
    CAT_ERM ~ Sexo + Down + MLL + RTA_PRED + Estirpe + Edad + TEL + Blancos +
      MO + SNC + Ploidia + Blastos,
    family = binomial
  )
)

## Warning: Use with(imp, glm(yourmodel).

# Combine the per-imputation fits with pool() and show the pooled coefficients.
summary(pool(modelo1))

##           term     estimate    std.error  statistic         df      p.value
## 1  (Intercept)  1.197895790 0.4177689415  2.8673644  156.24620 4.710736e-03
## 2        Sexo1  0.185334249 0.0996360491  1.8601124  617.41333 6.334494e-02
## 3        Down1  0.139621549 0.3882097535  0.3596549  108.46106 7.198045e-01
## 4         MLL1 -0.385890895 0.4687881584 -0.8231669  239.54154 4.112318e-01
## 5    RTA_PRED1 -1.436631863 0.2325295630 -6.1782762  939.31168 9.641683e-10
## 6     EstirpeT -0.095501471 0.2072864817 -0.4607221  150.98616 6.456609e-01
## 7         Edad  0.002973366 0.0111912290  0.2656872 1023.39162 7.905336e-01
## 8         TEL1  0.175930225 0.1791213461  0.9821846   54.16466 3.303759e-01
## 9      Blancos  0.001432464 0.0008535576  1.6782276  444.33984 9.400580e-02
## 10          MO  0.006390716 0.0037530366  1.7028121  111.04476 9.140079e-02
## 11        SNC2 -0.114550480 0.4867394317 -0.2353425  680.26327 8.140137e-01
## 12        SNC3 -0.117906986 0.2882139377 -0.4090954  199.34170 6.829089e-01
## 13    Ploidia2 -0.543295758 0.1456493217 -3.7301633  767.42763 2.054558e-04
## 14    Ploidia4 -0.057316331 0.1498944596 -0.3823779 1148.61329 7.022518e-01
## 15    Ploidia5 -0.503153107 0.3086564950 -1.6301394  125.32086 1.055829e-01
## 16     Blastos  0.003945405 0.0017136440  2.3023482  210.30142 2.229394e-02

# TODO: refit using age and white-cell count as categorical variables

# Reduced model without Edad, Blancos and Blastos.
# Uses with() instead of the deprecated glm.mids(), and lists RTA_PRED once
# (it was duplicated in the original formula).
modelo1 <- with(
  datos_imputados,
  glm(
    CAT_ERM ~ Sexo + Down + MLL + RTA_PRED + Estirpe + TEL + MO + SNC + Ploidia,
    family = binomial
  )
)

## Warning: Use with(imp, glm(yourmodel).

# Pooled coefficients of the reduced model.
summary(pool(modelo1))

##           term    estimate   std.error   statistic         df      p.value
## 1  (Intercept)  1.26542243 0.413598229  3.05954510  152.56876 2.619337e-03
## 2        Sexo1  0.19371303 0.099361233  1.94958360  595.85322 5.169449e-02
## 3        Down1  0.17597528 0.385159338  0.45688956  110.31643 6.486488e-01
## 4         MLL1 -0.37717026 0.467859102 -0.80616207  236.23818 4.209602e-01
## 5    RTA_PRED1 -1.58591661 0.230543358 -6.87903840  727.89407 1.299494e-11
## 6     EstirpeT  0.10797762 0.190115141  0.56795910  132.51618 5.710238e-01
## 7         TEL1  0.16128011 0.178234908  0.90487386   52.13944 3.696946e-01
## 8           MO  0.00997229 0.003789159  2.63179532   67.36580 1.052036e-02
## 9         SNC2 -0.08504106 0.482473793 -0.17626047  678.06613 8.601419e-01
## 10        SNC3 -0.02567434 0.282963749 -0.09073367  204.44845 9.277930e-01
## 11    Ploidia2 -0.57149917 0.145818861 -3.91924043  669.10875 9.796815e-05
## 12    Ploidia4 -0.06370565 0.148679529 -0.42847626 1262.71163 6.683775e-01
## 13    Ploidia5 -0.50477263 0.307191176 -1.64318728  127.18784 1.028141e-01

# Same reduced model, fitted on every imputed data set through with().
# RTA_PRED was listed twice in the original formula; it is kept once here.
fit <- with(
  datos_imputados,
  glm(
    CAT_ERM ~ Sexo + Down + MLL + RTA_PRED + Estirpe + TEL + MO + SNC + Ploidia,
    family = binomial
  )
)
# Pooled estimates with within/between-imputation variance components.
print(pool(fit))

## Class: mipo    m = 10 
##           term  m    estimate         ubar            b            t dfcom
## 1  (Intercept) 10  1.26542243 1.315461e-01 3.592493e-02 1.710635e-01  2083
## 2        Sexo1 10  0.19371303 8.871114e-03 9.104917e-04 9.872655e-03  2083
## 3        Down1 10  0.17597528 1.075539e-01 3.708529e-02 1.483477e-01  2083
## 4         MLL1 10 -0.37717026 1.792397e-01 3.604768e-02 2.188921e-01  2083
## 5    RTA_PRED1 10 -1.58591661 4.850885e-02 4.219442e-03 5.315024e-02  2083
## 6     EstirpeT 10  0.10797762 2.713279e-02 8.191795e-03 3.614377e-02  2083
## 7         TEL1 10  0.16128011 1.885086e-02 1.174257e-02 3.176768e-02  2083
## 8           MO 10  0.00997229 9.243445e-06 4.649343e-06 1.435772e-05  2083
## 9         SNC2 10 -0.08504106 2.113086e-01 1.952033e-02 2.327810e-01  2083
## 10        SNC3 10 -0.02567434 6.432979e-02 1.430790e-02 8.006848e-02  2083
## 11    Ploidia2 10 -0.57149917 1.928195e-02 1.801078e-03 2.126314e-02  2083
## 12    Ploidia4 10 -0.06370565 2.098460e-02 1.019096e-03 2.210560e-02  2083
## 13    Ploidia5 10 -0.50477263 7.031550e-02 2.186447e-02 9.436642e-02  2083
##            df        riv     lambda        fmi
## 1   152.56876 0.30040749 0.23101027 0.24089644
## 2   595.85322 0.11289912 0.10144596 0.10444688
## 3   110.31643 0.37928717 0.27498782 0.28778406
## 4   236.23818 0.22122579 0.18115060 0.18799608
## 5   727.89407 0.09568122 0.08732579 0.08982320
## 6   132.51618 0.33210643 0.24930923 0.26038821
## 7    52.13944 0.68521169 0.40660274 0.42812626
## 8    67.36580 0.55328690 0.35620393 0.37450248
## 9   678.06613 0.10161614 0.09224278 0.09490848
## 10  204.44845 0.24465631 0.19656536 0.20431123
## 11  669.10875 0.10274821 0.09317468 0.09587313
## 12 1262.71163 0.05342042 0.05071140 0.05221140
## 13  127.18784 0.34204292 0.25486735 0.26631439

# Auto-prints the same pooled object that print(pool(fit)) showed above in the
# original report.
pool(fit)

## Class: mipo    m = 10 
##           term  m    estimate         ubar            b            t dfcom
## 1  (Intercept) 10  1.26542243 1.315461e-01 3.592493e-02 1.710635e-01  2083
## 2        Sexo1 10  0.19371303 8.871114e-03 9.104917e-04 9.872655e-03  2083
## 3        Down1 10  0.17597528 1.075539e-01 3.708529e-02 1.483477e-01  2083
## 4         MLL1 10 -0.37717026 1.792397e-01 3.604768e-02 2.188921e-01  2083
## 5    RTA_PRED1 10 -1.58591661 4.850885e-02 4.219442e-03 5.315024e-02  2083
## 6     EstirpeT 10  0.10797762 2.713279e-02 8.191795e-03 3.614377e-02  2083
## 7         TEL1 10  0.16128011 1.885086e-02 1.174257e-02 3.176768e-02  2083
## 8           MO 10  0.00997229 9.243445e-06 4.649343e-06 1.435772e-05  2083
## 9         SNC2 10 -0.08504106 2.113086e-01 1.952033e-02 2.327810e-01  2083
## 10        SNC3 10 -0.02567434 6.432979e-02 1.430790e-02 8.006848e-02  2083
## 11    Ploidia2 10 -0.57149917 1.928195e-02 1.801078e-03 2.126314e-02  2083
## 12    Ploidia4 10 -0.06370565 2.098460e-02 1.019096e-03 2.210560e-02  2083
## 13    Ploidia5 10 -0.50477263 7.031550e-02 2.186447e-02 9.436642e-02  2083
##            df        riv     lambda        fmi
## 1   152.56876 0.30040749 0.23101027 0.24089644
## 2   595.85322 0.11289912 0.10144596 0.10444688
## 3   110.31643 0.37928717 0.27498782 0.28778406
## 4   236.23818 0.22122579 0.18115060 0.18799608
## 5   727.89407 0.09568122 0.08732579 0.08982320
## 6   132.51618 0.33210643 0.24930923 0.26038821
## 7    52.13944 0.68521169 0.40660274 0.42812626
## 8    67.36580 0.55328690 0.35620393 0.37450248
## 9   678.06613 0.10161614 0.09224278 0.09490848
## 10  204.44845 0.24465631 0.19656536 0.20431123
## 11  669.10875 0.10274821 0.09317468 0.09587313
## 12 1262.71163 0.05342042 0.05071140 0.05221140
## 13  127.18784 0.34204292 0.25486735 0.26631439

#ver no da imputacion con cox, ni paquete dharma

Graficos

library(DT)
library(networkD3)

## 
## Attaching package: 'networkD3'

## The following object is masked from 'package:DT':
## 
##     JS

require(dplyr)
library(highcharter)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## Highcharts (www.highcharts.com) is a Highsoft software product which is

## not free for commercial and Governmental use

## 
## Attaching package: 'highcharter'

## The following object is masked from 'package:networkD3':
## 
##     JS

library(js)

# Residual disease categories

barplot(
  table(datos$CAT_ERM),
  col = c("red", "green", "blue"),
  main = "Categorias de Enfermedad Residual",
  xlab = "ERM",
  ylab = "Frecuencia"
)

#algun barplot
    
# Age

# Age by residual-disease category: boxplot with the raw points overlaid.
ggplot(datos, aes(x = CAT_ERM, y = Edad)) +
  geom_boxplot(aes(fill = CAT_ERM)) +
  geom_jitter(size = 2, position = position_jitter(width = 0.05)) +
  labs(x = "Categorias de ERM", y = "Edad", title = "Edad vs ERM")

# Density of age.
ggplot(datos, aes(x = Edad)) +
  geom_density(color = 5, lwd = 1, linetype = 1) +
  labs(x = "Edad", y = "Densidad")

# White blood cells

# White-cell count by residual-disease category. The title previously read
# "Edad vs ERM" (copied from the age plot); it now names the plotted variable.
ggplot(datos, aes(x = CAT_ERM, y = Blancos)) +
  geom_boxplot(aes(fill = CAT_ERM)) +
  geom_jitter(size = 2, position = position_jitter(width = 0.05)) +
  labs(
    x = "Categorias de ERM",
    y = "Globulos Blancos",
    title = "Globulos Blancos vs ERM"
  )

# Density of the white-cell count.
ggplot(datos, aes(x = Blancos)) +
  geom_density(color = 5, lwd = 1, linetype = 1) +
  labs(x = "Blancos", y = "Densidad")

# Blasts (peripheral blood)
ggplot(datos, aes(x = CAT_ERM, y = Blastos)) +
  geom_boxplot(aes(fill = CAT_ERM)) +
  geom_jitter(size = 2, position = position_jitter(width = 0.05)) +
  labs(x = "Categorias de ERM", y = "Blastos en sangre periferica")

## Warning: Removed 13 rows containing non-finite values (stat_boxplot).

## Warning: Removed 13 rows containing missing values (geom_point).

# Density of the peripheral-blood blast value (rows with NA are dropped by
# ggplot2 with a warning).
ggplot(datos, aes(x = Blastos)) +
  geom_density(color = 5, lwd = 1, linetype = 1) +
  labs(x = "Blastos en sangre periferica", y = "Densidad")

## Warning: Removed 13 rows containing non-finite values (stat_density).

# Bone marrow blasts (MO)

ggplot(datos, aes(x = CAT_ERM, y = MO)) +
  geom_boxplot(aes(fill = CAT_ERM)) +
  geom_jitter(size = 2, position = position_jitter(width = 0.05)) +
  labs(x = "Categorias de ERM", y = "Blastos en medula osea")

ggplot(datos, aes(x = MO)) +
  geom_density(color = 5, lwd = 1, linetype = 1) +
  labs(x = "Blastos en Medula Osea", y = "Densidad")

# Down syndrome
ggplot(datos, aes(x = Down, fill = CAT_ERM)) +
  geom_bar(position = "dodge", color = "black") +
  labs(x = "Down", y = "Frecuencia", title = "Distribución de ERM por  Down") +
  theme_bw(base_size = 14)

# Survival status as a labelled factor; reused by the Sankey plots below.
datos$evolucion <- factor(
  datos$SGSTATUS,
  levels = c(0, 1),
  labels = c("Vivo", "Fallecido")
)
# Sankey plot
datos$down <- factor(
  datos$Down,
  levels = c(0, 1),
  labels = c("NO DOWN", "DOWN")
)
data1 <- dplyr::select(datos, down, evolucion)
hchart(
  data_to_sankey(data1),
  "sankey",
  name = "Sobrevida segun sindrome de Down"
)

# Sex
ggplot(datos, aes(x = Sexo, fill = CAT_ERM)) +
  geom_bar(position = "dodge", color = "black") +
  labs(x = "Sexo", y = "Frecuencia", title = "Distribución de ERM por Sexo") +
  theme_bw(base_size = 14)

# Sankey plot
datos$sexo <- factor(
  datos$Sexo,
  levels = c(1, 0),
  labels = c("Masculino", "Femenino")
)

data <- dplyr::select(datos, sexo, evolucion)
hchart(data_to_sankey(data), "sankey", name = "Sobrevida segun sexo")

# SNC
ggplot(datos, aes(x = SNC, fill = CAT_ERM)) +
  geom_bar(position = "dodge", color = "black") +
  labs(x = "SNC", y = "Frecuencia", title = "Distribución de ERM por SNC") +
  theme_bw(base_size = 14)

# Lineage (Estirpe)
ggplot(datos, aes(x = Estirpe, fill = CAT_ERM)) +
  geom_bar(position = "dodge", color = "black") +
  labs(
    x = "Estirpe",
    y = "Frecuencia",
    title = "Distribución de ERM por Estirpe"
  ) +
  theme_bw(base_size = 14)

# Sankey plot
data2 <- dplyr::select(datos, Estirpe, evolucion)
hchart(
  data_to_sankey(data2),
  "sankey",
  name = "Sobrevida segun estirpe de leucemia"
)

# Prednisone response
ggplot(datos, aes(x = RTA_PRED, fill = CAT_ERM)) +
  geom_bar(position = "dodge", color = "black") +
  labs(
    x = "Rta Prednisona",
    y = "Frecuencia",
    title = "Distribución de ERM por Rta a la Prednisona"
  ) +
  theme_bw(base_size = 14)

# Sankey plot
data3 <- dplyr::select(datos, RTA_PRED, evolucion)
hchart(
  data_to_sankey(data3),
  "sankey",
  name = "Sobrevida segun respuesta a prednisona"
)

# TEL
ggplot(datos, aes(x = TEL, fill = CAT_ERM)) +
  geom_bar(position = "dodge", color = "black") +
  labs(x = "TEL", y = "Frecuencia", title = "Distribución de ERM por TEL") +
  theme_bw(base_size = 14)

# Sankey plot
data4 <- dplyr::select(datos, TEL, evolucion)
hchart(data_to_sankey(data4), "sankey", name = "Sobrevida segun TEL")

# MLL
ggplot(datos, aes(x = MLL, fill = CAT_ERM)) +
  geom_bar(position = "dodge", color = "black") +
  labs(x = "MLL", y = "Frecuencia", title = "Distribución de ERM por MLL") +
  theme_bw(base_size = 14)

# Sankey plot
data5 <- dplyr::select(datos, MLL, evolucion)
hchart(data_to_sankey(data5), "sankey", name = "Sobrevida segun MLL")

# Ploidy
ggplot(datos, aes(x = Ploidia, fill = CAT_ERM)) +
  geom_bar(position = "dodge", color = "black") +
  labs(
    x = "Ploidia",
    y = "Frecuencia",
    title = "Distribución de ERM por Ploidia"
  ) +
  theme_bw(base_size = 14)

# Age as a categorical variable
ggplot(datos, aes(x = Edad_cat, fill = CAT_ERM)) +
  geom_bar(position = "dodge", color = "black") +
  labs(x = "Edad", y = "Frecuencia", title = "Distribución de ERM por edad") +
  theme_bw(base_size = 14)

# Sankey plot

data6 <- dplyr::select(datos, Edad_cat, evolucion)
hchart(data_to_sankey(data6), "sankey", name = "Sobrevida segun edad")

# White blood cell count as a categorical variable

ggplot(datos, aes(x = Blancos_cat, fill = CAT_ERM)) +
  geom_bar(position = "dodge", color = "black") +
  labs(
    x = "Globulos Blancos",
    y = "Frecuencia",
    title = "Distribución de ERM por globulos blancos"
  ) +
  theme_bw(base_size = 14)

# Sankey plot

data7 <- dplyr::select(datos, Blancos_cat, evolucion)
hchart(
  data_to_sankey(data7),
  "sankey",
  name = "Sobrevida segun globulos blancos"
)

# NOTE(review): sankeyNetworkOutput() is networkD3's Shiny output binding; its
# first argument is an output id, so this call does not write "TF-ceecs.html"
# to disk. If the intent was to export a plot, confirm and use a widget-saving
# function instead.
sankeyNetworkOutput("TF-ceecs.html", width = "500px", height = "1000px")

#revisar sankey plot (en tel y mll que tienen NA)

#Swimmer plot

library(SwimmeR)
library(swimplot)

Observaciones de los graficos:

Se puede observar que la mayor cantidad de pacientes se encuentra en la categoría 2 (ERM intermedio).

Random forest para seleccion de variables

require(ggplot2)
require(dplyr)
library(randomForest)

## randomForest 4.7-1.1

## Type rfNews() to see new features/changes/bug fixes.

## 
## Attaching package: 'randomForest'

## The following object is masked from 'package:dplyr':
## 
##     combine

## The following object is masked from 'package:ggplot2':
## 
##     margin

# Random forest used to rank predictors of TIEMPOSG. Missing predictor values
# are filled by na.roughfix, and importance = TRUE stores the %IncMSE measure.
# RTA_PRED was listed twice in the original formula; it is kept once here.
# NOTE(review): TIEMPOSG is modelled as a plain numeric response and SGSTATUS is
# not used, so censoring is ignored; also no seed is set in this block, so
# importances vary between runs unless set.seed() was called earlier.
rf <- randomForest(
  TIEMPOSG ~ CAT_ERM + Sexo + Down + RTA_PRED + Estirpe + Edad + TEL +
    Blancos + SNC + Ploidia + Blastos,
  data = datos,
  importance = TRUE,
  na.action = na.roughfix
)
str(rf)

## List of 18
##  $ call           : language randomForest(formula = TIEMPOSG ~ CAT_ERM + Sexo + Down + RTA_PRED + Estirpe +      Edad + TEL + Blancos + SNC + | __truncated__ ...
##  $ type           : chr "regression"
##  $ predicted      : Named num [1:2096] 58.1 25.9 45.5 61.1 63.8 ...
##   ..- attr(*, "names")= chr [1:2096] "1" "2" "3" "4" ...
##  $ mse            : num [1:500] 1481 1424 1415 1371 1331 ...
##  $ rsq            : num [1:500] -0.311 -0.261 -0.253 -0.214 -0.178 ...
##  $ oob.times      : int [1:2096] 172 163 181 176 169 184 177 177 199 186 ...
##  $ importance     : num [1:11, 1:2] 18.49 4.72 -3.17 38.32 6.79 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:11] "CAT_ERM" "Sexo" "Down" "RTA_PRED" ...
##   .. ..$ : chr [1:2] "%IncMSE" "IncNodePurity"
##  $ importanceSD   : Named num [1:11] 2.363 1.865 0.625 1.806 1.298 ...
##   ..- attr(*, "names")= chr [1:11] "CAT_ERM" "Sexo" "Down" "RTA_PRED" ...
##  $ localImportance: NULL
##  $ proximity      : NULL
##  $ ntree          : num 500
##  $ mtry           : num 3
##  $ forest         :List of 11
##   ..$ ndbigtree    : int [1:500] 883 705 1073 815 777 941 703 873 921 879 ...
##   ..$ nodestatus   : int [1:1097, 1:500] -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 ...
##   ..$ leftDaughter : int [1:1097, 1:500] 2 4 6 8 10 12 14 16 18 20 ...
##   ..$ rightDaughter: int [1:1097, 1:500] 3 5 7 9 11 13 15 17 19 21 ...
##   ..$ nodepred     : num [1:1097, 1:500] 51 33.6 52.9 25.6 54.1 ...
##   ..$ bestvar      : int [1:1097, 1:500] 4 10 8 6 8 10 10 5 2 8 ...
##   ..$ xbestsplit   : num [1:1097, 1:500] 1 13 133 11.5 305.5 ...
##   ..$ ncat         : Named int [1:11] 3 2 2 2 2 1 2 1 3 4 ...
##   .. ..- attr(*, "names")= chr [1:11] "CAT_ERM" "Sexo" "Down" "RTA_PRED" ...
##   ..$ nrnodes      : int 1097
##   ..$ ntree        : num 500
##   ..$ xlevels      :List of 11
##   .. ..$ CAT_ERM : chr [1:3] "1" "2" "3"
##   .. ..$ Sexo    : chr [1:2] "0" "1"
##   .. ..$ Down    : chr [1:2] "0" "1"
##   .. ..$ RTA_PRED: chr [1:2] "0" "1"
##   .. ..$ Estirpe : chr [1:2] "B" "T"
##   .. ..$ Edad    : num 0
##   .. ..$ TEL     : chr [1:2] "0" "1"
##   .. ..$ Blancos : num 0
##   .. ..$ SNC     : chr [1:3] "1" "2" "3"
##   .. ..$ Ploidia : chr [1:4] "1" "2" "4" "5"
##   .. ..$ Blastos : num 0
##  $ coefs          : NULL
##  $ y              : Named num [1:2096] 81 16 67 131 28 69 37 86 67 36 ...
##   ..- attr(*, "names")= chr [1:2096] "1" "2" "3" "4" ...
##  $ test           : NULL
##  $ inbag          : NULL
##  $ terms          :Classes 'terms', 'formula'  language TIEMPOSG ~ CAT_ERM + Sexo + Down + RTA_PRED + Estirpe + Edad + TEL + Blancos +      SNC + RTA_PRED + Ploidia + Blastos
##   .. ..- attr(*, "variables")= language list(TIEMPOSG, CAT_ERM, Sexo, Down, RTA_PRED, Estirpe, Edad, TEL, Blancos,      SNC, Ploidia, Blastos)
##   .. ..- attr(*, "factors")= int [1:12, 1:11] 0 1 0 0 0 0 0 0 0 0 ...
##   .. .. ..- attr(*, "dimnames")=List of 2
##   .. .. .. ..$ : chr [1:12] "TIEMPOSG" "CAT_ERM" "Sexo" "Down" ...
##   .. .. .. ..$ : chr [1:11] "CAT_ERM" "Sexo" "Down" "RTA_PRED" ...
##   .. ..- attr(*, "term.labels")= chr [1:11] "CAT_ERM" "Sexo" "Down" "RTA_PRED" ...
##   .. ..- attr(*, "order")= int [1:11] 1 1 1 1 1 1 1 1 1 1 ...
##   .. ..- attr(*, "intercept")= num 0
##   .. ..- attr(*, "response")= int 1
##   .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> 
##   .. ..- attr(*, "predvars")= language list(TIEMPOSG, CAT_ERM, Sexo, Down, RTA_PRED, Estirpe, Edad, TEL, Blancos,      SNC, Ploidia, Blastos)
##   .. ..- attr(*, "dataClasses")= Named chr [1:12] "numeric" "factor" "factor" "factor" ...
##   .. .. ..- attr(*, "names")= chr [1:12] "TIEMPOSG" "CAT_ERM" "Sexo" "Down" ...
##  - attr(*, "class")= chr [1:2] "randomForest.formula" "randomForest"

# Raw importance matrix: one row per predictor, columns %IncMSE and
# IncNodePurity.
rf$importance

##              %IncMSE IncNodePurity
## CAT_ERM   18.4914408      80949.43
## Sexo       4.7178089      48833.35
## Down      -3.1678033      15877.11
## RTA_PRED  38.3151458      50224.08
## Estirpe    6.7906239      28150.91
## Edad       1.4052645     244616.06
## TEL        3.0074175      30900.51
## Blancos   62.0673283     330776.92
## SNC        0.8186955      32903.26
## Ploidia  259.6809927     340200.01
## Blastos   47.2625019     312418.44

# Pair each %IncMSE value with the predictor it belongs to.
# The labels must come from the importance matrix itself: colnames(datos) has a
# different length and order (22 columns vs. 11 predictors), so cbind() recycled
# the values and attached them to unrelated variable names.
v <- as.vector(rf$importance[, "%IncMSE"])
w <- rownames(rf$importance)
DF <- data.frame(w = w, v = v, stringsAsFactors = FALSE)
str(DF)

## 'data.frame':    22 obs. of  2 variables:
##  $ w: chr  "Sexo" "Down" "Blancos" "Blastos" ...
##  $ v: chr  "18.4914408201089" "4.71780894783562" "-3.16780333380769" "38.3151458135479" ...

# Make sure the importance is numeric and the variable name is a factor.
DF <- mutate(DF, v = as.numeric(v), w = as.factor(w))

# Horizontal bar chart of variable importance, ordered by value.
ggplot(DF, aes(x = reorder(w, v), y = v, fill = w)) +
  geom_col(position = "dodge") +
  coord_flip() +
  labs(x = "", y = "Importancia de variables") +
  theme(legend.position = "none")

Se plantea como modelo efectuar una regresión de Cox:

Regresion de COX con base de datos sin imputacion

library(survival)
library(ggplot2)
library(KMsurv)
library(ggfortify)
library ( survminer)

## Loading required package: ggpubr

## 
## Attaching package: 'survminer'

## The following object is masked from 'package:survival':
## 
##     myeloma

library(survMisc)

## 
## Attaching package: 'survMisc'

## The following object is masked from 'package:ggplot2':
## 
##     autoplot

library(base)
library(flexsurv)
library(coin)
library(data.table)

## 
## Attaching package: 'data.table'

## The following objects are masked from 'package:dplyr':
## 
##     between, first, last

## The following object is masked from 'package:purrr':
## 
##     transpose

library(Hmisc)

## Loading required package: Formula

## 
## Attaching package: 'Hmisc'

## The following object is masked from 'package:arsenal':
## 
##     %nin%

## The following objects are masked from 'package:dplyr':
## 
##     src, summarize

## The following objects are masked from 'package:base':
## 
##     format.pval, units

# Plots

# Kaplan-Meier curves by residual-disease category (log-log confidence limits).
ckm <- survfit(
  Surv(TIEMPOSG, SGSTATUS) ~ CAT_ERM,
  data = datos,
  conf.type = "log-log"
)

# RESIDUAL DISEASE CATEGORIES
ggsurvplot(
  fit = ckm,
  data = datos,
  conf.int = TRUE,
  title = "LLA",
  xlab = "Tiempo",
  ylab = "Probabilidad de sobrevida",
  legend.title = "CAT_ERM",
  legend.labs = c("Estandar", "Intermedio", "Elevado"),
  risk.table = "percentage",
  ncensor.plot = FALSE,
  surv.median.line = "hv"
)

## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, : Median
## survival not reached.

# SEX
ckm_sexo <- survfit(
  Surv(TIEMPOSG, SGSTATUS) ~ Sexo,
  data = datos,
  conf.type = "log-log"
)
ggsurvplot(
  fit = ckm_sexo,
  data = datos,
  conf.int = TRUE,
  title = "LLA",
  xlab = "Tiempo",
  ylab = "Probabilidad de sobrevida",
  legend.title = "Sexo",
  legend.labs = c("Femenino", "Masculino"),
  risk.table = "percentage",
  ncensor.plot = FALSE,
  surv.median.line = "hv"
)

## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, : Median
## survival not reached.

# Down syndrome
ckm_down <- survfit(
  Surv(TIEMPOSG, SGSTATUS) ~ Down,
  data = datos,
  conf.type = "log-log"
)
ggsurvplot(
  fit = ckm_down,
  data = datos,
  conf.int = TRUE,
  title = "LLA",
  xlab = "Tiempo",
  ylab = "Probabilidad de sobrevida",
  legend.title = "Down",
  legend.labs = c("No", "Si"),
  risk.table = "percentage",
  ncensor.plot = FALSE,
  surv.median.line = "hv"
)

# SNC
ckm_snc <- survfit(
  Surv(TIEMPOSG, SGSTATUS) ~ SNC,
  data = datos,
  conf.type = "log-log"
)
ggsurvplot(
  fit = ckm_snc,
  data = datos,
  conf.int = TRUE,
  title = "LLA",
  xlab = "Tiempo",
  ylab = "Probabilidad de sobrevida",
  legend.title = "SNC",
  legend.labs = c("1", "2", "3"),
  risk.table = "percentage",
  ncensor.plot = FALSE,
  surv.median.line = "hv"
)

## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, : Median
## survival not reached.

# Ploidy
ckm_ploidia <- survfit(
  Surv(TIEMPOSG, SGSTATUS) ~ Ploidia,
  data = datos,
  conf.type = "log-log"
)
ggsurvplot(
  fit = ckm_ploidia,
  data = datos,
  conf.int = TRUE,
  title = "LLA",
  xlab = "Tiempo",
  ylab = "Probabilidad de sobrevida",
  legend.title = "Ploidia",
  legend.labs = c("1", "2", "4", "5"),
  risk.table = "percentage",
  ncensor.plot = FALSE,
  surv.median.line = "hv"
)

# Lineage (Estirpe)
ckm_estirpe <- survfit(
  Surv(TIEMPOSG, SGSTATUS) ~ Estirpe,
  data = datos,
  conf.type = "log-log"
)
ggsurvplot(
  fit = ckm_estirpe,
  data = datos,
  conf.int = TRUE,
  title = "LLA",
  xlab = "Tiempo",
  ylab = "Probabilidad de sobrevida",
  legend.title = "Estirpe",
  legend.labs = c("B", "T"),
  risk.table = "percentage",
  ncensor.plot = FALSE,
  surv.median.line = "hv"
)

## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, : Median
## survival not reached.

# Prednisone response
ckm_pred <- survfit(
  Surv(TIEMPOSG, SGSTATUS) ~ RTA_PRED,
  data = datos,
  conf.type = "log-log"
)
ggsurvplot(
  fit = ckm_pred,
  data = datos,
  conf.int = TRUE,
  title = "LLA",
  xlab = "Tiempo",
  ylab = "Probabilidad de sobrevida",
  legend.title = "Rta Prednisona",
  legend.labs = c("No", "Si"),
  risk.table = "percentage",
  ncensor.plot = FALSE,
  surv.median.line = "hv"
)

# MLL

ckm_mll <- survfit(
  Surv(TIEMPOSG, SGSTATUS) ~ MLL,
  data = datos,
  conf.type = "log-log"
)
ggsurvplot(
  fit = ckm_mll,
  data = datos,
  conf.int = TRUE,
  title = "LLA",
  xlab = "Tiempo",
  ylab = "Probabilidad de sobrevida",
  legend.title = "MLL",
  legend.labs = c("Ausente", "Presente"),
  risk.table = "percentage",
  ncensor.plot = FALSE,
  surv.median.line = "hv"
)

## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, : Median
## survival not reached.

# Kaplan-Meier curves of Surv(TIEMPOSG, SGSTATUS) stratified by TEL,
# with log-log confidence intervals.
ckm_tel <- survfit(
  Surv(TIEMPOSG, SGSTATUS) ~ TEL,
  data = datos,
  conf.type = "log-log"
)
ggsurvplot(
  fit = ckm_tel,
  data = datos,
  conf.int = TRUE,
  title = "LLA",
  xlab = "Tiempo",
  ylab = "Probabilidad de sobrevida",
  legend.title = "TEL",
  legend.labs = c("Ausente", "Presente"),
  risk.table = "percentage",
  ncensor.plot = FALSE,
  surv.median.line = "hv"
)

## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, : Median
## survival not reached.

# Kaplan-Meier curves of Surv(TIEMPOSG, SGSTATUS) stratified by age category
# (Edad_cat), with log-log confidence intervals.
# legend.labs are matched to the strata in level order (Edad_cat = 1, then 2).
# In the data preview, ages 1-4 carry Edad_cat = 1 and ages 14-15 carry
# Edad_cat = 2, so level 1 is the younger group and is labelled "Menor a 6".
ckm_edad <- survfit(
  Surv(TIEMPOSG, SGSTATUS) ~ Edad_cat,
  data = datos,
  conf.type = "log-log"
)
ggsurvplot(
  fit = ckm_edad,
  data = datos,
  conf.int = TRUE,
  title = "LLA",
  xlab = "Tiempo",
  ylab = "Probabilidad de sobrevida",
  legend.title = "Edad",
  legend.labs = c("Menor a 6", "Mayor a 6"),
  risk.table = "percentage",
  ncensor.plot = FALSE,
  surv.median.line = "hv"
)

## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, : Median
## survival not reached.

# Kaplan-Meier curves of Surv(TIEMPOSG, SGSTATUS) stratified by white-cell
# count category (Blancos_cat), with log-log confidence intervals.
ckm_blancos <- survfit(
  Surv(TIEMPOSG, SGSTATUS) ~ Blancos_cat,
  data = datos,
  conf.type = "log-log"
)
ggsurvplot(
  fit = ckm_blancos,
  data = datos,
  conf.int = TRUE,
  title = "LLA",
  xlab = "Tiempo",
  ylab = "Probabilidad de sobrevida",
  legend.title = "GB",
  legend.labs = c("Menor a 20.000", "Mayor a 20.000"),
  risk.table = "percentage",
  ncensor.plot = FALSE,
  surv.median.line = "hv"
)

## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, : Median
## survival not reached.

# Kaplan-Meier estimate by ERM category (CAT_ERM) with 95% log-log confidence
# limits; print.rmean = TRUE adds the restricted mean to the printed summary.
kmaids <- survfit(
  Surv(TIEMPOSG, SGSTATUS) ~ CAT_ERM,
  data = datos,
  type = "kaplan-meier",
  conf.type = "log-log",
  conf.int = 0.95
)

print(kmaids, print.rmean = TRUE)

## Call: survfit(formula = Surv(TIEMPOSG, SGSTATUS) ~ CAT_ERM, data = datos, 
##     type = "kaplan-meier", conf.type = "log-log", conf.int = 0.95)
## 
##    354 observations deleted due to missingness 
##             n events rmean* se(rmean) median 0.95LCL 0.95UCL
## CAT_ERM=1 656     87  115.6      1.76     NA      NA      NA
## CAT_ERM=2 806    177  102.8      2.05     NA      NA      NA
## CAT_ERM=3 280    105   83.4      3.73     NA      67      NA
##     * restricted mean with upper limit =  133

# Cox model for ERM category using explicit indicators for levels "2" and "3";
# the remaining non-missing level (1) acts as the reference.
summary(
  coxph(
    Surv(TIEMPOSG, SGSTATUS) ~ I(CAT_ERM == "2") + I(CAT_ERM == "3"),
    data = datos
  )
)

## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ I(CAT_ERM == "2") + 
##     I(CAT_ERM == "3"), data = datos)
## 
##   n= 1742, number of events= 369 
##    (354 observations deleted due to missingness)
## 
##                         coef exp(coef) se(coef)     z Pr(>|z|)    
## I(CAT_ERM == "2")TRUE 0.5966    1.8160   0.1310 4.555 5.25e-06 ***
## I(CAT_ERM == "3")TRUE 1.2912    3.6371   0.1452 8.893  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                       exp(coef) exp(-coef) lower .95 upper .95
## I(CAT_ERM == "2")TRUE     1.816     0.5507     1.405     2.348
## I(CAT_ERM == "3")TRUE     3.637     0.2749     2.736     4.834
## 
## Concordance= 0.628  (se = 0.014 )
## Likelihood ratio test= 77.78  on 2 df,   p=<2e-16
## Wald test            = 80.19  on 2 df,   p=<2e-16
## Score (logrank) test = 87.6  on 2 df,   p=<2e-16

#Modelos univariados

# Univariate Cox model: ERM categories (indicators for levels "2" and "3").
modelo_erm <- coxph(
  Surv(TIEMPOSG, SGSTATUS) ~ I(CAT_ERM == "2") + I(CAT_ERM == "3"),
  data = datos
)
summary(modelo_erm)

## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ I(CAT_ERM == "2") + 
##     I(CAT_ERM == "3"), data = datos)
## 
##   n= 1742, number of events= 369 
##    (354 observations deleted due to missingness)
## 
##                         coef exp(coef) se(coef)     z Pr(>|z|)    
## I(CAT_ERM == "2")TRUE 0.5966    1.8160   0.1310 4.555 5.25e-06 ***
## I(CAT_ERM == "3")TRUE 1.2912    3.6371   0.1452 8.893  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                       exp(coef) exp(-coef) lower .95 upper .95
## I(CAT_ERM == "2")TRUE     1.816     0.5507     1.405     2.348
## I(CAT_ERM == "3")TRUE     3.637     0.2749     2.736     4.834
## 
## Concordance= 0.628  (se = 0.014 )
## Likelihood ratio test= 77.78  on 2 df,   p=<2e-16
## Wald test            = 80.19  on 2 df,   p=<2e-16
## Score (logrank) test = 87.6  on 2 df,   p=<2e-16

# Univariate Cox model: SNC.
modelo_snc <- coxph(Surv(TIEMPOSG, SGSTATUS) ~ SNC, data = datos)
summary(modelo_snc)

## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ SNC, data = datos)
## 
##   n= 2096, number of events= 468 
## 
##        coef exp(coef) se(coef)     z Pr(>|z|)   
## SNC2 0.5656    1.7606   0.3368 1.679  0.09311 . 
## SNC3 0.5882    1.8008   0.1951 3.015  0.00257 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      exp(coef) exp(-coef) lower .95 upper .95
## SNC2     1.761     0.5680    0.9098     3.407
## SNC3     1.801     0.5553    1.2286     2.640
## 
## Concordance= 0.519  (se = 0.006 )
## Likelihood ratio test= 9.86  on 2 df,   p=0.007
## Wald test            = 11.57  on 2 df,   p=0.003
## Score (logrank) test = 11.9  on 2 df,   p=0.003

# Univariate Cox model: white-cell count (Blancos).
modelo_blancos <- coxph(Surv(TIEMPOSG, SGSTATUS) ~ Blancos, data = datos)
summary(modelo_blancos)

## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Blancos, data = datos)
## 
##   n= 2096, number of events= 468 
## 
##              coef exp(coef)  se(coef)     z Pr(>|z|)    
## Blancos 0.0030767 1.0030814 0.0002609 11.79   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##         exp(coef) exp(-coef) lower .95 upper .95
## Blancos     1.003     0.9969     1.003     1.004
## 
## Concordance= 0.615  (se = 0.014 )
## Likelihood ratio test= 82.27  on 1 df,   p=<2e-16
## Wald test            = 139.1  on 1 df,   p=<2e-16
## Score (logrank) test = 153.8  on 1 df,   p=<2e-16

# Univariate Cox model: MO.
modelo_mo <- coxph(Surv(TIEMPOSG, SGSTATUS) ~ MO, data = datos)
summary(modelo_mo)

## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ MO, data = datos)
## 
##   n= 2096, number of events= 468 
## 
##          coef  exp(coef)   se(coef)      z Pr(>|z|)
## MO -0.0000736  0.9999264  0.0031069 -0.024    0.981
## 
##    exp(coef) exp(-coef) lower .95 upper .95
## MO    0.9999          1    0.9939     1.006
## 
## Concordance= 0.482  (se = 0.014 )
## Likelihood ratio test= 0  on 1 df,   p=1
## Wald test            = 0  on 1 df,   p=1
## Score (logrank) test = 0  on 1 df,   p=1

# Univariate Cox model: blasts (Blastos).
modelo_blastos <- coxph(Surv(TIEMPOSG, SGSTATUS) ~ Blastos, data = datos)
summary(modelo_blastos)

## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Blastos, data = datos)
## 
##   n= 2083, number of events= 466 
##    (13 observations deleted due to missingness)
## 
##             coef exp(coef) se(coef)     z Pr(>|z|)    
## Blastos 0.007007  1.007031 0.001343 5.217 1.82e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##         exp(coef) exp(-coef) lower .95 upper .95
## Blastos     1.007      0.993     1.004      1.01
## 
## Concordance= 0.58  (se = 0.014 )
## Likelihood ratio test= 27.78  on 1 df,   p=1e-07
## Wald test            = 27.22  on 1 df,   p=2e-07
## Score (logrank) test = 27.64  on 1 df,   p=1e-07

# Univariate Cox model: TEL.
modelo_tel <- coxph(Surv(TIEMPOSG, SGSTATUS) ~ TEL, data = datos)
summary(modelo_tel)

## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ TEL, data = datos)
## 
##   n= 1695, number of events= 391 
##    (401 observations deleted due to missingness)
## 
##         coef exp(coef) se(coef)      z Pr(>|z|)    
## TEL1 -0.7011    0.4960   0.1930 -3.633  0.00028 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      exp(coef) exp(-coef) lower .95 upper .95
## TEL1     0.496      2.016    0.3398    0.7241
## 
## Concordance= 0.53  (se = 0.008 )
## Likelihood ratio test= 16.15  on 1 df,   p=6e-05
## Wald test            = 13.2  on 1 df,   p=3e-04
## Score (logrank) test = 13.75  on 1 df,   p=2e-04

# Univariate Cox model: MLL.
modelo_mll <- coxph(Surv(TIEMPOSG, SGSTATUS) ~ MLL, data = datos)
summary(modelo_mll)

## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ MLL, data = datos)
## 
##   n= 2087, number of events= 467 
##    (9 observations deleted due to missingness)
## 
##         coef exp(coef) se(coef)      z Pr(>|z|)
## MLL1 -0.1211    0.8859   0.4497 -0.269    0.788
## 
##      exp(coef) exp(-coef) lower .95 upper .95
## MLL1    0.8859      1.129     0.367     2.139
## 
## Concordance= 0.501  (se = 0.003 )
## Likelihood ratio test= 0.08  on 1 df,   p=0.8
## Wald test            = 0.07  on 1 df,   p=0.8
## Score (logrank) test = 0.07  on 1 df,   p=0.8

# Univariate Cox model: lineage (Estirpe).
modelo_estirpe <- coxph(Surv(TIEMPOSG, SGSTATUS) ~ Estirpe, data = datos)
summary(modelo_estirpe)

## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Estirpe, data = datos)
## 
##   n= 2096, number of events= 468 
## 
##            coef exp(coef) se(coef)     z Pr(>|z|)    
## EstirpeT 0.6344    1.8858   0.1217 5.213 1.86e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## EstirpeT     1.886     0.5303     1.486     2.394
## 
## Concordance= 0.544  (se = 0.009 )
## Likelihood ratio test= 23.62  on 1 df,   p=1e-06
## Wald test            = 27.17  on 1 df,   p=2e-07
## Score (logrank) test = 28.1  on 1 df,   p=1e-07

# Univariate Cox model: ploidy (Ploidia).
modelo_ploidia <- coxph(Surv(TIEMPOSG, SGSTATUS) ~ Ploidia, data = datos)
summary(modelo_ploidia)

## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Ploidia, data = datos)
## 
##   n= 2096, number of events= 468 
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)    
## Ploidia2 -1.3654    0.2553   0.1869 -7.306 2.74e-13 ***
## Ploidia4  0.9572    2.6044   0.1369  6.990 2.75e-12 ***
## Ploidia5 -3.1259    0.0439   1.0084 -3.100  0.00194 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## Ploidia2    0.2553      3.917  0.176982    0.3682
## Ploidia4    2.6044      0.384  1.991335    3.4061
## Ploidia5    0.0439     22.781  0.006082    0.3168
## 
## Concordance= 0.744  (se = 0.009 )
## Likelihood ratio test= 433.8  on 3 df,   p=<2e-16
## Wald test            = 284.4  on 3 df,   p=<2e-16
## Score (logrank) test = 433.4  on 3 df,   p=<2e-16

# Univariate Cox model: prednisone response (RTA_PRED).
modelo_pred <- coxph(Surv(TIEMPOSG, SGSTATUS) ~ RTA_PRED, data = datos)
summary(modelo_pred)

## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ RTA_PRED, data = datos)
## 
##   n= 2091, number of events= 464 
##    (5 observations deleted due to missingness)
## 
##              coef exp(coef) se(coef)      z Pr(>|z|)    
## RTA_PRED1 -1.1487    0.3170   0.1111 -10.34   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##           exp(coef) exp(-coef) lower .95 upper .95
## RTA_PRED1     0.317      3.154     0.255    0.3942
## 
## Concordance= 0.578  (se = 0.01 )
## Likelihood ratio test= 85.64  on 1 df,   p=<2e-16
## Wald test            = 107  on 1 df,   p=<2e-16
## Score (logrank) test = 119.1  on 1 df,   p=<2e-16

# Univariate Cox model: sex (Sexo).
modelo_sexo <- coxph(Surv(TIEMPOSG, SGSTATUS) ~ Sexo, data = datos)
summary(modelo_sexo)

## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Sexo, data = datos)
## 
##   n= 2096, number of events= 468 
## 
##          coef exp(coef) se(coef)     z Pr(>|z|)
## Sexo1 0.12922   1.13794  0.09398 1.375    0.169
## 
##       exp(coef) exp(-coef) lower .95 upper .95
## Sexo1     1.138     0.8788    0.9465     1.368
## 
## Concordance= 0.515  (se = 0.012 )
## Likelihood ratio test= 1.9  on 1 df,   p=0.2
## Wald test            = 1.89  on 1 df,   p=0.2
## Score (logrank) test = 1.89  on 1 df,   p=0.2

# Univariate Cox model: age as a categorical variable (Edad_cat).
modelo_edad_cat <- coxph(Surv(TIEMPOSG, SGSTATUS) ~ Edad_cat, data = datos)
summary(modelo_edad_cat)

## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Edad_cat, data = datos)
## 
##   n= 2096, number of events= 468 
## 
##              coef exp(coef) se(coef)     z Pr(>|z|)    
## Edad_cat2 0.42730   1.53311  0.09299 4.595 4.32e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##           exp(coef) exp(-coef) lower .95 upper .95
## Edad_cat2     1.533     0.6523     1.278      1.84
## 
## Concordance= 0.555  (se = 0.012 )
## Likelihood ratio test= 21.27  on 1 df,   p=4e-06
## Wald test            = 21.12  on 1 df,   p=4e-06
## Score (logrank) test = 21.44  on 1 df,   p=4e-06

# Univariate Cox model: white-cell count as a categorical variable
# (Blancos_cat).
modelo_gb_cat <- coxph(Surv(TIEMPOSG, SGSTATUS) ~ Blancos_cat, data = datos)
summary(modelo_gb_cat)

## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Blancos_cat, data = datos)
## 
##   n= 2096, number of events= 468 
## 
##                 coef exp(coef) se(coef)     z Pr(>|z|)    
## Blancos_cat2 0.70475   2.02333  0.09256 7.614 2.66e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##              exp(coef) exp(-coef) lower .95 upper .95
## Blancos_cat2     2.023     0.4942     1.688     2.426
## 
## Concordance= 0.592  (se = 0.012 )
## Likelihood ratio test= 56.43  on 1 df,   p=6e-14
## Wald test            = 57.97  on 1 df,   p=3e-14
## Score (logrank) test = 60.4  on 1 df,   p=8e-15

# Univariate Cox model: age as a continuous variable (Edad).
modelo_edad <- coxph(Surv(TIEMPOSG, SGSTATUS) ~ Edad, data = datos)
summary(modelo_edad)

## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Edad, data = datos)
## 
##   n= 2096, number of events= 468 
## 
##          coef exp(coef) se(coef)     z Pr(>|z|)    
## Edad 0.049639  1.050891 0.009922 5.003 5.65e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      exp(coef) exp(-coef) lower .95 upper .95
## Edad     1.051     0.9516     1.031     1.072
## 
## Concordance= 0.557  (se = 0.015 )
## Likelihood ratio test= 23.94  on 1 df,   p=1e-06
## Wald test            = 25.03  on 1 df,   p=6e-07
## Score (logrank) test = 25.31  on 1 df,   p=5e-07

# Univariate Cox model: white-cell count as a continuous variable (Blancos).
# This repeats the modelo_blancos fit made earlier in the file.
modelo_blancos <- coxph(Surv(TIEMPOSG, SGSTATUS) ~ Blancos, data = datos)
summary(modelo_blancos)

## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Blancos, data = datos)
## 
##   n= 2096, number of events= 468 
## 
##              coef exp(coef)  se(coef)     z Pr(>|z|)    
## Blancos 0.0030767 1.0030814 0.0002609 11.79   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##         exp(coef) exp(-coef) lower .95 upper .95
## Blancos     1.003     0.9969     1.003     1.004
## 
## Concordance= 0.615  (se = 0.014 )
## Likelihood ratio test= 82.27  on 1 df,   p=<2e-16
## Wald test            = 139.1  on 1 df,   p=<2e-16
## Score (logrank) test = 153.8  on 1 df,   p=<2e-16

Tabla con HR de modelos univariados

De los análisis univariados efectuados, se excluyen del análisis multivariado las variables con p > 0,2: MLL (p = 0,788) y MO (p = 0,981).

#Modelo multivariado

# Multivariable Cox model without MLL and MO, with age (Edad) and white-cell
# count (Blancos) as continuous covariates.
# RTA_PRED appears once in the formula: a repeated term is collapsed by R's
# formula handling, so listing it twice adds nothing to the fit.
summary(
  coxph(
    Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED + Estirpe + Edad + TEL +
      Blancos + SNC + Ploidia + Blastos + CAT_ERM,
    data = datos
  )
)

## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED + 
##     Estirpe + Edad + TEL + Blancos + SNC + RTA_PRED + Ploidia + 
##     Blastos + CAT_ERM, data = datos)
## 
##   n= 1480, number of events= 322 
##    (616 observations deleted due to missingness)
## 
##                 coef  exp(coef)   se(coef)      z Pr(>|z|)    
## Sexo1      0.0320119  1.0325298  0.1156128  0.277 0.781865    
## Down1      1.2173951  3.3783760  0.2740249  4.443 8.89e-06 ***
## RTA_PRED1 -0.5315738  0.5876794  0.1613593 -3.294 0.000987 ***
## EstirpeT  -0.0750978  0.9276527  0.1709595 -0.439 0.660464    
## Edad       0.0199750  1.0201759  0.0125502  1.592 0.111474    
## TEL1      -0.4480192  0.6388924  0.2133808 -2.100 0.035762 *  
## Blancos    0.0020543  1.0020564  0.0005058  4.061 4.88e-05 ***
## SNC2       0.2882750  1.3341242  0.4183052  0.689 0.490729    
## SNC3       0.3566726  1.4285681  0.2286605  1.560 0.118799    
## Ploidia2  -1.2947669  0.2739617  0.2180549 -5.938 2.89e-09 ***
## Ploidia4   0.9981649  2.7132980  0.1540460  6.480 9.19e-11 ***
## Ploidia5  -2.1108716  0.1211323  1.0107490 -2.088 0.036760 *  
## Blastos   -0.0002697  0.9997303  0.0018073 -0.149 0.881364    
## CAT_ERM2   0.3678790  1.4446673  0.1436321  2.561 0.010429 *  
## CAT_ERM3   0.7351325  2.0857583  0.1759066  4.179 2.93e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##           exp(coef) exp(-coef) lower .95 upper .95
## Sexo1        1.0325     0.9685   0.82318    1.2951
## Down1        3.3784     0.2960   1.97450    5.7804
## RTA_PRED1    0.5877     1.7016   0.42834    0.8063
## EstirpeT     0.9277     1.0780   0.66354    1.2969
## Edad         1.0202     0.9802   0.99539    1.0456
## TEL1         0.6389     1.5652   0.42053    0.9706
## Blancos      1.0021     0.9979   1.00106    1.0031
## SNC2         1.3341     0.7496   0.58767    3.0287
## SNC3         1.4286     0.7000   0.91257    2.2363
## Ploidia2     0.2740     3.6501   0.17868    0.4200
## Ploidia4     2.7133     0.3686   2.00620    3.6696
## Ploidia5     0.1211     8.2554   0.01671    0.8782
## Blastos      0.9997     1.0003   0.99620    1.0033
## CAT_ERM2     1.4447     0.6922   1.09020    1.9144
## CAT_ERM3     2.0858     0.4794   1.47752    2.9444
## 
## Concordance= 0.798  (se = 0.011 )
## Likelihood ratio test= 406.8  on 15 df,   p=<2e-16
## Wald test            = 338.6  on 15 df,   p=<2e-16
## Score (logrank) test = 458.3  on 15 df,   p=<2e-16

# Same multivariable model without CAT_ERM (printed in short form).
# RTA_PRED appears once in the formula: a repeated term is collapsed by R's
# formula handling, so listing it twice adds nothing to the fit.
coxph(
  Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED + Estirpe + Edad + TEL +
    Blancos + SNC + Ploidia + Blastos,
  data = datos
)

## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED + 
##     Estirpe + Edad + TEL + Blancos + SNC + RTA_PRED + Ploidia + 
##     Blastos, data = datos)
## 
##                 coef  exp(coef)   se(coef)      z        p
## Sexo1      3.212e-02  1.033e+00  1.059e-01  0.303   0.7618
## Down1      1.169e+00  3.220e+00  2.539e-01  4.606 4.11e-06
## RTA_PRED1 -7.653e-01  4.652e-01  1.357e-01 -5.641 1.69e-08
## EstirpeT  -5.905e-02  9.427e-01  1.551e-01 -0.381   0.7033
## Edad       2.590e-02  1.026e+00  1.159e-02  2.235   0.0254
## TEL1      -3.690e-01  6.914e-01  1.972e-01 -1.872   0.0613
## Blancos    2.085e-03  1.002e+00  4.166e-04  5.004 5.60e-07
## SNC2       3.270e-01  1.387e+00  3.628e-01  0.901   0.3675
## SNC3       4.102e-01  1.507e+00  2.086e-01  1.967   0.0492
## Ploidia2  -1.330e+00  2.644e-01  2.090e-01 -6.363 1.97e-10
## Ploidia4   1.040e+00  2.830e+00  1.462e-01  7.113 1.13e-12
## Ploidia5  -2.548e+00  7.821e-02  1.010e+00 -2.524   0.0116
## Blastos   -8.867e-05  9.999e-01  1.636e-03 -0.054   0.9568
## 
## Likelihood ratio test=481.2  on 13 df, p=< 2.2e-16
## n= 1680, number of events= 386 
##    (416 observations deleted due to missingness)

# Multivariable Cox model with age and white-cell count as categorical
# covariates (Edad_cat, Blancos_cat).
# RTA_PRED appears once in the formula: a repeated term is collapsed by R's
# formula handling, so listing it twice adds nothing to the fit.
modelo_mult <- coxph(
  Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED + Estirpe + Edad_cat +
    TEL + Blancos_cat + SNC + Ploidia + Blastos + CAT_ERM,
  data = datos
)
summary(modelo_mult)

## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED + 
##     Estirpe + Edad_cat + TEL + Blancos_cat + SNC + RTA_PRED + 
##     Ploidia + Blastos + CAT_ERM, data = datos)
## 
##   n= 1480, number of events= 322 
##    (616 observations deleted due to missingness)
## 
##                    coef  exp(coef)   se(coef)      z Pr(>|z|)    
## Sexo1         0.0218378  1.0220780  0.1155556  0.189  0.85011    
## Down1         1.1497799  3.1574980  0.2777192  4.140 3.47e-05 ***
## RTA_PRED1    -0.5089549  0.6011235  0.1590766 -3.199  0.00138 ** 
## EstirpeT      0.0468603  1.0479756  0.1636782  0.286  0.77465    
## Edad_cat2     0.0790463  1.0822544  0.1165434  0.678  0.49761    
## TEL1         -0.4917902  0.6115307  0.2141550 -2.296  0.02165 *  
## Blancos_cat2  0.3682409  1.4451901  0.1490434  2.471  0.01349 *  
## SNC2          0.4035746  1.4971670  0.4168909  0.968  0.33302    
## SNC3          0.5695412  1.7674559  0.2197318  2.592  0.00954 ** 
## Ploidia2     -1.3034923  0.2715817  0.2175331 -5.992 2.07e-09 ***
## Ploidia4      0.9800643  2.6646276  0.1535633  6.382 1.75e-10 ***
## Ploidia5     -2.0791775  0.1250330  1.0106309 -2.057  0.03966 *  
## Blastos      -0.0005152  0.9994850  0.0020302 -0.254  0.79968    
## CAT_ERM2      0.3756233  1.4558985  0.1438508  2.611  0.00902 ** 
## CAT_ERM3      0.7632108  2.1451529  0.1747163  4.368 1.25e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##              exp(coef) exp(-coef) lower .95 upper .95
## Sexo1           1.0221     0.9784   0.81493    1.2819
## Down1           3.1575     0.3167   1.83210    5.4417
## RTA_PRED1       0.6011     1.6636   0.44011    0.8211
## EstirpeT        1.0480     0.9542   0.76038    1.4444
## Edad_cat2       1.0823     0.9240   0.86125    1.3600
## TEL1            0.6115     1.6352   0.40191    0.9305
## Blancos_cat2    1.4452     0.6920   1.07909    1.9355
## SNC2            1.4972     0.6679   0.66132    3.3894
## SNC3            1.7675     0.5658   1.14898    2.7188
## Ploidia2        0.2716     3.6821   0.17731    0.4160
## Ploidia4        2.6646     0.3753   1.97207    3.6004
## Ploidia5        0.1250     7.9979   0.01725    0.9063
## Blastos         0.9995     1.0005   0.99552    1.0035
## CAT_ERM2        1.4559     0.6869   1.09821    1.9301
## CAT_ERM3        2.1452     0.4662   1.52314    3.0212
## 
## Concordance= 0.798  (se = 0.011 )
## Likelihood ratio test= 397.3  on 15 df,   p=<2e-16
## Wald test            = 323.8  on 15 df,   p=<2e-16
## Score (logrank) test = 433.3  on 15 df,   p=<2e-16

# Multivariable Cox model with age and white-cell count as continuous
# covariates (Edad, Blancos).
# RTA_PRED appears once in the formula: a repeated term is collapsed by R's
# formula handling, so listing it twice adds nothing to the fit.
modelo_mult1 <- coxph(
  Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED + Estirpe + Edad + TEL +
    Blancos + SNC + Ploidia + Blastos + CAT_ERM,
  data = datos
)
summary(modelo_mult1)

## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED + 
##     Estirpe + Edad + TEL + Blancos + SNC + RTA_PRED + Ploidia + 
##     Blastos + CAT_ERM, data = datos)
## 
##   n= 1480, number of events= 322 
##    (616 observations deleted due to missingness)
## 
##                 coef  exp(coef)   se(coef)      z Pr(>|z|)    
## Sexo1      0.0320119  1.0325298  0.1156128  0.277 0.781865    
## Down1      1.2173951  3.3783760  0.2740249  4.443 8.89e-06 ***
## RTA_PRED1 -0.5315738  0.5876794  0.1613593 -3.294 0.000987 ***
## EstirpeT  -0.0750978  0.9276527  0.1709595 -0.439 0.660464    
## Edad       0.0199750  1.0201759  0.0125502  1.592 0.111474    
## TEL1      -0.4480192  0.6388924  0.2133808 -2.100 0.035762 *  
## Blancos    0.0020543  1.0020564  0.0005058  4.061 4.88e-05 ***
## SNC2       0.2882750  1.3341242  0.4183052  0.689 0.490729    
## SNC3       0.3566726  1.4285681  0.2286605  1.560 0.118799    
## Ploidia2  -1.2947669  0.2739617  0.2180549 -5.938 2.89e-09 ***
## Ploidia4   0.9981649  2.7132980  0.1540460  6.480 9.19e-11 ***
## Ploidia5  -2.1108716  0.1211323  1.0107490 -2.088 0.036760 *  
## Blastos   -0.0002697  0.9997303  0.0018073 -0.149 0.881364    
## CAT_ERM2   0.3678790  1.4446673  0.1436321  2.561 0.010429 *  
## CAT_ERM3   0.7351325  2.0857583  0.1759066  4.179 2.93e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##           exp(coef) exp(-coef) lower .95 upper .95
## Sexo1        1.0325     0.9685   0.82318    1.2951
## Down1        3.3784     0.2960   1.97450    5.7804
## RTA_PRED1    0.5877     1.7016   0.42834    0.8063
## EstirpeT     0.9277     1.0780   0.66354    1.2969
## Edad         1.0202     0.9802   0.99539    1.0456
## TEL1         0.6389     1.5652   0.42053    0.9706
## Blancos      1.0021     0.9979   1.00106    1.0031
## SNC2         1.3341     0.7496   0.58767    3.0287
## SNC3         1.4286     0.7000   0.91257    2.2363
## Ploidia2     0.2740     3.6501   0.17868    0.4200
## Ploidia4     2.7133     0.3686   2.00620    3.6696
## Ploidia5     0.1211     8.2554   0.01671    0.8782
## Blastos      0.9997     1.0003   0.99620    1.0033
## CAT_ERM2     1.4447     0.6922   1.09020    1.9144
## CAT_ERM3     2.0858     0.4794   1.47752    2.9444
## 
## Concordance= 0.798  (se = 0.011 )
## Likelihood ratio test= 406.8  on 15 df,   p=<2e-16
## Wald test            = 338.6  on 15 df,   p=<2e-16
## Score (logrank) test = 458.3  on 15 df,   p=<2e-16

#impresion de HR en tabla (agregar)



library(stargazer)

## 
## Please cite as:

##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer

# Variance inflation factors for the multivariable model with categorical
# age and white-cell count.
# vif() is namespace-qualified because rms, attached further down in this
# file, also exports vif() and masks car's version once it is loaded.
# NOTE(review): car warns "No intercept: vifs may not be sensible" for this
# coxph fit, so treat the values as approximate.
library(car)
vif_modelo <- car::vif(modelo_mult)

## Warning in vif.default(modelo_mult): No intercept: vifs may not be sensible.

print(vif_modelo)

##                 GVIF Df GVIF^(1/(2*Df))
## Sexo        1.037140  1        1.018401
## Down        1.093849  1        1.045872
## RTA_PRED    1.453125  1        1.205456
## Estirpe     1.287930  1        1.134870
## Edad_cat    1.081776  1        1.040085
## TEL         1.056968  1        1.028089
## Blancos_cat 1.779789  1        1.334087
## SNC         1.086985  2        1.021071
## Ploidia     1.026765  3        1.004412
## Blastos     1.726685  1        1.314034
## CAT_ERM     1.349149  2        1.077742

# Variance inflation factors for the model with continuous age and white-cell
# count. Qualified as car::vif() because rms, attached later in this file,
# masks vif().
vif_modelo1 <- car::vif(modelo_mult1)

## Warning in vif.default(modelo_mult1): No intercept: vifs may not be sensible.

print(vif_modelo1)

##              GVIF Df GVIF^(1/(2*Df))
## Sexo     1.038088  1        1.018866
## Down     1.066044  1        1.032494
## RTA_PRED 1.488479  1        1.220032
## Estirpe  1.401772  1        1.183965
## Edad     1.075803  1        1.037209
## TEL      1.049241  1        1.024325
## Blancos  1.709111  1        1.307330
## SNC      1.180038  2        1.042255
## Ploidia  1.031431  3        1.005171
## Blastos  1.398638  1        1.182640
## CAT_ERM  1.365224  2        1.080939

# Forest plots to visualise the results of the Cox models.

# Model with age and white-cell count as categorical variables.
ggforest(model = modelo_mult, data = datos)

# Model with age and white-cell count as continuous variables.
ggforest(model = modelo_mult1, data = datos)

En el análisis del VIF se observa que todos los valores son menores a 5, por lo que no se evidencian problemas de colinealidad entre las variables explicativas.

Validacion de los supuestos de COX: riesgo proporcional y residuos

Chequeo de la suposicion de riesgo proporcional

# Multivariable model without MO and MLL (age and white-cell count as
# categorical variables), tested for proportional hazards with cox.zph().
# RTA_PRED appears once in the formula: a repeated term is collapsed by R's
# formula handling, so listing it twice adds nothing to the fit.
coxlla1 <- coxph(
  Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED + Estirpe + Edad_cat +
    TEL + Blancos_cat + SNC + Ploidia + Blastos + CAT_ERM,
  data = datos
)
cox.zph(coxlla1)

##                chisq df    p
## Sexo         0.00218  1 0.96
## Down         2.13511  1 0.14
## RTA_PRED     0.70844  1 0.40
## Estirpe      2.19226  1 0.14
## Edad_cat     0.04011  1 0.84
## TEL          2.70359  1 0.10
## Blancos_cat  1.91151  1 0.17
## SNC          3.07656  2 0.21
## Ploidia      3.59139  3 0.31
## Blastos      0.58146  1 0.45
## CAT_ERM      3.12794  2 0.21
## GLOBAL      18.47390 15 0.24

# Multivariable model without MO and MLL (age and white-cell count as
# continuous variables), tested for proportional hazards with cox.zph().
# RTA_PRED appears once in the formula: a repeated term is collapsed by R's
# formula handling, so listing it twice adds nothing to the fit.
coxlla2 <- coxph(
  Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED + Estirpe + Edad + TEL +
    Blancos + SNC + Ploidia + Blastos + CAT_ERM,
  data = datos
)
cox.zph(coxlla2)

##            chisq df     p
## Sexo      0.0376  1 0.846
## Down      1.7193  1 0.190
## RTA_PRED  0.2446  1 0.621
## Estirpe   1.2804  1 0.258
## Edad      0.3118  1 0.577
## TEL       2.9190  1 0.088
## Blancos   2.1840  1 0.139
## SNC       3.5443  2 0.170
## Ploidia   3.4984  3 0.321
## Blastos   0.1216  1 0.727
## CAT_ERM   2.2974  2 0.317
## GLOBAL   22.4574 15 0.096

# Se cumple supuesto de riesgo proporcional, en todos los casos, los pv obtenidos son mayores a 0.05 ( de forma global y por cada variable predictora)
#De esta manera, el modelo supone que el HR para cada variable Xj es el mismo cualquiera sea el tiempo t



# Graphical check of the proportional-hazards assumption.
# library() is used rather than require() so that a missing rms installation
# stops the script here instead of failing later at the first npsurv() call.
library(rms)

## Loading required package: rms

## Loading required package: SparseM

## 
## Attaching package: 'SparseM'

## The following object is masked from 'package:base':
## 
##     backsolve

## 
## Attaching package: 'rms'

## The following objects are masked from 'package:car':
## 
##     Predict, vif

# library() instead of require(): fail loudly if car is unavailable.
library(car)

# Log(-log(S(t))) curves per covariate level, drawn with rms::survplot().
# NOTE(review): xlim = c(0, 12) displays only the first 12 time units, while
# the restricted-mean output earlier in the file reports an upper limit of
# 133 -- confirm that the narrow window is intended.

# ERM category
survlla1 <- npsurv(Surv(TIEMPOSG, SGSTATUS) ~ CAT_ERM, data = datos)
survplot(
  survlla1,
  loglog = TRUE, logt = FALSE, xlim = c(0, 12),
  xlab = "Tiempo hasta la muerte", ylab = "Log(-Log (S(t))",
  label.curves = TRUE, time.inc = 1, levels.only = TRUE,
  conf = "none", type = "kaplan-meier"
)

# Sex
survlla2 <- npsurv(Surv(TIEMPOSG, SGSTATUS) ~ Sexo, data = datos)
survplot(
  survlla2,
  loglog = TRUE, logt = FALSE, xlim = c(0, 12),
  xlab = "Tiempo hasta la muerte", ylab = "Log(-Log (S(t))",
  label.curves = TRUE, time.inc = 1, levels.only = TRUE,
  conf = "none", type = "kaplan-meier"
)

# Down
survlla3 <- npsurv(Surv(TIEMPOSG, SGSTATUS) ~ Down, data = datos)
survplot(
  survlla3,
  loglog = TRUE, logt = FALSE, xlim = c(0, 12),
  xlab = "Tiempo hasta la muerte", ylab = "Log(-Log (S(t))",
  label.curves = TRUE, time.inc = 1, levels.only = TRUE,
  conf = "none", type = "kaplan-meier"
)

# SNC
survlla4 <- npsurv(Surv(TIEMPOSG, SGSTATUS) ~ SNC, data = datos)
survplot(
  survlla4,
  loglog = TRUE, logt = FALSE, xlim = c(0, 12),
  xlab = "Tiempo hasta la muerte", ylab = "Log(-Log (S(t))",
  label.curves = TRUE, time.inc = 1, levels.only = TRUE,
  conf = "none", type = "kaplan-meier"
)

# Ploidy
survlla5 <- npsurv(Surv(TIEMPOSG, SGSTATUS) ~ Ploidia, data = datos)

# TODO: the log-log plot does not work for Ploidia; investigate.

# Prednisone response
survlla6 <- npsurv(Surv(TIEMPOSG, SGSTATUS) ~ RTA_PRED, data = datos)
survplot(
  survlla6,
  loglog = TRUE, logt = FALSE, xlim = c(0, 12),
  xlab = "Tiempo hasta la muerte", ylab = "Log(-Log (S(t))",
  label.curves = TRUE, time.inc = 1, levels.only = TRUE,
  conf = "none", type = "kaplan-meier"
)

# Lineage
survlla7 <- npsurv(Surv(TIEMPOSG, SGSTATUS) ~ Estirpe, data = datos)
survplot(
  survlla7,
  loglog = TRUE, logt = FALSE, xlim = c(0, 12),
  xlab = "Tiempo hasta la muerte", ylab = "Log(-Log (S(t))",
  label.curves = TRUE, time.inc = 1, levels.only = TRUE,
  conf = "none", type = "kaplan-meier"
)

# TEL
survlla8 <- npsurv(Surv(TIEMPOSG, SGSTATUS) ~ TEL, data = datos)
survplot(
  survlla8,
  loglog = TRUE, logt = FALSE, xlim = c(0, 12),
  xlab = "Tiempo hasta la muerte", ylab = "Log(-Log (S(t))",
  label.curves = TRUE, time.inc = 1, levels.only = TRUE,
  conf = "none", type = "kaplan-meier"
)

# MLL
survlla9 <- npsurv(Surv(TIEMPOSG, SGSTATUS) ~ MLL, data = datos)
survplot(
  survlla9,
  loglog = TRUE, logt = FALSE, xlim = c(0, 12),
  xlab = "Tiempo hasta la muerte", ylab = "Log(-Log (S(t))",
  label.curves = TRUE, time.inc = 1, levels.only = TRUE,
  conf = "none", type = "kaplan-meier"
)

Analisis de residuos

# Residual analysis: cox.zph() result for the categorical-covariate model
# (coxlla1), stored for the Schoenfeld residual plots and printed.
residuos_lla <- cox.zph(coxlla1)
residuos_lla

##                chisq df    p
## Sexo         0.00218  1 0.96
## Down         2.13511  1 0.14
## RTA_PRED     0.70844  1 0.40
## Estirpe      2.19226  1 0.14
## Edad_cat     0.04011  1 0.84
## TEL          2.70359  1 0.10
## Blancos_cat  1.91151  1 0.17
## SNC          3.07656  2 0.21
## Ploidia      3.59139  3 0.31
## Blastos      0.58146  1 0.45
## CAT_ERM      3.12794  2 0.21
## GLOBAL      18.47390 15 0.24

# Scaled Schoenfeld residuals against time for each variable in the model;
# these plots also allow the proportional-hazards hypothesis to be checked.
# No par(mfrow = ...) is set here: ggcoxzph() draws ggplot2/grid graphics,
# which ignore par(), and the setting would only leak into later base plots.
ggcoxzph(residuos_lla, font.main = 10)


# Martingale residuals of coxlla1.
ggcoxdiagnostics(
  coxlla1,
  type = "martingale",
  linear.predictions = FALSE,
  ggtheme = theme_bw()
)

## `geom_smooth()` using formula 'y ~ x'

#Los residuos se  distribuyen de forma aproximadamente simetrica alrededor de cero.

# dfbeta residuals of coxlla1.
ggcoxdiagnostics(
  coxlla1,
  type = "dfbeta",
  linear.predictions = FALSE,
  ggtheme = theme_bw()
)

## `geom_smooth()` using formula 'y ~ x'

# Deviance residuals, intended for spotting outliers.
# NOTE(review): these come from `modelo_mult1`, while the other diagnostics in
# this section use `coxlla1` - confirm which model is intended. `mres` is only
# stored here; it is not plotted or printed in this part of the script.
mres <- residuals(modelo_mult1, type = "deviance")

## Competing risks (to be reviewed)

# Load the "riesgo comp" sheet of the workbook and preview its first rows.
# NOTE(review): the path is an absolute, user-specific location, so this
# chunk only runs on the original author's machine.
library(readxl)
ERM <- read_excel(
  path = "C:/Users/Magali/Desktop/TF CEECS/Base final.xlsx",
  sheet = "riesgo comp"
)
head(ERM)

## # A tibble: 6 x 3
##   ftime status   dis
##   <dbl>  <dbl> <dbl>
## 1    37      2     1
## 2    19      2     1
## 3    16      2     1
## 4    31      2     2
## 5    34      2     3
## 6    40      2     2

# Cross-tabulation of `dis` (rows) against `status` (columns).
table(ERM[["dis"]], ERM[["status"]])

##    
##       0   1   2
##   1 577  22  80
##   2 635  37 175
##   3 198  38  80

# Recode `dis` (1, 2, 3) as a labelled factor.
# The first label must not carry a trailing space: cuminc() builds curve names
# as "<group> <cause>" and ggcompetingrisks() splits them on a single space,
# so "<0.1 " would yield an empty event name for that group.
ERM$dis <- factor(
  ERM$dis,
  levels = c(1, 2, 3),
  labels = c("<0.1", "0.1-10", ">10")
)

# The cumulative incidence function (CIF) can estimate different causes of
# failure and allows comparisons between groups.

library(cmprsk)

# Overall CIF for each `status` code, with 0 marking censored observations.
CIFdata_lla <- cuminc(ftime = ERM$ftime, fstatus = ERM$status, cencode = 0)

# CIF for each `status` code within each `dis` group.
cifdata_lladis <- cuminc(
  ftime = ERM$ftime,
  fstatus = ERM$status,
  group = ERM$dis
)

# Axis label fixed: the closing parenthesis was missing.
plot(cifdata_lladis, xlab = "Tiempo (meses)")

# Between-group tests stored by cuminc(): one row per cause with the test
# statistic, p-value and degrees of freedom (chi-square based).
cifdata_lladis[["Tests"]]

##       stat           pv df
## 1 38.28877 4.849526e-09  2
## 2 44.96465 1.722071e-10  2

# Cumulative incidence curves for each `dis` category.
# library() is used instead of require() so that a missing package stops the
# script with an error instead of returning FALSE and failing later.
library(ggplot2)
ggcompetingrisks(
  cifdata_lladis,
  palette = "lancet",
  legend = "top",
  ggtheme = theme_bw()
)

Selección de modelos

# Multivariable Cox model without the non-significant variables
# (Sexo, Edad, Estirpe, SNC and Blastos).
# Each covariate is listed once; a repeated term in a formula is collapsed by
# R, so listing RTA_PRED a second time added nothing to the fit.
modelo1 <- coxph(
  Surv(TIEMPOSG, SGSTATUS) ~
    Down + RTA_PRED + TEL + Blancos + Ploidia + CAT_ERM,
  data = datos
)
summary(modelo1)

## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Down + RTA_PRED + 
##     TEL + Blancos + RTA_PRED + Ploidia + CAT_ERM, data = datos)
## 
##   n= 1492, number of events= 324 
##    (604 observations deleted due to missingness)
## 
##                 coef  exp(coef)   se(coef)      z Pr(>|z|)    
## Down1      1.2519694  3.4972237  0.2687036  4.659 3.17e-06 ***
## RTA_PRED1 -0.4994671  0.6068540  0.1539842 -3.244  0.00118 ** 
## TEL1      -0.4934883  0.6104931  0.2095660 -2.355  0.01853 *  
## Blancos    0.0021778  1.0021801  0.0004186  5.202 1.97e-07 ***
## Ploidia2  -1.2760240  0.2791450  0.2160617 -5.906 3.51e-09 ***
## Ploidia4   1.0036052  2.7280996  0.1532123  6.550 5.74e-11 ***
## Ploidia5  -2.1593846  0.1153961  1.0101705 -2.138  0.03255 *  
## CAT_ERM2   0.3744556  1.4541995  0.1420055  2.637  0.00837 ** 
## CAT_ERM3   0.7600290  2.1383383  0.1739915  4.368 1.25e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##           exp(coef) exp(-coef) lower .95 upper .95
## Down1        3.4972     0.2859   2.06539    5.9217
## RTA_PRED1    0.6069     1.6478   0.44876    0.8206
## TEL1         0.6105     1.6380   0.40485    0.9206
## Blancos      1.0022     0.9978   1.00136    1.0030
## Ploidia2     0.2791     3.5824   0.18278    0.4263
## Ploidia4     2.7281     0.3666   2.02044    3.6836
## Ploidia5     0.1154     8.6658   0.01593    0.8357
## CAT_ERM2     1.4542     0.6877   1.10090    1.9209
## CAT_ERM3     2.1383     0.4677   1.52046    3.0073
## 
## Concordance= 0.798  (se = 0.011 )
## Likelihood ratio test= 401.9  on 9 df,   p=<2e-16
## Wald test            = 335.3  on 9 df,   p=<2e-16
## Score (logrank) test = 451.6  on 9 df,   p=<2e-16

# Multivariable Cox model without Sexo.
# Each covariate is listed once; a repeated term in a formula is collapsed by
# R, so listing RTA_PRED a second time added nothing to the fit.
modelo2 <- coxph(
  Surv(TIEMPOSG, SGSTATUS) ~
    Down + RTA_PRED + Estirpe + Edad + TEL + Blancos + SNC + Ploidia +
    Blastos + CAT_ERM,
  data = datos
)
summary(modelo2)

## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Down + RTA_PRED + 
##     Estirpe + Edad + TEL + Blancos + SNC + RTA_PRED + Ploidia + 
##     Blastos + CAT_ERM, data = datos)
## 
##   n= 1480, number of events= 322 
##    (616 observations deleted due to missingness)
## 
##                 coef  exp(coef)   se(coef)      z Pr(>|z|)    
## Down1      1.2113879  3.3581422  0.2731949  4.434 9.24e-06 ***
## RTA_PRED1 -0.5341630  0.5861597  0.1609960 -3.318 0.000907 ***
## EstirpeT  -0.0694643  0.9328934  0.1697857 -0.409 0.682445    
## Edad       0.0200406  1.0202428  0.0125512  1.597 0.110330    
## TEL1      -0.4479239  0.6389533  0.2134239 -2.099 0.035839 *  
## Blancos    0.0020472  1.0020493  0.0005055  4.050 5.12e-05 ***
## SNC2       0.2954843  1.3437769  0.4174822  0.708 0.479084    
## SNC3       0.3557135  1.4271986  0.2285331  1.557 0.119587    
## Ploidia2  -1.2952898  0.2738185  0.2180407 -5.941 2.84e-09 ***
## Ploidia4   0.9987291  2.7148294  0.1540185  6.484 8.90e-11 ***
## Ploidia5  -2.1156908  0.1205500  1.0106026 -2.093 0.036305 *  
## Blastos   -0.0002632  0.9997369  0.0018069 -0.146 0.884205    
## CAT_ERM2   0.3689451  1.4462082  0.1435918  2.569 0.010187 *  
## CAT_ERM3   0.7348562  2.0851821  0.1758722  4.178 2.94e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##           exp(coef) exp(-coef) lower .95 upper .95
## Down1        3.3581     0.2978   1.96587    5.7364
## RTA_PRED1    0.5862     1.7060   0.42754    0.8036
## EstirpeT     0.9329     1.0719   0.66882    1.3012
## Edad         1.0202     0.9802   0.99545    1.0457
## TEL1         0.6390     1.5651   0.42054    0.9708
## Blancos      1.0020     0.9980   1.00106    1.0030
## SNC2         1.3438     0.7442   0.59288    3.0457
## SNC3         1.4272     0.7007   0.91192    2.2336
## Ploidia2     0.2738     3.6521   0.17859    0.4198
## Ploidia4     2.7148     0.3683   2.00744    3.6715
## Ploidia5     0.1205     8.2953   0.01663    0.8738
## Blastos      0.9997     1.0003   0.99620    1.0033
## CAT_ERM2     1.4462     0.6915   1.09145    1.9163
## CAT_ERM3     2.0852     0.4796   1.47721    2.9434
## 
## Concordance= 0.798  (se = 0.011 )
## Likelihood ratio test= 406.7  on 14 df,   p=<2e-16
## Wald test            = 338.6  on 14 df,   p=<2e-16
## Score (logrank) test = 458.3  on 14 df,   p=<2e-16

# Multivariable Cox model without Estirpe.
# Each covariate is listed once; a repeated term in a formula is collapsed by
# R, so listing RTA_PRED a second time added nothing to the fit.
modelo3 <- coxph(
  Surv(TIEMPOSG, SGSTATUS) ~
    Sexo + Down + RTA_PRED + Edad + TEL + Blancos + SNC + Ploidia +
    Blastos + CAT_ERM,
  data = datos
)
summary(modelo3)

## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED + 
##     Edad + TEL + Blancos + SNC + RTA_PRED + Ploidia + Blastos + 
##     CAT_ERM, data = datos)
## 
##   n= 1480, number of events= 322 
##    (616 observations deleted due to missingness)
## 
##                 coef  exp(coef)   se(coef)      z Pr(>|z|)    
## Sexo1      0.0258837  1.0262216  0.1147929  0.225  0.82160    
## Down1      1.2256144  3.4062582  0.2733558  4.484 7.34e-06 ***
## RTA_PRED1 -0.5168242  0.5964116  0.1580203 -3.271  0.00107 ** 
## Edad       0.0192324  1.0194185  0.0124568  1.544  0.12261    
## TEL1      -0.4424548  0.6424574  0.2130545 -2.077  0.03783 *  
## Blancos    0.0019790  1.0019810  0.0004776  4.144 3.41e-05 ***
## SNC2       0.2959569  1.3444122  0.4178827  0.708  0.47880    
## SNC3       0.3539496  1.4246833  0.2279209  1.553  0.12044    
## Ploidia2  -1.2979229  0.2730985  0.2179297 -5.956 2.59e-09 ***
## Ploidia4   0.9982177  2.7134413  0.1540682  6.479 9.23e-11 ***
## Ploidia5  -2.1072579  0.1215709  1.0106949 -2.085  0.03707 *  
## Blastos   -0.0002456  0.9997545  0.0018084 -0.136  0.89198    
## CAT_ERM2   0.3692158  1.4465997  0.1436174  2.571  0.01015 *  
## CAT_ERM3   0.7333533  2.0820506  0.1758428  4.171 3.04e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##           exp(coef) exp(-coef) lower .95 upper .95
## Sexo1        1.0262     0.9744   0.81946    1.2851
## Down1        3.4063     0.2936   1.99341    5.8205
## RTA_PRED1    0.5964     1.6767   0.43756    0.8129
## Edad         1.0194     0.9810   0.99483    1.0446
## TEL1         0.6425     1.5565   0.42315    0.9754
## Blancos      1.0020     0.9980   1.00104    1.0029
## SNC2         1.3444     0.7438   0.59269    3.0495
## SNC3         1.4247     0.7019   0.91141    2.2270
## Ploidia2     0.2731     3.6617   0.17816    0.4186
## Ploidia4     2.7134     0.3685   2.00621    3.6700
## Ploidia5     0.1216     8.2257   0.01677    0.8813
## Blastos      0.9998     1.0002   0.99622    1.0033
## CAT_ERM2     1.4466     0.6913   1.09169    1.9169
## CAT_ERM3     2.0821     0.4803   1.47507    2.9388
## 
## Concordance= 0.799  (se = 0.011 )
## Likelihood ratio test= 406.6  on 14 df,   p=<2e-16
## Wald test            = 337.9  on 14 df,   p=<2e-16
## Score (logrank) test = 456.9  on 14 df,   p=<2e-16

# Multivariable Cox model without Edad.
# Each covariate is listed once; a repeated term in a formula is collapsed by
# R, so listing RTA_PRED a second time added nothing to the fit.
modelo4 <- coxph(
  Surv(TIEMPOSG, SGSTATUS) ~
    Sexo + Down + RTA_PRED + Estirpe + TEL + Blancos + SNC + Ploidia +
    Blastos + CAT_ERM,
  data = datos
)
summary(modelo4)

## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED + 
##     Estirpe + TEL + Blancos + SNC + RTA_PRED + Ploidia + Blastos + 
##     CAT_ERM, data = datos)
## 
##   n= 1480, number of events= 322 
##    (616 observations deleted due to missingness)
## 
##                 coef  exp(coef)   se(coef)      z Pr(>|z|)    
## Sexo1      3.509e-02  1.036e+00  1.154e-01  0.304  0.76099    
## Down1      1.261e+00  3.529e+00  2.725e-01  4.627 3.71e-06 ***
## RTA_PRED1 -5.068e-01  6.024e-01  1.604e-01 -3.160  0.00158 ** 
## EstirpeT  -3.809e-02  9.626e-01  1.690e-01 -0.225  0.82164    
## TEL1      -4.941e-01  6.101e-01  2.113e-01 -2.338  0.01937 *  
## Blancos    2.007e-03  1.002e+00  5.022e-04  3.997 6.40e-05 ***
## SNC2       3.143e-01  1.369e+00  4.176e-01  0.753  0.45168    
## SNC3       3.449e-01  1.412e+00  2.290e-01  1.506  0.13201    
## Ploidia2  -1.284e+00  2.770e-01  2.180e-01 -5.889 3.88e-09 ***
## Ploidia4   1.015e+00  2.760e+00  1.537e-01  6.606 3.95e-11 ***
## Ploidia5  -2.114e+00  1.208e-01  1.011e+00 -2.091  0.03652 *  
## Blastos   -9.143e-05  9.999e-01  1.803e-03 -0.051  0.95956    
## CAT_ERM2   3.714e-01  1.450e+00  1.436e-01  2.587  0.00969 ** 
## CAT_ERM3   7.587e-01  2.135e+00  1.752e-01  4.331 1.49e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##           exp(coef) exp(-coef) lower .95 upper .95
## Sexo1        1.0357     0.9655   0.82613    1.2985
## Down1        3.5290     0.2834   2.06857    6.0204
## RTA_PRED1    0.6024     1.6599   0.43997    0.8249
## EstirpeT     0.9626     1.0388   0.69125    1.3405
## TEL1         0.6101     1.6390   0.40325    0.9232
## Blancos      1.0020     0.9980   1.00102    1.0030
## SNC2         1.3693     0.7303   0.60401    3.1041
## SNC3         1.4119     0.7083   0.90131    2.2116
## Ploidia2     0.2770     3.6103   0.18068    0.4246
## Ploidia4     2.7600     0.3623   2.04220    3.7302
## Ploidia5     0.1208     8.2778   0.01666    0.8759
## Blastos      0.9999     1.0001   0.99638    1.0034
## CAT_ERM2     1.4498     0.6898   1.09415    1.9210
## CAT_ERM3     2.1354     0.4683   1.51483    3.0103
## 
## Concordance= 0.798  (se = 0.011 )
## Likelihood ratio test= 404.3  on 14 df,   p=<2e-16
## Wald test            = 336.7  on 14 df,   p=<2e-16
## Score (logrank) test = 456.5  on 14 df,   p=<2e-16

# Multivariable Cox model without SNC.
# Each covariate is listed once; repeated terms in a formula are collapsed by
# R, so listing Sexo and RTA_PRED a second time added nothing to the fit.
# NOTE(review): this reuses the name `modelo4`, replacing the model fitted
# without Edad earlier in the script, so that model no longer takes part in
# the AIC comparison further down. The formula also leaves out Edad although
# the heading only mentions SNC - confirm both the name and the covariates.
modelo4 <- coxph(
  Surv(TIEMPOSG, SGSTATUS) ~
    Sexo + Down + RTA_PRED + Estirpe + TEL + Blancos + Ploidia + Blastos +
    CAT_ERM,
  data = datos
)
summary(modelo4)

## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED + 
##     Estirpe + TEL + Blancos + Sexo + RTA_PRED + Ploidia + Blastos + 
##     CAT_ERM, data = datos)
## 
##   n= 1480, number of events= 322 
##    (616 observations deleted due to missingness)
## 
##                 coef  exp(coef)   se(coef)      z Pr(>|z|)    
## Sexo1      0.0379587  1.0386883  0.1152330  0.329  0.74185    
## Down1      1.2476843  3.4822696  0.2723825  4.581 4.64e-06 ***
## RTA_PRED1 -0.5054968  0.6032058  0.1600703 -3.158  0.00159 ** 
## EstirpeT  -0.0398533  0.9609304  0.1695726 -0.235  0.81419    
## TEL1      -0.4910419  0.6119884  0.2106862 -2.331  0.01977 *  
## Blancos    0.0022424  1.0022449  0.0004896  4.580 4.64e-06 ***
## Ploidia2  -1.2901588  0.2752271  0.2179827 -5.919 3.25e-09 ***
## Ploidia4   1.0060053  2.7346550  0.1533347  6.561 5.35e-11 ***
## Ploidia5  -2.1510035  0.1163673  1.0104440 -2.129  0.03327 *  
## Blastos   -0.0002308  0.9997693  0.0018050 -0.128  0.89827    
## CAT_ERM2   0.3674584  1.4440597  0.1433850  2.563  0.01038 *  
## CAT_ERM3   0.7568939  2.1316448  0.1749745  4.326 1.52e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##           exp(coef) exp(-coef) lower .95 upper .95
## Sexo1        1.0387     0.9628   0.82870    1.3019
## Down1        3.4823     0.2872   2.04178    5.9390
## RTA_PRED1    0.6032     1.6578   0.44077    0.8255
## EstirpeT     0.9609     1.0407   0.68921    1.3398
## TEL1         0.6120     1.6340   0.40495    0.9249
## Blancos      1.0022     0.9978   1.00128    1.0032
## Ploidia2     0.2752     3.6334   0.17953    0.4219
## Ploidia4     2.7347     0.3657   2.02481    3.6934
## Ploidia5     0.1164     8.5935   0.01606    0.8432
## Blastos      0.9998     1.0002   0.99624    1.0033
## CAT_ERM2     1.4441     0.6925   1.09027    1.9126
## CAT_ERM3     2.1316     0.4691   1.51278    3.0037
## 
## Concordance= 0.797  (se = 0.011 )
## Likelihood ratio test= 401.8  on 12 df,   p=<2e-16
## Wald test            = 334.1  on 12 df,   p=<2e-16
## Score (logrank) test = 453.1  on 12 df,   p=<2e-16

# Multivariable Cox model without Blastos.
# Each covariate is listed once; repeated terms in a formula are collapsed by
# R, so listing Sexo and RTA_PRED a second time added nothing to the fit.
# NOTE(review): the formula also leaves out Edad although the heading only
# mentions Blastos - confirm the intended covariates.
modelo5 <- coxph(
  Surv(TIEMPOSG, SGSTATUS) ~
    Sexo + Down + RTA_PRED + Estirpe + TEL + Blancos + Ploidia + SNC +
    CAT_ERM,
  data = datos
)
summary(modelo5)

## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED + 
##     Estirpe + TEL + Blancos + Sexo + RTA_PRED + Ploidia + SNC + 
##     CAT_ERM, data = datos)
## 
##   n= 1492, number of events= 324 
##    (604 observations deleted due to missingness)
## 
##                 coef  exp(coef)   se(coef)      z Pr(>|z|)    
## Sexo1      0.0458101  1.0468756  0.1150859  0.398  0.69059    
## Down1      1.2668673  3.5497149  0.2699213  4.693 2.69e-06 ***
## RTA_PRED1 -0.5060011  0.6029017  0.1574880 -3.213  0.00131 ** 
## EstirpeT  -0.0448084  0.9561807  0.1688928 -0.265  0.79077    
## TEL1      -0.4994694  0.6068526  0.2108179 -2.369  0.01783 *  
## Blancos    0.0020071  1.0020092  0.0004663  4.304 1.68e-05 ***
## Ploidia2  -1.2675931  0.2815084  0.2162065 -5.863 4.55e-09 ***
## Ploidia4   1.0124136  2.7522359  0.1536285  6.590 4.40e-11 ***
## Ploidia5  -2.1180045  0.1202714  1.0107652 -2.095  0.03613 *  
## SNC2       0.3111999  1.3650621  0.4164748  0.747  0.45493    
## SNC3       0.3442877  1.4109846  0.2289780  1.504  0.13269    
## CAT_ERM2   0.3770787  1.4580190  0.1423105  2.650  0.00806 ** 
## CAT_ERM3   0.7638754  2.1465790  0.1743111  4.382 1.17e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##           exp(coef) exp(-coef) lower .95 upper .95
## Sexo1        1.0469     0.9552   0.83548    1.3118
## Down1        3.5497     0.2817   2.09139    6.0249
## RTA_PRED1    0.6029     1.6586   0.44278    0.8209
## EstirpeT     0.9562     1.0458   0.68672    1.3314
## TEL1         0.6069     1.6478   0.40145    0.9173
## Blancos      1.0020     0.9980   1.00109    1.0029
## Ploidia2     0.2815     3.5523   0.18427    0.4301
## Ploidia4     2.7522     0.3633   2.03665    3.7192
## Ploidia5     0.1203     8.3145   0.01659    0.8720
## SNC2         1.3651     0.7326   0.60346    3.0879
## SNC3         1.4110     0.7087   0.90077    2.2102
## CAT_ERM2     1.4580     0.6859   1.10313    1.9271
## CAT_ERM3     2.1466     0.4659   1.52536    3.0208
## 
## Concordance= 0.798  (se = 0.011 )
## Likelihood ratio test= 404.6  on 13 df,   p=<2e-16
## Wald test            = 338.3  on 13 df,   p=<2e-16
## Score (logrank) test = 455.8  on 13 df,   p=<2e-16

# Model selection by AIC across the candidate Cox models.
# NOTE(review): the fits do not all use the same observations (n = 1480 or
# n = 1492 in the summaries, and AIC() warns about it), so these AIC values
# are not strictly comparable - consider refitting on a common set of rows.
AIC(modelo1, modelo2, modelo3, modelo4, modelo5)

## Warning in AIC.default(modelo1, modelo2, modelo3, modelo4, modelo5): models are
## not all fitted to the same number of observations

##         df      AIC
## modelo1  9 4145.965
## modelo2 14 4120.666
## modelo3 14 4120.784
## modelo4 12 4121.604
## modelo5 13 4151.231

# Model 2 is the one with the lowest AIC, so it is refitted here as the
# selected model.
# Each covariate is listed once; a repeated term in a formula is collapsed by
# R, so listing RTA_PRED a second time added nothing to the fit.
modelo2 <- coxph(
  Surv(TIEMPOSG, SGSTATUS) ~
    Down + RTA_PRED + Estirpe + Edad + TEL + Blancos + SNC + Ploidia +
    Blastos + CAT_ERM,
  data = datos
)

Forest plot del modelo seleccionado

# Forest plot of the hazard ratios of the selected model (`modelo2`).
ggforest(model = modelo2, data = datos)

TF CEECS

Grupo 10

16/10/2022