Integrantes:
Eisik, Magali
Gomez, Sergio
Querci, Marcia
Valenti, Florencia
Resultados del Protocolo ALLIC BFM 2012 para el tratamiento de la Leucemia Linfoblástica Aguda en niños en Argentina
Objetivo general: Comparar la enfermedad residual mínima (ERM) al día 15 del tratamiento y, de acuerdo con sus niveles, evaluar la sobrevida libre de eventos.
library(readxl)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.5 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.0.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(ggplot2)
library(survival)
library(DT)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(arsenal)
# Let the user pick the Excel workbook interactively, then read it.
path_datos <- file.choose()
datos <- read_excel(path = path_datos)
# List the column names that were read.
names(datos)
## [1] "Sexo" "Down" "Blancos" "Blastos" "MO"
## [6] "SNC" "Ploidia" "Estirpe" "RTA_PRED" "CAT_ERM"
## [11] "TiempoEFS" "EFSstatus" "Edad" "MLL" "TIEMPOSG"
## [16] "SGSTATUS" "Edad_cat" "Blancos_cat" "TEL"
# Render the first rows of the data set as a styled kable table.
kable_styling(kbl(head(datos)))
Sexo | Down | Blancos | Blastos | MO | SNC | Ploidia | Estirpe | RTA_PRED | CAT_ERM | TiempoEFS | EFSstatus | Edad | MLL | TIEMPOSG | SGSTATUS | Edad_cat | Blancos_cat | TEL |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 3 | 2 | 90 | 1 | 1 | B | 1 | NA | 6 | 0 | 2 | 0 | 81 | 0 | 1 | 1 | 0 |
1 | 0 | 36 | 48 | 50 | 1 | 1 | T | 0 | 3 | 1 | 0 | 14 | 0 | 16 | 1 | 2 | 2 | NA |
0 | 0 | 10 | 95 | 98 | 1 | 2 | B | 0 | 3 | 5 | 0 | 4 | 0 | 67 | 0 | 1 | 1 | 0 |
1 | 0 | 8 | 98 | 98 | 1 | 2 | B | 1 | 1 | 10 | 0 | 3 | 0 | 131 | 0 | 1 | 1 | 0 |
0 | 0 | 12 | 22 | 80 | 1 | 2 | B | 1 | 1 | 1 | 1 | 1 | 0 | 28 | 1 | 1 | 1 | NA |
1 | 0 | 37 | 62 | 95 | 3 | 2 | T | 1 | 1 | 5 | 0 | 15 | 0 | 69 | 0 | 2 | 2 | 0 |
ncol(datos)
## [1] 19
nrow(datos)
## [1] 2096
summary(datos)
## Sexo Down Blancos Blastos
## Min. :0.0000 Min. :0.00000 Min. : 0.00 Length:2096
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.: 4.00 Class :character
## Median :1.0000 Median :0.00000 Median : 9.00 Mode :character
## Mean :0.5592 Mean :0.02099 Mean : 42.17
## 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.: 33.00
## Max. :1.0000 Max. :1.00000 Max. :1165.00
##
## MO SNC Ploidia Estirpe
## Min. : 10.00 Min. :1.000 Min. :1.000 Length:2096
## 1st Qu.: 84.00 1st Qu.:1.000 1st Qu.:2.000 Class :character
## Median : 90.00 Median :1.000 Median :2.000 Mode :character
## Mean : 86.79 Mean :1.088 Mean :2.689
## 3rd Qu.: 96.00 3rd Qu.:1.000 3rd Qu.:4.000
## Max. :100.00 Max. :3.000 Max. :5.000
##
## RTA_PRED CAT_ERM TiempoEFS EFSstatus
## Min. :0.0000 Min. :1.000 Min. : 0.000 Min. :0.0000
## 1st Qu.:1.0000 1st Qu.:1.000 1st Qu.: 1.000 1st Qu.:0.0000
## Median :1.0000 Median :2.000 Median : 3.000 Median :0.0000
## Mean :0.8895 Mean :1.784 Mean : 3.605 Mean :0.2557
## 3rd Qu.:1.0000 3rd Qu.:2.000 3rd Qu.: 6.000 3rd Qu.:1.0000
## Max. :1.0000 Max. :3.000 Max. :11.000 Max. :1.0000
## NA's :5 NA's :354
## Edad MLL TIEMPOSG SGSTATUS
## Min. : 0.000 Min. :0.0000 Min. : 0.00 Min. :0.0000
## 1st Qu.: 3.000 1st Qu.:0.0000 1st Qu.: 23.00 1st Qu.:0.0000
## Median : 5.000 Median :0.0000 Median : 47.00 Median :0.0000
## Mean : 6.548 Mean :0.0115 Mean : 50.81 Mean :0.2233
## 3rd Qu.:10.000 3rd Qu.:0.0000 3rd Qu.: 77.00 3rd Qu.:0.0000
## Max. :24.000 Max. :1.0000 Max. :133.00 Max. :1.0000
## NA's :9
## Edad_cat Blancos_cat TEL
## Min. :1.000 Min. :1.000 Min. :0.000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:0.000
## Median :1.000 Median :1.000 Median :0.000
## Mean :1.463 Mean :1.342 Mean :0.131
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:0.000
## Max. :2.000 Max. :2.000 Max. :1.000
## NA's :401
## Warning: NAs introducidos por coerción
## tableby Object
##
## Function Call:
## tableby(formula = CAT_ERM ~ Sexo + Down + MLL + RTA_PRED + Estirpe +
## Edad_cat + TEL + Blancos_cat + MO + SNC + MO + Ploidia +
## Blastos, data = datos)
##
## Variable(s):
## CAT_ERM ~ Sexo, Down, MLL, RTA_PRED, Estirpe, Edad_cat, TEL, Blancos_cat, MO, SNC, Ploidia, Blastos
##
##
## | | 1 (N=656) | 2 (N=806) | 3 (N=280) | Total (N=1742) | p value|
## |:------------|:----------------:|:----------------:|:----------------:|:----------------:|-------:|
## |Sexo | | | | | 0.040|
## |- 0 | 317 (48.3%) | 343 (42.6%) | 115 (41.1%) | 775 (44.5%) | |
## |- 1 | 339 (51.7%) | 463 (57.4%) | 165 (58.9%) | 967 (55.5%) | |
## |Down | | | | | 0.530|
## |- 0 | 645 (98.3%) | 794 (98.5%) | 273 (97.5%) | 1712 (98.3%) | |
## |- 1 | 11 (1.7%) | 12 (1.5%) | 7 (2.5%) | 30 (1.7%) | |
## |MLL | | | | | 0.479|
## |- N-Miss | 4 | 4 | 0 | 8 | |
## |- 0 | 642 (98.5%) | 794 (99.0%) | 278 (99.3%) | 1714 (98.8%) | |
## |- 1 | 10 (1.5%) | 8 (1.0%) | 2 (0.7%) | 20 (1.2%) | |
## |RTA_PRED | | | | | < 0.001|
## |- 0 | 22 (3.4%) | 71 (8.8%) | 100 (35.7%) | 193 (11.1%) | |
## |- 1 | 634 (96.6%) | 735 (91.2%) | 180 (64.3%) | 1549 (88.9%) | |
## |Estirpe | | | | | < 0.001|
## |- B | 598 (91.2%) | 730 (90.6%) | 217 (77.5%) | 1545 (88.7%) | |
## |- T | 58 (8.8%) | 76 (9.4%) | 63 (22.5%) | 197 (11.3%) | |
## |Edad_cat | | | | | < 0.001|
## |- 1 | 351 (53.5%) | 454 (56.3%) | 116 (41.4%) | 921 (52.9%) | |
## |- 2 | 305 (46.5%) | 352 (43.7%) | 164 (58.6%) | 821 (47.1%) | |
## |TEL | | | | | 0.062|
## |- N-Miss | 109 | 101 | 40 | 250 | |
## |- 0 | 476 (87.0%) | 596 (84.5%) | 217 (90.4%) | 1289 (86.4%) | |
## |- 1 | 71 (13.0%) | 109 (15.5%) | 23 (9.6%) | 203 (13.6%) | |
## |Blancos_cat | | | | | < 0.001|
## |- 1 | 470 (71.6%) | 542 (67.2%) | 134 (47.9%) | 1146 (65.8%) | |
## |- 2 | 186 (28.4%) | 264 (32.8%) | 146 (52.1%) | 596 (34.2%) | |
## |MO | | | | | 0.004|
## |- Mean (SD) | 85.294 (16.320) | 87.618 (13.769) | 88.114 (15.429) | 86.823 (15.081) | |
## |- Range | 10.000 - 100.000 | 25.000 - 100.000 | 14.000 - 100.000 | 10.000 - 100.000 | |
## |SNC | | | | | 0.254|
## |- 1 | 625 (95.3%) | 769 (95.4%) | 258 (92.1%) | 1652 (94.8%) | |
## |- 2 | 7 (1.1%) | 9 (1.1%) | 4 (1.4%) | 20 (1.1%) | |
## |- 3 | 24 (3.7%) | 28 (3.5%) | 18 (6.4%) | 70 (4.0%) | |
## |Ploidia | | | | | < 0.001|
## |- 1 | 98 (14.9%) | 169 (21.0%) | 53 (18.9%) | 320 (18.4%) | |
## |- 2 | 340 (51.8%) | 313 (38.8%) | 105 (37.5%) | 758 (43.5%) | |
## |- 4 | 197 (30.0%) | 303 (37.6%) | 117 (41.8%) | 617 (35.4%) | |
## |- 5 | 21 (3.2%) | 21 (2.6%) | 5 (1.8%) | 47 (2.7%) | |
## |Blastos | | | | | < 0.001|
## |- N-Miss | 7 | 6 | 0 | 13 | |
## |- Mean (SD) | 41.757 (34.808) | 49.990 (34.967) | 58.454 (36.262) | 48.270 (35.583) | |
## |- Range | 0.000 - 100.000 | 0.000 - 100.000 | 0.000 - 100.000 | 0.000 - 100.000 | |
## tableby Object
##
## Function Call:
## tableby(formula = CAT_ERM ~ Sexo + Down + MLL + RTA_PRED + Estirpe +
## Edad + TEL + Blancos + MO + SNC + MO + Ploidia + Blastos,
## data = datos)
##
## Variable(s):
## CAT_ERM ~ Sexo, Down, MLL, RTA_PRED, Estirpe, Edad, TEL, Blancos, MO, SNC, Ploidia, Blastos
##
##
## | | 1 (N=656) | 2 (N=806) | 3 (N=280) | Total (N=1742) | p value|
## |:------------|:----------------:|:----------------:|:----------------:|:----------------:|-------:|
## |Sexo | | | | | 0.040|
## |- 0 | 317 (48.3%) | 343 (42.6%) | 115 (41.1%) | 775 (44.5%) | |
## |- 1 | 339 (51.7%) | 463 (57.4%) | 165 (58.9%) | 967 (55.5%) | |
## |Down | | | | | 0.530|
## |- 0 | 645 (98.3%) | 794 (98.5%) | 273 (97.5%) | 1712 (98.3%) | |
## |- 1 | 11 (1.7%) | 12 (1.5%) | 7 (2.5%) | 30 (1.7%) | |
## |MLL | | | | | 0.479|
## |- N-Miss | 4 | 4 | 0 | 8 | |
## |- 0 | 642 (98.5%) | 794 (99.0%) | 278 (99.3%) | 1714 (98.8%) | |
## |- 1 | 10 (1.5%) | 8 (1.0%) | 2 (0.7%) | 20 (1.2%) | |
## |RTA_PRED | | | | | < 0.001|
## |- 0 | 22 (3.4%) | 71 (8.8%) | 100 (35.7%) | 193 (11.1%) | |
## |- 1 | 634 (96.6%) | 735 (91.2%) | 180 (64.3%) | 1549 (88.9%) | |
## |Estirpe | | | | | < 0.001|
## |- B | 598 (91.2%) | 730 (90.6%) | 217 (77.5%) | 1545 (88.7%) | |
## |- T | 58 (8.8%) | 76 (9.4%) | 63 (22.5%) | 197 (11.3%) | |
## |Edad | | | | | < 0.001|
## |- Mean (SD) | 6.552 (4.476) | 6.347 (4.495) | 7.821 (4.760) | 6.661 (4.558) | |
## |- Range | 0.000 - 19.000 | 0.000 - 24.000 | 0.000 - 22.000 | 0.000 - 24.000 | |
## |TEL | | | | | 0.062|
## |- N-Miss | 109 | 101 | 40 | 250 | |
## |- 0 | 476 (87.0%) | 596 (84.5%) | 217 (90.4%) | 1289 (86.4%) | |
## |- 1 | 71 (13.0%) | 109 (15.5%) | 23 (9.6%) | 203 (13.6%) | |
## |Blancos | | | | | < 0.001|
## |- Mean (SD) | 27.142 (55.788) | 36.916 (84.408) | 80.611 (141.502) | 40.258 (89.475) | |
## |- Range | 0.000 - 529.000 | 0.000 - 1165.000 | 0.000 - 957.000 | 0.000 - 1165.000 | |
## |MO | | | | | 0.004|
## |- Mean (SD) | 85.294 (16.320) | 87.618 (13.769) | 88.114 (15.429) | 86.823 (15.081) | |
## |- Range | 10.000 - 100.000 | 25.000 - 100.000 | 14.000 - 100.000 | 10.000 - 100.000 | |
## |SNC | | | | | 0.254|
## |- 1 | 625 (95.3%) | 769 (95.4%) | 258 (92.1%) | 1652 (94.8%) | |
## |- 2 | 7 (1.1%) | 9 (1.1%) | 4 (1.4%) | 20 (1.1%) | |
## |- 3 | 24 (3.7%) | 28 (3.5%) | 18 (6.4%) | 70 (4.0%) | |
## |Ploidia | | | | | < 0.001|
## |- 1 | 98 (14.9%) | 169 (21.0%) | 53 (18.9%) | 320 (18.4%) | |
## |- 2 | 340 (51.8%) | 313 (38.8%) | 105 (37.5%) | 758 (43.5%) | |
## |- 4 | 197 (30.0%) | 303 (37.6%) | 117 (41.8%) | 617 (35.4%) | |
## |- 5 | 21 (3.2%) | 21 (2.6%) | 5 (1.8%) | 47 (2.7%) | |
## |Blastos | | | | | < 0.001|
## |- N-Miss | 7 | 6 | 0 | 13 | |
## |- Mean (SD) | 41.757 (34.808) | 49.990 (34.967) | 58.454 (36.262) | 48.270 (35.583) | |
## |- Range | 0.000 - 100.000 | 0.000 - 100.000 | 0.000 - 100.000 | 0.000 - 100.000 | |
##
## Overall
## n 2096
## Edad (median [IQR]) 5.00 [3.00, 10.00]
## MO (median [IQR]) 90.00 [84.00, 96.00]
## Blancos (median [IQR]) 9.00 [4.00, 33.00]
## Blastos (median [IQR]) 50.00 [12.00, 82.50]
##Analisis de supuestos y test
Variables categoricas
tabla1<-table(datos$CAT_ERM, datos$Down)
prop.table(tabla1)
##
## 0 1
## 1 0.370264064 0.006314581
## 2 0.455797933 0.006888634
## 3 0.156716418 0.004018370
plot(tabla1, col = c("red", "blue"), main = "Categorias de ERM vs Down")
chisq.test(tabla1)
## Warning in chisq.test(tabla1): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: tabla1
## X-squared = 1.2682, df = 2, p-value = 0.5304
#No se cumplen supuestos, se debe realizar el test de Fisher
fisher.test(tabla1)
##
## Fisher's Exact Test for Count Data
##
## data: tabla1
## p-value = 0.479
## alternative hypothesis: two.sided
tabla2<-table(datos$Ploidia, datos$CAT_ERM)
plot(tabla2, col=c("red", "blue", "yellow"), main = "Categorias de ERM vs Ploidia")
chisq.test(tabla2)
##
## Pearson's Chi-squared test
##
## data: tabla2
## X-squared = 35.184, df = 6, p-value = 3.97e-06
# Prednisone response (RTA_PRED) vs. residual-disease category (CAT_ERM):
# mosaic plot and Pearson chi-squared test of independence.
tabla3 <- table(datos$RTA_PRED, datos$CAT_ERM)
plot(
  tabla3,
  col = c("red", "blue", "yellow"),
  main = "Categorias de ERM vs Rta prednisona"
)
chisq.test(tabla3)
##
## Pearson's Chi-squared test
##
## data: tabla3
## X-squared = 216.44, df = 2, p-value < 2.2e-16
tabla4 <- table(datos$Sexo,datos$CAT_ERM)
plot(tabla4, col=c("red", "blue", "yellow"), main = "Categorias de ERM vs Sexo")
chisq.test(tabla4)
##
## Pearson's Chi-squared test
##
## data: tabla4
## X-squared = 6.4488, df = 2, p-value = 0.03978
# SNC vs. residual-disease category (CAT_ERM): mosaic plot and exact test.
tabla5 <- table(datos$SNC, datos$CAT_ERM)
plot(
  tabla5,
  col = c("red", "blue", "yellow"),
  main = "Categorias de ERM vs SNC"
)
# The chi-squared assumptions are not met for this table, so Fisher's test is
# used. The plain exact computation was reported to fail on this 3x3 table,
# so the p-value is estimated by Monte Carlo simulation instead (B replicates).
fisher.test(tabla5, simulate.p.value = TRUE, B = 10000)
tabla6 <- table(datos$MLL,datos$CAT_ERM)
plot(tabla6, col=c("red", "blue", "yellow"), main = "Categorias de ERM vs MLL")
chisq.test(tabla6)
## Warning in chisq.test(tabla6): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: tabla6
## X-squared = 1.4718, df = 2, p-value = 0.4791
#no se cumplen supuestos, se realiza el test de Fisher
fisher.test(tabla6)
##
## Fisher's Exact Test for Count Data
##
## data: tabla6
## p-value = 0.5936
## alternative hypothesis: two.sided
# TEL vs. residual-disease category (CAT_ERM): mosaic plot and Pearson
# chi-squared test of independence.
tabla7 <- table(datos$TEL, datos$CAT_ERM)
# The mosaic must draw tabla7 (TEL); tabla6 is the MLL table.
plot(
  tabla7,
  col = c("red", "blue", "yellow"),
  main = "Categorias de ERM vs TEL"
)
chisq.test(tabla7)
##
## Pearson's Chi-squared test
##
## data: tabla7
## X-squared = 5.5501, df = 2, p-value = 0.06235
tabla8 <- table(datos$Estirpe,datos$CAT_ERM)
plot(tabla8, col=c("red", "blue", "yellow"), main = "Categorias de ERM vs Estirpe")
chisq.test(tabla8)
##
## Pearson's Chi-squared test
##
## data: tabla8
## X-squared = 41.784, df = 2, p-value = 8.448e-10
tabla9 <- table(datos$Edad_cat,datos$CAT_ERM)
plot(tabla9, col=c("red", "blue", "yellow"), main = "Categorias de ERM vs Edad")
chisq.test(tabla9)
##
## Pearson's Chi-squared test
##
## data: tabla9
## X-squared = 18.683, df = 2, p-value = 8.769e-05
tabla10 <- table(datos$Blancos_cat,datos$CAT_ERM)
plot(tabla10, col=c("red", "blue", "yellow"), main = "Categorias de ERM vs Globulos Blancos")
chisq.test(tabla10)
##
## Pearson's Chi-squared test
##
## data: tabla10
## X-squared = 50.76, df = 2, p-value = 9.495e-12
Variables continuas
#Normalidad
#Edad
hist(datos$Edad)
shapiro.test(datos$Edad)
##
## Shapiro-Wilk normality test
##
## data: datos$Edad
## W = 0.90726, p-value < 2.2e-16
# Residual analysis for Edad: linear model of age on CAT_ERM.
modedad <- lm(Edad ~ CAT_ERM, data = datos)
# Gather raw residuals, standardized residuals and fitted values in one table.
# data.frame() binds the three vectors as columns; as.data.frame() would take
# the second argument as row.names and keep only the first vector.
residuos <- resid(modedad)
r_estandarizados <- rstandard(modedad)
predichos <- fitted(modedad)
data_residuos <- data.frame(residuos, r_estandarizados, predichos)
# Shapiro-Wilk normality test on the residuals.
shapiro.test(residuos)
##
## Shapiro-Wilk normality test
##
## data: residuos
## W = 0.91802, p-value < 2.2e-16
qqnorm(residuos)
qqline(residuos)
#no da normal
#Evalúo la normalidad de los residuos graficamente
ggplot(data_residuos)+
geom_histogram(
mapping = aes(x=residuos)
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
qqnorm(residuos)
qqline(residuos)
ggplot(data_residuos)+
geom_point(
mapping = aes(x=predichos, y=r_estandarizados))
#Blastos
hist(datos$Blastos)
shapiro.test(datos$Blastos)
##
## Shapiro-Wilk normality test
##
## data: datos$Blastos
## W = 0.89253, p-value < 2.2e-16
# Residual analysis for Blastos: linear model of blasts on CAT_ERM.
modblast <- lm(Blastos ~ CAT_ERM, data = datos)
# Gather raw residuals, standardized residuals and fitted values in one table.
residuos1 <- resid(modblast)
r_estandarizados1 <- rstandard(modblast)
predichos1 <- fitted(modblast)
# Use residuos1 (this model), not residuos from the age model, and bind the
# three vectors as columns with data.frame(); as.data.frame() would take the
# second argument as row.names.
data_residuos1 <- data.frame(residuos1, r_estandarizados1, predichos1)
# Shapiro-Wilk normality test on the residuals.
shapiro.test(residuos1)
##
## Shapiro-Wilk normality test
##
## data: residuos1
## W = 0.91901, p-value < 2.2e-16
#no da normal
qqnorm(residuos1)
qqline(residuos1)
#Blancos
hist(datos$Blancos)
shapiro.test(datos$Blancos)
##
## Shapiro-Wilk normality test
##
## data: datos$Blancos
## W = 0.44578, p-value < 2.2e-16
# Residual analysis for Blancos: linear model of white cells on CAT_ERM.
modblanc <- lm(Blancos ~ CAT_ERM, data = datos)
# Gather raw residuals, standardized residuals and fitted values in one table.
# data.frame() binds the three vectors as columns; as.data.frame() would take
# the second argument as row.names and keep only the first vector.
residuos2 <- resid(modblanc)
r_estandarizados2 <- rstandard(modblanc)
predichos2 <- fitted(modblanc)
data_residuos2 <- data.frame(residuos2, r_estandarizados2, predichos2)
# Shapiro-Wilk normality test on the residuals.
shapiro.test(residuos2)
##
## Shapiro-Wilk normality test
##
## data: residuos2
## W = 0.52884, p-value < 2.2e-16
qqnorm(residuos2)
qqline(residuos2)
#no da normal
#Evalúo la normalidad de los residuos graficamente
ggplot(data_residuos2)+
geom_histogram(
mapping = aes(x=residuos2)
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
qqnorm(residuos2)
qqline(residuos2)
ggplot(data_residuos2)+
geom_point(
mapping = aes(x=predichos2, y=r_estandarizados2))
#MO
hist(datos$MO)
shapiro.test(datos$MO)
##
## Shapiro-Wilk normality test
##
## data: datos$MO
## W = 0.74845, p-value < 2.2e-16
# Residual analysis for MO: linear model of MO on CAT_ERM.
modMO <- lm(MO ~ CAT_ERM, data = datos)
# Gather raw residuals, standardized residuals and fitted values in one table.
# data.frame() binds the three vectors as columns; as.data.frame() would take
# the second argument as row.names and keep only the first vector.
residuos3 <- resid(modMO)
r_estandarizados3 <- rstandard(modMO)
predichos3 <- fitted(modMO)
data_residuos3 <- data.frame(residuos3, r_estandarizados3, predichos3)
# Shapiro-Wilk normality test on the residuals.
shapiro.test(residuos3)
##
## Shapiro-Wilk normality test
##
## data: residuos3
## W = 0.75815, p-value < 2.2e-16
#no da normal
qqnorm(residuos3)
qqline(residuos3)
#Evalúo la normalidad de los residuos graficamente
ggplot(data_residuos3)+
geom_histogram(
mapping = aes(x=residuos3)
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
qqnorm(residuos3)
qqline(residuos3)
ggplot(data_residuos3)+
geom_point(
mapping = aes(x=predichos3, y=r_estandarizados3))
#Evaluacion de homocedasticidad
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
#EDAD
leveneTest(Edad ~ CAT_ERM, data = datos)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 4.1252 0.01632 *
## 1739
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#pv< 0.05, hay evidencias de incumplimiento del supuesto de homogeneidad de varianzas
#Test
#Blastos
leveneTest(Blastos ~ CAT_ERM, data = datos)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 0.0868 0.9168
## 1726
#pv>0.05 no hay evidencias de incumplimiento del supuesto de homogeneidad de varianzas
#Test
#Blancos
leveneTest(Blancos ~ CAT_ERM, data = datos)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 34.944 1.321e-15 ***
## 1739
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#pv<0.05 hay evidencias de incumplimiento del supuesto de homogeneidad de varianzas
#Test
#MO
leveneTest(MO ~ CAT_ERM, data = datos)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 3.4411 0.03225 *
## 1739
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#pv<0.05 hay evidencias de incumplimiento del supuesto de homogeneidad de varianzas
#Test
# library() instead of require(): a missing package stops the script at once
# rather than returning FALSE and failing later.
library(dplyr)
library(car)
library(lattice)
library(mice)
##
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
##
## filter
## The following objects are masked from 'package:base':
##
## cbind, rbind
library(rlist)
library(naniar)
library(glmmTMB)
## Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
## TMB was built with Matrix version 1.5.1
## Current Matrix version is 1.4.0
## Please re-install 'TMB' from source using install.packages('TMB', type = 'source') or ask CRAN for a binary version of 'TMB' matching CRAN's 'Matrix' package
library(VIM)
## Loading required package: colorspace
## Loading required package: grid
## VIM is ready to use.
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
##
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
##
## sleep
# Packages used by the missing-data section. library() is used instead of
# require() so that a missing package stops the script at once; visdat is
# attached only once.
library(ggplot2)
library(visdat)
library(naniar)
library(rpart)
library(rpart.plot)
library(arsenal)
# Visual overview of missingness. The argument is spelled out as sort_miss
# instead of relying on partial matching of `sort`.
vis_miss(datos, sort_miss = TRUE)
vis_miss(datos, cluster = TRUE)
vis_dat(datos)
# Count of missing values per column.
colSums(is.na(datos))
## Sexo Down Blancos Blastos MO SNC
## 0 0 0 13 0 0
## Ploidia Estirpe RTA_PRED CAT_ERM TiempoEFS EFSstatus
## 0 0 5 354 0 0
## Edad MLL TIEMPOSG SGSTATUS Edad_cat Blancos_cat
## 0 9 0 0 0 0
## TEL
## 401
n_miss(datos)
## [1] 782
n_complete(datos)
## [1] 39042
prop_miss(datos)
## [1] 0.0196364
pct_miss(datos)
## [1] 1.96364
prop_complete(datos)
## [1] 0.9803636
pct_complete(datos)
## [1] 98.03636
#Categorias de enfermedad residual, VA categorica
prop_miss(datos$CAT_ERM)
## [1] 0.1688931
pct_complete(datos$CAT_ERM)
## [1] 83.11069
#TEL, VA categorica
prop_miss(datos$TEL)
## [1] 0.1913168
pct_complete(datos$TEL)
## [1] 80.86832
#MLL, VA categorica
prop_miss(datos$MLL)
## [1] 0.004293893
pct_complete(datos$MLL)
## [1] 99.57061
#Rta a prednisona, VA categorica
prop_miss(datos$RTA_PRED)
## [1] 0.002385496
pct_complete(datos$RTA_PRED)
## [1] 99.76145
#Blastos, VA cuantitativa
prop_miss(datos$Blastos)
## [1] 0.00620229
pct_complete(datos$Blastos)
## [1] 99.37977
datos %>% miss_var_summary()
## # A tibble: 19 x 3
## variable n_miss pct_miss
## <chr> <int> <dbl>
## 1 TEL 401 19.1
## 2 CAT_ERM 354 16.9
## 3 Blastos 13 0.620
## 4 MLL 9 0.429
## 5 RTA_PRED 5 0.239
## 6 Sexo 0 0
## 7 Down 0 0
## 8 Blancos 0 0
## 9 MO 0 0
## 10 SNC 0 0
## 11 Ploidia 0 0
## 12 Estirpe 0 0
## 13 TiempoEFS 0 0
## 14 EFSstatus 0 0
## 15 Edad 0 0
## 16 TIEMPOSG 0 0
## 17 SGSTATUS 0 0
## 18 Edad_cat 0 0
## 19 Blancos_cat 0 0
datos %>% miss_case_summary()
## # A tibble: 2,096 x 3
## case n_miss pct_miss
## <int> <int> <dbl>
## 1 498 3 15.8
## 2 555 3 15.8
## 3 1350 3 15.8
## 4 9 2 10.5
## 5 313 2 10.5
## 6 376 2 10.5
## 7 408 2 10.5
## 8 411 2 10.5
## 9 412 2 10.5
## 10 419 2 10.5
## # ... with 2,086 more rows
# Missingness map, first as-is and then with sorting and clustering enabled.
# TRUE is written in full because T is an ordinary, reassignable variable.
vis_miss(datos)
vis_miss(datos, sort_miss = TRUE, cluster = TRUE)
gg_miss_var(datos)
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
gg_miss_var(datos, facet = CAT_ERM)
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
gg_miss_case(datos)
gg_miss_case(datos, facet = CAT_ERM)
#con tel
gg_miss_var(datos, facet = TEL)
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
gg_miss_case(datos, facet = TEL)
#con MLL
gg_miss_var(datos, facet = MLL)
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
gg_miss_case(datos, facet = MLL)
#con rta a prednisona
gg_miss_var(datos, facet = RTA_PRED)
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
gg_miss_case(datos, facet = RTA_PRED)
gg_miss_upset(datos)
gg_miss_upset(datos, nset=20, nintersects=NA)
md.pattern(datos, rotate.names = T)
## Sexo Down Blancos MO SNC Ploidia Estirpe TiempoEFS EFSstatus Edad TIEMPOSG
## 1472 1 1 1 1 1 1 1 1 1 1 1
## 249 1 1 1 1 1 1 1 1 1 1 1
## 200 1 1 1 1 1 1 1 1 1 1 1
## 148 1 1 1 1 1 1 1 1 1 1 1
## 12 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 8 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1
## 0 0 0 0 0 0 0 0 0 0 0
## SGSTATUS Edad_cat Blancos_cat RTA_PRED MLL Blastos CAT_ERM TEL
## 1472 1 1 1 1 1 1 1 1 0
## 249 1 1 1 1 1 1 1 0 1
## 200 1 1 1 1 1 1 0 1 1
## 148 1 1 1 1 1 1 0 0 2
## 12 1 1 1 1 1 0 1 1 1
## 1 1 1 1 1 1 0 1 0 2
## 8 1 1 1 1 0 1 1 1 1
## 1 1 1 1 1 0 1 0 0 3
## 3 1 1 1 0 1 1 0 1 2
## 2 1 1 1 0 1 1 0 0 3
## 0 0 0 5 9 13 354 401 782
aggr(datos, col=c('navyblue','red'),numbers=TRUE, sortVars=TRUE, labels=names(datos), cex.axis=.7, gap=3, ylab=c("Histograma de missing data","Patron"))
## Warning in plot.aggr(res, ...): not enough horizontal space to display
## frequencies
##
## Variables sorted by number of missings:
## Variable Count
## TEL 0.191316794
## CAT_ERM 0.168893130
## Blastos 0.006202290
## MLL 0.004293893
## RTA_PRED 0.002385496
## Sexo 0.000000000
## Down 0.000000000
## Blancos 0.000000000
## MO 0.000000000
## SNC 0.000000000
## Ploidia 0.000000000
## Estirpe 0.000000000
## TiempoEFS 0.000000000
## EFSstatus 0.000000000
## Edad 0.000000000
## TIEMPOSG 0.000000000
## SGSTATUS 0.000000000
## Edad_cat 0.000000000
## Blancos_cat 0.000000000
aggr(datos, col=c('navyblue','red'),varheight=T, numbers=TRUE, sortVars=TRUE, labels=names(datos), cex.axis=.7, gap=3, ylab=c("Histograma de missing data","Patron"))
##
## Variables sorted by number of missings:
## Variable Count
## TEL 0.191316794
## CAT_ERM 0.168893130
## Blastos 0.006202290
## MLL 0.004293893
## RTA_PRED 0.002385496
## Sexo 0.000000000
## Down 0.000000000
## Blancos 0.000000000
## MO 0.000000000
## SNC 0.000000000
## Ploidia 0.000000000
## Estirpe 0.000000000
## TiempoEFS 0.000000000
## EFSstatus 0.000000000
## Edad 0.000000000
## TIEMPOSG 0.000000000
## SGSTATUS 0.000000000
## Edad_cat 0.000000000
## Blancos_cat 0.000000000
as_shadow(datos)
## # A tibble: 2,096 x 19
## Sexo_NA Down_NA Blancos_NA Blastos_NA MO_NA SNC_NA Ploidia_NA Estirpe_NA
## <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct>
## 1 !NA !NA !NA !NA !NA !NA !NA !NA
## 2 !NA !NA !NA !NA !NA !NA !NA !NA
## 3 !NA !NA !NA !NA !NA !NA !NA !NA
## 4 !NA !NA !NA !NA !NA !NA !NA !NA
## 5 !NA !NA !NA !NA !NA !NA !NA !NA
## 6 !NA !NA !NA !NA !NA !NA !NA !NA
## 7 !NA !NA !NA !NA !NA !NA !NA !NA
## 8 !NA !NA !NA !NA !NA !NA !NA !NA
## 9 !NA !NA !NA !NA !NA !NA !NA !NA
## 10 !NA !NA !NA !NA !NA !NA !NA !NA
## # ... with 2,086 more rows, and 11 more variables: RTA_PRED_NA <fct>,
## # CAT_ERM_NA <fct>, TiempoEFS_NA <fct>, EFSstatus_NA <fct>, Edad_NA <fct>,
## # MLL_NA <fct>, TIEMPOSG_NA <fct>, SGSTATUS_NA <fct>, Edad_cat_NA <fct>,
## # Blancos_cat_NA <fct>, TEL_NA <fct>
glimpse(nabular(datos))
## Rows: 2,096
## Columns: 38
## $ Sexo <fct> 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1~
## $ Down <fct> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ Blancos <dbl> 3, 36, 10, 8, 12, 37, 203, 3, 1, 3, 13, 30, 52, 130, 6,~
## $ Blastos <dbl> 2, 48, 95, 98, 22, 62, 93, 21, 0, 8, 27, 90, 84, 100, 0~
## $ MO <dbl> 90, 50, 98, 98, 80, 95, 80, 85, 87, 54, 95, 92, 99, 100~
## $ SNC <fct> 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 1, 3, 1~
## $ Ploidia <fct> 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2~
## $ Estirpe <fct> B, T, B, B, B, T, T, B, B, B, T, T, T, T, T, T, T, T, T~
## $ RTA_PRED <fct> 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
## $ CAT_ERM <fct> NA, 3, 3, 1, 1, 1, 3, 1, NA, NA, NA, 1, 2, 2, 2, NA, 1,~
## $ TiempoEFS <dbl> 6, 1, 5, 10, 1, 5, 0, 7, 5, 2, 5, 1, 5, 1, 3, 2, 5, 5, ~
## $ EFSstatus <dbl> 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0~
## $ Edad <dbl> 2, 14, 4, 3, 1, 15, 13, 6, 8, 12, 5, 10, 3, 6, 14, 10, ~
## $ MLL <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ TIEMPOSG <dbl> 81, 16, 67, 131, 28, 69, 37, 86, 67, 36, 61, 23, 62, 26~
## $ SGSTATUS <dbl> 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0~
## $ Edad_cat <fct> 1, 2, 1, 1, 1, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 2~
## $ Blancos_cat <fct> 1, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 2~
## $ TEL <fct> 0, NA, 0, 0, NA, 0, 0, NA, NA, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ Sexo_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ Down_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ Blancos_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ Blastos_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ MO_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ SNC_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ Ploidia_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ Estirpe_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ RTA_PRED_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ CAT_ERM_NA <fct> NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, NA, NA, NA, !NA,~
## $ TiempoEFS_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ EFSstatus_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ Edad_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ MLL_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ TIEMPOSG_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ SGSTATUS_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ Edad_cat_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ Blancos_cat_NA <fct> !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, !NA, ~
## $ TEL_NA <fct> !NA, NA, !NA, !NA, NA, !NA, !NA, NA, NA, !NA, !NA, !NA,~
# Fraction of rows that would be lost by keeping only complete cases
# (na.omit drops every row that has at least one NA).
n_inicial <- nrow(datos)
datos_prueba <- na.omit(datos)
n_final <- nrow(datos_prueba)
(n_inicial - n_final) / n_inicial
## [1] 0.2977099
# Multiple imputation with mice: m = 10 imputed data sets, maxit = 5
# iterations. A fixed seed makes the imputations (and every pooled estimate
# derived from them) reproducible between runs.
datos_imputados <- mice(
  datos,
  m = 10,
  defaultMethod = c("pmm", "logreg", "polyreg", "polr"),
  maxit = 5,
  seed = 2022,
  printFlag = FALSE
)
# Compare the densities of observed and imputed values.
densityplot(datos_imputados)
# Fit the logistic model on every imputed data set and pool the estimates.
# with() replaces the deprecated glm.mids(), and RTA_PRED is listed once
# (a repeated term in a formula has no effect on the fit).
# NOTE(review): CAT_ERM has three levels; with family = binomial, glm() treats
# the first factor level as failure and all other levels as success, so this
# models category 1 vs. categories 2-3 -- confirm that this is intended.
modelo1 <- with(
  datos_imputados,
  glm(
    CAT_ERM ~ Sexo + Down + MLL + RTA_PRED + Estirpe + Edad + TEL + Blancos +
      MO + SNC + Ploidia + Blastos,
    family = binomial
  )
)
summary(pool(modelo1))
## term estimate std.error statistic df p.value
## 1 (Intercept) 1.197895790 0.4177689415 2.8673644 156.24620 4.710736e-03
## 2 Sexo1 0.185334249 0.0996360491 1.8601124 617.41333 6.334494e-02
## 3 Down1 0.139621549 0.3882097535 0.3596549 108.46106 7.198045e-01
## 4 MLL1 -0.385890895 0.4687881584 -0.8231669 239.54154 4.112318e-01
## 5 RTA_PRED1 -1.436631863 0.2325295630 -6.1782762 939.31168 9.641683e-10
## 6 EstirpeT -0.095501471 0.2072864817 -0.4607221 150.98616 6.456609e-01
## 7 Edad 0.002973366 0.0111912290 0.2656872 1023.39162 7.905336e-01
## 8 TEL1 0.175930225 0.1791213461 0.9821846 54.16466 3.303759e-01
## 9 Blancos 0.001432464 0.0008535576 1.6782276 444.33984 9.400580e-02
## 10 MO 0.006390716 0.0037530366 1.7028121 111.04476 9.140079e-02
## 11 SNC2 -0.114550480 0.4867394317 -0.2353425 680.26327 8.140137e-01
## 12 SNC3 -0.117906986 0.2882139377 -0.4090954 199.34170 6.829089e-01
## 13 Ploidia2 -0.543295758 0.1456493217 -3.7301633 767.42763 2.054558e-04
## 14 Ploidia4 -0.057316331 0.1498944596 -0.3823779 1148.61329 7.022518e-01
## 15 Ploidia5 -0.503153107 0.3086564950 -1.6301394 125.32086 1.055829e-01
## 16 Blastos 0.003945405 0.0017136440 2.3023482 210.30142 2.229394e-02
# TODO: repeat using age and white-cell count as categorical variables.
# Reduced logistic model (no Edad, Blancos or Blastos) fitted on each imputed
# dataset. with() on the mids object replaces the deprecated glm.mids(), and
# the duplicated RTA_PRED term is dropped (the fit is unchanged).
# NOTE(review): CAT_ERM appears to have three levels; with family = binomial a
# factor response is modelled as first level vs. all other levels -- confirm.
modelo1 <- with(
  datos_imputados,
  glm(
    CAT_ERM ~ Sexo + Down + MLL + RTA_PRED + Estirpe + TEL + MO + SNC +
      Ploidia,
    family = binomial
  )
)
## Warning: Use with(imp, glm(yourmodel).
# Pooled coefficient table for the reduced model.
modelo1 %>%
  pool() %>%
  summary()
## term estimate std.error statistic df p.value
## 1 (Intercept) 1.26542243 0.413598229 3.05954510 152.56876 2.619337e-03
## 2 Sexo1 0.19371303 0.099361233 1.94958360 595.85322 5.169449e-02
## 3 Down1 0.17597528 0.385159338 0.45688956 110.31643 6.486488e-01
## 4 MLL1 -0.37717026 0.467859102 -0.80616207 236.23818 4.209602e-01
## 5 RTA_PRED1 -1.58591661 0.230543358 -6.87903840 727.89407 1.299494e-11
## 6 EstirpeT 0.10797762 0.190115141 0.56795910 132.51618 5.710238e-01
## 7 TEL1 0.16128011 0.178234908 0.90487386 52.13944 3.696946e-01
## 8 MO 0.00997229 0.003789159 2.63179532 67.36580 1.052036e-02
## 9 SNC2 -0.08504106 0.482473793 -0.17626047 678.06613 8.601419e-01
## 10 SNC3 -0.02567434 0.282963749 -0.09073367 204.44845 9.277930e-01
## 11 Ploidia2 -0.57149917 0.145818861 -3.91924043 669.10875 9.796815e-05
## 12 Ploidia4 -0.06370565 0.148679529 -0.42847626 1262.71163 6.683775e-01
## 13 Ploidia5 -0.50477263 0.307191176 -1.64318728 127.18784 1.028141e-01
# Same reduced logistic model, fitted through with() on the imputed datasets.
# NOTE(review): RTA_PRED appears twice in the formula; duplicate terms are
# collapsed by R, so the estimates are unaffected.
fit <- with(
  datos_imputados,
  glm(
    CAT_ERM ~ Sexo + Down + MLL + RTA_PRED + Estirpe + TEL + MO + SNC +
      RTA_PRED + Ploidia,
    family = binomial
  )
)
# Full pooling table, including the between/within-imputation variance columns.
print(pool(fit))
## Class: mipo m = 10
## term m estimate ubar b t dfcom
## 1 (Intercept) 10 1.26542243 1.315461e-01 3.592493e-02 1.710635e-01 2083
## 2 Sexo1 10 0.19371303 8.871114e-03 9.104917e-04 9.872655e-03 2083
## 3 Down1 10 0.17597528 1.075539e-01 3.708529e-02 1.483477e-01 2083
## 4 MLL1 10 -0.37717026 1.792397e-01 3.604768e-02 2.188921e-01 2083
## 5 RTA_PRED1 10 -1.58591661 4.850885e-02 4.219442e-03 5.315024e-02 2083
## 6 EstirpeT 10 0.10797762 2.713279e-02 8.191795e-03 3.614377e-02 2083
## 7 TEL1 10 0.16128011 1.885086e-02 1.174257e-02 3.176768e-02 2083
## 8 MO 10 0.00997229 9.243445e-06 4.649343e-06 1.435772e-05 2083
## 9 SNC2 10 -0.08504106 2.113086e-01 1.952033e-02 2.327810e-01 2083
## 10 SNC3 10 -0.02567434 6.432979e-02 1.430790e-02 8.006848e-02 2083
## 11 Ploidia2 10 -0.57149917 1.928195e-02 1.801078e-03 2.126314e-02 2083
## 12 Ploidia4 10 -0.06370565 2.098460e-02 1.019096e-03 2.210560e-02 2083
## 13 Ploidia5 10 -0.50477263 7.031550e-02 2.186447e-02 9.436642e-02 2083
## df riv lambda fmi
## 1 152.56876 0.30040749 0.23101027 0.24089644
## 2 595.85322 0.11289912 0.10144596 0.10444688
## 3 110.31643 0.37928717 0.27498782 0.28778406
## 4 236.23818 0.22122579 0.18115060 0.18799608
## 5 727.89407 0.09568122 0.08732579 0.08982320
## 6 132.51618 0.33210643 0.24930923 0.26038821
## 7 52.13944 0.68521169 0.40660274 0.42812626
## 8 67.36580 0.55328690 0.35620393 0.37450248
## 9 678.06613 0.10161614 0.09224278 0.09490848
## 10 204.44845 0.24465631 0.19656536 0.20431123
## 11 669.10875 0.10274821 0.09317468 0.09587313
## 12 1262.71163 0.05342042 0.05071140 0.05221140
## 13 127.18784 0.34204292 0.25486735 0.26631439
# Auto-print the pooled object (same table as the explicit print() call).
fit %>% pool()
## Class: mipo m = 10
## term m estimate ubar b t dfcom
## 1 (Intercept) 10 1.26542243 1.315461e-01 3.592493e-02 1.710635e-01 2083
## 2 Sexo1 10 0.19371303 8.871114e-03 9.104917e-04 9.872655e-03 2083
## 3 Down1 10 0.17597528 1.075539e-01 3.708529e-02 1.483477e-01 2083
## 4 MLL1 10 -0.37717026 1.792397e-01 3.604768e-02 2.188921e-01 2083
## 5 RTA_PRED1 10 -1.58591661 4.850885e-02 4.219442e-03 5.315024e-02 2083
## 6 EstirpeT 10 0.10797762 2.713279e-02 8.191795e-03 3.614377e-02 2083
## 7 TEL1 10 0.16128011 1.885086e-02 1.174257e-02 3.176768e-02 2083
## 8 MO 10 0.00997229 9.243445e-06 4.649343e-06 1.435772e-05 2083
## 9 SNC2 10 -0.08504106 2.113086e-01 1.952033e-02 2.327810e-01 2083
## 10 SNC3 10 -0.02567434 6.432979e-02 1.430790e-02 8.006848e-02 2083
## 11 Ploidia2 10 -0.57149917 1.928195e-02 1.801078e-03 2.126314e-02 2083
## 12 Ploidia4 10 -0.06370565 2.098460e-02 1.019096e-03 2.210560e-02 2083
## 13 Ploidia5 10 -0.50477263 7.031550e-02 2.186447e-02 9.436642e-02 2083
## df riv lambda fmi
## 1 152.56876 0.30040749 0.23101027 0.24089644
## 2 595.85322 0.11289912 0.10144596 0.10444688
## 3 110.31643 0.37928717 0.27498782 0.28778406
## 4 236.23818 0.22122579 0.18115060 0.18799608
## 5 727.89407 0.09568122 0.08732579 0.08982320
## 6 132.51618 0.33210643 0.24930923 0.26038821
## 7 52.13944 0.68521169 0.40660274 0.42812626
## 8 67.36580 0.55328690 0.35620393 0.37450248
## 9 678.06613 0.10161614 0.09224278 0.09490848
## 10 204.44845 0.24465631 0.19656536 0.20431123
## 11 669.10875 0.10274821 0.09317468 0.09587313
## 12 1262.71163 0.05342042 0.05071140 0.05221140
## 13 127.18784 0.34204292 0.25486735 0.26631439
#ver no da imputacion con cox, ni paquete dharma
library(DT)
library(networkD3)
##
## Attaching package: 'networkD3'
## The following object is masked from 'package:DT':
##
## JS
require(dplyr)
library(highcharter)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use
##
## Attaching package: 'highcharter'
## The following object is masked from 'package:networkD3':
##
## JS
library(js)
# Residual-disease categories: frequency of each CAT_ERM level.
barplot(
  height = table(datos$CAT_ERM),
  col = c("red", "green", "blue"),
  main = "Categorias de Enfermedad Residual",
  xlab = "ERM",
  ylab = "Frecuencia"
)
# Some descriptive plots.
# Age: boxplot per ERM category with the individual points jittered on top.
ggplot(datos, aes(x = CAT_ERM, y = Edad)) +
  geom_boxplot(aes(fill = CAT_ERM)) +
  geom_jitter(size = 2, position = position_jitter(width = 0.05)) +
  labs(x = "Categorias de ERM", y = "Edad", title = "Edad vs ERM")
# Age: overall density curve.
ggplot(datos, aes(x = Edad)) +
  geom_density(color = 5, lwd = 1, linetype = 1) +
  labs(x = "Edad", y = "Densidad")
# White blood cells: boxplot per ERM category with jittered points on top.
# The title previously read "Edad vs ERM" (copied from the age plot) although
# the y axis is Blancos; it now names the variable actually plotted.
ggplot(datos, aes(x = CAT_ERM, y = Blancos)) +
  geom_boxplot(aes(fill = CAT_ERM)) +
  geom_jitter(size = 2, position = position_jitter(width = 0.05)) +
  labs(
    x = "Categorias de ERM",
    y = "Globulos Blancos",
    title = "Globulos Blancos vs ERM"
  )
# White blood cells: overall density curve.
ggplot(datos, aes(x = Blancos)) +
  geom_density(color = 5, lwd = 1, linetype = 1) +
  labs(x = "Blancos", y = "Densidad")
# Peripheral-blood blasts: boxplot per ERM category with jittered points.
ggplot(datos, aes(x = CAT_ERM, y = Blastos)) +
  geom_boxplot(aes(fill = CAT_ERM)) +
  geom_jitter(size = 2, position = position_jitter(width = 0.05)) +
  labs(x = "Categorias de ERM", y = "Blastos en sangre periferica")
## Warning: Removed 13 rows containing non-finite values (stat_boxplot).
## Warning: Removed 13 rows containing missing values (geom_point).
# Peripheral-blood blasts: overall density curve (rows with NA are dropped).
ggplot(datos, aes(x = Blastos)) +
  geom_density(color = 5, lwd = 1, linetype = 1) +
  labs(x = "Blastos en sangre periferica", y = "Densidad")
## Warning: Removed 13 rows containing non-finite values (stat_density).
# Bone-marrow blasts (MO): boxplot per ERM category with jittered points.
ggplot(datos, aes(x = CAT_ERM, y = MO)) +
  geom_boxplot(aes(fill = CAT_ERM)) +
  geom_jitter(size = 2, position = position_jitter(width = 0.05)) +
  labs(x = "Categorias de ERM", y = "Blastos en medula osea")
# Bone-marrow blasts (MO): overall density curve.
ggplot(datos, aes(x = MO)) +
  geom_density(color = 5, lwd = 1, linetype = 1) +
  labs(x = "Blastos en Medula Osea", y = "Densidad")
# Down syndrome: ERM category counts, side by side within each Down level.
ggplot(datos, aes(x = Down, fill = CAT_ERM)) +
  geom_bar(position = "dodge", colour = "black") +
  labs(x = "Down", y = "Frecuencia", title = "Distribución de ERM por Down") +
  theme_bw(base_size = 14)
# Labelled outcome (0 = alive, 1 = deceased); reused by all Sankey plots below.
datos[["evolucion"]] <- factor(
  datos[["SGSTATUS"]],
  levels = c(0, 1),
  labels = c("Vivo", "Fallecido")
)
# Sankey plot: Down syndrome status -> outcome.
datos[["down"]] <- factor(
  datos[["Down"]],
  levels = c(0, 1),
  labels = c("NO DOWN", "DOWN")
)
data1 <- dplyr::select(datos, down, evolucion)
hchart(
  data_to_sankey(data1),
  "sankey",
  name = "Sobrevida segun sindrome de Down"
)
# Sex: ERM category counts, side by side within each sex.
ggplot(datos, aes(x = Sexo, fill = CAT_ERM)) +
  geom_bar(position = "dodge", colour = "black") +
  labs(x = "Sexo", y = "Frecuencia", title = "Distribución de ERM por Sexo") +
  theme_bw(base_size = 14)
# Sankey plot: sex -> outcome (1 is labelled male, 0 female).
datos[["sexo"]] <- factor(
  datos[["Sexo"]],
  levels = c(1, 0),
  labels = c("Masculino", "Femenino")
)
data <- dplyr::select(datos, sexo, evolucion)
hchart(data_to_sankey(data), "sankey", name = "Sobrevida segun sexo")
# SNC: ERM category counts, side by side within each SNC level.
ggplot(datos, aes(x = SNC, fill = CAT_ERM)) +
  geom_bar(position = "dodge", colour = "black") +
  labs(x = "SNC", y = "Frecuencia", title = "Distribución de ERM por SNC") +
  theme_bw(base_size = 14)
# Lineage (Estirpe): ERM category counts, side by side within each lineage.
ggplot(datos, aes(x = Estirpe, fill = CAT_ERM)) +
  geom_bar(position = "dodge", colour = "black") +
  labs(
    x = "Estirpe",
    y = "Frecuencia",
    title = "Distribución de ERM por Estirpe"
  ) +
  theme_bw(base_size = 14)
# Sankey plot: lineage -> outcome.
data2 <- dplyr::select(datos, Estirpe, evolucion)
hchart(
  data_to_sankey(data2),
  "sankey",
  name = "Sobrevida segun estirpe de leucemia"
)
# Prednisone response: ERM category counts within each response level.
ggplot(datos, aes(x = RTA_PRED, fill = CAT_ERM)) +
  geom_bar(position = "dodge", colour = "black") +
  labs(
    x = "Rta Prednisona",
    y = "Frecuencia",
    title = "Distribución de ERM por Rta a la Prednisona"
  ) +
  theme_bw(base_size = 14)
# Sankey plot: prednisone response -> outcome.
data3 <- dplyr::select(datos, RTA_PRED, evolucion)
hchart(
  data_to_sankey(data3),
  "sankey",
  name = "Sobrevida segun respuesta a prednisona"
)
# TEL: ERM category counts, side by side within each TEL level.
ggplot(datos, aes(x = TEL, fill = CAT_ERM)) +
  geom_bar(position = "dodge", colour = "black") +
  labs(x = "TEL", y = "Frecuencia", title = "Distribución de ERM por TEL") +
  theme_bw(base_size = 14)
# Sankey plot: TEL -> outcome.
data4 <- dplyr::select(datos, TEL, evolucion)
hchart(data_to_sankey(data4), "sankey", name = "Sobrevida segun TEL")
# MLL: ERM category counts, side by side within each MLL level.
ggplot(datos, aes(x = MLL, fill = CAT_ERM)) +
  geom_bar(position = "dodge", colour = "black") +
  labs(x = "MLL", y = "Frecuencia", title = "Distribución de ERM por MLL") +
  theme_bw(base_size = 14)
# Sankey plot: MLL -> outcome.
data5 <- dplyr::select(datos, MLL, evolucion)
hchart(data_to_sankey(data5), "sankey", name = "Sobrevida segun MLL")
# Ploidy: ERM category counts, side by side within each ploidy level.
ggplot(datos, aes(x = Ploidia, fill = CAT_ERM)) +
  geom_bar(position = "dodge", colour = "black") +
  labs(
    x = "Ploidia",
    y = "Frecuencia",
    title = "Distribución de ERM por Ploidia"
  ) +
  theme_bw(base_size = 14)
# Age as a categorical variable: ERM category counts within each age group.
ggplot(datos, aes(x = Edad_cat, fill = CAT_ERM)) +
  geom_bar(position = "dodge", colour = "black") +
  labs(x = "Edad", y = "Frecuencia", title = "Distribución de ERM por edad") +
  theme_bw(base_size = 14)
# Sankey plot: age group -> outcome.
data6 <- dplyr::select(datos, Edad_cat, evolucion)
hchart(data_to_sankey(data6), "sankey", name = "Sobrevida segun edad")
# White-cell count as a categorical variable: ERM counts within each group.
ggplot(datos, aes(x = Blancos_cat, fill = CAT_ERM)) +
  geom_bar(position = "dodge", colour = "black") +
  labs(
    x = "Globulos Blancos",
    y = "Frecuencia",
    title = "Distribución de ERM por globulos blancos"
  ) +
  theme_bw(base_size = 14)
# Sankey plot: white-cell group -> outcome.
data7 <- dplyr::select(datos, Blancos_cat, evolucion)
hchart(
  data_to_sankey(data7),
  "sankey",
  name = "Sobrevida segun globulos blancos"
)
# NOTE(review): sankeyNetworkOutput() is networkD3's Shiny output binding; its
# first argument is an output id. Called here outside a Shiny UI it does not
# write "TF-ceecs.html" -- presumably exporting a widget to HTML was intended;
# confirm and replace with the appropriate save call.
sankeyNetworkOutput("TF-ceecs.html", width = "500px", height = "1000px")
# TODO: review the Sankey plots (TEL and MLL contain NA values)
#Swimmer plot
library(SwimmeR)
library(swimplot)
Observaciones de los gráficos:
Se puede observar que la mayor cantidad de pacientes se encuentra en la categoría 2 (ERM intermedia).
Random forest para selección de variables
require(ggplot2)
require(dplyr)
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:ggplot2':
##
## margin
# Random forest (regression on TIEMPOSG) used to rank candidate predictors.
# na.roughfix imputes missing predictor values (median / most frequent level)
# before fitting. The forest is built from random bootstrap samples and random
# split candidates, so the seed is fixed to make the importance ranking
# reproducible; the value itself is arbitrary. RTA_PRED was listed twice in
# the original formula; the redundant term is dropped (the fit is unchanged).
# NOTE(review): TIEMPOSG is a follow-up time and SGSTATUS is not used here, so
# censored times are treated as if they were observed event times -- confirm
# this is acceptable for variable screening.
set.seed(123)
rf <- randomForest(
  TIEMPOSG ~ CAT_ERM + Sexo + Down + RTA_PRED + Estirpe + Edad + TEL +
    Blancos + SNC + Ploidia + Blastos,
  data = datos,
  importance = TRUE,
  na.action = na.roughfix
)
str(rf)
## List of 18
## $ call : language randomForest(formula = TIEMPOSG ~ CAT_ERM + Sexo + Down + RTA_PRED + Estirpe + Edad + TEL + Blancos + SNC + | __truncated__ ...
## $ type : chr "regression"
## $ predicted : Named num [1:2096] 58.1 25.9 45.5 61.1 63.8 ...
## ..- attr(*, "names")= chr [1:2096] "1" "2" "3" "4" ...
## $ mse : num [1:500] 1481 1424 1415 1371 1331 ...
## $ rsq : num [1:500] -0.311 -0.261 -0.253 -0.214 -0.178 ...
## $ oob.times : int [1:2096] 172 163 181 176 169 184 177 177 199 186 ...
## $ importance : num [1:11, 1:2] 18.49 4.72 -3.17 38.32 6.79 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:11] "CAT_ERM" "Sexo" "Down" "RTA_PRED" ...
## .. ..$ : chr [1:2] "%IncMSE" "IncNodePurity"
## $ importanceSD : Named num [1:11] 2.363 1.865 0.625 1.806 1.298 ...
## ..- attr(*, "names")= chr [1:11] "CAT_ERM" "Sexo" "Down" "RTA_PRED" ...
## $ localImportance: NULL
## $ proximity : NULL
## $ ntree : num 500
## $ mtry : num 3
## $ forest :List of 11
## ..$ ndbigtree : int [1:500] 883 705 1073 815 777 941 703 873 921 879 ...
## ..$ nodestatus : int [1:1097, 1:500] -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 ...
## ..$ leftDaughter : int [1:1097, 1:500] 2 4 6 8 10 12 14 16 18 20 ...
## ..$ rightDaughter: int [1:1097, 1:500] 3 5 7 9 11 13 15 17 19 21 ...
## ..$ nodepred : num [1:1097, 1:500] 51 33.6 52.9 25.6 54.1 ...
## ..$ bestvar : int [1:1097, 1:500] 4 10 8 6 8 10 10 5 2 8 ...
## ..$ xbestsplit : num [1:1097, 1:500] 1 13 133 11.5 305.5 ...
## ..$ ncat : Named int [1:11] 3 2 2 2 2 1 2 1 3 4 ...
## .. ..- attr(*, "names")= chr [1:11] "CAT_ERM" "Sexo" "Down" "RTA_PRED" ...
## ..$ nrnodes : int 1097
## ..$ ntree : num 500
## ..$ xlevels :List of 11
## .. ..$ CAT_ERM : chr [1:3] "1" "2" "3"
## .. ..$ Sexo : chr [1:2] "0" "1"
## .. ..$ Down : chr [1:2] "0" "1"
## .. ..$ RTA_PRED: chr [1:2] "0" "1"
## .. ..$ Estirpe : chr [1:2] "B" "T"
## .. ..$ Edad : num 0
## .. ..$ TEL : chr [1:2] "0" "1"
## .. ..$ Blancos : num 0
## .. ..$ SNC : chr [1:3] "1" "2" "3"
## .. ..$ Ploidia : chr [1:4] "1" "2" "4" "5"
## .. ..$ Blastos : num 0
## $ coefs : NULL
## $ y : Named num [1:2096] 81 16 67 131 28 69 37 86 67 36 ...
## ..- attr(*, "names")= chr [1:2096] "1" "2" "3" "4" ...
## $ test : NULL
## $ inbag : NULL
## $ terms :Classes 'terms', 'formula' language TIEMPOSG ~ CAT_ERM + Sexo + Down + RTA_PRED + Estirpe + Edad + TEL + Blancos + SNC + RTA_PRED + Ploidia + Blastos
## .. ..- attr(*, "variables")= language list(TIEMPOSG, CAT_ERM, Sexo, Down, RTA_PRED, Estirpe, Edad, TEL, Blancos, SNC, Ploidia, Blastos)
## .. ..- attr(*, "factors")= int [1:12, 1:11] 0 1 0 0 0 0 0 0 0 0 ...
## .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. ..$ : chr [1:12] "TIEMPOSG" "CAT_ERM" "Sexo" "Down" ...
## .. .. .. ..$ : chr [1:11] "CAT_ERM" "Sexo" "Down" "RTA_PRED" ...
## .. ..- attr(*, "term.labels")= chr [1:11] "CAT_ERM" "Sexo" "Down" "RTA_PRED" ...
## .. ..- attr(*, "order")= int [1:11] 1 1 1 1 1 1 1 1 1 1 ...
## .. ..- attr(*, "intercept")= num 0
## .. ..- attr(*, "response")= int 1
## .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## .. ..- attr(*, "predvars")= language list(TIEMPOSG, CAT_ERM, Sexo, Down, RTA_PRED, Estirpe, Edad, TEL, Blancos, SNC, Ploidia, Blastos)
## .. ..- attr(*, "dataClasses")= Named chr [1:12] "numeric" "factor" "factor" "factor" ...
## .. .. ..- attr(*, "names")= chr [1:12] "TIEMPOSG" "CAT_ERM" "Sexo" "Down" ...
## - attr(*, "class")= chr [1:2] "randomForest.formula" "randomForest"
# Importance matrix stored on the fit (columns %IncMSE and IncNodePurity).
rf[["importance"]]
## %IncMSE IncNodePurity
## CAT_ERM 18.4914408 80949.43
## Sexo 4.7178089 48833.35
## Down -3.1678033 15877.11
## RTA_PRED 38.3151458 50224.08
## Estirpe 6.7906239 28150.91
## Edad 1.4052645 244616.06
## TEL 3.0074175 30900.51
## Blancos 62.0673283 330776.92
## SNC 0.8186955 32903.26
## Ploidia 259.6809927 340200.01
## Blastos 47.2625019 312418.44
# Table of %IncMSE importance per predictor, used by the bar chart below.
# The labels must come from the importance matrix itself: colnames(datos) has
# a different length and order than the forest's predictors, so pairing the
# two recycled the scores and attached each one to the wrong variable name.
v <- as.vector(rf$importance[, "%IncMSE"])
w <- rownames(rf$importance)
# data.frame() keeps v numeric; cbind() on a character and a numeric vector
# would coerce both columns to character.
DF <- data.frame(w = w, v = v, stringsAsFactors = FALSE)
str(DF)
## 'data.frame': 22 obs. of 2 variables:
## $ w: chr "Sexo" "Down" "Blancos" "Blastos" ...
## $ v: chr "18.4914408201089" "4.71780894783562" "-3.16780333380769" "38.3151458135479" ...
# Make sure the score is numeric and the label is a factor before plotting.
DF$v <- as.numeric(DF$v)
DF$w <- as.factor(DF$w)
# Horizontal bar chart with the variables ordered by importance score.
ggplot(DF, aes(x = reorder(w, v), y = v, fill = w)) +
  geom_col(position = "dodge") +
  coord_flip() +
  labs(x = "", y = "Importancia de variables") +
  theme(legend.position = "none")
Regresión de Cox con base de datos sin imputación
library(survival)
library(ggplot2)
library(KMsurv)
library(ggfortify)
library ( survminer)
## Loading required package: ggpubr
##
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
##
## myeloma
library(survMisc)
##
## Attaching package: 'survMisc'
## The following object is masked from 'package:ggplot2':
##
## autoplot
library(base)
library(flexsurv)
library(coin)
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
library(Hmisc)
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following object is masked from 'package:arsenal':
##
## %nin%
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
# Plots
# Kaplan-Meier curves by residual-disease category (log-log confidence bands).
ckm <- survfit(
  Surv(TIEMPOSG, SGSTATUS) ~ CAT_ERM,
  data = datos,
  conf.type = "log-log"
)
ggsurvplot(
  fit = ckm, data = datos,
  conf.int = TRUE,
  title = "LLA", xlab = "Tiempo", ylab = "Probabilidad de sobrevida",
  legend.title = "CAT_ERM",
  legend.labs = c("Estandar", "Intermedio", "Elevado"),
  risk.table = "percentage",
  ncensor.plot = FALSE,
  surv.median.line = "hv"
)
## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, : Median
## survival not reached.
# Kaplan-Meier curves by sex (strata Sexo = 0, 1 are labelled female, male).
ckm_sexo <- survfit(
  Surv(TIEMPOSG, SGSTATUS) ~ Sexo,
  data = datos,
  conf.type = "log-log"
)
ggsurvplot(
  fit = ckm_sexo, data = datos,
  conf.int = TRUE,
  title = "LLA", xlab = "Tiempo", ylab = "Probabilidad de sobrevida",
  legend.title = "Sexo",
  legend.labs = c("Femenino", "Masculino"),
  risk.table = "percentage",
  ncensor.plot = FALSE,
  surv.median.line = "hv"
)
## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, : Median
## survival not reached.
# Kaplan-Meier curves by Down syndrome status.
ckm_down <- survfit(
  Surv(TIEMPOSG, SGSTATUS) ~ Down,
  data = datos,
  conf.type = "log-log"
)
ggsurvplot(
  fit = ckm_down, data = datos,
  conf.int = TRUE,
  title = "LLA", xlab = "Tiempo", ylab = "Probabilidad de sobrevida",
  legend.title = "Down",
  legend.labs = c("No", "Si"),
  risk.table = "percentage",
  ncensor.plot = FALSE,
  surv.median.line = "hv"
)
# Kaplan-Meier curves by SNC level.
ckm_snc <- survfit(
  Surv(TIEMPOSG, SGSTATUS) ~ SNC,
  data = datos,
  conf.type = "log-log"
)
ggsurvplot(
  fit = ckm_snc, data = datos,
  conf.int = TRUE,
  title = "LLA", xlab = "Tiempo", ylab = "Probabilidad de sobrevida",
  legend.title = "SNC",
  legend.labs = c("1", "2", "3"),
  risk.table = "percentage",
  ncensor.plot = FALSE,
  surv.median.line = "hv"
)
## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, : Median
## survival not reached.
# Kaplan-Meier curves by ploidy level.
ckm_ploidia <- survfit(
  Surv(TIEMPOSG, SGSTATUS) ~ Ploidia,
  data = datos,
  conf.type = "log-log"
)
ggsurvplot(
  fit = ckm_ploidia, data = datos,
  conf.int = TRUE,
  title = "LLA", xlab = "Tiempo", ylab = "Probabilidad de sobrevida",
  legend.title = "Ploidia",
  legend.labs = c("1", "2", "4", "5"),
  risk.table = "percentage",
  ncensor.plot = FALSE,
  surv.median.line = "hv"
)
# Kaplan-Meier curves by lineage (B vs. T).
ckm_estirpe <- survfit(
  Surv(TIEMPOSG, SGSTATUS) ~ Estirpe,
  data = datos,
  conf.type = "log-log"
)
ggsurvplot(
  fit = ckm_estirpe, data = datos,
  conf.int = TRUE,
  title = "LLA", xlab = "Tiempo", ylab = "Probabilidad de sobrevida",
  legend.title = "Estirpe",
  legend.labs = c("B", "T"),
  risk.table = "percentage",
  ncensor.plot = FALSE,
  surv.median.line = "hv"
)
## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, : Median
## survival not reached.
# Kaplan-Meier curves by prednisone response.
ckm_pred <- survfit(
  Surv(TIEMPOSG, SGSTATUS) ~ RTA_PRED,
  data = datos,
  conf.type = "log-log"
)
ggsurvplot(
  fit = ckm_pred, data = datos,
  conf.int = TRUE,
  title = "LLA", xlab = "Tiempo", ylab = "Probabilidad de sobrevida",
  legend.title = "Rta Prednisona",
  legend.labs = c("No", "Si"),
  risk.table = "percentage",
  ncensor.plot = FALSE,
  surv.median.line = "hv"
)
# Kaplan-Meier curves by MLL status.
ckm_mll <- survfit(
  Surv(TIEMPOSG, SGSTATUS) ~ MLL,
  data = datos,
  conf.type = "log-log"
)
ggsurvplot(
  fit = ckm_mll, data = datos,
  conf.int = TRUE,
  title = "LLA", xlab = "Tiempo", ylab = "Probabilidad de sobrevida",
  legend.title = "MLL",
  legend.labs = c("Ausente", "Presente"),
  risk.table = "percentage",
  ncensor.plot = FALSE,
  surv.median.line = "hv"
)
## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, : Median
## survival not reached.
# Kaplan-Meier curves by TEL status.
ckm_tel <- survfit(
  Surv(TIEMPOSG, SGSTATUS) ~ TEL,
  data = datos,
  conf.type = "log-log"
)
ggsurvplot(
  fit = ckm_tel, data = datos,
  conf.int = TRUE,
  title = "LLA", xlab = "Tiempo", ylab = "Probabilidad de sobrevida",
  legend.title = "TEL",
  legend.labs = c("Ausente", "Presente"),
  risk.table = "percentage",
  ncensor.plot = FALSE,
  surv.median.line = "hv"
)
## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, : Median
## survival not reached.
# Kaplan-Meier curves by age group.
# legend.labs are applied in stratum order (Edad_cat = 1, then Edad_cat = 2).
# In the data, Edad_cat 1 holds the younger patients (ages 1-4 are coded 1 and
# ages 14-15 are coded 2 in the first rows), so the labels were inverted; they
# now read younger group first.
# NOTE(review): the 6-year cut-off is taken from the original labels -- confirm.
ckm_edad <- survfit(
  Surv(TIEMPOSG, SGSTATUS) ~ Edad_cat,
  data = datos,
  conf.type = "log-log"
)
ggsurvplot(
  fit = ckm_edad, data = datos,
  conf.int = TRUE,
  title = "LLA", xlab = "Tiempo", ylab = "Probabilidad de sobrevida",
  legend.title = "Edad",
  legend.labs = c("Menor a 6", "Mayor a 6"),
  risk.table = "percentage",
  ncensor.plot = FALSE,
  surv.median.line = "hv"
)
## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, : Median
## survival not reached.
# Kaplan-Meier curves by white-cell count group.
ckm_blancos <- survfit(
  Surv(TIEMPOSG, SGSTATUS) ~ Blancos_cat,
  data = datos,
  conf.type = "log-log"
)
ggsurvplot(
  fit = ckm_blancos, data = datos,
  conf.int = TRUE,
  title = "LLA", xlab = "Tiempo", ylab = "Probabilidad de sobrevida",
  legend.title = "GB",
  legend.labs = c("Menor a 20.000", "Mayor a 20.000"),
  risk.table = "percentage",
  ncensor.plot = FALSE,
  surv.median.line = "hv"
)
## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, : Median
## survival not reached.
# Kaplan-Meier estimate by ERM category with 95% log-log intervals, printed
# together with the restricted mean survival time of each group.
kmaids <- survfit(
  Surv(TIEMPOSG, SGSTATUS) ~ CAT_ERM,
  data = datos,
  type = "kaplan-meier",
  conf.type = "log-log",
  conf.int = 0.95
)
print(kmaids, print.rmean = TRUE)
## Call: survfit(formula = Surv(TIEMPOSG, SGSTATUS) ~ CAT_ERM, data = datos,
## type = "kaplan-meier", conf.type = "log-log", conf.int = 0.95)
##
## 354 observations deleted due to missingness
## n events rmean* se(rmean) median 0.95LCL 0.95UCL
## CAT_ERM=1 656 87 115.6 1.76 NA NA NA
## CAT_ERM=2 806 177 102.8 2.05 NA NA NA
## CAT_ERM=3 280 105 83.4 3.73 NA 67 NA
## * restricted mean with upper limit = 133
# Cox model with indicator terms for ERM categories 2 and 3 (category 1 is
# the reference).
summary(
  coxph(
    Surv(TIEMPOSG, SGSTATUS) ~ I(CAT_ERM == "2") + I(CAT_ERM == "3"),
    data = datos
  )
)
## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ I(CAT_ERM == "2") +
## I(CAT_ERM == "3"), data = datos)
##
## n= 1742, number of events= 369
## (354 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## I(CAT_ERM == "2")TRUE 0.5966 1.8160 0.1310 4.555 5.25e-06 ***
## I(CAT_ERM == "3")TRUE 1.2912 3.6371 0.1452 8.893 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## I(CAT_ERM == "2")TRUE 1.816 0.5507 1.405 2.348
## I(CAT_ERM == "3")TRUE 3.637 0.2749 2.736 4.834
##
## Concordance= 0.628 (se = 0.014 )
## Likelihood ratio test= 77.78 on 2 df, p=<2e-16
## Wald test = 80.19 on 2 df, p=<2e-16
## Score (logrank) test = 87.6 on 2 df, p=<2e-16
# Univariate models
# ERM categories: indicators for categories 2 and 3 against category 1.
modelo_erm <- coxph(
  Surv(TIEMPOSG, SGSTATUS) ~ I(CAT_ERM == "2") + I(CAT_ERM == "3"),
  data = datos
)
summary(modelo_erm)
## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ I(CAT_ERM == "2") +
## I(CAT_ERM == "3"), data = datos)
##
## n= 1742, number of events= 369
## (354 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## I(CAT_ERM == "2")TRUE 0.5966 1.8160 0.1310 4.555 5.25e-06 ***
## I(CAT_ERM == "3")TRUE 1.2912 3.6371 0.1452 8.893 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## I(CAT_ERM == "2")TRUE 1.816 0.5507 1.405 2.348
## I(CAT_ERM == "3")TRUE 3.637 0.2749 2.736 4.834
##
## Concordance= 0.628 (se = 0.014 )
## Likelihood ratio test= 77.78 on 2 df, p=<2e-16
## Wald test = 80.19 on 2 df, p=<2e-16
## Score (logrank) test = 87.6 on 2 df, p=<2e-16
# Univariate Cox model with SNC as the single covariate.
modelo_snc <- coxph(
  Surv(TIEMPOSG, SGSTATUS) ~ SNC,
  data = datos
)
summary(modelo_snc)
## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ SNC, data = datos)
##
## n= 2096, number of events= 468
##
## coef exp(coef) se(coef) z Pr(>|z|)
## SNC2 0.5656 1.7606 0.3368 1.679 0.09311 .
## SNC3 0.5882 1.8008 0.1951 3.015 0.00257 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## SNC2 1.761 0.5680 0.9098 3.407
## SNC3 1.801 0.5553 1.2286 2.640
##
## Concordance= 0.519 (se = 0.006 )
## Likelihood ratio test= 9.86 on 2 df, p=0.007
## Wald test = 11.57 on 2 df, p=0.003
## Score (logrank) test = 11.9 on 2 df, p=0.003
# Univariate Cox model with white-cell count (Blancos) as the single covariate.
modelo_blancos <- coxph(
  Surv(TIEMPOSG, SGSTATUS) ~ Blancos,
  data = datos
)
summary(modelo_blancos)
## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Blancos, data = datos)
##
## n= 2096, number of events= 468
##
## coef exp(coef) se(coef) z Pr(>|z|)
## Blancos 0.0030767 1.0030814 0.0002609 11.79 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Blancos 1.003 0.9969 1.003 1.004
##
## Concordance= 0.615 (se = 0.014 )
## Likelihood ratio test= 82.27 on 1 df, p=<2e-16
## Wald test = 139.1 on 1 df, p=<2e-16
## Score (logrank) test = 153.8 on 1 df, p=<2e-16
# Univariate Cox model with MO as the single covariate.
modelo_mo <- coxph(
  Surv(TIEMPOSG, SGSTATUS) ~ MO,
  data = datos
)
summary(modelo_mo)
## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ MO, data = datos)
##
## n= 2096, number of events= 468
##
## coef exp(coef) se(coef) z Pr(>|z|)
## MO -0.0000736 0.9999264 0.0031069 -0.024 0.981
##
## exp(coef) exp(-coef) lower .95 upper .95
## MO 0.9999 1 0.9939 1.006
##
## Concordance= 0.482 (se = 0.014 )
## Likelihood ratio test= 0 on 1 df, p=1
## Wald test = 0 on 1 df, p=1
## Score (logrank) test = 0 on 1 df, p=1
# Univariate Cox model with Blastos as the single covariate
# (rows with missing Blastos are dropped by coxph).
modelo_blastos <- coxph(
  Surv(TIEMPOSG, SGSTATUS) ~ Blastos,
  data = datos
)
summary(modelo_blastos)
## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Blastos, data = datos)
##
## n= 2083, number of events= 466
## (13 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## Blastos 0.007007 1.007031 0.001343 5.217 1.82e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Blastos 1.007 0.993 1.004 1.01
##
## Concordance= 0.58 (se = 0.014 )
## Likelihood ratio test= 27.78 on 1 df, p=1e-07
## Wald test = 27.22 on 1 df, p=2e-07
## Score (logrank) test = 27.64 on 1 df, p=1e-07
# Univariate Cox model with TEL as the single covariate
# (rows with missing TEL are dropped by coxph).
modelo_tel <- coxph(
  Surv(TIEMPOSG, SGSTATUS) ~ TEL,
  data = datos
)
summary(modelo_tel)
## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ TEL, data = datos)
##
## n= 1695, number of events= 391
## (401 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## TEL1 -0.7011 0.4960 0.1930 -3.633 0.00028 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## TEL1 0.496 2.016 0.3398 0.7241
##
## Concordance= 0.53 (se = 0.008 )
## Likelihood ratio test= 16.15 on 1 df, p=6e-05
## Wald test = 13.2 on 1 df, p=3e-04
## Score (logrank) test = 13.75 on 1 df, p=2e-04
# Univariate Cox model with MLL as the single covariate
# (rows with missing MLL are dropped by coxph).
modelo_mll <- coxph(
  Surv(TIEMPOSG, SGSTATUS) ~ MLL,
  data = datos
)
summary(modelo_mll)
## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ MLL, data = datos)
##
## n= 2087, number of events= 467
## (9 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## MLL1 -0.1211 0.8859 0.4497 -0.269 0.788
##
## exp(coef) exp(-coef) lower .95 upper .95
## MLL1 0.8859 1.129 0.367 2.139
##
## Concordance= 0.501 (se = 0.003 )
## Likelihood ratio test= 0.08 on 1 df, p=0.8
## Wald test = 0.07 on 1 df, p=0.8
## Score (logrank) test = 0.07 on 1 df, p=0.8
# Univariate Cox model with lineage (Estirpe) as the single covariate.
modelo_estirpe <- coxph(
  Surv(TIEMPOSG, SGSTATUS) ~ Estirpe,
  data = datos
)
summary(modelo_estirpe)
## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Estirpe, data = datos)
##
## n= 2096, number of events= 468
##
## coef exp(coef) se(coef) z Pr(>|z|)
## EstirpeT 0.6344 1.8858 0.1217 5.213 1.86e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## EstirpeT 1.886 0.5303 1.486 2.394
##
## Concordance= 0.544 (se = 0.009 )
## Likelihood ratio test= 23.62 on 1 df, p=1e-06
## Wald test = 27.17 on 1 df, p=2e-07
## Score (logrank) test = 28.1 on 1 df, p=1e-07
# Univariate Cox model with ploidy (Ploidia) as the single covariate.
modelo_ploidia <- coxph(
  Surv(TIEMPOSG, SGSTATUS) ~ Ploidia,
  data = datos
)
summary(modelo_ploidia)
## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Ploidia, data = datos)
##
## n= 2096, number of events= 468
##
## coef exp(coef) se(coef) z Pr(>|z|)
## Ploidia2 -1.3654 0.2553 0.1869 -7.306 2.74e-13 ***
## Ploidia4 0.9572 2.6044 0.1369 6.990 2.75e-12 ***
## Ploidia5 -3.1259 0.0439 1.0084 -3.100 0.00194 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Ploidia2 0.2553 3.917 0.176982 0.3682
## Ploidia4 2.6044 0.384 1.991335 3.4061
## Ploidia5 0.0439 22.781 0.006082 0.3168
##
## Concordance= 0.744 (se = 0.009 )
## Likelihood ratio test= 433.8 on 3 df, p=<2e-16
## Wald test = 284.4 on 3 df, p=<2e-16
## Score (logrank) test = 433.4 on 3 df, p=<2e-16
# Univariate Cox model with prednisone response (RTA_PRED) as the single
# covariate (rows with missing RTA_PRED are dropped by coxph).
modelo_pred <- coxph(
  Surv(TIEMPOSG, SGSTATUS) ~ RTA_PRED,
  data = datos
)
summary(modelo_pred)
## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ RTA_PRED, data = datos)
##
## n= 2091, number of events= 464
## (5 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## RTA_PRED1 -1.1487 0.3170 0.1111 -10.34 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## RTA_PRED1 0.317 3.154 0.255 0.3942
##
## Concordance= 0.578 (se = 0.01 )
## Likelihood ratio test= 85.64 on 1 df, p=<2e-16
## Wald test = 107 on 1 df, p=<2e-16
## Score (logrank) test = 119.1 on 1 df, p=<2e-16
# Univariate Cox model with sex (Sexo) as the single covariate.
modelo_sexo <- coxph(
  Surv(TIEMPOSG, SGSTATUS) ~ Sexo,
  data = datos
)
summary(modelo_sexo)
## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Sexo, data = datos)
##
## n= 2096, number of events= 468
##
## coef exp(coef) se(coef) z Pr(>|z|)
## Sexo1 0.12922 1.13794 0.09398 1.375 0.169
##
## exp(coef) exp(-coef) lower .95 upper .95
## Sexo1 1.138 0.8788 0.9465 1.368
##
## Concordance= 0.515 (se = 0.012 )
## Likelihood ratio test= 1.9 on 1 df, p=0.2
## Wald test = 1.89 on 1 df, p=0.2
## Score (logrank) test = 1.89 on 1 df, p=0.2
# Univariate Cox model with age as a categorical variable (Edad_cat).
modelo_edad_cat <- coxph(
  Surv(TIEMPOSG, SGSTATUS) ~ Edad_cat,
  data = datos
)
summary(modelo_edad_cat)
## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Edad_cat, data = datos)
##
## n= 2096, number of events= 468
##
## coef exp(coef) se(coef) z Pr(>|z|)
## Edad_cat2 0.42730 1.53311 0.09299 4.595 4.32e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Edad_cat2 1.533 0.6523 1.278 1.84
##
## Concordance= 0.555 (se = 0.012 )
## Likelihood ratio test= 21.27 on 1 df, p=4e-06
## Wald test = 21.12 on 1 df, p=4e-06
## Score (logrank) test = 21.44 on 1 df, p=4e-06
# Univariate Cox model with white-cell count as a categorical variable
# (Blancos_cat).
modelo_gb_cat <- coxph(
  Surv(TIEMPOSG, SGSTATUS) ~ Blancos_cat,
  data = datos
)
summary(modelo_gb_cat)
## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Blancos_cat, data = datos)
##
## n= 2096, number of events= 468
##
## coef exp(coef) se(coef) z Pr(>|z|)
## Blancos_cat2 0.70475 2.02333 0.09256 7.614 2.66e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Blancos_cat2 2.023 0.4942 1.688 2.426
##
## Concordance= 0.592 (se = 0.012 )
## Likelihood ratio test= 56.43 on 1 df, p=6e-14
## Wald test = 57.97 on 1 df, p=3e-14
## Score (logrank) test = 60.4 on 1 df, p=8e-15
# Univariate Cox model with age as a continuous variable (Edad)
modelo_edad <- coxph(
  formula = Surv(TIEMPOSG, SGSTATUS) ~ Edad,
  data = datos
)
summary(modelo_edad)
## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Edad, data = datos)
##
## n= 2096, number of events= 468
##
## coef exp(coef) se(coef) z Pr(>|z|)
## Edad 0.049639 1.050891 0.009922 5.003 5.65e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Edad 1.051 0.9516 1.031 1.072
##
## Concordance= 0.557 (se = 0.015 )
## Likelihood ratio test= 23.94 on 1 df, p=1e-06
## Wald test = 25.03 on 1 df, p=6e-07
## Score (logrank) test = 25.31 on 1 df, p=5e-07
# Univariate Cox model with white blood cells as a continuous variable
# (Blancos)
modelo_blancos <- coxph(
  formula = Surv(TIEMPOSG, SGSTATUS) ~ Blancos,
  data = datos
)
summary(modelo_blancos)
## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Blancos, data = datos)
##
## n= 2096, number of events= 468
##
## coef exp(coef) se(coef) z Pr(>|z|)
## Blancos 0.0030767 1.0030814 0.0002609 11.79 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Blancos 1.003 0.9969 1.003 1.004
##
## Concordance= 0.615 (se = 0.014 )
## Likelihood ratio test= 82.27 on 1 df, p=<2e-16
## Wald test = 139.1 on 1 df, p=<2e-16
## Score (logrank) test = 153.8 on 1 df, p=<2e-16
De los análisis univariados efectuados, se excluyen del análisis multivariado aquellas variables con valor p > 0,2 (MLL y MO).
# Multivariate model
# Multivariate model without MLL or MO
# NOTE(review): RTA_PRED is written twice in the formula; the summary shows a
# single RTA_PRED1 coefficient, so the repetition does not add a term.
summary(
  coxph(
    formula = Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED + Estirpe +
      Edad + TEL + Blancos + SNC + RTA_PRED + Ploidia + Blastos + CAT_ERM,
    data = datos
  )
)
## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED +
## Estirpe + Edad + TEL + Blancos + SNC + RTA_PRED + Ploidia +
## Blastos + CAT_ERM, data = datos)
##
## n= 1480, number of events= 322
## (616 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## Sexo1 0.0320119 1.0325298 0.1156128 0.277 0.781865
## Down1 1.2173951 3.3783760 0.2740249 4.443 8.89e-06 ***
## RTA_PRED1 -0.5315738 0.5876794 0.1613593 -3.294 0.000987 ***
## EstirpeT -0.0750978 0.9276527 0.1709595 -0.439 0.660464
## Edad 0.0199750 1.0201759 0.0125502 1.592 0.111474
## TEL1 -0.4480192 0.6388924 0.2133808 -2.100 0.035762 *
## Blancos 0.0020543 1.0020564 0.0005058 4.061 4.88e-05 ***
## SNC2 0.2882750 1.3341242 0.4183052 0.689 0.490729
## SNC3 0.3566726 1.4285681 0.2286605 1.560 0.118799
## Ploidia2 -1.2947669 0.2739617 0.2180549 -5.938 2.89e-09 ***
## Ploidia4 0.9981649 2.7132980 0.1540460 6.480 9.19e-11 ***
## Ploidia5 -2.1108716 0.1211323 1.0107490 -2.088 0.036760 *
## Blastos -0.0002697 0.9997303 0.0018073 -0.149 0.881364
## CAT_ERM2 0.3678790 1.4446673 0.1436321 2.561 0.010429 *
## CAT_ERM3 0.7351325 2.0857583 0.1759066 4.179 2.93e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Sexo1 1.0325 0.9685 0.82318 1.2951
## Down1 3.3784 0.2960 1.97450 5.7804
## RTA_PRED1 0.5877 1.7016 0.42834 0.8063
## EstirpeT 0.9277 1.0780 0.66354 1.2969
## Edad 1.0202 0.9802 0.99539 1.0456
## TEL1 0.6389 1.5652 0.42053 0.9706
## Blancos 1.0021 0.9979 1.00106 1.0031
## SNC2 1.3341 0.7496 0.58767 3.0287
## SNC3 1.4286 0.7000 0.91257 2.2363
## Ploidia2 0.2740 3.6501 0.17868 0.4200
## Ploidia4 2.7133 0.3686 2.00620 3.6696
## Ploidia5 0.1211 8.2554 0.01671 0.8782
## Blastos 0.9997 1.0003 0.99620 1.0033
## CAT_ERM2 1.4447 0.6922 1.09020 1.9144
## CAT_ERM3 2.0858 0.4794 1.47752 2.9444
##
## Concordance= 0.798 (se = 0.011 )
## Likelihood ratio test= 406.8 on 15 df, p=<2e-16
## Wald test = 338.6 on 15 df, p=<2e-16
## Score (logrank) test = 458.3 on 15 df, p=<2e-16
# Same covariates as the previous multivariate model but without CAT_ERM;
# the fit is printed and not stored.
coxph(
  formula = Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED + Estirpe +
    Edad + TEL + Blancos + SNC + RTA_PRED + Ploidia + Blastos,
  data = datos
)
## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED +
## Estirpe + Edad + TEL + Blancos + SNC + RTA_PRED + Ploidia +
## Blastos, data = datos)
##
## coef exp(coef) se(coef) z p
## Sexo1 3.212e-02 1.033e+00 1.059e-01 0.303 0.7618
## Down1 1.169e+00 3.220e+00 2.539e-01 4.606 4.11e-06
## RTA_PRED1 -7.653e-01 4.652e-01 1.357e-01 -5.641 1.69e-08
## EstirpeT -5.905e-02 9.427e-01 1.551e-01 -0.381 0.7033
## Edad 2.590e-02 1.026e+00 1.159e-02 2.235 0.0254
## TEL1 -3.690e-01 6.914e-01 1.972e-01 -1.872 0.0613
## Blancos 2.085e-03 1.002e+00 4.166e-04 5.004 5.60e-07
## SNC2 3.270e-01 1.387e+00 3.628e-01 0.901 0.3675
## SNC3 4.102e-01 1.507e+00 2.086e-01 1.967 0.0492
## Ploidia2 -1.330e+00 2.644e-01 2.090e-01 -6.363 1.97e-10
## Ploidia4 1.040e+00 2.830e+00 1.462e-01 7.113 1.13e-12
## Ploidia5 -2.548e+00 7.821e-02 1.010e+00 -2.524 0.0116
## Blastos -8.867e-05 9.999e-01 1.636e-03 -0.054 0.9568
##
## Likelihood ratio test=481.2 on 13 df, p=< 2.2e-16
## n= 1680, number of events= 386
## (416 observations deleted due to missingness)
# Multivariate model with age and white blood cells as categorical variables
# NOTE(review): RTA_PRED is written twice in the formula; only one RTA_PRED1
# coefficient is estimated.
modelo_mult <- coxph(
  formula = Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED + Estirpe +
    Edad_cat + TEL + Blancos_cat + SNC + RTA_PRED + Ploidia + Blastos +
    CAT_ERM,
  data = datos
)
summary(modelo_mult)
## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED +
## Estirpe + Edad_cat + TEL + Blancos_cat + SNC + RTA_PRED +
## Ploidia + Blastos + CAT_ERM, data = datos)
##
## n= 1480, number of events= 322
## (616 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## Sexo1 0.0218378 1.0220780 0.1155556 0.189 0.85011
## Down1 1.1497799 3.1574980 0.2777192 4.140 3.47e-05 ***
## RTA_PRED1 -0.5089549 0.6011235 0.1590766 -3.199 0.00138 **
## EstirpeT 0.0468603 1.0479756 0.1636782 0.286 0.77465
## Edad_cat2 0.0790463 1.0822544 0.1165434 0.678 0.49761
## TEL1 -0.4917902 0.6115307 0.2141550 -2.296 0.02165 *
## Blancos_cat2 0.3682409 1.4451901 0.1490434 2.471 0.01349 *
## SNC2 0.4035746 1.4971670 0.4168909 0.968 0.33302
## SNC3 0.5695412 1.7674559 0.2197318 2.592 0.00954 **
## Ploidia2 -1.3034923 0.2715817 0.2175331 -5.992 2.07e-09 ***
## Ploidia4 0.9800643 2.6646276 0.1535633 6.382 1.75e-10 ***
## Ploidia5 -2.0791775 0.1250330 1.0106309 -2.057 0.03966 *
## Blastos -0.0005152 0.9994850 0.0020302 -0.254 0.79968
## CAT_ERM2 0.3756233 1.4558985 0.1438508 2.611 0.00902 **
## CAT_ERM3 0.7632108 2.1451529 0.1747163 4.368 1.25e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Sexo1 1.0221 0.9784 0.81493 1.2819
## Down1 3.1575 0.3167 1.83210 5.4417
## RTA_PRED1 0.6011 1.6636 0.44011 0.8211
## EstirpeT 1.0480 0.9542 0.76038 1.4444
## Edad_cat2 1.0823 0.9240 0.86125 1.3600
## TEL1 0.6115 1.6352 0.40191 0.9305
## Blancos_cat2 1.4452 0.6920 1.07909 1.9355
## SNC2 1.4972 0.6679 0.66132 3.3894
## SNC3 1.7675 0.5658 1.14898 2.7188
## Ploidia2 0.2716 3.6821 0.17731 0.4160
## Ploidia4 2.6646 0.3753 1.97207 3.6004
## Ploidia5 0.1250 7.9979 0.01725 0.9063
## Blastos 0.9995 1.0005 0.99552 1.0035
## CAT_ERM2 1.4559 0.6869 1.09821 1.9301
## CAT_ERM3 2.1452 0.4662 1.52314 3.0212
##
## Concordance= 0.798 (se = 0.011 )
## Likelihood ratio test= 397.3 on 15 df, p=<2e-16
## Wald test = 323.8 on 15 df, p=<2e-16
## Score (logrank) test = 433.3 on 15 df, p=<2e-16
# Multivariate model with age and white blood cells as continuous variables
# NOTE(review): RTA_PRED is written twice in the formula; only one RTA_PRED1
# coefficient is estimated.
modelo_mult1 <- coxph(
  formula = Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED + Estirpe +
    Edad + TEL + Blancos + SNC + RTA_PRED + Ploidia + Blastos + CAT_ERM,
  data = datos
)
summary(modelo_mult1)
## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED +
## Estirpe + Edad + TEL + Blancos + SNC + RTA_PRED + Ploidia +
## Blastos + CAT_ERM, data = datos)
##
## n= 1480, number of events= 322
## (616 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## Sexo1 0.0320119 1.0325298 0.1156128 0.277 0.781865
## Down1 1.2173951 3.3783760 0.2740249 4.443 8.89e-06 ***
## RTA_PRED1 -0.5315738 0.5876794 0.1613593 -3.294 0.000987 ***
## EstirpeT -0.0750978 0.9276527 0.1709595 -0.439 0.660464
## Edad 0.0199750 1.0201759 0.0125502 1.592 0.111474
## TEL1 -0.4480192 0.6388924 0.2133808 -2.100 0.035762 *
## Blancos 0.0020543 1.0020564 0.0005058 4.061 4.88e-05 ***
## SNC2 0.2882750 1.3341242 0.4183052 0.689 0.490729
## SNC3 0.3566726 1.4285681 0.2286605 1.560 0.118799
## Ploidia2 -1.2947669 0.2739617 0.2180549 -5.938 2.89e-09 ***
## Ploidia4 0.9981649 2.7132980 0.1540460 6.480 9.19e-11 ***
## Ploidia5 -2.1108716 0.1211323 1.0107490 -2.088 0.036760 *
## Blastos -0.0002697 0.9997303 0.0018073 -0.149 0.881364
## CAT_ERM2 0.3678790 1.4446673 0.1436321 2.561 0.010429 *
## CAT_ERM3 0.7351325 2.0857583 0.1759066 4.179 2.93e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Sexo1 1.0325 0.9685 0.82318 1.2951
## Down1 3.3784 0.2960 1.97450 5.7804
## RTA_PRED1 0.5877 1.7016 0.42834 0.8063
## EstirpeT 0.9277 1.0780 0.66354 1.2969
## Edad 1.0202 0.9802 0.99539 1.0456
## TEL1 0.6389 1.5652 0.42053 0.9706
## Blancos 1.0021 0.9979 1.00106 1.0031
## SNC2 1.3341 0.7496 0.58767 3.0287
## SNC3 1.4286 0.7000 0.91257 2.2363
## Ploidia2 0.2740 3.6501 0.17868 0.4200
## Ploidia4 2.7133 0.3686 2.00620 3.6696
## Ploidia5 0.1211 8.2554 0.01671 0.8782
## Blastos 0.9997 1.0003 0.99620 1.0033
## CAT_ERM2 1.4447 0.6922 1.09020 1.9144
## CAT_ERM3 2.0858 0.4794 1.47752 2.9444
##
## Concordance= 0.798 (se = 0.011 )
## Likelihood ratio test= 406.8 on 15 df, p=<2e-16
## Wald test = 338.6 on 15 df, p=<2e-16
## Score (logrank) test = 458.3 on 15 df, p=<2e-16
#impresion de HR en tabla (agregar)
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
# VIF of the multivariate model with categorical age / white blood cells
library(car)
# car::vif is called explicitly: rms, attached later in this script, also
# exports vif and masks car's version, so an unqualified call would pick a
# different function when the script is re-run in the same session.
vif_modelo <- car::vif(modelo_mult)
## Warning in vif.default(modelo_mult): No intercept: vifs may not be sensible.
# Show the GVIF table computed for modelo_mult
print(vif_modelo)
## GVIF Df GVIF^(1/(2*Df))
## Sexo 1.037140 1 1.018401
## Down 1.093849 1 1.045872
## RTA_PRED 1.453125 1 1.205456
## Estirpe 1.287930 1 1.134870
## Edad_cat 1.081776 1 1.040085
## TEL 1.056968 1 1.028089
## Blancos_cat 1.779789 1 1.334087
## SNC 1.086985 2 1.021071
## Ploidia 1.026765 3 1.004412
## Blastos 1.726685 1 1.314034
## CAT_ERM 1.349149 2 1.077742
# VIF of the multivariate model with continuous age / white blood cells.
# car::vif is called explicitly: rms, attached later in this script, also
# exports vif and masks car's version.
vif_modelo1 <- car::vif(modelo_mult1)
## Warning in vif.default(modelo_mult1): No intercept: vifs may not be sensible.
# Show the GVIF table computed for modelo_mult1
print(vif_modelo1)
## GVIF Df GVIF^(1/(2*Df))
## Sexo 1.038088 1 1.018866
## Down 1.066044 1 1.032494
## RTA_PRED 1.488479 1 1.220032
## Estirpe 1.401772 1 1.183965
## Edad 1.075803 1 1.037209
## TEL 1.049241 1 1.024325
## Blancos 1.709111 1 1.307330
## SNC 1.180038 2 1.042255
## Ploidia 1.031431 3 1.005171
## Blastos 1.398638 1 1.182640
## CAT_ERM 1.365224 2 1.080939
# Forest plots to visualise the results of the Cox models
# Model with age and white blood cells as categorical variables
ggforest(model = modelo_mult, data = datos)
# Model with age and white blood cells as continuous variables
ggforest(model = modelo_mult1, data = datos)
En el análisis del VIF se observa que todos los valores son menores a 5, por lo que no se evidencian problemas de colinealidad entre las variables explicativas.
# Multivariate model without MO or MLL (age and white blood cells as
# categorical variables), followed by the proportional-hazards test
coxlla1 <- coxph(
  formula = Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED + Estirpe +
    Edad_cat + TEL + Blancos_cat + SNC + RTA_PRED + Ploidia + Blastos +
    CAT_ERM,
  data = datos
)
cox.zph(fit = coxlla1)
## chisq df p
## Sexo 0.00218 1 0.96
## Down 2.13511 1 0.14
## RTA_PRED 0.70844 1 0.40
## Estirpe 2.19226 1 0.14
## Edad_cat 0.04011 1 0.84
## TEL 2.70359 1 0.10
## Blancos_cat 1.91151 1 0.17
## SNC 3.07656 2 0.21
## Ploidia 3.59139 3 0.31
## Blastos 0.58146 1 0.45
## CAT_ERM 3.12794 2 0.21
## GLOBAL 18.47390 15 0.24
# Multivariate model without MO or MLL (age and white blood cells as
# continuous variables), followed by the proportional-hazards test
coxlla2 <- coxph(
  formula = Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED + Estirpe +
    Edad + TEL + Blancos + SNC + RTA_PRED + Ploidia + Blastos + CAT_ERM,
  data = datos
)
cox.zph(fit = coxlla2)
## chisq df p
## Sexo 0.0376 1 0.846
## Down 1.7193 1 0.190
## RTA_PRED 0.2446 1 0.621
## Estirpe 1.2804 1 0.258
## Edad 0.3118 1 0.577
## TEL 2.9190 1 0.088
## Blancos 2.1840 1 0.139
## SNC 3.5443 2 0.170
## Ploidia 3.4984 3 0.321
## Blastos 0.1216 1 0.727
## CAT_ERM 2.2974 2 0.317
## GLOBAL 22.4574 15 0.096
# The proportional-hazards assumption holds: in every case the p-values
# obtained are greater than 0.05 (globally and for each predictor).
# The model therefore assumes that the HR of each variable Xj is the same at
# any time t.
# Graphical check of the proportional-hazards assumption:
# library() is used so that a missing package stops the script immediately.
library(rms)
## Loading required package: rms
## Loading required package: SparseM
##
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
##
## backsolve
##
## Attaching package: 'rms'
## The following objects are masked from 'package:car':
##
## Predict, vif
library(car)

# Draws the log(-log(S(t))) curves of an npsurv fit, one curve per stratum,
# with the settings shared by every graphical proportional-hazards check
# below (first 12 time units, no confidence bands, curves labelled by level).
plot_loglog_km <- function(fit) {
  survplot(
    fit,
    loglog = TRUE, logt = FALSE, xlim = c(0, 12),
    xlab = "Tiempo hasta la muerte", ylab = "Log(-Log(S(t)))",
    label.curves = TRUE, time.inc = 1,
    levels.only = TRUE, conf = "none", type = "kaplan-meier"
  )
}

# CAT_ERM
survlla1 <- npsurv(Surv(TIEMPOSG, SGSTATUS) ~ CAT_ERM, data = datos)
plot_loglog_km(survlla1)
# Sexo
survlla2 <- npsurv(Surv(TIEMPOSG, SGSTATUS) ~ Sexo, data = datos)
plot_loglog_km(survlla2)
# Down
survlla3 <- npsurv(Surv(TIEMPOSG, SGSTATUS) ~ Down, data = datos)
plot_loglog_km(survlla3)
# SNC
survlla4 <- npsurv(Surv(TIEMPOSG, SGSTATUS) ~ SNC, data = datos)
plot_loglog_km(survlla4)
# Ploidia
survlla5 <- npsurv(Surv(TIEMPOSG, SGSTATUS) ~ Ploidia, data = datos)
# TODO(review): the plot did not work for Ploidia; only the fit is kept.
# Response to prednisone
survlla6 <- npsurv(Surv(TIEMPOSG, SGSTATUS) ~ RTA_PRED, data = datos)
plot_loglog_km(survlla6)
# Estirpe
survlla7 <- npsurv(Surv(TIEMPOSG, SGSTATUS) ~ Estirpe, data = datos)
plot_loglog_km(survlla7)
# TEL
survlla8 <- npsurv(Surv(TIEMPOSG, SGSTATUS) ~ TEL, data = datos)
plot_loglog_km(survlla8)
# MLL
survlla9 <- npsurv(Surv(TIEMPOSG, SGSTATUS) ~ MLL, data = datos)
plot_loglog_km(survlla9)
# Residuals: proportional-hazards test object for coxlla1
residuos_lla <- cox.zph(coxlla1)
residuos_lla
## chisq df p
## Sexo 0.00218 1 0.96
## Down 2.13511 1 0.14
## RTA_PRED 0.70844 1 0.40
## Estirpe 2.19226 1 0.14
## Edad_cat 0.04011 1 0.84
## TEL 2.70359 1 0.10
## Blancos_cat 1.91151 1 0.17
## SNC 3.07656 2 0.21
## Ploidia 3.59139 3 0.31
## Blastos 0.58146 1 0.45
## CAT_ERM 3.12794 2 0.21
## GLOBAL 18.47390 15 0.24
# Scaled Schoenfeld residuals vs. time for each variable in the model. These
# residuals also allow testing the proportional-hazards hypothesis.
# No par(mfrow = ...) is set here: it is a base-graphics layout setting, does
# not arrange the survminer (ggplot2-based) plots, and would otherwise persist
# for later base plots in the same session.
ggcoxzph(residuos_lla, font.main = 10)
# Martingale residuals
ggcoxdiagnostics(
  coxlla1,
  type = "martingale",
  linear.predictions = FALSE,
  ggtheme = theme_bw()
)
## `geom_smooth()` using formula 'y ~ x'
# The residuals are distributed approximately symmetrically around zero.
# dfbeta residuals
ggcoxdiagnostics(
  fit = coxlla1,
  type = "dfbeta",
  linear.predictions = FALSE,
  ggtheme = theme_bw()
)
## `geom_smooth()` using formula 'y ~ x'
# Deviance residuals, to detect outliers
# NOTE(review): mres is not referenced again in this part of the script.
mres <- residuals(modelo_mult1, type = "deviance")
# Competing risks (to be reviewed)
library(readxl)
# NOTE(review): absolute, machine-specific path; the script only runs where
# this exact file location exists.
ERM <- read_excel(
  path = "C:/Users/Magali/Desktop/TF CEECS/Base final.xlsx",
  sheet = "riesgo comp"
)
head(ERM)
## # A tibble: 6 x 3
## ftime status dis
## <dbl> <dbl> <dbl>
## 1 37 2 1
## 2 19 2 1
## 3 16 2 1
## 4 31 2 2
## 5 34 2 3
## 6 40 2 2
# Cross-tabulation of dis (rows) against status (columns)
table(ERM[["dis"]], ERM[["status"]])
##
## 0 1 2
## 1 577 22 80
## 2 635 37 175
## 3 198 38 80
# Turn the dis codes 1/2/3 into a labelled factor
ERM$dis <- factor(
  ERM$dis,
  levels = c(1, 2, 3),
  labels = c("<0.1 ", "0.1-10", ">10")
)
# The cumulative incidence function (CIF) can estimate different causes of
# failure and allows comparisons between groups
library(cmprsk)
# Status code 0 marks censored observations in both calls (stated explicitly
# in the grouped call too, for consistency).
CIFdata_lla <- cuminc(ftime = ERM$ftime, fstatus = ERM$status, cencode = 0)
cifdata_lladis <- cuminc(
  ftime = ERM$ftime, fstatus = ERM$status,
  group = ERM$dis, cencode = 0
)
# Axis label fixed: the closing parenthesis was missing
plot(cifdata_lladis, xlab = "Tiempo (meses)")
# Chi-square test comparing the groups, one row per status code
cifdata_lladis$Tests
## stat pv df
## 1 38.28877 4.849526e-09 2
## 2 44.96465 1.722071e-10 2
# Cumulative incidence plot by ERM category
# library() is used so that a missing package stops the script immediately.
library(ggplot2)
ggcompetingrisks(
  cifdata_lladis,
  palette = "lancet", legend = "top", ggtheme = theme_bw()
)
# Multivariate model after removing the non-significant variables
# (Sexo, Edad, Estirpe, SNC and Blastos)
# NOTE(review): RTA_PRED is written twice in the formula; only one RTA_PRED1
# coefficient is estimated.
modelo1 <- coxph(
  formula = Surv(TIEMPOSG, SGSTATUS) ~ Down + RTA_PRED + TEL + Blancos +
    RTA_PRED + Ploidia + CAT_ERM,
  data = datos
)
summary(modelo1)
## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Down + RTA_PRED +
## TEL + Blancos + RTA_PRED + Ploidia + CAT_ERM, data = datos)
##
## n= 1492, number of events= 324
## (604 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## Down1 1.2519694 3.4972237 0.2687036 4.659 3.17e-06 ***
## RTA_PRED1 -0.4994671 0.6068540 0.1539842 -3.244 0.00118 **
## TEL1 -0.4934883 0.6104931 0.2095660 -2.355 0.01853 *
## Blancos 0.0021778 1.0021801 0.0004186 5.202 1.97e-07 ***
## Ploidia2 -1.2760240 0.2791450 0.2160617 -5.906 3.51e-09 ***
## Ploidia4 1.0036052 2.7280996 0.1532123 6.550 5.74e-11 ***
## Ploidia5 -2.1593846 0.1153961 1.0101705 -2.138 0.03255 *
## CAT_ERM2 0.3744556 1.4541995 0.1420055 2.637 0.00837 **
## CAT_ERM3 0.7600290 2.1383383 0.1739915 4.368 1.25e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Down1 3.4972 0.2859 2.06539 5.9217
## RTA_PRED1 0.6069 1.6478 0.44876 0.8206
## TEL1 0.6105 1.6380 0.40485 0.9206
## Blancos 1.0022 0.9978 1.00136 1.0030
## Ploidia2 0.2791 3.5824 0.18278 0.4263
## Ploidia4 2.7281 0.3666 2.02044 3.6836
## Ploidia5 0.1154 8.6658 0.01593 0.8357
## CAT_ERM2 1.4542 0.6877 1.10090 1.9209
## CAT_ERM3 2.1383 0.4677 1.52046 3.0073
##
## Concordance= 0.798 (se = 0.011 )
## Likelihood ratio test= 401.9 on 9 df, p=<2e-16
## Wald test = 335.3 on 9 df, p=<2e-16
## Score (logrank) test = 451.6 on 9 df, p=<2e-16
# Multivariate model without Sexo
modelo2 <- coxph(
  formula = Surv(TIEMPOSG, SGSTATUS) ~ Down + RTA_PRED + Estirpe + Edad +
    TEL + Blancos + SNC + RTA_PRED + Ploidia + Blastos + CAT_ERM,
  data = datos
)
summary(modelo2)
## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Down + RTA_PRED +
## Estirpe + Edad + TEL + Blancos + SNC + RTA_PRED + Ploidia +
## Blastos + CAT_ERM, data = datos)
##
## n= 1480, number of events= 322
## (616 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## Down1 1.2113879 3.3581422 0.2731949 4.434 9.24e-06 ***
## RTA_PRED1 -0.5341630 0.5861597 0.1609960 -3.318 0.000907 ***
## EstirpeT -0.0694643 0.9328934 0.1697857 -0.409 0.682445
## Edad 0.0200406 1.0202428 0.0125512 1.597 0.110330
## TEL1 -0.4479239 0.6389533 0.2134239 -2.099 0.035839 *
## Blancos 0.0020472 1.0020493 0.0005055 4.050 5.12e-05 ***
## SNC2 0.2954843 1.3437769 0.4174822 0.708 0.479084
## SNC3 0.3557135 1.4271986 0.2285331 1.557 0.119587
## Ploidia2 -1.2952898 0.2738185 0.2180407 -5.941 2.84e-09 ***
## Ploidia4 0.9987291 2.7148294 0.1540185 6.484 8.90e-11 ***
## Ploidia5 -2.1156908 0.1205500 1.0106026 -2.093 0.036305 *
## Blastos -0.0002632 0.9997369 0.0018069 -0.146 0.884205
## CAT_ERM2 0.3689451 1.4462082 0.1435918 2.569 0.010187 *
## CAT_ERM3 0.7348562 2.0851821 0.1758722 4.178 2.94e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Down1 3.3581 0.2978 1.96587 5.7364
## RTA_PRED1 0.5862 1.7060 0.42754 0.8036
## EstirpeT 0.9329 1.0719 0.66882 1.3012
## Edad 1.0202 0.9802 0.99545 1.0457
## TEL1 0.6390 1.5651 0.42054 0.9708
## Blancos 1.0020 0.9980 1.00106 1.0030
## SNC2 1.3438 0.7442 0.59288 3.0457
## SNC3 1.4272 0.7007 0.91192 2.2336
## Ploidia2 0.2738 3.6521 0.17859 0.4198
## Ploidia4 2.7148 0.3683 2.00744 3.6715
## Ploidia5 0.1205 8.2953 0.01663 0.8738
## Blastos 0.9997 1.0003 0.99620 1.0033
## CAT_ERM2 1.4462 0.6915 1.09145 1.9163
## CAT_ERM3 2.0852 0.4796 1.47721 2.9434
##
## Concordance= 0.798 (se = 0.011 )
## Likelihood ratio test= 406.7 on 14 df, p=<2e-16
## Wald test = 338.6 on 14 df, p=<2e-16
## Score (logrank) test = 458.3 on 14 df, p=<2e-16
# Multivariate model without Estirpe
modelo3 <- coxph(
  formula = Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED + Edad + TEL +
    Blancos + SNC + RTA_PRED + Ploidia + Blastos + CAT_ERM,
  data = datos
)
summary(modelo3)
## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED +
## Edad + TEL + Blancos + SNC + RTA_PRED + Ploidia + Blastos +
## CAT_ERM, data = datos)
##
## n= 1480, number of events= 322
## (616 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## Sexo1 0.0258837 1.0262216 0.1147929 0.225 0.82160
## Down1 1.2256144 3.4062582 0.2733558 4.484 7.34e-06 ***
## RTA_PRED1 -0.5168242 0.5964116 0.1580203 -3.271 0.00107 **
## Edad 0.0192324 1.0194185 0.0124568 1.544 0.12261
## TEL1 -0.4424548 0.6424574 0.2130545 -2.077 0.03783 *
## Blancos 0.0019790 1.0019810 0.0004776 4.144 3.41e-05 ***
## SNC2 0.2959569 1.3444122 0.4178827 0.708 0.47880
## SNC3 0.3539496 1.4246833 0.2279209 1.553 0.12044
## Ploidia2 -1.2979229 0.2730985 0.2179297 -5.956 2.59e-09 ***
## Ploidia4 0.9982177 2.7134413 0.1540682 6.479 9.23e-11 ***
## Ploidia5 -2.1072579 0.1215709 1.0106949 -2.085 0.03707 *
## Blastos -0.0002456 0.9997545 0.0018084 -0.136 0.89198
## CAT_ERM2 0.3692158 1.4465997 0.1436174 2.571 0.01015 *
## CAT_ERM3 0.7333533 2.0820506 0.1758428 4.171 3.04e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Sexo1 1.0262 0.9744 0.81946 1.2851
## Down1 3.4063 0.2936 1.99341 5.8205
## RTA_PRED1 0.5964 1.6767 0.43756 0.8129
## Edad 1.0194 0.9810 0.99483 1.0446
## TEL1 0.6425 1.5565 0.42315 0.9754
## Blancos 1.0020 0.9980 1.00104 1.0029
## SNC2 1.3444 0.7438 0.59269 3.0495
## SNC3 1.4247 0.7019 0.91141 2.2270
## Ploidia2 0.2731 3.6617 0.17816 0.4186
## Ploidia4 2.7134 0.3685 2.00621 3.6700
## Ploidia5 0.1216 8.2257 0.01677 0.8813
## Blastos 0.9998 1.0002 0.99622 1.0033
## CAT_ERM2 1.4466 0.6913 1.09169 1.9169
## CAT_ERM3 2.0821 0.4803 1.47507 2.9388
##
## Concordance= 0.799 (se = 0.011 )
## Likelihood ratio test= 406.6 on 14 df, p=<2e-16
## Wald test = 337.9 on 14 df, p=<2e-16
## Score (logrank) test = 456.9 on 14 df, p=<2e-16
# Multivariate model without Edad
modelo4 <- coxph(
  formula = Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED + Estirpe +
    TEL + Blancos + SNC + RTA_PRED + Ploidia + Blastos + CAT_ERM,
  data = datos
)
summary(modelo4)
## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED +
## Estirpe + TEL + Blancos + SNC + RTA_PRED + Ploidia + Blastos +
## CAT_ERM, data = datos)
##
## n= 1480, number of events= 322
## (616 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## Sexo1 3.509e-02 1.036e+00 1.154e-01 0.304 0.76099
## Down1 1.261e+00 3.529e+00 2.725e-01 4.627 3.71e-06 ***
## RTA_PRED1 -5.068e-01 6.024e-01 1.604e-01 -3.160 0.00158 **
## EstirpeT -3.809e-02 9.626e-01 1.690e-01 -0.225 0.82164
## TEL1 -4.941e-01 6.101e-01 2.113e-01 -2.338 0.01937 *
## Blancos 2.007e-03 1.002e+00 5.022e-04 3.997 6.40e-05 ***
## SNC2 3.143e-01 1.369e+00 4.176e-01 0.753 0.45168
## SNC3 3.449e-01 1.412e+00 2.290e-01 1.506 0.13201
## Ploidia2 -1.284e+00 2.770e-01 2.180e-01 -5.889 3.88e-09 ***
## Ploidia4 1.015e+00 2.760e+00 1.537e-01 6.606 3.95e-11 ***
## Ploidia5 -2.114e+00 1.208e-01 1.011e+00 -2.091 0.03652 *
## Blastos -9.143e-05 9.999e-01 1.803e-03 -0.051 0.95956
## CAT_ERM2 3.714e-01 1.450e+00 1.436e-01 2.587 0.00969 **
## CAT_ERM3 7.587e-01 2.135e+00 1.752e-01 4.331 1.49e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Sexo1 1.0357 0.9655 0.82613 1.2985
## Down1 3.5290 0.2834 2.06857 6.0204
## RTA_PRED1 0.6024 1.6599 0.43997 0.8249
## EstirpeT 0.9626 1.0388 0.69125 1.3405
## TEL1 0.6101 1.6390 0.40325 0.9232
## Blancos 1.0020 0.9980 1.00102 1.0030
## SNC2 1.3693 0.7303 0.60401 3.1041
## SNC3 1.4119 0.7083 0.90131 2.2116
## Ploidia2 0.2770 3.6103 0.18068 0.4246
## Ploidia4 2.7600 0.3623 2.04220 3.7302
## Ploidia5 0.1208 8.2778 0.01666 0.8759
## Blastos 0.9999 1.0001 0.99638 1.0034
## CAT_ERM2 1.4498 0.6898 1.09415 1.9210
## CAT_ERM3 2.1354 0.4683 1.51483 3.0103
##
## Concordance= 0.798 (se = 0.011 )
## Likelihood ratio test= 404.3 on 14 df, p=<2e-16
## Wald test = 336.7 on 14 df, p=<2e-16
## Score (logrank) test = 456.5 on 14 df, p=<2e-16
# Multivariate model without SNC. As written, the formula also leaves out
# Edad and lists Sexo twice, so this fit drops both SNC and Edad.
# NOTE(review): the result is assigned to modelo4 again, replacing the
# "without Edad" fit stored under the same name just above; confirm whether a
# separate object and a formula that keeps Edad were intended.
modelo4<-coxph( Surv(TIEMPOSG,SGSTATUS) ~ Sexo + Down + RTA_PRED + Estirpe + TEL+ Blancos + Sexo+RTA_PRED +Ploidia + Blastos+ CAT_ERM, data=datos)
summary(modelo4)
## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED +
## Estirpe + TEL + Blancos + Sexo + RTA_PRED + Ploidia + Blastos +
## CAT_ERM, data = datos)
##
## n= 1480, number of events= 322
## (616 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## Sexo1 0.0379587 1.0386883 0.1152330 0.329 0.74185
## Down1 1.2476843 3.4822696 0.2723825 4.581 4.64e-06 ***
## RTA_PRED1 -0.5054968 0.6032058 0.1600703 -3.158 0.00159 **
## EstirpeT -0.0398533 0.9609304 0.1695726 -0.235 0.81419
## TEL1 -0.4910419 0.6119884 0.2106862 -2.331 0.01977 *
## Blancos 0.0022424 1.0022449 0.0004896 4.580 4.64e-06 ***
## Ploidia2 -1.2901588 0.2752271 0.2179827 -5.919 3.25e-09 ***
## Ploidia4 1.0060053 2.7346550 0.1533347 6.561 5.35e-11 ***
## Ploidia5 -2.1510035 0.1163673 1.0104440 -2.129 0.03327 *
## Blastos -0.0002308 0.9997693 0.0018050 -0.128 0.89827
## CAT_ERM2 0.3674584 1.4440597 0.1433850 2.563 0.01038 *
## CAT_ERM3 0.7568939 2.1316448 0.1749745 4.326 1.52e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Sexo1 1.0387 0.9628 0.82870 1.3019
## Down1 3.4823 0.2872 2.04178 5.9390
## RTA_PRED1 0.6032 1.6578 0.44077 0.8255
## EstirpeT 0.9609 1.0407 0.68921 1.3398
## TEL1 0.6120 1.6340 0.40495 0.9249
## Blancos 1.0022 0.9978 1.00128 1.0032
## Ploidia2 0.2752 3.6334 0.17953 0.4219
## Ploidia4 2.7347 0.3657 2.02481 3.6934
## Ploidia5 0.1164 8.5935 0.01606 0.8432
## Blastos 0.9998 1.0002 0.99624 1.0033
## CAT_ERM2 1.4441 0.6925 1.09027 1.9126
## CAT_ERM3 2.1316 0.4691 1.51278 3.0037
##
## Concordance= 0.797 (se = 0.011 )
## Likelihood ratio test= 401.8 on 12 df, p=<2e-16
## Wald test = 334.1 on 12 df, p=<2e-16
## Score (logrank) test = 453.1 on 12 df, p=<2e-16
# Multivariate model without Blastos. As written, the formula also leaves out
# Edad and lists Sexo twice, so this fit drops both Blastos and Edad.
# NOTE(review): confirm whether Edad was meant to stay in this model.
modelo5<-coxph( Surv(TIEMPOSG,SGSTATUS) ~ Sexo + Down + RTA_PRED + Estirpe + TEL+ Blancos + Sexo+RTA_PRED +Ploidia + SNC+ CAT_ERM, data=datos)
summary(modelo5)
## Call:
## coxph(formula = Surv(TIEMPOSG, SGSTATUS) ~ Sexo + Down + RTA_PRED +
## Estirpe + TEL + Blancos + Sexo + RTA_PRED + Ploidia + SNC +
## CAT_ERM, data = datos)
##
## n= 1492, number of events= 324
## (604 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## Sexo1 0.0458101 1.0468756 0.1150859 0.398 0.69059
## Down1 1.2668673 3.5497149 0.2699213 4.693 2.69e-06 ***
## RTA_PRED1 -0.5060011 0.6029017 0.1574880 -3.213 0.00131 **
## EstirpeT -0.0448084 0.9561807 0.1688928 -0.265 0.79077
## TEL1 -0.4994694 0.6068526 0.2108179 -2.369 0.01783 *
## Blancos 0.0020071 1.0020092 0.0004663 4.304 1.68e-05 ***
## Ploidia2 -1.2675931 0.2815084 0.2162065 -5.863 4.55e-09 ***
## Ploidia4 1.0124136 2.7522359 0.1536285 6.590 4.40e-11 ***
## Ploidia5 -2.1180045 0.1202714 1.0107652 -2.095 0.03613 *
## SNC2 0.3111999 1.3650621 0.4164748 0.747 0.45493
## SNC3 0.3442877 1.4109846 0.2289780 1.504 0.13269
## CAT_ERM2 0.3770787 1.4580190 0.1423105 2.650 0.00806 **
## CAT_ERM3 0.7638754 2.1465790 0.1743111 4.382 1.17e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Sexo1 1.0469 0.9552 0.83548 1.3118
## Down1 3.5497 0.2817 2.09139 6.0249
## RTA_PRED1 0.6029 1.6586 0.44278 0.8209
## EstirpeT 0.9562 1.0458 0.68672 1.3314
## TEL1 0.6069 1.6478 0.40145 0.9173
## Blancos 1.0020 0.9980 1.00109 1.0029
## Ploidia2 0.2815 3.5523 0.18427 0.4301
## Ploidia4 2.7522 0.3633 2.03665 3.7192
## Ploidia5 0.1203 8.3145 0.01659 0.8720
## SNC2 1.3651 0.7326 0.60346 3.0879
## SNC3 1.4110 0.7087 0.90077 2.2102
## CAT_ERM2 1.4580 0.6859 1.10313 1.9271
## CAT_ERM3 2.1466 0.4659 1.52536 3.0208
##
## Concordance= 0.798 (se = 0.011 )
## Likelihood ratio test= 404.6 on 13 df, p=<2e-16
## Wald test = 338.3 on 13 df, p=<2e-16
## Score (logrank) test = 455.8 on 13 df, p=<2e-16
# Model selection using AIC
# NOTE(review): the summaries above report n = 1492 for modelo1 and modelo5
# and n = 1480 for the other fits, and AIC() warns that the models are not all
# fitted to the same number of observations; the AIC values are therefore not
# directly comparable. modelo4 here is the most recent assignment to that name.
AIC(modelo1,modelo2, modelo3,modelo4,modelo5)
## Warning in AIC.default(modelo1, modelo2, modelo3, modelo4, modelo5): models are
## not all fitted to the same number of observations
## df AIC
## modelo1 9 4145.965
## modelo2 14 4120.666
## modelo3 14 4120.784
## modelo4 12 4121.604
## modelo5 13 4151.231
# modelo2 has the lowest AIC in the comparison above.
# It was already fitted earlier in the script (the model without Sexo), so it
# is reused here instead of being refitted with the identical formula.
ggforest(modelo2, data = datos)