Os dados da tabela abaixo referem-se aos tempos de sobrevivência (em
dias) de pacientes com câncer submetidos à radioterapia (o símbolo -
indica censura).
Obtenha: O ajuste dos modelos exponencial e Weibull para o conjunto de
dados de radioterapia, compare os modelos pelas estatísticas AIC, BIC e
CAIC e pelo teste TRV. Compare o ajuste dos modelos com o
Kaplan-Meier.
dados_t=c(7,34,42,63,64,-74,83,84,91,108,112,129,133,133,139,140,
140,146,149,154,160,160,165,173,176,-185,218,225,241,
248,273,277,-279,297,-319,405,417,420,440,-523,
583,594,1101,-1116,1146,-1226,-1349,-1412,1417)
cens = ifelse(dados_t<= 0,0,1)
tempos = abs(dados_t)
library(survival)
ekm=survfit(Surv(tempos,cens)~1)
summary(ekm)
## Call: survfit(formula = Surv(tempos, cens) ~ 1)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 7 49 1 0.980 0.0202 0.9408 1.000
## 34 48 1 0.959 0.0283 0.9054 1.000
## 42 47 1 0.939 0.0342 0.8740 1.000
## 63 46 1 0.918 0.0391 0.8448 0.998
## 64 45 1 0.898 0.0432 0.8171 0.987
## 83 43 1 0.877 0.0470 0.7896 0.974
## 84 42 1 0.856 0.0503 0.7630 0.961
## 91 41 1 0.835 0.0532 0.7372 0.946
## 108 40 1 0.814 0.0559 0.7120 0.932
## 112 39 1 0.794 0.0582 0.6873 0.916
## 129 38 1 0.773 0.0603 0.6631 0.900
## 133 37 2 0.731 0.0639 0.6159 0.867
## 139 35 1 0.710 0.0654 0.5928 0.850
## 140 34 2 0.668 0.0679 0.5476 0.815
## 146 32 1 0.647 0.0689 0.5255 0.797
## 149 31 1 0.626 0.0698 0.5037 0.779
## 154 30 1 0.606 0.0705 0.4821 0.761
## 160 29 2 0.564 0.0715 0.4397 0.723
## 165 27 1 0.543 0.0719 0.4189 0.704
## 173 26 1 0.522 0.0721 0.3983 0.684
## 176 25 1 0.501 0.0722 0.3780 0.665
## 218 23 1 0.479 0.0722 0.3568 0.644
## 225 22 1 0.458 0.0722 0.3359 0.623
## 241 21 1 0.436 0.0719 0.3153 0.602
## 248 20 1 0.414 0.0716 0.2950 0.581
## 273 19 1 0.392 0.0710 0.2750 0.559
## 277 18 1 0.370 0.0704 0.2553 0.538
## 297 16 1 0.347 0.0697 0.2344 0.515
## 405 14 1 0.322 0.0690 0.2121 0.490
## 417 13 1 0.298 0.0680 0.1903 0.466
## 420 12 1 0.273 0.0667 0.1690 0.441
## 440 11 1 0.248 0.0651 0.1483 0.415
## 583 9 1 0.221 0.0634 0.1255 0.387
## 594 8 1 0.193 0.0612 0.1036 0.359
## 1101 7 1 0.165 0.0583 0.0828 0.330
## 1146 5 1 0.132 0.0552 0.0584 0.300
## 1417 1 1 0.000 NaN NA NA
library(ggplot2)
ggplot(,aes(x=ekm$time,y=ekm$surv))+
geom_step()+
labs(x = "Tempo", y = "Sobrevivência")+
theme(panel.grid = element_blank())
criterios=function(value,d,n){
#value - valor do optim
#d - numero de parametros
#n - numero da dados
l=2*value
AIC=l+2*d
BIC=l+d*log(n)
CAIC=AIC+(2*(d+2)*(d+3))/(n-d-3)
resul=cbind(l,AIC,CAIC,BIC)
return(resul)
}
IC=function(parametros,hessiana,n){
Var=solve(hessiana)
EP=diag(sqrt(Var))
tvalue=parametros/EP
LI=parametros-qt(0.975,n)*EP
LS=parametros+qt(0.975,n)*EP
valorp=pt(tvalue,n,lower.tail = FALSE)
resul=cbind(parametros,EP,tvalue,valorp,LI,LS)
return(resul)
}
t = tempos
censur = cens
verosi<-function(param){
alfa<-param[1]
if(any(alfa < 1e-20)) return(.Machine$double.xmax^.5)
lv<-censur*(log(1/alfa)-(t/alfa))+(1-censur)*(-(t/alfa))
sum(-lv)
}
valorin<-c(10)
estima<-optim(valorin,verosi,method="BFGS",hessian=T);
estima
## $par
## [1] 432.9823
##
## $value
## [1] 283.4069
##
## $counts
## function gradient
## 11 10
##
## $convergence
## [1] 0
##
## $message
## NULL
##
## $hessian
## [,1]
## [1,] 0.0002195435
n=49
d1 = criterios(estima$value,1,n)
parametros=estima$par
hessiana=estima$hessian
IC(parametros,hessiana,n)
## parametros EP tvalue valorp LI LS
## [1,] 432.9823 67.49004 6.415498 2.669135e-08 297.3559 568.6086
verosi<-function(param){
alfa<-param[1]
tau<-param[2]
if(any(alfa < 1e-20)) return(.Machine$double.xmax^.5)
if(any(tau < 1e-20)) return(.Machine$double.xmax^.5)
lv<-censur*(log(tau)-tau*log(alfa)+(tau-1)*log(t)-((t/alfa)^(tau)))
+(1-censur)*(-(t/alfa)^(tau));
sum(-lv)
}
valorin<-c(432.9823,9)
estima1<-optim(valorin,verosi,method="BFGS",hessian=T)
estima1
## $par
## [1] 282.513212 1.070371
##
## $value
## [1] 264.8018
##
## $counts
## function gradient
## 107 63
##
## $convergence
## [1] 0
##
## $message
## NULL
##
## $hessian
## [,1] [,2]
## [1,] 0.0005873488 -0.07329771
## [2,] -0.0732977128 76.52225855
d2 = criterios(estima1$value,2,n)
parametros=estima1$par
hessiana=estima1$hessian
IC(parametros,hessiana,n)
## parametros EP tvalue valorp LI LS
## [1,] 282.513212 43.9740051 6.424550 2.584376e-08 194.1441402 370.882284
## [2,] 1.070371 0.1218289 8.785861 6.161013e-12 0.8255472 1.315196
alfa= 278.883033
tau= 1.0654491
s=exp(-(t/alfa)^(tau))
alfa=432.9822698
s1=exp(-(t/alfa))
plot(ekm,conf.int=F, xlab="t",ylab="S(t)")
lines(t,s,col='blue', lty=1, lwd=2)
lines(t,s1,col='red', lty=1, lwd=2)
legend(1000,1,c('Kaplan-Meier','Weibull','Exponencial'),bty='o',cex=1,
col=c('black','blue','red'), lty=c(1,1,1), lwd=c(2,2,2))
trv= 566.8138-529.6237
trv
## [1] 37.1901
quiquadrado=1-pchisq(37.1901,1)
quiquadrado
## [1] 1.071564e-09
Pelo teste TRV podemos ver que há diferença entre os modelos
df = cbind(d1,d2)
print(df)
## l AIC CAIC BIC l AIC CAIC BIC
## [1,] 566.8138 568.8138 569.3471 570.7056 529.6036 533.6036 534.5127 537.3872
Portanto como os valores de AIC, BIC e CAIC do ajuste Weibull são melhores, podemos considera-lo, como o modelo que melhor se ajusta aos dados