Com pacotes extras para fazer gráfico e o pacote survival para sobrevivência
require(rjags)
require(coda)
require(DT)
require(tidyverse)
require(car)
#install.packages("MCMCpack")
require(MCMCpack)
#install.packages("bayesplot")
require(bayesplot)
#install.packages("ggrepel")
require(grid)
require(survival)
#para fazer o ggsurvplot: gráficos de sobrevivência
#install.packages("survminer")
require(survminer)
dados=read.table("http://www.mas.ncl.ac.uk/~nmf16/teaching/mas8391/renal.txt",header=TRUE)
datatable(dados, cap="Tempos de vida de pacientes após transplante renal",options = list(pageLength = 5))
fit = survfit(Surv(t, status) ~ 1, data = dados,type="kaplan-meier")
ggsurvplot(fit, censor.shape="|", censor.size = 4,main="KM")
fit2 = survfit(Surv(t, status) ~ x, data = dados,type="kaplan-meier")
ggsurvplot(fit2, censor.shape="|", censor.size = 4,main="KM por grupos")
Modelo paramétrico - distribuição exponencial para os tempos de vida
fit3=list()
lambda=c()
Meantime=c()
i=0
while(i<=4){
fit3[[i+1]]=survreg(Surv(t, status)~1, dist="exponential",data=dados[dados$x==i,])
lambda[i+1]=1/exp(fit3[[i+1]]$coefficients[1])
Meantime[i+1]=1/lambda[i+1]
i=i+1
}
lambda
## [1] 0.003715847 0.008820736 0.010601025 0.050626503 0.202347228
Meantime
## [1] 269.1177 113.3692 94.3305 19.7525 4.9420
fit3
## [[1]]
## Call:
## survreg(formula = Surv(t, status) ~ 1, data = dados[dados$x ==
## i, ], dist = "exponential")
##
## Coefficients:
## (Intercept)
## 5.595149
##
## Scale fixed at 1
##
## Loglik(model)= -19.8 Loglik(intercept only)= -19.8
## n= 39
##
## [[2]]
## Call:
## survreg(formula = Surv(t, status) ~ 1, data = dados[dados$x ==
## i, ], dist = "exponential")
##
## Coefficients:
## (Intercept)
## 4.73065
##
## Scale fixed at 1
##
## Loglik(model)= -51.6 Loglik(intercept only)= -51.6
## n= 54
##
## [[3]]
## Call:
## survreg(formula = Surv(t, status) ~ 1, data = dados[dados$x ==
## i, ], dist = "exponential")
##
## Coefficients:
## (Intercept)
## 4.546805
##
## Scale fixed at 1
##
## Loglik(model)= -33.3 Loglik(intercept only)= -33.3
## n= 32
##
## [[4]]
## Call:
## survreg(formula = Surv(t, status) ~ 1, data = dados[dados$x ==
## i, ], dist = "exponential")
##
## Coefficients:
## (Intercept)
## 2.98328
##
## Scale fixed at 1
##
## Loglik(model)= -31.9 Loglik(intercept only)= -31.9
## n= 16
##
## [[5]]
## Call:
## survreg(formula = Surv(t, status) ~ 1, data = dados[dados$x ==
## i, ], dist = "exponential")
##
## Coefficients:
## (Intercept)
## 1.59777
##
## Scale fixed at 1
##
## Loglik(model)= -13 Loglik(intercept only)= -13
## n= 7
Criando a função de sobrevivência - valores preditos pelo modelo
tempos=seq(0,50,1)
S_t=matrix(nrow=length(tempos),ncol=length(lambda))
S_t[,1]=exp(-lambda[1]*tempos)
S_t[,2]=exp(-lambda[2]*tempos)
S_t[,3]=exp(-lambda[3]*tempos)
S_t[,4]=exp(-lambda[4]*tempos)
S_t[,5]=exp(-lambda[5]*tempos)
S_t=data.frame(cbind(tempos,S_t))
names(S_t)=c("tempos","x_0","x_1","x_2","x_3","x_4")
datatable(S_t,options = list(pageLength = 5),cap="Função de sobrevivência para vários valores de x")
Representação Gráfica
colors <- c("x=0"="blue", "x=1"="red", "x=2"="green" ,"x=3" = "orange", "x=4" = "pink")
ggplot(S_t,aes(x=tempos,y=x_0,color="x=0"))+
geom_line() +
geom_line(aes(x=tempos,y=x_1,color="x=1")) +
geom_line(aes(x=tempos,y=x_2,color="x=2"))+
geom_line(aes(x=tempos,y=x_3,color="x=3"))+
geom_line(aes(x=tempos,y=x_4,color="x=4"))+
labs(x="t (em meses)",
y="S(t)",
title="Gráfico da função de sobrevivência - modelo exponencial",
color = "Número de Incompatibilidades (x)") +
scale_color_manual(labels=c(expression(x==0),expression(x==1),expression(x==2),expression(x==3),expression(x==4)),values = colors)
teste = ggsurvplot(fit2)
teste$plot +
geom_line(S_t,mapping=aes(x=tempos,y=x_0,color="x=0"))+
geom_line(S_t,mapping=aes(x=tempos,y=x_1,color="x=1"))+
geom_line(S_t,mapping=aes(x=tempos,y=x_2,color="x=2"))+
geom_line(S_t,mapping=aes(x=tempos,y=x_3,color="x=3"))+
geom_line(S_t,mapping=aes(x=tempos,y=x_4,color="x=4"))
Entrada dos dados no RJAGS
t=dados$t
is.na(t)=dados$status==0
is.censored=1-dados$status
t.cen=dados$t+dados$status
dados_jags=list(n=nrow(dados),t=dados$t,is.censored=is.censored,t.cen=t.cen,x=dados$x)
dados_jags=list(n=nrow(dados),t=dados$t,is.censored=is.censored,t.cen=t.cen,x=dados$x,new.n=50,new.t=rep(seq(1,50,5),5),new.x=c(rep(0,10),rep(1,10),rep(2,10),rep(3,10),rep(4,10)))
dados_jags
## $n
## [1] 148
##
## $t
## [1] 0.035 0.068 0.100 0.101 0.167 0.168 0.197 0.213 0.233 0.234
## [11] 0.508 0.508 0.533 0.633 0.767 0.768 0.770 1.066 1.267 1.300
## [21] 1.600 1.639 1.803 1.867 2.180 2.667 2.967 3.328 3.393 3.700
## [31] 3.803 4.311 4.867 5.180 6.233 6.367 6.600 6.600 7.180 7.667
## [41] 7.733 7.800 7.933 7.967 8.016 8.300 8.410 8.607 8.667 8.800
## [51] 9.100 9.233 10.541 10.607 10.633 10.667 10.869 11.067 11.180 11.443
## [61] 12.213 12.508 12.533 13.467 13.800 14.267 14.475 14.500 15.213 15.333
## [71] 15.525 15.533 15.541 15.934 16.200 16.300 16.344 16.600 16.700 16.933
## [81] 17.033 17.067 17.475 17.667 17.700 17.967 18.115 18.115 18.933 18.934
## [91] 19.508 19.574 19.733 20.148 20.180 20.900 21.167 21.233 21.600 22.100
## [101] 22.148 22.180 22.180 22.267 22.300 22.500 22.533 22.867 23.738 24.082
## [111] 24.180 24.705 25.705 25.213 29.705 30.443 31.667 31.934 32.180 32.367
## [121] 32.672 32.705 33.148 33.567 33.770 33.869 34.836 34.869 34.934 35.738
## [131] 36.180 36.213 39.410 39.433 39.672 40.001 41.733 41.734 42.311 42.869
## [141] 43.180 43.279 43.902 44.267 44.475 44.900 45.148 46.451
##
## $is.censored
## [1] 0 0 0 0 1 0 1 0 1 0 0 1 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 1 0 0 1 1 1 0 1 1 1
## [38] 1 0 1 0 1 1 1 0 0 1 1 0 1 1 0 1 1 1 0 1 0 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1
## [75] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [112] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##
## $t.cen
## [1] 1.035 1.068 1.100 1.101 0.167 1.168 0.197 1.213 0.233 1.234
## [11] 1.508 0.508 1.533 0.633 1.767 1.768 0.770 2.066 1.267 2.300
## [21] 2.600 1.639 1.803 2.867 3.180 3.667 2.967 3.328 4.393 4.700
## [31] 3.803 4.311 4.867 6.180 6.233 6.367 6.600 6.600 8.180 7.667
## [41] 8.733 7.800 7.933 7.967 9.016 9.300 8.410 8.607 9.667 8.800
## [51] 9.100 10.233 10.541 10.607 10.633 11.667 10.869 12.067 11.180 11.443
## [61] 13.213 13.508 12.533 13.467 13.800 14.267 14.475 14.500 15.213 15.333
## [71] 15.525 15.533 15.541 15.934 16.200 16.300 16.344 16.600 16.700 16.933
## [81] 17.033 17.067 17.475 17.667 17.700 17.967 18.115 18.115 18.933 18.934
## [91] 19.508 19.574 19.733 20.148 20.180 21.900 21.167 21.233 21.600 22.100
## [101] 22.148 22.180 22.180 22.267 22.300 22.500 22.533 22.867 23.738 24.082
## [111] 24.180 24.705 25.705 25.213 29.705 30.443 31.667 31.934 32.180 32.367
## [121] 32.672 32.705 33.148 33.567 33.770 33.869 34.836 34.869 34.934 35.738
## [131] 36.180 36.213 39.410 39.433 39.672 40.001 41.733 41.734 42.311 42.869
## [141] 43.180 43.279 43.902 44.267 44.475 44.900 45.148 46.451
##
## $x
## [1] 3 0 0 1 4 2 1 1 1 2 0 2 3 0 3 4 0 4 2 3 1 2 2 4 3 4 1 2 3 4 3 1 0 1 2 2 1
## [38] 0 3 1 1 2 1 1 2 1 0 1 1 1 0 1 2 3 1 2 3 2 0 0 1 3 2 0 2 0 4 1 1 0 1 2 1 0
## [75] 1 0 1 0 1 3 3 0 1 1 1 1 2 1 0 1 2 3 0 2 0 2 0 0 3 1 2 0 0 0 2 1 1 1 1 1 0
## [112] 0 2 1 3 1 0 2 1 0 1 2 1 1 1 2 0 1 2 0 1 1 1 0 0 0 2 0 2 0 0 1 2 2 1 1 1 0
##
## $new.n
## [1] 50
##
## $new.t
## [1] 1 6 11 16 21 26 31 36 41 46 1 6 11 16 21 26 31 36 41 46 1 6 11 16 21
## [26] 26 31 36 41 46 1 6 11 16 21 26 31 36 41 46 1 6 11 16 21 26 31 36 41 46
##
## $new.x
## [1] 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3
## [39] 3 3 4 4 4 4 4 4 4 4 4 4
Valors iniciais - Não preciso por valores iniciais - o algoritmo gera aleatoriamente, se necessário! - Também pode-se utilizar os valores iniciais sugeridos no material, mas não foi utilizado.
#tinits1<-dados$t+5
#is.na(tinits1)<-dados$status==1
#tinits2<-tinits1+5
#renalinits<-list(list(t=tinits1),list(t=tinits2))
Sintaxe do modelo
cat("
model
{
# priors
for(j in 1:5)
{
# prior lambda for each group
lambda[j] ~ dgamma(0.001, 0.001)
mu[j] <- 1/lambda[j] # mean time to death
}
# likelihood
for(i in 1:n)
{
is.censored[i] ~ dinterval(t[i], t.cen[i])
t[i] ~ dexp(lambda[x[i]+1])
}
# predictions
for(i in 1:new.n)
{
S[i] = exp(-lambda[new.x[i]+1]*new.t[i])
}
}
", file="modelo6.txt")
Reconhece o modelo no arquivo .txt
modelo<-jags.model("modelo6.txt",data=dados_jags,n.chains=2)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 296
## Unobserved stochastic nodes: 5
## Total graph size: 811
##
## Initializing model
Começa a rodar o modelo (update)
update(modelo,5000)
Coleta as amostras desejadas:
params=c("lambda","mu")
samples=coda.samples(modelo,params,n.iter=50000,thin=5)
Traços e densidades a posteriori
mcmc_trace(samples)
mcmc_dens(samples)
Diagnóstico de Gelman Rubin
gelman.diag(samples, confidence = 0.95, transform=FALSE)
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## lambda[1] 1 1
## lambda[2] 1 1
## lambda[3] 1 1
## lambda[4] 1 1
## lambda[5] 1 1
## mu[1] 1 1
## mu[2] 1 1
## mu[3] 1 1
## mu[4] 1 1
## mu[5] 1 1
##
## Multivariate psrf
##
## 1
gelman.plot(samples)
Gráficos de autocorrelação
autocorr.plot(samples[[1]],col=1)
autocorr.plot(samples[[2]],col=2)
Sumários a posteriori
summary(samples)
##
## Iterations = 5005:55000
## Thinning interval = 5
## Number of chains = 2
## Sample size per chain = 10000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## lambda[1] 0.04833 0.007716 5.456e-05 5.477e-05
## lambda[2] 0.05289 0.007259 5.133e-05 5.228e-05
## lambda[3] 0.05669 0.010002 7.073e-05 7.165e-05
## lambda[4] 0.10145 0.025344 1.792e-04 1.792e-04
## lambda[5] 0.28244 0.105755 7.478e-04 7.622e-04
## mu[1] 21.23256 3.490902 2.468e-02 2.468e-02
## mu[2] 19.26769 2.689187 1.902e-02 1.924e-02
## mu[3] 18.20777 3.319402 2.347e-02 2.381e-02
## mu[4] 10.50930 2.786204 1.970e-02 1.970e-02
## mu[5] 4.11753 1.838581 1.300e-02 1.307e-02
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## lambda[1] 0.03442 0.04291 0.04790 0.05328 0.06450
## lambda[2] 0.03971 0.04785 0.05255 0.05751 0.06800
## lambda[3] 0.03862 0.04963 0.05608 0.06313 0.07786
## lambda[4] 0.05808 0.08339 0.09934 0.11707 0.15625
## lambda[5] 0.11552 0.20613 0.26946 0.34406 0.52371
## mu[1] 15.50286 18.76950 20.87750 23.30474 29.04913
## mu[2] 14.70555 17.38961 19.03087 20.89972 25.18316
## mu[3] 12.84338 15.83995 17.83113 20.14951 25.89422
## mu[4] 6.39999 8.54176 10.06610 11.99135 17.21878
## mu[5] 1.90946 2.90643 3.71106 4.85121 8.65639
Pedindo novas quantidades de interesse
params_novo=c("S")
samples_novo=coda.samples(modelo,params_novo,n.iter=50000,thin=5)
S=summary(samples_novo)
mean_S=S[["statistics"]][,1]
Representação Gráfica:
S_t_Baye=matrix(mean_S,nrow=10,ncol=5)
t=seq(1,50,5)
S_t_Baye=data.frame(cbind(t,S_t_Baye))
names(S_t_Baye)=c("tempos","x_0","x_1","x_2","x_3","x_4")
datatable(S_t_Baye)
colors <- c("x=0"="blue", "x=1"="red", "x=2"="green" ,"x=3" = "orange", "x=4" = "pink")
ggplot(S_t_Baye,aes(x=tempos,y=x_0,color="x=0"))+
geom_line() +
geom_line(aes(x=tempos,y=x_1,color="x=1")) +
geom_line(aes(x=tempos,y=x_2,color="x=2"))+
geom_line(aes(x=tempos,y=x_3,color="x=3"))+
geom_line(aes(x=tempos,y=x_4,color="x=4"))+
labs(x="t (em meses)",
y="S(t)",
title="Gráfico da função de sobrevivência - modelo exponencial",
color = "Número de Incompatibilidades (x)") +
scale_color_manual(labels=c(expression(x==0),expression(x==1),expression(x==2),expression(x==3),expression(x==4)),values = colors)
Considerar: