Code for section 3.1:
# First thing first, we define the packages are going to be used in this survival analysis.
rm(list=ls())
library(survival)
library(flexsurv)
library(eha)
##
## Attaching package: 'eha'
## The following objects are masked from 'package:flexsurv':
##
## dgompertz, dllogis, hgompertz, Hgompertz, hllogis, Hllogis, hlnorm,
## Hlnorm, hweibull, Hweibull, pgompertz, pllogis, qgompertz, qllogis,
## rgompertz, rllogis
# Then we show our selected data set.
data(kidney)
head(kidney)
## id time status age sex disease frail
## 1 1 8 1 28 1 Other 2.3
## 2 1 16 1 28 1 Other 2.3
## 3 2 23 1 48 2 GN 1.9
## 4 2 13 0 48 2 GN 1.9
## 5 3 22 1 32 1 Other 1.2
## 6 3 28 1 32 1 Other 1.2
# Now we fit survival models with no covariates, this corresponds to the models in section 3.1 of the paper.
fit.weibull <- flexsurvreg(formula = Surv(time, status) ~ 1, data = kidney, dist = "weibull")
fit.ggama <- flexsurvreg(formula = Surv(time, status) ~ 1, data = kidney, dist = "gengamma")
fit.lnorm <- flexsurvreg(formula = Surv(time, status) ~ 1, data = kidney, dist = "lognormal")
# Showing the graphs of them, the graphs are included in the paper.
plot.new()
plot.window(xlim = c(0,600), ylim = c(0,1))
axis(1, cex.axis = 1.5); axis(2, cex.axis = 1.5); box(); title(ylab="Survival", xlab="Time", cex.lab = 1.5)
lines(fit.weibull, col="red", lwd.ci=0, lty.ci=0)
lines(fit.lnorm, col="blue", lwd.ci=0, lty.ci=0)
lines(fit.ggama, col="green", lwd.ci=0, lty.ci=0)
legend("topright", legend = c("weibull", "lnorm", "gengamma"),
lty = 1, col = c("red","blue","green"))
plot.new()
plot.window(xlim = c(0,600), ylim = c(0,0.015))
axis(1, cex.axis = 1.5); axis(2, cex.axis = 1.5); box(); title(ylab="Hazard", xlab="Time", cex.lab = 1.5)
lines(fit.weibull, col="red", lwd.ci=0, lty.ci=0, type = "hazard")
lines(fit.lnorm, col="blue", lwd.ci=0, lty.ci=0, type = "hazard")
lines(fit.ggama, col="green", lwd.ci=0, lty.ci=0, type = "hazard")
legend("topright", legend = c("weibull", "lnorm", "gengamma"),
lty = 1, col = c("red","blue","green"))
# We compare their AICs
AIC(fit.weibull)
## [1] 685.8749
AIC(fit.lnorm)
## [1] 684.0963
AIC(fit.ggama)
## [1] 685.5893
# This corresponds to figure 10 in the paper.
plot(fit.weibull, col="red", lwd.ci=0, lty.ci=0, ylab="Survival", xlab="Time", cex.lab = 1.5,cex.axis = 1.5)
lines(fit.lnorm, col="blue", lwd.ci=0, lty.ci=0)
lines(fit.ggama, col="green", lwd.ci=0, lty.ci=0)
legend("topright", legend = c("weibull", "lnorm", "gengamma", "KM"),
lty = 1, col = c("red","blue","green","black"))
Code for section 3.2 and 3.3:
# Now we turn to full regression models.
# Following models are used in section 3.2.
# We start with the log-normal distribution with two hazard structures.
fit.aft <- aftreg(formula = Surv(time, status) ~ age + sex, data = kidney,
dist = "lognormal")
fit.ph <- phreg(formula = Surv(time, status) ~ age + sex , data = kidney,
dist = "lognormal")
# Then we define the how to calculate AICs for different hazard structure as this is not inlcuded in the packages.
AIC.aft <- -2*(fit.aft$loglik[2]) + 2*length(fit.aft$coefficients)
AIC.ph <- -2*(fit.ph$loglik[2]) + 2*length(fit.ph$coefficients)
# Again, showing their AICs and plots, AICs are used to do model selection in section 3.3.
AIC.aft
## [1] 671.9596
AIC.ph
## [1] 679.9417
plot(fit.aft)
plot(fit.ph)
# Next we do the samething using the Weibull distribution.
# note here there are models with three covariates and this is explained in section 3.3.
fit.aftwbn <- aftreg(formula = Surv(time, status) ~ age + sex + factor(disease), data = kidney,
dist = "weibull")
fit.aftwb <- aftreg(formula = Surv(time, status) ~ age + sex, data = kidney,
dist = "weibull")
fit.phwbn <- phreg(formula = Surv(time, status) ~ age + sex + factor(disease), data = kidney,
dist = "weibull")
fit.phwb <- phreg(formula = Surv(time, status) ~ age + sex, data = kidney,
dist = "weibull")
AIC.aftwb <- -2*(fit.aftwb$loglik[2]) + 2*length(fit.aftwb$coefficients)
AIC.aftwbn <- -2*(fit.aftwbn$loglik[2]) + 2*length(fit.aftwbn$coefficients)
AIC.phwb <- -2*(fit.phwb$loglik[2]) + 2*length(fit.phwb$coefficients)
AIC.phwbn <- -2*(fit.phwbn$loglik[2]) + 2*length(fit.phwbn$coefficients)
AIC.aftwb
## [1] 681.1083
AIC.aftwbn
## [1] 674.7269
AIC.phwb
## [1] 681.1083
AIC.phwbn
## [1] 674.7269
plot(fit.aftwb)
plot(fit.phwb)
Code for reparameterization to be used for prediction:
# Because the prediction function defined and used in section 4 uses parametrisation of mu,
# while the parametrisations of models in previous regression section uses median,
# we need to calculate the parameter mu of the model ourselves.
# This section of codes is not explained in the paper as it is pure coding,
# but it is crucial step through the development of this paper.
# Lognormal Hazard, as at this point we have selected it as our model.
hlnorm <- function(t,mu,sigma, log = FALSE){
lpdf0 <- dlnorm(t,mu,sigma, log = T)
ls0 <- plnorm(t,mu,sigma, lower.tail = FALSE, log.p = T)
val <- lpdf0 - ls0
if(log) return(val) else return(exp(val))
}
# Lognormal Cumulative hazard
chlnorm <- function(t,mu,sigma){
H0 <- -plnorm(t,mu,sigma, lower.tail = FALSE, log.p = TRUE)
return(H0)
}
x <- as.matrix(cbind(kidney$age,kidney$sex))
p <- ncol(x)
status <- as.logical(kidney$status)
times <- as.vector(kidney$time)
times.obs <- times[status]
log.lik <- function(par){
ae0 <- par[1]; be0 <- exp(par[2]); beta <- par[3:(2+p)];
exp.x.beta <- as.vector(exp(x%*%beta))
exp.x.beta.obs <- exp.x.beta[status]
haz0 <- hlnorm(times.obs*exp.x.beta.obs,ae0,be0)*exp.x.beta.obs
val <- - sum( log(haz0)) + sum(chlnorm(times*exp.x.beta,ae0,be0))
return(val)
}
OPT <- optim(c(0,0,0,0),log.lik,control=list(maxit=1000))
OPT
## $par
## [1] 2.067646568 0.169612686 0.005256262 -1.375966481
##
## $value
## [1] 331.9798
##
## $counts
## function gradient
## 257 NA
##
## $convergence
## [1] 0
##
## $message
## NULL
Code for section 4, prediction:
# After finishing calculating the parameter, we can start our predictions, this corresponds to section 4.
# We start with the distribution.
chlnorm <- function(t, mu, sigma){-plnorm(t, mu, sigma, lower.tail = FALSE, log.p = TRUE)}
# t : time
# ti : censored survival time
# beta : regression parameters
# theta: parameters of the baseline hazard
# We import the estimated parameters of our regression model.
x <- as.matrix(cbind(kidney$age,kidney$sex))
beta <- c(0.005256262,-1.375966481)
aft.theta <- c(2.067646568, exp(0.169594952))
#We define the function of prediction using the formula in the paper.
pred.surv <- function(t,i,beta,theta){
x.beta <- as.numeric(x[i,]%*%beta)
ti <- kidney$time[i]
out <- ((exp(-chlnorm(ti*exp(x.beta), theta[1],theta[2]))) - exp(-chlnorm(t*exp(x.beta), theta[1],theta[2])))/(exp(-chlnorm(ti*exp(x.beta), theta[1],theta[2])))
return(1 - out)
}
# Next we sub in the three patients we selected and show their remaining life predictions.
G.fit1 <- Vectorize(function(t) {pred.surv(t, 9, beta, aft.theta)})
G.fit2 <- Vectorize(function(t) {pred.surv(t, 53, beta, aft.theta)})
G.fit3 <- Vectorize(function(t) {pred.surv(t, 17, beta, aft.theta)})
curve(G.fit1,29,100,ylim=c(0,1),lwd=2,xlab="Time",ylab="Probability",main="Remaining Life estimation of patient id 5",
sub="10-year-old man", col.lab="blue", col.sub="black")
curve(G.fit2,131,400,ylim=c(0,1),lwd=2,xlab="Time",ylab="Probability",main="Remaining Life estimation of patient id 27",
sub="10-year-old woman", col.lab="blue", col.sub="black")
curve(G.fit3,52,225,ylim=c(0,1),lwd=2,xlab="Time",ylab="Probability",main="Remaining Life estimation of patient id 9",
sub="69-year-old woman", col.lab="blue", col.sub="black")
# Here we show an example of calculating the median remaining life of a patient,
# others can be obtained by simply amending the numbers.
G.fit2m <- Vectorize(function(x) G.fit2(x)-0.5)
uniroot(G.fit2m, c(200,300))
## $root
## [1] 281.5537
##
## $f.root
## [1] -2.231892e-10
##
## $iter
## [1] 5
##
## $init.it
## [1] NA
##
## $estim.prec
## [1] 6.103516e-05