Accelerated Failure Time models with flexible errors

Log-twopiece distributions are univariate models with positive support obtained by transforming the class of two-piece distributions. This class of distributions is very flexible, easy to implement, and contains members that can capture different tail behaviours and shapes, producing also a variety of hazard functions. These distributions represent a flexible alternative to the classical choices such as the log-normal, Gamma, and Weibull distributions. Below, we present an application using real data in the contexts of lung cancer survival using Accelerated Failure Time models.

References.

  1. Inference in Two-Piece Location-Scale Models with Jeffreys Priors

  2. Survival and lifetime data analysis with a flexible class of distributions

rm(list=ls())

# Required packages
library(survival)
library(twopiece)
## Loading required package: flexclust
## Loading required package: grid
## Loading required package: lattice
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: mvtnorm
## Loading required package: label.switching
# Lung Cancer data
data(lung)
str(lung)
## 'data.frame':    228 obs. of  10 variables:
##  $ inst     : num  3 3 3 5 1 12 7 11 1 7 ...
##  $ time     : num  306 455 1010 210 883 ...
##  $ status   : num  2 2 1 2 2 1 2 2 2 2 ...
##  $ age      : num  74 68 56 57 60 74 68 71 53 61 ...
##  $ sex      : num  1 1 1 1 1 1 2 2 1 1 ...
##  $ ph.ecog  : num  1 0 0 1 0 1 2 2 1 2 ...
##  $ ph.karno : num  90 90 90 90 100 50 70 60 70 70 ...
##  $ pat.karno: num  100 90 90 60 90 80 60 80 80 70 ...
##  $ meal.cal : num  1175 1225 NA 1150 NA ...
##  $ wt.loss  : num  NA 15 15 11 0 0 10 1 16 34 ...
vnames = c("age","sex","ph.ecog")

# Remove a missing observation
X = cbind(1,lung$age,lung$sex,lung$ph.ecog)[-14,]
logY = log(lung$time)[-14]
n = length(logY)

status = (1-(lung$status-1))[-14]

#######################################################################################################################################
# Weibull, lognormal and log-logistic fit using survreg
#######################################################################################################################################

summary( survreg(Surv(time, status) ~ age+sex+ph.ecog, data=cancer, dist="weibull") )
## 
## Call:
## survreg(formula = Surv(time, status) ~ age + sex + ph.ecog, data = cancer, 
##     dist = "weibull")
##                Value Std. Error     z        p
## (Intercept)  6.27344    0.45358 13.83 1.66e-43
## age         -0.00748    0.00676 -1.11 2.69e-01
## sex          0.40109    0.12373  3.24 1.19e-03
## ph.ecog     -0.33964    0.08348 -4.07 4.73e-05
## Log(scale)  -0.31319    0.06135 -5.11 3.30e-07
## 
## Scale= 0.731 
## 
## Weibull distribution
## Loglik(model)= -1132.4   Loglik(intercept only)= -1147.4
##  Chisq= 29.98 on 3 degrees of freedom, p= 1.4e-06 
## Number of Newton-Raphson Iterations: 5 
## n=227 (1 observation deleted due to missingness)
summary( survreg(Surv(time, status) ~ age+sex+ph.ecog, data=cancer, dist="lognormal") )
## 
## Call:
## survreg(formula = Surv(time, status) ~ age + sex + ph.ecog, data = cancer, 
##     dist = "lognormal")
##               Value Std. Error      z        p
## (Intercept)  6.4948    0.58276 11.145 7.58e-29
## age         -0.0192    0.00833 -2.303 2.13e-02
## sex          0.5220    0.15278  3.416 6.34e-04
## ph.ecog     -0.3556    0.10331 -3.442 5.78e-04
## Log(scale)   0.0282    0.05596  0.505 6.14e-01
## 
## Scale= 1.03 
## 
## Log Normal distribution
## Loglik(model)= -1146.9   Loglik(intercept only)= -1163.2
##  Chisq= 32.59 on 3 degrees of freedom, p= 3.9e-07 
## Number of Newton-Raphson Iterations: 3 
## n=227 (1 observation deleted due to missingness)
summary( survreg(Surv(time, status) ~ age+sex+ph.ecog, data=cancer, dist="loglogistic") )
## 
## Call:
## survreg(formula = Surv(time, status) ~ age + sex + ph.ecog, data = cancer, 
##     dist = "loglogistic")
##                Value Std. Error     z        p
## (Intercept)  5.93669    0.51207 11.59 4.45e-31
## age         -0.00808    0.00748 -1.08 2.80e-01
## sex          0.48662    0.13489  3.61 3.09e-04
## ph.ecog     -0.40462    0.09301 -4.35 1.36e-05
## Log(scale)  -0.62336    0.06581 -9.47 2.76e-21
## 
## Scale= 0.536 
## 
## Log logistic distribution
## Loglik(model)= -1137.5   Loglik(intercept only)= -1154.5
##  Chisq= 34.12 on 3 degrees of freedom, p= 1.9e-07 
## Number of Newton-Raphson Iterations: 4 
## n=227 (1 observation deleted due to missingness)
#######################################################################################################################################
# Log Normal fit
#######################################################################################################################################

lln <- function(par){
if(par[5]>0){
var1 = log(dnorm(logY - X%*%par[1:4],0,par[5])^(1-status))
var2 = log( (1 - pnorm(logY - X%*%par[1:4],0,par[5]))^(status)  )
return(-sum(var1+var2))
}
return(Inf)
} 

OPT4 = optim(c(6,0,0,0,1),lln,control=list(maxit=10000))
AIC4 = 2*OPT4$value + 2*5

# MLE
OPT4$par
## [1]  6.49404503 -0.01916748  0.52190473 -0.35570106  1.02868869
#######################################################################################################################################
# Log two-piece normal fit
#######################################################################################################################################

lltpn <- function(par){
if(par[5]>0 & par[6]>-1 & par[6]<1){
var1 = log(dtp3(logY - X%*%par[1:4],0,par[5],par[6],param="eps",FUN=dnorm)^(1-status) )
var2 = log( (1 - ptp3(logY - X%*%par[1:4],0,par[5],par[6],param="eps",FUN=pnorm,log.p=F) )^(status) )
return(-sum(var1+var2))
}
return(Inf)
} 

OPT2 = optim(c(OPT4$par,0.3),lltpn,control=list(maxit=10000))
AIC2 = 2*OPT2$value + 2*6

# MLE
OPT2$par
## [1]  6.95056710 -0.01490558  0.42596622 -0.31212867  0.88355859  0.50511774
#######################################################################################################################################
# Log Logistic fit
#######################################################################################################################################

lll <- function(par){
if(par[5]>0){
var1 = log(dlogis(logY - X%*%par[1:4],0,par[5])^(1-status))
var2 = log( (1 - plogis(logY - X%*%par[1:4],0,par[5]))^(status)  )
return(-sum(var1+var2))
}
return(Inf)
} 

OPT5 = optim(c(6,0,0,0,0.5),lll,control=list(maxit=10000))
AIC5 = 2*OPT5$value + 2*5

# MLE
OPT5$par
## [1]  5.950092553 -0.008279673  0.485730043 -0.404283302  0.536088726
#######################################################################################################################################
# Log two-piece logistic fit 
#######################################################################################################################################

lltpl <- function(par){
if(par[5]>0 & par[6]>-1 & par[6]<1){
var1 = log(dtp3(logY - X%*%par[1:4],0,par[5],par[6],param="eps",FUN=dlogis)^(1-status)) 
var2 = log( (1 - ptp3(logY - X%*%par[1:4],0,par[5],par[6],param="eps",FUN=plogis,log.p=F))^(status)  )
return(-sum(var1+var2))
}
return(Inf)
} 

OPT6 = optim(c(OPT5$par,0.2),lltpl,control=list(maxit=10000))
AIC6 = 2*OPT6$value + 2*6

# MLE
OPT6$par
## [1]  6.5538686 -0.0100372  0.4242944 -0.3541706  0.4847812  0.4083184
# Comparison using AIC

c(AIC2,AIC4,AIC5,AIC6)
## [1] 545.9405 563.8323 545.0486 536.0556