Log-twopiece distributions are univariate models with positive support obtained by transforming the class of two-piece distributions. This class of distributions is very flexible, easy to implement, and contains members that can capture different tail behaviours and shapes, producing also a variety of hazard functions. These distributions represent a flexible alternative to the classical choices such as the log-normal, Gamma, and Weibull distributions. Below, we present an application using real data in the contexts of lung cancer survival using Accelerated Failure Time models.
References.
Inference in Two-Piece Location-Scale Models with Jeffreys Priors
Survival and lifetime data analysis with a flexible class of distributions
rm(list=ls())
# Required packages
library(survival)
library(twopiece)
## Loading required package: flexclust
## Loading required package: grid
## Loading required package: lattice
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: mvtnorm
## Loading required package: label.switching
# Lung Cancer data
data(lung)
str(lung)
## 'data.frame': 228 obs. of 10 variables:
## $ inst : num 3 3 3 5 1 12 7 11 1 7 ...
## $ time : num 306 455 1010 210 883 ...
## $ status : num 2 2 1 2 2 1 2 2 2 2 ...
## $ age : num 74 68 56 57 60 74 68 71 53 61 ...
## $ sex : num 1 1 1 1 1 1 2 2 1 1 ...
## $ ph.ecog : num 1 0 0 1 0 1 2 2 1 2 ...
## $ ph.karno : num 90 90 90 90 100 50 70 60 70 70 ...
## $ pat.karno: num 100 90 90 60 90 80 60 80 80 70 ...
## $ meal.cal : num 1175 1225 NA 1150 NA ...
## $ wt.loss : num NA 15 15 11 0 0 10 1 16 34 ...
vnames = c("age","sex","ph.ecog")
# Remove a missing observation
X = cbind(1,lung$age,lung$sex,lung$ph.ecog)[-14,]
logY = log(lung$time)[-14]
n = length(logY)
status = (1-(lung$status-1))[-14]
#######################################################################################################################################
# Weibull, lognormal and log-logistic fit using survreg
#######################################################################################################################################
summary( survreg(Surv(time, status) ~ age+sex+ph.ecog, data=cancer, dist="weibull") )
##
## Call:
## survreg(formula = Surv(time, status) ~ age + sex + ph.ecog, data = cancer,
## dist = "weibull")
## Value Std. Error z p
## (Intercept) 6.27344 0.45358 13.83 1.66e-43
## age -0.00748 0.00676 -1.11 2.69e-01
## sex 0.40109 0.12373 3.24 1.19e-03
## ph.ecog -0.33964 0.08348 -4.07 4.73e-05
## Log(scale) -0.31319 0.06135 -5.11 3.30e-07
##
## Scale= 0.731
##
## Weibull distribution
## Loglik(model)= -1132.4 Loglik(intercept only)= -1147.4
## Chisq= 29.98 on 3 degrees of freedom, p= 1.4e-06
## Number of Newton-Raphson Iterations: 5
## n=227 (1 observation deleted due to missingness)
summary( survreg(Surv(time, status) ~ age+sex+ph.ecog, data=cancer, dist="lognormal") )
##
## Call:
## survreg(formula = Surv(time, status) ~ age + sex + ph.ecog, data = cancer,
## dist = "lognormal")
## Value Std. Error z p
## (Intercept) 6.4948 0.58276 11.145 7.58e-29
## age -0.0192 0.00833 -2.303 2.13e-02
## sex 0.5220 0.15278 3.416 6.34e-04
## ph.ecog -0.3556 0.10331 -3.442 5.78e-04
## Log(scale) 0.0282 0.05596 0.505 6.14e-01
##
## Scale= 1.03
##
## Log Normal distribution
## Loglik(model)= -1146.9 Loglik(intercept only)= -1163.2
## Chisq= 32.59 on 3 degrees of freedom, p= 3.9e-07
## Number of Newton-Raphson Iterations: 3
## n=227 (1 observation deleted due to missingness)
summary( survreg(Surv(time, status) ~ age+sex+ph.ecog, data=cancer, dist="loglogistic") )
##
## Call:
## survreg(formula = Surv(time, status) ~ age + sex + ph.ecog, data = cancer,
## dist = "loglogistic")
## Value Std. Error z p
## (Intercept) 5.93669 0.51207 11.59 4.45e-31
## age -0.00808 0.00748 -1.08 2.80e-01
## sex 0.48662 0.13489 3.61 3.09e-04
## ph.ecog -0.40462 0.09301 -4.35 1.36e-05
## Log(scale) -0.62336 0.06581 -9.47 2.76e-21
##
## Scale= 0.536
##
## Log logistic distribution
## Loglik(model)= -1137.5 Loglik(intercept only)= -1154.5
## Chisq= 34.12 on 3 degrees of freedom, p= 1.9e-07
## Number of Newton-Raphson Iterations: 4
## n=227 (1 observation deleted due to missingness)
#######################################################################################################################################
# Log Normal fit
#######################################################################################################################################
lln <- function(par){
if(par[5]>0){
var1 = log(dnorm(logY - X%*%par[1:4],0,par[5])^(1-status))
var2 = log( (1 - pnorm(logY - X%*%par[1:4],0,par[5]))^(status) )
return(-sum(var1+var2))
}
return(Inf)
}
OPT4 = optim(c(6,0,0,0,1),lln,control=list(maxit=10000))
AIC4 = 2*OPT4$value + 2*5
# MLE
OPT4$par
## [1] 6.49404503 -0.01916748 0.52190473 -0.35570106 1.02868869
#######################################################################################################################################
# Log two-piece normal fit
#######################################################################################################################################
lltpn <- function(par){
if(par[5]>0 & par[6]>-1 & par[6]<1){
var1 = log(dtp3(logY - X%*%par[1:4],0,par[5],par[6],param="eps",FUN=dnorm)^(1-status) )
var2 = log( (1 - ptp3(logY - X%*%par[1:4],0,par[5],par[6],param="eps",FUN=pnorm,log.p=F) )^(status) )
return(-sum(var1+var2))
}
return(Inf)
}
OPT2 = optim(c(OPT4$par,0.3),lltpn,control=list(maxit=10000))
AIC2 = 2*OPT2$value + 2*6
# MLE
OPT2$par
## [1] 6.95056710 -0.01490558 0.42596622 -0.31212867 0.88355859 0.50511774
#######################################################################################################################################
# Log Logistic fit
#######################################################################################################################################
lll <- function(par){
if(par[5]>0){
var1 = log(dlogis(logY - X%*%par[1:4],0,par[5])^(1-status))
var2 = log( (1 - plogis(logY - X%*%par[1:4],0,par[5]))^(status) )
return(-sum(var1+var2))
}
return(Inf)
}
OPT5 = optim(c(6,0,0,0,0.5),lll,control=list(maxit=10000))
AIC5 = 2*OPT5$value + 2*5
# MLE
OPT5$par
## [1] 5.950092553 -0.008279673 0.485730043 -0.404283302 0.536088726
#######################################################################################################################################
# Log two-piece logistic fit
#######################################################################################################################################
lltpl <- function(par){
if(par[5]>0 & par[6]>-1 & par[6]<1){
var1 = log(dtp3(logY - X%*%par[1:4],0,par[5],par[6],param="eps",FUN=dlogis)^(1-status))
var2 = log( (1 - ptp3(logY - X%*%par[1:4],0,par[5],par[6],param="eps",FUN=plogis,log.p=F))^(status) )
return(-sum(var1+var2))
}
return(Inf)
}
OPT6 = optim(c(OPT5$par,0.2),lltpl,control=list(maxit=10000))
AIC6 = 2*OPT6$value + 2*6
# MLE
OPT6$par
## [1] 6.5538686 -0.0100372 0.4242944 -0.3541706 0.4847812 0.4083184
# Comparison using AIC
c(AIC2,AIC4,AIC5,AIC6)
## [1] 545.9405 563.8323 545.0486 536.0556