Log-two piece distributions are univariate models with positive support obtained by transforming the class of two-piece distributions. This class of distributions is very flexible, easy to implement, and contains members that can capture different tail behaviours and shapes, producing also a variety of hazard functions. These distributions represent a flexible alternative to the classical choices such as the log-normal, Gamma, and Weibull distributions. Below, we present an application using real data in the contexts of small-cell lung cancer survival using Bayesian Accelerated Failure Time models. Linear regression models with two-piece errors are closely related to quantile regression (for two-piece Laplace errors) and asymmetric least squares (for two-piece normal errors): see Tractable Bayesian variable selection: beyond normality for more details.
References
Flexible objective Bayesian linear regression with applications in survival analysis
Survival and lifetime data analysis with a flexible class of distributions
rm(list=ls())
library(emplik)
## Loading required package: quantreg
## Loading required package: SparseM
##
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
##
## backsolve
library(survival)
##
## Attaching package: 'survival'
## The following object is masked from 'package:quantreg':
##
## untangle.specials
library(Rtwalk)
## Package: Rtwalk, Version: 1.8.0
## The R Implementation of 't-walk' MCMC Sampler
## http://www.cimat.mx/~jac/twalk/
## For citations, please use:
## Christen JA and Fox C (2010). A general purpose sampling algorithm for
## continuous distributions (the t-walk). Bayesian Analysis, 5(2),
## pp. 263-282. <URL:
## http://ba.stat.cmu.edu/journal/2010/vol05/issue02/christen.pdf>.
library(twopiece)
## Loading required package: flexclust
## Loading required package: grid
## Loading required package: lattice
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: mvtnorm
## Loading required package: label.switching
library(TeachingDemos)
library(LaplacesDemon)
##
## Attaching package: 'LaplacesDemon'
## The following objects are masked from 'package:mvtnorm':
##
## dmvt, rmvt
# Small Cell Cancer Data
data(smallcell)
str(smallcell)
## 'data.frame': 121 obs. of 4 variables:
## $ arm : int 0 0 0 0 0 0 0 0 0 0 ...
## $ entry : int 56 70 56 54 74 65 60 66 74 63 ...
## $ survival : int 730 1980 260 1883 1194 1624 967 1779 643 1645 ...
## $ indicator: int 1 0 1 0 1 0 1 0 1 0 ...
vnames = c("age","arm")
X = cbind(1,smallcell$entry,smallcell$arm)
logY = log(smallcell$survival)
n = length(logY)
status = 1-smallcell$indicator
#######################################################################################################################################
# Logistic fit
#######################################################################################################################################
summary( survreg(Surv(survival,smallcell$indicator) ~ entry+arm, data=smallcell, dist="loglogistic") )
##
## Call:
## survreg(formula = Surv(survival, smallcell$indicator) ~ entry +
## arm, data = smallcell, dist = "loglogistic")
## Value Std. Error z p
## (Intercept) 7.4382 0.50916 14.61 2.47e-48
## entry -0.0148 0.00816 -1.81 7.01e-02
## arm -0.4174 0.14091 -2.96 3.05e-03
## Log(scale) -0.8253 0.08413 -9.81 1.03e-22
##
## Scale= 0.438
##
## Log logistic distribution
## Loglik(model)= -730.3 Loglik(intercept only)= -737.1
## Chisq= 13.51 on 2 degrees of freedom, p= 0.0012
## Number of Newton-Raphson Iterations: 3
## n= 121
#######################################################################################################################################
# Log Skew Laplace AFT Model
#######################################################################################################################################
plaplace1 <- function(q, location = 0, scale = 1,log.p=FALSE) plaplace(q, location = 0, scale = 1)
# Log likelihood
llsl <- function(par){
if(par[4]>0 & par[5]<1 & par[5]>-1){
var1 = log(dtp3(logY - X%*%par[1:3],0,par[4],par[5],param="eps",FUN=dlaplace)^(1-status))
var2 = log( (1 - ptp3(logY - X%*%par[1:3],0,par[4],par[5],param="eps",FUN=plaplace1))^(status) )
return(-sum(var1+var2))
}
return(Inf)
}
# Maximum Likelihood Estimation
OPT1 = optim(c(7,0,0,1,0),llsl,method="BFGS",control=list(maxit=10000))
init=OPT1$par
# Log Posterior
lpsl <- function(par){
var1 = log(dtp3(logY - X%*%par[1:3],0,par[4],par[5],param="eps",FUN=dlaplace)^(1-status))
var2 = log( (1 - ptp3(logY - X%*%par[1:3],0,par[4],par[5],param="eps",FUN=plaplace1))^(status) )
return(-sum(var1+var2)+log(par[4]) )
}
# Simulation from the posterior
Support <- function(x) {
((0.1 < x[4])&(-1<x[5])&(x[5]<1))
}
X0 <- function(x) { init + 0.1*runif(5,-0.1,0.1) }
set.seed(1234)
info <- Runtwalk( dim=5, Tr=105000, Obj=lpsl, Supp=Support, x0=X0(), xp0=X0(),PlotLogPost = FALSE)
## This is the twalk for R.
## Evaluating the objective density at initial values.
## Opening 12 X 105001 matrix to save output. Sampling (no graphics mode).
# Thinning and burning the chain
ind=seq(5000,105000,25)
str(info)
## List of 8
## $ dim : num 5
## $ Tr : num 105000
## $ acc : num 9895
## $ Us : num [1:105001] 133 133 133 133 133 ...
## $ Ups : num [1:105001] 166 166 167 145 145 ...
## $ output : num [1:105001, 1:5] 6.8 6.8 6.8 6.8 6.8 ...
## $ outputp: num [1:105001, 1:5] 6.81 6.81 6.81 6.82 6.82 ...
## $ recacc : num [1:105000, 1:2] 2 1 2 2 2 1 1 1 1 2 ...
# Some histograms of the posterior samples
beta1 = info$output[,1][ind]
beta2 = info$output[,2][ind]
beta3 = info$output[,3][ind]
sigma = info$output[,4][ind]
gamma = info$output[,5][ind]
hist(beta1)
hist(beta2)
hist(beta3)
hist(sigma)
hist(gamma)
#########################################################################
# Savage-Dickey ratio for log-skew/twopiece-Laplace vs log-Laplace
#########################################################################
h = bw.nrd0(gamma)
postgammav <- Vectorize(function(x) mean(dnorm((x-gamma)/h)/h))
curve(postgammav,-1,1)
BF1 = postgammav(0)/dunif(0,-1,0)
# Bayes factor against the model with Laplace (instead of skew/twopiece-Laplace) errors
# Strong evidence in favour of the log-skew-Laplace model
BF1
## [1] 0.0002249956
log(BF1)
## [1] -8.39943