Bayesian AFT models with skew-symmetric errors

Log-skew-symmetric distributions are univariate models with positive support obtained by transforming the class of skew-symmetric distributions. This class of distributions is very flexible, easy to implement, and contains members that can capture different tail behaviours and shapes, producing also a variety of hazard functions. These distributions represent a flexible alternative to the classical choices such as the log-normal, Gamma, and Weibull distributions. Below, we present an application using real data in the contexts of lung cancer survival using Bayesian Accelerated Failure Time models.:

References

  1. Bayesian linear regression with skew-symmetric error distributions with applications to survival analysis
rm(list=ls())

# Required packages
library(survival)
library(Rtwalk)
## Package: Rtwalk, Version: 1.8.0
## The R Implementation of 't-walk' MCMC Sampler
## http://www.cimat.mx/~jac/twalk/
## For citations, please use:
## Christen JA and Fox C (2010). A general purpose sampling algorithm for
## continuous distributions (the t-walk). Bayesian Analysis, 5(2),
## pp. 263-282. <URL:
## http://ba.stat.cmu.edu/journal/2010/vol05/issue02/christen.pdf>.
library(sn)
## Loading required package: stats4
## 
## Attaching package: 'sn'
## The following object is masked from 'package:stats':
## 
##     sd
library(TeachingDemos)

# Lung cancer data
data(lung)
str(lung)
## 'data.frame':    228 obs. of  10 variables:
##  $ inst     : num  3 3 3 5 1 12 7 11 1 7 ...
##  $ time     : num  306 455 1010 210 883 ...
##  $ status   : num  2 2 1 2 2 1 2 2 2 2 ...
##  $ age      : num  74 68 56 57 60 74 68 71 53 61 ...
##  $ sex      : num  1 1 1 1 1 1 2 2 1 1 ...
##  $ ph.ecog  : num  1 0 0 1 0 1 2 2 1 2 ...
##  $ ph.karno : num  90 90 90 90 100 50 70 60 70 70 ...
##  $ pat.karno: num  100 90 90 60 90 80 60 80 80 70 ...
##  $ meal.cal : num  1175 1225 NA 1150 NA ...
##  $ wt.loss  : num  NA 15 15 11 0 0 10 1 16 34 ...
vnames = c("age","sex","ph.ecog")

X = cbind(1,lung$age,lung$sex,lung$ph.ecog)[-14,]
logY = log(lung$time)[-14]
n = length(logY)

status = (1-(lung$status-1))[-14]
#######################################################################################################################################
# Prior on lambda
#######################################################################################################################################

c = pi/2
lpriorlambda = function(lambda) return( dt(c*lambda,df=0.5,log=T) + log(c) )

#######################################################################################################################################
# Log Skew Normal AFT Model
#######################################################################################################################################

# Log-likelihood
llsn <- function(par){
if(par[5]>0){
var1 = log(dsn(logY - X%*%par[1:4],0,par[5],par[6])^(1-status)) 
var2 = log( (1 - psn(logY - X%*%par[1:4],0,par[5],par[6]))^(status)  )
return(-sum(var1+var2))
}
return(Inf)
} 

# Maximum Likelihood Estimation
OPT1 = optim(c(7,0,0,0,1,-2),llsn,method="BFGS",control=list(maxit=10000))

init=OPT1$par

# Log Posterior

lpsl <- function(par){
var1 = log(dsn(logY - X%*%par[1:4],0,par[5],par[6])^(1-status)) 
var2 = log( (1 - psn(logY - X%*%par[1:4],0,par[5],par[6]))^(status)  )
#return(-sum(var1+var2)+log(par[5]) )
ifelse(is.na(sum(var1+var2)),
return(10000000),
return(-sum(var1+var2)+log(par[5]) - lpriorlambda(par[6]) ))
} 

# Simulation from the posterior

Support <- function(x) {
    ((0.1 < x[5]) ) 
}

X0 <- function(x) { init + 0.1*runif(6,-0.1,0.1) }

# It takes a couple of minutes to run
set.seed(1234)
info <- Runtwalk( dim=6,  Tr=105000,  Obj=lpsl, Supp=Support, x0=X0(), xp0=X0(), PlotLogPost = FALSE) 
## This is the twalk for R.
## Evaluating the objective density at initial values.
## Opening 14 X 105001 matrix to save output.  Sampling (no graphics mode).
# Thinning and burning the chain
ind=seq(5000,105000,50)
str(info)
## List of 8
##  $ dim    : num 6
##  $ Tr     : num 105000
##  $ acc    : num 5898
##  $ Us     : num [1:105001] 273 273 273 273 270 ...
##  $ Ups    : num [1:105001] 289 289 289 289 289 ...
##  $ output : num [1:105001, 1:6] 7.17 7.17 7.17 7.17 7.17 ...
##  $ outputp: num [1:105001, 1:6] 7.17 7.17 7.17 7.17 7.17 ...
##  $ recacc : num [1:105000, 1:2] 1 1 1 2 2 2 1 2 2 2 ...
beta1 = info$output[,1][ind]
beta2 = info$output[,2][ind]
beta3 = info$output[,3][ind]
beta4 = info$output[,4][ind]
sigma = info$output[,5][ind]
lambda = info$output[,6][ind]

# Some histograms of the posterior samples

hist(beta1)

hist(beta2)

hist(beta3)

hist(beta4)

hist(sigma)

hist(lambda)

#########################################################################
# Savage-Dickey ratio for log-skew-normal vs log-normal
#########################################################################

h = bw.nrd0(lambda)
postgammav <- Vectorize(function(x) mean(dnorm((x-lambda)/h)/h))
curve(postgammav,-10,1)

BF1 = postgammav(0)/exp(lpriorlambda(0))

# Bayes factor against the model with normal (instead of skew-normal) errors
# Strong evidence in favour of the log-skew-normal model
BF1
## [1] 0.001084645
log(BF1)
## [1] -6.826503