Log-skew-symmetric distributions are univariate models with positive support obtained by transforming the class of skew-symmetric distributions. This class of distributions is very flexible, easy to implement, and contains members that can capture different tail behaviours and shapes, producing also a variety of hazard functions. These distributions represent a flexible alternative to the classical choices such as the log-normal, Gamma, and Weibull distributions. Below, we present an application using real data in the contexts of lung cancer survival using Bayesian Accelerated Failure Time models.:
References
rm(list=ls())
# Required packages
library(survival)
library(Rtwalk)
## Package: Rtwalk, Version: 1.8.0
## The R Implementation of 't-walk' MCMC Sampler
## http://www.cimat.mx/~jac/twalk/
## For citations, please use:
## Christen JA and Fox C (2010). A general purpose sampling algorithm for
## continuous distributions (the t-walk). Bayesian Analysis, 5(2),
## pp. 263-282. <URL:
## http://ba.stat.cmu.edu/journal/2010/vol05/issue02/christen.pdf>.
library(sn)
## Loading required package: stats4
##
## Attaching package: 'sn'
## The following object is masked from 'package:stats':
##
## sd
library(TeachingDemos)
# Lung cancer data
data(lung)
str(lung)
## 'data.frame': 228 obs. of 10 variables:
## $ inst : num 3 3 3 5 1 12 7 11 1 7 ...
## $ time : num 306 455 1010 210 883 ...
## $ status : num 2 2 1 2 2 1 2 2 2 2 ...
## $ age : num 74 68 56 57 60 74 68 71 53 61 ...
## $ sex : num 1 1 1 1 1 1 2 2 1 1 ...
## $ ph.ecog : num 1 0 0 1 0 1 2 2 1 2 ...
## $ ph.karno : num 90 90 90 90 100 50 70 60 70 70 ...
## $ pat.karno: num 100 90 90 60 90 80 60 80 80 70 ...
## $ meal.cal : num 1175 1225 NA 1150 NA ...
## $ wt.loss : num NA 15 15 11 0 0 10 1 16 34 ...
vnames = c("age","sex","ph.ecog")
X = cbind(1,lung$age,lung$sex,lung$ph.ecog)[-14,]
logY = log(lung$time)[-14]
n = length(logY)
status = (1-(lung$status-1))[-14]
#######################################################################################################################################
# Prior on lambda
#######################################################################################################################################
c = pi/2
lpriorlambda = function(lambda) return( dt(c*lambda,df=0.5,log=T) + log(c) )
#######################################################################################################################################
# Log Skew Normal AFT Model
#######################################################################################################################################
# Log-likelihood
llsn <- function(par){
if(par[5]>0){
var1 = log(dsn(logY - X%*%par[1:4],0,par[5],par[6])^(1-status))
var2 = log( (1 - psn(logY - X%*%par[1:4],0,par[5],par[6]))^(status) )
return(-sum(var1+var2))
}
return(Inf)
}
# Maximum Likelihood Estimation
OPT1 = optim(c(7,0,0,0,1,-2),llsn,method="BFGS",control=list(maxit=10000))
init=OPT1$par
# Log Posterior
lpsl <- function(par){
var1 = log(dsn(logY - X%*%par[1:4],0,par[5],par[6])^(1-status))
var2 = log( (1 - psn(logY - X%*%par[1:4],0,par[5],par[6]))^(status) )
#return(-sum(var1+var2)+log(par[5]) )
ifelse(is.na(sum(var1+var2)),
return(10000000),
return(-sum(var1+var2)+log(par[5]) - lpriorlambda(par[6]) ))
}
# Simulation from the posterior
Support <- function(x) {
((0.1 < x[5]) )
}
X0 <- function(x) { init + 0.1*runif(6,-0.1,0.1) }
# It takes a couple of minutes to run
set.seed(1234)
info <- Runtwalk( dim=6, Tr=105000, Obj=lpsl, Supp=Support, x0=X0(), xp0=X0(), PlotLogPost = FALSE)
## This is the twalk for R.
## Evaluating the objective density at initial values.
## Opening 14 X 105001 matrix to save output. Sampling (no graphics mode).
# Thinning and burning the chain
ind=seq(5000,105000,50)
str(info)
## List of 8
## $ dim : num 6
## $ Tr : num 105000
## $ acc : num 5898
## $ Us : num [1:105001] 273 273 273 273 270 ...
## $ Ups : num [1:105001] 289 289 289 289 289 ...
## $ output : num [1:105001, 1:6] 7.17 7.17 7.17 7.17 7.17 ...
## $ outputp: num [1:105001, 1:6] 7.17 7.17 7.17 7.17 7.17 ...
## $ recacc : num [1:105000, 1:2] 1 1 1 2 2 2 1 2 2 2 ...
beta1 = info$output[,1][ind]
beta2 = info$output[,2][ind]
beta3 = info$output[,3][ind]
beta4 = info$output[,4][ind]
sigma = info$output[,5][ind]
lambda = info$output[,6][ind]
# Some histograms of the posterior samples
hist(beta1)
hist(beta2)
hist(beta3)
hist(beta4)
hist(sigma)
hist(lambda)
#########################################################################
# Savage-Dickey ratio for log-skew-normal vs log-normal
#########################################################################
h = bw.nrd0(lambda)
postgammav <- Vectorize(function(x) mean(dnorm((x-lambda)/h)/h))
curve(postgammav,-10,1)
BF1 = postgammav(0)/exp(lpriorlambda(0))
# Bayes factor against the model with normal (instead of skew-normal) errors
# Strong evidence in favour of the log-skew-normal model
BF1
## [1] 0.001084645
log(BF1)
## [1] -6.826503