Natural (non-) informative priors for the skew-normal distribution

The probability density function of the skew-normal distribution is given by [1]:

\[s(x \,\vert \, \mu,\sigma,\lambda) = \dfrac{2}{\sigma}\phi\left(\dfrac{x-\mu}{\sigma}\right)\Phi\left(\lambda\dfrac{x-\mu}{\sigma}\right),\] where \(\phi\) and \(\Phi\) are the standard normal density and distribution functions, respectively, \(\lambda \in {\mathbb R}\) is a shape parameter that induces skewness, \(\mu\in {\mathbb R}\) is a location parameter, and \(\sigma>0\) is a scale parameter. Priors for the shape (skewness) parameter \(\lambda\) in the family of skew-symmetric models [3], which contains the skew-normal distribution, have been studied in [1,2]. In particular, [2,4] studied the Jeffreys (reference) prior of \(\lambda\) and found that it can be approximated by a Student-t distribution with \(1/2\) degrees of freedom. [1] proposed a novel approach to constructing informative and non-informative priors for \(\lambda\) based on the Total Variation distance. This strategy leads to closed-form, interpretable priors with tails similar to those of the Jeffreys prior. As a joint prior for the parameters \((\mu,\sigma,\lambda)\), [1,2] consider the structure \[\pi(\mu,\sigma,\lambda) \propto \dfrac{1}{\sigma}\pi(\lambda),\] for different choices of \(\pi(\lambda)\).
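
As a quick check of the formula above, the density can be coded directly from its definition using base R and compared against dsn from the sn package (loaded later in the script). This is only a minimal sketch; the function name dskewnormal is illustrative.

# Skew-normal density written directly from the formula above
dskewnormal <- function(x, mu = 0, sigma = 1, lambda = 0) {
  z <- (x - mu) / sigma
  (2 / sigma) * dnorm(z) * pnorm(lambda * z)
}

# Positive lambda skews to the right, negative lambda to the left,
# and lambda = 0 recovers the normal density
curve(dskewnormal(x, 0, 1, 5), -3, 3, lwd = 2, ylab = "density")
curve(dskewnormal(x, 0, 1, 0), -3, 3, add = TRUE, lty = 2)
curve(dskewnormal(x, 0, 1, -5), -3, 3, add = TRUE, lty = 3)

# Check against sn::dsn, e.g.
# all.equal(dskewnormal(0:3, 1, 2, 3), sn::dsn(0:3, 1, 2, 3))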

The following R code presents an example with real data that illustrates the use of the Jeffreys prior [2] and the Total Variation priors [1]. It implements the posterior distribution under (i) the BTV(1,1) prior, (ii) the BTV(0.5,0.5) prior, (iii) the BTV(3,0.5) prior, and (iv) the Jeffreys prior; a short sketch comparing these prior densities on \(\lambda\) is included after the reference list below. The fit is illustrated by comparing the histogram of the data with the posterior predictive density function associated with each prior.

  1. Natural (non-) informative priors for skew-symmetric distributions

  2. On the independence Jeffreys prior for skew-symmetric models

  3. A class of distributions which includes the normal ones

  4. A note on reference priors for the scalar skew-normal distribution
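
Before fitting the model, it may help to visualise these priors on \(\lambda\). The short, self-contained sketch below reconstructs them from the expressions used further down in the code: the BTV(a,b) prior is a Beta(a,b) density on \(\arctan(\lambda)/\pi + 1/2\) multiplied by the Cauchy Jacobian of that transformation (so BTV(1,1) reduces to the standard Cauchy), and the Jeffreys prior is approximated by a Student-t distribution with \(1/2\) degrees of freedom and scale \(\pi/2\). The function names dbtv and djeff are only illustrative.

# BTV(a, b) prior density for lambda: a Beta(a, b) density on the arctan scale
# times the Cauchy Jacobian (BTV(1, 1) is the standard Cauchy)
dbtv <- function(lambda, a, b) {
  dbeta(atan(lambda)/pi + 0.5, a, b) * dcauchy(lambda)
}

# Student-t (1/2 degrees of freedom, scale pi/2) approximation of the Jeffreys prior
djeff <- function(lambda) dt(lambda/(pi/2), df = 1/2)/(pi/2)

curve(dbtv(x, 1, 1), -10, 10, lwd = 2, lty = 1, ylab = "prior density", xlab = "lambda")
curve(dbtv(x, 0.5, 0.5), -10, 10, add = TRUE, lwd = 2, lty = 2)
curve(dbtv(x, 3, 0.5), -10, 10, add = TRUE, lwd = 2, lty = 3)
curve(djeff(x), -10, 10, add = TRUE, lwd = 2, lty = 4)
legend("topright", c("BTV(1,1)", "BTV(0.5,0.5)", "BTV(3,0.5)", "Jeffreys"),
       lty = 1:4, lwd = 2, bg = "gray90")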

rm(list=ls())

# Required packages
library(sn)
## Loading required package: stats4
## 
## Attaching package: 'sn'
## The following object is masked from 'package:stats':
## 
##     sd
library(Rtwalk)
## Package: Rtwalk, Version: 1.8.0
## The R Implementation of 't-walk' MCMC Sampler
## http://www.cimat.mx/~jac/twalk/
## For citations, please use:
## Christen JA and Fox C (2010). A general purpose sampling algorithm for
## continuous distributions (the t-walk). Bayesian Analysis, 5(2),
## pp. 263-282. <URL:
## http://ba.stat.cmu.edu/journal/2010/vol05/issue02/christen.pdf>.
library(TeachingDemos)

# Data
data(ais)

bmi = ais$BMI[1:100]

hist(bmi)

# Negative log-likelihood (minimised by optim to obtain the MLE; note that the
# MLE of the skewness parameter can be infinite for the skew-normal model)
ll <- function(par){
  if(par[2] > 0) return(-sum(dsn(bmi, par[1], par[2], par[3], log = TRUE)))
  else return(Inf)
}

optim(c(20,1,2),ll)
## $par
## [1] 19.229308  3.810285  2.233281
## 
## $value
## [1] 235.504
## 
## $counts
## function gradient 
##       76       NA 
## 
## $convergence
## [1] 0
## 
## $message
## NULL
##########################################################################################################################
# Log-posterior with BTV(1,1) prior
##########################################################################################################################
# Negative log-posterior (under the BTV(1,1) prior, the prior on lambda reduces
# to the standard Cauchy, i.e. a Student-t with 1 degree of freedom)
lptv <- function(par){
  return(-sum(dsn(bmi, par[1], par[2], par[3], log = TRUE)) + log(par[2]) - dt(par[3], df = 1, log = TRUE))
}

# Number of iterations 
NMH = 110000

# Support function: the only constraint is a positive scale parameter, sigma > 0
Support <- function(x) { (x[2] > 0) }

# Function to generate the initial points in the sampler
X0 <- function(x) { c( runif(1, min=-0.1, max=0.1), 1+runif(1,min=-0.25,max=0.25),7+runif(1, min=-0.5, max=0.5)) }

# Posterior samples
set.seed(1234)
outtv <- Runtwalk( dim=3,  Tr=NMH,  Obj=lptv, Supp=Support, x0=X0(), xp0=X0(),PlotLogPost = FALSE) 
## This is the twalk for R.
## Evaluating the objective density at initial values.
## Opening 8 X 110001 matrix to save output.  Sampling (no graphics mode).
# burn-in and thinning
burn = 10000
thin = 100
ind = seq(burn,NMH,thin)

muptv = outtv$output[ , 1][ind]
sigmaptv = outtv$output[ , 2][ind]
lambdaptv = outtv$output[ , 3][ind]
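
# Optional (not part of the original analysis): a quick look at the mixing of
# the thinned chains using base-R trace and autocorrelation plots
oldpar <- par(mfrow = c(3, 2))
plot(muptv, type = "l", main = "Trace: mu")
acf(muptv, main = "ACF: mu")
plot(sigmaptv, type = "l", main = "Trace: sigma")
acf(sigmaptv, main = "ACF: sigma")
plot(lambdaptv, type = "l", main = "Trace: lambda")
acf(lambdaptv, main = "ACF: lambda")
par(oldpar)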

# Some histograms and summaries
hist(muptv)

hist(sigmaptv)

hist(lambdaptv)

median(muptv)
## [1] 19.56176
median(sigmaptv)
## [1] 3.604874
median(lambdaptv)
## [1] 1.728608
emp.hpd(muptv)
## [1] 18.17581 23.07018
emp.hpd(sigmaptv)
## [1] 2.421726 4.515195
emp.hpd(lambdaptv)
## [1] -0.7979746  3.8141542
##########################################################################################################################
# Log-posterior with BTV(0.5,0.5) prior
##########################################################################################################################

# Log prior: BTV(0.5,0.5), i.e. a Beta(1/2,1/2) density on atan(lambda)/pi + 1/2
# plus the log Cauchy Jacobian of the arctan transformation
lpriorbtv = Vectorize(function(lambda){
  val = dbeta(atan(lambda)/pi + 0.5, 1/2, 1/2, log = TRUE) + dt(lambda, df = 1, log = TRUE)
  return(val)
})

# Negative log-posterior
lpbtv <- function(par){
  if(par[2] > 0) return(-sum(dsn(bmi, par[1], par[2], par[3], log = TRUE)) + log(par[2]) - lpriorbtv(par[3]))
  else return(Inf)
}

# Posterior samples
set.seed(1234)
outbtv <- Runtwalk( dim=3,  Tr=NMH,  Obj=lpbtv, Supp=Support, x0=X0(), xp0=X0(),PlotLogPost = FALSE) 
## This is the twalk for R.
## Evaluating the objective density at initial values.
## Opening 8 X 110001 matrix to save output.  Sampling (no graphics mode).
ind = seq(burn,NMH,thin)

mupbtv = outbtv$output[ , 1][ind]
sigmapbtv = outbtv$output[ , 2][ind]
lambdapbtv = outbtv$output[ , 3][ind]

# Some histograms and summaries
hist(mupbtv)

hist(sigmapbtv)

hist(lambdapbtv)

median(mupbtv)
## [1] 19.51081
median(sigmapbtv)
## [1] 3.629451
median(lambdapbtv)
## [1] 1.777081
emp.hpd(mupbtv)
## [1] 18.20072 23.12962
emp.hpd(sigmapbtv)
## [1] 2.646438 4.603110
emp.hpd(lambdapbtv)
## [1] -0.652033  4.321629
##########################################################################################################################
# Log-posterior with BTV(3,0.5) informative prior
##########################################################################################################################

# Log prior: BTV(3,0.5), i.e. a Beta(3,1/2) density on atan(lambda)/pi + 1/2
# plus the log Cauchy Jacobian of the arctan transformation
lpriorbtvi = Vectorize(function(lambda){
  val = dbeta(atan(lambda)/pi + 0.5, 3, 1/2, log = TRUE) + dt(lambda, df = 1, log = TRUE)
  return(val)
})

# Negative log-posterior
lpbtvi <- function(par){
  if(par[2] > 0) return(-sum(dsn(bmi, par[1], par[2], par[3], log = TRUE)) + log(par[2]) - lpriorbtvi(par[3]))
  else return(Inf)
}

# Posterior samples
set.seed(1234)
outbtvi <- Runtwalk( dim=3,  Tr=NMH,  Obj=lpbtvi, Supp=Support, x0=X0(), xp0=X0(),PlotLogPost = FALSE) 
## This is the twalk for R.
## Evaluating the objective density at initial values.
## Opening 8 X 110001 matrix to save output.  Sampling (no graphics mode).
ind = seq(burn,NMH,thin)

mupbtvi = outbtvi$output[ , 1][ind]
sigmapbtvi = outbtvi$output[ , 2][ind]
lambdapbtvi = outbtvi$output[ , 3][ind]

# Some histograms and summaries
hist(mupbtvi)

hist(sigmapbtvi)

hist(lambdapbtvi)

median(mupbtvi)
## [1] 19.26803
median(sigmapbtvi)
## [1] 3.796684
median(lambdapbtvi)
## [1] 2.143555
emp.hpd(mupbtvi)
## [1] 18.26624 20.91671
emp.hpd(sigmapbtvi)
## [1] 2.669896 4.676249
emp.hpd(lambdapbtvi)
## [1] 0.3194122 4.0209780
##########################################################################################################################
# Log-posterior with the Jeffreys (reference) prior
##########################################################################################################################

# Negative log-posterior (the Jeffreys prior on lambda is approximated by a
# Student-t with 1/2 degrees of freedom and scale pi/2)
c = pi/2
lpr <- function(par){
  return(-sum(dsn(bmi, par[1], par[2], par[3], log = TRUE)) + log(par[2]) - dt(par[3]/c, df = 1/2, log = TRUE))
}

# Posterior samples
set.seed(1234)
outr <- Runtwalk( dim=3,  Tr=NMH,  Obj=lpr, Supp=Support, x0=X0(), xp0=X0(),PlotLogPost = FALSE) 
## This is the twalk for R.
## Evaluating the objective density at initial values.
## Opening 8 X 110001 matrix to save output.  Sampling (no graphics mode).
mupr = outr$output[ , 1][ind]
sigmapr = outr$output[ , 2][ind]
lambdapr = outr$output[ , 3][ind]

# Some histograms and summaries
hist(mupr)

hist(sigmapr)

hist(lambdapr)

median(mupr)
## [1] 19.41992
median(sigmapr)
## [1] 3.731819
median(lambdapr)
## [1] 1.912515
emp.hpd(mupr)
## [1] 18.24755 23.63246
emp.hpd(sigmapr)
## [1] 2.735461 4.767317
emp.hpd(lambdapr)
## [1] -1.223716  4.161240
##########################################################################################################################
# Predictive densities
##########################################################################################################################

# Predictive densities for the different priors
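# Each predictive density below is a Monte Carlo approximation of the posterior
# predictive: the sampling density dsn(x | mu, sigma, lambda) averaged over the
# retained (thinned) posterior draws of (mu, sigma, lambda).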
predtv<- Vectorize(function(x) {
  tempf <- function(par) dsn(x,par[1],par[2],par[3])
  var <- mean(apply(cbind(muptv,sigmaptv,lambdaptv),1,tempf))
  return(var)
})

predbtv<- Vectorize(function(x) {
  tempf <- function(par) dsn(x,par[1],par[2],par[3])
  var <- mean(apply(cbind(mupbtv,sigmapbtv,lambdapbtv),1,tempf))
  return(var)
})

predbtvi<- Vectorize(function(x) {
  tempf <- function(par) dsn(x,par[1],par[2],par[3])
  var <- mean(apply(cbind(mupbtvi,sigmapbtvi,lambdapbtvi),1,tempf))
  return(var)
})

predr <- Vectorize(function(x) {
  tempf <- function(par) dsn(x, par[1], par[2], par[3])
  var <- mean(apply(cbind(mupr, sigmapr, lambdapr), 1, tempf))
  return(var)
})

hist(bmi,breaks=20,probability = TRUE, xlim=c(15,35))
curve(predtv,15,35,add=T,lwd=2,lty=1)
curve(predbtv,15,35,add=T,lwd=2,lty=2)
curve(predbtvi,15,35,add=T,lwd=2,lty=3)
curve(predr,15,35,add=T,lwd=2,lty=4)
box()
legend(30, 0.15, c("BTV(1,1)","BTV(0.5,0.5)","BTV(3,0.5)","Jeffreys"),
       text.col = "black", lty = c(1, 2, 3, 4),
       merge = TRUE, bg = "gray90")