The two-piece normal distribution

The two-piece normal, binormal, split normal, or asymmetric normal (Wallis 2014), is obtained by continuously joining two half-normal densities with different scale parameters but with the same location parameter: \[s(x \mid \mu, \sigma_1, \sigma_2) = \dfrac{2}{\sigma_1 + \sigma_2} \left[\phi\left(\dfrac{x-\mu}{\sigma_1}\right)I(x<\mu) + \phi\left(\dfrac{x-\mu}{\sigma_2}\right)I(x\geq\mu)\right],\] where \(\phi(x) = \dfrac{1}{\sqrt{2\pi}}\exp\left(-\dfrac{x^2}{2}\right)\). This distribution has a peculiar history as it has been re-invented several times, under different levels of generality, and different parameterisations (Rubio and Steel 2020; Wallis 2014). The two-piece normal density is unimodal by construction, it is continuous, and differentiable everywhere. However, it is not twice differentiable at the mode. Simulating from the two-piece normal distribution is easy using the twopiece R package, as illustrated in our first example. The two-piece normal is a member of the larger family of two-piece distributions (Rubio and Steel 2020).

The two-piece normal distribution has been applied in many areas such as finance, medicine, economy, psychology, and etcetera (Rubio and Steel 2020). See The Two Piece Normal Distribution and Fan Charts for applications in finance.

Bayesian inference using rstan

Rubio and Steel (2014) studied different types of priors for two-piece distributions. For illustrative purposes, we will adopt the following priors for the parameters \((\mu,\sigma_1,\sigma_2)\):

\[\begin{align} \mu &\sim N(0,100),\\ \sigma_1 &\sim InvGamma(0.01,0.01),\\ \sigma_2 &\sim InvGamma(0.01,0.01). \end{align}\]

Next, we present two illustrative examples. Example 1 describes how to simulate from the two-piece normal distribution using the twopiece R package, how to sample from the posterior distribution of \((\mu,\sigma_1,\sigma_2)\) using rstan, and how to calculate the posterior predictive distribution. Example 2 presents a real data application. The rstan code used in both examples can be found at the end of this document.

Example 1: Simulated data

rm(list=ls())
#------------------------------------------------
# Data simulation
#------------------------------------------------
# install.packages("devtools")
#devtools::install_github("medewitt/twopiece")
library(twopiece)

N <- 1000 # Sample size
set.seed(123) # Seed
# Simulated sample from a twopiece normal using the twopiece parameterisation
y <- rtp3(N,10,3,1,FUN=rnorm,param="tp")

# Data list
datasim <- list(y = y, N = N)

#------------------------------------------------
# Posterior simulation
#------------------------------------------------
library(rstan)

fit1 <- stan(
  file = "TPN.stan",  # Stan program
  data = datasim,    # named list of data
  chains = 1,             # number of Markov chains
  warmup = 1000,          # number of warmup iterations per chain
  iter = 11000,            # total number of iterations per chain
  cores = 2,              # number of cores (could use one per chain)
  refresh = 0             # no progress shown
)

# Trace plots and histograms (thinned samples)
index = seq(1,10000, by = 5) # thinning index
# thinned samples
mup <- extract(fit1, "mu")$mu[index]
sigma1p <- extract(fit1, "sigma1")$sigma1[index]
sigma2p <- extract(fit1, "sigma2")$sigma2[index]

# summaries
summary(mup)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   9.553   9.975  10.069  10.066  10.159  10.458

summary(sigma1p)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.653   2.994   3.062   3.063   3.129   3.424

summary(sigma2p)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.6875  0.8705  0.9233  0.9268  0.9804  1.2051

# traceplots
plot(mup, type="l", ylab = expression(mu), cex.axis = 1.5, cex.lab = 1.5)

plot(sigma1p,type="l", ylab = expression(sigma[1]), cex.axis = 1.5, cex.lab = 1.5)

plot(sigma2p,type="l", ylab = expression(sigma[2]), cex.axis = 1.5, cex.lab = 1.5)

# histograms
hist(mup, breaks = 30, probability = T, 
     main = "", xlab = expression(mu), cex.axis = 1.5, cex.lab = 1.5)
points(density(mup), type="l", col="red", lwd = 2)
box()

hist(sigma1p, breaks = 30, probability = T, 
     main = "", xlab = expression(sigma[1]), cex.axis = 1.5, cex.lab = 1.5)
points(density(sigma1p), type="l", col="red", lwd = 2)
box()

hist(sigma2p, breaks = 30, probability = T, 
     main = "", xlab = expression(sigma[2]), cex.axis = 1.5, cex.lab = 1.5)
points(density(sigma2p), type="l", col="red", lwd = 2)
box()

#------------------------------------------------
# Posterior predictive density function
#------------------------------------------------
dpred <- Vectorize(function(x){
  val <- vector()
  for(i in 1:length(index)) val[i] <- dtp3(x,mup[i],sigma1p[i],sigma2p[i], FUN=dnorm, param ="tp")
  return(mean(val))
})

hist(y,probability = T, breaks = 20, xlim = c(0,15),
     main="Predictive PDF", xlab = "y", cex.axis = 1.5, cex.lab = 1.5)
curve(dpred, 0, 15, n = 250, add = T, col="red", lwd = 2 )
box()

Example 2: Real Data - Body Mass Index

rm(list=ls())
# Data: BMI of 100 female Australian athletes
y <- c(20.56,20.67,21.86,21.88,18.96,21.04,21.69,20.62,22.64,19.44,25.75,21.20,22.03,25.44,22.63,
       21.86,22.27,21.27,23.47,23.19, 23.17,24.54,22.96,19.76,23.36,22.67,24.24,24.21,20.46,20.81,
       20.17,23.06,24.40,23.97,22.62,19.16,21.15,21.40,21.03,21.77,21.38,21.47,24.45,22.63,22.80,
       23.58,20.06,23.01,24.64,18.26,24.47,23.99,26.24,20.04,25.72,25.64,19.87,23.35,22.42,20.42,
       22.13,25.17,23.72,21.28,20.87,19.00,22.04,20.12,21.35,28.57,26.95,28.13,26.85,25.27,
       31.93,16.75,19.54,20.42,22.76,20.12,22.35,19.16,20.77,19.37,22.37,17.54,19.06,20.30,20.15,
       25.36,22.12,21.25,20.53,17.06,18.29,18.37,18.93,17.79,17.05,20.31)
summary(y)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   16.75   20.27   21.82   21.99   23.39   31.93

N <- length(y)

# Data list
datasim <- list(y = y, N = N)

#------------------------------------------------
# Posterior simulation
#------------------------------------------------
library(rstan)

fit1 <- stan(
  file = "TPN.stan",  # Stan program
  data = datasim,    # named list of data
  chains = 1,             # number of Markov chains
  warmup = 1000,          # number of warmup iterations per chain
  iter = 11000,            # total number of iterations per chain
  cores = 2,              # number of cores (could use one per chain)
  refresh = 0             # no progress shown
)

# Trace plots and histograms (thinned samples)
index = seq(1,10000, by = 5) # thinning index
# thinned samples
mup <- extract(fit1, "mu")$mu[index]
sigma1p <- extract(fit1, "sigma1")$sigma1[index]
sigma2p <- extract(fit1, "sigma2")$sigma2[index]

# summaries
summary(mup)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   18.45   20.37   20.80   20.76   21.16   22.60

summary(sigma1p)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.6642  1.5769  1.8018  1.8172  2.0459  3.1841

summary(sigma2p)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.197   3.103   3.371   3.395   3.667   4.969

# traceplots
plot(mup, type="l", ylab = expression(mu), cex.axis = 1.5, cex.lab = 1.5)

plot(sigma1p,type="l", ylab = expression(sigma[1]), cex.axis = 1.5, cex.lab = 1.5)

plot(sigma2p,type="l", ylab = expression(sigma[2]), cex.axis = 1.5, cex.lab = 1.5)

# histograms
hist(mup, breaks = 30, probability = T, 
     main = "", xlab = expression(mu), cex.axis = 1.5, cex.lab = 1.5)
points(density(mup), type="l", col="red", lwd = 2)
box()

hist(sigma1p, breaks = 30, probability = T, 
     main = "", xlab = expression(sigma[1]), cex.axis = 1.5, cex.lab = 1.5)
points(density(sigma1p), type="l", col="red", lwd = 2)
box()

hist(sigma2p, breaks = 30, probability = T, 
     main = "", xlab = expression(sigma[2]), cex.axis = 1.5, cex.lab = 1.5)
points(density(sigma2p), type="l", col="red", lwd = 2)
box()

#------------------------------------------------
# Posterior predictive density function
#------------------------------------------------
dpred <- Vectorize(function(x){
  val <- vector()
  for(i in 1:length(index)) val[i] <- dtp3(x,mup[i],sigma1p[i],sigma2p[i], FUN=dnorm, param ="tp")
  return(mean(val))
})

hist(y, probability = T, breaks = 20, xlim = c(15,35),
     main="Predictive PDF", xlab = "BMI", cex.axis = 1.5, cex.lab = 1.5)
curve(dpred, 15, 35, n = 250, add = T, col="red", lwd = 2 )
box()

TPN.stan code

data {
  int < lower = 1 > N; // Sample size
  vector[N] y; // Sample
}

parameters {
  real mu; // mean
  real < lower = 0 > sigma1;
  real < lower = 0 > sigma2;
}

model {
  // Priors
  target += normal_lpdf(mu | 0, 100);
  target += inv_gamma_lpdf(sigma1 | 0.01, 0.01);
  target += inv_gamma_lpdf(sigma2 | 0.01, 0.01);
  // Likelihood
  for(n in 1:N){
    if(y[n] <  mu)
    {
      target += normal_lpdf( (y[n]-mu)/sigma1 | 0 , 1) + log(2) - log(sigma1 + sigma2);
    }
    if(y[n] >=  mu){
      target += normal_lpdf( (y[n]-mu)/sigma2 | 0 , 1) + log(2) - log(sigma1 + sigma2);
    }
  }
}

generated quantities {
}

References

References

Rubio, F.J., and M.F.J. Steel. 2014. “Inference in Two-Piece Location-Scale Models with Jeffreys Priors.” Bayesian Analysis 9 (1): 1–22.

———. 2020. “The Family of Two-Piece Distributions.” Significance 17 (1): 12–13.

Wallis, K.F. 2014. “The Two-Piece Normal, Binormal, or Double Gaussian Distribution: Its Origin and Rediscoveries.” Statistical Science 29 (1): 106–12.

Bayesian inference on the two-piece normal distribution using rstan

F. Javier Rubio

May 01, 2020