Accept-Reject, Metropolis-Hastings and Gibbs sampling algorithms

```

References

Question of Interest

In Bayesian statistics, one common question of interest is to be able to sample from a distribution with no directly available sampler. Here we are interested in sampling from

\[f(x) \propto \exp\bigg\{\frac{(x - \mu)^2}{\sigma^2}\bigg\} \times x^2.\]

Load packages

library(magrittr)
library(ggplot2)

Examine the target distribution and covering normal distribution

## Target unnormalized density constructor
construct_target_density <- function(mu, sigma, C = 1) {
    function(x) {
        exp(-(x - mu)^2 / (sigma^2)) * x^2 / C
    }
}

For \(\mu = 0, \sigma^2 = 1\), one can find the normalizing constant for \(f(x) \propto \exp(-x^2) \times x^2\) is \(C = \sqrt{\pi}/2\).

## Simple case with mu = 0, sigma = 1
target_density0 <- construct_target_density(mu = 0, sigma = 1, C = sqrt(pi)/2)

## Examine visually to examine peak height
g1 <- ggplot(data = data.frame(x = 0), mapping = aes(x = x)) +
    geom_path(stat = "function", fun = target_density0,
              n = 10001, mapping = aes(color = "1")) +
    geom_path(stat = "function", fun = dnorm,
              n = 10001, mapping = aes(color = "2")) +
    geom_path(stat = "function", fun = function(x) {5*dnorm(x, sd = 3)},
              n = 10001, mapping = aes(color = "3")) +
    scale_x_continuous(limits = c(-10,10)) +
    scale_color_manual(name = "Functions",
                       values = c("blue", "red", "green"), # Color specification
                       labels = c("Target", "N(0,1^2)", "5*N(0,3^2)")) +
    theme_bw() + theme(legend.key = element_blank())
print(g1)

Implement the Accept-Reject sampler

RejectionSampling <- function(target_density, proposal_density, proposal_generator, R) {

    ## Initialize a vector (all 0's)
    v <- numeric(length = R) 
    ## Acceptance indicator (all FALSE)
    a <- logical(length = R)

    for (r in seq_len(R)) {
        ## Obtain a proposed value
        proposal <- proposal_generator()
        v[r] <- proposal
        ## Evaluate both densities at the proposed value
        target_density_value   <- target_density(proposal)
        proposal_density_value <- proposal_density(proposal)

        ## This is assured to be [0,1]
        acceptance_ratio <- target_density_value / proposal_density_value

        ## If U(0,1) is less than or equal to acceptance ratio, accept
        if(runif(n = 1) <= acceptance_ratio) {
            a[r] <- TRUE
        }
    }

    data.frame(v = v,
               a = a)
}

out0 <- RejectionSampling(target_density = target_density0,
                          proposal_density = function(x) {5*dnorm(x, sd = 3)},
                          proposal_generator = function() {rnorm(n = 1, sd = 3)},
                          R = 10^5)
## Acceptance ratio (should be close to 1/5 = 0.2)
mean(out0$a)

## [1] 0.19822

g1 + geom_density(data = subset(out0, a == TRUE), mapping = aes(x = v))

Implement the Metropolis(-Hastings) sampler

Here we adopt the proposal distribution as

\[q(x^*|x_{t-1}) = N(x^*; x_{t-1}, 3^2),\]

which is symmetric, i.e. \(q(x^*|x_{t-1}) = q(x_{t-1}|x^*) = \frac{1}{\sqrt{2\pi\times 3^2}}\exp\big\{-\frac{(x^* - x_{t-1})^2}{2\times 3^2}\big\}\). Thus we are in fact using the Metropolis algorithm.

MHSampling <- function(target_density, proposal_generator, R) {

    ## Initialize a vector (all 0's)
    v <- numeric(length = R)
    ## Acceptance indicator (all FALSE)
    a <- logical(length = R)

    for (r in seq_len(R)[-1]) {
        ## Obtain a proposed value (here the proposal distribution is symmetric)
        proposal <- proposal_generator(v[r-1])
        ## Evaluate both densities at the proposed value
        target_density_at_proposal <- target_density(proposal)
        target_density_at_previous <- target_density(v[r-1])

        ## This is assured to be [0,1]
        acceptance_ratio <- min(target_density_at_proposal / target_density_at_previous, 1)

        if (runif(n = 1) <= acceptance_ratio) {
            ## If U(0,1) is less than or equal to acceptance ratio
            ## Make a move
            v[r] <- proposal
            a[r] <- TRUE
        } else {
            ## Otherwise, don't move
            v[r] <- v[r-1]
        }
    }

    data.frame(v = v,
               a = a)
}

out1 <- MHSampling(target_density = target_density0,
                   proposal_generator = function(par) {rnorm(n = 1, mean = par, sd = 3)},
                   R = 10^5)
out1$index <- seq_along(out1$v)
## Acceptance ratio
mean(out1$a)

## [1] 0.33803

## Plot of chain
ggplot(data = out1, mapping = aes(x = index, y = v)) +
    geom_point(alpha = 1/10) +
    theme_bw() + theme(legend.key = element_blank())

## Check auto-correlation (slicing by 10 looks ok)
acf(out1$v)

## Plot of chain (every 10-th point)
ggplot(data = subset(out1, index %% 10 == 0), mapping = aes(x = index, y = v)) +
    geom_point(alpha = 1/10) +
    theme_bw() + theme(legend.key = element_blank())

## Density plot after slicing
g1 + geom_density(data = subset(out1, index %% 10 == 0),
                  mapping = aes(x = v))

This time we adopt the new proposal distribution as

\[q(x^*|x_{t-1}) = U(x^*; -5,5).\]

Again, it is symmetric.

## Another attempt using a different proposal distribution
out2 <- MHSampling(target_density = target_density0,
                   proposal_generator = function(par) {runif(n = 1, min = -5, max = 5)},
                   R = 10^5)
out2$index <- seq_along(out2$v)
## Acceptance ratio
mean(out2$a)

## [1] 0.29826

## Plot of chain
ggplot(data = out2, mapping = aes(x = index, y = v)) +
    geom_point(alpha = 1/10) +
    theme_bw() + theme(legend.key = element_blank())

## Check auto-correlation (slicing by 10 looks ok)
acf(out2$v)

## Plot of chain (every 10-th point)
ggplot(data = subset(out2, index %% 10 == 0), mapping = aes(x = index, y = v)) +
    geom_point(alpha = 1/10) +
    theme_bw() + theme(legend.key = element_blank())

## Density plot after slicing
g1 + geom_density(data = subset(out2, index %% 10 == 0),
                  mapping = aes(x = v))

Implement the Gibbs sampler through Data Augmentation

Gibbs sampler is designed to sample from joint distribution of a set of random variables. Here we consider introducing a new random variable \(Z\) and consider the following joing density \((X, Z)\) as

\[f_{X,Z}(x,z) \propto \exp\{-x^2\} I(z < |x|, z \geq 0)z.\]

If we are able to sample from this joint density, the marginally we have

\[\begin{align*} f_X(x) &\propto \int \exp\{-x^2\} I(z < |x|, z \geq 0)zdz\\ &\propto \exp\{-x^2\}x^2, \end{align*}\]

which is what we want.

In this case, Gibbs sampler provides a way to sample from the joint density by specifying the full conditional distributions as

\[\begin{align*} f_{X|Z}(x|z) &\propto \exp\{-x^2\}I(z < |x|, z \geq 0) \quad \text{(which is a truncated normal)}\\ f_{Z|X}(z|x) &\propto zI(z < |x|, z \geq 0) \quad \text{(which is a scaled Beta(2,1) distribution)}\\ \end{align*}\]

Gibbs_Sampling <- function(R, burnin = 0.1){
  rtrunc <- function(Z){    # Define the truncated normal sampler based on A-R sampling
    X = 0
    while (abs(X) <= Z){
      X = rnorm(1, sd = sqrt(1/2))
    }
    return(X)
  }
  X_list = numeric(length = R)
  Z_list = numeric(length = R)
  for (i in seq_len(R)[-1]){
      X_list[i] = rtrunc(Z_list[i-1])                 # truncated normal
      Z_list[i] = rbeta(n = 1, 2, 1) * abs(X_list[i]) # proportional to beta(2,1)
  }
  afterburnin = X_list[seq(R * burnin + 1, R)]
  data.frame(v = afterburnin)
}

out4 <- Gibbs_Sampling(R = 10^5)
out4$index <- seq_along(out4$v)

## Plot of chain
ggplot(data = out4, mapping = aes(x = index, y = v)) +
    geom_point(alpha = 1/10) +
    theme_bw() + theme(legend.key = element_blank())

## Density plot after slicing
g1 + geom_density(data = out4,
                  mapping = aes(x = v))

Top Page: http://rpubs.com/kaz_yos/; http://rpubs.com/xihaoli/
github: https://github.com/kaz-yos/; https://github.com/xihaoli/