```
library(magrittr)
library(dplyr)
library(tidyr)
library(mvtnorm)
library(assertthat)
library(ggplot2)
data1 <- read.table(file = "./bank", sep = "")
names(data1) <- c("X1","X2","X3","X4","Y")
head(data1)
## X1 X2 X3 X4 Y
## 1 214.8 131.0 131.1 9.0 0
## 2 214.6 129.7 129.7 8.1 0
## 3 214.8 129.7 129.7 8.7 0
## 4 214.8 129.7 129.6 7.5 0
## 5 215.0 129.6 129.7 10.4 0
## 6 215.7 130.8 130.5 9.0 0
data1matrix <- as.matrix(data1)
## Prior constructor
log_prior_constructor <- function(sigma) {
assert_that(length(sigma) == 1)
assert_that(is.numeric(sigma))
## Given sigma, return a prior function
function(beta) {
assert_that(is.vector(beta))
out <- dmvnorm(x = beta,
mean = rep(0, length(beta)),
sigma = sigma^2 * diag(1, nrow = length(beta)),
log = TRUE)
assert_that(length(out) == 1)
out
}
}
## For a diffuse normal prior, use prior standard deviation 10, i.e. prior variance $10^2 = 100$
log_prior <- log_prior_constructor(10)
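## This corresponds to an independent N(0, 100) prior on each coefficient, i.e. beta ~ MVN(0, 100 * I)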
## Likelihood constructor
log_likelihood_constructor <- function(X, Y) {
assert_that(is.matrix(X))
assert_that(is.vector(Y))
assert_that(nrow(X) == length(Y))
## Given data, return a likelihood function
function(beta) {
assert_that(is.vector(beta))
## Linear predictor
lp <- as.numeric(X %*% beta)
## Probit link
p <- pnorm(q = lp)
## Independent Bernoulli outcome
out <- sum((Y * log(p) + (1 - Y) * log(1 - p)))
assert_that(length(out) == 1)
out
}
}
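## Note: sum(Y * log(p) + (1 - Y) * log(1 - p)) can underflow to -Inf when a fitted
## probability is numerically 0 or 1. A minimal sketch of a more stable variant of the
## same probit likelihood (illustrative only, not used below; the constructor name is ours)
## keeps the calculation on the log scale via pnorm's log.p argument:
log_likelihood_constructor_stable <- function(X, Y) {
function(beta) {
lp <- as.numeric(X %*% beta)
## log(Phi(lp)) and log(1 - Phi(lp)) = log(Phi(-lp)), computed without forming p explicitly
sum(Y * pnorm(lp, log.p = TRUE) + (1 - Y) * pnorm(-lp, log.p = TRUE))
}
}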
logL <- log_likelihood_constructor(X = as.matrix(data1[,1:4]), Y = data1$Y)
## Define unnormalized posterior (the same normalizing constant cancels when calculating the acceptance ratio)
log_posterior <- function(beta) {
log_prior(beta) + logL(beta)
}
## Use an MVN centered at the current beta as the proposal distribution (the proposal is symmetric, so this is again the Metropolis algorithm as in Lab 4)
## Random proposal generation function
proposal_generator <- function(beta, sigma) {
assert_that(is.vector(beta))
assert_that(is.matrix(sigma))
out <- rmvnorm(n = 1,
mean = beta,
sigma = sigma)
out <- as.vector(out)
assert_that(is.vector(out))
out
}
## Proposal evaluation function q(new_beta | beta)
log_proposal_density <- function(new_beta, beta, sigma) {
assert_that(is.vector(beta))
assert_that(is.matrix(sigma))
out <- dmvnorm(x = new_beta,
mean = beta,
sigma = sigma,
log = TRUE)
assert_that(length(out) == 1)
out
}
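## Note: with the symmetric normal proposal the two proposal-density terms cancel in the
## acceptance ratio; the density is kept explicit so the sampler below also works with
## asymmetric proposals.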
## MH sampler
mh_sampler <- function(log_target_density, log_proposal_density, proposal_generator, init, R) {
assert_that(is.function(log_target_density))
assert_that(is.function(log_proposal_density))
assert_that(is.function(proposal_generator))
assert_that(is.vector(init))
assert_that(is.numeric(init))
assert_that(is.numeric(R))
## Initialize a matrix to hold the chain (one row per iteration, one column per coefficient)
x <- matrix(as.numeric(NA), nrow = R, ncol = length(init))
## Acceptance indicator (initialized with FALSE)
a <- logical(length = R)
## Initial value (init is a vector of length(beta))
x[1,] <- init
for (r in seq_len(R)[-1]) {
## Obtain a proposed value conditioning on the previous value
proposal <- proposal_generator(x[r-1,])
## Evaluate both densities at the proposed value
log_target_at_proposal <- log_target_density(proposal)
log_target_at_previous <- log_target_density(x[r-1,])
## The log acceptance ratio is capped at 0, so the acceptance ratio itself lies in [0,1]; with the symmetric proposal the proposal-density terms cancel
log_acceptance_ratio <- min((log_target_at_proposal + log_proposal_density(x[r-1,], proposal)) -
(log_target_at_previous + log_proposal_density(proposal, x[r-1,])),
0)
## Failure detector
if (is.na(log_acceptance_ratio)) {
cat("Iteration", r, "\n")
cat("Proposal", proposal, "\n")
cat("log_target_at_proposal", log_target_at_proposal, "\n")
cat("log_target_at_previous", log_target_at_previous, "\n")
cat("log_proposal_density(x[r-1,], proposal)", log_proposal_density(x[r-1,], proposal), "\n")
cat("log_proposal_density(proposal, x[r-1,])", log_proposal_density(proposal, x[r-1,]), "\n")
}
if (runif(n = 1) <= exp(log_acceptance_ratio)) {
## If U(0,1) is less than or equal to acceptance ratio
## Make a move
x[r,] <- proposal
a[r] <- TRUE
} else {
## Otherwise, don't move
x[r,] <- x[r-1,]
}
}
data.frame(beta = x,
a = a,
r = seq_along(a))
}
## Fit Frequentist model for initial value and covariance matrix
glmfit1 <- glm(formula = Y ~ -1 + X1 + X2 + X3 + X4,
family = binomial(link = "probit"),
data = data1)
summary(glmfit1)
##
## Call:
## glm(formula = Y ~ -1 + X1 + X2 + X3 + X4, family = binomial(link = "probit"),
## data = data1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5349 -0.3438 -0.0148 0.2000 3.7332
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## X1 -1.1810 0.2747 -4.299 1.72e-05 ***
## X2 0.9515 0.5715 1.665 0.0959 .
## X3 0.9217 0.5144 1.792 0.0731 .
## X4 1.1028 0.1683 6.554 5.60e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 277.259 on 200 degrees of freedom
## Residual deviance: 92.109 on 196 degrees of freedom
## AIC: 100.11
##
## Number of Fisher Scoring iterations: 7
## Bayesian model
out1 <- mh_sampler(log_target_density = log_posterior,
log_proposal_density = function(x_next, x) {log_proposal_density(x_next, x, sigma = 0.1 * vcov(glmfit1))},
proposal_generator = function(x) {proposal_generator(x, sigma = 0.1 * vcov(glmfit1))},
init = coef(glmfit1),
R = 10^4)
## Acceptance ratio
cat("### Acceance ratio\n")
## ### Acceance ratio
mean(out1$a)
## [1] 0.7594
cat("### Summary\n")
## ### Summary
summary(out1)
## beta.1 beta.2 beta.3 beta.4 a r
## Min. :-2.2576 Min. :-0.6920 Min. :-0.7922 Min. :0.6081 Mode :logical Min. : 1
## 1st Qu.:-1.3968 1st Qu.: 0.5590 1st Qu.: 0.5467 1st Qu.:1.0191 FALSE:2406 1st Qu.: 2501
## Median :-1.2000 Median : 0.9881 Median : 0.9131 Median :1.1417 TRUE :7594 Median : 5000
## Mean :-1.2193 Mean : 0.9945 Mean : 0.9388 Mean :1.1483 NA's :0 Mean : 5000
## 3rd Qu.:-1.0286 3rd Qu.: 1.4166 3rd Qu.: 1.3006 3rd Qu.:1.2706 3rd Qu.: 7500
## Max. :-0.4148 Max. : 3.1923 Max. : 3.1359 Max. :2.0007 Max. :10000
namesBeta <- Filter(f = function(elt) {grepl("beta", elt)}, x = names(out1))
namesBeta
## [1] "beta.1" "beta.2" "beta.3" "beta.4"
out2 <- pivot_longer(data = out1, cols = all_of(namesBeta), names_to = "param", values_to = "value")
## Plot of chain
ggplot(data = out2, mapping = aes(x = r, y = value)) +
geom_line(alpha = 1 / 5) +
facet_grid(param ~ .) +
theme_bw() + theme(legend.key = element_blank())
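## A quick numerical summary sketch of the MH draws (the 1000-draw burn-in is an
## illustrative choice, not part of the original analysis): posterior means and
## 95% credible intervals for each coefficient.
burnin <- 1000
post_mh <- as.matrix(out1[out1$r > burnin, namesBeta])
mh_summary <- t(apply(post_mh, 2, function(draws) {
c(mean = mean(draws), quantile(draws, probs = c(0.025, 0.975)))
}))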
## Random proposal generation function for beta
proposal_generator_beta <- function(data, z, sigma) {
assert_that(is.matrix(data))
assert_that(is.vector(z))
assert_that(is.numeric(sigma))
dataX <- data[,c(1:4)]
out <- rmvnorm(n = 1,
mean = solve(sigma^{-2} * diag(ncol(dataX)) + t(dataX) %*% dataX) %*% (t(dataX) %*% z),
sigma = solve(sigma^{-2} * diag(ncol(dataX)) + t(dataX) %*% dataX))
out <- as.vector(out)
assert_that(is.vector(out))
out
}
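## The draw above is the conjugate full conditional implied by the normal prior and the
## latent-variable model: with beta ~ N(0, sigma^2 * I) and z | beta ~ N(X %*% beta, I),
## beta | z ~ N(B %*% t(X) %*% z, B), where B = solve(sigma^(-2) * I + t(X) %*% X).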
## Random proposal generation function for z
proposal_generator_z <- function(data, beta) {
assert_that(is.matrix(data))
assert_that(is.vector(beta))
out <- rep(NA, nrow(data))
rtrunc <- function(row){ # Define the truncated normal sampler
if (row[5] == 1){
X = 0
while (X <= 0){
X = rnorm(n = 1, row[c(1:4)] %*% beta, 1)
}
}else{
X = 0
while (X >= 0){
X = rnorm(n = 1, row[c(1:4)] %*% beta, 1)
}
}
return(X)
}
out <- apply(data, 1, rtrunc)
out <- as.vector(out)
assert_that(is.vector(out))
out
}
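## The rejection loop above can be slow when the acceptance region has small probability.
## A minimal vectorized sketch of the same one-sided truncated normal draw using base-R
## inverse-CDF sampling (an alternative, not used below; it can lose accuracy for extreme
## linear predictors):
proposal_generator_z_invcdf <- function(data, beta) {
lp <- as.numeric(data[, 1:4] %*% beta)  # linear predictor X %*% beta
y <- data[, 5]
cut <- pnorm(0, mean = lp, sd = 1)      # P(z < 0) under N(lp, 1)
## y = 1: truncate to (0, Inf); y = 0: truncate to (-Inf, 0)
u <- runif(n = length(y), min = ifelse(y == 1, cut, 0), max = ifelse(y == 1, 1, cut))
qnorm(u, mean = lp, sd = 1)
}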
## Gibbs sampler
Gibbs_sampler <- function(proposal_generator_beta, proposal_generator_z, init, R) {
assert_that(is.function(proposal_generator_beta))
assert_that(is.function(proposal_generator_z))
assert_that(is.vector(init))
assert_that(is.numeric(init))
assert_that(is.numeric(R))
## Initialize matrices to hold the beta chain and the latent z draws
x <- matrix(as.numeric(NA), nrow = R, ncol = length(init))
z <- matrix(as.numeric(NA), nrow = R, ncol = nrow(data1matrix))
## Initial value (init is a vector of length(beta))
x[1,] <- init
for (r in seq_len(R)[-1]) {
## Obtain a proposed value for z
z[r,] <- proposal_generator_z(x[r-1,])
## Obtain a proposed value for beta
x[r,] <- proposal_generator_beta(z[r,])
}
data.frame(beta = x,
#Z = z,
r = seq_along(x[,1]))
}
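## Each Gibbs iteration alternates two exact conditional draws, z | beta, y (independent
## truncated normals) and beta | z (multivariate normal), so every draw is accepted.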
## Reuse the Frequentist fit glmfit1 from above for the initial value
## Bayesian model
out3 <- Gibbs_sampler(proposal_generator_beta = function(prev_z) {proposal_generator_beta(data = data1matrix, z = prev_z, sigma = 10)},
proposal_generator_z = function(prev_beta) {proposal_generator_z(data = data1matrix, beta = prev_beta)},
init = coef(glmfit1),
R = 10^4)
cat("### Summary\n")
## ### Summary
summary(out3)
## beta.1 beta.2 beta.3 beta.4 r
## Min. :-2.1991 Min. :-1.2446 Min. :-1.0636 Min. :0.5738 Min. : 1
## 1st Qu.:-1.3810 1st Qu.: 0.5536 1st Qu.: 0.5947 1st Qu.:1.0127 1st Qu.: 2501
## Median :-1.2041 Median : 0.9604 Median : 0.9510 Median :1.1260 Median : 5000
## Mean :-1.2090 Mean : 0.9674 Mean : 0.9501 Mean :1.1332 Mean : 5000
## 3rd Qu.:-1.0341 3rd Qu.: 1.3667 3rd Qu.: 1.3105 3rd Qu.:1.2445 3rd Qu.: 7500
## Max. :-0.2347 Max. : 3.2334 Max. : 2.9147 Max. :1.7286 Max. :10000
namesBeta <- Filter(f = function(elt) {grepl("beta", elt)}, x = names(out3))
namesBeta
## [1] "beta.1" "beta.2" "beta.3" "beta.4"
out4 <- pivot_longer(data = out3, cols = all_of(namesBeta), names_to = "param", values_to = "value")
## Plot of chain
ggplot(data = out4, mapping = aes(x = r, y = value)) +
geom_line(alpha = 1 / 5) +
facet_grid(param ~ .) +
theme_bw() + theme(legend.key = element_blank())
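## A sketch of a quantitative mixing check backing up the trace plots (assumes the coda
## package is installed; not part of the original analysis): effective sample sizes for
## the two chains.
library(coda)
ess_mh <- effectiveSize(as.mcmc(as.matrix(out1[, namesBeta])))    # Metropolis-Hastings chain
ess_gibbs <- effectiveSize(as.mcmc(as.matrix(out3[, namesBeta]))) # Gibbs sampler chain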
```
From the above results, the Gibbs sampler appears to converge better than the Metropolis-Hastings algorithm, but it takes longer to run because of the rejection-based truncated normal sampler for the \(z_i\)'s.