Load packages

library(magrittr)
library(dplyr)
library(tidyr)
library(mvtnorm)
library(assertthat)
library(ggplot2)

Load data

data1 <- read.table(file = "./bank", sep = "")
names(data1) <- c("X1","X2","X3","X4","Y")
head(data1)
##      X1    X2    X3   X4 Y
## 1 214.8 131.0 131.1  9.0 0
## 2 214.6 129.7 129.7  8.1 0
## 3 214.8 129.7 129.7  8.7 0
## 4 214.8 129.7 129.6  7.5 0
## 5 215.0 129.6 129.7 10.4 0
## 6 215.7 130.8 130.5  9.0 0
data1matrix <- as.matrix(data1)

Set up model

## Prior constructor
log_prior_constructor <- function(sigma) {
    assert_that(length(sigma) == 1)
    assert_that(is.numeric(sigma))
    ## Given sigma, return a prior function
    function(beta) {
        assert_that(is.vector(beta))
        out <- dmvnorm(x = beta,
                       mean = rep(0, length(beta)),
                       sigma = sigma^2 * diag(1, nrow = length(beta)),
                       log = TRUE)
        assert_that(length(out) == 1)
        out
    }
}
## For a diffuse normal prior, take $\sigma^2 = 10^2 = 100$, i.e. sigma = 10
log_prior <- log_prior_constructor(10)
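As a quick sanity check (illustrative, not part of the original analysis): the independent \(N(0, 10^2)\) prior factorizes, so the MVN log-density should equal a sum of univariate normal log-densities.

## Illustrative check of the prior at beta = 0
all.equal(log_prior(c(0, 0, 0, 0)),
          sum(dnorm(c(0, 0, 0, 0), mean = 0, sd = 10, log = TRUE)))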

## Likelihood constructor
log_likelihood_constructor <- function(X, Y) {
    assert_that(is.matrix(X))
    assert_that(is.vector(Y))
    assert_that(nrow(X) == length(Y))
    ## Given data, return a likelihood function
    function(beta) {
        assert_that(is.vector(beta))
        ## Linear predictor
        lp <- as.numeric(X %*% beta)
        ## Probit link
        p  <- pnorm(q = lp)
        ## Independent Bernoulli outcome
        out <- sum(Y * log(p) + (1 - Y) * log(1 - p))
        assert_that(length(out) == 1)
        out
    }
}
logL <- log_likelihood_constructor(X = as.matrix(data1[,1:4]), Y = data1$Y)
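Note that pnorm() can underflow to 0 or 1 for extreme linear predictors, making log(p) or log(1 - p) infinite. A numerically safer variant is sketched below (the name log_likelihood_safe_constructor is ours and is not used elsewhere); it uses pnorm(..., log.p = TRUE) and the identity \(1 - \Phi(x) = \Phi(-x)\).

## Sketch: evaluate the Bernoulli log-likelihood on the log scale
log_likelihood_safe_constructor <- function(X, Y) {
    function(beta) {
        lp <- as.numeric(X %*% beta)
        sum(Y * pnorm(lp, log.p = TRUE) + (1 - Y) * pnorm(-lp, log.p = TRUE))
    }
}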

## Define unnormalized posterior (the same normalizing constant cancels when calculating the acceptance ratio)
log_posterior <- function(beta) {
    log_prior(beta) + logL(beta)
}

## Use an MVN centered at the current beta as the proposal distribution (the
## proposal is symmetric, so this reduces to the Metropolis algorithm as in Lab 4)
## Random proposal generation function
proposal_generator <- function(beta, sigma) {
    assert_that(is.vector(beta))
    assert_that(is.matrix(sigma))
    out <- rmvnorm(n = 1,
                   mean = beta,
                   sigma = sigma)
    out <- as.vector(out)
    assert_that(is.vector(out))
    out
}
## Proposal evaluation function q(new_beta | beta)
log_proposal_density <- function(new_beta, beta, sigma) {
    assert_that(is.vector(beta))
    assert_that(is.matrix(sigma))
    out <- dmvnorm(x = new_beta,
                   mean = beta,
                   sigma = sigma,
                   log = TRUE)
    assert_that(length(out) == 1)
    out
}
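As an illustrative check (the covariance S and the points b0, b1 below are made up for the demo), the Gaussian random-walk proposal is symmetric, i.e. \(q(a \mid b) = q(b \mid a)\), which is why the q terms cancel in the acceptance ratio:

## Symmetry of the proposal density
S  <- diag(0.1, 4)
b0 <- rnorm(4)
b1 <- proposal_generator(b0, S)
all.equal(log_proposal_density(b1, b0, S),
          log_proposal_density(b0, b1, S))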

## MH sampler
mh_sampler <- function(log_target_density, log_proposal_density, proposal_generator, init, R) {
    assert_that(is.function(log_target_density))
    assert_that(is.function(log_proposal_density))
    assert_that(is.function(proposal_generator))
    assert_that(is.vector(init))
    assert_that(is.numeric(init))
    assert_that(is.numeric(R))

    ## Initialize a matrix to hold the chain (one row per iteration)
    x <- matrix(as.numeric(NA), nrow = R, ncol = length(init))
    ## Acceptance indicator (initialized to FALSE)
    a <- logical(length = R)
    ## Initial value (init is a vector of length(beta))
    x[1,] <- init

    for (r in seq_len(R)[-1]) {

        ## Obtain a proposed value conditioning on the previous value
        proposal <- proposal_generator(x[r-1,])

        ## Evaluate both densities at the proposed value
        log_target_at_proposal <- log_target_density(proposal)
        log_target_at_previous <- log_target_density(x[r-1,])

        ## The proposal density terms cancel here because the Gaussian random-walk
        ## proposal is symmetric, but they are kept for generality; capping the log
        ## ratio at 0 keeps the acceptance probability exp(...) in (0, 1]
        log_acceptance_ratio <- min((log_target_at_proposal + log_proposal_density(x[r-1,], proposal)) -
                                    (log_target_at_previous + log_proposal_density(proposal, x[r-1,])),
                                    0)

        ## Failure detector
        if (is.na(log_acceptance_ratio)) {
            cat("Iteration", r, "\n")
            cat("Proposal", proposal, "\n")
            cat("log_target_at_proposal", log_target_at_proposal, "\n")
            cat("log_target_at_previous", log_target_at_previous, "\n")
            cat("log_proposal_density(x[r-1,], proposal)", log_proposal_density(x[r-1,], proposal), "\n")
            cat("log_proposal_density(proposal, x[r-1,])", log_proposal_density(proposal, x[r-1,]), "\n")
        }

        if (runif(n = 1) <= exp(log_acceptance_ratio)) {
            ## If U(0,1) is less than or equal to acceptance ratio
            ## Make a move
            x[r,] <- proposal
            a[r] <- TRUE
        } else {
            ## Otherwise, don't move
            x[r,] <- x[r-1,]
        }
    }

    data.frame(beta = x,
               a = a,
               r = seq_along(a))
}

## Fit Frequentist model for initial value and covariance matrix
glmfit1 <- glm(formula = Y ~ -1 + X1 + X2 + X3 + X4,
               family  = binomial(link = "probit"),
               data    = data1)
summary(glmfit1)
## 
## Call:
## glm(formula = Y ~ -1 + X1 + X2 + X3 + X4, family = binomial(link = "probit"), 
##     data = data1)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5349  -0.3438  -0.0148   0.2000   3.7332  
## 
## Coefficients:
##    Estimate Std. Error z value Pr(>|z|)    
## X1  -1.1810     0.2747  -4.299 1.72e-05 ***
## X2   0.9515     0.5715   1.665   0.0959 .  
## X3   0.9217     0.5144   1.792   0.0731 .  
## X4   1.1028     0.1683   6.554 5.60e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 277.259  on 200  degrees of freedom
## Residual deviance:  92.109  on 196  degrees of freedom
## AIC: 100.11
## 
## Number of Fisher Scoring iterations: 7
## Bayesian model
out1 <- mh_sampler(log_target_density = log_posterior,
                   log_proposal_density = function(x_next, x) {
                       log_proposal_density(x_next, x, sigma = 0.1 * vcov(glmfit1))
                   },
                   proposal_generator = function(x) {
                       proposal_generator(x, sigma = 0.1 * vcov(glmfit1))
                   },
                   init = coef(glmfit1),
                   R = 10^4)

## Acceptance ratio
cat("### Acceance ratio\n")
## ### Acceance ratio
mean(out1$a)
## [1] 0.7594
cat("### Summary\n")
## ### Summary
summary(out1)
##      beta.1            beta.2            beta.3            beta.4           a                 r        
##  Min.   :-2.2576   Min.   :-0.6920   Min.   :-0.7922   Min.   :0.6081   Mode :logical   Min.   :    1  
##  1st Qu.:-1.3968   1st Qu.: 0.5590   1st Qu.: 0.5467   1st Qu.:1.0191   FALSE:2406      1st Qu.: 2501  
##  Median :-1.2000   Median : 0.9881   Median : 0.9131   Median :1.1417   TRUE :7594      Median : 5000  
##  Mean   :-1.2193   Mean   : 0.9945   Mean   : 0.9388   Mean   :1.1483   NA's :0         Mean   : 5000  
##  3rd Qu.:-1.0286   3rd Qu.: 1.4166   3rd Qu.: 1.3006   3rd Qu.:1.2706                   3rd Qu.: 7500  
##  Max.   :-0.4148   Max.   : 3.1923   Max.   : 3.1359   Max.   :2.0007                   Max.   :10000
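The summary above includes pre-convergence draws. A sketch of posterior summaries after discarding a burn-in (the 1000-iteration length is an assumption, not tuned):

## Posterior means of beta after an assumed burn-in
burnin <- 1000
colMeans(out1[out1$r > burnin, grepl("beta", names(out1))])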

Plotting

namesBeta <- Filter(f = function(elt) {grepl("beta", elt)}, x = names(out1))
namesBeta
## [1] "beta.1" "beta.2" "beta.3" "beta.4"
out2 <- pivot_longer(data = out1, cols = all_of(namesBeta),
                     names_to = "param", values_to = "value")


## Plot of chain
ggplot(data = out2, mapping = aes(x = r, y = value)) +
    geom_line(alpha = 1 / 5) +
    facet_grid(param ~ .) +
    theme_bw() + theme(legend.key = element_blank())

Now we implement the Gibbs sampler (with latent continuous data) for the bank data.

Set up model

The full conditional for \(\boldsymbol{\beta}\) is MVN.
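Concretely, with the \(N(\mathbf{0}, \sigma^2 I)\) prior on \(\boldsymbol{\beta}\) and \(\mathbf{z} \mid \boldsymbol{\beta} \sim N(X\boldsymbol{\beta}, I)\), standard conjugate-normal algebra gives

\[
\boldsymbol{\beta} \mid \mathbf{z} \sim N\left( \left(\sigma^{-2} I + X^{\top} X\right)^{-1} X^{\top} \mathbf{z},\ \left(\sigma^{-2} I + X^{\top} X\right)^{-1} \right),
\]

which is the mean and covariance used in the sampler below.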

## Draw beta from its MVN full conditional (a Gibbs update, so it is always accepted)
proposal_generator_beta <- function(data, z, sigma) {
    assert_that(is.matrix(data))
    assert_that(is.vector(z))
    assert_that(is.numeric(sigma))
    dataX <- data[, 1:4]  # design matrix (columns X1-X4)
    ## Posterior covariance under the N(0, sigma^2 I) prior; call solve() once
    post_cov <- solve(sigma^(-2) * diag(ncol(dataX)) + t(dataX) %*% dataX)
    out <- rmvnorm(n = 1,
                   mean  = post_cov %*% (t(dataX) %*% z),
                   sigma = post_cov)
    out <- as.vector(out)
    assert_that(is.vector(out))
    out
}

The full conditional for \(z_i\) is truncated normal.
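Explicitly, matching the rejection rule in the sampler below,

\[
z_i \mid \boldsymbol{\beta}, y_i = 1 \sim N(\mathbf{x}_i^{\top} \boldsymbol{\beta}, 1) \text{ truncated to } (0, \infty), \qquad
z_i \mid \boldsymbol{\beta}, y_i = 0 \sim N(\mathbf{x}_i^{\top} \boldsymbol{\beta}, 1) \text{ truncated to } (-\infty, 0).
\]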

## Draw z from its truncated normal full conditional
proposal_generator_z <- function(data, beta) {
    assert_that(is.matrix(data))
    assert_that(is.vector(beta))
    ## Rejection sampler: redraw from N(x'beta, 1) until the value falls on
    ## the side of zero implied by the outcome (column 5 of data is Y)
    rtrunc <- function(row) {
        repeat {
            X <- rnorm(n = 1, mean = row[1:4] %*% beta, sd = 1)
            if ((row[5] == 1 && X > 0) || (row[5] == 0 && X < 0)) {
                return(X)
            }
        }
    }
    out <- apply(data, 1, rtrunc)
    out <- as.vector(out)
    assert_that(is.vector(out))
    out
}
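The rejection loop can spin for a long time when the truncation region has small probability under \(N(\mathbf{x}_i^{\top} \boldsymbol{\beta}, 1)\). An inverse-CDF alternative is sketched below (the name rtrunc_inverse is ours and is not used elsewhere); it draws the truncated normal with a single qnorm() call.

## Sketch: inverse-CDF draw from N(mu, 1) truncated by the sign implied by y
rtrunc_inverse <- function(mu, y) {
    if (y == 1) {
        u <- runif(1, min = pnorm(0, mean = mu), max = 1)  # mass on (0, Inf)
    } else {
        u <- runif(1, min = 0, max = pnorm(0, mean = mu))  # mass on (-Inf, 0)
    }
    qnorm(u, mean = mu, sd = 1)
}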

## Gibbs sampler
Gibbs_sampler <- function(proposal_generator_beta, proposal_generator_z, init, R) {
    assert_that(is.function(proposal_generator_beta))
    assert_that(is.function(proposal_generator_z))
    assert_that(is.vector(init))
    assert_that(is.numeric(init))
    assert_that(is.numeric(R))

    ## Initialize matrices for the beta chain and the latent data z
    x <- matrix(as.numeric(NA), nrow = R, ncol = length(init))
    z <- matrix(as.numeric(NA), nrow = R, ncol = nrow(data1matrix))
    ## Initial value (init is a vector of length(beta))
    x[1,] <- init

    for (r in seq_len(R)[-1]) {
        
        ## Draw z from its full conditional given the previous beta
        z[r,] <- proposal_generator_z(x[r-1,])

        ## Draw beta from its full conditional given the new z
        x[r,] <- proposal_generator_beta(z[r,])
    }

    data.frame(beta = x,
               #Z = z,
               r = seq_along(x[,1]))
}

## Reuse the earlier Frequentist fit (glmfit1) for the initial value
## Bayesian model
out3 <- Gibbs_sampler(proposal_generator_beta = function(prev_z) {
                          proposal_generator_beta(data = data1matrix, z = prev_z, sigma = 10)
                      },
                      proposal_generator_z = function(prev_beta) {
                          proposal_generator_z(data = data1matrix, beta = prev_beta)
                      },
                      init = coef(glmfit1),
                      R = 10^4)

cat("### Summary\n")
## ### Summary
summary(out3)
##      beta.1            beta.2            beta.3            beta.4             r        
##  Min.   :-2.1991   Min.   :-1.2446   Min.   :-1.0636   Min.   :0.5738   Min.   :    1  
##  1st Qu.:-1.3810   1st Qu.: 0.5536   1st Qu.: 0.5947   1st Qu.:1.0127   1st Qu.: 2501  
##  Median :-1.2041   Median : 0.9604   Median : 0.9510   Median :1.1260   Median : 5000  
##  Mean   :-1.2090   Mean   : 0.9674   Mean   : 0.9501   Mean   :1.1332   Mean   : 5000  
##  3rd Qu.:-1.0341   3rd Qu.: 1.3667   3rd Qu.: 1.3105   3rd Qu.:1.2445   3rd Qu.: 7500  
##  Max.   :-0.2347   Max.   : 3.2334   Max.   : 2.9147   Max.   :1.7286   Max.   :10000

Plotting

namesBeta <- Filter(f = function(elt) {grepl("beta", elt)}, x = names(out3))
namesBeta
## [1] "beta.1" "beta.2" "beta.3" "beta.4"
out4 <- pivot_longer(data = out3, cols = all_of(namesBeta),
                     names_to = "param", values_to = "value")


## Plot of chain
ggplot(data = out4, mapping = aes(x = r, y = value)) +
    geom_line(alpha = 1 / 5) +
    facet_grid(param ~ .) +
    theme_bw() + theme(legend.key = element_blank())

From the above results, the Gibbs sampler appears to converge better than the Metropolis-Hastings algorithm, but each Gibbs iteration takes longer because of the rejection-based truncated normal sampler for the \(z_i\)'s.
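One way to quantify this trade-off (a sketch, assuming the coda package is installed; not part of the original analysis) is the effective sample size of each chain, where larger values indicate less autocorrelation:

## Effective sample sizes for the MH and Gibbs chains
library(coda)
effectiveSize(as.mcmc(as.matrix(out1[, namesBeta])))
effectiveSize(as.mcmc(as.matrix(out3[, namesBeta])))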