```
library(magrittr)
library(dplyr)
library(tidyr)
library(mvtnorm)
library(assertthat)
library(ggplot2)
data1 <- read.table(file = "./bank", sep = "")
names(data1) <- c("X1","X2","X3","X4","Y")
head(data1)
## X1 X2 X3 X4 Y
## 1 214.8 131.0 131.1 9.0 0
## 2 214.6 129.7 129.7 8.1 0
## 3 214.8 129.7 129.7 8.7 0
## 4 214.8 129.7 129.6 7.5 0
## 5 215.0 129.6 129.7 10.4 0
## 6 215.7 130.8 130.5 9.0 0
data1matrix <- as.matrix(data1)
## Prior constructor
log_prior_constructor <- function(sigma) {
assert_that(length(sigma) == 1)
assert_that(is.numeric(sigma))
## Given sigma, return a prior function
function(beta) {
assert_that(is.vector(beta))
out <- dmvnorm(x = beta,
mean = rep(0, length(beta)),
sigma = sigma^2 * diag(1, nrow = length(beta)),
log = TRUE)
assert_that(length(out) == 1)
out
}
}
## For a diffuse normal prior, use prior standard deviation 10, i.e. prior variance $10^2 = 100$
log_prior <- log_prior_constructor(10)
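## This corresponds to an independent N(0, 100) prior on each coefficient, i.e. beta ~ MVN(0, 100 * I)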
## Likelihood constructor
log_likelihood_constructor <- function(X, Y) {
assert_that(is.matrix(X))
assert_that(is.vector(Y))
assert_that(nrow(X) == length(Y))
## Given data, return a likelihood function
function(beta) {
assert_that(is.vector(beta))
## Linear predictor
lp <- as.numeric(X %*% beta)
## Probit link
p <- pnorm(q = lp)
## Independent Bernoulli outcome
out <- sum((Y * log(p) + (1 - Y) * log(1 - p)))
assert_that(length(out) == 1)
out
}
}
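## Note: sum(Y * log(p) + (1 - Y) * log(1 - p)) can underflow to -Inf when a fitted
## probability is numerically 0 or 1. A minimal sketch of a more stable variant of the
## same probit likelihood (illustrative only, not used below; the constructor name is ours)
## keeps the calculation on the log scale via pnorm's log.p argument:
log_likelihood_constructor_stable <- function(X, Y) {
function(beta) {
lp <- as.numeric(X %*% beta)
## log(Phi(lp)) and log(1 - Phi(lp)) = log(Phi(-lp)), computed without forming p explicitly
sum(Y * pnorm(lp, log.p = TRUE) + (1 - Y) * pnorm(-lp, log.p = TRUE))
}
}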
logL <- log_likelihood_constructor(X = as.matrix(data1[,1:4]), Y = data1$Y)
## Define unnormalized posterior (the same normalizing constant cancels when calculating the acceptance ratio)
log_posterior <- function(beta) {
log_prior(beta) + logL(beta)
}
## Use an MVN centered at the current beta as the proposal distribution (the proposal is symmetric, so this is again the Metropolis algorithm as in Lab 4)
## Random proposal generation function
proposal_generator <- function(beta, sigma) {
assert_that(is.vector(beta))
assert_that(is.matrix(sigma))
out <- rmvnorm(n = 1,
mean = beta,
sigma = sigma)
out <- as.vector(out)
assert_that(is.vector(out))
out
}
## Proposal evaluation function q(new_beta | beta)
log_proposal_density <- function(new_beta, beta, sigma) {
assert_that(is.vector(beta))
assert_that(is.matrix(sigma))
out <- dmvnorm(x = new_beta,
mean = beta,
sigma = sigma,
log = TRUE)
assert_that(length(out) == 1)
out
}
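## Note: with the symmetric normal proposal the two proposal-density terms cancel in the
## acceptance ratio; the density is kept explicit so the sampler below also works with
## asymmetric proposals.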
## MH sampler
mh_sampler <- function(log_target_density, log_proposal_density, proposal_generator, init, R) {
assert_that(is.function(log_target_density))
assert_that(is.function(log_proposal_density))
assert_that(is.function(proposal_generator))
assert_that(is.vector(init))
assert_that(is.numeric(init))
assert_that(is.numeric(R))
## Initialize a matrix to hold the chain (one row per iteration, one column per coefficient)
x <- matrix(as.numeric(NA), nrow = R, ncol = length(init))
## Acceptance indicator (initialized with FALSE)
a <- logical(length = R)
## Initial value (init is a vector of length(beta))
x[1,] <- init
for (r in seq_len(R)[-1]) {
## Obtain a proposed value conditioning on the previous value
proposal <- proposal_generator(x[r-1,])
## Evaluate both densities at the proposed value
log_target_at_proposal <- log_target_density(proposal)
log_target_at_previous <- log_target_density(x[r-1,])
## The log acceptance ratio is capped at 0, so the acceptance ratio itself lies in [0,1]; with the symmetric proposal the proposal-density terms cancel
log_acceptance_ratio <- min((log_target_at_proposal + log_proposal_density(x[r-1,], proposal)) -
(log_target_at_previous + log_proposal_density(proposal, x[r-1,])),
0)
## Failure detector
if (is.na(log_acceptance_ratio)) {
cat("Iteration", r, "\n")
cat("Proposal", proposal, "\n")
cat("log_target_at_proposal", log_target_at_proposal, "\n")
cat("log_target_at_previous", log_target_at_previous, "\n")
cat("log_proposal_density(x[r-1,], proposal)", log_proposal_density(x[r-1,], proposal), "\n")
cat("log_proposal_density(proposal, x[r-1,])", log_proposal_density(proposal, x[r-1,]), "\n")
}
if (runif(n = 1) <= exp(log_acceptance_ratio)) {
## If U(0,1) is less than or equal to acceptance ratio
## Make a move
x[r,] <- proposal
a[r] <- TRUE
} else {
## Otherwise, don't move
x[r,] <- x[r-1,]
}
}
data.frame(beta = x,
a = a,
r = seq_along(a))
}
## Fit Frequentist model for initial value and covariance matrix
glmfit1 <- glm(formula = Y ~ -1 + X1 + X2 + X3 + X4,
family = binomial(link = "probit"),
data = data1)
summary(glmfit1)
##
## Call:
## glm(formula = Y ~ -1 + X1 + X2 + X3 + X4, family = binomial(link = "probit"),
## data = data1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5349 -0.3438 -0.0148 0.2000 3.7332
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## X1 -1.1810 0.2747 -4.299 1.72e-05 ***
## X2 0.9515 0.5715 1.665 0.0959 .
## X3 0.9217 0.5144 1.792 0.0731 .
## X4 1.1028 0.1683 6.554 5.60e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 277.259 on 200 degrees of freedom
## Residual deviance: 92.109 on 196 degrees of freedom
## AIC: 100.11
##
## Number of Fisher Scoring iterations: 7
## Bayesian model
out1 <- mh_sampler(log_target_density = log_posterior,
log_proposal_density = function(x_next, x) {log_proposal_density(x_next, x, sigma = 0.1 * vcov(glmfit1))},
proposal_generator = function(x) {proposal_generator(x, sigma = 0.1 * vcov(glmfit1))},
init = coef(glmfit1),
R = 10^4)
## Acceptance ratio
cat("### Acceance ratio\n")
## ### Acceance ratio
mean(out1$a)
## [1] 0.7594
cat("### Summary\n")
## ### Summary
summary(out1)
## beta.1 beta.2 beta.3 beta.4 a r
## Min. :-2.2576 Min. :-0.6920 Min. :-0.7922 Min. :0.6081 Mode :logical Min. : 1
## 1st Qu.:-1.3968 1st Qu.: 0.5590 1st Qu.: 0.5467 1st Qu.:1.0191 FALSE:2406 1st Qu.: 2501
## Median :-1.2000 Median : 0.9881 Median : 0.9131 Median :1.1417 TRUE :7594 Median : 5000
## Mean :-1.2193 Mean : 0.9945 Mean : 0.9388 Mean :1.1483 NA's :0 Mean : 5000
## 3rd Qu.:-1.0286 3rd Qu.: 1.4166 3rd Qu.: 1.3006 3rd Qu.:1.2706 3rd Qu.: 7500
## Max. :-0.4148 Max. : 3.1923 Max. : 3.1359 Max. :2.0007 Max. :10000
namesBeta <- Filter(f = function(elt) {grepl("beta", elt)}, x = names(out1))
namesBeta
## [1] "beta.1" "beta.2" "beta.3" "beta.4"
out2 <- pivot_longer(data = out1, cols = all_of(namesBeta), names_to = "param", values_to = "value")
## Plot of chain
ggplot(data = out2, mapping = aes(x = r, y = value)) +
geom_line(alpha = 1 / 5) +
facet_grid(param ~ .) +
theme_bw() + theme(legend.key = element_blank())
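## A quick numerical summary sketch of the MH draws (the 1000-draw burn-in is an
## illustrative choice, not part of the original analysis): posterior means and
## 95% credible intervals for each coefficient.
burnin <- 1000
post_mh <- as.matrix(out1[out1$r > burnin, namesBeta])
mh_summary <- t(apply(post_mh, 2, function(draws) {
c(mean = mean(draws), quantile(draws, probs = c(0.025, 0.975)))
}))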
## Random proposal generation function for beta
proposal_generator_beta <- function(data, z, sigma) {
assert_that(is.matrix(data))
assert_that(is.vector(z))
assert_that(is.numeric(sigma))
dataX <- data[,c(1:4)]
out <- rmvnorm(n = 1,
mean = solve(sigma^{-2} * diag(ncol(dataX)) + t(dataX) %*% dataX) %*% (t(dataX) %*% z),
sigma = solve(sigma^{-2} * diag(ncol(dataX)) + t(dataX) %*% dataX))
out <- as.vector(out)
assert_that(is.vector(out))
out
}
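## The draw above is the conjugate full conditional implied by the normal prior and the
## latent-variable model: with beta ~ N(0, sigma^2 * I) and z | beta ~ N(X %*% beta, I),
## beta | z ~ N(B %*% t(X) %*% z, B), where B = solve(sigma^(-2) * I + t(X) %*% X).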
## Random proposal generation function for z
proposal_generator_z <- function(data, beta) {
assert_that(is.matrix(data))
assert_that(is.vector(beta))
out <- rep(NA, nrow(data))
rtrunc <- function(row){ # Define the truncated normal sampler
if (row[5] == 1){
X = 0
while (X <= 0){
X = rnorm(n = 1, row[c(1:4)] %*% beta, 1)
}
}else{
X = 0
while (X >= 0){
X = rnorm(n = 1, row[c(1:4)] %*% beta, 1)
}
}
return(X)
}
out <- apply(data, 1, rtrunc)
out <- as.vector(out)
assert_that(is.vector(out))
out
}
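## The rejection loop above can be slow when the acceptance region has small probability.
## A minimal vectorized sketch of the same one-sided truncated normal draw using base-R
## inverse-CDF sampling (an alternative, not used below; it can lose accuracy for extreme
## linear predictors):
proposal_generator_z_invcdf <- function(data, beta) {
lp <- as.numeric(data[, 1:4] %*% beta)  # linear predictor X %*% beta
y <- data[, 5]
cut <- pnorm(0, mean = lp, sd = 1)      # P(z < 0) under N(lp, 1)
## y = 1: truncate to (0, Inf); y = 0: truncate to (-Inf, 0)
u <- runif(n = length(y), min = ifelse(y == 1, cut, 0), max = ifelse(y == 1, 1, cut))
qnorm(u, mean = lp, sd = 1)
}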
## Gibbs sampler
Gibbs_sampler <- function(proposal_generator_beta, proposal_generator_z, init, R) {
assert_that(is.function(proposal_generator_beta))
assert_that(is.function(proposal_generator_z))
assert_that(is.vector(init))
assert_that(is.numeric(init))
assert_that(is.numeric(R))
## Initialize matrices to hold the beta chain and the latent z draws
x <- matrix(as.numeric(NA), nrow = R, ncol = length(init))
z <- matrix(as.numeric(NA), nrow = R, ncol = nrow(data1matrix))
## Initial value (init is a vector of length(beta))
x[1,] <- init
for (r in seq_len(R)[-1]) {
## Obtain a proposed value for z
z[r,] <- proposal_generator_z(x[r-1,])
## Obtain a proposed value for beta
x[r,] <- proposal_generator_beta(z[r,])
}
data.frame(beta = x,
#Z = z,
r = seq_along(x[,1]))
}
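## Each Gibbs iteration alternates two exact conditional draws, z | beta, y (independent
## truncated normals) and beta | z (multivariate normal), so every draw is accepted.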
## Reuse the Frequentist fit glmfit1 from above for the initial value
## Bayesian model
out3 <- Gibbs_sampler(proposal_generator_beta = function(prev_z) {proposal_generator_beta(data = data1matrix, z = prev_z, sigma = 10)},
proposal_generator_z = function(prev_beta) {proposal_generator_z(data = data1matrix, beta = prev_beta)},
init = coef(glmfit1),
R = 10^4)
cat("### Summary\n")
## ### Summary
summary(out3)
## beta.1 beta.2 beta.3 beta.4 r
## Min. :-2.1991 Min. :-1.2446 Min. :-1.0636 Min. :0.5738 Min. : 1
## 1st Qu.:-1.3810 1st Qu.: 0.5536 1st Qu.: 0.5947 1st Qu.:1.0127 1st Qu.: 2501
## Median :-1.2041 Median : 0.9604 Median : 0.9510 Median :1.1260 Median : 5000
## Mean :-1.2090 Mean : 0.9674 Mean : 0.9501 Mean :1.1332 Mean : 5000
## 3rd Qu.:-1.0341 3rd Qu.: 1.3667 3rd Qu.: 1.3105 3rd Qu.:1.2445 3rd Qu.: 7500
## Max. :-0.2347 Max. : 3.2334 Max. : 2.9147 Max. :1.7286 Max. :10000
namesBeta <- Filter(f = function(elt) {grepl("beta", elt)}, x = names(out3))
namesBeta
## [1] "beta.1" "beta.2" "beta.3" "beta.4"
out4 <- pivot_longer(data = out3, cols = all_of(namesBeta), names_to = "param", values_to = "value")
## Plot of chain
ggplot(data = out4, mapping = aes(x = r, y = value)) +
geom_line(alpha = 1 / 5) +
facet_grid(param ~ .) +
theme_bw() + theme(legend.key = element_blank())
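## A sketch of a quantitative mixing check backing up the trace plots (assumes the coda
## package is installed; not part of the original analysis): effective sample sizes for
## the two chains.
library(coda)
ess_mh <- effectiveSize(as.mcmc(as.matrix(out1[, namesBeta])))    # Metropolis-Hastings chain
ess_gibbs <- effectiveSize(as.mcmc(as.matrix(out3[, namesBeta]))) # Gibbs sampler chain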
```
From the above results, the Gibbs sampler appears to converge better than the Metropolis-Hastings algorithm, but it takes longer to run because of the rejection-based truncated normal sampler for the \(z_i\)'s.