By Akihiko Nishimura for “Bayesian Methods II” (Biostat 140.763) at Johns Hopkins
rand_walk_metropolis <- function(eval_logp, init, n_iter, prop_sd, seed = NA) {
if (!is.na(seed)) { set.seed(seed) }
n_dim <- length(init)
samples <- matrix(NA, nrow = n_dim, ncol = n_iter + 1L)
logp_samples <- vector("numeric", n_iter + 1L)
n_accepted <- 0L
x <- init
logp <- eval_logp(x)
samples[, 1] <- init
logp_samples[1] <- logp
for (i in 1:n_iter) {
x_prop <- x + prop_sd * rnorm(n_dim)
logp_prop <- eval_logp(x_prop)
accepted <- logp_prop - logp > log(runif(1))
if (accepted) {
x <- x_prop
logp <- logp_prop
}
samples[, i + 1] <- x
logp_samples[i + 1] <- logp
n_accepted <- n_accepted + accepted
}
rownames(samples) <- paste0("param_", 1:n_dim)
return(list(
samples = samples,
logp = logp_samples,
accept_rate = n_accepted / n_iter
))
}d <- 16
iid_target_logp <- function(x) { - 0.5 * sum(x^2) }
# Pick theoretically optimal (for sufficiently large `d`) proposal sd
prop_sd <- 2.38 / sqrt(d)
init <- rep(0, d)
n_iter <- 10^5
rwm_output <- rand_walk_metropolis(
iid_target_logp, init, n_iter, prop_sd, seed = 1918
)ess_est <- list(
ar_proc = coda::effectiveSize(coord_samples)[[1]],
batch_means = mcmcse::ess(coord_samples, method = "bm"),
init_pos_seq = n_iter / mcmc::initseq(coord_samples)$var.pos
)
print(ess_est) ## $ar_proc
## [1] 2026.527
##
## $batch_means
## [1] 1885.564
##
## $init_pos_seq
## [1] 1937.285
## $ar_proc
## [1] 49.34551
##
## $batch_means
## [1] 53.03453
##
## $init_pos_seq
## [1] 51.61863
Consider a Gaussian target with covariance \(\Sigma_{ij} = \rho\) for all \(i \neq j\).
d <- 16
rho <- 0.9
Sigma <- (1 - rho) * diag(d) + rho * array(1., dim = c(d, d))
# Analytically invert `Sigma`
Prec <- 1 / (1 - rho) * (
diag(d) - rho / (1 - rho + rho * d) * array(1., dim = c(d, d))
)
cs_target_logp <- function(x) { - 0.5 * t(x) %*% Prec %*% x }
# Approximate, based on the compound symmetry structure, the optimal proposal sd
smaller_target_sd <- sqrt(min(eigen(Sigma)$values)) # only two unique eigenvalues
cs_prop_sd <- smaller_target_sd * 2.38 / sqrt(d)
init <- rep(0, d)
n_iter <- 10^5
rwm_output <- rand_walk_metropolis(
cs_target_logp, init, n_iter, cs_prop_sd, seed=1
)ess_est <- list(
ar_proc = coda::effectiveSize(coord_samples)[[1]],
batch_means = mcmcse::ess(coord_samples, method = "bm"),
init_pos_seq = n_iter / mcmc::initseq(coord_samples)$var.pos
)
print(ess_est) ## $ar_proc
## [1] 73.92964
##
## $batch_means
## [1] 47.39285
##
## $init_pos_seq
## [1] 24.41295
## $ar_proc
## [1] 1352.637
##
## $batch_means
## [1] 2110.023
##
## $init_pos_seq
## [1] 4096.186
rwm_output <- rand_walk_metropolis(
cs_target_logp, init, n_iter, cs_prop_sd, seed=615
)
coord_samples <- rwm_output$samples[1, ]
trace_plot(coord_samples, thin = 100L)ess_est <- list(
ar_proc = coda::effectiveSize(coord_samples)[[1]],
batch_means = mcmcse::ess(coord_samples, method = "bm"),
init_pos_seq = n_iter / mcmc::initseq(coord_samples)$var.pos
)
print(ess_est) ## $ar_proc
## [1] 38.80634
##
## $batch_means
## [1] 36.57983
##
## $init_pos_seq
## [1] 5.265385
set.seed(410)
init <- rnorm(d)
init[c(1, 2)] <- 3.5
n_iter <- 10^4
rwm_output <- rand_walk_metropolis(
cs_target_logp, init, n_iter, cs_prop_sd, seed=21205
)index <- c(1, 2)
target_density <- function(xy) {
mvtnorm::dmvnorm(xy, sigma = Sigma[index, index])
}
n_iter_to_plot <- 1000L
plot_trajectory(rwm_output$samples, index, n_iter_to_plot, target_density)n_iter <- 10^3
init <- .1
prop_sd <- .1
rwm_output <- rand_walk_metropolis(
unknown_target, init, n_iter, prop_sd, seed=1876
)
samples_1 <- rwm_output$samples
trace_plot(samples_1, ylim=param_range)init <- -.1
rwm_output <- rand_walk_metropolis(
unknown_target, init, n_iter, prop_sd, seed=1876
)
samples_2 <- rwm_output$samples
trace_plot(samples_2, ylim=param_range)## [1] 113.5144
n_iter <- 4000
prop_sd <- 1
init <- 1
rwm_output <- rand_walk_metropolis(
bimodal_target_logp, init, n_iter, prop_sd, seed=111
)
samples_1 <- rwm_output$samples
init <- -1
rwm_output <- rand_walk_metropolis(
bimodal_target_logp, init, n_iter, prop_sd, seed=222
)
samples_2 <- rwm_output$samplesquarter_samples_1 <- samples_1[1:ceiling(n_iter / 4)]
quarter_samples_2 <- samples_2[1:ceiling(n_iter / 4)]
trace_plot(quarter_samples_1, col = tableau10[1])
trace_plot(quarter_samples_2, col = tableau10[2], add = TRUE)combined_quarter_samples <- rbind(quarter_samples_1, quarter_samples_2)
c(posterior::rhat_basic(combined_quarter_samples),
posterior::rhat(combined_quarter_samples))## [1] 1.044920 1.066895
mu <- c(-1.5, -.5, .5, 1.5)
sigma <- c(.1, .04, .1, .1)
w <- c(.05, .1, .25, .6)
quadrimodal_target_logp <- function(x) {
log_dnorm_mixture(x, mu, sigma, w)
}
param_range <- c(-2, 2)prop_sd <- .1
n_iter <- 10^3
n_chain <- 4L
samples_list <- lapply(
1:n_chain,
function(chain_index) {
init <- mu[chain_index]
rwm_output <- rand_walk_metropolis(
quadrimodal_target_logp, init, n_iter, prop_sd, seed=chain_index
)
rwm_output$samples
}
)for (chain_index in 1:n_chain) {
trace_plot(
samples_list[[chain_index]],
col = tableau10[chain_index],
add = (chain_index > 1),
ylim = param_range
)
}true_density <- function(x) {
sapply(
1:length(x),
function (i) { exp(quadrimodal_target_logp(x[i])) }
)
}
hist_against_truth(
do.call(cbind, samples_list),
true_density, xlim = param_range, breaks = 201
)combined_samples <- do.call(rbind, samples_list)
c(posterior::rhat_basic(combined_samples),
posterior::rhat(combined_samples))## [1] 1.580313 1.311234
set.seed(955)
init_list <- MASS::mvrnorm(n_chain, mu = rep(0, d), Sigma = Sigma)
n_iter <- 10^4
samples_list <- lapply(
1:n_chain,
function(chain_index) {
init <- init_list[chain_index, ]
rwm_output <- rand_walk_metropolis(
cs_target_logp, init, n_iter, cs_prop_sd, seed=chain_index
)
rwm_output$samples
}
)param_index <- 1L
for (chain_index in 1:n_chain) {
trace_plot(
samples_list[[chain_index]][param_index, ],
col = tableau10[chain_index],
add = (chain_index > 1),
ylim = c(-3, 3), thin = 10L
)
}combined_samples <- do.call(rbind, samples_list)
c(posterior::rhat_basic(combined_samples),
posterior::rhat(combined_samples))## [1] 1.334540 1.332496