ST2195 Part 1(a) - R Implementation

library(ggplot2)  # For plotting histogram and KDE

# Log of the target density f(x) = 0.5 * exp(-|x|)
log_f <- function(x) {
  log(0.5) - abs(x)
}

# Random Walk Metropolis function
random_walk_metropolis <- function(N, s, x0 = 0) {
  samples <- numeric(N + 1)  # Create vector to store samples
  samples[1] <- x0  # Initial value
  for (i in 2:(N + 1)) {  # Loop from 1 to N
    x_current <- samples[i - 1]  # Current sample
    x_star <- rnorm(1, mean = x_current, sd = s)  # Propose new value from Normal
    log_r <- log_f(x_star) - log_f(x_current)  # Log acceptance ratio
    u <- runif(1, min = 0, max = 1)  # Uniform random number
    if (log(u) < log_r) {  # Accept if condition true
      samples[i] <- x_star
    } else {  # Reject, stay at current
      samples[i] <- x_current
    }
  }
  return(samples[2:(N + 1)])  # Return x1 to xN
}

# Run Simulation
N <- 10000  # Number of samples
s <- 1      # Proposal standard deviation
set.seed(42) # For reproducibility (same results every time)
samples <- random_walk_metropolis(N, s)

mean_mc <- mean(samples)  # Monte Carlo mean estimate
sd_mc <- sd(samples)  # Monte Carlo SD estimate
cat(sprintf("Sample Mean: %.4f\n", mean_mc))  # Should be close to 0

## Sample Mean: -0.0006

cat(sprintf("Sample Standard Deviation: %.4f\n", sd_mc))  # Should be close to 1.4142

## Sample Standard Deviation: 1.3479

# FINAL CLEAN PLOT CHUNK - ZERO ERRORS, ZERO WARNINGS

# Prepare data
df_samples <- data.frame(x = samples)
x_vals <- seq(-10, 10, length.out = 1000)
df_true <- data.frame(x = x_vals, y = 0.5 * exp(-abs(x_vals)))

# Plot
ggplot(df_samples, aes(x = x)) +
  geom_histogram(aes(y = after_stat(density)), 
                 bins = 50, fill = "skyblue", color = "black", alpha = 0.7) +
  geom_density(color = "red", linewidth = 1) +
  geom_line(data = df_true, aes(x = x, y = y), 
            color = "black", linewidth = 1) +
  labs(title = "Histogram, KDE, and True Density (N=10000, s=1)",
       x = "x", y = "Density") +
  coord_cartesian(xlim = c(-10, 10)) +
  theme_minimal()

ggsave("part1a_plot_R.png")

## Saving 7 x 5 in image

# === PART 1(b): R-hat convergence diagnostic function ===
compute_rhat <- function(N, s, J, initial_values) {
  chains <- lapply(initial_values, function(iv) random_walk_metropolis(N, s, iv))
  
  Mj <- sapply(chains, mean)
  Vj <- sapply(chains, function(chain) mean((chain - mean(chain))^2))  # as per assignment (N denominator)
  
  W <- mean(Vj)
  M <- mean(Mj)
  B <- mean((Mj - M)^2)
  
  R <- (B + W) / W          # EXACT formula from the assignment
  return(R)
}

# === PART 1(b) RESULTS AND PLOT ===

N <- 2000
J <- 4
initial_values <- c(-10, -5, 5, 10)
set.seed(42)

# 1. R for s = 0.001 (as required)
r_fixed <- compute_rhat(N, 0.001, J, initial_values)
cat("R-hat for s=0.001 (N=2000, J=4):", round(r_fixed, 4), "\n")

## R-hat for s=0.001 (N=2000, J=4): 203808.7

# 2. Grid of s values and R-hat plot
s_grid <- exp(seq(log(0.001), log(1), length.out = 20))
r_values <- sapply(s_grid, function(s) compute_rhat(N, s, J, initial_values))

df_r <- data.frame(s = s_grid, R = r_values)

ggplot(df_r, aes(x = s, y = R)) +
  geom_point(color = "blue", size = 2) +
  geom_line(color = "blue") +
  scale_x_log10() +
  geom_hline(yintercept = 1.05, color = "red", linetype = "dashed", linewidth = 1) +
  labs(title = "R-hat vs s (N=2000, J=4)",
       x = "s (log scale)",
       y = "R-hat") +
  theme_minimal()

ggsave("part1b_plot_R.png")

## Saving 7 x 5 in image