Mohammad Soleimani
Question 1
library(ggplot2)
library(tidyr)
library(gridExtra)
set.seed(170)
sample_size <- 200
intercept <- 0.1
coef_x1 <- 1.1
coef_x2 <- -0.9
create_logistic_data <- function(sample_size) {
x_var1 <- runif(sample_size, min = -2, max = 2)
x_var2 <- runif(sample_size, min = -2, max = 2)
linear_predictor <- intercept + coef_x1 * x_var1 + coef_x2 * x_var2
probabilities <- 1 / (1 + exp(-linear_predictor))
binary_outcomes <- rbinom(sample_size, size = 1, prob = probabilities)
return(binary_outcomes)
}
binary_data <- create_logistic_data(sample_size)
num_zeros <- sum(binary_data == 0)
num_ones <- sum(binary_data == 1)
summary_df <- data.frame(Outcome_Type = c("Zeros", "Ones"), Count = c(num_zeros, num_ones))
bar_plot <- ggplot(summary_df, aes(x = Outcome_Type, y = Count, fill = Outcome_Type)) +
geom_bar(stat = "identity", show.legend = FALSE) +
labs(title = "Distribution of Binary Outcomes (Zeros and Ones)",
x = "Outcome Type",
y = "Count") +
scale_fill_manual(values = c("Zeros" = "purple", "Ones" = "lightblue")) +
theme_minimal()
grid.arrange(bar_plot, tableGrob(summary_df), nrow = 2)

probabilities_df <- data.frame(Outcome = binary_data, Probabilities = 1 / (1 + exp(-(intercept + coef_x1 * runif(sample_size, -2, 2) + coef_x2 * runif(sample_size, -2, 2)))))
ggplot(probabilities_df, aes(x = Probabilities, fill = as.factor(Outcome))) +
geom_histogram(binwidth = 0.05, position = "dodge", alpha = 0.7) +
labs(title = "Histogram of Probabilities for Binary Outcomes",
x = "Probability", y = "Count", fill = "Outcome") +
scale_fill_manual(values = c("0" = "purple", "1" = "lightblue")) +
theme_minimal()

cat("Number of Zeros:", num_zeros, "\n")
## Number of Zeros: 100
cat("Number of Ones:", num_ones, "\n")
## Number of Ones: 100
Question 1 with change in intercept
library(ggplot2)
library(tidyr)
library(gridExtra)
set.seed(170)
sample_size <- 200
intercept <- 0.2
coef_x1 <- 1.1
coef_x2 <- -0.9
create_logistic_data <- function(sample_size) {
x_var1 <- runif(sample_size, min = -2, max = 2)
x_var2 <- runif(sample_size, min = -2, max = 2)
linear_predictor <- intercept + coef_x1 * x_var1 + coef_x2 * x_var2
probabilities <- 1 / (1 + exp(-linear_predictor))
binary_outcomes <- rbinom(sample_size, size = 1, prob = probabilities)
return(binary_outcomes)
}
binary_data <- create_logistic_data(sample_size)
num_zeros <- sum(binary_data == 0)
num_ones <- sum(binary_data == 1)
summary_df <- data.frame(Outcome_Type = c("Zeros", "Ones"), Count = c(num_zeros, num_ones))
bar_plot <- ggplot(summary_df, aes(x = Outcome_Type, y = Count, fill = Outcome_Type)) +
geom_bar(stat = "identity", show.legend = FALSE) +
labs(title = "Distribution of Binary Outcomes (Zeros and Ones)",
x = "Outcome Type",
y = "Count") +
scale_fill_manual(values = c("Zeros" = "purple", "Ones" = "lightblue")) +
theme_minimal()
grid.arrange(bar_plot, tableGrob(summary_df), nrow = 2)

Question 2: Phase 1
set.seed(170)
intercept <- 0.1
coef_x1 <- 1.1
coef_x2 <- -0.9
sample_size <- 200
create_logistic_data <- function(sample_size) {
x_var1 <- runif(sample_size, min = -2, max = 2)
x_var2 <- runif(sample_size, min = -2, max = 2)
linear_predictor <- intercept + coef_x1 * x_var1 + coef_x2 * x_var2
probabilities <- 1 / (1 + exp(-linear_predictor))
binary_outcomes <- rbinom(sample_size, size = 1, prob = probabilities)
return(data.frame(x_var1 = x_var1, x_var2 = x_var2, binary_outcomes = binary_outcomes))
}
binary_data <- create_logistic_data(sample_size)
log_likelihood <- function(beta, data) {
linear_predictor <- beta[1] + beta[2] * data$x_var1 + beta[3] * data$x_var2
probabilities <- 1 / (1 + exp(-linear_predictor))
probabilities <- pmin(pmax(probabilities, 1e-10), 1 - 1e-10)
log_likelihood_value <- sum(data$binary_outcomes * log(probabilities) + (1 - data$binary_outcomes) * log(1 - probabilities))
return(log_likelihood_value)
}
mh_step <- function(current_beta, proposal_scale, data) {
proposed_beta <- current_beta + proposal_scale * rnorm(3) # Propose a new candidate
log_acceptance_ratio <- log_likelihood(proposed_beta, data) - log_likelihood(current_beta, data)
if (is.nan(log_acceptance_ratio) || is.infinite(log_acceptance_ratio)) {
return(list(beta = current_beta, accepted = 0)) # Reject the proposal if the log-likelihood is invalid
}
acceptance_prob <- min(1, exp(log_acceptance_ratio))
if (runif(1) < acceptance_prob) {
return(list(beta = proposed_beta, accepted = 1))
} else {
return(list(beta = current_beta, accepted = 0))
}
}
# Initialize values
num_iterations <- 20000
initial_beta <- c(0, 0, 0)
accepted_count <- 0
total_count <- 0
acceptance_rate_target <- 0.234
proposal_scale <- 1.5
acceptance_rates <- numeric(num_iterations)
post_betas <- matrix(NA, nrow = num_iterations, ncol = 3)
current_beta <- initial_beta
for (i in 1:num_iterations) {
step_result <- mh_step(current_beta, proposal_scale, binary_data)
current_beta <- step_result$beta
accepted <- step_result$accepted
accepted_count <- accepted_count + accepted
total_count <- total_count + 1
acceptance_rate <- accepted_count / total_count
acceptance_rates[i] <- acceptance_rate
if (acceptance_rate < acceptance_rate_target) {
proposal_scale <- proposal_scale * 0.95
} else if (acceptance_rate > acceptance_rate_target) {
proposal_scale <- proposal_scale * 1.05
}
post_betas[i, ] <- current_beta
if (i %% 1000 == 0) {
cat("Iteration:", i, "Acceptance rate:", acceptance_rate, "\n")
}
}
## Iteration: 1000 Acceptance rate: 0.238
## Iteration: 2000 Acceptance rate: 0.239
## Iteration: 3000 Acceptance rate: 0.2356667
## Iteration: 4000 Acceptance rate: 0.238
## Iteration: 5000 Acceptance rate: 0.2336
## Iteration: 6000 Acceptance rate: 0.2383333
## Iteration: 7000 Acceptance rate: 0.2304286
## Iteration: 8000 Acceptance rate: 0.232125
## Iteration: 9000 Acceptance rate: 0.2358889
## Iteration: 10000 Acceptance rate: 0.2361
## Iteration: 11000 Acceptance rate: 0.231
## Iteration: 12000 Acceptance rate: 0.2333333
## Iteration: 13000 Acceptance rate: 0.2352308
## Iteration: 14000 Acceptance rate: 0.2337857
## Iteration: 15000 Acceptance rate: 0.2330667
## Iteration: 16000 Acceptance rate: 0.23575
## Iteration: 17000 Acceptance rate: 0.2335294
## Iteration: 18000 Acceptance rate: 0.2325
## Iteration: 19000 Acceptance rate: 0.236
## Iteration: 20000 Acceptance rate: 0.2339
burn_in <- 5000
post_betas_burned <- post_betas[(burn_in + 1):num_iterations, ]
par(mfrow = c(1, 3))
plot(post_betas[, 1], type = 'l', col = "blue", main = "Trace of β0", xlab = "Iteration", ylab = "β0")
abline(h = intercept, col = "red", lwd = 2)
plot(post_betas[, 2], type = 'l', col = "blue", main = "Trace of β1", xlab = "Iteration", ylab = "β1")
abline(h = coef_x1, col = "red", lwd = 2)
plot(post_betas[, 3], type = 'l', col = "blue", main = "Trace of β2", xlab = "Iteration", ylab = "β2")
abline(h = coef_x2, col = "red", lwd = 2)

par(mfrow = c(1, 3))
hist(post_betas_burned[, 1], main = "Posterior of β0", xlab = "β0", col = "yellow", breaks = 50)
abline(v = mean(post_betas_burned[, 1]), col = "red", lwd = 2) # Posterior mean
abline(v = intercept, col = "blue", lwd = 2) # True value for β0
hist(post_betas_burned[, 2], main = "Posterior of β1", xlab = "β1", col = "lightblue", breaks = 50)
abline(v = mean(post_betas_burned[, 2]), col = "red", lwd = 2) # Posterior mean
abline(v = coef_x1, col = "blue", lwd = 2) # True value for β1
# Posterior of β2
hist(post_betas_burned[, 3], main = "Posterior of β2", xlab = "β2", col = "lightgreen", breaks = 50)
abline(v = mean(post_betas_burned[, 3]), col = "red", lwd = 2) # Posterior mean
abline(v = coef_x2, col = "blue", lwd = 2) # True value for β2

Question 2: Phase 2
library(ggplot2)
library(coda)
set.seed(170)
intercept <- 0.1
coef_x1 <- 1.1
coef_x2 <- -0.9
create_logistic_data <- function(sample_size) {
x_var1 <- runif(sample_size, min = -2, max = 2)
x_var2 <- runif(sample_size, min = -2, max = 2)
linear_predictor <- intercept + coef_x1 * x_var1 + coef_x2 * x_var2
probabilities <- 1 / (1 + exp(-linear_predictor))
binary_outcomes <- rbinom(sample_size, size = 1, prob = probabilities)
return(binary_outcomes)
}
binary_data <- create_logistic_data(200)
log_likelihood <- function(beta, binary_data) {
x_var1 <- runif(length(binary_data), min = -2, max = 2)
x_var2 <- runif(length(binary_data), min = -2, max = 2)
linear_predictor <- beta[1] + beta[2] * x_var1 + beta[3] * x_var2
probabilities <- 1 / (1 + exp(-linear_predictor))
return(sum(binary_data * log(probabilities) + (1 - binary_data) * log(1 - probabilities)))
}
prior_log_density <- function(beta) {
return(-0.5 * sum(beta^2))
}
run_metropolis_hastings <- function(binary_data, iterations, initial_beta, proposal_scale) {
beta <- initial_beta
samples <- matrix(NA, nrow = iterations, ncol = length(beta))
for (i in 1:iterations) {
proposed_beta <- beta + proposal_scale * rnorm(length(beta))
current_log_likelihood <- log_likelihood(beta, binary_data) + prior_log_density(beta)
proposed_log_likelihood <- log_likelihood(proposed_beta, binary_data) + prior_log_density(proposed_beta)
acceptance_prob <- min(1, exp(proposed_log_likelihood - current_log_likelihood))
if (runif(1) < acceptance_prob) {
beta <- proposed_beta
}
samples[i, ] <- beta
}
return(samples)
}
M <- 20
iterations <- 10000
initial_betas <- matrix(rnorm(M * 3), nrow = M, ncol = 3)
proposal_scale <- 1.5
chains <- lapply(1:M, function(i) run_metropolis_hastings(binary_data, iterations, initial_betas[i, ], proposal_scale))
combined_samples <- do.call(rbind, chains)
gelman_rubin_stat <- function(samples) {
num_chains <- nrow(samples)
num_iterations <- ncol(samples)
chain_means <- apply(samples, 2, mean)
overall_mean <- mean(chain_means)
B <- num_iterations * var(chain_means)
W <- mean(apply(samples, 2, var))
return(sqrt((B / num_iterations + W) / W))
}
# Gelman-Rubin Diagnostic Calculation
gelman_rubin_values <- numeric(iterations)
for (i in 1:iterations) {
if (i > 1) {
current_samples <- combined_samples[1:i, ]
gelman_rubin_values[i] <- gelman_rubin_stat(current_samples)
} else {
gelman_rubin_values[i] <- 1.1 # To avoid errors in the first iteration
}
}
plot(1:iterations, gelman_rubin_values, type = "l", col = "blue", xlab = "Iteration", ylab = "Gelman-Rubin Statistic", main = "Gelman-Rubin Diagnostic")
abline(h = 1.1, col = "red", lty = 2)
legend("topright", legend = c("Gelman-Rubin", "Convergence Threshold"), col = c("blue", "red"), lty = c(1, 2))

par(mfrow = c(3, 3))
for (i in 1:3) {
plot(combined_samples[, i], type = "l", main = paste("Trace Plot for Beta", i-1), xlab = "Iterations", ylab = paste("Beta", i-1))
}
burn_in <- 5000
post_burnin_samples <- combined_samples[burn_in:iterations, ]
par(mfrow = c(3, 1))

for (i in 1:3) {
hist(post_burnin_samples[, i], main = paste("Posterior Distribution of Beta", i-1), xlab = paste("Beta", i-1), col = "lightyellow", breaks = 30)
abline(v = mean(post_burnin_samples[, i]), col = "red", lwd = 2)
abline(v = c(0.1, 1.1, -0.9)[i], col = "blue", lwd = 2)
}

Combination of Phase 1 and 2
set.seed(170)
intercept <- 0.1
coef_x1 <- 1.1
coef_x2 <- -0.9
create_logistic_data <- function(sample_size) {
x_var1 <- runif(sample_size, min = -2, max = 2)
x_var2 <- runif(sample_size, min = -2, max = 2)
linear_predictor <- intercept + coef_x1 * x_var1 + coef_x2 * x_var2
probabilities <- 1 / (1 + exp(-linear_predictor))
binary_outcomes <- rbinom(sample_size, size = 1, prob = probabilities)
return(data.frame(x_var1 = x_var1, x_var2 = x_var2, binary_outcomes = binary_outcomes))
}
sample_size <- 200
binary_data <- create_logistic_data(sample_size)
log_likelihood <- function(beta, data) {
linear_predictor <- beta[1] + beta[2] * data$x_var1 + beta[3] * data$x_var2
probabilities <- 1 / (1 + exp(-linear_predictor))
probabilities <- pmin(pmax(probabilities, 1e-10), 1 - 1e-10)
log_likelihood_value <- sum(data$binary_outcomes * log(probabilities) + (1 - data$binary_outcomes) * log(1 - probabilities))
return(log_likelihood_value)
}
mh_step <- function(current_beta, proposal_scale, data) {
proposed_beta <- current_beta + proposal_scale * rnorm(3) # Propose a new candidate
log_acceptance_ratio <- log_likelihood(proposed_beta, data) - log_likelihood(current_beta, data)
if (is.nan(log_acceptance_ratio) || is.infinite(log_acceptance_ratio)) {
return(list(beta = current_beta, accepted = 0)) # Reject the proposal if the log-likelihood is invalid
}
acceptance_prob <- min(1, exp(log_acceptance_ratio)) # Acceptance probability
# Accept or reject the proposed step
if (runif(1) < acceptance_prob) {
return(list(beta = proposed_beta, accepted = 1))
} else {
return(list(beta = current_beta, accepted = 0))
}
}
num_iterations <- 20000
initial_beta <- c(0, 0, 0) # Start with an initial guess for betas
accepted_count <- 0
total_count <- 0
acceptance_rate_target <- 0.234
proposal_scale <- 1.5 # Initial proposal scale (sigma)
acceptance_rates <- numeric(num_iterations)
post_betas <- matrix(NA, nrow = num_iterations, ncol = 3)
current_beta <- initial_beta
for (i in 1:num_iterations) {
step_result <- mh_step(current_beta, proposal_scale, binary_data)
current_beta <- step_result$beta
accepted <- step_result$accepted
accepted_count <- accepted_count + accepted
total_count <- total_count + 1
acceptance_rate <- accepted_count / total_count
acceptance_rates[i] <- acceptance_rate
if (acceptance_rate < acceptance_rate_target) {
proposal_scale <- proposal_scale * 0.95 # Decrease step size if acceptance is low
} else if (acceptance_rate > acceptance_rate_target) {
proposal_scale <- proposal_scale * 1.05 # Increase step size if acceptance is high
}
post_betas[i, ] <- current_beta
if (i %% 1000 == 0) {
cat("Iteration:", i, "Acceptance rate:", acceptance_rate, "\n")
}
}
## Iteration: 1000 Acceptance rate: 0.238
## Iteration: 2000 Acceptance rate: 0.239
## Iteration: 3000 Acceptance rate: 0.2356667
## Iteration: 4000 Acceptance rate: 0.238
## Iteration: 5000 Acceptance rate: 0.2336
## Iteration: 6000 Acceptance rate: 0.2383333
## Iteration: 7000 Acceptance rate: 0.2304286
## Iteration: 8000 Acceptance rate: 0.232125
## Iteration: 9000 Acceptance rate: 0.2358889
## Iteration: 10000 Acceptance rate: 0.2361
## Iteration: 11000 Acceptance rate: 0.231
## Iteration: 12000 Acceptance rate: 0.2333333
## Iteration: 13000 Acceptance rate: 0.2352308
## Iteration: 14000 Acceptance rate: 0.2337857
## Iteration: 15000 Acceptance rate: 0.2330667
## Iteration: 16000 Acceptance rate: 0.23575
## Iteration: 17000 Acceptance rate: 0.2335294
## Iteration: 18000 Acceptance rate: 0.2325
## Iteration: 19000 Acceptance rate: 0.236
## Iteration: 20000 Acceptance rate: 0.2339
burn_in <- 5000
post_betas_burned <- post_betas[(burn_in + 1):num_iterations, ]
par(mfrow = c(1, 3))
# Trace plot for β0
plot(post_betas[, 1], type = 'l', col = "blue", main = "Trace of β0", xlab = "Iteration", ylab = "β0")
abline(h = intercept, col = "red", lwd = 2)
# Trace plot for β1
plot(post_betas[, 2], type = 'l', col = "blue", main = "Trace of β1", xlab = "Iteration", ylab = "β1")
abline(h = coef_x1, col = "red", lwd = 2)
# Trace plot for β2
plot(post_betas[, 3], type = 'l', col = "blue", main = "Trace of β2", xlab = "Iteration", ylab = "β2")
abline(h = coef_x2, col = "red", lwd = 2)

par(mfrow = c(1, 3))
# Posterior of β0
hist(post_betas_burned[, 1], main = "Posterior of β0", xlab = "β0", col = "yellow", breaks = 50)
abline(v = mean(post_betas_burned[, 1]), col = "red", lwd = 2) # Posterior mean
abline(v = intercept, col = "blue", lwd = 2) # True value for β0
# Posterior of β1
hist(post_betas_burned[, 2], main = "Posterior of β1", xlab = "β1", col = "lightblue", breaks = 50)
abline(v = mean(post_betas_burned[, 2]), col = "red", lwd = 2) # Posterior mean
abline(v = coef_x1, col = "blue", lwd = 2) # True value for β1
# Posterior of β2
hist(post_betas_burned[, 3], main = "Posterior of β2", xlab = "β2", col = "lightgreen", breaks = 50)
abline(v = mean(post_betas_burned[, 3]), col = "red", lwd = 2) # Posterior mean
abline(v = coef_x2, col = "blue", lwd = 2) # True value for β2

# Phase 2: Gelman-Rubin Diagnostic and Multiple Chains
library(ggplot2)
library(coda)
# Function to calculate the log-likelihood
log_likelihood <- function(beta, binary_data) {
x_var1 <- runif(length(binary_data), min = -2, max = 2)
x_var2 <- runif(length(binary_data), min = -2, max = 2)
linear_predictor <- beta[1] + beta[2] * x_var1 + beta[3] * x_var2
probabilities <- 1 / (1 + exp(-linear_predictor))
return(sum(binary_data * log(probabilities) + (1 - binary_data) * log(1 - probabilities)))
}
# Function to calculate prior log density
prior_log_density <- function(beta) {
return(-0.5 * sum(beta^2))
}
# Run Metropolis-Hastings for multiple chains
run_metropolis_hastings <- function(binary_data, iterations, initial_beta, proposal_scale) {
beta <- initial_beta
samples <- matrix(NA, nrow = iterations, ncol = length(beta)) # Define the matrix to store the samples
for (i in 1:iterations) {
proposed_beta <- beta + proposal_scale * rnorm(length(beta))
current_log_likelihood <- log_likelihood(beta, binary_data) + prior_log_density(beta)
proposed_log_likelihood <- log_likelihood(proposed_beta, binary_data) + prior_log_density(proposed_beta)
# Ensure the log likelihood is valid
if (is.na(current_log_likelihood) || is.na(proposed_log_likelihood) || is.infinite(current_log_likelihood) || is.infinite(proposed_log_likelihood)) {
acceptance_prob <- 0 # Reject the proposal if likelihood is invalid
} else {
acceptance_prob <- min(1, exp(proposed_log_likelihood - current_log_likelihood))
}
if (runif(1) < acceptance_prob) {
beta <- proposed_beta
}
samples[i, ] <- beta # Store the sample
}
return(samples) # Return the samples
}
# Set up the number of chains and iterations
M <- 20
iterations <- 10000
initial_betas <- matrix(rnorm(M * 3), nrow = M, ncol = 3)
proposal_scale <- 1.5
# Run the chains
chains <- lapply(1:M, function(i) run_metropolis_hastings(binary_data, iterations, initial_betas[i, ], proposal_scale))
# Combine the samples from all chains
combined_samples <- do.call(rbind, chains)
# Gelman-Rubin Diagnostic
gelman_rubin_stat <- function(samples) {
num_chains <- nrow(samples)
num_iterations <- ncol(samples)
chain_means <- apply(samples, 2, mean)
overall_mean <- mean(chain_means)
B <- num_iterations * var(chain_means)
W <- mean(apply(samples, 2, var))
return(sqrt((B / num_iterations + W) / W))
}
# Gelman-Rubin Diagnostic Calculation
gelman_rubin_values <- numeric(iterations)
for (i in 1:iterations) {
if (i > 1) {
current_samples <- combined_samples[1:i, ]
gelman_rubin_values[i] <- gelman_rubin_stat(current_samples)
} else {
gelman_rubin_values[i] <- 1.1 # To avoid errors in the first iteration
}
}
# Plot Gelman-Rubin diagnostic
plot(1:iterations, gelman_rubin_values, type = "l", col = "blue", xlab = "Iteration", ylab = "Gelman-Rubin Statistic", main = "Gelman-Rubin Diagnostic")
abline(h = 1.1, col = "red", lty = 2)
legend("topright", legend = c("Gelman-Rubin", "Convergence Threshold"), col = c("blue", "red"), lty = c(1, 2))
# Trace Plots
par(mfrow = c(3, 3))

for (i in 1:3) {
plot(combined_samples[, i], type = "l", main = paste("Trace Plot for Beta", i-1), xlab = "Iterations", ylab = paste("Beta", i-1))
}
# Burn-in period
burn_in <- 5000
post_burnin_samples <- combined_samples[burn_in:iterations, ]
# Posterior Distributions
par(mfrow = c(3, 1))

for (i in 1:3) {
hist(post_burnin_samples[, i], main = paste("Posterior Distribution of Beta", i-1), xlab = paste("Beta", i-1), col = "lightyellow", breaks = 30)
abline(v = mean(post_burnin_samples[, i]), col = "red", lwd = 2)
abline(v = c(0.1, 1.1, -0.9)[i], col = "blue", lwd = 2)
}

Question 3
library(ggplot2)
library(tidyr)
library(gridExtra)
library(coda)
set.seed(170)
sample_size <- 200
intercept <- 0.1
coef_x <- c(1.1, -0.9, 0.8, -0.7, 0.6, -0.5, 0.4, -0.3, 0.2)
create_logistic_data <- function(sample_size) {
x_vars <- replicate(length(coef_x), runif(sample_size, min = -2, max = 2))
linear_predictor <- intercept + rowSums(t(t(x_vars) * coef_x))
probabilities <- 1 / (1 + exp(-linear_predictor))
binary_outcomes <- rbinom(sample_size, size = 1, prob = probabilities)
data.frame(x_vars, binary_outcomes = binary_outcomes)
}
binary_data <- create_logistic_data(sample_size)
log_likelihood <- function(beta, data) {
linear_predictor <- beta[1] + as.matrix(data[, 1:9]) %*% beta[-1]
probabilities <- 1 / (1 + exp(-linear_predictor))
probabilities <- pmin(pmax(probabilities, 1e-10), 1 - 1e-10)
sum(data$binary_outcomes * log(probabilities) + (1 - data$binary_outcomes) * log(1 - probabilities))
}
mh_step <- function(current_beta, proposal_scale, data) {
proposed_beta <- current_beta + proposal_scale * rnorm(length(current_beta))
log_acceptance_ratio <- log_likelihood(proposed_beta, data) - log_likelihood(current_beta, data)
if (is.nan(log_acceptance_ratio) || is.infinite(log_acceptance_ratio)) {
return(list(beta = current_beta, accepted = 0))
}
acceptance_prob <- min(1, exp(log_acceptance_ratio))
if (runif(1) < acceptance_prob) {
list(beta = proposed_beta, accepted = 1)
} else {
list(beta = current_beta, accepted = 0)
}
}
num_iterations <- 20000
initial_beta <- rep(0, 10)
accepted_count <- 0
total_count <- 0
acceptance_rate_target <- 0.234
proposal_scale <- 1.5
acceptance_rates <- numeric(num_iterations)
post_betas <- matrix(NA, nrow = num_iterations, ncol = 10)
current_beta <- initial_beta
for (i in 1:num_iterations) {
step_result <- mh_step(current_beta, proposal_scale, binary_data)
current_beta <- step_result$beta
accepted <- step_result$accepted
accepted_count <- accepted_count + accepted
total_count <- total_count + 1
acceptance_rate <- accepted_count / total_count
acceptance_rates[i] <- acceptance_rate
if (acceptance_rate < acceptance_rate_target) {
proposal_scale <- proposal_scale * 0.95
} else if (acceptance_rate > acceptance_rate_target) {
proposal_scale <- proposal_scale * 1.05
}
post_betas[i, ] <- current_beta
if (i %% 1000 == 0) {
cat("Iteration:", i, "Acceptance rate:", acceptance_rate, "\n")
}
}
## Iteration: 1000 Acceptance rate: 0.235
## Iteration: 2000 Acceptance rate: 0.241
## Iteration: 3000 Acceptance rate: 0.2323333
## Iteration: 4000 Acceptance rate: 0.23225
## Iteration: 5000 Acceptance rate: 0.235
## Iteration: 6000 Acceptance rate: 0.2318333
## Iteration: 7000 Acceptance rate: 0.232
## Iteration: 8000 Acceptance rate: 0.235
## Iteration: 9000 Acceptance rate: 0.2378889
## Iteration: 10000 Acceptance rate: 0.2317
## Iteration: 11000 Acceptance rate: 0.231
## Iteration: 12000 Acceptance rate: 0.23325
## Iteration: 13000 Acceptance rate: 0.2351538
## Iteration: 14000 Acceptance rate: 0.2362143
## Iteration: 15000 Acceptance rate: 0.2333333
## Iteration: 16000 Acceptance rate: 0.2323125
## Iteration: 17000 Acceptance rate: 0.2342353
## Iteration: 18000 Acceptance rate: 0.234
## Iteration: 19000 Acceptance rate: 0.2335789
## Iteration: 20000 Acceptance rate: 0.23245
burn_in <- 5000
post_betas_burned <- post_betas[(burn_in + 1):num_iterations, ]
par(mfrow = c(3, 4))
plot(post_betas[, 1], type = 'l', col = "blue", main = "Trace of β0", xlab = "Iteration", ylab = "β0")
abline(h = intercept, col = "red", lwd = 2)
for (j in 1:9) {
plot(post_betas[, j + 1], type = 'l', col = "blue", main = paste("Trace of β", j, sep = ""), xlab = "Iteration", ylab = paste("β", j, sep = ""))
abline(h = coef_x[j], col = "red", lwd = 2)
}
par(mfrow = c(3, 4))

hist(post_betas_burned[, 1], main = "Posterior of β0", xlab = "β0", col = "yellow", breaks = 50)
abline(v = mean(post_betas_burned[, 1]), col = "red", lwd = 2)
abline(v = intercept, col = "blue", lwd = 2)
for (j in 1:9) {
hist(post_betas_burned[, j + 1], main = paste("Posterior of β", j, sep = ""), xlab = paste("β", j, sep = ""), col = "lightblue", breaks = 50)
abline(v = mean(post_betas_burned[, j + 1]), col = "red", lwd = 2)
abline(v = coef_x[j], col = "blue", lwd = 2)
}
gelman_rubin_stat <- function(samples) {
num_chains <- nrow(samples)
num_iterations <- ncol(samples)
chain_means <- apply(samples, 2, mean)
overall_mean <- mean(chain_means)
B <- num_iterations * var(chain_means)
W <- mean(apply(samples, 2, var))
sqrt((B / num_iterations + W) / W)
}
M <- 20
iterations <- 10000
initial_betas <- matrix(rnorm(M * 10), nrow = M, ncol = 10)
proposal_scale <- 1.5
chains <- lapply(1:M, function(i) {
beta <- initial_betas[i, ]
samples <- matrix(NA, nrow = iterations, ncol = length(beta))
for (iter in 1:iterations) {
proposed_beta <- beta + proposal_scale * rnorm(length(beta))
current_log_likelihood <- log_likelihood(beta, binary_data)
proposed_log_likelihood <- log_likelihood(proposed_beta, binary_data)
if (is.na(current_log_likelihood) || is.na(proposed_log_likelihood) || is.infinite(current_log_likelihood) || is.infinite(proposed_log_likelihood)) {
acceptance_prob <- 0
} else {
acceptance_prob <- min(1, exp(proposed_log_likelihood - current_log_likelihood))
}
if (runif(1) < acceptance_prob) {
beta <- proposed_beta
}
samples[iter, ] <- beta
}
samples
})
combined_samples <- do.call(rbind, chains)
par(mfrow = c(3, 4))

for (j in 1:10) {
plot(combined_samples[, j], type = "l", main = paste("Trace Plot for Beta", j - 1), xlab = "Iterations", ylab = paste("Beta", j - 1))
}
burn_in <- 5000
post_burnin_samples <- combined_samples[burn_in:iterations, ]
par(mfrow = c(3, 4))

for (j in 1:10) {
hist(post_burnin_samples[, j], main = paste("Posterior Distribution of Beta", j - 1), xlab = paste("Beta", j - 1), col = "lightyellow", breaks = 30)
abline(v = mean(post_burnin_samples[, j]), col = "red", lwd = 2)
abline(v = c(intercept, coef_x)[j], col = "blue", lwd = 2)
}

Question 4
library(ggplot2)
library(tidyr)
library(gridExtra)
library(coda)
set.seed(170)
# Logistic Regression Parameters
sample_size <- 200
intercept <- 0.1
coef_x <- c(1.1, -0.9, 0.8, -0.7, 0.6, -0.5, 0.4, -0.3, 0.2)
# Function to create logistic data
create_logistic_data <- function(sample_size) {
x_vars <- replicate(length(coef_x), runif(sample_size, min = -2, max = 2))
linear_predictor <- intercept + rowSums(t(t(x_vars) * coef_x))
probabilities <- 1 / (1 + exp(-linear_predictor))
binary_outcomes <- rbinom(sample_size, size = 1, prob = probabilities)
data.frame(x_vars, binary_outcomes = binary_outcomes)
}
binary_data <- create_logistic_data(sample_size)
# Log-likelihood function for logistic regression
log_likelihood <- function(beta, data) {
linear_predictor <- beta[1] + as.matrix(data[, 1:9]) %*% beta[-1]
probabilities <- 1 / (1 + exp(-linear_predictor))
probabilities <- pmin(pmax(probabilities, 1e-10), 1 - 1e-10)
sum(data$binary_outcomes * log(probabilities) + (1 - data$binary_outcomes) * log(1 - probabilities))
}
# MH step function to propose a new beta and calculate acceptance
mh_step_conditional <- function(current_betas, proposal_scale, index, data) {
proposed_betas <- current_betas
proposed_betas[index] <- current_betas[index] + proposal_scale * rnorm(1)
log_acceptance_ratio <- log_likelihood(proposed_betas, data) - log_likelihood(current_betas, data)
if (is.nan(log_acceptance_ratio) || is.infinite(log_acceptance_ratio)) {
return(list(betas = current_betas, accepted = 0))
}
acceptance_prob <- min(1, exp(log_acceptance_ratio))
if (runif(1) < acceptance_prob) {
return(list(betas = proposed_betas, accepted = 1))
} else {
return(list(betas = current_betas, accepted = 0))
}
}
# Main loop to run MWG algorithm and adjust acceptance rate
num_iterations <- 20000
initial_betas <- rep(0, 10)
proposal_scale <- 1.5
acceptance_rate_target <- 0.15
acceptance_rates <- numeric(num_iterations)
post_betas <- matrix(NA, nrow = num_iterations, ncol = 10)
current_betas <- initial_betas
accepted_count <- 0
total_count <- 0
# MWG Sampling
for (i in 1:num_iterations) {
for (j in 1:10) { # Cycle through each beta (conditionals)
step_result <- mh_step_conditional(current_betas, proposal_scale, j, binary_data)
current_betas <- step_result$betas
accepted <- step_result$accepted
accepted_count <- accepted_count + accepted
total_count <- total_count + 1
}
acceptance_rate <- accepted_count / total_count
acceptance_rates[i] <- acceptance_rate
if (acceptance_rate < acceptance_rate_target) {
proposal_scale <- proposal_scale * 0.95
} else if (acceptance_rate > acceptance_rate_target) {
proposal_scale <- proposal_scale * 1.05
}
post_betas[i, ] <- current_betas
if (i %% 1000 == 0) {
cat("Iteration:", i, "Acceptance rate:", acceptance_rate, "\n")
}
}
## Iteration: 1000 Acceptance rate: 0.1533
## Iteration: 2000 Acceptance rate: 0.15105
## Iteration: 3000 Acceptance rate: 0.1526
## Iteration: 4000 Acceptance rate: 0.1495
## Iteration: 5000 Acceptance rate: 0.15056
## Iteration: 6000 Acceptance rate: 0.15205
## Iteration: 7000 Acceptance rate: 0.1507143
## Iteration: 8000 Acceptance rate: 0.1498875
## Iteration: 9000 Acceptance rate: 0.1490444
## Iteration: 10000 Acceptance rate: 0.15104
## Iteration: 11000 Acceptance rate: 0.1490455
## Iteration: 12000 Acceptance rate: 0.1499667
## Iteration: 13000 Acceptance rate: 0.1503462
## Iteration: 14000 Acceptance rate: 0.1503357
## Iteration: 15000 Acceptance rate: 0.15054
## Iteration: 16000 Acceptance rate: 0.150475
## Iteration: 17000 Acceptance rate: 0.1492588
## Iteration: 18000 Acceptance rate: 0.1507389
## Iteration: 19000 Acceptance rate: 0.1506684
## Iteration: 20000 Acceptance rate: 0.15026
# Burn-in period (discarding first 5000 samples)
burn_in <- 5000
post_betas_burned <- post_betas[(burn_in + 1):num_iterations, ]
# Plotting Trace Plots for each β (posterior samples)
par(mfrow = c(3, 4))
plot(post_betas[, 1], type = 'l', col = "blue", main = "Trace of β0", xlab = "Iteration", ylab = "β0")
abline(h = intercept, col = "red", lwd = 2)
for (j in 1:9) {
plot(post_betas[, j + 1], type = 'l', col = "blue", main = paste("Trace of β", j, sep = ""), xlab = "Iteration", ylab = paste("β", j, sep = ""))
abline(h = coef_x[j], col = "red", lwd = 2)
}
# Plotting Histograms for each β after burn-in period
par(mfrow = c(3, 4))

hist(post_betas_burned[, 1], main = "Posterior of β0", xlab = "β0", col = "yellow", breaks = 50)
abline(v = mean(post_betas_burned[, 1]), col = "red", lwd = 2)
abline(v = intercept, col = "blue", lwd = 2)
for (j in 1:9) {
hist(post_betas_burned[, j + 1], main = paste("Posterior of β", j, sep = ""), xlab = paste("β", j, sep = ""), col = "lightblue", breaks = 50)
abline(v = mean(post_betas_burned[, j + 1]), col = "red", lwd = 2)
abline(v = coef_x[j], col = "blue", lwd = 2)
}
# Gelman-Rubin diagnostic for convergence
gelman_rubin_stat <- function(samples) {
num_chains <- nrow(samples)
num_iterations <- ncol(samples)
chain_means <- apply(samples, 2, mean)
overall_mean <- mean(chain_means)
B <- num_iterations * var(chain_means)
W <- mean(apply(samples, 2, var))
sqrt((B / num_iterations + W) / W)
}
# Running multiple chains for Gelman-Rubin diagnostic
M <- 10
iterations <- 10000
initial_betas <- matrix(rnorm(M * 10), nrow = M, ncol = 10)
proposal_scale <- 1.5
# Running chains
chains <- lapply(1:M, function(i) {
beta <- initial_betas[i, ]
samples <- matrix(NA, nrow = iterations, ncol = length(beta))
for (iter in 1:iterations) {
for (j in 1:10) { # Cycle through each beta (conditionals)
proposed_beta <- beta
proposed_beta[j] <- beta[j] + proposal_scale * rnorm(1)
current_log_likelihood <- log_likelihood(beta, binary_data)
proposed_log_likelihood <- log_likelihood(proposed_beta, binary_data)
acceptance_prob <- min(1, exp(proposed_log_likelihood - current_log_likelihood))
if (runif(1) < acceptance_prob) {
beta <- proposed_beta
}
}
samples[iter, ] <- beta
}
samples
})
# Combining all chains
combined_samples <- do.call(rbind, chains)
# Plotting trace plots for the combined samples (from multiple chains)
par(mfrow = c(3, 4))

for (j in 1:10) {
plot(combined_samples[, j], type = "l", main = paste("Trace Plot for Beta", j - 1), xlab = "Iterations", ylab = paste("Beta", j - 1))
}
# Burn-in period for multiple chains
burn_in <- 5000
post_burnin_samples <- combined_samples[burn_in:iterations, ]
# Plotting histograms for the posterior distributions of each β from multiple chains
par(mfrow = c(3, 4))

for (j in 1:10) {
hist(post_burnin_samples[, j], main = paste("Posterior Distribution of Beta", j - 1), xlab = paste("Beta", j - 1), col = "lightyellow", breaks = 30)
abline(v = mean(post_burnin_samples[, j]), col = "red", lwd = 2)
abline(v = c(intercept, coef_x)[j], col = "blue", lwd = 2)
}

Question 5
library(ggplot2)
library(tidyr)
library(gridExtra)
library(coda)
set.seed(170)
sample_size <- 200
intercept <- 0.1
coef_x <- c(1.1, -0.9, 0.8, -0.7, 0.6, -0.5, 0.4, -0.3, 0.2)
create_logistic_data <- function(sample_size) {
x_vars <- replicate(length(coef_x), runif(sample_size, min = -2, max = 2))
linear_predictor <- intercept + rowSums(t(t(x_vars) * coef_x))
probabilities <- 1 / (1 + exp(-linear_predictor))
binary_outcomes <- rbinom(sample_size, size = 1, prob = probabilities)
data.frame(x_vars, binary_outcomes = binary_outcomes)
}
binary_data <- create_logistic_data(sample_size)
log_likelihood <- function(beta, data) {
linear_predictor <- beta[1] + as.matrix(data[, 1:9]) %*% beta[-1]
probabilities <- 1 / (1 + exp(-linear_predictor))
probabilities <- pmin(pmax(probabilities, 1e-10), 1 - 1e-10)
sum(data$binary_outcomes * log(probabilities) + (1 - data$binary_outcomes) * log(1 - probabilities))
}
mh_step <- function(current_beta, proposal_scale, data) {
proposed_beta <- current_beta + proposal_scale * rnorm(length(current_beta))
log_acceptance_ratio <- log_likelihood(proposed_beta, data) - log_likelihood(current_beta, data)
if (is.nan(log_acceptance_ratio) || is.infinite(log_acceptance_ratio)) {
return(list(beta = current_beta, accepted = 0))
}
acceptance_prob <- min(1, exp(log_acceptance_ratio))
if (runif(1) < acceptance_prob) {
list(beta = proposed_beta, accepted = 1)
} else {
list(beta = current_beta, accepted = 0)
}
}
num_iterations <- 20000
initial_beta <- rep(0, 10)
accepted_count <- 0
total_count <- 0
acceptance_rates <- numeric(num_iterations)
post_betas <- matrix(NA, nrow = num_iterations, ncol = 10)
current_beta <- initial_beta
acceptance_rate_target_10 <- 0.1
acceptance_rate_target_30 <- 0.3
proposal_scale_10 <- 1.5
proposal_scale_30 <- 3.0
for (i in 1:num_iterations) {
proposal_scale <- ifelse(runif(1) < 0.5, proposal_scale_10, proposal_scale_30)
step_result <- mh_step(current_beta, proposal_scale, binary_data)
current_beta <- step_result$beta
accepted <- step_result$accepted
accepted_count <- accepted_count + accepted
total_count <- total_count + 1
acceptance_rate <- accepted_count / total_count
acceptance_rates[i] <- acceptance_rate
post_betas[i, ] <- current_beta
if (i %% 1000 == 0) {
cat("Iteration:", i, "Acceptance rate:", acceptance_rate, "\n")
}
}
## Iteration: 1000 Acceptance rate: 0.001
## Iteration: 2000 Acceptance rate: 0.001
## Iteration: 3000 Acceptance rate: 0.0006666667
## Iteration: 4000 Acceptance rate: 5e-04
## Iteration: 5000 Acceptance rate: 4e-04
## Iteration: 6000 Acceptance rate: 0.0003333333
## Iteration: 7000 Acceptance rate: 0.0002857143
## Iteration: 8000 Acceptance rate: 0.00025
## Iteration: 9000 Acceptance rate: 0.0002222222
## Iteration: 10000 Acceptance rate: 2e-04
## Iteration: 11000 Acceptance rate: 0.0001818182
## Iteration: 12000 Acceptance rate: 0.0001666667
## Iteration: 13000 Acceptance rate: 0.0001538462
## Iteration: 14000 Acceptance rate: 0.0001428571
## Iteration: 15000 Acceptance rate: 0.0001333333
## Iteration: 16000 Acceptance rate: 0.000125
## Iteration: 17000 Acceptance rate: 0.0001176471
## Iteration: 18000 Acceptance rate: 0.0001111111
## Iteration: 19000 Acceptance rate: 0.0001052632
## Iteration: 20000 Acceptance rate: 1e-04
burn_in <- 5000
post_betas_burned <- post_betas[(burn_in + 1):num_iterations, ]
par(mfrow = c(3, 4))
plot(post_betas[, 1], type = 'l', col = "blue", main = "Trace of β0", xlab = "Iteration", ylab = "β0")
abline(h = intercept, col = "red", lwd = 2)
for (j in 1:9) {
plot(post_betas[, j + 1], type = 'l', col = "blue", main = paste("Trace of β", j, sep = ""), xlab = "Iteration", ylab = paste("β", j, sep = ""))
abline(h = coef_x[j], col = "red", lwd = 2)
}
par(mfrow = c(3, 4))

hist(post_betas_burned[, 1], main = "Posterior of β0", xlab = "β0", col = "yellow", breaks = 50)
abline(v = mean(post_betas_burned[, 1]), col = "red", lwd = 2)
abline(v = intercept, col = "blue", lwd = 2)
for (j in 1:9) {
hist(post_betas_burned[, j + 1], main = paste("Posterior of β", j, sep = ""), xlab = paste("β", j, sep = ""), col = "lightblue", breaks = 50)
abline(v = mean(post_betas_burned[, j + 1]), col = "red", lwd = 2)
abline(v = coef_x[j], col = "blue", lwd = 2)
}
gelman_rubin_stat <- function(samples) {
num_chains <- nrow(samples)
num_iterations <- ncol(samples)
chain_means <- apply(samples, 2, mean)
overall_mean <- mean(chain_means)
B <- num_iterations * var(chain_means)
W <- mean(apply(samples, 2, var))
sqrt((B / num_iterations + W) / W)
}
M <- 20
iterations <- 10000
initial_betas <- matrix(rnorm(M * 10), nrow = M, ncol = 10)
proposal_scale_10 <- 1.5
proposal_scale_30 <- 3.0
chains <- lapply(1:M, function(i) {
beta <- initial_betas[i, ]
samples <- matrix(NA, nrow = iterations, ncol = length(beta))
for (iter in 1:iterations) {
proposal_scale <- ifelse(runif(1) < 0.5, proposal_scale_10, proposal_scale_30)
proposed_beta <- beta + proposal_scale * rnorm(length(beta))
current_log_likelihood <- log_likelihood(beta, binary_data)
proposed_log_likelihood <- log_likelihood(proposed_beta, binary_data)
if (is.na(current_log_likelihood) || is.na(proposed_log_likelihood) || is.infinite(current_log_likelihood) || is.infinite(proposed_log_likelihood)) {
acceptance_prob <- 0
} else {
acceptance_prob <- min(1, exp(proposed_log_likelihood - current_log_likelihood))
}
if (runif(1) < acceptance_prob) {
beta <- proposed_beta
}
samples[iter, ] <- beta
}
samples
})
combined_samples <- do.call(rbind, chains)
par(mfrow = c(3, 4))

for (j in 1:10) {
plot(combined_samples[, j], type = "l", main = paste("Trace Plot for Beta", j - 1), xlab = "Iterations", ylab = paste("Beta", j - 1))
}
burn_in <- 5000
post_burnin_samples <- combined_samples[burn_in:iterations, ]
par(mfrow = c(3, 4))

for (j in 1:10) {
hist(post_burnin_samples[, j], main = paste("Posterior Distribution of Beta", j - 1), xlab = paste("Beta", j - 1), col = "lightyellow", breaks = 30)
abline(v = mean(post_burnin_samples[, j]), col = "red", lwd = 2)
abline(v = c(intercept, coef_x)[j], col = "blue", lwd = 2)
}

Question 5 for M=20
library(ggplot2)
library(tidyr)
library(gridExtra)
library(coda)
set.seed(170)
sample_size <- 200
intercept <- 0.1
coef_x <- c(1.1, -0.9, 0.8, -0.7, 0.6, -0.5, 0.4, -0.3, 0.2)
create_logistic_data <- function(sample_size) {
x_vars <- replicate(length(coef_x), runif(sample_size, min = -2, max = 2))
linear_predictor <- intercept + rowSums(t(t(x_vars) * coef_x))
probabilities <- 1 / (1 + exp(-linear_predictor))
binary_outcomes <- rbinom(sample_size, size = 1, prob = probabilities)
data.frame(x_vars, binary_outcomes = binary_outcomes)
}
binary_data <- create_logistic_data(sample_size)
log_likelihood <- function(beta, data) {
linear_predictor <- beta[1] + as.matrix(data[, 1:9]) %*% beta[-1]
probabilities <- 1 / (1 + exp(-linear_predictor))
probabilities <- pmin(pmax(probabilities, 1e-10), 1 - 1e-10)
sum(data$binary_outcomes * log(probabilities) + (1 - data$binary_outcomes) * log(1 - probabilities))
}
mh_step <- function(current_beta, proposal_scale, data) {
proposed_beta <- current_beta + proposal_scale * rnorm(length(current_beta))
log_acceptance_ratio <- log_likelihood(proposed_beta, data) - log_likelihood(current_beta, data)
if (is.nan(log_acceptance_ratio) || is.infinite(log_acceptance_ratio)) {
return(list(beta = current_beta, accepted = 0))
}
acceptance_prob <- min(1, exp(log_acceptance_ratio))
if (runif(1) < acceptance_prob) {
list(beta = proposed_beta, accepted = 1)
} else {
list(beta = current_beta, accepted = 0)
}
}
num_iterations <- 20000
initial_beta <- rep(0, 10)
accepted_count <- 0
total_count <- 0
acceptance_rates <- numeric(num_iterations)
post_betas <- matrix(NA, nrow = num_iterations, ncol = 10)
current_beta <- initial_beta
proposal_scales <- c(0.1, 0.3) # For 10% and 30% acceptance rates
for (i in 1:num_iterations) {
proposal_scale <- sample(proposal_scales, 1) # Randomly select proposal scale
step_result <- mh_step(current_beta, proposal_scale, binary_data)
current_beta <- step_result$beta
accepted <- step_result$accepted
accepted_count <- accepted_count + accepted
total_count <- total_count + 1
acceptance_rate <- accepted_count / total_count
acceptance_rates[i] <- acceptance_rate
post_betas[i, ] <- current_beta
if (i %% 1000 == 0) {
cat("Iteration:", i, "Acceptance rate:", acceptance_rate, "\n")
}
}
## Iteration: 1000 Acceptance rate: 0.217
## Iteration: 2000 Acceptance rate: 0.2245
## Iteration: 3000 Acceptance rate: 0.221
## Iteration: 4000 Acceptance rate: 0.22
## Iteration: 5000 Acceptance rate: 0.2192
## Iteration: 6000 Acceptance rate: 0.2203333
## Iteration: 7000 Acceptance rate: 0.2237143
## Iteration: 8000 Acceptance rate: 0.2225
## Iteration: 9000 Acceptance rate: 0.225
## Iteration: 10000 Acceptance rate: 0.2265
## Iteration: 11000 Acceptance rate: 0.2251818
## Iteration: 12000 Acceptance rate: 0.2218333
## Iteration: 13000 Acceptance rate: 0.2209231
## Iteration: 14000 Acceptance rate: 0.2223571
## Iteration: 15000 Acceptance rate: 0.2228667
## Iteration: 16000 Acceptance rate: 0.223375
## Iteration: 17000 Acceptance rate: 0.2233529
## Iteration: 18000 Acceptance rate: 0.2221667
## Iteration: 19000 Acceptance rate: 0.2206316
## Iteration: 20000 Acceptance rate: 0.2201
burn_in <- 5000
post_betas_burned <- post_betas[(burn_in + 1):num_iterations, ]
par(mfrow = c(3, 4))
plot(post_betas[, 1], type = 'l', col = "blue", main = "Trace of β0", xlab = "Iteration", ylab = "β0")
abline(h = intercept, col = "red", lwd = 2)
for (j in 1:9) {
plot(post_betas[, j + 1], type = 'l', col = "blue", main = paste("Trace of β", j, sep = ""), xlab = "Iteration", ylab = paste("β", j, sep = ""))
abline(h = coef_x[j], col = "red", lwd = 2)
}
par(mfrow = c(3, 4))

hist(post_betas_burned[, 1], main = "Posterior of β0", xlab = "β0", col = "yellow", breaks = 50)
abline(v = mean(post_betas_burned[, 1]), col = "red", lwd = 2)
abline(v = intercept, col = "blue", lwd = 2)
for (j in 1:9) {
hist(post_betas_burned[, j + 1], main = paste("Posterior of β", j, sep = ""), xlab = paste("β", j, sep = ""), col = "lightblue", breaks = 50)
abline(v = mean(post_betas_burned[, j + 1]), col = "red", lwd = 2)
abline(v = coef_x[j], col = "blue", lwd = 2)
}
gelman_rubin_stat <- function(samples) {
num_chains <- nrow(samples)
num_iterations <- ncol(samples)
chain_means <- apply(samples, 2, mean)
overall_mean <- mean(chain_means)
B <- num_iterations * var(chain_means)
W <- mean(apply(samples, 2, var))
sqrt((B / num_iterations + W) / W)
}
M <- 20
iterations <- 10000
initial_betas <- matrix(rnorm(M * 10), nrow = M, ncol = 10)
proposal_scales <- c(0.1, 0.3)
chains <- lapply(1:M, function(i) {
beta <- initial_betas[i, ]
samples <- matrix(NA, nrow = iterations, ncol = length(beta))
for (iter in 1:iterations) {
proposal_scale <- sample(proposal_scales, 1) # Randomly select proposal scale
proposed_beta <- beta + proposal_scale * rnorm(length(beta))
current_log_likelihood <- log_likelihood(beta, binary_data)
proposed_log_likelihood <- log_likelihood(proposed_beta, binary_data)
if (is.na(current_log_likelihood) || is.na(proposed_log_likelihood) || is.infinite(current_log_likelihood) || is.infinite(proposed_log_likelihood)) {
acceptance_prob <- 0
} else {
acceptance_prob <- min(1, exp(proposed_log_likelihood - current_log_likelihood))
}
if (runif(1) < acceptance_prob) {
beta <- proposed_beta
}
samples[iter, ] <- beta
}
samples
})
combined_samples <- do.call(rbind, chains)
par(mfrow = c(3, 4))

for (j in 1:10) {
plot(combined_samples[, j], type = "l", main = paste("Trace Plot for Beta", j - 1), xlab = "Iterations", ylab = paste("Beta", j - 1))
}
burn_in <- 5000
post_burnin_samples <- combined_samples[burn_in:iterations, ]
par(mfrow = c(3, 4))

for (j in 1:10) {
hist(post_burnin_samples[, j], main = paste("Posterior Distribution of Beta", j - 1), xlab = paste("Beta", j - 1), col = "lightyellow", breaks = 30)
abline(v = mean(post_burnin_samples[, j]), col = "red", lwd = 2)
abline(v = c(intercept, coef_x)[j], col = "blue", lwd = 2)
}
