# Load necessary libraries
library(MASS) # for multivariate Gaussian sampling
library(tidyverse)
initialize_params <- function(K) {
  list(mu = matrix(0, nrow = K, ncol = 1),        # posterior mean for each arm's weight
       Sigma = array(diag(1), dim = c(1, 1, K)),  # 1x1 posterior covariance per arm
       success_count = numeric(K),                # observed successes per arm
       failure_count = numeric(K))                # observed failures per arm
}
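# Illustrative check (not part of the simulation): each arm gets a scalar
# Gaussian posterior over its logistic weight, plus raw outcome counts.
# str(initialize_params(3))  # mu: 3x1 matrix, Sigma: 1x1x3 array, counts: length-3 vectors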
# Run one simulated experiment: N rounds (samples), K arms, with the best
# arm's success rate exceeding the rest by effect_size
run_one_time <- function(K, N, effect_size){
  params_weighted <- initialize_params(K)
  params_traditional <- initialize_params(K)
  # Effect of the contextual factor on the success probability
  effect_cont <- 0.2
  # Initialize the true success rate for each arm: K - 1 arms near 0.5, plus
  # one best arm at 0.5 + effect_size
  true_success_rate <- c(abs(rnorm(K - 1, 0.5, 0.05)), 0.5 + effect_size)
  # Per-arm suboptimality gap (delta) for the later regret calculation
  p_star <- max(true_success_rate)
  delta <- p_star - true_success_rate
  regret_traditional <- 0  # running totals of cumulative regret
  regret_weighted <- 0
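  # Worked example (illustrative): if p_star = 0.70 and arm k's true rate is
  # 0.55, delta[k] = 0.15, so each pull of arm k adds 0.15 to cumulative regret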
  # Initialize reward counters
  total_reward_weighted <- numeric(K)
  total_reward_traditional <- numeric(K)
  # Table for storing per-arm results (filled during the loop; not returned)
  results_table <- tibble(
    Metric = c("Total Reward", paste("Arm", 1:K)),
    Weighted = numeric(K + 1),
    Traditional = numeric(K + 1)
  )
  # Simulation loop
  for (t in 1:N) {
    # Generate a binary context for this round (Bernoulli with p = 0.25)
    x_t <- rbinom(1, 1, 0.25)
    selected_arm_weighted <- 0
    selected_arm_traditional <- 0
    if (t <= K) {
      # Play each arm once to initialize; same arm for both algorithms for a fair comparison
      selected_arm_weighted <- t
      selected_arm_traditional <- t
    } else {
      p_weighted <- numeric(K)
      p_traditional <- numeric(K)
      for (k in 1:K) {
        # Sample a weight from each algorithm's posterior
        w_tilde_k_weighted <- mvrnorm(1, params_weighted$mu[k], params_weighted$Sigma[,,k])
        w_tilde_k_traditional <- mvrnorm(1, params_traditional$mu[k], params_traditional$Sigma[,,k])
        # Compute probability of success using the logistic function
        # (linear predictor: x_t * w * effect_cont + w)
        p_k_weighted <- 1 / (1 + exp(-x_t * w_tilde_k_weighted * effect_cont - w_tilde_k_weighted))
        p_k_traditional <- 1 / (1 + exp(-x_t * w_tilde_k_traditional * effect_cont - w_tilde_k_traditional))
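        # Worked example (illustrative): with w_tilde = 0.5, x_t = 1, effect_cont = 0.2,
        # p = 1 / (1 + exp(-(0.5 * 0.2 + 0.5))) = 1 / (1 + exp(-0.6)) ≈ 0.646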
        # Weight based on the empirical success and failure rates (WAPTS);
        # the +1 in the denominator avoids division by zero for unplayed arms
        total_count <- params_weighted$success_count[k] + params_weighted$failure_count[k] + 1
        success_rate <- params_weighted$success_count[k] / total_count
        failure_rate <- params_weighted$failure_count[k] / total_count
        weight_k <- (1 + success_rate) * (1 - failure_rate)
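        # Worked example (illustrative): with 3 successes and 1 failure,
        # total_count = 5, success_rate = 0.6, failure_rate = 0.2, and
        # weight_k = (1 + 0.6) * (1 - 0.2) = 1.28, boosting a well-performing arm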
        # Weighted probability for WAPTS; unweighted probability for traditional TS
        p_weighted[k] <- p_k_weighted * weight_k
        p_traditional[k] <- p_k_traditional
      }
      # Select the arm with the highest weighted / unweighted sampled probability
      selected_arm_weighted <- which.max(p_weighted)
      selected_arm_traditional <- which.max(p_traditional)
      # Accumulate regret for the chosen arms
      regret_weighted <- regret_weighted + delta[selected_arm_weighted]
      regret_traditional <- regret_traditional + delta[selected_arm_traditional]
    }
    # Simulate the reward from the selected arm's true success rate
    y_weighted <- rbinom(1, 1, true_success_rate[selected_arm_weighted])
    y_traditional <- rbinom(1, 1, true_success_rate[selected_arm_traditional])
    # Update total rewards
    total_reward_weighted[selected_arm_weighted] <- total_reward_weighted[selected_arm_weighted] + y_weighted
    total_reward_traditional[selected_arm_traditional] <- total_reward_traditional[selected_arm_traditional] + y_traditional
    # Update the success and failure counts for both algorithms
    params_weighted$success_count[selected_arm_weighted] <- params_weighted$success_count[selected_arm_weighted] + y_weighted
    params_weighted$failure_count[selected_arm_weighted] <- params_weighted$failure_count[selected_arm_weighted] + (1 - y_weighted)
    params_traditional$success_count[selected_arm_traditional] <- params_traditional$success_count[selected_arm_traditional] + y_traditional
    params_traditional$failure_count[selected_arm_traditional] <- params_traditional$failure_count[selected_arm_traditional] + (1 - y_traditional)
    # Approximate posterior update: a full Laplace update every round is too
    # costly to run in R, so take a single gradient step on the posterior mean
    # with a pre-specified learning rate (the posterior covariance is left unchanged)
    if (t > K){
      learning_rate <- 0.1
      # Update for the weighted algorithm
      error_weighted <- y_weighted - p_weighted[selected_arm_weighted]  # prediction error
      params_weighted$mu[selected_arm_weighted] <- params_weighted$mu[selected_arm_weighted] + learning_rate * error_weighted
      # Update for the traditional algorithm
      error_traditional <- y_traditional - p_traditional[selected_arm_traditional]  # prediction error
      params_traditional$mu[selected_arm_traditional] <- params_traditional$mu[selected_arm_traditional] + learning_rate * error_traditional
    }
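    # Worked example (illustrative): if y = 1 and the sampled probability was
    # 0.6, error = 0.4 and mu moves up by 0.1 * 0.4 = 0.04; a failure (y = 0)
    # would move mu down by 0.1 * 0.6 = 0.06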
    # Update total rewards in the results table
    results_table$Weighted[1] <- sum(total_reward_weighted)
    results_table$Weighted[-1] <- total_reward_weighted
    results_table$Traditional[1] <- sum(total_reward_traditional)
    results_table$Traditional[-1] <- total_reward_traditional
  }
  # Return the per-round average reward and regret for both algorithms
  return(list(avg_traditional_reward = sum(total_reward_traditional) / N,
              avg_weighted_reward = sum(total_reward_weighted) / N,
              avg_traditional_regret = regret_traditional / N,
              avg_weighted_regret = regret_weighted / N))
}
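# For reference, a minimal sketch of the full per-arm Laplace update that the
# single gradient step above approximates, for this 1-D Bayesian logistic
# model with linear predictor z * w, where z = 1 + x_t * effect_cont. This
# helper (laplace_update) is illustrative only and is not called by the
# simulation; its name and the Newton iteration count are assumptions.
laplace_update <- function(mu0, sigma0_sq, z, y, n_iter = 10) {
  # Newton iterations to find the posterior mode of w given one observation y,
  # with prior w ~ N(mu0, sigma0_sq)
  w <- mu0
  for (i in 1:n_iter) {
    p <- 1 / (1 + exp(-z * w))
    grad <- z * (y - p) - (w - mu0) / sigma0_sq  # d log-posterior / dw
    hess <- -z^2 * p * (1 - p) - 1 / sigma0_sq   # d^2 log-posterior / dw^2
    w <- w - grad / hess                         # Newton step
  }
  p <- 1 / (1 + exp(-z * w))
  # Laplace approximation: posterior variance = inverse negative Hessian at the mode
  list(mu = w, sigma_sq = 1 / (z^2 * p * (1 - p) + 1 / sigma0_sq))
}
# Example: laplace_update(mu0 = 0, sigma0_sq = 1, z = 1.2, y = 1)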
# Simulation for WAPTS (WEBCONF)
# Traditional contextual bandit simulation, single run:
# run_one_time(20, 300, 0.3)

# Key function: run the simulation n times
simulation_n_times <- function(K, N, effect_size, num_sim = 1000){
  traditional_rewards <- c()
  weighted_rewards <- c()
  traditional_regrets <- c()
  weighted_regrets <- c()
  for (i in 1:num_sim){
    data <- run_one_time(K, N, effect_size)
    traditional_rewards <- append(traditional_rewards, data$avg_traditional_reward)
    weighted_rewards <- append(weighted_rewards, data$avg_weighted_reward)
    traditional_regrets <- append(traditional_regrets, data$avg_traditional_regret)
    weighted_regrets <- append(weighted_regrets, data$avg_weighted_regret)
  }
  return(list(traditional_rewards = traditional_rewards,
              weighted_rewards = weighted_rewards,
              traditional_regrets = traditional_regrets,
              weighted_regrets = weighted_regrets))
}
# Example: simulation_n_times(10, 200, 0.05, num_sim = 10)
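# For a reproducible quick run (illustrative):
# set.seed(42)
# sims <- simulation_n_times(K = 10, N = 200, effect_size = 0.05, num_sim = 10)
# mean(sims$weighted_regrets) - mean(sims$traditional_regrets)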
# Plotting

# Global settings
sample_sizes <- c(300)
num_arms <- 2:30
effect_sizes <- c(0, 0.1, 0.25, 0.4)
num_sim <- 10
# num_sim = 10 is a quick demo setting; use num_sim = 1000 when producing the
# final charts (it is slow). The shared version keeps 10 so others can run it.
# Please let me know if you encounter any issues.

# For average reward:
# metric = 'reward'
results <- tibble(
  effect_size = numeric(),
  num_arm = numeric(),
  traditional_reward = numeric(),
  weighted_reward = numeric(),
  traditional_lower = numeric(),
  traditional_upper = numeric(),
  weighted_lower = numeric(),
  weighted_upper = numeric(),
  sample_size = numeric()
)
for (sample_size in sample_sizes) {
  for (effect_size in effect_sizes) {
    for (k in num_arms) {
      data <- simulation_n_times(k, sample_size, effect_size, num_sim = num_sim)
      t <- data$traditional_rewards
      w <- data$weighted_rewards
      results <- results |> add_row(
        effect_size = effect_size,
        num_arm = k,
        traditional_reward = mean(t),
        weighted_reward = mean(w),
        traditional_lower = quantile(t, probs = 0.025),
        traditional_upper = quantile(t, probs = 0.975),
        weighted_lower = quantile(w, probs = 0.025),
        weighted_upper = quantile(w, probs = 0.975),
        sample_size = sample_size
      )
    }
  }
}
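# results now holds one row per (sample_size, effect_size, num_arm)
# combination: here 1 * 4 * 29 = 116 rows, each summarizing num_sim runs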
# Reshape to long format: one row per (setting, policy), with the point
# estimate and CI bounds as separate columns
results_long <- results |>
  pivot_longer(cols = starts_with("traditional_") | starts_with("weighted_"),
               names_to = "metric_policy", values_to = "value") |>
  separate(metric_policy, into = c("policy", "metric"), sep = "_") |>
  pivot_wider(names_from = "metric", values_from = "value") |>
  mutate(policy = if_else(policy == "traditional", "Traditional", "Weighted"))
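# results_long now has columns: effect_size, num_arm, sample_size, policy,
# reward, lower, upper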
# Ensure the policy column is a factor for consistent coloring
results_long$policy <- factor(results_long$policy, levels = c("Traditional", "Weighted"))
results_long <- results_long |>
  mutate(policy = recode(policy,
                         "Traditional" = "CTS",
                         "Weighted" = "WAPTS"),
         effect_size_label = case_when(
           effect_size == 0 ~ "No Effect Size (0)",
           effect_size == 0.1 ~ "Small Effect Size (0.1)",
           effect_size == 0.25 ~ "Medium Effect Size (0.25)",
           effect_size == 0.4 ~ "Large Effect Size (0.4)",
           TRUE ~ as.character(effect_size)  # fallback for unexpected values
         )) |>
  mutate(effect_size_label = factor(effect_size_label,
                                    levels = c("No Effect Size (0)",
                                               "Small Effect Size (0.1)",
                                               "Medium Effect Size (0.25)",
                                               "Large Effect Size (0.4)")))

# Plot
ggplot(data = results_long, aes(x = num_arm, y = reward, color = policy)) +
  geom_point(size = 0.5) +
  geom_smooth(se = TRUE, aes(fill = policy), alpha = 0.25) +
  facet_wrap(~effect_size_label, scales = "free") +
  scale_color_manual(values = c("CTS" = "blue", "WAPTS" = "red")) +
  scale_fill_manual(values = c("CTS" = "blue", "WAPTS" = "red")) +
  labs(y = "Reward", x = "Number of Arms", color = "Policy", fill = "Policy") +
  ggtitle("Comparison of Rewards: CTS vs. WAPTS, sample size = 300") +
  theme_minimal()

# For average regret:
# metric = 'regret'
results_2 <- tibble(
  effect_size = numeric(),
  num_arm = numeric(),
  traditional_regret = numeric(),
  weighted_regret = numeric(),
  traditional_lower = numeric(),
  traditional_upper = numeric(),
  weighted_lower = numeric(),
  weighted_upper = numeric(),
  sample_size = numeric()
)
for (sample_size in sample_sizes) {
  for (effect_size in effect_sizes) {
    for (k in num_arms) {
      data <- simulation_n_times(k, sample_size, effect_size, num_sim = num_sim)
      t <- data$traditional_regrets
      w <- data$weighted_regrets
      results_2 <- results_2 |> add_row(
        effect_size = effect_size,
        num_arm = k,
        traditional_regret = mean(t),
        weighted_regret = mean(w),
        traditional_lower = quantile(t, probs = 0.025),
        traditional_upper = quantile(t, probs = 0.975),
        weighted_lower = quantile(w, probs = 0.025),
        weighted_upper = quantile(w, probs = 0.975),
        sample_size = sample_size
      )
    }
  }
}

# Reshape to long format, as for the rewards
results_2_long <- results_2 |>
  pivot_longer(cols = starts_with("traditional_") | starts_with("weighted_"),
               names_to = "metric_policy", values_to = "value") |>
  separate(metric_policy, into = c("policy", "metric"), sep = "_") |>
  pivot_wider(names_from = "metric", values_from = "value") |>
  mutate(policy = if_else(policy == "traditional", "Traditional", "Weighted"))
# Ensure the policy column is a factor for consistent coloring
results_2_long$policy <- factor(results_2_long$policy, levels = c("Traditional", "Weighted"))
results_2_long <- results_2_long |>
  mutate(policy = recode(policy,
                         "Traditional" = "CTS",
                         "Weighted" = "WAPTS"),
         effect_size_label = case_when(
           effect_size == 0 ~ "No Effect Size (0)",
           effect_size == 0.1 ~ "Small Effect Size (0.1)",
           effect_size == 0.25 ~ "Medium Effect Size (0.25)",
           effect_size == 0.4 ~ "Large Effect Size (0.4)",
           TRUE ~ as.character(effect_size)  # fallback for unexpected values
         )) |>
  mutate(effect_size_label = factor(effect_size_label,
                                    levels = c("No Effect Size (0)",
                                               "Small Effect Size (0.1)",
                                               "Medium Effect Size (0.25)",
                                               "Large Effect Size (0.4)")))

# Plot
ggplot(data = results_2_long, aes(x = num_arm, y = regret, color = policy)) +
  geom_point(size = 0.5) +
  geom_smooth(se = TRUE, aes(fill = policy), alpha = 0.25) +
  facet_wrap(~effect_size_label, scales = "free") +
  scale_color_manual(values = c("CTS" = "blue", "WAPTS" = "red")) +
  scale_fill_manual(values = c("CTS" = "blue", "WAPTS" = "red")) +
  labs(y = "Regret", x = "Number of Arms", color = "Policy", fill = "Policy") +
  ggtitle("Comparison of Regrets: CTS vs. WAPTS, sample size = 300") +
  theme_minimal()