Simulation for WAPTS WEBCONF

Traditional Contextual Bandit Simulation (CTS) vs. the Weighted Variant (WAPTS):
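For reference, here is a compact statement of the model as implemented in the code below; the notation is mine, reconstructed from the code. Each arm k keeps a Gaussian posterior (prior N(0, 1)) over a scalar weight w_k. Given a posterior draw of w_k and a binary context x_t, the sampled success probability and the WAPTS weight are

\[
p_k = \sigma\bigl(\tilde{w}_k\,(1 + c\,x_t)\bigr),
\qquad
\omega_k = \Bigl(1 + \frac{s_k}{n_k + 1}\Bigr)\Bigl(1 - \frac{f_k}{n_k + 1}\Bigr),
\]

where \(\sigma\) is the logistic function, c = 0.2 is the contextual effect, s_k and f_k are the arm's running success and failure counts, and n_k = s_k + f_k. WAPTS plays the arm maximizing \(\omega_k\, p_k\), while traditional Thompson sampling (CTS) plays the arm maximizing \(p_k\).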

# Load necessary libraries
library(MASS)  # for mvrnorm (multivariate Gaussian sampling)
library(tidyverse)

# Per-arm posterior parameters: Gaussian mean and covariance for each arm's
# weight, plus the success/failure counts used for the WAPTS weighting
initialize_params <- function(K) {
  list(mu = matrix(0, nrow = K, ncol = 1),
       Sigma = array(diag(1), dim = c(1, 1, K)),
       success_count = numeric(K),
       failure_count = numeric(K))
}

# Run one simulation of N rounds with K arms, comparing traditional Thompson
# sampling (CTS) against the weighted variant (WAPTS)
run_one_time <- function(K, N, effect_size){
  params_weighted <- initialize_params(K)
  params_traditional <- initialize_params(K)
  
  # Effect of the contextual factor on the success probability
  effect_cont <- 0.2
  # True success rates: arms 1..(K-1) are drawn near 0.5; the last arm has
  # rate 0.5 + effect_size
  true_success_rate <- c(abs(rnorm(K - 1, 0.5, 0.05)), 0.5 + effect_size)
  
  # Suboptimality gap of each arm, used to accumulate regret
  p_star <- max(true_success_rate)
  delta <- p_star - true_success_rate
  regret_traditional <- 0
  regret_weighted <- 0
  
  # Initialize reward counters
  total_reward_weighted <- numeric(K)
  total_reward_traditional <- numeric(K)
  
  # Table of total and per-arm rewards, kept for interactive inspection
  # (updated each round but not returned)
  results_table <- tibble(
    Metric = c("Total Reward", paste("Arm", 1:K)),
    Weighted = numeric(K + 1),
    Traditional = numeric(K + 1)
  )
  
  # Simulation
  for (t in 1:N) {
    # Draw a binary context (1 with probability 0.25)
    x_t <- rbinom(1, 1, 0.25)
    
    selected_arm_weighted <- 0
    selected_arm_traditional <- 0
    
    if (t <= K) {
      # Play each arm once during the first K rounds (both algorithms pull
      # the same arm, for a fair comparison)
      selected_arm_weighted <- t
      selected_arm_traditional <- t
    } else {
      p_weighted <- numeric(K)
      p_traditional <- numeric(K)
      
      for (k in 1:K) {
        # Sample a weight from each algorithm's posterior
        w_tilde_k_weighted <- mvrnorm(1, params_weighted$mu[k], params_weighted$Sigma[,,k])
        w_tilde_k_traditional <- mvrnorm(1, params_traditional$mu[k], params_traditional$Sigma[,,k])
        
        # Success probability via the logistic function: sigmoid(w * (1 + effect_cont * x_t))
        p_k_weighted <- 1 / (1 + exp(-x_t * w_tilde_k_weighted * effect_cont - w_tilde_k_weighted))
        p_k_traditional <- 1 / (1 + exp(-x_t * w_tilde_k_traditional * effect_cont - w_tilde_k_traditional))
        
        # WAPTS weight from the arm's empirical success and failure rates;
        # the +1 in the denominator avoids division by zero on the first pulls
        total_count <- params_weighted$success_count[k] + params_weighted$failure_count[k] + 1
        success_rate <- params_weighted$success_count[k] / total_count
        failure_rate <- params_weighted$failure_count[k] / total_count
        weight_k <- (1 + success_rate) * (1 - failure_rate)
        
        # Weight the computed probability for the weighted algorithm
        p_weighted[k] <- p_k_weighted * weight_k
        
        # Use the unweighted probability for traditional TS
        p_traditional[k] <- p_k_traditional
      }
      
      # Select the arm with the highest weighted/unweighted probability
      selected_arm_weighted <- which.max(p_weighted)
      selected_arm_traditional <- which.max(p_traditional)
      
      # Accumulate regret
      regret_weighted <- regret_weighted + delta[selected_arm_weighted]
      regret_traditional <- regret_traditional + delta[selected_arm_traditional]
    }
    
    # Simulate the reward based on the true success rate
    y_weighted <- rbinom(1, 1, true_success_rate[selected_arm_weighted])
    y_traditional <- rbinom(1, 1, true_success_rate[selected_arm_traditional])
    
    # Update total rewards
    total_reward_weighted[selected_arm_weighted] <- total_reward_weighted[selected_arm_weighted] + y_weighted
    total_reward_traditional[selected_arm_traditional] <- total_reward_traditional[selected_arm_traditional] + y_traditional
    
    # Update the success and failure counts for both algorithms
    params_weighted$success_count[selected_arm_weighted] <- params_weighted$success_count[selected_arm_weighted] + y_weighted
    params_weighted$failure_count[selected_arm_weighted] <- params_weighted$failure_count[selected_arm_weighted] + (1 - y_weighted)
    
    params_traditional$success_count[selected_arm_traditional] <- params_traditional$success_count[selected_arm_traditional] + y_traditional
    params_traditional$failure_count[selected_arm_traditional] <- params_traditional$failure_count[selected_arm_traditional] + (1 - y_traditional)
    
    # Approximate (Laplace-style) posterior update: a single gradient step on
    # the posterior mean with a fixed learning rate, since the full update is
    # too costly to run repeatedly in R. Note that Sigma is never updated, so
    # the sampling variance stays at its prior value.
    if (t > K){
      learning_rate <- 0.1
      # Update for the weighted algorithm
      error_weighted <- y_weighted - p_weighted[selected_arm_weighted]  # prediction error
      params_weighted$mu[selected_arm_weighted] <- params_weighted$mu[selected_arm_weighted] + learning_rate * error_weighted
      
      # Update for the traditional algorithm
      error_traditional <- y_traditional - p_traditional[selected_arm_traditional]  # prediction error
      params_traditional$mu[selected_arm_traditional] <- params_traditional$mu[selected_arm_traditional] + learning_rate * error_traditional
    }
    
    
    # Update total rewards in the table
    results_table$Weighted[1] <- sum(total_reward_weighted)
    results_table$Weighted[-1] <- total_reward_weighted
    
    results_table$Traditional[1] <- sum(total_reward_traditional)
    results_table$Traditional[-1] <- total_reward_traditional
  }
  
  # Return per-round averages of reward and regret for both algorithms
  return(list(avg_traditional_reward = sum(total_reward_traditional)/N,
              avg_weighted_reward = sum(total_reward_weighted)/N,
              avg_traditional_regret = regret_traditional/N,
              avg_weighted_regret = regret_weighted/N))
}
# Example single run: run_one_time(20, 300, 0.3)

Key function: running the simulation n times

simulation_n_times <- function(K, N, effect_size, num_sim = 1000){
  # Preallocate result vectors (one entry per simulation run)
  traditional_rewards <- numeric(num_sim)
  weighted_rewards <- numeric(num_sim)
  traditional_regrets <- numeric(num_sim)
  weighted_regrets <- numeric(num_sim)
  
  for (i in 1:num_sim){
    data <- run_one_time(K, N, effect_size)
    
    traditional_rewards[i] <- data$avg_traditional_reward
    weighted_rewards[i] <- data$avg_weighted_reward
    traditional_regrets[i] <- data$avg_traditional_regret
    weighted_regrets[i] <- data$avg_weighted_regret
  }
  return(list(traditional_rewards = traditional_rewards,
              weighted_rewards = weighted_rewards,
              traditional_regrets = traditional_regrets,
              weighted_regrets = weighted_regrets))
}
# Example: simulation_n_times(10, 200, 0.05, num_sim = 10)
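
A quick way to sanity-check one configuration before running the full plotting loops is to summarize the returned vectors directly. This is a minimal sketch; the names sim and summarize_policy are mine, not part of the original script:

# Sketch: mean and 95% interval for one configuration, mirroring the
# summaries the plotting loops below compute
sim <- simulation_n_times(10, 200, 0.05, num_sim = 10)
summarize_policy <- function(x) c(mean = mean(x), quantile(x, probs = c(0.025, 0.975)))
summarize_policy(sim$traditional_rewards)
summarize_policy(sim$weighted_rewards)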

Plotting:

# Global settings:
sample_sizes = c(300)
num_arms = 2:30
effect_sizes = c(0, 0.1, 0.25, 0.4)
num_sim = 10
# num_sim is set to 10 so the script runs quickly for demonstration and online
# sharing; use num_sim = 1000 when producing the charts, though it takes much longer
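
The simulation is stochastic, so exact numbers vary between runs. If reproducible output is desired, fixing the RNG seed before the loops is a reasonable addition (the seed value here is arbitrary):

# Optional: fix the seed so repeated runs give identical results
set.seed(2024)  # arbitrary value, for illustration only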

For Average Reward:


results = tibble(
  effect_size = numeric(),
  num_arm = numeric(),
  traditional_reward = numeric(),
  weighted_reward = numeric(),
  traditional_lower = numeric(), 
  traditional_upper = numeric(),
  weighted_lower = numeric(), 
  weighted_upper = numeric(), 
  sample_size = numeric()
)


for (sample_size in sample_sizes) {
  for (effect_size in effect_sizes) {
    for (k in num_arms) {
      data <- simulation_n_times(k, sample_size, effect_size, num_sim = num_sim)
      tr <- data$traditional_rewards
      w <- data$weighted_rewards
      results <- results |> add_row(
        effect_size = effect_size,
        num_arm = k,
        traditional_reward = mean(tr),
        weighted_reward = mean(w),
        traditional_lower = quantile(tr, probs = 0.025),
        traditional_upper = quantile(tr, probs = 0.975),
        weighted_lower = quantile(w, probs = 0.025),
        weighted_upper = quantile(w, probs = 0.975),
        sample_size = sample_size
      )
    }
  }
}

# Reshape to long format: one row per (policy, arm count), with columns
# reward, lower, and upper
results_long <- results |>
  pivot_longer(cols = starts_with("traditional_") | starts_with("weighted_"),
               names_to = "metric_policy", values_to = "value") |>
  separate(metric_policy, into = c("policy", "metric"), sep = "_") |>
  pivot_wider(names_from = "metric", values_from = "value") |>
  mutate(policy = if_else(policy == "traditional", "Traditional", "Weighted")) 

# Ensure the policy column is a factor for consistent coloring
results_long$policy <- factor(results_long$policy, levels = c("Traditional", "Weighted"))

results_long <- results_long |>
  mutate(policy = recode(policy, 
                         "Traditional" = "CTS", 
                         "Weighted" = "WAPTS"),
         effect_size_label = case_when(
           effect_size == 0 ~ "No Effect Size (0)",
           effect_size == 0.1 ~ "Small Effect Size (0.1)",
           effect_size == 0.25 ~ "Medium Effect Size (0.25)",
           effect_size == 0.4 ~ "Large Effect Size (0.4)",
           TRUE ~ as.character(effect_size)  # Fallback in case there are unexpected values
         )) |>
  mutate(effect_size_label = factor(effect_size_label,
                                    levels = c("No Effect Size (0)", 
                                               "Small Effect Size (0.1)", 
                                               "Medium Effect Size (0.25)", 
                                               "Large Effect Size (0.4)")))
# Plot: average reward vs. number of arms, faceted by effect size
ggplot(data = results_long, aes(x = num_arm, y = reward, color = policy)) +
  geom_point(size = 0.5) +
  geom_smooth(se = TRUE, aes(fill = policy), alpha = 0.25) +
  facet_wrap(~effect_size_label, scales = "free") +
  scale_color_manual(values = c("CTS" = "blue", "WAPTS" = "red")) +
  scale_fill_manual(values = c("CTS" = "blue", "WAPTS" = "red")) +
  labs(y = "Reward", x = "Number of Arms", color = "Policy", fill = "Policy") +
  ggtitle("Comparison of Rewards: CTS vs. WAPTS (sample size = 300)") +
  theme_minimal()
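
Note that geom_smooth's ribbon reflects the smoother's fit uncertainty, not the simulated quantiles. To show the 2.5%-97.5% band computed in the loop above instead, a sketch along the following lines should work, since results_long already carries lower and upper columns:

# Sketch: draw the simulated 95% quantile band directly with geom_ribbon
ggplot(results_long, aes(x = num_arm, y = reward, color = policy)) +
  geom_ribbon(aes(ymin = lower, ymax = upper, fill = policy), alpha = 0.2, color = NA) +
  geom_line() +
  facet_wrap(~effect_size_label, scales = "free") +
  scale_color_manual(values = c("CTS" = "blue", "WAPTS" = "red")) +
  scale_fill_manual(values = c("CTS" = "blue", "WAPTS" = "red")) +
  labs(y = "Reward", x = "Number of Arms", color = "Policy", fill = "Policy") +
  theme_minimal()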

For Average Regret:


results_2 = tibble(
  effect_size = numeric(),
  num_arm = numeric(),
  traditional_regret = numeric(),
  weighted_regret = numeric(),
  traditional_lower = numeric(), 
  traditional_upper = numeric(),
  weighted_lower = numeric(), 
  weighted_upper = numeric(), 
  sample_size = numeric()
)


for (sample_size in sample_sizes) {
  for (effect_size in effect_sizes) {
    for (k in num_arms) {
      data <- simulation_n_times(k, sample_size, effect_size, num_sim = num_sim)
      tr <- data$traditional_regrets
      w <- data$weighted_regrets
      results_2 <- results_2 |> add_row(
        effect_size = effect_size,
        num_arm = k,
        traditional_regret = mean(tr),
        weighted_regret = mean(w),
        traditional_lower = quantile(tr, probs = 0.025),
        traditional_upper = quantile(tr, probs = 0.975),
        weighted_lower = quantile(w, probs = 0.025),
        weighted_upper = quantile(w, probs = 0.975),
        sample_size = sample_size
      )
    }
  }
}
# Reshape to long format, as for the reward results
results_2_long <- results_2 |>
  pivot_longer(cols = starts_with("traditional_") | starts_with("weighted_"),
               names_to = "metric_policy", values_to = "value") |>
  separate(metric_policy, into = c("policy", "metric"), sep = "_") |>
  pivot_wider(names_from = "metric", values_from = "value") |>
  mutate(policy = if_else(policy == "traditional", "Traditional", "Weighted")) 

# Ensure the policy column is a factor for consistent coloring
results_2_long$policy <- factor(results_2_long$policy, levels = c("Traditional", "Weighted"))

results_2_long <- results_2_long |>
  mutate(policy = recode(policy, 
                         "Traditional" = "CTS", 
                         "Weighted" = "WAPTS"),
         effect_size_label = case_when(
           effect_size == 0 ~ "No Effect Size (0)",
           effect_size == 0.1 ~ "Small Effect Size (0.1)",
           effect_size == 0.25 ~ "Medium Effect Size (0.25)",
           effect_size == 0.4 ~ "Large Effect Size (0.4)",
           TRUE ~ as.character(effect_size)  # Fallback in case there are unexpected values
         )) |>
  mutate(effect_size_label = factor(effect_size_label,
                                    levels = c("No Effect Size (0)", 
                                               "Small Effect Size (0.1)", 
                                               "Medium Effect Size (0.25)", 
                                               "Large Effect Size (0.4)")))
# Plot: average regret vs. number of arms, faceted by effect size
ggplot(data = results_2_long, aes(x = num_arm, y = regret, color = policy)) +
  geom_point(size = 0.5) +
  geom_smooth(se = TRUE, aes(fill = policy), alpha = 0.25) +
  facet_wrap(~effect_size_label, scales = "free") +
  scale_color_manual(values = c("CTS" = "blue", "WAPTS" = "red")) +
  scale_fill_manual(values = c("CTS" = "blue", "WAPTS" = "red")) +
  labs(y = "Regret", x = "Number of Arms", color = "Policy", fill = "Policy") +
  ggtitle("Comparison of Regrets: CTS vs. WAPTS (sample size = 300)") +
  theme_minimal()
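
To export either figure for the paper or for sharing, ggsave saves the most recently rendered plot; a minimal sketch (the file name and dimensions are arbitrary choices):

# Sketch: save the last plot to disk
ggsave("wapts_vs_cts_regret.png", width = 8, height = 6, dpi = 300)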