Simulation on magnitude of AP test

Simulation code base for L@S

Let’s simulate this on a simple example

Considerations of what kind of plot to give:

https://towardsdatascience.com/a-comparison-of-bandit-algorithms-24b4adfcabb

Multi-arm cases (baseline):

TS_multiarm:

TS_multi <- function(success_rates, n, burnin = 1, batch = 1) {
  # by default let's make sure the pb is the correct arm
  # List: tracking successes and failures under each arm
  # ex. [0,1,1,1,2,3,3,4,5,6], final index value indicates total number of successes/failures
  K = length(success_rates)
  # add delta:
  p_star <- max(success_rates)
  delta <- p_star - success_rates
  outcomes = list()
  for (k in 1:K){
    outcomes[[paste("arm", k, sep = "_")]] = rbinom(n, 1, success_rates[k])
  }

  arm_successes <- array(0, dim=c(K, n))
  arm_failures <- array(0, dim=c(K, n))
  
  # List: tracking only the current number of successes and failures (Value changes for each iteration)
  arm_s <- rep(0, K)
  arm_f <- rep(0, K)

  # History that tracks if there the correct arm is allocated
  # ex. [1,0,1,1,0,1] 1 indicates the correct arm was allocated for the trial, 0 indicates a wrong arm is allocated, we'll make the assumption that only the last arm is correct
  abl <- c()
  history <- c()
  regret = 0
  
  AP = c()
  
  # ---------- BURNIN PERIOD: use Uniform Random (UR) sampling only ----------
  
  for (i in 1:burnin) {
    
    draw_arms <- runif(K) # randomly drawing one arm out of all arms
    arm_chosen <- which.max(draw_arms) # settle down which arm to pick
    
    if(arm_chosen == K){
      abl[i] <- 1
    }
    else{
      abl[i] <- 0
    }
    history[i] <- outcomes[[paste("arm", arm_chosen, sep = "_")]][i]

    if (history[i] == 1) {
      arm_s[arm_chosen] <- arm_s[arm_chosen] + 1}
    else {
      arm_f[arm_chosen] <- arm_f[arm_chosen] + 1
      }

    # Tracking the build-up of total number of successes, failures, for arm a & b
    arm_successes[ , i] <- arm_s
    arm_failures[ , i] <- arm_f
    regret = regret + delta[arm_chosen]
  }
  
  # ---------- AFTER BURNIN PERIOD: Use TS or UR depending on difference between posterior probabilities ----------
  
  for (i in (burnin + 1):n) {

    # Posterior probability updates happen by batch size (whenever m becomes 0, larger batch size -> slower updates)
    m = i %% batch 
    
    # Draw from Beta(alpha, beta) distribution, using only the current success & failure counts 
    # alpha = current successes, beta = current failures of each arm

    # CASE 1: Posterior probability of arms a & b is updated
    if (m == 0) {
      draw_arms <- rbeta(K, arm_successes[, i - 1] + 1, arm_failures[, i - 1] + 1)
      
      # AP_i just for place holder for now, not really calculating it
      AP_i <- 0
      AP <- c(AP, AP_i)
    }
    # CASE 2: Posterior probability of arms a & b is not updated
    else {
      draw_arms <- rbeta(K, arm_successes[, i - m] + 1, arm_failures[, i - m] + 1)
        
    # Using index [i - m] shows we are using the posterior probabilities from the previous batch, and not our most current success/failure counts
    }
    # select the chosen arm to choose
    arm_chosen <- which.max(draw_arms)
    
    # adding on to the correct allocation rate
    if(arm_chosen == K){
      abl[i] <- 1
    }
    else{
      abl[i] <- 0
    }
    
    # record history of the success/failure of that select arm
    history[i] <- outcomes[[paste("arm", arm_chosen, sep = "_")]][i]
    
    
    # Update arm a & b history (abl), successes, and failures
    if (history[i] == 1) {
      arm_s[arm_chosen] <- arm_s[arm_chosen] + 1  # Update success count for chosen arm
    } else {
      arm_f[arm_chosen] <- arm_f[arm_chosen] + 1  # Update failure count for chosen arm
    }
    
    # Tracking the build-up of total number of successes, failures, for arm a & b, adding onto successes and failures from burnin period
    arm_successes[ , i] <- arm_s
    arm_failures[ , i] <- arm_f
    regret = regret + delta[arm_chosen]
  }

  # List: complete history of number of times arms a & b were chosen (final index, total number of times chosen for each)
  
  # todo: adding in the calculation on Wald_score:
  final_success <- arm_successes[,n]
  final_failure <- arm_failures[,n]
  
  Wald_score <- c()
  
  p_optimal <- 0 
  try(p_optimal <- final_success[K]/(final_failure[K] + final_success[K]))
  for (k in 1:(K-1)){
    if (final_success[k] == 0 & final_failure[k] == 0){
      p_arm <- 0
      p_avg <- p_optimal
    }
    else{
      p_arm <- final_success[k]/(final_failure[k] + final_success[k])
      p_avg <- (final_success[k] + final_success[K])/(final_failure[k] + final_success[k]+ final_failure[K] + final_success[K])
    }
    wald <- (p_optimal - p_arm) / sqrt(p_avg*(1-p_avg)*(1/(final_failure[k]+final_success[k]) + 1/(final_failure[K]+final_success[K])))
    Wald_score <- c(Wald_score, wald)
  }
  reject <-  all(Wald_score > qnorm(1-0.05)) 
  if (is.na(reject) == TRUE){
    reject <- FALSE 
  }
  #return(list(WaldScore = WaldScore[n], reward = reward[n], AP = AP[n], APT = APT))
  #return(c(WaldScore[n], expected_reward, actual_reward, AP[n], APT))
  return(list(arm_successes = arm_successes[,n],
              arm_failures = arm_failures[,n],
              reject = reject,
              correct_rate = sum(abl)/n,
              regret = regret/n))
}

# mock trial:
# TS_multi(success_rates = c(0.4,0.4,0.6), n = 1000)

UR_multiarm:

UR_multi <- function(success_rates, n, burnin = 1, batch = 1) {
  # by default let's make sure the pb is the correct arm
  # List: tracking successes and failures under each arm
  # ex. [0,1,1,1,2,3,3,4,5,6], final index value indicates total number of successes/failures
  K = length(success_rates)
  # add delta:
  p_star <- max(success_rates)
  delta <- p_star - success_rates
  outcomes = list()
  for (k in 1:K){
    outcomes[[paste("arm", k, sep = "_")]] = rbinom(n, 1, success_rates[k])
  }

  arm_successes <- array(0, dim=c(K, n))
  arm_failures <- array(0, dim=c(K, n))
  
  # List: tracking only the current number of successes and failures (Value changes for each iteration)
  arm_s <- rep(0, K)
  arm_f <- rep(0, K)

  # History that tracks if there the correct arm is allocated
  # ex. [1,0,1,1,0,1] 1 indicates the correct arm was allocated for the trial, 0 indicates a wrong arm is allocated, we'll make the assumption that only the last arm is correct
  abl <- c()
  history <- c()
  regret = 0
  
  AP = c()
  
  # ---------- BURNIN PERIOD: use Uniform Random (UR) sampling only ----------
  
  for (i in 1:n) {
    
    draw_arms <- runif(K) # randomly drawing one arm out of all arms
    arm_chosen <- which.max(draw_arms) # settle down which arm to pick
    
    if(arm_chosen == K){
      abl[i] <- 1
    }
    else{
      abl[i] <- 0
    }
    history[i] <- outcomes[[paste("arm", arm_chosen, sep = "_")]][i]

    if (history[i] == 1) {
      arm_s[arm_chosen] <- arm_s[arm_chosen] + 1}
    else {
      arm_f[arm_chosen] <- arm_f[arm_chosen] + 1
      }

    # Tracking the build-up of total number of successes, failures, for arm a & b
    arm_successes[ , i] <- arm_s
    arm_failures[ , i] <- arm_f
    regret = regret + delta[arm_chosen]
  }
  
  # todo: adding in the calculation on Wald_score:
  final_success <- arm_successes[,n]
  final_failure <- arm_failures[,n]
  
  Wald_score <- c()
  
  p_optimal <- 0 
  try(p_optimal <- final_success[K]/(final_failure[K] + final_success[K]))
  for (k in 1:(K-1)){
    if (final_success[k] == 0 & final_failure[k] == 0){
      p_arm <- 0
      p_avg <- p_optimal
    }
    else{
      p_arm <- final_success[k]/(final_failure[k] + final_success[k])
      p_avg <- (final_success[k] + final_success[K])/(final_failure[k] + final_success[k]+ final_failure[K] + final_success[K])
    }
    wald <- (p_optimal - p_arm) / sqrt(p_avg*(1-p_avg)*(1/(final_failure[k]+final_success[k]) + 1/(final_failure[K]+final_success[K])))
    Wald_score <- c(Wald_score, wald)
  }
  reject <-  all(Wald_score > qnorm(1-0.05)) 
  if (is.na(reject) == TRUE){
    reject <- FALSE 
  }
  
  # List: complete history of number of times arms a & b were chosen (final index, total number of times chosen for each)
  
  #return(list(WaldScore = WaldScore[n], reward = reward[n], AP = AP[n], APT = APT))
  #return(c(WaldScore[n], expected_reward, actual_reward, AP[n], APT))
  return(list(arm_successes = arm_successes[,n],
              arm_failures = arm_failures[,n],
              reject = reject,
              correct_rate = sum(abl)/n,
              regret = regret/n))
}

# mock trial:
# UR_multi(success_rates = c(0.4,0.4,0.6), n = 1000)

(Archived) A two arm binary reward case:

# TS <- function(pa, pb, n, burnin = 1, batch = 1) {
#   # by default let's make sure the pb is the correct arm
#   # List: tracking successes and failures under each arm
#   # ex. [0,1,1,1,2,3,3,4,5,6], final index value indicates total number of successes/failures
#   outcomes <- list(arm_a = rbinom(n,1,pa),
#                    arm_b = rbinom(n,1,pb))
#   arm_a_successes <- c(0)
#   arm_a_failures <- c(0)
#   arm_b_successes <- c(0)
#   arm_b_failures <- c(0)
#   
#   # List: tracking only the current number of successes and failures (Value changes for each iteration)
#   arm_a_s <- 0
#   arm_a_f <- 0
#   arm_b_s <- 0
#   arm_b_f <- 0
# 
#   # History that tracks if there the correct arm is allocated
#   # ex. [1,0,1,1,0,1] 1 indicates the correct arm was allocated for the trial, 0 indicates a wrong arm is allocated
#   abl <- c()
#   history <- c()
#   regret = 0
#   
#   AP = c()
#   
#   # ---------- BURNIN PERIOD: use Uniform Random (UR) sampling only ----------
#   
#   for (i in 1:burnin) {
# 
#     draw_a <- runif(1) # Random probability of arm a being chosen
#     draw_b <- runif(1) # Random probability of arm b being chosen
# 
#     # CASE 1: Arm a is chosen
#     if (draw_a > draw_b) {
#       abl[i] <- 0
#       history[i] <- outcomes$arm_a[i]
#       # Arm a was successful  
#       if (outcomes$arm_a[i] == 1) {
#         arm_a_s <- arm_a_s + 1 # update arm a success count
#       } 
#       # Arm a was not successful
#       else {
#         arm_a_f <- arm_a_f + 1 # update arm a failure count
#       }
#     }
# 
#     # CASE 2: Arm b is chosen
#     else {
#       abl[i] <- 1
#       history[i] <- outcomes$arm_b[i]
#       # Arm b was successful
#       if (outcomes$arm_b[i] == 1) {
#         arm_b_s <- arm_b_s + 1 # update arm b success count
#       }
#       # Arm b was not successful
#       else {
#         arm_b_f <- arm_b_f + 1 # update arm b failure count
#       }
#     }
# 
#     # Tracking the build-up of total number of successes, failures, for arm a & b
#     arm_a_successes[i] <- arm_a_s
#     arm_a_failures[i] <- arm_a_f
#     arm_b_successes[i] <- arm_b_s
#     arm_b_failures[i] <- arm_b_f
#     regret = regret + (max(c(outcomes$arm_a[i], outcomes$arm_b[i])) - history[i])
#   }
#   
#   # ---------- AFTER BURNIN PERIOD: Use TS or UR depending on difference between posterior probabilities ----------
#   
#   for (i in (burnin + 1):n) {
# 
#     # Posterior probability updates happen by batch size (whenever m becomes 0, larger batch size -> slower updates)
#     m = i %% batch 
#     
#     # Draw from Beta(alpha, beta) distribution, using only the current success & failure counts 
#     # alpha = current successes, beta = current failures of each arm
# 
#     # CASE 1: Posterior probability of arms a & b is updated
#     if (m == 0) {
#       draw_a <-
#         rbeta(1, arm_a_successes[i - 1] + 1, arm_a_failures[i - 1] + 1)
#       draw_b <-
#         rbeta(1, arm_b_successes[i - 1] + 1, arm_b_failures[i - 1] + 1)
#       exp_a <- (arm_a_successes[i - 1] + 1)/(arm_a_successes[i - 1] + 1 + arm_a_failures[i - 1] + 1)
#       exp_b <- (arm_b_successes[i - 1] + 1)/(arm_b_successes[i - 1] + 1 + arm_b_failures[i - 1] + 1)
#       AP_i <- exp_b/(exp_a + exp_b)
#       # AP_i <-
#       #   sum(
#       #     rbeta(5000, arm_b_successes[i - 1] + 1, arm_b_failures[i - 1] + 1) > rbeta(5000, arm_a_successes[i - 1] + 1, arm_a_failures[i - 1] + 1)
#       #     # do it this way so that it fits for all kinds of posteriors
#       #   ) / 5000
#       AP <- c(AP, AP_i)
#     }
#     # CASE 2: Posterior probability of arms a & b is not updated
#     else {
#       draw_a <-
#         rbeta(1, arm_a_successes[i - m] + 1, arm_a_failures[i - m] + 1) 
#       draw_b <-
#         rbeta(1, arm_b_successes[i - m] + 1, arm_b_failures[i - m] + 1)
#         
#     # Using index [i - m] shows we are using the posterior probabilities from the previous batch, and not our most current success/failure counts
#     }
# 
#     # Update arm a & b history (abl), successes, and failures
#     if (draw_a > draw_b) {
#       abl[i] <- 0
#       history[i] <- outcomes$arm_a[i]
#       if (outcomes$arm_a[i] == 1) {
#         arm_a_s <- arm_a_s + 1 # update arm a success count
#       } else {
#         arm_a_f <- arm_a_f + 1 # update arm a failure count
#       }
#     } else{
#       abl[i] <- 1
#       history[i] <- outcomes$arm_b[i]
#       if (outcomes$arm_b[i] == 1) {
#         arm_b_s <- arm_b_s + 1 #update arm b success count
#       } else {
#         arm_b_f <- arm_b_f + 1 #update arm b failure count
#       }
#     }  
#     
#     # Tracking the build-up of total number of successes, failures, for arm a & b, adding onto successes and failures from burnin period
#     arm_a_successes[i] <- arm_a_s
#     arm_a_failures[i] <- arm_a_f
#     arm_b_successes[i] <- arm_b_s
#     arm_b_failures[i] <- arm_b_f
#     regret = regret + (max(c(outcomes$arm_a[i], outcomes$arm_b[i])) - history[i])
#   }
# 
#   # List: complete history of number of times arms a & b were chosen (final index, total number of times chosen for each)
#   na <- arm_a_successes + arm_a_failures
#   nb <- arm_b_successes + arm_b_failures
#   # AP <- nb/(na+nb)
# 
#   # List: complete history of estimated posterior probability updates, for arm a & b success (final index, final posterior probability for each)
#   pa_est <- arm_a_successes / na
#   pb_est <- arm_b_successes / nb
# 
#   # Wald Score for confidence interval: for analyzing categorical data underlying chi-squared distribution vectors, instead of summation
#   # WaldScore <- (pb_est - pa_est) / sqrt(pa_est * (1 - pa_est) / na + pb_est * (1 - pb_est) / nb)
#   p_hat <- (arm_a_successes + arm_b_successes)/(na + nb)
#   WaldScore <- (pb_est - pa_est) / sqrt(p_hat*(1-p_hat)*(1/na + 1/nb))
#   
#   # List: our expected reward, from both arms (number of times chosen * posterior success probability, for each arm)
#   expected_reward <- (na[n] * pa + nb[n] * pb) / n
#   # Lists out the average expected reward for each individual trial (final index, final expected reward value)
#   actual_reward <- (arm_a_successes[n] + arm_b_successes[n])/n 
#   
#   #return(list(WaldScore = WaldScore[n], reward = reward[n], AP = AP[n], APT = APT))
#   #return(c(WaldScore[n], expected_reward, actual_reward, AP[n], APT))
#   return(list(arm_successes = c(arm_a_successes[n], arm_b_successes[n]),
#               arm_failures = c(arm_a_failures[n], arm_b_failures[n]),
#               AP = AP,
#               correct_rate = sum(abl)/n,
#               regret = regret/n))
# }

Key function: Simulating_n_times

simulation_n_times <- function(policy,policy_parameters, num_sim = 1000){
  rewards <- c()
  correct_rates <- c()
  rejects <- c()
  regrets <- c()
  for (i in 1:num_sim){
    data <- do.call(policy, policy_parameters)
    reward_i <- sum(data$arm_successes)/(sum(c(data$arm_successes, data$arm_failures)))
    correct_rate_i <- data$correct_rate
    regret_i <- data$regret
    reject_i <- data$reject
    
    rewards <- append(rewards, reward_i)
    correct_rates <- append(correct_rates, correct_rate_i)
    rejects <- append(rejects, reject_i)
    regrets <- append(regrets, regret_i)
  }
  return(list(avg_reward = mean(rewards),
              avg_correct_rate = mean(correct_rates),
              avg_regret = mean(regrets),
              power = sum(rejects)/length(rejects)))
}

simulation_n_times(TS_multi, list(success_rates = c(0.4,0.4,0.7), n = 300), num_sim = 100)

$avg_reward
[1] 0.6713667

$avg_correct_rate
[1] 0.9004

$avg_regret
[1] 0.02988

$power
[1] 0.96

# Global Setting:
sample_sizes = c(20, 50, 100, 150, 200, 300, 400, 500, 600, 700, 800, 900, 1000) 
effect_sizes = c(0.01, 0.05, 0.1, 0.2)
sd = 0.01
num_sim = 1000

For Two arm Comparisons

Type S error:

library(tidyverse)
metric = 'avg_correct_rate'

results = tibble(effect_size = numeric(),
                 metric = numeric(),
                 sample_size = numeric())
for(sample_size in sample_sizes) {
  for (effect_size in effect_sizes) {
    effect = abs(rnorm(1,effect_size, sd))
    pa = 0.5 - effect / 2
    pb = 0.5 + effect / 2
    n = sample_size
    data = simulation_n_times(TS_multi, list(success_rates = c(pa,pb), 
                                             n = n), 
                              num_sim = num_sim)
    results <- results |> add_row(
      effect_size = effect_size,
      metric = data[[metric]],
      sample_size = sample_size
    )
  }
}

results |> 
  ggplot(aes(x = sample_size, y = metric, color = as.factor(effect_size))) +
  geom_smooth() +
  scale_color_brewer(palette = "Set1", name = "Effect Size") + 
  ylab(metric) +
  ggtitle(paste('Plot of', metric, 'with respect to sample size'))

Mean Reward: (we might want to factor them out by group when later on looking on contextual factors)

metric = 'avg_reward'

results = tibble(effect_size = numeric(),
                 metric = numeric(),
                 sample_size = numeric())
for(sample_size in sample_sizes) {
  for (effect_size in effect_sizes) {
    effect = abs(rnorm(1,effect_size, sd))
    pa = 0.5 - effect / 2
    pb = 0.5 + effect / 2
    n = sample_size
    data = simulation_n_times(TS_multi, list(success_rates = c(pa,pb), 
                                             n = n), 
                              num_sim = num_sim)
    results <- results |> add_row(
      effect_size = effect_size,
      metric = data[[metric]],
      sample_size = sample_size
    )
  }
}

results |> 
  ggplot(aes(x = sample_size, y = metric, color = as.factor(effect_size))) +
  geom_smooth() +
  scale_color_brewer(palette = "Set1", name = "Effect Size") + 
  ylab(metric) +
  ggtitle(paste('Plot of', metric, 'with respect to sample size'))

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Mean Regret:

metric = 'avg_regret'

results = tibble(effect_size = numeric(),
                 metric = numeric(),
                 sample_size = numeric())
for(sample_size in sample_sizes) {
  for (effect_size in effect_sizes) {
    effect = abs(rnorm(1,effect_size, sd))
    pa = 0.5 - effect / 2
    pb = 0.5 + effect / 2
    n = sample_size
    data = simulation_n_times(TS_multi, list(success_rates = c(pa,pb), 
                                             n = n), 
                              num_sim = num_sim)
    results <- results |> add_row(
      effect_size = effect_size,
      metric = data[[metric]],
      sample_size = sample_size
    )
  }
}

results |> 
  ggplot(aes(x = sample_size, y = metric, color = as.factor(effect_size))) +
  geom_smooth() +
  scale_color_brewer(palette = "Set1", name = "Effect Size") + 
  ylab(metric) +
  ggtitle(paste('Plot of', metric, 'with respect to sample size'))

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Power:

metric = 'power'

results = tibble(effect_size = numeric(),
                 metric = numeric(),
                 sample_size = numeric())
for(sample_size in sample_sizes) {
  for (effect_size in effect_sizes) {
    effect = abs(rnorm(1,effect_size, sd))
    pa = 0.5 - effect / 2
    pb = 0.5 + effect / 2
    n = sample_size
    data = simulation_n_times(TS_multi, list(success_rates = c(pa,pb), 
                                             n = n), 
                              num_sim = num_sim)
    results <- results |> add_row(
      effect_size = effect_size,
      metric = data[[metric]],
      sample_size = sample_size
    )
  }
}

results |> 
  ggplot(aes(x = sample_size, y = metric, color = as.factor(effect_size))) +
  geom_smooth() +
  scale_color_brewer(palette = "Set1", name = "Effect Size") + 
  ylab(metric) +
  ggtitle(paste('Plot of', metric, 'with respect to sample size'))

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

For 3 arm case:

p1 = p2 < p3

Type S error:

metric = 'avg_correct_rate'

results = tibble(effect_size = numeric(),
                 metric = numeric(),
                 sample_size = numeric())
for(sample_size in sample_sizes) {
  for (effect_size in effect_sizes) {
    effect = abs(rnorm(1,effect_size, sd))
    pa <- 0.5 - effect / 2
    pb <- 0.5 - effect / 2
    pc <- 0.5 + effect / 2
    n = sample_size
    data = simulation_n_times(TS_multi, list(success_rates = c(pa,pb,pc), 
                                             n = n), 
                              num_sim = num_sim)
    results <- results |> add_row(
      effect_size = effect_size,
      metric = data[[metric]],
      sample_size = sample_size
    )
  }
}

results |> 
  ggplot(aes(x = sample_size, y = metric, color = as.factor(effect_size))) +
  geom_smooth() +
  scale_color_brewer(palette = "Set1", name = "Effect Size") + 
  ylab(metric) +
  ggtitle(paste('Plot of', metric, 'with respect to sample size'))

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Mean Reward:

metric = 'avg_reward'

results = tibble(effect_size = numeric(),
                 metric = numeric(),
                 sample_size = numeric())
for(sample_size in sample_sizes) {
  for (effect_size in effect_sizes) {
    effect = abs(rnorm(1,effect_size, sd))
    pa <- 0.5 - effect / 2
    pb <- 0.5 - effect / 2
    pc <- 0.5 + effect / 2
    n = sample_size
    data = simulation_n_times(TS_multi, list(success_rates = c(pa,pb,pc), 
                                             n = n), 
                              num_sim = num_sim)
    results <- results |> add_row(
      effect_size = effect_size,
      metric = data[[metric]],
      sample_size = sample_size
    )
  }
}

results |> 
  ggplot(aes(x = sample_size, y = metric, color = as.factor(effect_size))) +
  geom_smooth() +
  scale_color_brewer(palette = "Set1", name = "Effect Size") + 
  ylab(metric) +
  ggtitle(paste('Plot of', metric, 'with respect to sample size'))

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Avg Regret:

metric = 'avg_regret'

results = tibble(effect_size = numeric(),
                 metric = numeric(),
                 sample_size = numeric())
for(sample_size in sample_sizes) {
  for (effect_size in effect_sizes) {
    effect = abs(rnorm(1,effect_size, sd))
    pa <- 0.5 - effect / 2
    pb <- 0.5 - effect / 2
    pc <- 0.5 + effect / 2
    n = sample_size
    data = simulation_n_times(TS_multi, list(success_rates = c(pa,pb,pc), 
                                             n = n), 
                              num_sim = num_sim)
    results <- results |> add_row(
      effect_size = effect_size,
      metric = data[[metric]],
      sample_size = sample_size
    )
  }
}

results |> 
  ggplot(aes(x = sample_size, y = metric, color = as.factor(effect_size))) +
  geom_smooth() +
  scale_color_brewer(palette = "Set1", name = "Effect Size") + 
  ylab(metric) +
  ggtitle(paste('Plot of', metric, 'with respect to sample size'))

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Power:

metric = 'power'

results = tibble(effect_size = numeric(),
                 metric = numeric(),
                 sample_size = numeric())
for(sample_size in sample_sizes) {
  for (effect_size in effect_sizes) {
    effect = abs(rnorm(1,effect_size, sd))
    pa <- 0.5 - effect / 2
    pb <- 0.5 - effect / 2
    pc <- 0.5 + effect / 2
    n = sample_size
    data = simulation_n_times(TS_multi, list(success_rates = c(pa,pb,pc), 
                                             n = n), 
                              num_sim = num_sim)
    results <- results |> add_row(
      effect_size = effect_size,
      metric = data[[metric]],
      sample_size = sample_size
    )
  }
}

results |> 
  ggplot(aes(x = sample_size, y = metric, color = as.factor(effect_size))) +
  geom_smooth() +
  scale_color_brewer(palette = "Set1", name = "Effect Size") + 
  ylab(metric) +
  ggtitle(paste('Plot of', metric, 'with respect to sample size'))

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

p1 = p2 << p3

Type S error:

metric = 'avg_correct_rate'

results = tibble(effect_size = numeric(),
                 metric = numeric(),
                 sample_size = numeric())
for(sample_size in sample_sizes) {
  for (effect_size in effect_sizes) {
    effect = abs(rnorm(1,effect_size, sd))
    pa <- 0.2
    pb <- 0.2
    pc <- 0.5 + effect
    n = sample_size
    data = simulation_n_times(TS_multi, list(success_rates = c(pa,pb,pc), 
                                             n = n), 
                              num_sim = num_sim)
    results <- results |> add_row(
      effect_size = effect_size,
      metric = data[[metric]],
      sample_size = sample_size
    )
  }
}

results |> 
  ggplot(aes(x = sample_size, y = metric, color = as.factor(effect_size))) +
  geom_smooth() +
  scale_color_brewer(palette = "Set1", name = "Effect Size") + 
  ylab(metric) +
  ggtitle(paste('Plot of', metric, 'with respect to sample size'))

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Mean Reward:

metric = 'avg_reward'

results = tibble(effect_size = numeric(),
                 metric = numeric(),
                 sample_size = numeric())
for(sample_size in sample_sizes) {
  for (effect_size in effect_sizes) {
    effect = abs(rnorm(1,effect_size, sd))
    pa <- 0.2
    pb <- 0.2
    pc <- 0.5 + effect
    n = sample_size
    data = simulation_n_times(TS_multi, list(success_rates = c(pa,pb,pc), 
                                             n = n), 
                              num_sim = num_sim)
    results <- results |> add_row(
      effect_size = effect_size,
      metric = data[[metric]],
      sample_size = sample_size
    )
  }
}

results |> 
  ggplot(aes(x = sample_size, y = metric, color = as.factor(effect_size))) +
  geom_smooth() +
  scale_color_brewer(palette = "Set1", name = "Effect Size") + 
  ylab(metric) +
  ggtitle(paste('Plot of', metric, 'with respect to sample size'))

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Avg regret:

metric = 'avg_regret'

results = tibble(effect_size = numeric(),
                 metric = numeric(),
                 sample_size = numeric())
for(sample_size in sample_sizes) {
  for (effect_size in effect_sizes) {
    effect = abs(rnorm(1,effect_size, sd))
    pa <- 0.2
    pb <- 0.2
    pc <- 0.5 + effect
    n = sample_size
    data = simulation_n_times(TS_multi, list(success_rates = c(pa,pb,pc), 
                                             n = n), 
                              num_sim = num_sim)
    results <- results |> add_row(
      effect_size = effect_size,
      metric = data[[metric]],
      sample_size = sample_size
    )
  }
}

results |> 
  ggplot(aes(x = sample_size, y = metric, color = as.factor(effect_size))) +
  geom_smooth() +
  scale_color_brewer(palette = "Set1", name = "Effect Size") + 
  ylab(metric) +
  ggtitle(paste('Plot of', metric, 'with respect to sample size'))

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Avg Power:

metric = 'power'

results = tibble(effect_size = numeric(),
                 metric = numeric(),
                 sample_size = numeric())
for(sample_size in sample_sizes) {
  for (effect_size in effect_sizes) {
    effect = abs(rnorm(1,effect_size, sd))
    pa <- 0.2
    pb <- 0.2
    pc <- 0.5 + effect
    n = sample_size
    data = simulation_n_times(TS_multi, list(success_rates = c(pa,pb,pc), 
                                             n = n), 
                              num_sim = num_sim)
    results <- results |> add_row(
      effect_size = effect_size,
      metric = data[[metric]],
      sample_size = sample_size
    )
  }
}

results |> 
  ggplot(aes(x = sample_size, y = metric, color = as.factor(effect_size))) +
  geom_smooth() +
  scale_color_brewer(palette = "Set1", name = "Effect Size") + 
  ylab(metric) +
  ggtitle(paste('Plot of', metric, 'with respect to sample size'))

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

p1<< p2 <p3

Type S error:

metric = 'avg_correct_rate'

results = tibble(effect_size = numeric(),
                 metric = numeric(),
                 sample_size = numeric())
for(sample_size in sample_sizes) {
  for (effect_size in effect_sizes) {
    effect = abs(rnorm(1,effect_size, sd))
    pa <- 0.2
    pb <- 0.5 - effect/2
    pc <- 0.5 + effect/2
    n = sample_size
    data = simulation_n_times(TS_multi, list(success_rates = c(pa,pb,pc), 
                                             n = n), 
                              num_sim = num_sim)
    results <- results |> add_row(
      effect_size = effect_size,
      metric = data[[metric]],
      sample_size = sample_size
    )
  }
}

results |> 
  ggplot(aes(x = sample_size, y = metric, color = as.factor(effect_size))) +
  geom_smooth() +
  scale_color_brewer(palette = "Set1", name = "Effect Size") + 
  ylab(metric) +
  ggtitle(paste('Plot of', metric, 'with respect to sample size'))

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Mean Reward:

metric = 'avg_reward'

results = tibble(effect_size = numeric(),
                 metric = numeric(),
                 sample_size = numeric())
for(sample_size in sample_sizes) {
  for (effect_size in effect_sizes) {
    effect = abs(rnorm(1,effect_size, sd))
    pa <- 0.2
    pb <- 0.5 - effect/2
    pc <- 0.5 + effect/2
    n = sample_size
    data = simulation_n_times(TS_multi, list(success_rates = c(pa,pb,pc), 
                                             n = n), 
                              num_sim = num_sim)
    results <- results |> add_row(
      effect_size = effect_size,
      metric = data[[metric]],
      sample_size = sample_size
    )
  }
}

results |> 
  ggplot(aes(x = sample_size, y = metric, color = as.factor(effect_size))) +
  geom_smooth() +
  scale_color_brewer(palette = "Set1", name = "Effect Size") + 
  ylab(metric) +
  ggtitle(paste('Plot of', metric, 'with respect to sample size'))

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Avg Regret:

metric = 'avg_regret'

results = tibble(effect_size = numeric(),
                 metric = numeric(),
                 sample_size = numeric())
for(sample_size in sample_sizes) {
  for (effect_size in effect_sizes) {
    effect = abs(rnorm(1,effect_size, sd))
    pa <- 0.2
    pb <- 0.5 - effect/2
    pc <- 0.5 + effect/2
    n = sample_size
    data = simulation_n_times(TS_multi, list(success_rates = c(pa,pb,pc), 
                                             n = n), 
                              num_sim = num_sim)
    results <- results |> add_row(
      effect_size = effect_size,
      metric = data[[metric]],
      sample_size = sample_size
    )
  }
}

results |> 
  ggplot(aes(x = sample_size, y = metric, color = as.factor(effect_size))) +
  geom_smooth() +
  scale_color_brewer(palette = "Set1", name = "Effect Size") + 
  ylab(metric) +
  ggtitle(paste('Plot of', metric, 'with respect to sample size'))

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Avg Power:

metric = 'power'

results = tibble(effect_size = numeric(),
                 metric = numeric(),
                 sample_size = numeric())
for(sample_size in sample_sizes) {
  for (effect_size in effect_sizes) {
    effect = abs(rnorm(1,effect_size, sd))
    pa <- 0.2
    pb <- 0.5 - effect/2
    pc <- 0.5 + effect/2
    n = sample_size
    data = simulation_n_times(TS_multi, list(success_rates = c(pa,pb,pc), 
                                             n = n), 
                              num_sim = num_sim)
    results <- results |> add_row(
      effect_size = effect_size,
      metric = data[[metric]],
      sample_size = sample_size
    )
  }
}

results |> 
  ggplot(aes(x = sample_size, y = metric, color = as.factor(effect_size))) +
  geom_smooth() +
  scale_color_brewer(palette = "Set1", name = "Effect Size") + 
  ylab(metric) +
  ggtitle(paste('Plot of', metric, 'with respect to sample size'))

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# # also some exploration on Allocation Probability
# # note that this within one trial
# data <- data.frame(values = TS(0.5,0.5, 10000)$AP)
# ggplot(data, aes(x = values)) +
#   geom_histogram(fill = "#69b3a2", color = "black") +
#   labs(title = "Histogram of Values", x = "Value", y = "Frequency") +
#   theme_minimal()