AP test application

Author

Fred Song

Published

August 18, 2023

Section 1 Policy Introductions

Section 1.1 Running Traditional UR

Section 1.1.1: Two-arm Uniform Random Sampling

# todo add induced-wald-z test: it also goes with the distribution
#' UR-Uniform Random Sampling Method - Comparison of Allocation Probability Test vs. Traditional Wald Z test
#'
#' Runs UR algorithm in two-armed case, for n batches to calculate an AP Test statistic for each arm

#' @param pa - actual prior probability of controlled group arm a (typically unknown in practice)
#' @param pb - actual prior probability of experimental arm b (typically unknown in practice)
#' @param n - total number of trials in one simulation 
#' @param batch - number of trials before posterior probability is updated, but in UR case it is sort of irrelevant
#' @param burnin - number of trials that run only Uniform Random (UR) sampling, before applying the UR algorithm for all remaining trials, and updating posterior probability every batch (using beta distribution)
#' @return vector containing WaldScore[n], reward[n], AP, and APT: the final Wald-Z-score, reward, allocation probability throughout the trial, and AP Test statistics for both arms, 

UR <- function(pa, pb, n, burnin, batch) {

  # List: tracking successes and failures under each arm
  # ex. [0,1,1,1,2,3,3,4,5,6], final index value indicates total number of successes/failures
  arm_a_successes <- c(0)
  arm_a_failures <- c(0)
  arm_b_successes <- c(0)
  arm_b_failures <- c(0)
  
  # List: tracking only the current number of successes and failures (Value changes for each iteration)
  arm_a_s <- 0
  arm_a_f <- 0
  arm_b_s <- 0
  arm_b_f <- 0

  # History that tracks if there was success in either arm for each trial (Variable not used in final results)
  # ex. [1,0,1,1,0,1] 1 indicates success for either arm, 0 indicates failure for both arms
  abl <- c()
  APT = 0 # AP Test statistic over the time, here we should be expecting this to be around 50% of the sample size n.
  
  # ---------- BURNIN PERIOD: use Uniform Random (UR) sampling only ----------
  # note: would be the same here burnin and the actual policy as the policy we are simulating is UR, just for style keeping :) (sorry OCD)
  for (i in 1:burnin) {

    draw_a <- runif(1) # Random probability of arm a being chosen
    draw_b <- runif(1) # Random probability of arm b being chosen

    # CASE 1: Arm a is chosen
    if (draw_a > draw_b) {
      # Arm a was successful  
      if (runif(1) < pa) {
        abl[i] <- 1
        arm_a_s <- arm_a_s + 1 # update arm a success count
      } 
      # Arm a was not successful
      else {       
        abl[i] <- 0
        arm_a_f <- arm_a_f + 1 # update arm a failure count
      }
    }

    # CASE 2: Arm b is chosen
    else {
      # Arm b was successful
      if (runif(1) < pb) {
        abl[i] <- 1
        arm_b_s <- arm_b_s + 1 # update arm b success count
      }
      # Arm b was not successful
      else {
        abl[i] <- 0
        arm_b_f <- arm_b_f + 1 # update arm b failure count
      }
    }
    arm_a_successes[i] <- arm_a_s
    arm_a_failures[i] <- arm_a_f
    arm_b_successes[i] <- arm_b_s
    arm_b_failures[i] <- arm_b_f 
    }
  
  # ---------- UR Period: use Uniform Random (UR) sampling only ----------
  
  
  for (i in (burnin+1):n) {
    
    m = i %% batch 

    draw_a <- runif(1) # Random probability of arm a being chosen
    draw_b <- runif(1) # Random probability of arm b being chosen

    # CASE 1: Arm a is chosen
    if (draw_a > draw_b) {
      # Arm a was successful  
      if (runif(1) < pa) {
        abl[i] <- 1
        arm_a_s <- arm_a_s + 1 # update arm a success count
      } 
      # Arm a was not successful
      else {       
        abl[i] <- 0
        arm_a_f <- arm_a_f + 1 # update arm a failure count
      }
    }

    # CASE 2: Arm b is chosen
    else {
      if(m == 0){
        APT = APT + 1
      }
      # Arm b was successful
      if (runif(1) < pb) {
        abl[i] <- 1
        arm_b_s <- arm_b_s + 1 # update arm b success count
      }
      # Arm b was not successful
      else {
        abl[i] <- 0
        arm_b_f <- arm_b_f + 1 # update arm b failure count
      }
    }

    # Tracking the build-up of total number of successes, failures, for arm a & b
    arm_a_successes[i] <- arm_a_s
    arm_a_failures[i] <- arm_a_f
    arm_b_successes[i] <- arm_b_s
    arm_b_failures[i] <- arm_b_f 
  }
  na <- arm_a_successes + arm_a_failures
  nb <- arm_b_successes + arm_b_failures
  AP <- nb/(na+nb) # Tracking of the changes of allocation probability over the time, it should stabilize at a 50% when sample size gets large enough

  # List: complete history of estimated posterior probability updates, for arm a & b success (final index, final posterior probability for each)
  pa_est <- arm_a_successes / na
  pb_est <- arm_b_successes / nb

  # Wald Score for confidence interval: for analyzing categorical data underlying chi-squared distribution vectors, instead of summation
  p_hat <- (arm_a_successes + arm_b_successes)/(na + nb)
  WaldScore <- (pb_est - pa_est) / sqrt(p_hat*(1-p_hat)*(1/na + 1/nb))
  
  # List: our expected reward, from both arms (number of times chosen * posterior success probability, for each arm)
  expected_reward <- (na[n] * pa + nb[n] * pb) / n
  # Lists out the average expected reward for each individual trial (final index, final expected reward value)
  actual_reward <- (arm_a_successes[n] + arm_b_successes[n])/n
  
  #return(list(WaldScore = WaldScore[n], reward = reward[n], AP = AP[n], APT = APT))
  return(c(WaldScore[n], expected_reward, actual_reward, AP[n], APT))
}

Section 1.1.2: multi-arm Uniform Random

#' UR Sampling Method - Comparison of Allocation Probability Test vs. Traditional Wald Z test
#'
#' Runs UR algorithm in two-armed case, for n batches to calculate an AP Test statistic for each arm

#' @param priors this is a list c() containing the success rate of each arm c(pa, pb, pc,...)
#' @param n - total number of trials in one simulation 
#' @param burnin - number of trials that run only Uniform Random (UR) sampling, before applying the TS algorithm for all remaining trials, and updating posterior probability every batch (using beta distribution)
#' @param batch - number of trials before posterior probability is updated, but in UR case it is sort of irrelevant
#'
#' @return vector containing WaldScore[n], reward[n], AP, and APT: the final Wald-Z-score, reward, allocation probability throughout the trial, and AP Test statistics for both arms, 
# or per say the CARA-FLGI method

UR_multiarm <- function(priors, n, burnin, batch) {
  # for an n arm setting, let's calculate the number of arms first
  num_arms <- length(priors)
  # List: tracking successes and failures under each arm
  # ex. [0,1,1,1,2,3,3,4,5,6], final index value indicates total number of successes/failures
  arm_successes <- array(0, dim=c(num_arms, n))
  arm_failures <- array(0, dim=c(num_arms, n))
  
  # List: tracking only the current number of successes and failures (Value changes for each iteration)
  arm_s <- rep(0, num_arms)
  arm_f <- rep(0, num_arms)

  # still keeping track of the allocation probability test statistcs under a multi-arm case
  APT = 0
  
  # keep the same abl list for record keeping, to see how many successes and failures along the way
  abl <- c()
  
  # ---------- BURNIN PERIOD: use Uniform Random (UR) sampling only ----------
  
  for (i in 1:burnin) {

    draw_arms <- runif(num_arms) # randomly drawing one arm out of all arms
    arm_chosen <- which.max(draw_arms) # settle down which arm to pick
    
    if(runif(1) < priors[arm_chosen]){
      abl[i] <- 1
      arm_s[arm_chosen] <- arm_s[arm_chosen] + 1 # update arm a success count
    }
    else{
      abl[i] <- 0
      arm_f[arm_chosen] <- arm_f[arm_chosen] + 1 # update arm a failure count
    }
    
    # Tracking the build-up of total number of successes, failures, for all the arms:
    arm_successes[ , i] <- arm_s
    arm_failures[ , i] <- arm_f
  }
  
  # ---------- AFTER BURNIN PERIOD: Use TS or UR depending on difference between posterior probabilities ----------
  
  for (i in (burnin + 1):n) {

    # Posterior probability updates happen by batch size (whenever m becomes 0, larger batch size -> slower updates)
    m = i %% batch 
    
    # Draw from Beta(alpha, beta) distribution, using only the current success & failure counts 
    # alpha = current successes, beta = current failures of each arm

    # CASE 1: Posterior probability of arms a & b is updated
    if (m == 0) {
      draw_arms <- runif(num_arms)
      # purely uniform sampling
      # assuming the last arm in the prior list is always the experimental arm and we are to determine if it is a superior treatment compare to all other arms
      if(draw_arms[num_arms] > 1/num_arms){
        APT = APT + 1
      }
    }
    
    # CASE 2: Posterior probability of arms a & b is not updated
    else {
      draw_arms <- runif(num_arms)
      # purely uniform random sampling
    }
    
    # settle the chosen arm:
    arm_chosen <- which.max(draw_arms)  # Settle down which arm to pick based on the highest sampled posterior room
    
    # Update arms history (abl), successes, and failures
    if (runif(1) < priors[arm_chosen]) {
      abl[i] <- 1
      arm_s[arm_chosen] <- arm_s[arm_chosen] + 1  # Update success count for chosen arm
    } else {
      abl[i] <- 0
      arm_f[arm_chosen] <- arm_f[arm_chosen] + 1  # Update failure count for chosen arm
    }  
    
    # Tracking the build-up of total number of successes, failures, for all arms, adding onto successes and failures from burnin period
    arm_successes[, i] <- arm_s
    arm_failures[, i] <- arm_f
  }

  # List: complete history of number of times arms a & b were chosen (final index, total number of times chosen for each)
  n_total <- arm_successes + arm_failures
  AP <- n_total[num_arms, ]/colSums(n_total)

  # List: complete history of estimated posterior probability updates, for arm a & b success (final index, final posterior probability for each)
  p_est <- arm_successes / n_total

  # Wald Score for confidence interval: for analyzing categorical data underlying chi-squared distribution vectors, instead of summation
  # we are calculating the Z test for whether or not the last arm is significantly better than the average of the other arms
  # Get the proportions of successes for the last arm and the other arms
  p_last_arm <- p_est[num_arms, n]
  
  
  # Get the sample sizes for the last arm and the other arms
  n_last_arm <- n_total[num_arms, n]
  n_other_arms <- n_total[-num_arms, n]
  
  # Calculate the weighted average success rate for the other arms
  p_other_arms <- sum(n_other_arms * p_est[-num_arms, n]) / sum(n_other_arms)
  
  # Calculate the pooled proportion of successes
  p_pooled <- (n_last_arm * p_last_arm + sum(n_other_arms) * p_other_arms) / (n_last_arm + sum(n_other_arms))
  
  # Calculate the Z-score
  WaldScore <- (p_last_arm - p_other_arms) / sqrt(p_pooled * (1 - p_pooled) * (1/n_last_arm + 1/sum(n_other_arms)))
  
  # List: our expected reward, from both arms (number of times chosen * posterior success probability, for each arm)
  expected_reward <- sum(n_total[, n] * priors) / n
  # Lists out the average expected reward for each individual trial (final index, final expected reward value)
  actual_reward <- sum(arm_successes[, n]) / n
  
  #return(list(WaldScore = WaldScore[n], reward = reward[n], AP = AP[n], APT = APT))
  return(c(WaldScore, expected_reward, actual_reward, AP[n], APT))
}

Section 1.2 Running Traditional TS

Section 1.2.1: 2-arm Traditional TS

#' TS-Thompson Sampling Method - Comparison of Allocation Probability Test vs. Traditional Wald Z test
#'
#' Runs TS algorithm in two-armed case, for n batches to calculate an AP Test statistic for each arm

#' @param pa - actual prior probability of controlled group arm a (typically unknown in practice)
#' @param pb - actual prior probability of experimental arm b (typically unknown in practice)
#' @param n - total number of trials in one simulation 
#' @param burnin - number of trials that run only Uniform Random (UR) sampling, before applying the TS algorithm for all remaining trials, and updating posterior probability every batch (using beta distribution)
#' @param batch - number of trials before posterior probability is updated, but in UR case it is sort of irrelevant
#'
#' @return vector containing WaldScore[n], reward[n], AP, and APT: the final Wald-Z-score, reward, allocation probability throughout the trial, and AP Test statistics for both arms, 
# or per say the CARA-FLGI method

TS <- function(pa, pb, n, burnin, batch) {

  # List: tracking successes and failures under each arm
  # ex. [0,1,1,1,2,3,3,4,5,6], final index value indicates total number of successes/failures
  arm_a_successes <- c(0)
  arm_a_failures <- c(0)
  arm_b_successes <- c(0)
  arm_b_failures <- c(0)
  
  # List: tracking only the current number of successes and failures (Value changes for each iteration)
  arm_a_s <- 0
  arm_a_f <- 0
  arm_b_s <- 0
  arm_b_f <- 0

  # History that tracks if there was success in either arm for each trial (Variable not used in final results)
  # ex. [1,0,1,1,0,1] 1 indicates success for either arm, 0 indicates failure for both arms
  abl <- c()
 
  APT = 0
  
  # ---------- BURNIN PERIOD: use Uniform Random (UR) sampling only ----------
  
  for (i in 1:burnin) {

    draw_a <- runif(1) # Random probability of arm a being chosen
    draw_b <- runif(1) # Random probability of arm b being chosen

    # CASE 1: Arm a is chosen
    if (draw_a > draw_b) {
      # Arm a was successful  
      if (runif(1) < pa) {
        abl[i] <- 1
        arm_a_s <- arm_a_s + 1 # update arm a success count
      } 
      # Arm a was not successful
      else {       
        abl[i] <- 0
        arm_a_f <- arm_a_f + 1 # update arm a failure count
      }
    }

    # CASE 2: Arm b is chosen
    else {
      # Arm b was successful
      if (runif(1) < pb) {
        abl[i] <- 1
        arm_b_s <- arm_b_s + 1 # update arm b success count
      }
      # Arm b was not successful
      else {
        abl[i] <- 0
        arm_b_f <- arm_b_f + 1 # update arm b failure count
      }
    }

    # Tracking the build-up of total number of successes, failures, for arm a & b
    arm_a_successes[i] <- arm_a_s
    arm_a_failures[i] <- arm_a_f
    arm_b_successes[i] <- arm_b_s
    arm_b_failures[i] <- arm_b_f 
  }
  
  # ---------- AFTER BURNIN PERIOD: Use TS or UR depending on difference between posterior probabilities ----------
  
  for (i in (burnin + 1):n) {

    # Posterior probability updates happen by batch size (whenever m becomes 0, larger batch size -> slower updates)
    m = i %% batch 
    
    # Draw from Beta(alpha, beta) distribution, using only the current success & failure counts 
    # alpha = current successes, beta = current failures of each arm

    # CASE 1: Posterior probability of arms a & b is updated
    if (m == 0) {
      draw_a <-
        rbeta(1, arm_a_successes[i - 1] + 1, arm_a_failures[i - 1] + 1) 
      draw_b <-
        rbeta(1, arm_b_successes[i - 1] + 1, arm_b_failures[i - 1] + 1) 
      if(draw_b > draw_a){
        APT = APT + 1
      }
    }
    
    # CASE 2: Posterior probability of arms a & b is not updated
    else {
      draw_a <-
        rbeta(1, arm_a_successes[i - m] + 1, arm_a_failures[i - m] + 1) 
      draw_b <-
        rbeta(1, arm_b_successes[i - m] + 1, arm_b_failures[i - m] + 1)
        
    # Using index [i - m] shows we are using the posterior probabilities from the previous batch, and not our most current success/failure counts
    }

    # Update arm a & b history (abl), successes, and failures
    if (draw_a > draw_b) {
      if (runif(1) < pa) {
        abl[i] <- 1
        arm_a_s <- arm_a_s + 1 # update arm a success count
      } else {
        abl[i] <- 0
        arm_a_f <- arm_a_f + 1 # update arm a failure count
      }
    } else{
      if (runif(1) < pb) {
        abl[i] <- 1
        arm_b_s <- arm_b_s + 1 #update arm b success count
      } else {
        abl[i] <- 0
        arm_b_f <- arm_b_f + 1 #update arm b failure count
      }
    }  
    
    # Tracking the build-up of total number of successes, failures, for arm a & b, adding onto successes and failures from burnin period
    arm_a_successes[i] <- arm_a_s
    arm_a_failures[i] <- arm_a_f
    arm_b_successes[i] <- arm_b_s
    arm_b_failures[i] <- arm_b_f
  }

  # List: complete history of number of times arms a & b were chosen (final index, total number of times chosen for each)
  na <- arm_a_successes + arm_a_failures
  nb <- arm_b_successes + arm_b_failures
  AP <- nb/(na+nb)

  # List: complete history of estimated posterior probability updates, for arm a & b success (final index, final posterior probability for each)
  pa_est <- arm_a_successes / na
  pb_est <- arm_b_successes / nb

  # Wald Score for confidence interval: for analyzing categorical data underlying chi-squared distribution vectors, instead of summation
  # WaldScore <- (pb_est - pa_est) / sqrt(pa_est * (1 - pa_est) / na + pb_est * (1 - pb_est) / nb)
  p_hat <- (arm_a_successes + arm_b_successes)/(na + nb)
  WaldScore <- (pb_est - pa_est) / sqrt(p_hat*(1-p_hat)*(1/na + 1/nb))
  
  # List: our expected reward, from both arms (number of times chosen * posterior success probability, for each arm)
  expected_reward <- (na[n] * pa + nb[n] * pb) / n
  # Lists out the average expected reward for each individual trial (final index, final expected reward value)
  actual_reward <- (arm_a_successes[n] + arm_b_successes[n])/n
  
  #return(list(WaldScore = WaldScore[n], reward = reward[n], AP = AP[n], APT = APT))
  return(c(WaldScore[n], expected_reward, actual_reward, AP[n], APT))
}

Section 1.2.2: multi-arm Traditional TS

#' TS-Thompson Sampling Method - Comparison of Allocation Probability Test vs. Traditional Wald Z test
#'
#' Runs TS algorithm in two-armed case, for n batches to calculate an AP Test statistic for each arm

#' @param priors this is a list c() containing the success rate of each arm c(pa, pb, pc,...)
#' @param n - total number of trials in one simulation 
#' @param burnin - number of trials that run only Uniform Random (UR) sampling, before applying the TS algorithm for all remaining trials, and updating posterior probability every batch (using beta distribution)
#' @param batch - number of trials before posterior probability is updated, but in UR case it is sort of irrelevant
#'
#' @return vector containing WaldScore[n], reward[n], AP, and APT: the final Wald-Z-score, reward, allocation probability throughout the trial, and AP Test statistics for both arms, 
# or per say the CARA-FLGI method

TS_multiarm <- function(priors, n, burnin, batch) {
  # for an n arm setting, let's calculate the number of arms first
  num_arms <- length(priors)
  # List: tracking successes and failures under each arm
  # ex. [0,1,1,1,2,3,3,4,5,6], final index value indicates total number of successes/failures
  arm_successes <- array(0, dim=c(num_arms, n))
  arm_failures <- array(0, dim=c(num_arms, n))
  
  # List: tracking only the current number of successes and failures (Value changes for each iteration)
  arm_s <- rep(0, num_arms)
  arm_f <- rep(0, num_arms)

  # still keeping track of the allocation probability test statistcs under a multi-arm case
  APT = 0
  
  # keep the same abl list for record keeping, to see how many successes and failures along the way
  abl <- c()
  
  # ---------- BURNIN PERIOD: use Uniform Random (UR) sampling only ----------
  
  for (i in 1:burnin) {

    draw_arms <- runif(num_arms) # randomly drawing one arm out of all arms
    arm_chosen <- which.max(draw_arms) # settle down which arm to pick
    
    if(runif(1) < priors[arm_chosen]){
      abl[i] <- 1
      arm_s[arm_chosen] <- arm_s[arm_chosen] + 1 # update arm a success count
    }
    else{
      abl[i] <- 0
      arm_f[arm_chosen] <- arm_f[arm_chosen] + 1 # update arm a failure count
    }
    
    # Tracking the build-up of total number of successes, failures, for all the arms:
    arm_successes[ , i] <- arm_s
    arm_failures[ , i] <- arm_f
  }
  
  # ---------- AFTER BURNIN PERIOD: Use TS or UR depending on difference between posterior probabilities ----------
  
  for (i in (burnin + 1):n) {

    # Posterior probability updates happen by batch size (whenever m becomes 0, larger batch size -> slower updates)
    m = i %% batch 
    
    # Draw from Beta(alpha, beta) distribution, using only the current success & failure counts 
    # alpha = current successes, beta = current failures of each arm

    # CASE 1: Posterior probability of arms a & b is updated
    if (m == 0) {
      draw_arms <- rbeta(num_arms, arm_successes[, i - 1] + 1, arm_failures[, i - 1] + 1)
      # assuming the last arm in the prior list is always the experimental arm and we are to determine if it is a superior treatment compare to all other arms
      if(draw_arms[num_arms] > 1/num_arms){
        APT = APT + 1
      }
    }
    
    # CASE 2: Posterior probability of arms a & b is not updated
    else {
      draw_arms <- rbeta(num_arms, arm_successes[, i - m] + 1, arm_failures[, i - m] + 1)
        
    # Using index [i - m] shows we are using the posterior probabilities from the previous batch, and not our most current success/failure counts
    }
    
    # settle the chosen arm:
    arm_chosen <- which.max(draw_arms)  # Settle down which arm to pick based on the highest sampled posterior room
    
    # Update arms history (abl), successes, and failures
    if (runif(1) < priors[arm_chosen]) {
      abl[i] <- 1
      arm_s[arm_chosen] <- arm_s[arm_chosen] + 1  # Update success count for chosen arm
    } else {
      abl[i] <- 0
      arm_f[arm_chosen] <- arm_f[arm_chosen] + 1  # Update failure count for chosen arm
    }  
    
    # Tracking the build-up of total number of successes, failures, for all arms, adding onto successes and failures from burnin period
    arm_successes[, i] <- arm_s
    arm_failures[, i] <- arm_f
  }

  # List: complete history of number of times arms a & b were chosen (final index, total number of times chosen for each)
  n_total <- arm_successes + arm_failures
  AP <- n_total[num_arms, ]/colSums(n_total)

  # List: complete history of estimated posterior probability updates, for arm a & b success (final index, final posterior probability for each)
  p_est <- arm_successes / n_total

  # Wald Score for confidence interval: for analyzing categorical data underlying chi-squared distribution vectors, instead of summation
  # we are calculating the Z test for whether or not the last arm is significantly better than the average of the other arms
  # Get the proportions of successes for the last arm and the other arms
  p_last_arm <- p_est[num_arms, n]
  
  
  # Get the sample sizes for the last arm and the other arms
  n_last_arm <- n_total[num_arms, n]
  n_other_arms <- n_total[-num_arms, n]
  
  # Calculate the weighted average success rate for the other arms
  p_other_arms <- sum(n_other_arms * p_est[-num_arms, n]) / sum(n_other_arms)
  
  # Calculate the pooled proportion of successes
  p_pooled <- (n_last_arm * p_last_arm + sum(n_other_arms) * p_other_arms) / (n_last_arm + sum(n_other_arms))
  
  # Calculate the Z-score
  WaldScore <- (p_last_arm - p_other_arms) / sqrt(p_pooled * (1 - p_pooled) * (1/n_last_arm + 1/sum(n_other_arms)))
  
  # List: our expected reward, from both arms (number of times chosen * posterior success probability, for each arm)
  expected_reward <- sum(n_total[, n] * priors) / n
  # Lists out the average expected reward for each individual trial (final index, final expected reward value)
  actual_reward <- sum(arm_successes[, n]) / n
  
  #return(list(WaldScore = WaldScore[n], reward = reward[n], AP = AP[n], APT = APT))
  return(c(WaldScore, expected_reward, actual_reward, AP[n], APT))
}

Section 1.3 Running Epsilon greedy TS

#' Epsilon-TS-Thompson Sampling Method - Comparison of Allocation Probability Test vs. Traditional Wald Z test
#'
#' Runs epsilon greedy TS algorithm in two-armed case, for n batches to calculate an AP Test statistic for each arm

#' @param pa - actual prior probability of controlled group arm a (typically unknown in practice)
#' @param pb - actual prior probability of experimental arm b (typically unknown in practice)
#' @param n - total number of trials in one simulation 
#' @param burnin - number of trials that run only Uniform Random (UR) sampling, before applying the TS algorithm for all remaining trials, and updating posterior probability every batch (using beta distribution)
#' @param batch - number of trials before posterior probability is updated, but in UR case it is sort of irrelevant
#' @param epsilon - this is the epsilon (or proportion) we defined previous to simulations at a certain level for UR sampling within the simulations
#' @return vector containing WaldScore[n], reward[n], AP, and APT: the final Wald-Z-score, reward, allocation probability throughout the trial, and AP Test statistics for both arms, 
# or per say the CARA-FLGI method
TS_epsilon <- function(pa, pb, n, burnin, batch, epsilon) {
  
  # List: tracking successes and failures under each arm
  # ex. [0,1,1,1,2,3,3,4,5,6], final index value indicates total number of successes/failures
  arm_a_successes <- c(0)
  arm_a_failures <- c(0)
  arm_b_successes <- c(0)
  arm_b_failures <- c(0)
  
  # List: tracking only the current number of successes and failures (Value changes for each iteration)
  arm_a_s <- 0
  arm_a_f <- 0
  arm_b_s <- 0
  arm_b_f <- 0

  # History that tracks if there was success in either arm for each trial (Variable not used in final results)
  # ex. [1,0,1,1,0,1] 1 indicates success for either arm, 0 indicates failure for both arms
  abl <- c()
 
  APT = 0 # AP Test statistic
  
  # ---------- BURNIN PERIOD: use Uniform Random (UR) sampling only ----------
  
  for (i in 1:burnin) {

    draw_a <- runif(1) # Random probability of arm a being chosen
    draw_b <- runif(1) # Random probability of arm b being chosen

    # CASE 1: Arm a is chosen
    if (draw_a > draw_b) {
      # Arm a was successful  
      if (runif(1) < pa) {
        abl[i] <- 1
        arm_a_s <- arm_a_s + 1 # update arm a success count
      } 
      # Arm a was not successful
      else {       
        abl[i] <- 0
        arm_a_f <- arm_a_f + 1 # update arm a failure count
      }
    }

    # CASE 2: Arm b is chosen
    else {
      # Arm b was successful
      if (runif(1) < pb) {
        abl[i] <- 1
        arm_b_s <- arm_b_s + 1 # update arm b success count
      }
      # Arm b was not successful
      else {
        abl[i] <- 0
        arm_b_f <- arm_b_f + 1 # update arm b failure count
      }
    }

    # Tracking the build-up of total number of successes, failures, for arm a & b
    arm_a_successes[i] <- arm_a_s
    arm_a_failures[i] <- arm_a_f
    arm_b_successes[i] <- arm_b_s
    arm_b_failures[i] <- arm_b_f 
  }
  
  # ---------- AFTER BURNIN PERIOD: Use TS or UR depending on difference between posterior probabilities ----------
  
  for (i in (burnin + 1):n) {

    # Posterior probability updates happen by batch size (whenever m becomes 0, larger batch size -> slower updates)
    m = i %% batch 
    
    # Draw from Beta(alpha, beta) distribution, using only the current success & failure counts 
    # alpha = current successes, beta = current failures of each arm

    # Use UR: If difference between posterior probabilities of arms is less than threshold value (expected difference in reward between arms)
    if (runif(1) < epsilon) {
      draw_a <- runif(1) # Random probability of arm a being chosen
      draw_b <- runif(1) # Random probability of arm b being chosen
    } 
    
    # Use TS: If difference between posterior probabilities of arms crosses threshold value (expected difference in reward between arms)
    else {
      # CASE 1: Posterior probability of arms a & b is updated
      if (m == 0) {
        draw_a <-
          rbeta(1, arm_a_successes[i - 1] + 1, arm_a_failures[i - 1] + 1)
        draw_b <-
          rbeta(1, arm_b_successes[i - 1] + 1, arm_b_failures[i - 1] + 1)
        if (draw_b > draw_a){
          APT = APT +1
        }
      }
      # CASE 2: Posterior probability of arms a & b is not updated
      else {
        draw_a <-
          rbeta(1, arm_a_successes[i - m] + 1, arm_a_failures[i - m] + 1)
        draw_b <-
          rbeta(1, arm_b_successes[i - m] + 1, arm_b_failures[i - m] + 1)
      }
    }

    # Update arm a & b history (abl), successes, and failures
    if (draw_a > draw_b) {
      if (runif(1) < pa) {
        abl[i] <- 1
        arm_a_s <- arm_a_s + 1 # update arm a success count
      } else {
        abl[i] <- 0
        arm_a_f <- arm_a_f + 1 # update arm a failure count
      }
    } else{
      if (runif(1) < pb) {
        abl[i] <- 1
        arm_b_s <- arm_b_s + 1 #update arm b success count
      } else {
        abl[i] <- 0
        arm_b_f <- arm_b_f + 1 #update arm b failure count
      }
    }  
    
    # Tracking the build-up of total number of successes, failures, for arm a & b, adding onto successes and failures from burnin period
    arm_a_successes[i] <- arm_a_s
    arm_a_failures[i] <- arm_a_f
    arm_b_successes[i] <- arm_b_s
    arm_b_failures[i] <- arm_b_f
  }

  # List: complete history of number of times arms a & b were chosen (final index, total number of times chosen for each)
  na <- arm_a_successes + arm_a_failures
  nb <- arm_b_successes + arm_b_failures
  AP <- nb/(na+nb)

  # List: complete history of estimated posterior probability updates, for arm a & b success (final index, final posterior probability for each)
  pa_est <- arm_a_successes / na
  pb_est <- arm_b_successes / nb

  # Wald Score for confidence interval: for analyzing categorical data underlying chi-squared distribution vectors, instead of summation
  #WaldScore <- (pb_est - pa_est) / sqrt(pa_est * (1 - pa_est) / na + pb_est * (1 - pb_est) / nb)
  p_hat <- (arm_a_successes + arm_b_successes)/(na + nb)
  WaldScore <- (pb_est - pa_est) / sqrt(p_hat*(1-p_hat)*(1/na + 1/nb))
  
  # List: our expected reward, from both arms (number of times chosen * posterior success probability, for each arm)
  expected_reward <- (na[n] * pa + nb[n] * pb) / n
  # Lists out the average expected reward for each individual trial (final index, final expected reward value)
  actual_reward <- (arm_a_successes[n] + arm_b_successes[n])/n
  
  #return(list(WaldScore = WaldScore[n], reward = reward[n], AP = AP[n], APT = APT))
  return(c(WaldScore[n], expected_reward, actual_reward, AP[n], APT))
}

Section 1.4 Running TS-PostDiff (Code adapted from Tong)

#' TS-PostDiff - Allocation Probability Test
#'
#' Runs TS-PostDiff algorithm in two-armed case, for n batches, with given batch size and BURNIN period, to calculate an AP Test statistic for each arm

#' @param pa - actual prior probability of arm a (typically unknown in practice)
#' @param pb - actual prior probability of arm b (typically unknown in practice)
#' @param n - total number of trials in one simulation 
#' @param c - threshold of difference in expected reward between the two arms
#' @param burnin - number of trials that run only Uniform Random (UR) sampling, before applying the TS-PostDiff algorithm for all remaining trials, and updating posterior probability every batch (using beta distribution)
#' @param batch - number of trials before posterior probability is updated
#'
#' @return vector containing WaldScore[n], reward[n], APT_1, APT_2: the final waldscore, reward, and AP Test statistics for both arms

TSPDD <- function(pa, pb, n, burnin, batch, c) {

  # List: tracking successes and failures under each arm
  # ex. [0,1,1,1,2,3,3,4,5,6], final index value indicates total number of successes/failures
  arm_a_successes <- c(0)
  arm_a_failures <- c(0)
  arm_b_successes <- c(0)
  arm_b_failures <- c(0)
  
  # List: tracking only the current number of successes and failures (Value changes for each iteration)
  arm_a_s <- 0
  arm_a_f <- 0
  arm_b_s <- 0
  arm_b_f <- 0

  # History that tracks if there was success in either arm for each trial (Variable not used in final results)
  # ex. [1,0,1,1,0,1] 1 indicates success for either arm, 0 indicates failure for both arms
  abl <- c()
 
  APT = 0 # AP Test statistic
  
  # ---------- BURNIN PERIOD: use Uniform Random (UR) sampling only ----------
  
  for (i in 1:burnin) {

    draw_a <- runif(1) # Random probability of arm a being chosen
    draw_b <- runif(1) # Random probability of arm b being chosen

    # CASE 1: Arm a is chosen
    if (draw_a > draw_b) {
      # Arm a was successful  
      if (runif(1) < pa) {
        abl[i] <- 1
        arm_a_s <- arm_a_s + 1 # update arm a success count
      } 
      # Arm a was not successful
      else {       
        abl[i] <- 0
        arm_a_f <- arm_a_f + 1 # update arm a failure count
      }
    }

    # CASE 2: Arm b is chosen
    else {
      # Arm b was successful
      if (runif(1) < pb) {
        abl[i] <- 1
        arm_b_s <- arm_b_s + 1 # update arm b success count
      }
      # Arm b was not successful
      else {
        abl[i] <- 0
        arm_b_f <- arm_b_f + 1 # update arm b failure count
      }
    }

    # Tracking the build-up of total number of successes, failures, for arm a & b
    arm_a_successes[i] <- arm_a_s
    arm_a_failures[i] <- arm_a_f
    arm_b_successes[i] <- arm_b_s
    arm_b_failures[i] <- arm_b_f 
  }
  
  # ---------- AFTER BURNIN PERIOD: Use TS or UR depending on difference between posterior probabilities ----------
  
  for (i in (burnin + 1):n) {

    # Posterior probability updates happen by batch size (whenever m becomes 0, larger batch size -> slower updates)
    m = i %% batch 
    
    # Draw from Beta(alpha, beta) distribution, using only the current success & failure counts 
    # alpha = current successes, beta = current failures of each arm
    # Use UR: If difference between posterior probabilities of arms is less than threshold value (expected difference in reward between arms)
    if (abs(draw_a - draw_b) < c) {
      draw_a <- runif(1) # Random probability of arm a being chosen
      draw_b <- runif(1) # Random probability of arm b being chosen
    } 
    
    # Use TS: If difference between posterior probabilities of arms crosses threshold value (expected difference in reward between arms)
    else {
      # CASE 1: Posterior probability of arms a & b is updated
      if (m == 0) {
        draw_a <-
          rbeta(1, arm_a_successes[i - 1] + 1, arm_a_failures[i - 1] + 1)
        draw_b <-
          rbeta(1, arm_b_successes[i - 1] + 1, arm_b_failures[i - 1] + 1)
        if (draw_b > draw_a){
          APT = APT + 1
        }
      }
      # CASE 2: Posterior probability of arms a & b is not updated
      else {
        draw_a <-
          rbeta(1, arm_a_successes[i - m] + 1, arm_a_failures[i - m] + 1)
        draw_b <-
          rbeta(1, arm_b_successes[i - m] + 1, arm_b_failures[i - m] + 1)
      }
    }

    # Update arm a & b history (abl), successes, and failures
    if (draw_a > draw_b) {
      if (runif(1) < pa) {
        abl[i] <- 1
        arm_a_s <- arm_a_s + 1 # update arm a success count
      } else {
        abl[i] <- 0
        arm_a_f <- arm_a_f + 1 # update arm a failure count
      }
    } else{
      if (runif(1) < pb) {
        abl[i] <- 1
        arm_b_s <- arm_b_s + 1 #update arm b success count
      } else {
        abl[i] <- 0
        arm_b_f <- arm_b_f + 1 #update arm b failure count
      }
    }  
    
    # Tracking the build-up of total number of successes, failures, for arm a & b, adding onto successes and failures from burnin period
    arm_a_successes[i] <- arm_a_s
    arm_a_failures[i] <- arm_a_f
    arm_b_successes[i] <- arm_b_s
    arm_b_failures[i] <- arm_b_f
  }

  # List: complete history of number of times arms a & b were chosen (final index, total number of times chosen for each)
  na <- arm_a_successes + arm_a_failures
  nb <- arm_b_successes + arm_b_failures

  # List: complete history of estimated posterior probability updates, for arm a & b success (final index, final posterior probability for each)
  pa_est <- arm_a_successes / na
  pb_est <- arm_b_successes / nb
  AP <- nb/(na+nb)

  # Wald Score for confidence interval: for analyzing categorical data underlying chi-squared distribution vectors, instead of summation
  #WaldScore <- (pb_est - pa_est) / sqrt(pa_est * (1 - pa_est) / na + pb_est * (1 - pb_est) / nb)
  p_hat <- (arm_a_successes + arm_b_successes)/(na + nb)
  WaldScore <- (pb_est - pa_est) / sqrt(p_hat*(1-p_hat)*(1/na + 1/nb))
  
  # List: our expected reward, from both arms (number of times chosen * posterior success probability, for each arm)
  expected_reward <- (na[n] * pa + nb[n] * pb) / n
  # Lists out the average expected reward for each individual trial (final index, final expected reward value)
  actual_reward <- (arm_a_successes[n] + arm_b_successes[n])/n
  
  #return(list(WaldScore = WaldScore[n], reward = reward[n], AP = AP[n], APT = APT))
  return(c(WaldScore[n], expected_reward, actual_reward, AP[n], APT))
}

Section 2: Finding Critical Value

Section 2.1 Finding Critical Values, Using Normal Assumption on Law of Large Numbers:

#' Critical Value (Normal)
#'
#' Finds critical value using normal assumption on LLN
#' @param APT - AP Test statistic (List of either APT_ban_UR/APT_w_UR results across different simulations)
#' @param alpha - Customized significance level
#' 
#' @return generated critical value from normality assumption

normal_cv <- function(APT, alpha) {
  m <- mean(APT) 
  v <- var(APT)
  sd <- sqrt(v)
  normal_rej <- qnorm(1 - alpha) #Rejection area defined (following normal distribution assumption)
  cv <- normal_rej * sd + m
  return(cv)
}

Section 2.2 Finding Critical Values, Using Empirical Approach

#' Critical Value (Empirical)
#'
#' Finds critical value using empirical assumption on LLN
#' @param APT - AP Test statistic (List of either APT_ban_UR/APT_w_UR results across different simulations)
#' @param alpha - Customized significance level
#' 
#' @return generated critical value from empirical simulations

empirical_cv <- function(APT, alpha) {
  cdf = 0
  i = 0 
  # Repeatedly add the empirical probabilities of each [i] value until cdf reaches 1 - significance level
  while (cdf <= 1- alpha) {
    p_i <-sum(APT == i)/length(APT)
    cdf <- sum(cdf, p_i)
    i = i + 1
  }
  return(i - 1) 
}

Section 2.3 Replicating and rerun the simulations for B replications

# One output for each simulation, the four dimensions are waldscore, average reward, APT_ban_UR, and APT_w_UR

simulation_n_times <-
  function(alpha,
           policy,
           effect_size,
           n,
           B,
           burnin,
           batch,
           multi_arm = FALSE, 
           ...) {
    if (multi_arm == FALSE) {
      pa <- 0.5 - effect_size / 2 # define the corresponding priors of the success rate centred and scaled at p1+p2 = 1
      pb <- 0.5 + effect_size / 2
      results <- array(dim = c(B, 5)) # output a four dimensional arrage
      # number of simulations
      for (i in 1:B) {
        results[i, ] <- policy(
          pa = pa,
          pb = pb,
          n = n,
          burnin = burnin,
          batch = batch,
          ...
        )
      }
      wald <- results[, 1]
      average_power = mean(results[, 1] > qnorm(1 - alpha)) # Find average power based on waldscore
      
      expected_reward = mean(results[, 2]) # expected reward setting
      average_reward = mean(results[, 3]) # calculate the actual real reward throughout many trials
      average_AP = mean(results[, 4]) # find average allocation probability for the experimental arm
      APT = results[, 5]
      # Empirical and normal CV of APT
      em_cv <- empirical_cv(APT, alpha)
      norm_cv <- normal_cv(APT, alpha)
      F1 <- ecdf(APT)
      # Empirical and normal FPR of APT is
      em_FPR <- 1 - F1(em_cv)
      norm_FPR <- 1 - F1(floor(norm_cv))
      return(
        list(
          average_power = average_power,
          expected_reward = expected_reward,
          average_reward = average_reward,
          average_AP = average_AP,
          em_cv = em_cv,
          norm_cv = norm_cv,
          em_FPR = em_FPR,
          norm_FPR = norm_FPR
        )
      )
    }
    if (class(multi_arm) == "numeric"){
      priors <- seq(from = 0.5 - effect_size/2, 
                    to = 0.5 + effect_size/2,
                    length.out = multi_arm)
      
      results <- array(dim = c(B, 5))
      for (i in 1:B) {
        # at this point of time there is no other policies like TS_contextual or TS_epsilon 
        results[i, ] <- policy(
          priors = priors,
          n = n,
          burnin = burnin,
          batch = batch
        )
      }
      wald <- results[, 1]
      average_power = mean(results[, 1] > qnorm(1 - alpha)) # Find average power based on waldscore
      
      expected_reward = mean(results[, 2]) # expected reward setting
      average_reward = mean(results[, 3]) # calculate the actual real reward throughout many trials
      average_AP = mean(results[, 4]) # find average allocation probability for the experimental arm
      APT = results[, 5]
      # Empirical and normal CV of APT
      em_cv <- empirical_cv(APT, alpha)
      norm_cv <- normal_cv(APT, alpha)
      F1 <- ecdf(APT)
      # Empirical and normal FPR of APT is
      em_FPR <- 1 - F1(em_cv)
      norm_FPR <- 1 - F1(floor(norm_cv))
      return(
        list(
          average_power = average_power,
          expected_reward = expected_reward,
          average_reward = average_reward,
          average_AP = average_AP,
          em_cv = em_cv,
          norm_cv = norm_cv,
          em_FPR = em_FPR,
          norm_FPR = norm_FPR
        )
      )
    }
    else{
      print('Error: Please double check your multi_arm input is numeric or FALSE')
    }
  }

Section 2.4: Construction of the reporting table:

Section 2.4.1: Comparing at a specific effect size and sample size:

# it takes in the same parameter as in before and output a comparison dataframe across different policies.
compare_policies <-
  function(alpha,
           effect_size,
           n,
           B,
           burnin,
           batch,
           multi_arm = 3,
           epsilon = 0.1,
           c = 0.1) {
    UR_result <-
      simulation_n_times(alpha, UR, effect_size, n, B, burnin, batch, multi_arm = FALSE)
    UR_multi_arm_result <-
      simulation_n_times(alpha, UR_multiarm, effect_size, n, B, burnin, batch, multi_arm)
    TS_result <-
      simulation_n_times(alpha, TS, effect_size, n, B, burnin, batch, multi_arm = FALSE)
    TS_multi_arm_result <-
      simulation_n_times(alpha, TS_multiarm, effect_size, n, B, burnin, batch, multi_arm)
    TS_epsilon_result <-
      simulation_n_times(alpha, TS_epsilon, effect_size, n, B, burnin, batch, multi_arm = FALSE, epsilon)
    TSPDD_result <-
      simulation_n_times(alpha, TSPDD, effect_size, n, B, burnin, batch, multi_arm = FALSE, c)
    
    
    # Create a summary table
    summary_table <- data.frame(
      Metric = c(
        "Average Power",
        "Expected Reward",
        "Average Reward",
        "Average AP",
        "Empirical CV",
        "Normal CV",
        "Empirical FPR",
        "Normal FPR"
      ),
      UR = c(
        UR_result$average_power,
        UR_result$expected_reward,
        UR_result$average_reward,
        UR_result$average_AP,
        UR_result$em_cv,
        UR_result$norm_cv,
        UR_result$em_FPR,
        UR_result$norm_FPR
      ),
      UR_multiarm = c(
        UR_multi_arm_result$average_power,
        UR_multi_arm_result$expected_reward,
        UR_multi_arm_result$average_reward,
        UR_multi_arm_result$average_AP,
        UR_multi_arm_result$em_cv,
        UR_multi_arm_result$norm_cv,
        UR_multi_arm_result$em_FPR,
        UR_multi_arm_result$norm_FPR
      ),
      TS = c(
        TS_result$average_power,
        TS_result$expected_reward,
        TS_result$average_reward,
        TS_result$average_AP,
        TS_result$em_cv,
        TS_result$norm_cv,
        TS_result$em_FPR,
        TS_result$norm_FPR
      ),
      TS_multiarm = c(
        TS_multi_arm_result$average_power,
        TS_multi_arm_result$expected_reward,
        TS_multi_arm_result$average_reward,
        TS_multi_arm_result$average_AP,
        TS_multi_arm_result$em_cv,
        TS_multi_arm_result$norm_cv,
        TS_multi_arm_result$em_FPR,
        TS_multi_arm_result$norm_FPR
      ),
      TS_epsilon = c(
        TS_epsilon_result$average_power,
        TS_epsilon_result$expected_reward,
        TS_epsilon_result$average_reward,
        TS_epsilon_result$average_AP,
        TS_epsilon_result$em_cv,
        TS_epsilon_result$norm_cv,
        TS_epsilon_result$em_FPR,
        TS_epsilon_result$norm_FPR
      ),
      TSPDD = c(
        TSPDD_result$average_power,
        TSPDD_result$expected_reward,
        TSPDD_result$average_reward,
        TSPDD_result$average_AP,
        TSPDD_result$em_cv,
        TSPDD_result$norm_cv,
        TSPDD_result$em_FPR,
        TSPDD_result$norm_FPR
      )
    )
    
    return(summary_table)
}

Section 2.4.2: Comparing at a range of effect size and sample size:

# notice that here the input of the effect_sizes is a list, all the others remain the same, this is a helper function that I created for easy comparison
compare_effect_sizes <- 
  function(alpha,
           effect_sizes,
           n,
           B,
           burnin,
           batch,
           multi_arm = 3, # setting default for 3-arm cases comparison
           epsilon = 0.1, # setting default
           c = 0.1) {
    UR_avg_reward <- c()
    UR_multiarm_avg_reward <-c()
    TS_avg_reward <- c()
    TS_multiarm_avg_reward <-c()
    TS_epsilon_avg_reward <- c()
    TSPDD_avg_reward <- c()
    
    UR_power <- c()
    UR_multiarm_power <- c()
    TS_power <- c()
    TS_multiarm_power <- c()
    TS_epsilon_power <- c()
    TSPDD_power <- c()
    
    # Assuming allocation probability is represented by Average AP
    UR_alloc_prob <- c()
    UR_multiarm_alloc_prob <- c()
    TS_alloc_prob <- c()
    TS_multiarm_alloc_prob <- c()
    TS_epsilon_alloc_prob <- c()
    TSPDD_alloc_prob <- c()
    
    for (effect_size in effect_sizes){
      result <- compare_policies(alpha = alpha,
                                 effect_size = effect_size, 
                                 n = n,
                                 B = B,
                                 burnin = burnin,
                                 batch = batch,
                                 multi_arm = multi_arm,
                                 epsilon = epsilon,
                                 c = c)
      # avg reward construction
      UR_avg_reward <- c(UR_avg_reward, sprintf("%.2f", result$UR[3]))
      UR_multiarm_avg_reward <- c(UR_multiarm_avg_reward, sprintf("%.2f", result$UR_multiarm[3]) )
      TS_avg_reward <- c(TS_avg_reward, sprintf("%.2f", result$TS[3]))
      TS_multiarm_avg_reward <- c(TS_multiarm_avg_reward, sprintf("%.2f", result$TS_multiarm[3]))
      TS_epsilon_avg_reward <- c(TS_epsilon_avg_reward, sprintf("%.2f", result$TS_epsilon[3]))
      TSPDD_avg_reward <- c(TSPDD_avg_reward, sprintf("%.2f", result$TSPDD[3]))
      
      # avg power construction
      UR_power <- c(UR_power, sprintf("%.2f", result$UR[1]))
      UR_multiarm_power <- c(UR_multiarm_power, sprintf("%.2f", result$UR_multiarm[1]))
      TS_power <- c(TS_power, sprintf("%.2f", result$TS[1]))
      TS_multiarm_power <- c(TS_multiarm_power, sprintf("%.2f", result$TS_multiarm[1]))
      TS_epsilon_power <- c(TS_epsilon_power, sprintf("%.2f", result$TS_epsilon[1]))
      TSPDD_power <- c(TSPDD_power, sprintf("%.2f", result$TSPDD[1]))
      
      # avg allocation probability construction
      UR_alloc_prob <- c(UR_alloc_prob, sprintf("%.2f", result$UR[4]))
      UR_multiarm_alloc_prob <- c(UR_multiarm_alloc_prob, sprintf("%.2f", result$UR_multiarm[4]))
      TS_alloc_prob <- c(TS_alloc_prob, sprintf("%.2f", result$TS[4]))
      TS_multiarm_alloc_prob <- c(TS_multiarm_alloc_prob, sprintf("%.2f", result$TS_multiarm[4]))
      TS_epsilon_alloc_prob <- c(TS_epsilon_alloc_prob, sprintf("%.2f", result$TS_epsilon[4]))
      TSPDD_alloc_prob <- c(TSPDD_alloc_prob, sprintf("%.2f", result$TSPDD[4]))
    }
    final_summary_table <- data.frame(
      Metric = c("Average Reward", 
                 "Power",
                 "Allocation Probability"),
      UR = c(
        paste(UR_avg_reward, collapse = " | "),
        paste(UR_power, collapse = " | "),
        paste(UR_alloc_prob, collapse = " | ")
      ),
      UR_multiarm = c(
        paste(UR_multiarm_avg_reward, collapse = ' | '),
        paste(UR_multiarm_power, collapse = ' | '),
        paste(UR_multiarm_alloc_prob, collapse = ' | ')
      ),
      TS = c(
        paste(TS_avg_reward, collapse = " | "),
        paste(TS_power, collapse = " | "),
        paste(TS_alloc_prob, collapse = " | ")
      ),
      TS_multiarm = c(
        paste(TS_multiarm_avg_reward, collapse = ' | '),
        paste(TS_multiarm_power, collapse = ' | '),
        paste(TS_multiarm_alloc_prob, collapse = ' | ')
      ),
      TS_epsilon = c(
        paste(TS_epsilon_avg_reward, collapse = " | "),
        paste(TS_epsilon_power, collapse = " | "),
        paste(TS_epsilon_alloc_prob, collapse = " | ")
      ),
      TSPDD = c(
        paste(TSPDD_avg_reward, collapse = " | "),
        paste(TSPDD_power, collapse = " | "),
        paste(TSPDD_alloc_prob, collapse = " | ")
      ))
    
    return(final_summary_table)
  }

Section 3: Simulations:

Section 3.1: Global Settings

In this section we will provide some unified parameters across different simulations:

# let's make this a function
set.seed(0) # for results to be replicable
#calculate power and reward in case p0 < p1
alpha <- 0.05 # rejection region
effect_size <- 0 # by default effect size is 0: no difference between arms | effect size is 0.05-0.1: Intervention is of relatively small or marginal effect | effect size is 0.1-0.2: Medium or smal effect | effect size is 0.2-0.3+: relatively large effectt between interventions
n <- 100 # sample_size(total number of trials in one simulation)
burnin <- 0.1 * n # make by default the burin size is 10% of the total sample size that we have
B <- 1000 # number of simulations
batch <-  2 # batch_size

Section 3.2: Sample size n=50

n <- 50 # notice that this will change the gloabl setting of parameters

Section 3.2.1: Effect size = 0.0

compare_policies(alpha,effect_size, n, B,burnin,batch,epsilon=0.1,c=0.1)
           Metric       UR UR_multiarm       TS TS_multiarm TS_epsilon    TSPDD
1   Average Power  0.06100     0.05400  0.07500     0.06200    0.07400  0.08200
2 Expected Reward  0.50000     0.50000  0.50000     0.50000    0.50000  0.50000
3  Average Reward  0.49892     0.49846  0.49814     0.49810    0.50012  0.50224
4      Average AP  0.50342     0.33426  0.50174     0.33728    0.50704  0.50122
5    Empirical CV 14.00000    17.00000 19.00000    20.00000   17.00000 17.00000
6       Normal CV 13.81105    16.69130 19.44977    23.11388   17.84984 15.74270
7   Empirical FPR  0.02200     0.01900  0.02800     0.00000    0.04800  0.03800
8      Normal FPR  0.06700     0.05100  0.02800     0.00000    0.04800  0.08000

Section 3.2.2 Effect size = 0.05

compare_policies(alpha,effect_size = 0.05,n,B,burnin,batch,epsilon=0.1,c=0.1)
           Metric        UR UR_multiarm        TS TS_multiarm TS_epsilon
1   Average Power  0.102000   0.0830000  0.125000    0.088000   0.115000
2 Expected Reward  0.499988   0.4999255  0.502283    0.501595   0.502944
3  Average Reward  0.504080   0.5006000  0.502640    0.496720   0.501800
4      Average AP  0.499760   0.3312800  0.545660    0.359520   0.558880
5    Empirical CV 14.000000  17.0000000 20.000000   20.000000  18.000000
6       Normal CV 13.592118  16.8090778 20.771675   23.305910  18.689324
7   Empirical FPR  0.018000   0.0200000  0.000000    0.000000   0.028000
8      Normal FPR  0.054000   0.0650000  0.000000    0.000000   0.028000
      TSPDD
1  0.121000
2  0.502801
3  0.503260
4  0.556020
5 18.000000
6 17.159720
7  0.023000
8  0.055000

Section 3.2.3: Effect size = 0.1

compare_policies(alpha,effect_size = 0.1,n,B,burnin,batch,epsilon=0.1,c=0.1)
           Metric        UR UR_multiarm        TS TS_multiarm TS_epsilon
1   Average Power  0.188000    0.140000  0.203000    0.138000   0.191000
2 Expected Reward  0.499938    0.499857  0.512484    0.506353   0.509848
3  Average Reward  0.500700    0.500040  0.510240    0.507060   0.507840
4      Average AP  0.499380    0.330900  0.624840    0.402660   0.598480
5    Empirical CV 14.000000   17.000000 20.000000   20.000000  18.000000
6       Normal CV 13.778909   16.730109 21.756543   23.722987  19.582023
7   Empirical FPR  0.021000    0.014000  0.000000    0.000000   0.041000
8      Normal FPR  0.064000    0.055000  0.000000    0.000000   0.011000
      TSPDD
1  0.205000
2  0.509524
3  0.509780
4  0.595240
5 18.000000
6 18.339351
7  0.037000
8  0.037000

Section 3.2.4: Effect size = 0.2

compare_policies(alpha,effect_size = 0.2,n,B,burnin,batch,epsilon=0.1,c=0.1)
           Metric        UR UR_multiarm        TS TS_multiarm TS_epsilon
1   Average Power  0.408000    0.277000  0.387000     0.27200    0.38700
2 Expected Reward  0.500244    0.500114  0.542412     0.52615    0.53836
3  Average Reward  0.501360    0.499720  0.539640     0.52584    0.54148
4      Average AP  0.501220    0.335100  0.712060     0.47604    0.69180
5    Empirical CV 14.000000   17.000000 20.000000    20.00000   19.00000
6       Normal CV 13.571623   16.875222 22.481174    23.83220   20.56034
7   Empirical FPR  0.016000    0.017000  0.000000     0.00000    0.01600
8      Normal FPR  0.050000    0.059000  0.000000     0.00000    0.00000
      TSPDD
1  0.391000
2  0.535696
3  0.537400
4  0.678480
5 19.000000
6 20.283181
7  0.042000
8  0.000000

Section 3.2.5: Effect size = 0.3

compare_policies(alpha,effect_size = 0.3,n,B,burnin,batch,epsilon=0.1,c=0.1)
           Metric       UR UR_multiarm        TS TS_multiarm TS_epsilon
1   Average Power  0.73800    0.449000  0.618000    0.457000   0.606000
2 Expected Reward  0.49973    0.500216  0.583898    0.557015   0.575078
3  Average Reward  0.50018    0.503540  0.586200    0.556320   0.573440
4      Average AP  0.49910    0.333200  0.779660    0.550680   0.750260
5    Empirical CV 14.00000   17.000000 20.000000   20.000000  19.000000
6       Normal CV 13.56317   16.854308 22.739797   23.675730  20.896212
7   Empirical FPR  0.02000    0.018000  0.000000    0.000000   0.028000
8      Normal FPR  0.05700    0.067000  0.000000    0.000000   0.000000
      TSPDD
1  0.650000
2  0.574316
3  0.574300
4  0.747720
5 20.000000
6 21.368103
7  0.000000
8  0.000000

Section 3.2.6: Effect size = 0.4

compare_policies(alpha,effect_size = 0.4,n,B,burnin,batch,epsilon=0.1,c=0.1)
           Metric        UR UR_multiarm        TS TS_multiarm TS_epsilon
1   Average Power  0.913000    0.664000  0.803000     0.62600   0.851000
2 Expected Reward  0.499816    0.499188  0.630984     0.59500   0.621184
3  Average Reward  0.501280    0.501200  0.627320     0.59436   0.620800
4      Average AP  0.499540    0.331560  0.827460     0.61090   0.802960
5    Empirical CV 14.000000   17.000000 20.000000    20.00000  20.000000
6       Normal CV 13.709091   16.700920 22.136119    23.12041  20.609016
7   Empirical FPR  0.025000    0.012000  0.000000     0.00000   0.000000
8      Normal FPR  0.057000    0.055000  0.000000     0.00000   0.000000
      TSPDD
1  0.848000
2  0.622296
3  0.619220
4  0.805740
5 20.000000
6 21.992025
7  0.000000
8  0.000000

Section 3.2.7: Effect size = 0.5

compare_policies(alpha,effect_size = 0.5,n,B,burnin,batch,epsilon=0.1,c=0.1)
           Metric       UR UR_multiarm       TS TS_multiarm TS_epsilon    TSPDD
1   Average Power  0.98600     0.80500  0.92500     0.76800    0.93000  0.96200
2 Expected Reward  0.49964     0.50029  0.67961     0.64109    0.66423  0.66974
3  Average Reward  0.50092     0.50270  0.68092     0.64322    0.66510  0.66894
4      Average AP  0.49928     0.33576  0.85922     0.67514    0.82846  0.83948
5    Empirical CV 13.00000    17.00000 20.00000    20.00000   20.00000 20.00000
6       Normal CV 13.68636    16.86853 21.79609    22.64537   20.27195 21.75946
7   Empirical FPR  0.05000     0.02100  0.00000     0.00000    0.00000  0.00000
8      Normal FPR  0.05000     0.06600  0.00000     0.00000    0.00000  0.00000

Section 3.2.8: Summary Table for sample size = 50, effect_size = 0.0, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5:

n<-50

effect_sizes <- c(0.0, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5)

n_50 <- compare_effect_sizes(alpha,effect_sizes,n,B,burnin,batch,epsilon=0.1,c=0.1)

Section 3.3: Sample size n=100, effect_size = 0.0, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5:

n <- 100

effect_sizes <- c(0.0, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5)

n_100 <- compare_effect_sizes(alpha,effect_sizes = effect_sizes,n,B,burnin,batch,epsilon=0.1,c=0.1)

Section 3.4: Sample size n=200, effect_size = 0.0, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5:

n <- 200

effect_sizes <- c(0.0, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5)

n_200 <- compare_effect_sizes(alpha,effect_sizes = effect_sizes,n,B,burnin,batch,epsilon=0.1,c=0.1)

Section 3.5: Sample size n=300, effect_size = 0.0, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5:

n <- 300

effect_sizes <- c(0.0, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5)

n_300 <- compare_effect_sizes(alpha,effect_sizes = effect_sizes,n,B,burnin,batch,epsilon=0.1,c=0.1)

Section 3.6: joining the table in a long format

# Step 1: renaming the row names with corresponding sample size
rownames(n_50) <- paste(rownames(n_50), "(n=50)")
rownames(n_100) <- paste(rownames(n_100), "(n=100)")
rownames(n_200) <- paste(rownames(n_200), "(n=200)")
rownames(n_300) <- paste(rownames(n_300), "(n=300)")
library(stringr) # need to install this pacakage if needed
# Step 2: Concatenate these tables vertically
merged_table <- rbind(n_50, n_100, n_200, n_300)

# Separate metrics and sample sizes from the row names
metrics <- gsub("\\(.*", "", rownames(merged_table))
  sample_sizes <- gsub(".*\\(", "", gsub("\\).*", "", rownames(merged_table)))
  
  # Create a data.frame to hold the metrics, sample sizes, and row indices
ordering_df <- data.frame(
    Metric = metrics,
    Sample_Size = sample_sizes,
    Row_Index = 1:nrow(merged_table))
ordering_df$Sample_Size <- as.numeric(str_extract(sample_sizes, "\\d+"))
  
# Order the data.frame first by Metric, then by Sample_Size
ordering_df <- ordering_df[order(ordering_df$Metric, ordering_df$Sample_Size), ]

# Rearrange the rows of merged_table based on the ordered row indices
merged_table <- merged_table[ordering_df$Row_Index, ]

Section 3.7: Plotting the trend plot with respect to effect size for each metric

Section 3.7.0 Defining the reward graphing tool:

library(dplyr)
library(ggplot2)
library(stringr)
library(tidyr)
#' graphing_tool - graphical analysis on different metrics like reward, power, or allocation probabilities
#'
#' Takes the below input

#' @param merged_table - merged tables on above sample sizes
#' @metric the metric we want to see grpahical interpretation
#' @policies the polcies we want to investigate on
#' @return the graph of the trend for selected policis 
graphing_tool <- function(merged_table,
                          metric, 
                          policies = c()) {
  # Step 1: Filter the merged_table
  filtered_data <- merged_table |> filter(Metric == metric)
  
  # Step 2.1 Creating a helper function for splitting the data:
  split_and_convert <- function(value) {
    as.numeric(str_split(value, " \\| ")[[1]])
  }
  
  # Step 2.2: Split the values in the Value column into a numeric vector
  metric_values_UR <- c(sapply(filtered_data$UR, split_and_convert))
  metric_values_UR_multiarm <- c(sapply(filtered_data$UR_multiarm, split_and_convert))
  metric_values_TS <- c(sapply(filtered_data$TS, split_and_convert))
  metric_values_TS_multiarm <- c(sapply(filtered_data$TS_multiarm, split_and_convert))
  metric_values_TS_epsilon <- c(sapply(filtered_data$TS_epsilon, split_and_convert))
  metric_values_TSPDD <- c(sapply(filtered_data$TSPDD, split_and_convert))
  
  
  
  # Step 3: Create a data frame for plotting
  plot_data <- data.frame(
    expand_grid(
      Policy = c("UR", "UR_multiarm", "TS", "TS_multiarm", "TS_epsilon", "TSPDD"),
      Sample_size = c(50, 100, 200, 300),
      Effect_Size = effect_sizes
    ) %>% bind_cols(
      Metric = c(
        metric_values_UR,
        metric_values_UR_multiarm,
        metric_values_TS,
        metric_values_TS_multiarm,
        metric_values_TS_epsilon,
        metric_values_TSPDD
      )
    )
  )
  
  # Step 4: Use ggplot2 to plot the trend
  p <- ggplot(plot_data %>% filter(Policy %in% policies),
              aes(x = Effect_Size, y = Metric, color = Policy)) +
    geom_point(size = 0.5, alpha = 0.7) + geom_line(alpha = 0.7) +
    facet_wrap( ~ Sample_size) +
    labs(title = paste("Trend of ", metric ," across Effect Sizes and Sample Sizes"),
         x = "Effect Size",
         y = metric) +
    theme_minimal()
  
  return(p)
}

Section 3.7.1 For Reward:

graphing_tool(merged_table,
              'Average Reward',
              c('UR_multiarm', "TS_multiarm"))

Section 3.7.2 For Power:

graphing_tool(merged_table,
              'Power',
              c('UR_multiarm',
                "TS_multiarm"))