AP test application

Author

Fred Song

Published

August 18, 2023

Section 1 Policy Introductions

Section 1.1 Running Traditional UR

#' UR-Uniform Random Sampling Method - Comparison of Allocation Probability Test vs. Traditional Wald Z test
#'
#' Runs UR algorithm in two-armed case, for n batches to calculate an AP Test statistic for each arm

#' @param pa - actual prior probability of controlled group arm a (typically unknown in practice)
#' @param pb - actual prior probability of experimental arm b (typically unknown in practice)
#' @param n - total number of trials in one simulation 
#' @param batch - number of trials before posterior probability is updated, but in UR case it is sort of irrelevant
#' @param burnin - number of trials that run only Uniform Random (UR) sampling, before applying the UR algorithm for all remaining trials, and updating posterior probability every batch (using beta distribution)
#' @return vector containing WaldScore[n], reward[n], AP, and APT: the final Wald-Z-score, reward, allocation probability throughout the trial, and AP Test statistics for both arms, 

UR <- function(pa, pb, n, burnin, batch) {

  # List: tracking successes and failures under each arm
  # ex. [0,1,1,1,2,3,3,4,5,6], final index value indicates total number of successes/failures
  arm_a_successes <- c(0)
  arm_a_failures <- c(0)
  arm_b_successes <- c(0)
  arm_b_failures <- c(0)
  
  # List: tracking only the current number of successes and failures (Value changes for each iteration)
  arm_a_s <- 0
  arm_a_f <- 0
  arm_b_s <- 0
  arm_b_f <- 0

  # History that tracks if there was success in either arm for each trial (Variable not used in final results)
  # ex. [1,0,1,1,0,1] 1 indicates success for either arm, 0 indicates failure for both arms
  abl <- c()
  APT = 0 # AP Test statistic over the time, here we should be expecting this to be around 50% of the sample size n.
  
  # ---------- BURNIN PERIOD: use Uniform Random (UR) sampling only ----------
  # note: would be the same here burnin and the actual policy as the policy we are simulating is UR, just for style keeping :) (sorry OCD)
  for (i in 1:burnin) {

    draw_a <- runif(1) # Random probability of arm a being chosen
    draw_b <- runif(1) # Random probability of arm b being chosen

    # CASE 1: Arm a is chosen
    if (draw_a > draw_b) {
      # Arm a was successful  
      if (runif(1) < pa) {
        abl[i] <- 1
        arm_a_s <- arm_a_s + 1 # update arm a success count
      } 
      # Arm a was not successful
      else {       
        abl[i] <- 0
        arm_a_f <- arm_a_f + 1 # update arm a failure count
      }
    }

    # CASE 2: Arm b is chosen
    else {
      # Arm b was successful
      if (runif(1) < pb) {
        abl[i] <- 1
        arm_b_s <- arm_b_s + 1 # update arm b success count
      }
      # Arm b was not successful
      else {
        abl[i] <- 0
        arm_b_f <- arm_b_f + 1 # update arm b failure count
      }
    }
    arm_a_successes[i] <- arm_a_s
    arm_a_failures[i] <- arm_a_f
    arm_b_successes[i] <- arm_b_s
    arm_b_failures[i] <- arm_b_f 
    }
  
  # ---------- UR Period: use Uniform Random (UR) sampling only ----------
  
  
  for (i in (burnin+1):n) {
    
    m = i %% batch 

    draw_a <- runif(1) # Random probability of arm a being chosen
    draw_b <- runif(1) # Random probability of arm b being chosen

    # CASE 1: Arm a is chosen
    if (draw_a > draw_b) {
      # Arm a was successful  
      if (runif(1) < pa) {
        abl[i] <- 1
        arm_a_s <- arm_a_s + 1 # update arm a success count
      } 
      # Arm a was not successful
      else {       
        abl[i] <- 0
        arm_a_f <- arm_a_f + 1 # update arm a failure count
      }
    }

    # CASE 2: Arm b is chosen
    else {
      if(m == 0){
        APT = APT + 1
      }
      # Arm b was successful
      if (runif(1) < pb) {
        abl[i] <- 1
        arm_b_s <- arm_b_s + 1 # update arm b success count
      }
      # Arm b was not successful
      else {
        abl[i] <- 0
        arm_b_f <- arm_b_f + 1 # update arm b failure count
      }
    }

    # Tracking the build-up of total number of successes, failures, for arm a & b
    arm_a_successes[i] <- arm_a_s
    arm_a_failures[i] <- arm_a_f
    arm_b_successes[i] <- arm_b_s
    arm_b_failures[i] <- arm_b_f 
  }
  na <- arm_a_successes + arm_a_failures
  nb <- arm_b_successes + arm_b_failures
  AP <- nb/(na+nb) # Tracking of the changes of allocation probability over the time, it should stabilize at a 50% when sample size gets large enough

  # List: complete history of estimated posterior probability updates, for arm a & b success (final index, final posterior probability for each)
  pa_est <- arm_a_successes / na
  pb_est <- arm_b_successes / nb

  # Wald Score for confidence interval: for analyzing categorical data underlying chi-squared distribution vectors, instead of summation
  WaldScore <- (pb_est - pa_est) / sqrt(pa_est * (1 - pa_est) / na + pb_est * (1 - pb_est) / nb)
  
  # List: our expected reward, from both arms (number of times chosen * posterior success probability, for each arm)
  expected_reward <- (na[n] * pa + nb[n] * pb) / n
  # Lists out the average expected reward for each individual trial (final index, final expected reward value)
  actual_reward <- (arm_a_successes[n] + arm_b_successes[n])/n
  
  #return(list(WaldScore = WaldScore[n], reward = reward[n], AP = AP[n], APT = APT))
  return(c(WaldScore[n], expected_reward, actual_reward, AP[n], APT))
}

Section 1.2 Running Traditional TS

#' TS-Thompson Sampling Method - Comparison of Allocation Probability Test vs. Traditional Wald Z test
#'
#' Runs TS algorithm in two-armed case, for n batches to calculate an AP Test statistic for each arm

#' @param pa - actual prior probability of controlled group arm a (typically unknown in practice)
#' @param pb - actual prior probability of experimental arm b (typically unknown in practice)
#' @param n - total number of trials in one simulation 
#' @param burnin - number of trials that run only Uniform Random (UR) sampling, before applying the TS algorithm for all remaining trials, and updating posterior probability every batch (using beta distribution)
#' @param batch - number of trials before posterior probability is updated, but in UR case it is sort of irrelevant
#'
#' @return vector containing WaldScore[n], reward[n], AP, and APT: the final Wald-Z-score, reward, allocation probability throughout the trial, and AP Test statistics for both arms, 
# or per say the CARA-FLGI method

TS <- function(pa, pb, n, burnin, batch) {

  # List: tracking successes and failures under each arm
  # ex. [0,1,1,1,2,3,3,4,5,6], final index value indicates total number of successes/failures
  arm_a_successes <- c(0)
  arm_a_failures <- c(0)
  arm_b_successes <- c(0)
  arm_b_failures <- c(0)
  
  # List: tracking only the current number of successes and failures (Value changes for each iteration)
  arm_a_s <- 0
  arm_a_f <- 0
  arm_b_s <- 0
  arm_b_f <- 0

  # History that tracks if there was success in either arm for each trial (Variable not used in final results)
  # ex. [1,0,1,1,0,1] 1 indicates success for either arm, 0 indicates failure for both arms
  abl <- c()
 
  APT = 0
  
  # ---------- BURNIN PERIOD: use Uniform Random (UR) sampling only ----------
  
  for (i in 1:burnin) {

    draw_a <- runif(1) # Random probability of arm a being chosen
    draw_b <- runif(1) # Random probability of arm b being chosen

    # CASE 1: Arm a is chosen
    if (draw_a > draw_b) {
      # Arm a was successful  
      if (runif(1) < pa) {
        abl[i] <- 1
        arm_a_s <- arm_a_s + 1 # update arm a success count
      } 
      # Arm a was not successful
      else {       
        abl[i] <- 0
        arm_a_f <- arm_a_f + 1 # update arm a failure count
      }
    }

    # CASE 2: Arm b is chosen
    else {
      # Arm b was successful
      if (runif(1) < pb) {
        abl[i] <- 1
        arm_b_s <- arm_b_s + 1 # update arm b success count
      }
      # Arm b was not successful
      else {
        abl[i] <- 0
        arm_b_f <- arm_b_f + 1 # update arm b failure count
      }
    }

    # Tracking the build-up of total number of successes, failures, for arm a & b
    arm_a_successes[i] <- arm_a_s
    arm_a_failures[i] <- arm_a_f
    arm_b_successes[i] <- arm_b_s
    arm_b_failures[i] <- arm_b_f 
  }
  
  # ---------- AFTER BURNIN PERIOD: Use TS or UR depending on difference between posterior probabilities ----------
  
  for (i in (burnin + 1):n) {

    # Posterior probability updates happen by batch size (whenever m becomes 0, larger batch size -> slower updates)
    m = i %% batch 
    
    # Draw from Beta(alpha, beta) distribution, using only the current success & failure counts 
    # alpha = current successes, beta = current failures of each arm

    # CASE 1: Posterior probability of arms a & b is updated
    if (m == 0) {
      draw_a <-
        rbeta(1, arm_a_successes[i - 1] + 1, arm_a_failures[i - 1] + 1) 
      draw_b <-
        rbeta(1, arm_b_successes[i - 1] + 1, arm_b_failures[i - 1] + 1) 
      if(draw_b > draw_a){
        APT = APT + 1
      }
    }
    
    # CASE 2: Posterior probability of arms a & b is not updated
    else {
      draw_a <-
        rbeta(1, arm_a_successes[i - m] + 1, arm_a_failures[i - m] + 1) 
      draw_b <-
        rbeta(1, arm_b_successes[i - m] + 1, arm_b_failures[i - m] + 1)
        
    # Using index [i - m] shows we are using the posterior probabilities from the previous batch, and not our most current success/failure counts
    }

    # Update arm a & b history (abl), successes, and failures
    if (draw_a > draw_b) {
      if (runif(1) < pa) {
        abl[i] <- 1
        arm_a_s <- arm_a_s + 1 # update arm a success count
      } else {
        abl[i] <- 0
        arm_a_f <- arm_a_f + 1 # update arm a failure count
      }
    } else{
      if (runif(1) < pb) {
        abl[i] <- 1
        arm_b_s <- arm_b_s + 1 #update arm b success count
      } else {
        abl[i] <- 0
        arm_b_f <- arm_b_f + 1 #update arm b failure count
      }
    }  
    
    # Tracking the build-up of total number of successes, failures, for arm a & b, adding onto successes and failures from burnin period
    arm_a_successes[i] <- arm_a_s
    arm_a_failures[i] <- arm_a_f
    arm_b_successes[i] <- arm_b_s
    arm_b_failures[i] <- arm_b_f
  }

  # List: complete history of number of times arms a & b were chosen (final index, total number of times chosen for each)
  na <- arm_a_successes + arm_a_failures
  nb <- arm_b_successes + arm_b_failures
  AP <- nb/(na+nb)

  # List: complete history of estimated posterior probability updates, for arm a & b success (final index, final posterior probability for each)
  pa_est <- arm_a_successes / na
  pb_est <- arm_b_successes / nb

  # Wald Score for confidence interval: for analyzing categorical data underlying chi-squared distribution vectors, instead of summation
  WaldScore <- (pb_est - pa_est) / sqrt(pa_est * (1 - pa_est) / na + pb_est * (1 - pb_est) / nb)
  
  # List: our expected reward, from both arms (number of times chosen * posterior success probability, for each arm)
  expected_reward <- (na[n] * pa + nb[n] * pb) / n
  # Lists out the average expected reward for each individual trial (final index, final expected reward value)
  actual_reward <- (arm_a_successes[n] + arm_b_successes[n])/n
  
  #return(list(WaldScore = WaldScore[n], reward = reward[n], AP = AP[n], APT = APT))
  return(c(WaldScore[n], expected_reward, actual_reward, AP[n], APT))
}

Section 1.3 Running Epsilon greedy TS

#' Epsilon-TS-Thompson Sampling Method - Comparison of Allocation Probability Test vs. Traditional Wald Z test
#'
#' Runs epsilon greedy TS algorithm in two-armed case, for n batches to calculate an AP Test statistic for each arm

#' @param pa - actual prior probability of controlled group arm a (typically unknown in practice)
#' @param pb - actual prior probability of experimental arm b (typically unknown in practice)
#' @param n - total number of trials in one simulation 
#' @param burnin - number of trials that run only Uniform Random (UR) sampling, before applying the TS algorithm for all remaining trials, and updating posterior probability every batch (using beta distribution)
#' @param batch - number of trials before posterior probability is updated, but in UR case it is sort of irrelevant
#' @param epsilon - this is the epsilon (or proportion) we defined previous to simulations at a certain level for UR sampling within the simulations
#' @return vector containing WaldScore[n], reward[n], AP, and APT: the final Wald-Z-score, reward, allocation probability throughout the trial, and AP Test statistics for both arms, 
# or per say the CARA-FLGI method
TS_epsilon <- function(pa, pb, n, burnin, batch, epsilon) {

  # List: tracking successes and failures under each arm
  # ex. [0,1,1,1,2,3,3,4,5,6], final index value indicates total number of successes/failures
  arm_a_successes <- c(0)
  arm_a_failures <- c(0)
  arm_b_successes <- c(0)
  arm_b_failures <- c(0)
  
  # List: tracking only the current number of successes and failures (Value changes for each iteration)
  arm_a_s <- 0
  arm_a_f <- 0
  arm_b_s <- 0
  arm_b_f <- 0

  # History that tracks if there was success in either arm for each trial (Variable not used in final results)
  # ex. [1,0,1,1,0,1] 1 indicates success for either arm, 0 indicates failure for both arms
  abl <- c()
 
  APT = 0 # AP Test statistic
  
  # ---------- BURNIN PERIOD: use Uniform Random (UR) sampling only ----------
  
  for (i in 1:burnin) {

    draw_a <- runif(1) # Random probability of arm a being chosen
    draw_b <- runif(1) # Random probability of arm b being chosen

    # CASE 1: Arm a is chosen
    if (draw_a > draw_b) {
      # Arm a was successful  
      if (runif(1) < pa) {
        abl[i] <- 1
        arm_a_s <- arm_a_s + 1 # update arm a success count
      } 
      # Arm a was not successful
      else {       
        abl[i] <- 0
        arm_a_f <- arm_a_f + 1 # update arm a failure count
      }
    }

    # CASE 2: Arm b is chosen
    else {
      # Arm b was successful
      if (runif(1) < pb) {
        abl[i] <- 1
        arm_b_s <- arm_b_s + 1 # update arm b success count
      }
      # Arm b was not successful
      else {
        abl[i] <- 0
        arm_b_f <- arm_b_f + 1 # update arm b failure count
      }
    }

    # Tracking the build-up of total number of successes, failures, for arm a & b
    arm_a_successes[i] <- arm_a_s
    arm_a_failures[i] <- arm_a_f
    arm_b_successes[i] <- arm_b_s
    arm_b_failures[i] <- arm_b_f 
  }
  
  # ---------- AFTER BURNIN PERIOD: Use TS or UR depending on difference between posterior probabilities ----------
  
  for (i in (burnin + 1):n) {

    # Posterior probability updates happen by batch size (whenever m becomes 0, larger batch size -> slower updates)
    m = i %% batch 
    
    # Draw from Beta(alpha, beta) distribution, using only the current success & failure counts 
    # alpha = current successes, beta = current failures of each arm

    # Use UR: If difference between posterior probabilities of arms is less than threshold value (expected difference in reward between arms)
    if (runif(1) < epsilon) {
      draw_a <- runif(1) # Random probability of arm a being chosen
      draw_b <- runif(1) # Random probability of arm b being chosen
    } 
    
    # Use TS: If difference between posterior probabilities of arms crosses threshold value (expected difference in reward between arms)
    else {
      # CASE 1: Posterior probability of arms a & b is updated
      if (m == 0) {
        draw_a <-
          rbeta(1, arm_a_successes[i - 1] + 1, arm_a_failures[i - 1] + 1)
        draw_b <-
          rbeta(1, arm_b_successes[i - 1] + 1, arm_b_failures[i - 1] + 1)
        if (draw_b > draw_a){
          APT = APT +1
        }
      }
      # CASE 2: Posterior probability of arms a & b is not updated
      else {
        draw_a <-
          rbeta(1, arm_a_successes[i - m] + 1, arm_a_failures[i - m] + 1)
        draw_b <-
          rbeta(1, arm_b_successes[i - m] + 1, arm_b_failures[i - m] + 1)
      }
    }

    # Update arm a & b history (abl), successes, and failures
    if (draw_a > draw_b) {
      if (runif(1) < pa) {
        abl[i] <- 1
        arm_a_s <- arm_a_s + 1 # update arm a success count
      } else {
        abl[i] <- 0
        arm_a_f <- arm_a_f + 1 # update arm a failure count
      }
    } else{
      if (runif(1) < pb) {
        abl[i] <- 1
        arm_b_s <- arm_b_s + 1 #update arm b success count
      } else {
        abl[i] <- 0
        arm_b_f <- arm_b_f + 1 #update arm b failure count
      }
    }  
    
    # Tracking the build-up of total number of successes, failures, for arm a & b, adding onto successes and failures from burnin period
    arm_a_successes[i] <- arm_a_s
    arm_a_failures[i] <- arm_a_f
    arm_b_successes[i] <- arm_b_s
    arm_b_failures[i] <- arm_b_f
  }

  # List: complete history of number of times arms a & b were chosen (final index, total number of times chosen for each)
  na <- arm_a_successes + arm_a_failures
  nb <- arm_b_successes + arm_b_failures
  AP <- nb/(na+nb)

  # List: complete history of estimated posterior probability updates, for arm a & b success (final index, final posterior probability for each)
  pa_est <- arm_a_successes / na
  pb_est <- arm_b_successes / nb

  # Wald Score for confidence interval: for analyzing categorical data underlying chi-squared distribution vectors, instead of summation
  WaldScore <- (pb_est - pa_est) / sqrt(pa_est * (1 - pa_est) / na + pb_est * (1 - pb_est) / nb)
  
  # List: our expected reward, from both arms (number of times chosen * posterior success probability, for each arm)
  expected_reward <- (na[n] * pa + nb[n] * pb) / n
  # Lists out the average expected reward for each individual trial (final index, final expected reward value)
  actual_reward <- (arm_a_successes[n] + arm_b_successes[n])/n
  
  #return(list(WaldScore = WaldScore[n], reward = reward[n], AP = AP[n], APT = APT))
  return(c(WaldScore[n], expected_reward, actual_reward, AP[n], APT))
}

Section 1.4 Running TS-PostDiff (Code adapted from Tong)

#' TS-PostDiff - Allocation Probability Test
#'
#' Runs TS-PostDiff algorithm in two-armed case, for n batches, with given batch size and BURNIN period, to calculate an AP Test statistic for each arm

#' @param pa - actual prior probability of arm a (typically unknown in practice)
#' @param pb - actual prior probability of arm b (typically unknown in practice)
#' @param n - total number of trials in one simulation 
#' @param c - threshold of difference in expected reward between the two arms
#' @param burnin - number of trials that run only Uniform Random (UR) sampling, before applying the TS-PostDiff algorithm for all remaining trials, and updating posterior probability every batch (using beta distribution)
#' @param batch - number of trials before posterior probability is updated
#'
#' @return vector containing WaldScore[n], reward[n], APT_1, APT_2: the final waldscore, reward, and AP Test statistics for both arms

TSPDD <- function(pa, pb, n, burnin, batch, c) {

  # List: tracking successes and failures under each arm
  # ex. [0,1,1,1,2,3,3,4,5,6], final index value indicates total number of successes/failures
  arm_a_successes <- c(0)
  arm_a_failures <- c(0)
  arm_b_successes <- c(0)
  arm_b_failures <- c(0)
  
  # List: tracking only the current number of successes and failures (Value changes for each iteration)
  arm_a_s <- 0
  arm_a_f <- 0
  arm_b_s <- 0
  arm_b_f <- 0

  # History that tracks if there was success in either arm for each trial (Variable not used in final results)
  # ex. [1,0,1,1,0,1] 1 indicates success for either arm, 0 indicates failure for both arms
  abl <- c()
 
  APT = 0 # AP Test statistic
  
  # ---------- BURNIN PERIOD: use Uniform Random (UR) sampling only ----------
  
  for (i in 1:burnin) {

    draw_a <- runif(1) # Random probability of arm a being chosen
    draw_b <- runif(1) # Random probability of arm b being chosen

    # CASE 1: Arm a is chosen
    if (draw_a > draw_b) {
      # Arm a was successful  
      if (runif(1) < pa) {
        abl[i] <- 1
        arm_a_s <- arm_a_s + 1 # update arm a success count
      } 
      # Arm a was not successful
      else {       
        abl[i] <- 0
        arm_a_f <- arm_a_f + 1 # update arm a failure count
      }
    }

    # CASE 2: Arm b is chosen
    else {
      # Arm b was successful
      if (runif(1) < pb) {
        abl[i] <- 1
        arm_b_s <- arm_b_s + 1 # update arm b success count
      }
      # Arm b was not successful
      else {
        abl[i] <- 0
        arm_b_f <- arm_b_f + 1 # update arm b failure count
      }
    }

    # Tracking the build-up of total number of successes, failures, for arm a & b
    arm_a_successes[i] <- arm_a_s
    arm_a_failures[i] <- arm_a_f
    arm_b_successes[i] <- arm_b_s
    arm_b_failures[i] <- arm_b_f 
  }
  
  # ---------- AFTER BURNIN PERIOD: Use TS or UR depending on difference between posterior probabilities ----------
  
  for (i in (burnin + 1):n) {

    # Posterior probability updates happen by batch size (whenever m becomes 0, larger batch size -> slower updates)
    m = i %% batch 
    
    # Draw from Beta(alpha, beta) distribution, using only the current success & failure counts 
    # alpha = current successes, beta = current failures of each arm
    # Use UR: If difference between posterior probabilities of arms is less than threshold value (expected difference in reward between arms)
    if (abs(draw_a - draw_b) < c) {
      draw_a <- runif(1) # Random probability of arm a being chosen
      draw_b <- runif(1) # Random probability of arm b being chosen
    } 
    
    # Use TS: If difference between posterior probabilities of arms crosses threshold value (expected difference in reward between arms)
    else {
      # CASE 1: Posterior probability of arms a & b is updated
      if (m == 0) {
        draw_a <-
          rbeta(1, arm_a_successes[i - 1] + 1, arm_a_failures[i - 1] + 1)
        draw_b <-
          rbeta(1, arm_b_successes[i - 1] + 1, arm_b_failures[i - 1] + 1)
        if (draw_b > draw_a){
          APT = APT + 1
        }
      }
      # CASE 2: Posterior probability of arms a & b is not updated
      else {
        draw_a <-
          rbeta(1, arm_a_successes[i - m] + 1, arm_a_failures[i - m] + 1)
        draw_b <-
          rbeta(1, arm_b_successes[i - m] + 1, arm_b_failures[i - m] + 1)
      }
    }

    # Update arm a & b history (abl), successes, and failures
    if (draw_a > draw_b) {
      if (runif(1) < pa) {
        abl[i] <- 1
        arm_a_s <- arm_a_s + 1 # update arm a success count
      } else {
        abl[i] <- 0
        arm_a_f <- arm_a_f + 1 # update arm a failure count
      }
    } else{
      if (runif(1) < pb) {
        abl[i] <- 1
        arm_b_s <- arm_b_s + 1 #update arm b success count
      } else {
        abl[i] <- 0
        arm_b_f <- arm_b_f + 1 #update arm b failure count
      }
    }  
    
    # Tracking the build-up of total number of successes, failures, for arm a & b, adding onto successes and failures from burnin period
    arm_a_successes[i] <- arm_a_s
    arm_a_failures[i] <- arm_a_f
    arm_b_successes[i] <- arm_b_s
    arm_b_failures[i] <- arm_b_f
  }

  # List: complete history of number of times arms a & b were chosen (final index, total number of times chosen for each)
  na <- arm_a_successes + arm_a_failures
  nb <- arm_b_successes + arm_b_failures

  # List: complete history of estimated posterior probability updates, for arm a & b success (final index, final posterior probability for each)
  pa_est <- arm_a_successes / na
  pb_est <- arm_b_successes / nb
  AP <- nb/(na+nb)

  # Wald Score for confidence interval: for analyzing categorical data underlying chi-squared distribution vectors, instead of summation
  WaldScore <- (pb_est - pa_est) / sqrt(pa_est * (1 - pa_est) / na + pb_est * (1 - pb_est) / nb)
  
  # List: our expected reward, from both arms (number of times chosen * posterior success probability, for each arm)
  expected_reward <- (na[n] * pa + nb[n] * pb) / n
  # Lists out the average expected reward for each individual trial (final index, final expected reward value)
  actual_reward <- (arm_a_successes[n] + arm_b_successes[n])/n
  
  #return(list(WaldScore = WaldScore[n], reward = reward[n], AP = AP[n], APT = APT))
  return(c(WaldScore[n], expected_reward, actual_reward, AP[n], APT))
}

Section 2: Finding Critical Value

Section 2.1 Finding Critical Values, Using Normal Assumption on Law of Large Numbers:

#' Critical Value (Normal)
#'
#' Finds critical value using normal assumption on LLN
#' @param APT - AP Test statistic (List of either APT_ban_UR/APT_w_UR results across different simulations)
#' @param alpha - Customized significance level
#' 
#' @return generated critical value from normality assumption

normal_cv <- function(APT, alpha) {
  m <- mean(APT) 
  v <- var(APT)
  sd <- sqrt(v)
  normal_rej <- qnorm(1 - alpha) #Rejection area defined (following normal distribution assumption)
  cv <- normal_rej * sd + m
  return(cv)
}

Section 2.2 Finding Critical Values, Using Empirical Approach

#' Critical Value (Empirical)
#'
#' Finds critical value using empirical assumption on LLN
#' @param APT - AP Test statistic (List of either APT_ban_UR/APT_w_UR results across different simulations)
#' @param alpha - Customized significance level
#' 
#' @return generated critical value from empirical simulations

empirical_cv <- function(APT, alpha) {
  cdf = 0
  i = 0 
  # Repeatedly add the empirical probabilities of each [i] value until cdf reaches 1 - significance level
  while (cdf <= 1- alpha) {
    p_i <-sum(APT == i)/length(APT)
    cdf <- sum(cdf, p_i)
    i = i + 1
  }
  return(i - 1) 
}

Section 2.3 Replicating and rerun the simulations for B replications

# One output for each simulation, the four dimensions are waldscore, average reward, APT_ban_UR, and APT_w_UR

simulation_n_times <- function(alpha,policy, effect_size, n, B, burnin, batch, ...) {
  pa <- 0.5 - effect_size / 2 # define the corresponding priors of the success rate centred and scaled at p1+p2 = 1
  pb <- 0.5 + effect_size / 2
  results <- array(dim = c(B, 5)) # output a four dimensional arrage
  # number of simulations
  for (i in 1:B) {
    results[i,] <- policy(
      pa = pa,
      pb = pb,
      n = n,
      burnin = burnin,
      batch = batch,
      ...
      )
  }
  average_power = mean(results[, 1] > qnorm(1-alpha)) # Find average power based on waldscore
  
  expected_reward = mean(results[, 2]) # expected reward setting
  average_reward = mean(results[,3]) # calculate the actual real reward throughout many trials
  average_AP = mean(results[,4]) # find average allocation probability for the experimental arm
  APT = results[,5]
  # Empirical and normal CV of APT
  em_cv <- empirical_cv(APT,alpha)
  norm_cv <- normal_cv(APT,alpha)
  F1 <- ecdf(APT)
  # Empirical and normal FPR of APT is
  em_FPR <- 1-F1(em_cv)
  norm_FPR <- 1-F1(floor(norm_cv))
  return(list(average_power = average_power, 
              expected_reward = expected_reward,
              average_reward = average_reward, 
              average_AP = average_AP,
              em_cv = em_cv,
              norm_cv = norm_cv,
              em_FPR = em_FPR,
              norm_FPR = norm_FPR))
}

Section 2.4: Construction of the reporting table:

# it takes in the same parameter as in before and output a comparison dataframe across different policies.
compare_policies <-
  function(alpha,
           effect_size,
           n,
           B,
           burnin,
           batch,
           epsilon,
           c) {
    UR_result <-
      simulation_n_times(alpha, UR, effect_size, n, B, burnin, batch)
    TS_result <-
      simulation_n_times(alpha, TS, effect_size, n, B, burnin, batch)
    TS_epsilon_result <-
      simulation_n_times(alpha, TS_epsilon, effect_size, n, B, burnin, batch, epsilon = 0.1)
    TSPDD_result <-
      simulation_n_times(alpha, TSPDD, effect_size, n, B, burnin, batch, c = 0.1)
    
    
    # Create a summary table
    summary_table <- data.frame(
      Metric = c(
        "Average Power",
        "Expected Reward",
        "Average Reward",
        "Average AP",
        "Empirical CV",
        "Normal CV",
        "Empirical FPR",
        "Normal FPR"
      ),
      UR = c(
        UR_result$average_power,
        UR_result$expected_reward,
        UR_result$average_reward,
        UR_result$average_AP,
        UR_result$em_cv,
        UR_result$norm_cv,
        UR_result$em_FPR,
        UR_result$norm_FPR
      ),
      TS = c(
        TS_result$average_power,
        TS_result$expected_reward,
        TS_result$average_reward,
        TS_result$average_AP,
        TS_result$em_cv,
        TS_result$norm_cv,
        TS_result$em_FPR,
        TS_result$norm_FPR
      ),
      TS_epsilon = c(
        TS_epsilon_result$average_power,
        TS_epsilon_result$expected_reward,
        TS_epsilon_result$average_reward,
        TS_epsilon_result$average_AP,
        TS_epsilon_result$em_cv,
        TS_epsilon_result$norm_cv,
        TS_epsilon_result$em_FPR,
        TS_epsilon_result$norm_FPR
      ),
      TSPDD = c(
        TSPDD_result$average_power,
        TSPDD_result$expected_reward,
        TSPDD_result$average_reward,
        TSPDD_result$average_AP,
        TSPDD_result$em_cv,
        TSPDD_result$norm_cv,
        TSPDD_result$em_FPR,
        TSPDD_result$norm_FPR
      )
    )
    
    return(summary_table)
}

Section 3: Simulations:

Section 3.1: Global Settings

In this section we will provide some unified parameters across different simulations:

# let's make this a function
set.seed(0) # for results to be replicable
#calculate power and reward in case p0 < p1
alpha <- 0.05 # rejection region
effect_size <- 0 # by default effect size is 0: no difference between arms | effect size is 0.05-0.1: Intervention is of relatively small or marginal effect | effect size is 0.1-0.2: Medium or smal effect | effect size is 0.2-0.3+: relatively large effectt between interventions
n <- 100 # sample_size(total number of trials in one simulation)
burnin <- 0.1 * n # make by default the burin size is 10% of the total sample size that we have
B <- 1000 # number of simulations
batch <-  2 # batch_size

Section 3.2: Effect size = 0.0

compare_policies(alpha,effect_size = 0,n,B,burnin,batch,epsilon=0.1,c=0.1)

           Metric       UR       TS TS_epsilon    TSPDD
1   Average Power  0.05400  0.09700    0.07900  0.09700
2 Expected Reward  0.50000  0.50000    0.50000  0.50000
3  Average Reward  0.49853  0.50091    0.50173  0.50078
4      Average AP  0.50277  0.49996    0.50253  0.50390
5    Empirical CV 28.00000 42.00000   38.00000 37.00000
6       Normal CV 28.30240 43.58928   38.57921 33.40849
7   Empirical FPR  0.04800  0.05000    0.04400  0.04900
8      Normal FPR  0.04800  0.02800    0.04400  0.08100

Section 3.2: Effect size = 0.05

compare_policies(alpha,effect_size = 0.05,n,B,burnin,batch,epsilon=0.1,c=0.1)

           Metric        UR         TS TS_epsilon      TSPDD
1   Average Power  0.112000  0.1890000    0.13800  0.1780000
2 Expected Reward  0.499968  0.5042665    0.50372  0.5035515
3  Average Reward  0.499510  0.5055200    0.50163  0.5027600
4      Average AP  0.499360  0.5853300    0.57440  0.5710300
5    Empirical CV 28.000000 43.0000000   39.00000 38.0000000
6       Normal CV 27.674925 47.1424816   41.70846 36.7797006
7   Empirical FPR  0.027000  0.0430000    0.04700  0.0490000
8      Normal FPR  0.052000  0.0000000    0.01200  0.0750000

Section 3.3: Effect size = 0.1

compare_policies(alpha,effect_size = 0.1,n,B,burnin,batch,epsilon=0.1,c=0.1)

           Metric        UR        TS TS_epsilon     TSPDD
1   Average Power  0.299000  0.296000   0.244000  0.293000
2 Expected Reward  0.500227  0.516966   0.514616  0.513474
3  Average Reward  0.499430  0.516410   0.511840  0.512120
4      Average AP  0.502270  0.669660   0.646160  0.634740
5    Empirical CV 28.000000 44.000000  40.000000 41.000000
6       Normal CV 27.954363 49.312830  43.780939 40.982710
7   Empirical FPR  0.041000  0.030000   0.040000  0.045000
8      Normal FPR  0.068000  0.000000   0.002000  0.052000

Section 3.4: Effect size = 0.2

compare_policies(alpha,effect_size = 0.2,n,B,burnin,batch,epsilon=0.1,c=0.1)

           Metric        UR        TS TS_epsilon     TSPDD
1   Average Power  0.643000  0.584000   0.585000  0.611000
2 Expected Reward  0.499858  0.558064   0.552914  0.549406
3  Average Reward  0.502580  0.559750   0.551750  0.547950
4      Average AP  0.499290  0.790320   0.764570  0.747030
5    Empirical CV 28.000000 45.000000  42.000000 44.000000
6       Normal CV 27.973850 50.120082  45.636924 45.798869
7   Empirical FPR  0.036000  0.000000   0.021000  0.021000
8      Normal FPR  0.066000  0.000000   0.000000  0.000000