#' UR-Uniform Random Sampling Method - Comparison of Allocation Probability Test vs. Traditional Wald Z test
#'
#' Runs UR algorithm in two-armed case, for n batches to calculate an AP Test statistic for each arm
#' @param pa - actual prior probability of controlled group arm a (typically unknown in practice)
#' @param pb - actual prior probability of experimental arm b (typically unknown in practice)
#' @param n - total number of trials in one simulation
#' @param batch - number of trials before posterior probability is updated, but in UR case it is sort of irrelevant
#' @param burnin - number of trials that run only Uniform Random (UR) sampling, before applying the UR algorithm for all remaining trials, and updating posterior probability every batch (using beta distribution)
#' @return vector containing WaldScore[n], reward[n], AP, and APT: the final Wald-Z-score, reward, allocation probability throughout the trial, and AP Test statistics for both arms,
UR <- function(pa, pb, n, burnin, batch) {
# List: tracking successes and failures under each arm
# ex. [0,1,1,1,2,3,3,4,5,6], final index value indicates total number of successes/failures
arm_a_successes <- c(0)
arm_a_failures <- c(0)
arm_b_successes <- c(0)
arm_b_failures <- c(0)
# List: tracking only the current number of successes and failures (Value changes for each iteration)
arm_a_s <- 0
arm_a_f <- 0
arm_b_s <- 0
arm_b_f <- 0
# History that tracks if there was success in either arm for each trial (Variable not used in final results)
# ex. [1,0,1,1,0,1] 1 indicates success for either arm, 0 indicates failure for both arms
abl <- c()
APT = 0 # AP Test statistic over the time, here we should be expecting this to be around 50% of the sample size n.
# ---------- BURNIN PERIOD: use Uniform Random (UR) sampling only ----------
# note: would be the same here burnin and the actual policy as the policy we are simulating is UR, just for style keeping :) (sorry OCD)
for (i in 1:burnin) {
draw_a <- runif(1) # Random probability of arm a being chosen
draw_b <- runif(1) # Random probability of arm b being chosen
# CASE 1: Arm a is chosen
if (draw_a > draw_b) {
# Arm a was successful
if (runif(1) < pa) {
abl[i] <- 1
arm_a_s <- arm_a_s + 1 # update arm a success count
}
# Arm a was not successful
else {
abl[i] <- 0
arm_a_f <- arm_a_f + 1 # update arm a failure count
}
}
# CASE 2: Arm b is chosen
else {
# Arm b was successful
if (runif(1) < pb) {
abl[i] <- 1
arm_b_s <- arm_b_s + 1 # update arm b success count
}
# Arm b was not successful
else {
abl[i] <- 0
arm_b_f <- arm_b_f + 1 # update arm b failure count
}
}
arm_a_successes[i] <- arm_a_s
arm_a_failures[i] <- arm_a_f
arm_b_successes[i] <- arm_b_s
arm_b_failures[i] <- arm_b_f
}
# ---------- UR Period: use Uniform Random (UR) sampling only ----------
for (i in (burnin+1):n) {
m = i %% batch
draw_a <- runif(1) # Random probability of arm a being chosen
draw_b <- runif(1) # Random probability of arm b being chosen
# CASE 1: Arm a is chosen
if (draw_a > draw_b) {
# Arm a was successful
if (runif(1) < pa) {
abl[i] <- 1
arm_a_s <- arm_a_s + 1 # update arm a success count
}
# Arm a was not successful
else {
abl[i] <- 0
arm_a_f <- arm_a_f + 1 # update arm a failure count
}
}
# CASE 2: Arm b is chosen
else {
if(m == 0){
APT = APT + 1
}
# Arm b was successful
if (runif(1) < pb) {
abl[i] <- 1
arm_b_s <- arm_b_s + 1 # update arm b success count
}
# Arm b was not successful
else {
abl[i] <- 0
arm_b_f <- arm_b_f + 1 # update arm b failure count
}
}
# Tracking the build-up of total number of successes, failures, for arm a & b
arm_a_successes[i] <- arm_a_s
arm_a_failures[i] <- arm_a_f
arm_b_successes[i] <- arm_b_s
arm_b_failures[i] <- arm_b_f
}
na <- arm_a_successes + arm_a_failures
nb <- arm_b_successes + arm_b_failures
AP <- nb/(na+nb) # Tracking of the changes of allocation probability over the time, it should stabilize at a 50% when sample size gets large enough
# List: complete history of estimated posterior probability updates, for arm a & b success (final index, final posterior probability for each)
pa_est <- arm_a_successes / na
pb_est <- arm_b_successes / nb
# Wald Score for confidence interval: for analyzing categorical data underlying chi-squared distribution vectors, instead of summation
WaldScore <- (pb_est - pa_est) / sqrt(pa_est * (1 - pa_est) / na + pb_est * (1 - pb_est) / nb)
# List: our expected reward, from both arms (number of times chosen * posterior success probability, for each arm)
expected_reward <- (na[n] * pa + nb[n] * pb) / n
# Lists out the average expected reward for each individual trial (final index, final expected reward value)
actual_reward <- (arm_a_successes[n] + arm_b_successes[n])/n
#return(list(WaldScore = WaldScore[n], reward = reward[n], AP = AP[n], APT = APT))
return(c(WaldScore[n], expected_reward, actual_reward, AP[n], APT))
}AP test application
Section 1 Policy Introductions
Section 1.1 Running Traditional UR
Section 1.2 Running Traditional TS
#' TS-Thompson Sampling Method - Comparison of Allocation Probability Test vs. Traditional Wald Z test
#'
#' Runs TS algorithm in two-armed case, for n batches to calculate an AP Test statistic for each arm
#' @param pa - actual prior probability of controlled group arm a (typically unknown in practice)
#' @param pb - actual prior probability of experimental arm b (typically unknown in practice)
#' @param n - total number of trials in one simulation
#' @param burnin - number of trials that run only Uniform Random (UR) sampling, before applying the TS algorithm for all remaining trials, and updating posterior probability every batch (using beta distribution)
#' @param batch - number of trials before posterior probability is updated, but in UR case it is sort of irrelevant
#'
#' @return vector containing WaldScore[n], reward[n], AP, and APT: the final Wald-Z-score, reward, allocation probability throughout the trial, and AP Test statistics for both arms,
# or per say the CARA-FLGI method
TS <- function(pa, pb, n, burnin, batch) {
# List: tracking successes and failures under each arm
# ex. [0,1,1,1,2,3,3,4,5,6], final index value indicates total number of successes/failures
arm_a_successes <- c(0)
arm_a_failures <- c(0)
arm_b_successes <- c(0)
arm_b_failures <- c(0)
# List: tracking only the current number of successes and failures (Value changes for each iteration)
arm_a_s <- 0
arm_a_f <- 0
arm_b_s <- 0
arm_b_f <- 0
# History that tracks if there was success in either arm for each trial (Variable not used in final results)
# ex. [1,0,1,1,0,1] 1 indicates success for either arm, 0 indicates failure for both arms
abl <- c()
APT = 0
# ---------- BURNIN PERIOD: use Uniform Random (UR) sampling only ----------
for (i in 1:burnin) {
draw_a <- runif(1) # Random probability of arm a being chosen
draw_b <- runif(1) # Random probability of arm b being chosen
# CASE 1: Arm a is chosen
if (draw_a > draw_b) {
# Arm a was successful
if (runif(1) < pa) {
abl[i] <- 1
arm_a_s <- arm_a_s + 1 # update arm a success count
}
# Arm a was not successful
else {
abl[i] <- 0
arm_a_f <- arm_a_f + 1 # update arm a failure count
}
}
# CASE 2: Arm b is chosen
else {
# Arm b was successful
if (runif(1) < pb) {
abl[i] <- 1
arm_b_s <- arm_b_s + 1 # update arm b success count
}
# Arm b was not successful
else {
abl[i] <- 0
arm_b_f <- arm_b_f + 1 # update arm b failure count
}
}
# Tracking the build-up of total number of successes, failures, for arm a & b
arm_a_successes[i] <- arm_a_s
arm_a_failures[i] <- arm_a_f
arm_b_successes[i] <- arm_b_s
arm_b_failures[i] <- arm_b_f
}
# ---------- AFTER BURNIN PERIOD: Use TS or UR depending on difference between posterior probabilities ----------
for (i in (burnin + 1):n) {
# Posterior probability updates happen by batch size (whenever m becomes 0, larger batch size -> slower updates)
m = i %% batch
# Draw from Beta(alpha, beta) distribution, using only the current success & failure counts
# alpha = current successes, beta = current failures of each arm
# CASE 1: Posterior probability of arms a & b is updated
if (m == 0) {
draw_a <-
rbeta(1, arm_a_successes[i - 1] + 1, arm_a_failures[i - 1] + 1)
draw_b <-
rbeta(1, arm_b_successes[i - 1] + 1, arm_b_failures[i - 1] + 1)
if(draw_b > draw_a){
APT = APT + 1
}
}
# CASE 2: Posterior probability of arms a & b is not updated
else {
draw_a <-
rbeta(1, arm_a_successes[i - m] + 1, arm_a_failures[i - m] + 1)
draw_b <-
rbeta(1, arm_b_successes[i - m] + 1, arm_b_failures[i - m] + 1)
# Using index [i - m] shows we are using the posterior probabilities from the previous batch, and not our most current success/failure counts
}
# Update arm a & b history (abl), successes, and failures
if (draw_a > draw_b) {
if (runif(1) < pa) {
abl[i] <- 1
arm_a_s <- arm_a_s + 1 # update arm a success count
} else {
abl[i] <- 0
arm_a_f <- arm_a_f + 1 # update arm a failure count
}
} else{
if (runif(1) < pb) {
abl[i] <- 1
arm_b_s <- arm_b_s + 1 #update arm b success count
} else {
abl[i] <- 0
arm_b_f <- arm_b_f + 1 #update arm b failure count
}
}
# Tracking the build-up of total number of successes, failures, for arm a & b, adding onto successes and failures from burnin period
arm_a_successes[i] <- arm_a_s
arm_a_failures[i] <- arm_a_f
arm_b_successes[i] <- arm_b_s
arm_b_failures[i] <- arm_b_f
}
# List: complete history of number of times arms a & b were chosen (final index, total number of times chosen for each)
na <- arm_a_successes + arm_a_failures
nb <- arm_b_successes + arm_b_failures
AP <- nb/(na+nb)
# List: complete history of estimated posterior probability updates, for arm a & b success (final index, final posterior probability for each)
pa_est <- arm_a_successes / na
pb_est <- arm_b_successes / nb
# Wald Score for confidence interval: for analyzing categorical data underlying chi-squared distribution vectors, instead of summation
WaldScore <- (pb_est - pa_est) / sqrt(pa_est * (1 - pa_est) / na + pb_est * (1 - pb_est) / nb)
# List: our expected reward, from both arms (number of times chosen * posterior success probability, for each arm)
expected_reward <- (na[n] * pa + nb[n] * pb) / n
# Lists out the average expected reward for each individual trial (final index, final expected reward value)
actual_reward <- (arm_a_successes[n] + arm_b_successes[n])/n
#return(list(WaldScore = WaldScore[n], reward = reward[n], AP = AP[n], APT = APT))
return(c(WaldScore[n], expected_reward, actual_reward, AP[n], APT))
}Section 1.3 Running Epsilon greedy TS
#' Epsilon-TS-Thompson Sampling Method - Comparison of Allocation Probability Test vs. Traditional Wald Z test
#'
#' Runs epsilon greedy TS algorithm in two-armed case, for n batches to calculate an AP Test statistic for each arm
#' @param pa - actual prior probability of controlled group arm a (typically unknown in practice)
#' @param pb - actual prior probability of experimental arm b (typically unknown in practice)
#' @param n - total number of trials in one simulation
#' @param burnin - number of trials that run only Uniform Random (UR) sampling, before applying the TS algorithm for all remaining trials, and updating posterior probability every batch (using beta distribution)
#' @param batch - number of trials before posterior probability is updated, but in UR case it is sort of irrelevant
#' @param epsilon - this is the epsilon (or proportion) we defined previous to simulations at a certain level for UR sampling within the simulations
#' @return vector containing WaldScore[n], reward[n], AP, and APT: the final Wald-Z-score, reward, allocation probability throughout the trial, and AP Test statistics for both arms,
# or per say the CARA-FLGI method
TS_epsilon <- function(pa, pb, n, burnin, batch, epsilon) {
# List: tracking successes and failures under each arm
# ex. [0,1,1,1,2,3,3,4,5,6], final index value indicates total number of successes/failures
arm_a_successes <- c(0)
arm_a_failures <- c(0)
arm_b_successes <- c(0)
arm_b_failures <- c(0)
# List: tracking only the current number of successes and failures (Value changes for each iteration)
arm_a_s <- 0
arm_a_f <- 0
arm_b_s <- 0
arm_b_f <- 0
# History that tracks if there was success in either arm for each trial (Variable not used in final results)
# ex. [1,0,1,1,0,1] 1 indicates success for either arm, 0 indicates failure for both arms
abl <- c()
APT = 0 # AP Test statistic
# ---------- BURNIN PERIOD: use Uniform Random (UR) sampling only ----------
for (i in 1:burnin) {
draw_a <- runif(1) # Random probability of arm a being chosen
draw_b <- runif(1) # Random probability of arm b being chosen
# CASE 1: Arm a is chosen
if (draw_a > draw_b) {
# Arm a was successful
if (runif(1) < pa) {
abl[i] <- 1
arm_a_s <- arm_a_s + 1 # update arm a success count
}
# Arm a was not successful
else {
abl[i] <- 0
arm_a_f <- arm_a_f + 1 # update arm a failure count
}
}
# CASE 2: Arm b is chosen
else {
# Arm b was successful
if (runif(1) < pb) {
abl[i] <- 1
arm_b_s <- arm_b_s + 1 # update arm b success count
}
# Arm b was not successful
else {
abl[i] <- 0
arm_b_f <- arm_b_f + 1 # update arm b failure count
}
}
# Tracking the build-up of total number of successes, failures, for arm a & b
arm_a_successes[i] <- arm_a_s
arm_a_failures[i] <- arm_a_f
arm_b_successes[i] <- arm_b_s
arm_b_failures[i] <- arm_b_f
}
# ---------- AFTER BURNIN PERIOD: Use TS or UR depending on difference between posterior probabilities ----------
for (i in (burnin + 1):n) {
# Posterior probability updates happen by batch size (whenever m becomes 0, larger batch size -> slower updates)
m = i %% batch
# Draw from Beta(alpha, beta) distribution, using only the current success & failure counts
# alpha = current successes, beta = current failures of each arm
# Use UR: If difference between posterior probabilities of arms is less than threshold value (expected difference in reward between arms)
if (runif(1) < epsilon) {
draw_a <- runif(1) # Random probability of arm a being chosen
draw_b <- runif(1) # Random probability of arm b being chosen
}
# Use TS: If difference between posterior probabilities of arms crosses threshold value (expected difference in reward between arms)
else {
# CASE 1: Posterior probability of arms a & b is updated
if (m == 0) {
draw_a <-
rbeta(1, arm_a_successes[i - 1] + 1, arm_a_failures[i - 1] + 1)
draw_b <-
rbeta(1, arm_b_successes[i - 1] + 1, arm_b_failures[i - 1] + 1)
if (draw_b > draw_a){
APT = APT +1
}
}
# CASE 2: Posterior probability of arms a & b is not updated
else {
draw_a <-
rbeta(1, arm_a_successes[i - m] + 1, arm_a_failures[i - m] + 1)
draw_b <-
rbeta(1, arm_b_successes[i - m] + 1, arm_b_failures[i - m] + 1)
}
}
# Update arm a & b history (abl), successes, and failures
if (draw_a > draw_b) {
if (runif(1) < pa) {
abl[i] <- 1
arm_a_s <- arm_a_s + 1 # update arm a success count
} else {
abl[i] <- 0
arm_a_f <- arm_a_f + 1 # update arm a failure count
}
} else{
if (runif(1) < pb) {
abl[i] <- 1
arm_b_s <- arm_b_s + 1 #update arm b success count
} else {
abl[i] <- 0
arm_b_f <- arm_b_f + 1 #update arm b failure count
}
}
# Tracking the build-up of total number of successes, failures, for arm a & b, adding onto successes and failures from burnin period
arm_a_successes[i] <- arm_a_s
arm_a_failures[i] <- arm_a_f
arm_b_successes[i] <- arm_b_s
arm_b_failures[i] <- arm_b_f
}
# List: complete history of number of times arms a & b were chosen (final index, total number of times chosen for each)
na <- arm_a_successes + arm_a_failures
nb <- arm_b_successes + arm_b_failures
AP <- nb/(na+nb)
# List: complete history of estimated posterior probability updates, for arm a & b success (final index, final posterior probability for each)
pa_est <- arm_a_successes / na
pb_est <- arm_b_successes / nb
# Wald Score for confidence interval: for analyzing categorical data underlying chi-squared distribution vectors, instead of summation
WaldScore <- (pb_est - pa_est) / sqrt(pa_est * (1 - pa_est) / na + pb_est * (1 - pb_est) / nb)
# List: our expected reward, from both arms (number of times chosen * posterior success probability, for each arm)
expected_reward <- (na[n] * pa + nb[n] * pb) / n
# Lists out the average expected reward for each individual trial (final index, final expected reward value)
actual_reward <- (arm_a_successes[n] + arm_b_successes[n])/n
#return(list(WaldScore = WaldScore[n], reward = reward[n], AP = AP[n], APT = APT))
return(c(WaldScore[n], expected_reward, actual_reward, AP[n], APT))
}Section 1.4 Running TS-PostDiff (Code adapted from Tong)
#' TS-PostDiff - Allocation Probability Test
#'
#' Runs TS-PostDiff algorithm in two-armed case, for n batches, with given batch size and BURNIN period, to calculate an AP Test statistic for each arm
#' @param pa - actual prior probability of arm a (typically unknown in practice)
#' @param pb - actual prior probability of arm b (typically unknown in practice)
#' @param n - total number of trials in one simulation
#' @param c - threshold of difference in expected reward between the two arms
#' @param burnin - number of trials that run only Uniform Random (UR) sampling, before applying the TS-PostDiff algorithm for all remaining trials, and updating posterior probability every batch (using beta distribution)
#' @param batch - number of trials before posterior probability is updated
#'
#' @return vector containing WaldScore[n], reward[n], APT_1, APT_2: the final waldscore, reward, and AP Test statistics for both arms
TSPDD <- function(pa, pb, n, burnin, batch, c) {
# List: tracking successes and failures under each arm
# ex. [0,1,1,1,2,3,3,4,5,6], final index value indicates total number of successes/failures
arm_a_successes <- c(0)
arm_a_failures <- c(0)
arm_b_successes <- c(0)
arm_b_failures <- c(0)
# List: tracking only the current number of successes and failures (Value changes for each iteration)
arm_a_s <- 0
arm_a_f <- 0
arm_b_s <- 0
arm_b_f <- 0
# History that tracks if there was success in either arm for each trial (Variable not used in final results)
# ex. [1,0,1,1,0,1] 1 indicates success for either arm, 0 indicates failure for both arms
abl <- c()
APT = 0 # AP Test statistic
# ---------- BURNIN PERIOD: use Uniform Random (UR) sampling only ----------
for (i in 1:burnin) {
draw_a <- runif(1) # Random probability of arm a being chosen
draw_b <- runif(1) # Random probability of arm b being chosen
# CASE 1: Arm a is chosen
if (draw_a > draw_b) {
# Arm a was successful
if (runif(1) < pa) {
abl[i] <- 1
arm_a_s <- arm_a_s + 1 # update arm a success count
}
# Arm a was not successful
else {
abl[i] <- 0
arm_a_f <- arm_a_f + 1 # update arm a failure count
}
}
# CASE 2: Arm b is chosen
else {
# Arm b was successful
if (runif(1) < pb) {
abl[i] <- 1
arm_b_s <- arm_b_s + 1 # update arm b success count
}
# Arm b was not successful
else {
abl[i] <- 0
arm_b_f <- arm_b_f + 1 # update arm b failure count
}
}
# Tracking the build-up of total number of successes, failures, for arm a & b
arm_a_successes[i] <- arm_a_s
arm_a_failures[i] <- arm_a_f
arm_b_successes[i] <- arm_b_s
arm_b_failures[i] <- arm_b_f
}
# ---------- AFTER BURNIN PERIOD: Use TS or UR depending on difference between posterior probabilities ----------
for (i in (burnin + 1):n) {
# Posterior probability updates happen by batch size (whenever m becomes 0, larger batch size -> slower updates)
m = i %% batch
# Draw from Beta(alpha, beta) distribution, using only the current success & failure counts
# alpha = current successes, beta = current failures of each arm
# Use UR: If difference between posterior probabilities of arms is less than threshold value (expected difference in reward between arms)
if (abs(draw_a - draw_b) < c) {
draw_a <- runif(1) # Random probability of arm a being chosen
draw_b <- runif(1) # Random probability of arm b being chosen
}
# Use TS: If difference between posterior probabilities of arms crosses threshold value (expected difference in reward between arms)
else {
# CASE 1: Posterior probability of arms a & b is updated
if (m == 0) {
draw_a <-
rbeta(1, arm_a_successes[i - 1] + 1, arm_a_failures[i - 1] + 1)
draw_b <-
rbeta(1, arm_b_successes[i - 1] + 1, arm_b_failures[i - 1] + 1)
if (draw_b > draw_a){
APT = APT + 1
}
}
# CASE 2: Posterior probability of arms a & b is not updated
else {
draw_a <-
rbeta(1, arm_a_successes[i - m] + 1, arm_a_failures[i - m] + 1)
draw_b <-
rbeta(1, arm_b_successes[i - m] + 1, arm_b_failures[i - m] + 1)
}
}
# Update arm a & b history (abl), successes, and failures
if (draw_a > draw_b) {
if (runif(1) < pa) {
abl[i] <- 1
arm_a_s <- arm_a_s + 1 # update arm a success count
} else {
abl[i] <- 0
arm_a_f <- arm_a_f + 1 # update arm a failure count
}
} else{
if (runif(1) < pb) {
abl[i] <- 1
arm_b_s <- arm_b_s + 1 #update arm b success count
} else {
abl[i] <- 0
arm_b_f <- arm_b_f + 1 #update arm b failure count
}
}
# Tracking the build-up of total number of successes, failures, for arm a & b, adding onto successes and failures from burnin period
arm_a_successes[i] <- arm_a_s
arm_a_failures[i] <- arm_a_f
arm_b_successes[i] <- arm_b_s
arm_b_failures[i] <- arm_b_f
}
# List: complete history of number of times arms a & b were chosen (final index, total number of times chosen for each)
na <- arm_a_successes + arm_a_failures
nb <- arm_b_successes + arm_b_failures
# List: complete history of estimated posterior probability updates, for arm a & b success (final index, final posterior probability for each)
pa_est <- arm_a_successes / na
pb_est <- arm_b_successes / nb
AP <- nb/(na+nb)
# Wald Score for confidence interval: for analyzing categorical data underlying chi-squared distribution vectors, instead of summation
WaldScore <- (pb_est - pa_est) / sqrt(pa_est * (1 - pa_est) / na + pb_est * (1 - pb_est) / nb)
# List: our expected reward, from both arms (number of times chosen * posterior success probability, for each arm)
expected_reward <- (na[n] * pa + nb[n] * pb) / n
# Lists out the average expected reward for each individual trial (final index, final expected reward value)
actual_reward <- (arm_a_successes[n] + arm_b_successes[n])/n
#return(list(WaldScore = WaldScore[n], reward = reward[n], AP = AP[n], APT = APT))
return(c(WaldScore[n], expected_reward, actual_reward, AP[n], APT))
}Section 2: Finding Critical Value
Section 2.1 Finding Critical Values, Using Normal Assumption on Law of Large Numbers:
#' Critical Value (Normal)
#'
#' Finds critical value using normal assumption on LLN
#' @param APT - AP Test statistic (List of either APT_ban_UR/APT_w_UR results across different simulations)
#' @param alpha - Customized significance level
#'
#' @return generated critical value from normality assumption
normal_cv <- function(APT, alpha) {
m <- mean(APT)
v <- var(APT)
sd <- sqrt(v)
normal_rej <- qnorm(1 - alpha) #Rejection area defined (following normal distribution assumption)
cv <- normal_rej * sd + m
return(cv)
}Section 2.2 Finding Critical Values, Using Empirical Approach
#' Critical Value (Empirical)
#'
#' Finds critical value using empirical assumption on LLN
#' @param APT - AP Test statistic (List of either APT_ban_UR/APT_w_UR results across different simulations)
#' @param alpha - Customized significance level
#'
#' @return generated critical value from empirical simulations
empirical_cv <- function(APT, alpha) {
cdf = 0
i = 0
# Repeatedly add the empirical probabilities of each [i] value until cdf reaches 1 - significance level
while (cdf <= 1- alpha) {
p_i <-sum(APT == i)/length(APT)
cdf <- sum(cdf, p_i)
i = i + 1
}
return(i - 1)
}Section 2.3 Replicating and rerun the simulations for B replications
# One output for each simulation, the four dimensions are waldscore, average reward, APT_ban_UR, and APT_w_UR
simulation_n_times <- function(alpha,policy, effect_size, n, B, burnin, batch, ...) {
pa <- 0.5 - effect_size / 2 # define the corresponding priors of the success rate centred and scaled at p1+p2 = 1
pb <- 0.5 + effect_size / 2
results <- array(dim = c(B, 5)) # output a four dimensional arrage
# number of simulations
for (i in 1:B) {
results[i,] <- policy(
pa = pa,
pb = pb,
n = n,
burnin = burnin,
batch = batch,
...
)
}
average_power = mean(results[, 1] > qnorm(1-alpha)) # Find average power based on waldscore
expected_reward = mean(results[, 2]) # expected reward setting
average_reward = mean(results[,3]) # calculate the actual real reward throughout many trials
average_AP = mean(results[,4]) # find average allocation probability for the experimental arm
APT = results[,5]
# Empirical and normal CV of APT
em_cv <- empirical_cv(APT,alpha)
norm_cv <- normal_cv(APT,alpha)
F1 <- ecdf(APT)
# Empirical and normal FPR of APT is
em_FPR <- 1-F1(em_cv)
norm_FPR <- 1-F1(floor(norm_cv))
return(list(average_power = average_power,
expected_reward = expected_reward,
average_reward = average_reward,
average_AP = average_AP,
em_cv = em_cv,
norm_cv = norm_cv,
em_FPR = em_FPR,
norm_FPR = norm_FPR))
}Section 2.4: Construction of the reporting table:
# it takes in the same parameter as in before and output a comparison dataframe across different policies.
compare_policies <-
function(alpha,
effect_size,
n,
B,
burnin,
batch,
epsilon,
c) {
UR_result <-
simulation_n_times(alpha, UR, effect_size, n, B, burnin, batch)
TS_result <-
simulation_n_times(alpha, TS, effect_size, n, B, burnin, batch)
TS_epsilon_result <-
simulation_n_times(alpha, TS_epsilon, effect_size, n, B, burnin, batch, epsilon = 0.1)
TSPDD_result <-
simulation_n_times(alpha, TSPDD, effect_size, n, B, burnin, batch, c = 0.1)
# Create a summary table
summary_table <- data.frame(
Metric = c(
"Average Power",
"Expected Reward",
"Average Reward",
"Average AP",
"Empirical CV",
"Normal CV",
"Empirical FPR",
"Normal FPR"
),
UR = c(
UR_result$average_power,
UR_result$expected_reward,
UR_result$average_reward,
UR_result$average_AP,
UR_result$em_cv,
UR_result$norm_cv,
UR_result$em_FPR,
UR_result$norm_FPR
),
TS = c(
TS_result$average_power,
TS_result$expected_reward,
TS_result$average_reward,
TS_result$average_AP,
TS_result$em_cv,
TS_result$norm_cv,
TS_result$em_FPR,
TS_result$norm_FPR
),
TS_epsilon = c(
TS_epsilon_result$average_power,
TS_epsilon_result$expected_reward,
TS_epsilon_result$average_reward,
TS_epsilon_result$average_AP,
TS_epsilon_result$em_cv,
TS_epsilon_result$norm_cv,
TS_epsilon_result$em_FPR,
TS_epsilon_result$norm_FPR
),
TSPDD = c(
TSPDD_result$average_power,
TSPDD_result$expected_reward,
TSPDD_result$average_reward,
TSPDD_result$average_AP,
TSPDD_result$em_cv,
TSPDD_result$norm_cv,
TSPDD_result$em_FPR,
TSPDD_result$norm_FPR
)
)
return(summary_table)
}Section 3: Simulations:
Section 3.1: Global Settings
In this section we will provide some unified parameters across different simulations:
# let's make this a function
set.seed(0) # for results to be replicable
#calculate power and reward in case p0 < p1
alpha <- 0.05 # rejection region
effect_size <- 0 # by default effect size is 0: no difference between arms | effect size is 0.05-0.1: Intervention is of relatively small or marginal effect | effect size is 0.1-0.2: Medium or smal effect | effect size is 0.2-0.3+: relatively large effectt between interventions
n <- 100 # sample_size(total number of trials in one simulation)
burnin <- 0.1 * n # make by default the burin size is 10% of the total sample size that we have
B <- 1000 # number of simulations
batch <- 2 # batch_sizeSection 3.2: Effect size = 0.0
compare_policies(alpha,effect_size = 0,n,B,burnin,batch,epsilon=0.1,c=0.1) Metric UR TS TS_epsilon TSPDD
1 Average Power 0.05400 0.09700 0.07900 0.09700
2 Expected Reward 0.50000 0.50000 0.50000 0.50000
3 Average Reward 0.49853 0.50091 0.50173 0.50078
4 Average AP 0.50277 0.49996 0.50253 0.50390
5 Empirical CV 28.00000 42.00000 38.00000 37.00000
6 Normal CV 28.30240 43.58928 38.57921 33.40849
7 Empirical FPR 0.04800 0.05000 0.04400 0.04900
8 Normal FPR 0.04800 0.02800 0.04400 0.08100
Section 3.2: Effect size = 0.05
compare_policies(alpha,effect_size = 0.05,n,B,burnin,batch,epsilon=0.1,c=0.1) Metric UR TS TS_epsilon TSPDD
1 Average Power 0.112000 0.1890000 0.13800 0.1780000
2 Expected Reward 0.499968 0.5042665 0.50372 0.5035515
3 Average Reward 0.499510 0.5055200 0.50163 0.5027600
4 Average AP 0.499360 0.5853300 0.57440 0.5710300
5 Empirical CV 28.000000 43.0000000 39.00000 38.0000000
6 Normal CV 27.674925 47.1424816 41.70846 36.7797006
7 Empirical FPR 0.027000 0.0430000 0.04700 0.0490000
8 Normal FPR 0.052000 0.0000000 0.01200 0.0750000
Section 3.3: Effect size = 0.1
compare_policies(alpha,effect_size = 0.1,n,B,burnin,batch,epsilon=0.1,c=0.1) Metric UR TS TS_epsilon TSPDD
1 Average Power 0.299000 0.296000 0.244000 0.293000
2 Expected Reward 0.500227 0.516966 0.514616 0.513474
3 Average Reward 0.499430 0.516410 0.511840 0.512120
4 Average AP 0.502270 0.669660 0.646160 0.634740
5 Empirical CV 28.000000 44.000000 40.000000 41.000000
6 Normal CV 27.954363 49.312830 43.780939 40.982710
7 Empirical FPR 0.041000 0.030000 0.040000 0.045000
8 Normal FPR 0.068000 0.000000 0.002000 0.052000
Section 3.4: Effect size = 0.2
compare_policies(alpha,effect_size = 0.2,n,B,burnin,batch,epsilon=0.1,c=0.1) Metric UR TS TS_epsilon TSPDD
1 Average Power 0.643000 0.584000 0.585000 0.611000
2 Expected Reward 0.499858 0.558064 0.552914 0.549406
3 Average Reward 0.502580 0.559750 0.551750 0.547950
4 Average AP 0.499290 0.790320 0.764570 0.747030
5 Empirical CV 28.000000 45.000000 42.000000 44.000000
6 Normal CV 27.973850 50.120082 45.636924 45.798869
7 Empirical FPR 0.036000 0.000000 0.021000 0.021000
8 Normal FPR 0.066000 0.000000 0.000000 0.000000