# todo add induced-wald-z test: it also goes with the distribution
#' UR-Uniform Random Sampling Method - Comparison of Allocation Probability Test vs. Traditional Wald Z test
#'
#' Runs UR algorithm in two-armed case, for n batches to calculate an AP Test statistic for each arm
#' @param pa - actual prior probability of controlled group arm a (typically unknown in practice)
#' @param pb - actual prior probability of experimental arm b (typically unknown in practice)
#' @param n - total number of trials in one simulation
#' @param batch - number of trials before posterior probability is updated, but in UR case it is sort of irrelevant
#' @param burnin - number of trials that run only Uniform Random (UR) sampling, before applying the UR algorithm for all remaining trials, and updating posterior probability every batch (using beta distribution)
#' @return vector containing WaldScore[n], reward[n], AP, and APT: the final Wald-Z-score, reward, allocation probability throughout the trial, and AP Test statistics for both arms,
UR <- function(pa, pb, n, burnin, batch) {
# List: tracking successes and failures under each arm
# ex. [0,1,1,1,2,3,3,4,5,6], final index value indicates total number of successes/failures
arm_a_successes <- c(0)
arm_a_failures <- c(0)
arm_b_successes <- c(0)
arm_b_failures <- c(0)
# List: tracking only the current number of successes and failures (Value changes for each iteration)
arm_a_s <- 0
arm_a_f <- 0
arm_b_s <- 0
arm_b_f <- 0
# History that tracks if there was success in either arm for each trial (Variable not used in final results)
# ex. [1,0,1,1,0,1] 1 indicates success for either arm, 0 indicates failure for both arms
abl <- c()
APT = 0 # AP Test statistic over the time, here we should be expecting this to be around 50% of the sample size n.
# ---------- BURNIN PERIOD: use Uniform Random (UR) sampling only ----------
# note: would be the same here burnin and the actual policy as the policy we are simulating is UR, just for style keeping :) (sorry OCD)
for (i in 1:burnin) {
draw_a <- runif(1) # Random probability of arm a being chosen
draw_b <- runif(1) # Random probability of arm b being chosen
# CASE 1: Arm a is chosen
if (draw_a > draw_b) {
# Arm a was successful
if (runif(1) < pa) {
abl[i] <- 1
arm_a_s <- arm_a_s + 1 # update arm a success count
}
# Arm a was not successful
else {
abl[i] <- 0
arm_a_f <- arm_a_f + 1 # update arm a failure count
}
}
# CASE 2: Arm b is chosen
else {
# Arm b was successful
if (runif(1) < pb) {
abl[i] <- 1
arm_b_s <- arm_b_s + 1 # update arm b success count
}
# Arm b was not successful
else {
abl[i] <- 0
arm_b_f <- arm_b_f + 1 # update arm b failure count
}
}
arm_a_successes[i] <- arm_a_s
arm_a_failures[i] <- arm_a_f
arm_b_successes[i] <- arm_b_s
arm_b_failures[i] <- arm_b_f
}
# ---------- UR Period: use Uniform Random (UR) sampling only ----------
for (i in (burnin+1):n) {
m = i %% batch
draw_a <- runif(1) # Random probability of arm a being chosen
draw_b <- runif(1) # Random probability of arm b being chosen
# CASE 1: Arm a is chosen
if (draw_a > draw_b) {
# Arm a was successful
if (runif(1) < pa) {
abl[i] <- 1
arm_a_s <- arm_a_s + 1 # update arm a success count
}
# Arm a was not successful
else {
abl[i] <- 0
arm_a_f <- arm_a_f + 1 # update arm a failure count
}
}
# CASE 2: Arm b is chosen
else {
if(m == 0){
APT = APT + 1
}
# Arm b was successful
if (runif(1) < pb) {
abl[i] <- 1
arm_b_s <- arm_b_s + 1 # update arm b success count
}
# Arm b was not successful
else {
abl[i] <- 0
arm_b_f <- arm_b_f + 1 # update arm b failure count
}
}
# Tracking the build-up of total number of successes, failures, for arm a & b
arm_a_successes[i] <- arm_a_s
arm_a_failures[i] <- arm_a_f
arm_b_successes[i] <- arm_b_s
arm_b_failures[i] <- arm_b_f
}
na <- arm_a_successes + arm_a_failures
nb <- arm_b_successes + arm_b_failures
AP <- nb/(na+nb) # Tracking of the changes of allocation probability over the time, it should stabilize at a 50% when sample size gets large enough
# List: complete history of estimated posterior probability updates, for arm a & b success (final index, final posterior probability for each)
pa_est <- arm_a_successes / na
pb_est <- arm_b_successes / nb
# Wald Score for confidence interval: for analyzing categorical data underlying chi-squared distribution vectors, instead of summation
p_hat <- (arm_a_successes + arm_b_successes)/(na + nb)
WaldScore <- (pb_est - pa_est) / sqrt(p_hat*(1-p_hat)*(1/na + 1/nb))
# List: our expected reward, from both arms (number of times chosen * posterior success probability, for each arm)
expected_reward <- (na[n] * pa + nb[n] * pb) / n
# Lists out the average expected reward for each individual trial (final index, final expected reward value)
actual_reward <- (arm_a_successes[n] + arm_b_successes[n])/n
#return(list(WaldScore = WaldScore[n], reward = reward[n], AP = AP[n], APT = APT))
return(c(WaldScore[n], expected_reward, actual_reward, AP[n], APT))
}AP test application
Section 1 Policy Introductions
Section 1.1 Running Traditional UR
Section 1.1.1: Two-arm Uniform Random Sampling
Section 1.1.2: multi-arm Uniform Random
#' UR Sampling Method - Comparison of Allocation Probability Test vs. Traditional Wald Z test
#'
#' Runs UR algorithm in two-armed case, for n batches to calculate an AP Test statistic for each arm
#' @param priors this is a list c() containing the success rate of each arm c(pa, pb, pc,...)
#' @param n - total number of trials in one simulation
#' @param burnin - number of trials that run only Uniform Random (UR) sampling, before applying the TS algorithm for all remaining trials, and updating posterior probability every batch (using beta distribution)
#' @param batch - number of trials before posterior probability is updated, but in UR case it is sort of irrelevant
#'
#' @return vector containing WaldScore[n], reward[n], AP, and APT: the final Wald-Z-score, reward, allocation probability throughout the trial, and AP Test statistics for both arms,
# or per say the CARA-FLGI method
UR_multiarm <- function(priors, n, burnin, batch) {
# for an n arm setting, let's calculate the number of arms first
num_arms <- length(priors)
# List: tracking successes and failures under each arm
# ex. [0,1,1,1,2,3,3,4,5,6], final index value indicates total number of successes/failures
arm_successes <- array(0, dim=c(num_arms, n))
arm_failures <- array(0, dim=c(num_arms, n))
# List: tracking only the current number of successes and failures (Value changes for each iteration)
arm_s <- rep(0, num_arms)
arm_f <- rep(0, num_arms)
# still keeping track of the allocation probability test statistcs under a multi-arm case
APT = 0
# keep the same abl list for record keeping, to see how many successes and failures along the way
abl <- c()
# ---------- BURNIN PERIOD: use Uniform Random (UR) sampling only ----------
for (i in 1:burnin) {
draw_arms <- runif(num_arms) # randomly drawing one arm out of all arms
arm_chosen <- which.max(draw_arms) # settle down which arm to pick
if(runif(1) < priors[arm_chosen]){
abl[i] <- 1
arm_s[arm_chosen] <- arm_s[arm_chosen] + 1 # update arm a success count
}
else{
abl[i] <- 0
arm_f[arm_chosen] <- arm_f[arm_chosen] + 1 # update arm a failure count
}
# Tracking the build-up of total number of successes, failures, for all the arms:
arm_successes[ , i] <- arm_s
arm_failures[ , i] <- arm_f
}
# ---------- AFTER BURNIN PERIOD: Use TS or UR depending on difference between posterior probabilities ----------
for (i in (burnin + 1):n) {
# Posterior probability updates happen by batch size (whenever m becomes 0, larger batch size -> slower updates)
m = i %% batch
# Draw from Beta(alpha, beta) distribution, using only the current success & failure counts
# alpha = current successes, beta = current failures of each arm
# CASE 1: Posterior probability of arms a & b is updated
if (m == 0) {
draw_arms <- runif(num_arms)
# purely uniform sampling
# assuming the last arm in the prior list is always the experimental arm and we are to determine if it is a superior treatment compare to all other arms
if(draw_arms[num_arms] > 1/num_arms){
APT = APT + 1
}
}
# CASE 2: Posterior probability of arms a & b is not updated
else {
draw_arms <- runif(num_arms)
# purely uniform random sampling
}
# settle the chosen arm:
arm_chosen <- which.max(draw_arms) # Settle down which arm to pick based on the highest sampled posterior room
# Update arms history (abl), successes, and failures
if (runif(1) < priors[arm_chosen]) {
abl[i] <- 1
arm_s[arm_chosen] <- arm_s[arm_chosen] + 1 # Update success count for chosen arm
} else {
abl[i] <- 0
arm_f[arm_chosen] <- arm_f[arm_chosen] + 1 # Update failure count for chosen arm
}
# Tracking the build-up of total number of successes, failures, for all arms, adding onto successes and failures from burnin period
arm_successes[, i] <- arm_s
arm_failures[, i] <- arm_f
}
# List: complete history of number of times arms a & b were chosen (final index, total number of times chosen for each)
n_total <- arm_successes + arm_failures
AP <- n_total[num_arms, ]/colSums(n_total)
# List: complete history of estimated posterior probability updates, for arm a & b success (final index, final posterior probability for each)
p_est <- arm_successes / n_total
# Wald Score for confidence interval: for analyzing categorical data underlying chi-squared distribution vectors, instead of summation
# we are calculating the Z test for whether or not the last arm is significantly better than the average of the other arms
# Get the proportions of successes for the last arm and the other arms
p_last_arm <- p_est[num_arms, n]
# Get the sample sizes for the last arm and the other arms
n_last_arm <- n_total[num_arms, n]
n_other_arms <- n_total[-num_arms, n]
# Calculate the weighted average success rate for the other arms
p_other_arms <- sum(n_other_arms * p_est[-num_arms, n]) / sum(n_other_arms)
# Calculate the pooled proportion of successes
p_pooled <- (n_last_arm * p_last_arm + sum(n_other_arms) * p_other_arms) / (n_last_arm + sum(n_other_arms))
# Calculate the Z-score
WaldScore <- (p_last_arm - p_other_arms) / sqrt(p_pooled * (1 - p_pooled) * (1/n_last_arm + 1/sum(n_other_arms)))
# List: our expected reward, from both arms (number of times chosen * posterior success probability, for each arm)
expected_reward <- sum(n_total[, n] * priors) / n
# Lists out the average expected reward for each individual trial (final index, final expected reward value)
actual_reward <- sum(arm_successes[, n]) / n
#return(list(WaldScore = WaldScore[n], reward = reward[n], AP = AP[n], APT = APT))
return(c(WaldScore, expected_reward, actual_reward, AP[n], APT))
}Section 1.2 Running Traditional TS
Section 1.2.1: 2-arm Traditional TS
#' TS-Thompson Sampling Method - Comparison of Allocation Probability Test vs. Traditional Wald Z test
#'
#' Runs TS algorithm in two-armed case, for n batches to calculate an AP Test statistic for each arm
#' @param pa - actual prior probability of controlled group arm a (typically unknown in practice)
#' @param pb - actual prior probability of experimental arm b (typically unknown in practice)
#' @param n - total number of trials in one simulation
#' @param burnin - number of trials that run only Uniform Random (UR) sampling, before applying the TS algorithm for all remaining trials, and updating posterior probability every batch (using beta distribution)
#' @param batch - number of trials before posterior probability is updated, but in UR case it is sort of irrelevant
#'
#' @return vector containing WaldScore[n], reward[n], AP, and APT: the final Wald-Z-score, reward, allocation probability throughout the trial, and AP Test statistics for both arms,
# or per say the CARA-FLGI method
TS <- function(pa, pb, n, burnin, batch) {
# List: tracking successes and failures under each arm
# ex. [0,1,1,1,2,3,3,4,5,6], final index value indicates total number of successes/failures
arm_a_successes <- c(0)
arm_a_failures <- c(0)
arm_b_successes <- c(0)
arm_b_failures <- c(0)
# List: tracking only the current number of successes and failures (Value changes for each iteration)
arm_a_s <- 0
arm_a_f <- 0
arm_b_s <- 0
arm_b_f <- 0
# History that tracks if there was success in either arm for each trial (Variable not used in final results)
# ex. [1,0,1,1,0,1] 1 indicates success for either arm, 0 indicates failure for both arms
abl <- c()
APT = 0
# ---------- BURNIN PERIOD: use Uniform Random (UR) sampling only ----------
for (i in 1:burnin) {
draw_a <- runif(1) # Random probability of arm a being chosen
draw_b <- runif(1) # Random probability of arm b being chosen
# CASE 1: Arm a is chosen
if (draw_a > draw_b) {
# Arm a was successful
if (runif(1) < pa) {
abl[i] <- 1
arm_a_s <- arm_a_s + 1 # update arm a success count
}
# Arm a was not successful
else {
abl[i] <- 0
arm_a_f <- arm_a_f + 1 # update arm a failure count
}
}
# CASE 2: Arm b is chosen
else {
# Arm b was successful
if (runif(1) < pb) {
abl[i] <- 1
arm_b_s <- arm_b_s + 1 # update arm b success count
}
# Arm b was not successful
else {
abl[i] <- 0
arm_b_f <- arm_b_f + 1 # update arm b failure count
}
}
# Tracking the build-up of total number of successes, failures, for arm a & b
arm_a_successes[i] <- arm_a_s
arm_a_failures[i] <- arm_a_f
arm_b_successes[i] <- arm_b_s
arm_b_failures[i] <- arm_b_f
}
# ---------- AFTER BURNIN PERIOD: Use TS or UR depending on difference between posterior probabilities ----------
for (i in (burnin + 1):n) {
# Posterior probability updates happen by batch size (whenever m becomes 0, larger batch size -> slower updates)
m = i %% batch
# Draw from Beta(alpha, beta) distribution, using only the current success & failure counts
# alpha = current successes, beta = current failures of each arm
# CASE 1: Posterior probability of arms a & b is updated
if (m == 0) {
draw_a <-
rbeta(1, arm_a_successes[i - 1] + 1, arm_a_failures[i - 1] + 1)
draw_b <-
rbeta(1, arm_b_successes[i - 1] + 1, arm_b_failures[i - 1] + 1)
if(draw_b > draw_a){
APT = APT + 1
}
}
# CASE 2: Posterior probability of arms a & b is not updated
else {
draw_a <-
rbeta(1, arm_a_successes[i - m] + 1, arm_a_failures[i - m] + 1)
draw_b <-
rbeta(1, arm_b_successes[i - m] + 1, arm_b_failures[i - m] + 1)
# Using index [i - m] shows we are using the posterior probabilities from the previous batch, and not our most current success/failure counts
}
# Update arm a & b history (abl), successes, and failures
if (draw_a > draw_b) {
if (runif(1) < pa) {
abl[i] <- 1
arm_a_s <- arm_a_s + 1 # update arm a success count
} else {
abl[i] <- 0
arm_a_f <- arm_a_f + 1 # update arm a failure count
}
} else{
if (runif(1) < pb) {
abl[i] <- 1
arm_b_s <- arm_b_s + 1 #update arm b success count
} else {
abl[i] <- 0
arm_b_f <- arm_b_f + 1 #update arm b failure count
}
}
# Tracking the build-up of total number of successes, failures, for arm a & b, adding onto successes and failures from burnin period
arm_a_successes[i] <- arm_a_s
arm_a_failures[i] <- arm_a_f
arm_b_successes[i] <- arm_b_s
arm_b_failures[i] <- arm_b_f
}
# List: complete history of number of times arms a & b were chosen (final index, total number of times chosen for each)
na <- arm_a_successes + arm_a_failures
nb <- arm_b_successes + arm_b_failures
AP <- nb/(na+nb)
# List: complete history of estimated posterior probability updates, for arm a & b success (final index, final posterior probability for each)
pa_est <- arm_a_successes / na
pb_est <- arm_b_successes / nb
# Wald Score for confidence interval: for analyzing categorical data underlying chi-squared distribution vectors, instead of summation
# WaldScore <- (pb_est - pa_est) / sqrt(pa_est * (1 - pa_est) / na + pb_est * (1 - pb_est) / nb)
p_hat <- (arm_a_successes + arm_b_successes)/(na + nb)
WaldScore <- (pb_est - pa_est) / sqrt(p_hat*(1-p_hat)*(1/na + 1/nb))
# List: our expected reward, from both arms (number of times chosen * posterior success probability, for each arm)
expected_reward <- (na[n] * pa + nb[n] * pb) / n
# Lists out the average expected reward for each individual trial (final index, final expected reward value)
actual_reward <- (arm_a_successes[n] + arm_b_successes[n])/n
#return(list(WaldScore = WaldScore[n], reward = reward[n], AP = AP[n], APT = APT))
return(c(WaldScore[n], expected_reward, actual_reward, AP[n], APT))
}Section 1.2.2: multi-arm Traditional TS
#' TS-Thompson Sampling Method - Comparison of Allocation Probability Test vs. Traditional Wald Z test
#'
#' Runs TS algorithm in two-armed case, for n batches to calculate an AP Test statistic for each arm
#' @param priors this is a list c() containing the success rate of each arm c(pa, pb, pc,...)
#' @param n - total number of trials in one simulation
#' @param burnin - number of trials that run only Uniform Random (UR) sampling, before applying the TS algorithm for all remaining trials, and updating posterior probability every batch (using beta distribution)
#' @param batch - number of trials before posterior probability is updated, but in UR case it is sort of irrelevant
#'
#' @return vector containing WaldScore[n], reward[n], AP, and APT: the final Wald-Z-score, reward, allocation probability throughout the trial, and AP Test statistics for both arms,
# or per say the CARA-FLGI method
TS_multiarm <- function(priors, n, burnin, batch) {
# for an n arm setting, let's calculate the number of arms first
num_arms <- length(priors)
# List: tracking successes and failures under each arm
# ex. [0,1,1,1,2,3,3,4,5,6], final index value indicates total number of successes/failures
arm_successes <- array(0, dim=c(num_arms, n))
arm_failures <- array(0, dim=c(num_arms, n))
# List: tracking only the current number of successes and failures (Value changes for each iteration)
arm_s <- rep(0, num_arms)
arm_f <- rep(0, num_arms)
# still keeping track of the allocation probability test statistcs under a multi-arm case
APT = 0
# keep the same abl list for record keeping, to see how many successes and failures along the way
abl <- c()
# ---------- BURNIN PERIOD: use Uniform Random (UR) sampling only ----------
for (i in 1:burnin) {
draw_arms <- runif(num_arms) # randomly drawing one arm out of all arms
arm_chosen <- which.max(draw_arms) # settle down which arm to pick
if(runif(1) < priors[arm_chosen]){
abl[i] <- 1
arm_s[arm_chosen] <- arm_s[arm_chosen] + 1 # update arm a success count
}
else{
abl[i] <- 0
arm_f[arm_chosen] <- arm_f[arm_chosen] + 1 # update arm a failure count
}
# Tracking the build-up of total number of successes, failures, for all the arms:
arm_successes[ , i] <- arm_s
arm_failures[ , i] <- arm_f
}
# ---------- AFTER BURNIN PERIOD: Use TS or UR depending on difference between posterior probabilities ----------
for (i in (burnin + 1):n) {
# Posterior probability updates happen by batch size (whenever m becomes 0, larger batch size -> slower updates)
m = i %% batch
# Draw from Beta(alpha, beta) distribution, using only the current success & failure counts
# alpha = current successes, beta = current failures of each arm
# CASE 1: Posterior probability of arms a & b is updated
if (m == 0) {
draw_arms <- rbeta(num_arms, arm_successes[, i - 1] + 1, arm_failures[, i - 1] + 1)
# assuming the last arm in the prior list is always the experimental arm and we are to determine if it is a superior treatment compare to all other arms
if(draw_arms[num_arms] > 1/num_arms){
APT = APT + 1
}
}
# CASE 2: Posterior probability of arms a & b is not updated
else {
draw_arms <- rbeta(num_arms, arm_successes[, i - m] + 1, arm_failures[, i - m] + 1)
# Using index [i - m] shows we are using the posterior probabilities from the previous batch, and not our most current success/failure counts
}
# settle the chosen arm:
arm_chosen <- which.max(draw_arms) # Settle down which arm to pick based on the highest sampled posterior room
# Update arms history (abl), successes, and failures
if (runif(1) < priors[arm_chosen]) {
abl[i] <- 1
arm_s[arm_chosen] <- arm_s[arm_chosen] + 1 # Update success count for chosen arm
} else {
abl[i] <- 0
arm_f[arm_chosen] <- arm_f[arm_chosen] + 1 # Update failure count for chosen arm
}
# Tracking the build-up of total number of successes, failures, for all arms, adding onto successes and failures from burnin period
arm_successes[, i] <- arm_s
arm_failures[, i] <- arm_f
}
# List: complete history of number of times arms a & b were chosen (final index, total number of times chosen for each)
n_total <- arm_successes + arm_failures
AP <- n_total[num_arms, ]/colSums(n_total)
# List: complete history of estimated posterior probability updates, for arm a & b success (final index, final posterior probability for each)
p_est <- arm_successes / n_total
# Wald Score for confidence interval: for analyzing categorical data underlying chi-squared distribution vectors, instead of summation
# we are calculating the Z test for whether or not the last arm is significantly better than the average of the other arms
# Get the proportions of successes for the last arm and the other arms
p_last_arm <- p_est[num_arms, n]
# Get the sample sizes for the last arm and the other arms
n_last_arm <- n_total[num_arms, n]
n_other_arms <- n_total[-num_arms, n]
# Calculate the weighted average success rate for the other arms
p_other_arms <- sum(n_other_arms * p_est[-num_arms, n]) / sum(n_other_arms)
# Calculate the pooled proportion of successes
p_pooled <- (n_last_arm * p_last_arm + sum(n_other_arms) * p_other_arms) / (n_last_arm + sum(n_other_arms))
# Calculate the Z-score
WaldScore <- (p_last_arm - p_other_arms) / sqrt(p_pooled * (1 - p_pooled) * (1/n_last_arm + 1/sum(n_other_arms)))
# List: our expected reward, from both arms (number of times chosen * posterior success probability, for each arm)
expected_reward <- sum(n_total[, n] * priors) / n
# Lists out the average expected reward for each individual trial (final index, final expected reward value)
actual_reward <- sum(arm_successes[, n]) / n
#return(list(WaldScore = WaldScore[n], reward = reward[n], AP = AP[n], APT = APT))
return(c(WaldScore, expected_reward, actual_reward, AP[n], APT))
}Section 1.3 Running Epsilon greedy TS
#' Epsilon-TS-Thompson Sampling Method - Comparison of Allocation Probability Test vs. Traditional Wald Z test
#'
#' Runs epsilon greedy TS algorithm in two-armed case, for n batches to calculate an AP Test statistic for each arm
#' @param pa - actual prior probability of controlled group arm a (typically unknown in practice)
#' @param pb - actual prior probability of experimental arm b (typically unknown in practice)
#' @param n - total number of trials in one simulation
#' @param burnin - number of trials that run only Uniform Random (UR) sampling, before applying the TS algorithm for all remaining trials, and updating posterior probability every batch (using beta distribution)
#' @param batch - number of trials before posterior probability is updated, but in UR case it is sort of irrelevant
#' @param epsilon - this is the epsilon (or proportion) we defined previous to simulations at a certain level for UR sampling within the simulations
#' @return vector containing WaldScore[n], reward[n], AP, and APT: the final Wald-Z-score, reward, allocation probability throughout the trial, and AP Test statistics for both arms,
# or per say the CARA-FLGI method
TS_epsilon <- function(pa, pb, n, burnin, batch, epsilon) {
# List: tracking successes and failures under each arm
# ex. [0,1,1,1,2,3,3,4,5,6], final index value indicates total number of successes/failures
arm_a_successes <- c(0)
arm_a_failures <- c(0)
arm_b_successes <- c(0)
arm_b_failures <- c(0)
# List: tracking only the current number of successes and failures (Value changes for each iteration)
arm_a_s <- 0
arm_a_f <- 0
arm_b_s <- 0
arm_b_f <- 0
# History that tracks if there was success in either arm for each trial (Variable not used in final results)
# ex. [1,0,1,1,0,1] 1 indicates success for either arm, 0 indicates failure for both arms
abl <- c()
APT = 0 # AP Test statistic
# ---------- BURNIN PERIOD: use Uniform Random (UR) sampling only ----------
for (i in 1:burnin) {
draw_a <- runif(1) # Random probability of arm a being chosen
draw_b <- runif(1) # Random probability of arm b being chosen
# CASE 1: Arm a is chosen
if (draw_a > draw_b) {
# Arm a was successful
if (runif(1) < pa) {
abl[i] <- 1
arm_a_s <- arm_a_s + 1 # update arm a success count
}
# Arm a was not successful
else {
abl[i] <- 0
arm_a_f <- arm_a_f + 1 # update arm a failure count
}
}
# CASE 2: Arm b is chosen
else {
# Arm b was successful
if (runif(1) < pb) {
abl[i] <- 1
arm_b_s <- arm_b_s + 1 # update arm b success count
}
# Arm b was not successful
else {
abl[i] <- 0
arm_b_f <- arm_b_f + 1 # update arm b failure count
}
}
# Tracking the build-up of total number of successes, failures, for arm a & b
arm_a_successes[i] <- arm_a_s
arm_a_failures[i] <- arm_a_f
arm_b_successes[i] <- arm_b_s
arm_b_failures[i] <- arm_b_f
}
# ---------- AFTER BURNIN PERIOD: Use TS or UR depending on difference between posterior probabilities ----------
for (i in (burnin + 1):n) {
# Posterior probability updates happen by batch size (whenever m becomes 0, larger batch size -> slower updates)
m = i %% batch
# Draw from Beta(alpha, beta) distribution, using only the current success & failure counts
# alpha = current successes, beta = current failures of each arm
# Use UR: If difference between posterior probabilities of arms is less than threshold value (expected difference in reward between arms)
if (runif(1) < epsilon) {
draw_a <- runif(1) # Random probability of arm a being chosen
draw_b <- runif(1) # Random probability of arm b being chosen
}
# Use TS: If difference between posterior probabilities of arms crosses threshold value (expected difference in reward between arms)
else {
# CASE 1: Posterior probability of arms a & b is updated
if (m == 0) {
draw_a <-
rbeta(1, arm_a_successes[i - 1] + 1, arm_a_failures[i - 1] + 1)
draw_b <-
rbeta(1, arm_b_successes[i - 1] + 1, arm_b_failures[i - 1] + 1)
if (draw_b > draw_a){
APT = APT +1
}
}
# CASE 2: Posterior probability of arms a & b is not updated
else {
draw_a <-
rbeta(1, arm_a_successes[i - m] + 1, arm_a_failures[i - m] + 1)
draw_b <-
rbeta(1, arm_b_successes[i - m] + 1, arm_b_failures[i - m] + 1)
}
}
# Update arm a & b history (abl), successes, and failures
if (draw_a > draw_b) {
if (runif(1) < pa) {
abl[i] <- 1
arm_a_s <- arm_a_s + 1 # update arm a success count
} else {
abl[i] <- 0
arm_a_f <- arm_a_f + 1 # update arm a failure count
}
} else{
if (runif(1) < pb) {
abl[i] <- 1
arm_b_s <- arm_b_s + 1 #update arm b success count
} else {
abl[i] <- 0
arm_b_f <- arm_b_f + 1 #update arm b failure count
}
}
# Tracking the build-up of total number of successes, failures, for arm a & b, adding onto successes and failures from burnin period
arm_a_successes[i] <- arm_a_s
arm_a_failures[i] <- arm_a_f
arm_b_successes[i] <- arm_b_s
arm_b_failures[i] <- arm_b_f
}
# List: complete history of number of times arms a & b were chosen (final index, total number of times chosen for each)
na <- arm_a_successes + arm_a_failures
nb <- arm_b_successes + arm_b_failures
AP <- nb/(na+nb)
# List: complete history of estimated posterior probability updates, for arm a & b success (final index, final posterior probability for each)
pa_est <- arm_a_successes / na
pb_est <- arm_b_successes / nb
# Wald Score for confidence interval: for analyzing categorical data underlying chi-squared distribution vectors, instead of summation
#WaldScore <- (pb_est - pa_est) / sqrt(pa_est * (1 - pa_est) / na + pb_est * (1 - pb_est) / nb)
p_hat <- (arm_a_successes + arm_b_successes)/(na + nb)
WaldScore <- (pb_est - pa_est) / sqrt(p_hat*(1-p_hat)*(1/na + 1/nb))
# List: our expected reward, from both arms (number of times chosen * posterior success probability, for each arm)
expected_reward <- (na[n] * pa + nb[n] * pb) / n
# Lists out the average expected reward for each individual trial (final index, final expected reward value)
actual_reward <- (arm_a_successes[n] + arm_b_successes[n])/n
#return(list(WaldScore = WaldScore[n], reward = reward[n], AP = AP[n], APT = APT))
return(c(WaldScore[n], expected_reward, actual_reward, AP[n], APT))
}Section 1.4 Running TS-PostDiff (Code adapted from Tong)
#' TS-PostDiff - Allocation Probability Test
#'
#' Runs TS-PostDiff algorithm in two-armed case, for n batches, with given batch size and BURNIN period, to calculate an AP Test statistic for each arm
#' @param pa - actual prior probability of arm a (typically unknown in practice)
#' @param pb - actual prior probability of arm b (typically unknown in practice)
#' @param n - total number of trials in one simulation
#' @param c - threshold of difference in expected reward between the two arms
#' @param burnin - number of trials that run only Uniform Random (UR) sampling, before applying the TS-PostDiff algorithm for all remaining trials, and updating posterior probability every batch (using beta distribution)
#' @param batch - number of trials before posterior probability is updated
#'
#' @return vector containing WaldScore[n], reward[n], APT_1, APT_2: the final waldscore, reward, and AP Test statistics for both arms
TSPDD <- function(pa, pb, n, burnin, batch, c) {
# List: tracking successes and failures under each arm
# ex. [0,1,1,1,2,3,3,4,5,6], final index value indicates total number of successes/failures
arm_a_successes <- c(0)
arm_a_failures <- c(0)
arm_b_successes <- c(0)
arm_b_failures <- c(0)
# List: tracking only the current number of successes and failures (Value changes for each iteration)
arm_a_s <- 0
arm_a_f <- 0
arm_b_s <- 0
arm_b_f <- 0
# History that tracks if there was success in either arm for each trial (Variable not used in final results)
# ex. [1,0,1,1,0,1] 1 indicates success for either arm, 0 indicates failure for both arms
abl <- c()
APT = 0 # AP Test statistic
# ---------- BURNIN PERIOD: use Uniform Random (UR) sampling only ----------
for (i in 1:burnin) {
draw_a <- runif(1) # Random probability of arm a being chosen
draw_b <- runif(1) # Random probability of arm b being chosen
# CASE 1: Arm a is chosen
if (draw_a > draw_b) {
# Arm a was successful
if (runif(1) < pa) {
abl[i] <- 1
arm_a_s <- arm_a_s + 1 # update arm a success count
}
# Arm a was not successful
else {
abl[i] <- 0
arm_a_f <- arm_a_f + 1 # update arm a failure count
}
}
# CASE 2: Arm b is chosen
else {
# Arm b was successful
if (runif(1) < pb) {
abl[i] <- 1
arm_b_s <- arm_b_s + 1 # update arm b success count
}
# Arm b was not successful
else {
abl[i] <- 0
arm_b_f <- arm_b_f + 1 # update arm b failure count
}
}
# Tracking the build-up of total number of successes, failures, for arm a & b
arm_a_successes[i] <- arm_a_s
arm_a_failures[i] <- arm_a_f
arm_b_successes[i] <- arm_b_s
arm_b_failures[i] <- arm_b_f
}
# ---------- AFTER BURNIN PERIOD: Use TS or UR depending on difference between posterior probabilities ----------
for (i in (burnin + 1):n) {
# Posterior probability updates happen by batch size (whenever m becomes 0, larger batch size -> slower updates)
m = i %% batch
# Draw from Beta(alpha, beta) distribution, using only the current success & failure counts
# alpha = current successes, beta = current failures of each arm
# Use UR: If difference between posterior probabilities of arms is less than threshold value (expected difference in reward between arms)
if (abs(draw_a - draw_b) < c) {
draw_a <- runif(1) # Random probability of arm a being chosen
draw_b <- runif(1) # Random probability of arm b being chosen
}
# Use TS: If difference between posterior probabilities of arms crosses threshold value (expected difference in reward between arms)
else {
# CASE 1: Posterior probability of arms a & b is updated
if (m == 0) {
draw_a <-
rbeta(1, arm_a_successes[i - 1] + 1, arm_a_failures[i - 1] + 1)
draw_b <-
rbeta(1, arm_b_successes[i - 1] + 1, arm_b_failures[i - 1] + 1)
if (draw_b > draw_a){
APT = APT + 1
}
}
# CASE 2: Posterior probability of arms a & b is not updated
else {
draw_a <-
rbeta(1, arm_a_successes[i - m] + 1, arm_a_failures[i - m] + 1)
draw_b <-
rbeta(1, arm_b_successes[i - m] + 1, arm_b_failures[i - m] + 1)
}
}
# Update arm a & b history (abl), successes, and failures
if (draw_a > draw_b) {
if (runif(1) < pa) {
abl[i] <- 1
arm_a_s <- arm_a_s + 1 # update arm a success count
} else {
abl[i] <- 0
arm_a_f <- arm_a_f + 1 # update arm a failure count
}
} else{
if (runif(1) < pb) {
abl[i] <- 1
arm_b_s <- arm_b_s + 1 #update arm b success count
} else {
abl[i] <- 0
arm_b_f <- arm_b_f + 1 #update arm b failure count
}
}
# Tracking the build-up of total number of successes, failures, for arm a & b, adding onto successes and failures from burnin period
arm_a_successes[i] <- arm_a_s
arm_a_failures[i] <- arm_a_f
arm_b_successes[i] <- arm_b_s
arm_b_failures[i] <- arm_b_f
}
# List: complete history of number of times arms a & b were chosen (final index, total number of times chosen for each)
na <- arm_a_successes + arm_a_failures
nb <- arm_b_successes + arm_b_failures
# List: complete history of estimated posterior probability updates, for arm a & b success (final index, final posterior probability for each)
pa_est <- arm_a_successes / na
pb_est <- arm_b_successes / nb
AP <- nb/(na+nb)
# Wald Score for confidence interval: for analyzing categorical data underlying chi-squared distribution vectors, instead of summation
#WaldScore <- (pb_est - pa_est) / sqrt(pa_est * (1 - pa_est) / na + pb_est * (1 - pb_est) / nb)
p_hat <- (arm_a_successes + arm_b_successes)/(na + nb)
WaldScore <- (pb_est - pa_est) / sqrt(p_hat*(1-p_hat)*(1/na + 1/nb))
# List: our expected reward, from both arms (number of times chosen * posterior success probability, for each arm)
expected_reward <- (na[n] * pa + nb[n] * pb) / n
# Lists out the average expected reward for each individual trial (final index, final expected reward value)
actual_reward <- (arm_a_successes[n] + arm_b_successes[n])/n
#return(list(WaldScore = WaldScore[n], reward = reward[n], AP = AP[n], APT = APT))
return(c(WaldScore[n], expected_reward, actual_reward, AP[n], APT))
}Section 2: Finding Critical Value
Section 2.1 Finding Critical Values, Using Normal Assumption on Law of Large Numbers:
#' Critical Value (Normal)
#'
#' Finds critical value using normal assumption on LLN
#' @param APT - AP Test statistic (List of either APT_ban_UR/APT_w_UR results across different simulations)
#' @param alpha - Customized significance level
#'
#' @return generated critical value from normality assumption
normal_cv <- function(APT, alpha) {
m <- mean(APT)
v <- var(APT)
sd <- sqrt(v)
normal_rej <- qnorm(1 - alpha) #Rejection area defined (following normal distribution assumption)
cv <- normal_rej * sd + m
return(cv)
}Section 2.2 Finding Critical Values, Using Empirical Approach
#' Critical Value (Empirical)
#'
#' Finds critical value using empirical assumption on LLN
#' @param APT - AP Test statistic (List of either APT_ban_UR/APT_w_UR results across different simulations)
#' @param alpha - Customized significance level
#'
#' @return generated critical value from empirical simulations
empirical_cv <- function(APT, alpha) {
cdf = 0
i = 0
# Repeatedly add the empirical probabilities of each [i] value until cdf reaches 1 - significance level
while (cdf <= 1- alpha) {
p_i <-sum(APT == i)/length(APT)
cdf <- sum(cdf, p_i)
i = i + 1
}
return(i - 1)
}Section 2.3 Replicating and rerun the simulations for B replications
# One output for each simulation, the four dimensions are waldscore, average reward, APT_ban_UR, and APT_w_UR
simulation_n_times <-
function(alpha,
policy,
effect_size,
n,
B,
burnin,
batch,
multi_arm = FALSE,
...) {
if (multi_arm == FALSE) {
pa <- 0.5 - effect_size / 2 # define the corresponding priors of the success rate centred and scaled at p1+p2 = 1
pb <- 0.5 + effect_size / 2
results <- array(dim = c(B, 5)) # output a four dimensional arrage
# number of simulations
for (i in 1:B) {
results[i, ] <- policy(
pa = pa,
pb = pb,
n = n,
burnin = burnin,
batch = batch,
...
)
}
wald <- results[, 1]
average_power = mean(results[, 1] > qnorm(1 - alpha)) # Find average power based on waldscore
expected_reward = mean(results[, 2]) # expected reward setting
average_reward = mean(results[, 3]) # calculate the actual real reward throughout many trials
average_AP = mean(results[, 4]) # find average allocation probability for the experimental arm
APT = results[, 5]
# Empirical and normal CV of APT
em_cv <- empirical_cv(APT, alpha)
norm_cv <- normal_cv(APT, alpha)
F1 <- ecdf(APT)
# Empirical and normal FPR of APT is
em_FPR <- 1 - F1(em_cv)
norm_FPR <- 1 - F1(floor(norm_cv))
return(
list(
average_power = average_power,
expected_reward = expected_reward,
average_reward = average_reward,
average_AP = average_AP,
em_cv = em_cv,
norm_cv = norm_cv,
em_FPR = em_FPR,
norm_FPR = norm_FPR
)
)
}
if (class(multi_arm) == "numeric"){
priors <- seq(from = 0.5 - effect_size/2,
to = 0.5 + effect_size/2,
length.out = multi_arm)
results <- array(dim = c(B, 5))
for (i in 1:B) {
# at this point of time there is no other policies like TS_contextual or TS_epsilon
results[i, ] <- policy(
priors = priors,
n = n,
burnin = burnin,
batch = batch
)
}
wald <- results[, 1]
average_power = mean(results[, 1] > qnorm(1 - alpha)) # Find average power based on waldscore
expected_reward = mean(results[, 2]) # expected reward setting
average_reward = mean(results[, 3]) # calculate the actual real reward throughout many trials
average_AP = mean(results[, 4]) # find average allocation probability for the experimental arm
APT = results[, 5]
# Empirical and normal CV of APT
em_cv <- empirical_cv(APT, alpha)
norm_cv <- normal_cv(APT, alpha)
F1 <- ecdf(APT)
# Empirical and normal FPR of APT is
em_FPR <- 1 - F1(em_cv)
norm_FPR <- 1 - F1(floor(norm_cv))
return(
list(
average_power = average_power,
expected_reward = expected_reward,
average_reward = average_reward,
average_AP = average_AP,
em_cv = em_cv,
norm_cv = norm_cv,
em_FPR = em_FPR,
norm_FPR = norm_FPR
)
)
}
else{
print('Error: Please double check your multi_arm input is numeric or FALSE')
}
}Section 2.4: Construction of the reporting table:
Section 2.4.1: Comparing at a specific effect size and sample size:
# it takes in the same parameter as in before and output a comparison dataframe across different policies.
compare_policies <-
function(alpha,
effect_size,
n,
B,
burnin,
batch,
multi_arm = 3,
epsilon = 0.1,
c = 0.1) {
UR_result <-
simulation_n_times(alpha, UR, effect_size, n, B, burnin, batch, multi_arm = FALSE)
UR_multi_arm_result <-
simulation_n_times(alpha, UR_multiarm, effect_size, n, B, burnin, batch, multi_arm)
TS_result <-
simulation_n_times(alpha, TS, effect_size, n, B, burnin, batch, multi_arm = FALSE)
TS_multi_arm_result <-
simulation_n_times(alpha, TS_multiarm, effect_size, n, B, burnin, batch, multi_arm)
TS_epsilon_result <-
simulation_n_times(alpha, TS_epsilon, effect_size, n, B, burnin, batch, multi_arm = FALSE, epsilon)
TSPDD_result <-
simulation_n_times(alpha, TSPDD, effect_size, n, B, burnin, batch, multi_arm = FALSE, c)
# Create a summary table
summary_table <- data.frame(
Metric = c(
"Average Power",
"Expected Reward",
"Average Reward",
"Average AP",
"Empirical CV",
"Normal CV",
"Empirical FPR",
"Normal FPR"
),
UR = c(
UR_result$average_power,
UR_result$expected_reward,
UR_result$average_reward,
UR_result$average_AP,
UR_result$em_cv,
UR_result$norm_cv,
UR_result$em_FPR,
UR_result$norm_FPR
),
UR_multiarm = c(
UR_multi_arm_result$average_power,
UR_multi_arm_result$expected_reward,
UR_multi_arm_result$average_reward,
UR_multi_arm_result$average_AP,
UR_multi_arm_result$em_cv,
UR_multi_arm_result$norm_cv,
UR_multi_arm_result$em_FPR,
UR_multi_arm_result$norm_FPR
),
TS = c(
TS_result$average_power,
TS_result$expected_reward,
TS_result$average_reward,
TS_result$average_AP,
TS_result$em_cv,
TS_result$norm_cv,
TS_result$em_FPR,
TS_result$norm_FPR
),
TS_multiarm = c(
TS_multi_arm_result$average_power,
TS_multi_arm_result$expected_reward,
TS_multi_arm_result$average_reward,
TS_multi_arm_result$average_AP,
TS_multi_arm_result$em_cv,
TS_multi_arm_result$norm_cv,
TS_multi_arm_result$em_FPR,
TS_multi_arm_result$norm_FPR
),
TS_epsilon = c(
TS_epsilon_result$average_power,
TS_epsilon_result$expected_reward,
TS_epsilon_result$average_reward,
TS_epsilon_result$average_AP,
TS_epsilon_result$em_cv,
TS_epsilon_result$norm_cv,
TS_epsilon_result$em_FPR,
TS_epsilon_result$norm_FPR
),
TSPDD = c(
TSPDD_result$average_power,
TSPDD_result$expected_reward,
TSPDD_result$average_reward,
TSPDD_result$average_AP,
TSPDD_result$em_cv,
TSPDD_result$norm_cv,
TSPDD_result$em_FPR,
TSPDD_result$norm_FPR
)
)
return(summary_table)
}Section 2.4.2: Comparing at a range of effect size and sample size:
# notice that here the input of the effect_sizes is a list, all the others remain the same, this is a helper function that I created for easy comparison
compare_effect_sizes <-
function(alpha,
effect_sizes,
n,
B,
burnin,
batch,
multi_arm = 3, # setting default for 3-arm cases comparison
epsilon = 0.1, # setting default
c = 0.1) {
UR_avg_reward <- c()
UR_multiarm_avg_reward <-c()
TS_avg_reward <- c()
TS_multiarm_avg_reward <-c()
TS_epsilon_avg_reward <- c()
TSPDD_avg_reward <- c()
UR_power <- c()
UR_multiarm_power <- c()
TS_power <- c()
TS_multiarm_power <- c()
TS_epsilon_power <- c()
TSPDD_power <- c()
# Assuming allocation probability is represented by Average AP
UR_alloc_prob <- c()
UR_multiarm_alloc_prob <- c()
TS_alloc_prob <- c()
TS_multiarm_alloc_prob <- c()
TS_epsilon_alloc_prob <- c()
TSPDD_alloc_prob <- c()
for (effect_size in effect_sizes){
result <- compare_policies(alpha = alpha,
effect_size = effect_size,
n = n,
B = B,
burnin = burnin,
batch = batch,
multi_arm = multi_arm,
epsilon = epsilon,
c = c)
# avg reward construction
UR_avg_reward <- c(UR_avg_reward, sprintf("%.2f", result$UR[3]))
UR_multiarm_avg_reward <- c(UR_multiarm_avg_reward, sprintf("%.2f", result$UR_multiarm[3]) )
TS_avg_reward <- c(TS_avg_reward, sprintf("%.2f", result$TS[3]))
TS_multiarm_avg_reward <- c(TS_multiarm_avg_reward, sprintf("%.2f", result$TS_multiarm[3]))
TS_epsilon_avg_reward <- c(TS_epsilon_avg_reward, sprintf("%.2f", result$TS_epsilon[3]))
TSPDD_avg_reward <- c(TSPDD_avg_reward, sprintf("%.2f", result$TSPDD[3]))
# avg power construction
UR_power <- c(UR_power, sprintf("%.2f", result$UR[1]))
UR_multiarm_power <- c(UR_multiarm_power, sprintf("%.2f", result$UR_multiarm[1]))
TS_power <- c(TS_power, sprintf("%.2f", result$TS[1]))
TS_multiarm_power <- c(TS_multiarm_power, sprintf("%.2f", result$TS_multiarm[1]))
TS_epsilon_power <- c(TS_epsilon_power, sprintf("%.2f", result$TS_epsilon[1]))
TSPDD_power <- c(TSPDD_power, sprintf("%.2f", result$TSPDD[1]))
# avg allocation probability construction
UR_alloc_prob <- c(UR_alloc_prob, sprintf("%.2f", result$UR[4]))
UR_multiarm_alloc_prob <- c(UR_multiarm_alloc_prob, sprintf("%.2f", result$UR_multiarm[4]))
TS_alloc_prob <- c(TS_alloc_prob, sprintf("%.2f", result$TS[4]))
TS_multiarm_alloc_prob <- c(TS_multiarm_alloc_prob, sprintf("%.2f", result$TS_multiarm[4]))
TS_epsilon_alloc_prob <- c(TS_epsilon_alloc_prob, sprintf("%.2f", result$TS_epsilon[4]))
TSPDD_alloc_prob <- c(TSPDD_alloc_prob, sprintf("%.2f", result$TSPDD[4]))
}
final_summary_table <- data.frame(
Metric = c("Average Reward",
"Power",
"Allocation Probability"),
UR = c(
paste(UR_avg_reward, collapse = " | "),
paste(UR_power, collapse = " | "),
paste(UR_alloc_prob, collapse = " | ")
),
UR_multiarm = c(
paste(UR_multiarm_avg_reward, collapse = ' | '),
paste(UR_multiarm_power, collapse = ' | '),
paste(UR_multiarm_alloc_prob, collapse = ' | ')
),
TS = c(
paste(TS_avg_reward, collapse = " | "),
paste(TS_power, collapse = " | "),
paste(TS_alloc_prob, collapse = " | ")
),
TS_multiarm = c(
paste(TS_multiarm_avg_reward, collapse = ' | '),
paste(TS_multiarm_power, collapse = ' | '),
paste(TS_multiarm_alloc_prob, collapse = ' | ')
),
TS_epsilon = c(
paste(TS_epsilon_avg_reward, collapse = " | "),
paste(TS_epsilon_power, collapse = " | "),
paste(TS_epsilon_alloc_prob, collapse = " | ")
),
TSPDD = c(
paste(TSPDD_avg_reward, collapse = " | "),
paste(TSPDD_power, collapse = " | "),
paste(TSPDD_alloc_prob, collapse = " | ")
))
return(final_summary_table)
}Section 3: Simulations:
Section 3.1: Global Settings
In this section we will provide some unified parameters across different simulations:
# let's make this a function
set.seed(0) # for results to be replicable
#calculate power and reward in case p0 < p1
alpha <- 0.05 # rejection region
effect_size <- 0 # by default effect size is 0: no difference between arms | effect size is 0.05-0.1: Intervention is of relatively small or marginal effect | effect size is 0.1-0.2: Medium or smal effect | effect size is 0.2-0.3+: relatively large effectt between interventions
n <- 100 # sample_size(total number of trials in one simulation)
burnin <- 0.1 * n # make by default the burin size is 10% of the total sample size that we have
B <- 1000 # number of simulations
batch <- 2 # batch_sizeSection 3.2: Sample size n=50
n <- 50 # notice that this will change the gloabl setting of parametersSection 3.2.1: Effect size = 0.0
compare_policies(alpha,effect_size, n, B,burnin,batch,epsilon=0.1,c=0.1) Metric UR UR_multiarm TS TS_multiarm TS_epsilon TSPDD
1 Average Power 0.06100 0.05400 0.07500 0.06200 0.07400 0.08200
2 Expected Reward 0.50000 0.50000 0.50000 0.50000 0.50000 0.50000
3 Average Reward 0.49892 0.49846 0.49814 0.49810 0.50012 0.50224
4 Average AP 0.50342 0.33426 0.50174 0.33728 0.50704 0.50122
5 Empirical CV 14.00000 17.00000 19.00000 20.00000 17.00000 17.00000
6 Normal CV 13.81105 16.69130 19.44977 23.11388 17.84984 15.74270
7 Empirical FPR 0.02200 0.01900 0.02800 0.00000 0.04800 0.03800
8 Normal FPR 0.06700 0.05100 0.02800 0.00000 0.04800 0.08000
Section 3.2.2 Effect size = 0.05
compare_policies(alpha,effect_size = 0.05,n,B,burnin,batch,epsilon=0.1,c=0.1) Metric UR UR_multiarm TS TS_multiarm TS_epsilon
1 Average Power 0.102000 0.0830000 0.125000 0.088000 0.115000
2 Expected Reward 0.499988 0.4999255 0.502283 0.501595 0.502944
3 Average Reward 0.504080 0.5006000 0.502640 0.496720 0.501800
4 Average AP 0.499760 0.3312800 0.545660 0.359520 0.558880
5 Empirical CV 14.000000 17.0000000 20.000000 20.000000 18.000000
6 Normal CV 13.592118 16.8090778 20.771675 23.305910 18.689324
7 Empirical FPR 0.018000 0.0200000 0.000000 0.000000 0.028000
8 Normal FPR 0.054000 0.0650000 0.000000 0.000000 0.028000
TSPDD
1 0.121000
2 0.502801
3 0.503260
4 0.556020
5 18.000000
6 17.159720
7 0.023000
8 0.055000
Section 3.2.3: Effect size = 0.1
compare_policies(alpha,effect_size = 0.1,n,B,burnin,batch,epsilon=0.1,c=0.1) Metric UR UR_multiarm TS TS_multiarm TS_epsilon
1 Average Power 0.188000 0.140000 0.203000 0.138000 0.191000
2 Expected Reward 0.499938 0.499857 0.512484 0.506353 0.509848
3 Average Reward 0.500700 0.500040 0.510240 0.507060 0.507840
4 Average AP 0.499380 0.330900 0.624840 0.402660 0.598480
5 Empirical CV 14.000000 17.000000 20.000000 20.000000 18.000000
6 Normal CV 13.778909 16.730109 21.756543 23.722987 19.582023
7 Empirical FPR 0.021000 0.014000 0.000000 0.000000 0.041000
8 Normal FPR 0.064000 0.055000 0.000000 0.000000 0.011000
TSPDD
1 0.205000
2 0.509524
3 0.509780
4 0.595240
5 18.000000
6 18.339351
7 0.037000
8 0.037000
Section 3.2.4: Effect size = 0.2
compare_policies(alpha,effect_size = 0.2,n,B,burnin,batch,epsilon=0.1,c=0.1) Metric UR UR_multiarm TS TS_multiarm TS_epsilon
1 Average Power 0.408000 0.277000 0.387000 0.27200 0.38700
2 Expected Reward 0.500244 0.500114 0.542412 0.52615 0.53836
3 Average Reward 0.501360 0.499720 0.539640 0.52584 0.54148
4 Average AP 0.501220 0.335100 0.712060 0.47604 0.69180
5 Empirical CV 14.000000 17.000000 20.000000 20.00000 19.00000
6 Normal CV 13.571623 16.875222 22.481174 23.83220 20.56034
7 Empirical FPR 0.016000 0.017000 0.000000 0.00000 0.01600
8 Normal FPR 0.050000 0.059000 0.000000 0.00000 0.00000
TSPDD
1 0.391000
2 0.535696
3 0.537400
4 0.678480
5 19.000000
6 20.283181
7 0.042000
8 0.000000
Section 3.2.5: Effect size = 0.3
compare_policies(alpha,effect_size = 0.3,n,B,burnin,batch,epsilon=0.1,c=0.1) Metric UR UR_multiarm TS TS_multiarm TS_epsilon
1 Average Power 0.73800 0.449000 0.618000 0.457000 0.606000
2 Expected Reward 0.49973 0.500216 0.583898 0.557015 0.575078
3 Average Reward 0.50018 0.503540 0.586200 0.556320 0.573440
4 Average AP 0.49910 0.333200 0.779660 0.550680 0.750260
5 Empirical CV 14.00000 17.000000 20.000000 20.000000 19.000000
6 Normal CV 13.56317 16.854308 22.739797 23.675730 20.896212
7 Empirical FPR 0.02000 0.018000 0.000000 0.000000 0.028000
8 Normal FPR 0.05700 0.067000 0.000000 0.000000 0.000000
TSPDD
1 0.650000
2 0.574316
3 0.574300
4 0.747720
5 20.000000
6 21.368103
7 0.000000
8 0.000000
Section 3.2.6: Effect size = 0.4
compare_policies(alpha,effect_size = 0.4,n,B,burnin,batch,epsilon=0.1,c=0.1) Metric UR UR_multiarm TS TS_multiarm TS_epsilon
1 Average Power 0.913000 0.664000 0.803000 0.62600 0.851000
2 Expected Reward 0.499816 0.499188 0.630984 0.59500 0.621184
3 Average Reward 0.501280 0.501200 0.627320 0.59436 0.620800
4 Average AP 0.499540 0.331560 0.827460 0.61090 0.802960
5 Empirical CV 14.000000 17.000000 20.000000 20.00000 20.000000
6 Normal CV 13.709091 16.700920 22.136119 23.12041 20.609016
7 Empirical FPR 0.025000 0.012000 0.000000 0.00000 0.000000
8 Normal FPR 0.057000 0.055000 0.000000 0.00000 0.000000
TSPDD
1 0.848000
2 0.622296
3 0.619220
4 0.805740
5 20.000000
6 21.992025
7 0.000000
8 0.000000
Section 3.2.7: Effect size = 0.5
compare_policies(alpha,effect_size = 0.5,n,B,burnin,batch,epsilon=0.1,c=0.1) Metric UR UR_multiarm TS TS_multiarm TS_epsilon TSPDD
1 Average Power 0.98600 0.80500 0.92500 0.76800 0.93000 0.96200
2 Expected Reward 0.49964 0.50029 0.67961 0.64109 0.66423 0.66974
3 Average Reward 0.50092 0.50270 0.68092 0.64322 0.66510 0.66894
4 Average AP 0.49928 0.33576 0.85922 0.67514 0.82846 0.83948
5 Empirical CV 13.00000 17.00000 20.00000 20.00000 20.00000 20.00000
6 Normal CV 13.68636 16.86853 21.79609 22.64537 20.27195 21.75946
7 Empirical FPR 0.05000 0.02100 0.00000 0.00000 0.00000 0.00000
8 Normal FPR 0.05000 0.06600 0.00000 0.00000 0.00000 0.00000
Section 3.2.8: Summary Table for sample size = 50, effect_size = 0.0, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5:
n<-50
effect_sizes <- c(0.0, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5)
n_50 <- compare_effect_sizes(alpha,effect_sizes,n,B,burnin,batch,epsilon=0.1,c=0.1)Section 3.3: Sample size n=100, effect_size = 0.0, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5:
n <- 100
effect_sizes <- c(0.0, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5)
n_100 <- compare_effect_sizes(alpha,effect_sizes = effect_sizes,n,B,burnin,batch,epsilon=0.1,c=0.1)Section 3.4: Sample size n=200, effect_size = 0.0, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5:
n <- 200
effect_sizes <- c(0.0, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5)
n_200 <- compare_effect_sizes(alpha,effect_sizes = effect_sizes,n,B,burnin,batch,epsilon=0.1,c=0.1)Section 3.5: Sample size n=300, effect_size = 0.0, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5:
n <- 300
effect_sizes <- c(0.0, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5)
n_300 <- compare_effect_sizes(alpha,effect_sizes = effect_sizes,n,B,burnin,batch,epsilon=0.1,c=0.1)Section 3.6: joining the table in a long format
# Step 1: renaming the row names with corresponding sample size
rownames(n_50) <- paste(rownames(n_50), "(n=50)")
rownames(n_100) <- paste(rownames(n_100), "(n=100)")
rownames(n_200) <- paste(rownames(n_200), "(n=200)")
rownames(n_300) <- paste(rownames(n_300), "(n=300)")library(stringr) # need to install this pacakage if needed
# Step 2: Concatenate these tables vertically
merged_table <- rbind(n_50, n_100, n_200, n_300)
# Separate metrics and sample sizes from the row names
metrics <- gsub("\\(.*", "", rownames(merged_table))
sample_sizes <- gsub(".*\\(", "", gsub("\\).*", "", rownames(merged_table)))
# Create a data.frame to hold the metrics, sample sizes, and row indices
ordering_df <- data.frame(
Metric = metrics,
Sample_Size = sample_sizes,
Row_Index = 1:nrow(merged_table))
ordering_df$Sample_Size <- as.numeric(str_extract(sample_sizes, "\\d+"))
# Order the data.frame first by Metric, then by Sample_Size
ordering_df <- ordering_df[order(ordering_df$Metric, ordering_df$Sample_Size), ]
# Rearrange the rows of merged_table based on the ordered row indices
merged_table <- merged_table[ordering_df$Row_Index, ]Section 3.7: Plotting the trend plot with respect to effect size for each metric
Section 3.7.0 Defining the reward graphing tool:
library(dplyr)
library(ggplot2)
library(stringr)
library(tidyr)
#' graphing_tool - graphical analysis on different metrics like reward, power, or allocation probabilities
#'
#' Takes the below input
#' @param merged_table - merged tables on above sample sizes
#' @metric the metric we want to see grpahical interpretation
#' @policies the polcies we want to investigate on
#' @return the graph of the trend for selected policis
graphing_tool <- function(merged_table,
metric,
policies = c()) {
# Step 1: Filter the merged_table
filtered_data <- merged_table |> filter(Metric == metric)
# Step 2.1 Creating a helper function for splitting the data:
split_and_convert <- function(value) {
as.numeric(str_split(value, " \\| ")[[1]])
}
# Step 2.2: Split the values in the Value column into a numeric vector
metric_values_UR <- c(sapply(filtered_data$UR, split_and_convert))
metric_values_UR_multiarm <- c(sapply(filtered_data$UR_multiarm, split_and_convert))
metric_values_TS <- c(sapply(filtered_data$TS, split_and_convert))
metric_values_TS_multiarm <- c(sapply(filtered_data$TS_multiarm, split_and_convert))
metric_values_TS_epsilon <- c(sapply(filtered_data$TS_epsilon, split_and_convert))
metric_values_TSPDD <- c(sapply(filtered_data$TSPDD, split_and_convert))
# Step 3: Create a data frame for plotting
plot_data <- data.frame(
expand_grid(
Policy = c("UR", "UR_multiarm", "TS", "TS_multiarm", "TS_epsilon", "TSPDD"),
Sample_size = c(50, 100, 200, 300),
Effect_Size = effect_sizes
) %>% bind_cols(
Metric = c(
metric_values_UR,
metric_values_UR_multiarm,
metric_values_TS,
metric_values_TS_multiarm,
metric_values_TS_epsilon,
metric_values_TSPDD
)
)
)
# Step 4: Use ggplot2 to plot the trend
p <- ggplot(plot_data %>% filter(Policy %in% policies),
aes(x = Effect_Size, y = Metric, color = Policy)) +
geom_point(size = 0.5, alpha = 0.7) + geom_line(alpha = 0.7) +
facet_wrap( ~ Sample_size) +
labs(title = paste("Trend of ", metric ," across Effect Sizes and Sample Sizes"),
x = "Effect Size",
y = metric) +
theme_minimal()
return(p)
}Section 3.7.1 For Reward:
graphing_tool(merged_table,
'Average Reward',
c('UR_multiarm', "TS_multiarm"))Section 3.7.2 For Power:
graphing_tool(merged_table,
'Power',
c('UR_multiarm',
"TS_multiarm"))