Individuals indexed by \(i = 1, \dots , N\) are tasked with comparing two profiles \(j \in \{1, 2\}\).
Each profile \(j\) is associated with a unique treatment \(w_j \in \mathcal{W}\), where \(|\mathcal{W}| = K\). Outcomes \(Y_{ij}\in \{0,1\}\) are binary, where \(Y_{i1} = 1\) and \(Y_{i2} = 0\) if individual \(i\) prefers profile \(1\) over profile \(2\).
Using potential outcomes notation, let \(Y_i(\bar w)\) denote the two dimensional vector of outcomes for individual \(i\) when assigned to compare profiles associated with the treatment vector \(\bar w = \{w_1, w_2\}\). Treatment vectors are constrained so that \(w_1 \neq w_2\), so each individual has \(K(K-1)/2\) potential outcome vectors for comparing profiles of two treatments. (Assumptions: stability, no profile ordering effects.)
The estimand of interest is \[ \sum_{w'\neq w}\textrm{E}\left[ Y_{i}(\{w, w'\}) \right]\times \textrm{Pr}^{\textrm{U}}\left[\{w, w'\} \right]. \] Expectation is taken over the population, \(\textrm{Pr}\left[\{w, w'\} \right]\) is the probability of \(w\) and \(w'\) being compared, \(\textrm{Pr}^{\textrm{U}}\) is when pairs are assigned uniformly. I.e., the estimand is the expected rate at which profile \(w\) is selected when compared to all other profiles \(w' \neq w\), when profiles are assigned uniformly at random.
set.seed(60637)
# Experiment design for Stage 1
K_stage1 <- 70 # number of arms (treatments)
sd_stage1 <- 0.75 # standard deviation of underlying thetas
# the sd will affect the dispersion of win probabilities
true_theta <- rnorm(K_stage1, mean = 0, sd = sd_stage1)
# Check dispersion: what is the starting distribution of win probabilities?
# More dispersed means easier to find winner; less dispersed means harder
# Pairwise win probability: P(i beats j)
P_win <- function(i, j) plogis(true_theta[i] - true_theta[j])
win_mat <- matrix(0, nrow = K_stage1, ncol = K_stage1)
invisible(sapply(1:K_stage1, function(i) {
sapply(1:K_stage1, function(j) {
if (i != j) {
win_mat[i, j] <<- P_win(i, j)
}
})
}))
# Truth (Borda)
true_strength <- rowSums(win_mat) / (K_stage1 - 1) # ignore diagonal zeros
rank_borda <- order(true_strength, decreasing = TRUE)
# Truth (Maximin)
win_rate_mat <- win_mat / (win_mat + t(win_mat))
diag(win_rate_mat) <- NA
hist(win_mat[upper.tri(win_mat)], breaks = 20,
main = glue("Distribution of win probabilities,\nStage 1,\nsd = {sd_stage1}"),
xlab = "Win probability", ylab = "Frequency")run_single_sim <- function(sim_seed, algorithm, batch_sizes, K, A, alpha, delta, floor_mult, n_mc) {
set.seed(sim_seed)
# Storage for win/loss counts
wins <- losses <- wins_running <- losses_running <-
wins_filter <- losses_filter <-
matrix(1, nrow = K, ncol = K) # prior alpha
# Storage for results
probs_wide <- matrix(NA, nrow = A, ncol = K*(K-1)/2)
yobs_wide <- matrix(NA, nrow = A, ncol = 2)
ws_wide <- matrix(NA, nrow = A, ncol = 2)
# stores proportion correct during experiment for top 1, 3, 5, 10, 20 arms
pc_1 <- rep(NA, A)
pc_3 <- rep(NA, A)
pc_5 <- rep(NA, A)
pc_10 <- rep(NA, A)
pc_20 <- rep(NA, A)
pc_top5_in_20 <- rep(NA, A) # Percent of true top 5 that are in estimated top 20
pc_top5_in_25 <- rep(NA, A) # Percent of true top 5 that are in estimated top 25
rank_order <- matrix(NA, nrow = A, ncol = K)
sampled_matrix <- matrix(0, K, K)
colnames(sampled_matrix) <- rownames(sampled_matrix) <- paste0("Arm", 1:K)
update_times <- cumsum(batch_sizes)
conf_lower <- matrix(0, nrow = K, ncol = K)
conf_upper <- matrix(1, nrow = K, ncol= K)
for (t in 1:A) {
# 0. For first batch assign arms uniformly at random
if (t <= update_times[1]) {
arm1 <- sample(1:K, 1)
arm2 <- sample(setdiff(1:K, arm1), 1)
} else {
# probs and idx is set below at the end of the batch
choice <- sample.int(length(probs), 1L, prob = probs)
out <- c(idx[choice, "row"], idx[choice, "col"])
arm1 <- out[1]
arm2 <- out[2]
}
# Compare results
winner <- ifelse(runif(1) < P_win(arm1, arm2), arm1, arm2)
loser <- arm1 + arm2 - winner
wins_running[winner, loser] <- wins_running[winner, loser] + 1
losses_running[loser, winner] <- losses_running[loser, winner] + 1
# update probability correct here
# sum up the rows for wins_running and losses_running, make vector wins_running/ (wins_running + losses_running), sort,
row_wins_running <- rowSums(wins_running) - diag(wins_running)
row_losses_running <- rowSums(losses_running) - diag(losses_running)
phat_running <- wins_running / (wins_running + losses_running) # Beta–posterior means
diag(phat_running) <- 0 # ignore self-duels
borda_hat_running <- rowSums(phat_running) / (K - 1)
ranked_running <- order(borda_hat_running, decreasing = TRUE)
# compare ranking to rank_borda (truth)
pc_1[t] <- length(intersect(ranked_running[1], rank_borda[1]))
pc_3[t] <- length(intersect(ranked_running[1:3], rank_borda[1:3])) / 3
pc_5[t] <- length(intersect(ranked_running[1:5], rank_borda[1:5])) / 5
pc_10[t] <- length(intersect(ranked_running[1:10], rank_borda[1:10])) / 10
pc_20[t] <- length(intersect(ranked_running[1:20], rank_borda[1:20])) / 20
pc_top5_in_20[t] <- length(intersect(rank_borda[1:5], ranked_running[1:20])) / 5
pc_top5_in_25[t] <- length(intersect(rank_borda[1:5], ranked_running[1:25])) / 5
rank_order[t,] <- ranked_running
ws_wide[t,] <- sort(c(arm1, arm2))
yobs_wide[t,] <- as.integer(ws_wide[t, ] == winner)
# IF2 Filter algorithm from 2009 Yue paper
# begins here, after first batch assigned treatment uniformly at random
if(algorithm == "if2"& t >= update_times[1]){
if(!exists("W")){
# 2: δ ← 1/(TK^2) (initialized above)
# 3: Choose ˆb ∈ B randomly (this was done in the first batch)
# 4: W ← {b1,...,bK} \ {ˆb}
w1 <- sample(1:K, 1)
W <- setdiff(1:K, w1)
}
# 5: ∀b ∈ W, maintain estimate ˆP_{ˆb,b} of P(ˆb, b)
# (initialized above)
# 6: ∀b ∈ W, maintain 1 − δ confidence interval Cˆ_{ˆb,b} of ˆP_{ˆb,b}
# (initialized above)
# 7: while W is not empty do
# 8: for b ∈ W do
# 9: compare ˆb and b
wins_filter[winner, loser] <- wins_filter[winner, loser] + 1
losses_filter[loser, winner] <- losses_filter[loser, winner] + 1
# 10: update Cˆ_{ˆb,b}, ˆP_{ˆb,b}
CI_filter <- binom.confint(wins_filter[arm1, arm2],
n = wins_filter[arm1, arm2]
+ losses_filter[arm1, arm2],
conf.level = 1-delta, methods = "wilson")
conf_lower[arm1, arm2] = CI_filter$lower
conf_upper[arm1, arm2] = CI_filter$upper
# 11: end for
w2 <- sample(W, 1)
probs_mat <- matrix(0, K, K)
# set treatment deterministically
probs_mat[w1, w2] <- probs_mat[w2, w1] <- 1
idx <- which(upper.tri(probs_mat), arr.ind = TRUE)
probs <- probs_mat[upper.tri(probs_mat)]
# then add probability floor
prob_floor <- 1/(K*(K-1)/2) * floor_mult
probs <- pmax(probs, prob_floor)
# renormalize
row_sums <- sum(probs)
probs <- probs / row_sums
}
# Update posteriors if batch completed
if(t %in% update_times) {
wins <- wins_running
losses <- losses_running
# save historical probabilities
if(t == update_times[1]){
probs_wide[1:t,] <- 1/(K*(K-1)/2)
W <- setdiff(1:K, arm1)
} else {
t_0 <- update_times[max(which(update_times<t))]+1
probs_wide[t_0:t,] <- matrix(rep(probs, length(t_0:t)),
nrow = length(t_0:t),
byrow = TRUE)
}
if(algorithm != "uniform") { # Skip algorithm-specific logic for uniform
if(algorithm == "if2"){
# 12: while ∃b ∈ W s.t. (ˆP_{ˆb,b} > 1/2 ∧ 1/2 ∈/ Cˆ_{ˆb,b}) do:
# Pruning
# if bandit is empirically inferior to b-hat/arm1 with confidence, it is removed from W
phat_filter <- wins_filter / (wins_filter + losses_filter)
weak_arms <- intersect(which(phat_filter[w1,] > 0.5), W) # more than 50% chance of arm1 beating it, AND it is in W
for (w in weak_arms){
if ((0.5 < conf_lower[w1, w]) | (0.5 > conf_upper[w1, w])){
# 13: W ← W \ {b}
W <- setdiff(W, w)
}
}
# 14: end while
# 15: if ∃b' ∈ W s.t. (ˆP_{ˆb,b'} > 1/2 ∧ 1/2 ∈/ Cˆ_{ˆb,b'}) then
strong_arms <- intersect(which(phat_filter[w1,] < 0.5), W)
# 16: ˆb ← b', W ← W \ {b'} //new round
if (length(strong_arms) != 0){
w1 <- sample(strong_arms, 1)
W <- setdiff(W, w1)
}
# if W becomes empty, refill it
if (length(W) < 2){
w1 <- sample(1:K, 1)
W <- setdiff(1:K, w1)
}
# 17: ∀b ∈ W, reset ˆP_{ˆb,b} and Cˆ_{ˆb,b}
wins_filter <- losses_filter <- matrix(1, nrow = K, ncol = K)
conf_lower <- matrix(0, nrow = K, ncol = K)
conf_upper <- matrix(1, nrow = K, ncol= K)
# 18: end if
# 19: end while
w2 <- sample(W, 1)
probs_mat <- matrix(0, K, K)
# set treatment deterministically
probs_mat[w1, w2] <- probs_mat[w2, w1] <- 1
idx <- which(upper.tri(probs_mat), arr.ind = TRUE)
probs <- probs_mat[upper.tri(probs_mat)]
} else if(algorithm == "dts"){
# recalculate probabilities
probs_mat <- sample_probs_dts(wins, losses, t, K = K, alpha = alpha, n_mc = 1e3)
idx <- which(upper.tri(probs_mat), arr.ind = TRUE)
probs <- probs_mat[upper.tri(probs_mat)]
} else if(algorithm == "tt"){
# recalculate probabilities
probs_mat <- sample_probs_TT(wins, losses, K = K, n_mc = 1e3)
idx <- which(upper.tri(probs_mat), arr.ind = TRUE)
probs <- probs_mat[upper.tri(probs_mat)]
} else if(algorithm == "topk5"){
# Top-K Thompson sampling (samples from top 5 arms)
probs_mat <- sample_probs_TopK(wins, losses, K = K, n_mc = 1e3, top_k = 5)
idx <- which(upper.tri(probs_mat), arr.ind = TRUE)
probs <- probs_mat[upper.tri(probs_mat)]
} else if(algorithm == "topk10"){
# Top-K Thompson sampling (samples from top 10 arms)
probs_mat <- sample_probs_TopK(wins, losses, K = K, n_mc = 1e3, top_k = 10)
idx <- which(upper.tri(probs_mat), arr.ind = TRUE)
probs <- probs_mat[upper.tri(probs_mat)]
} else if(algorithm == "weighted_topk5"){
# Weighted Top-K Thompson sampling (top 5)
probs_mat <- sample_probs_WeightedTopK(wins, losses, K = K, n_mc = 1e3, top_k = 5, decay = 0.7)
idx <- which(upper.tri(probs_mat), arr.ind = TRUE)
probs <- probs_mat[upper.tri(probs_mat)]
} else if(algorithm == "weighted_topk10"){
# Weighted Top-K Thompson sampling (top 10)
probs_mat <- sample_probs_WeightedTopK(wins, losses, K = K, n_mc = 1e3, top_k = 10, decay = 0.7)
idx <- which(upper.tri(probs_mat), arr.ind = TRUE)
probs <- probs_mat[upper.tri(probs_mat)]
}
# add probability floor
prob_floor <- 1/(K*(K-1)/2) * floor_mult
probs <- pmax(probs, prob_floor)
# renormalize
row_sums <- sum(probs)
probs <- probs / row_sums
}
# print(paste0("batch done, t = ", t))
}
}
df_long <- tibble(
Trial = rep(seq_len(A), each = 2), # 1,1,2,2,3,3,…
Arm = as.vector(t(ws_wide)) # flatten the A×2 matrix
) |>
filter(!is.na(Arm)) |> # drop the NAs from pre-allocation
mutate(Played = 1L) # indicator that this arm was played
# 2. Fill in the (Arm, Trial) combos where the arm was NOT chosen
df_cumulative <- df_long |>
complete(Arm = 1:K, Trial = 1:A, # all combos you ever want to see
fill = list(Played = 0L)) |>
arrange(Arm, Trial) |>
group_by(Arm) |>
mutate(Cumulative_Count = cumsum(Played)) |>
ungroup()
list(
pc = tibble(
Row = seq_len(A),
pc1 = pc_1,
pc3 = pc_3,
pc5 = pc_5,
pc10 = pc_10,
pc20 = pc_20,
pc_top5_in_20 = pc_top5_in_20,
pc_top5_in_25 = pc_top5_in_25
),
cum = df_cumulative,
rank_order = rank_order#,
# ws = ws_wide,
# probs = probs_wide,
# yobs = yobs_wide
)
}Our goal is to:
Treatment is assigned uniformly at random, both measures are ranked
set.seed(60637)
# Multiple true correlation coefficients in underlying Gaussian (including negative)
r_true_values <- c(0.95, 0.9, 0.8, 0.6, 0.3, 0.1, -0.6, -1)
# Calculate corresponding binomial correlations for each Gaussian correlation
cor_true_values <- map_dbl(r_true_values, function(r_val) {
cormat <- t(sapply(sample(win_mat, 1e4, replace = TRUE),
function(x) {bingen(r = r_val, p = x)}))
cor(cormat[,1], cormat[,2])
})
# Create mapping for later use
correlation_mapping <- tibble(
r_gaussian = r_true_values,
r_binomial = cor_true_values
)
## 95% confidence interval for the correlation
aa <- 0.05
# Simple uniform allocation simulation
run_stage1_sim <- function(sim_seed, K, A, r_true) {
set.seed(sim_seed)
# Storage for wins/losses for both measures
wins1 <- losses1 <- wins2 <- losses2 <- matrix(1, nrow = K, ncol = K) # Beta(1,1) prior
Y1 <- Y2 <- matrix(0, nrow = A, ncol = 2) # Outcomes for both measures
# stores proportion correct during experiment for top 1, 3, 5, 10, 20 arms
pc_1 <- rep(NA, A)
pc_3 <- rep(NA, A)
pc_5 <- rep(NA, A)
pc_10 <- rep(NA, A)
pc_20 <- rep(NA, A)
pc_top5_in_20 <- rep(NA, A) # Percent of true top 5 that are in estimated top 20
pc_top5_in_25 <- rep(NA, A) # Percent of true top 5 that are in estimated top 25
# store two-sided confidence interval
ci_r <- matrix(NA, nrow = A, ncol = 2)
# store one-sided confidence interval
# ci_low_r <- rep(NA, A)
# Uniform allocation
for (t in 1:A) {
arm1 <- sample(1:K, 1)
arm2 <- sample(setdiff(1:K, arm1), 1)
# Generate correlated outcomes using Gaussian copula approach
p <- P_win(arm1, arm2)
out <- bingen(r = r_true, p = p)
# Outcome for measure 1
winner1 <- ifelse(out[1] == 1, arm1, arm2)
loser1 <- arm1 + arm2 - winner1
wins1[winner1, loser1] <- wins1[winner1, loser1] + 1
losses1[loser1, winner1] <- losses1[loser1, winner1] + 1
Y1[t, ] <- c(1*(arm1 == winner1), 1*(arm2 == winner1))
# Outcome for measure 2
winner2 <- ifelse(out[2] == 1, arm1, arm2)
loser2 <- arm1 + arm2 - winner2
wins2[winner2, loser2] <- wins2[winner2, loser2] + 1
losses2[loser2, winner2] <- losses2[loser2, winner2] + 1
Y2[t, ] <- c(1*(arm1 == winner2), 1*(arm2 == winner2))
# Compare results using just winner1
# update probability correct here
# sum up the rows for wins_running and losses_running, make vector wins_running/ (wins_running + losses_running), sort,
row_wins_running <- rowSums(wins1) - diag(wins1)
row_losses_running <- rowSums(losses1) - diag(losses1)
phat_running <- wins1 / (wins1 + losses1) # Beta–posterior means
diag(phat_running) <- 0 # ignore self-duels
borda_hat_running <- rowSums(phat_running) / (K - 1)
ranked_running <- order(borda_hat_running, decreasing = TRUE)
# compare ranking to rank_borda (truth)
pc_1[t] <- length(intersect(ranked_running[1], rank_borda[1]))
pc_3[t] <- length(intersect(ranked_running[1:3], rank_borda[1:3])) / 3
pc_5[t] <- length(intersect(ranked_running[1:5], rank_borda[1:5])) / 5
pc_10[t] <- length(intersect(ranked_running[1:10], rank_borda[1:10])) / 10
pc_20[t] <- length(intersect(ranked_running[1:20], rank_borda[1:20])) / 20
pc_top5_in_20[t] <- length(intersect(rank_borda[1:5], ranked_running[1:20])) / 5
pc_top5_in_25[t] <- length(intersect(rank_borda[1:5], ranked_running[1:25])) / 5
# warnings suppressed because there are issues with the first couple of obs
r_hat <- suppressWarnings(cor(Y1[1:t, 1], Y2[1:t, 1]))
## Fisher z‑transform
z <- atanh(r_hat)
se_z <- 1 / suppressWarnings(sqrt(t - 3))
## back‑transform: SE for r
se_r <- se_z * (1 - r_hat^2) # delta method: SE(r̂) = SE(ẑ)*(1−r̂²)
# two-sided confidence interval
z_crit <- qnorm(1 - aa/2) # 1.96 for 95 %
ci_r[t,] <- tanh( z + c(-1, 1) * z_crit * se_z )
# # one sided lower-confidence interval
# z_crit <- qnorm(1-aa)
# ci_low_r[t] <- tanh(z - z_crit * se_z)
}
list(
ci_r = ci_r,
pc = tibble(
Row = seq_len(A),
pc1 = pc_1,
pc3 = pc_3,
pc5 = pc_5,
pc10 = pc_10,
pc20 = pc_20,
pc_top5_in_20 = pc_top5_in_20,
pc_top5_in_25 = pc_top5_in_25
)
)
}
# Run multiple simulations across different correlation values
M_stage1 <- 100
A_stage1 <- 2.5e3 # Define here for clarity
# Run simulations for each correlation value
stage1_results <- map_dfr(seq_along(r_true_values), function(r_idx) {
r_val <- r_true_values[r_idx]
sim_results <- map(1:M_stage1, ~ run_stage1_sim(.x + 60637, K_stage1, A_stage1, r_val))
# Add correlation info to each result
map_dfr(seq_along(sim_results), ~ {
result <- sim_results[[.x]]
list(
r_gaussian = r_val,
r_binomial = cor_true_values[r_idx],
sim = .x,
ci_r = list(result$ci_r),
pc = list(result$pc)
)
})
})# Process results for width of CIs and proportion correct analysis
# Extract confidence intervals with correlation info
precision_data <- map_dfr(1:nrow(stage1_results), function(row_idx) {
result <- stage1_results[row_idx, ]
ci_matrix <- result$ci_r[[1]]
tibble(
# r_gaussian = result$r_gaussian,
r_binomial = result$r_binomial,
sim = result$sim,
trial = 1:A_stage1,
ci_low = ci_matrix[,1],
ci_hgh = ci_matrix[,2],
ci_width = ci_matrix[,2] - ci_matrix[,1]
)
})
# Calculate precision curves for each correlation value
precision_curves <- precision_data |>
group_by(r_binomial, trial) |>
summarise(
width = mean(ci_width, na.rm = TRUE),
.groups = "drop"
)|>
mutate(
r_binomial_rounded = round(r_binomial, 2),
correlation_label = factor(paste0("r = ", r_binomial_rounded),
levels = paste0("r = ", sort(unique(round(r_binomial, 2)))))
)
# Extract proportion correct data with correlation info
pc_data <- map_dfr(1:nrow(stage1_results), function(row_idx) {
result <- stage1_results[row_idx, ]
pc_df <- result$pc[[1]]
pc_df |> mutate(
r_gaussian = result$r_gaussian,
r_binomial = result$r_binomial,
sim = result$sim
)
})
# Calculate average proportion correct at each time point for each correlation
pc_curves <- pc_data |>
group_by(Row) |>
summarise(
pc1_mean = mean(pc1, na.rm = TRUE),
pc3_mean = mean(pc3, na.rm = TRUE),
pc5_mean = mean(pc5, na.rm = TRUE),
pc10_mean = mean(pc10, na.rm = TRUE),
pc20_mean = mean(pc20, na.rm = TRUE),
pc_top5_in_20_mean = mean(pc_top5_in_20, na.rm = TRUE),
pc_top5_in_25_mean = mean(pc_top5_in_25, na.rm = TRUE),
.groups = "drop"
)
# Plot 1: Precision curves colored by true binomial correlation
p1_stage1_precision <- ggplot(precision_curves,
aes(x = trial, y = width, color = correlation_label)) +
geom_line(linewidth = 1, na.rm = TRUE) +
geom_hline(yintercept = 0.1, linetype = "dashed", color = "red", alpha = 0.7) +
geom_hline(yintercept = 0.05, linetype = "dashed", color = "red", alpha = 0.7) +
geom_hline(yintercept = 0.02, linetype = "dashed", color = "red", alpha = 0.7) +
# Add text labels for the reference lines
annotate("text", x = A_stage1 * 0.05, y = 0.1, label = "0.1",
color = "red", vjust = -0.3, size = 3) +
annotate("text", x = A_stage1 * 0.05, y = 0.05, label = "0.05",
color = "red", vjust = -0.3, size = 3) +
annotate("text", x = A_stage1 * 0.05, y = 0.02, label = "0.02",
color = "red", vjust = -0.3, size = 3) +
labs(
title = "Stage 1: Precision of correlation estimates",
subtitle = glue("M = {M_stage1}, K = {K_stage1}, sd = {sd_stage1}"),
x = "Number of comparisons",
y = "95% confidence interval width",
color = "True binomial\ncorrelation"
) +
coord_cartesian(ylim = c(0, 0.5), xlim = c(0, A_stage1)) +
# Use regular even breaks for clean gridlines
scale_y_continuous(breaks = seq(0, 0.5, 0.1), minor_breaks = NULL) +
scale_color_manual(values = correlation_colors_new) +
theme_minimal() +
theme(
panel.grid.minor = element_blank(),
plot.subtitle = element_text(size = 10)
)
print(p1_stage1_precision)ggsave("../figures/stage1_precision_curves.png", p1_stage1_precision, width = 10, height = 6, dpi = 300)
# Plot 2: Proportion correct curves colored by correlation
pc_long <- pc_curves |>
dplyr::select(Row, pc20_mean, pc_top5_in_20_mean, pc_top5_in_25_mean) |>
pivot_longer(
cols = c(pc20_mean, pc_top5_in_20_mean, pc_top5_in_25_mean),
names_to = "metric",
values_to = "proportion_correct",
names_pattern = "pc(.+)_mean"
) |>
mutate(
metric = case_when(
metric == "_top5_in_25" ~ "Best 5 in top 25",
metric == "_top5_in_20" ~ "Best 5 in top 20",
metric == "20" ~ "Top 20"
),
metric = factor(metric, levels = c("Top 20", "Best 5 in top 20", "Best 5 in top 25"))
)
p2_stage1_pc <- ggplot(pc_long, aes(x = Row, y = proportion_correct, color = metric)) +
geom_line(linewidth = 1) +
labs(
title = "Stage 1: Proportion correct over time",
subtitle = glue("Uniform allocation, M = {M_stage1}, K = {K_stage1}, sd = {sd_stage1}"),
x = "Number of comparisons",
y = "Proportion correct",
color = "Metric"
) +
coord_cartesian(ylim = c(0, 1), xlim = c(0, A_stage1)) +
scale_y_continuous(breaks = seq(0, 1, 0.2)) +
scale_color_manual(values = ranking_colors[c("Top 20", "Best 5 in top 20", "Best 5 in top 25")]) +
geom_hline(yintercept = 0.8, linetype = "dashed", color = "red", alpha = 0.7) +
geom_hline(yintercept = 0.9, linetype = "dashed", color = "red", alpha = 0.7) +
annotate("text", x = A_stage1 * 0.05, y = 0.8, label = "80%",
color = "red", vjust = -0.3, size = 3) +
annotate("text", x = A_stage1 * 0.05, y = 0.9, label = "90%",
color = "red", vjust = -0.3, size = 3) +
theme_minimal() +
theme(
plot.subtitle = element_text(size = 10)
)
print(p2_stage1_pc)# Additional Stage 1 analysis with K=50 arms
set.seed(60637 + 500)
# Parameters for K=50 analysis
K_stage1_50 <- 50
# Generate true arm strengths for K=50
true_theta_50 <- rnorm(K_stage1_50, mean = 0, sd = sd_stage1)
# Create win matrix and rankings for K=50
P_win_50 <- function(i, j) plogis(true_theta_50[i] - true_theta_50[j])
win_mat_50 <- matrix(0, nrow = K_stage1_50, ncol = K_stage1_50)
invisible(sapply(1:K_stage1_50, function(i) {
sapply(1:K_stage1_50, function(j) {
if (i != j) win_mat_50[i, j] <<- P_win_50(i, j)
})
}))
true_strength_50 <- rowSums(win_mat_50) / (K_stage1_50 - 1)
rank_borda_50 <- order(true_strength_50, decreasing = TRUE)
# Run simulations (reuse existing function with modified globals)
P_win_temp <- P_win
rank_borda_temp <- rank_borda
P_win <- P_win_50
rank_borda <- rank_borda_50
sim_results_k50 <- map_dfr(1:M_stage1, function(sim_idx) {
run_stage1_sim(60637 + 500 + sim_idx, K_stage1_50, A_stage1, r_true_values[1])$pc |>
mutate(sim = sim_idx)
})
# Restore original globals
P_win <- P_win_temp
rank_borda <- rank_borda_temp
# Calculate average performance
pc_curves_k50 <- sim_results_k50 |>
group_by(Row) |>
summarise(across(starts_with("pc"), ~ mean(.x, na.rm = TRUE)), .groups = "drop")# Create plot for K=50
pc_long_k50 <- pc_curves_k50 |>
dplyr::select(Row, pc20, pc_top5_in_20, pc_top5_in_25) |>
pivot_longer(
cols = -Row,
names_to = "metric",
values_to = "proportion_correct"
) |>
mutate(
metric = case_when(
metric == "pc_top5_in_25" ~ "Best 5 in top 25",
metric == "pc_top5_in_20" ~ "Best 5 in top 20",
metric == "pc20" ~ "Top 20"
),
metric = factor(metric, levels = c("Top 20", "Best 5 in top 20", "Best 5 in top 25"))
)
p2_stage1_pc_k50 <- ggplot(pc_long_k50, aes(x = Row, y = proportion_correct, color = metric)) +
geom_line(linewidth = 1) +
labs(
title = "Stage 1: Proportion correct over time (K = 50)",
subtitle = glue("Uniform allocation, M = {M_stage1}, K = {K_stage1_50}, sd = {sd_stage1}"),
x = "Number of comparisons",
y = "Proportion correct",
color = "Metric"
) +
coord_cartesian(ylim = c(0, 1), xlim = c(0, A_stage1)) +
scale_y_continuous(breaks = seq(0, 1, 0.2)) +
scale_color_manual(values = ranking_colors[c("Top 20", "Best 5 in top 20", "Best 5 in top 25")]) +
geom_hline(yintercept = 0.8, linetype = "dashed", color = "red", alpha = 0.7) +
geom_hline(yintercept = 0.9, linetype = "dashed", color = "red", alpha = 0.7) +
annotate("text", x = A_stage1 * 0.05, y = 0.8, label = "80%",
color = "red", vjust = -0.3, size = 3) +
annotate("text", x = A_stage1 * 0.05, y = 0.9, label = "90%",
color = "red", vjust = -0.3, size = 3) +
theme_minimal() +
theme(plot.subtitle = element_text(size = 10))
print(p2_stage1_pc_k50)ggsave("../figures/stage1_proportion_correct_k50.png", p2_stage1_pc_k50, width = 10, height = 6, dpi = 300)Our goal is to:
Treatment is assigned adaptively, both measures are ranked
# Stage 2 Parameters (defined once)
set.seed(60637 + 1000) # Different seed for Stage 2
K_stage2 <- 20 # Focused set of arms (eventually from Stage 1 pruning)
A_stage2 <- 1e3 # Total comparisons
M_stage2 <- 1e2 # Number of simulation replications
first_batch_stage2 <- 400 # Initial batch size for adaptive algorithms
nbatches_stage2 <- 5 # Batches for adaptive algorithms
sd_stage2 <- 0.5 # Moderate dispersion (harder than Stage 1)
# Algorithm comparison parameters
alpha_stage2 <- 0.5 # DTS confidence parameter
delta_stage2 <- 1/(A_stage2 * K_stage2^2) # IF2 confidence parameter
floor_mult_stage2 <- 0.30 # Probability floor multiplier
n_mc_stage2 <- 1e3 # Monte Carlo samples
# Generate Stage 2 true arm strengths
true_theta_stage2 <- rnorm(K_stage2, mean = 0, sd = sd_stage2)
# Stage 2 win probabilities
P_win_stage2 <- function(i, j) plogis(true_theta_stage2[i] - true_theta_stage2[j])
# Generate win matrix for Stage 2
win_mat_stage2 <- matrix(0, nrow = K_stage2, ncol = K_stage2)
for(i in 1:K_stage2) {
for(j in 1:K_stage2) {
if (i != j) {
win_mat_stage2[i, j] <- P_win_stage2(i, j)
}
}
}
batch_sizes <- c(first_batch_stage2, rep((A_stage2-first_batch_stage2)/(nbatches_stage2-1), nbatches_stage2-1))
# True rankings for Stage 2
true_strength_stage2 <- rowSums(win_mat_stage2) / (K_stage2 - 1)
rank_borda_stage2 <- order(true_strength_stage2, decreasing = TRUE)
# Define algorithms and their settings for Stage 2
algorithms_stage2 <- list(
dts = list(name = "dts", batch_sizes = batch_sizes),
# tt = list(name = "tt", batch_sizes = batch_sizes),
topk5 = list(name = "topk5", batch_sizes = batch_sizes),
topk10 = list(name = "topk10", batch_sizes = batch_sizes),
# weighted_topk5 = list(name = "weighted_topk5", batch_sizes = batch_sizes),
# weighted_topk10 = list(name = "weighted_topk10", batch_sizes = batch_sizes),
# if2 = list(name = "if2", batch_sizes = batch_sizes),
uniform = list(name = "uniform", batch_sizes = A_stage2) # Single batch = uniform allocation
)set.seed(60637)
hist(win_mat_stage2[upper.tri(win_mat_stage2)], breaks = 20,
main = glue("Distribution of win probabilities,\nStage 2,\nsd = {sd_stage2}"),
xlab = "Win probability", ylab = "Frequency")# Start timing for Stage 2 analysis
stage2_start_time <- Sys.time()
# Set remaining global parameters for Stage 2
P_win <- P_win_stage2
true_theta <- true_theta_stage2
win_mat <- win_mat_stage2
true_strength <- true_strength_stage2
rank_borda <- rank_borda_stage2
# Memory-efficient sequential processing of algorithms
base_seed <- 60637
# Initialize storage for final results
all_pc_stage2 <- tibble()
all_rank_stage2 <- tibble()
all_cum_avg_stage2 <- tibble()
strength_avg_stage2 <- tibble()
# Algorithm name mapping
alg_name_map <- c(
"dts" = "Double Thompson sampling",
"tt" = "Top-two Thompson sampling",
"topk5" = "Top-K Thompson sampling (k=5)",
"topk10" = "Top-K Thompson sampling (k=10)",
"weighted_topk5" = "Weighted Top-K Thompson sampling (k=5)",
"weighted_topk10" = "Weighted Top-K Thompson sampling (k=10)",
"if2" = "Interleaved Filter2",
"uniform" = "Uniform"
)
# Process each algorithm sequentially to avoid memory overload
for(alg_idx in seq_along(algorithms_stage2)) {
alg <- algorithms_stage2[[alg_idx]]
alg_name <- names(algorithms_stage2)[alg_idx]
# Run simulations for this algorithm only
sim_results_alg <- map(1:M_stage2, ~ {
run_single_sim(base_seed + .x, alg$name, alg$batch_sizes, K_stage2, A_stage2, alpha_stage2, delta_stage2, floor_mult_stage2, n_mc_stage2)
})
# Immediately process into the summaries needed for plots
# 1. Process PC data
pc_data_alg <- bind_rows(map(seq_along(sim_results_alg), ~ {
sim_results_alg[[.x]]$pc |> mutate(sim = .x)
})) |>
mutate(algorithm = alg_name_map[alg_name])
all_pc_stage2 <- bind_rows(all_pc_stage2, pc_data_alg)
# 2. Process rank data
rank_data_alg <- map_dfr(seq_along(sim_results_alg), function(idx) {
mat <- sim_results_alg[[idx]]$rank_order
colnames(mat) <- paste0("V", seq_len(ncol(mat)))
as_tibble(mat, .name_repair = "minimal") |>
mutate(Trial = row_number()) |>
pivot_longer(
cols = -Trial,
names_to = "RankPos",
values_to = "Arm"
) |> mutate(
RankPos = as.integer(sub("^V", "", RankPos)),
sim = idx
)
}) |>
mutate(algorithm = alg_name)
all_rank_stage2 <- bind_rows(all_rank_stage2, rank_data_alg)
# 3. Process cumulative data (averaged)
combined_cum <- map_dfr(seq_along(sim_results_alg), function(sim_idx) {
sim_results_alg[[sim_idx]]$cum |>
mutate(sim = sim_idx)
})
count_col <- if("Cumulative_Count" %in% names(combined_cum)) "Cumulative_Count" else "Cumulative"
cum_avg_alg <- combined_cum |>
group_by(Trial, Arm) |>
summarise(Cumulative = mean(.data[[count_col]]), .groups = "drop") |>
mutate(
algorithm = alg_name_map[alg_name],
color = case_when(
Arm %in% rank_borda_stage2[1] ~ "Top 1",
Arm %in% rank_borda_stage2[2:3] ~ "Top 3",
Arm %in% rank_borda_stage2[4:5] ~ "Top 5",
TRUE ~ "Other"
)
)
all_cum_avg_stage2 <- bind_rows(all_cum_avg_stage2, cum_avg_alg)
# 4. Process strength/regret data
trial_strengths <- map_dfr(1:A_stage2, function(trial) {
all_rankings <- map(sim_results_alg, ~ .x$rank_order[trial, ])
tibble(
Trial = trial,
strength1 = mean(map_dbl(all_rankings, ~ mean(true_strength_stage2[.x[1:1]]))),
strength3 = mean(map_dbl(all_rankings, ~ mean(true_strength_stage2[.x[1:3]]))),
strength5 = mean(map_dbl(all_rankings, ~ mean(true_strength_stage2[.x[1:5]])))
)
}) |>
mutate(algorithm = alg_name_map[alg_name])
strength_avg_alg <- trial_strengths |>
pivot_longer(starts_with("strength"), names_prefix = "strength", names_to = "TopK") |>
mutate(
TopK_label = paste0("Top ", TopK),
Regret = case_when(
TopK == "1" ~ mean(true_strength_stage2[rank_borda_stage2[1:1]]) - value,
TopK == "3" ~ mean(true_strength_stage2[rank_borda_stage2[1:3]]) - value,
TopK == "5" ~ mean(true_strength_stage2[rank_borda_stage2[1:5]]) - value,
TRUE ~ NA_real_
)
)
strength_avg_stage2 <- bind_rows(strength_avg_stage2, strength_avg_alg)
# Clear the raw results to free memory
rm(sim_results_alg, pc_data_alg, rank_data_alg, combined_cum, cum_avg_alg, trial_strengths, strength_avg_alg)
gc() # Force garbage collection
}# load data for interactive sessions (updated for lazy load)
# cache_files <- list.files("dueling_bandits_iterated_cache/html",
# pattern = "^stage2_run.*\\.RData$",
# full.names = TRUE)
# if (length(cache_files) > 0) {
# load(cache_files[1]) # Load the first matching file
# } else {
# warning("No stage2_run cache file found")
# }
# Calculate average performance across algorithms
all_pc_long_avg_stage2 <-
all_pc_stage2 |>
pivot_longer(starts_with("pc"),
names_prefix = "pc",
names_to = "Top",
values_to = "Value") |>
group_by(algorithm, Row, Top) |>
summarise(Value = mean(Value), .groups = "drop") |>
mutate(Top = factor(Top, levels = c("1", "3", "5", "10", "20")))# Stage 2 Algorithm Comparison Plots
# Data has already been processed sequentially in the previous chunk
## Algorithm comparison across top-k performance
p3_stage2_algorithms <- ggplot(all_pc_long_avg_stage2 |> filter(Top %in% c("1", "3", "5")),
aes(x = Row, y = Value, colour = algorithm, alpha = algorithm, linetype = algorithm)) +
geom_line(linewidth = 0.5) +
scale_alpha_manual(values = c("Uniform" = 1.0, "Double Thompson sampling" = 0.7, "Top-two Thompson sampling" = 0.7, "Top-K Thompson sampling (k=5)" = 0.7, "Top-K Thompson sampling (k=10)" = 0.7, "Weighted Top-K Thompson sampling (k=5)" = 0.7, "Weighted Top-K Thompson sampling (k=10)" = 0.7, "Interleaved Filter2" = 0.7)) +
scale_linetype_manual(values = c("Uniform" = "dashed", "Double Thompson sampling" = "solid", "Top-two Thompson sampling" = "solid", "Top-K Thompson sampling (k=5)" = "solid", "Top-K Thompson sampling (k=10)" = "solid", "Weighted Top-K Thompson sampling (k=5)" = "solid", "Weighted Top-K Thompson sampling (k=10)" = "solid", "Interleaved Filter2" = "solid")) +
facet_wrap(~ Top, scales = "free_y",
labeller = labeller(.cols = function(x) paste0("Top ", x))) +
labs(title = "Stage 2: Best arm identification",
subtitle = glue("M = {M_stage2}, K = {K_stage2}, sd = {sd_stage2}"),
x = "Number of comparisons",
y = "Proportion correct",
colour = "Algorithm") +
coord_cartesian(ylim = c(0, 1)) +
scale_y_continuous(breaks = seq(0, 1, by = 0.2)) +
theme_minimal() +
scale_color_manual(values = algorithm_colors) +
geom_hline(yintercept = 0.8, linetype = "dashed", color = "red", alpha = 0.7) +
guides(alpha = "none", linetype = "none") +
theme(
plot.subtitle = element_text(size = 10),
strip.text = element_text(size = 12)
)
options(repr.plot.width = 16, repr.plot.height = 10)
print(p3_stage2_algorithms)ggsave("../figures/stage2_algorithm_comparison.png", p3_stage2_algorithms, width = 12, height = 8, dpi = 300)
## Cumulative arm appearances faceted by algorithm
# all_cum_avg_stage2 computed above in optimized section
p4_stage2_cumulative <- ggplot(all_cum_avg_stage2,
aes(x = Trial, y = Cumulative, group = Arm, colour = factor(color))) +
geom_line(alpha = 0.75) +
facet_wrap(~ algorithm) +
labs(title = "Stage 2: Average cumulative arm appearances",
subtitle = glue("M = {M_stage2}, K = {K_stage2}, sd = {sd_stage2}"),
x = "Number of comparisons",
y = "Mean cumulative count",
colour = "Arms") +
theme_minimal() +
scale_color_manual(values = ranking_colors[c("Top 1", "Top 3", "Top 5", "Other")]) +
theme(
plot.subtitle = element_text(size = 10),
strip.text = element_text(size = 12)
)
options(repr.plot.width = 16, repr.plot.height = 10)
print(p4_stage2_cumulative)ggsave("../figures/stage2_cumulative_appearances.png", p4_stage2_cumulative, width = 12, height = 8, dpi = 300)
# Duplicate computation removed - using working version above
p5_stage2_regret <- ggplot(strength_avg_stage2,
aes(Trial, Regret, colour = algorithm, alpha = algorithm, linetype = algorithm)) +
geom_line(linewidth = 0.5) +
scale_alpha_manual(values = c("Uniform" = 1.0, "Double Thompson sampling" = 0.7, "Top-two Thompson sampling" = 0.7, "Top-K Thompson sampling (k=5)" = 0.7, "Top-K Thompson sampling (k=10)" = 0.7, "Weighted Top-K Thompson sampling (k=5)" = 0.7, "Weighted Top-K Thompson sampling (k=10)" = 0.7, "Interleaved Filter2" = 0.7)) +
scale_linetype_manual(values = c("Uniform" = "dashed", "Double Thompson sampling" = "solid", "Top-two Thompson sampling" = "solid", "Top-K Thompson sampling (k=5)" = "solid", "Top-K Thompson sampling (k=10)" = "solid", "Weighted Top-K Thompson sampling (k=5)" = "solid", "Weighted Top-K Thompson sampling (k=10)" = "solid", "Interleaved Filter2" = "solid")) +
facet_wrap(~ TopK_label) +
labs(title = "Stage 2: Average regret",
subtitle = glue("M = {M_stage2}, K = {K_stage2}, sd = {sd_stage2}"),
x = "Number of comparisons",
y = "Average regret",
colour = "Arms") +
theme_minimal() +
scale_color_manual(values = algorithm_colors) +
guides(alpha = "none", linetype = "none") +
theme(
plot.subtitle = element_text(size = 10),
strip.text = element_text(size = 12)
)
options(repr.plot.width = 16, repr.plot.height = 10)
print(p5_stage2_regret)