0.1 Mathematical setup

Individuals indexed by \(i = 1, \dots , N\) are tasked with comparing two profiles \(j \in \{1, 2\}\).

Each profile \(j\) is associated with a unique treatment \(w_j \in \mathcal{W}\), where \(|\mathcal{W}| = K\). Outcomes \(Y_{ij}\in \{0,1\}\) are binary, where \(Y_{i1} = 1\) and \(Y_{i2} = 0\) if individual \(i\) prefers profile \(1\) over profile \(2\).

Using potential outcomes notation, let \(Y_i(\bar w)\) denote the two dimensional vector of outcomes for individual \(i\) when assigned to compare profiles associated with the treatment vector \(\bar w = \{w_1, w_2\}\). Treatment vectors are constrained so that \(w_1 \neq w_2\), so each individual has \(K(K-1)/2\) potential outcome vectors for comparing profiles of two treatments. (Assumptions: stability, no profile ordering effects.)

The estimand of interest is \[ \sum_{w'\neq w}\textrm{E}\left[ Y_{i}(\{w, w'\}) \right]\times \textrm{Pr}^{\textrm{U}}\left[\{w, w'\} \right]. \] Expectation is taken over the population, \(\textrm{Pr}\left[\{w, w'\} \right]\) is the probability of \(w\) and \(w'\) being compared, \(\textrm{Pr}^{\textrm{U}}\) is when pairs are assigned uniformly. I.e., the estimand is the expected rate at which profile \(w\) is selected when compared to all other profiles \(w' \neq w\), when profiles are assigned uniformly at random.

0.2 Simulation data generating process

set.seed(60637)

# Experiment design for Stage 1
K_stage1 <- 70 # number of arms (treatments)
sd_stage1 <- 0.75  # standard deviation of underlying thetas

# the sd will affect the dispersion of win probabilities
true_theta <- rnorm(K_stage1, mean = 0, sd = sd_stage1)

# Check dispersion: what is the starting distribution of win probabilities?
# More dispersed means easier to find winner; less dispersed means harder

# Pairwise win probability: P(i beats j)
P_win <- function(i, j) plogis(true_theta[i] - true_theta[j])

win_mat <- matrix(0, nrow = K_stage1, ncol = K_stage1)

invisible(sapply(1:K_stage1, function(i) {
  sapply(1:K_stage1, function(j) {
    if (i != j) {
      win_mat[i, j] <<- P_win(i, j)
    }
  })
}))

# Truth (Borda)
true_strength <- rowSums(win_mat) / (K_stage1 - 1)   # ignore diagonal zeros
rank_borda    <- order(true_strength, decreasing = TRUE)

# Truth (Maximin)
win_rate_mat <- win_mat / (win_mat + t(win_mat))
diag(win_rate_mat) <- NA

hist(win_mat[upper.tri(win_mat)], breaks = 20, 
     main = glue("Distribution of win probabilities,\nStage 1,\nsd = {sd_stage1}"),
     xlab = "Win probability", ylab = "Frequency")

run_single_sim <- function(sim_seed, algorithm, batch_sizes, K, A, alpha, delta, floor_mult, n_mc) {
  set.seed(sim_seed)
  
  # Storage for win/loss counts
  wins <- losses <- wins_running <- losses_running <- 
    wins_filter <- losses_filter <- 
    matrix(1, nrow = K, ncol = K)  # prior alpha
  
  # Storage for results
  probs_wide <- matrix(NA, nrow = A, ncol = K*(K-1)/2)
  yobs_wide <- matrix(NA, nrow = A, ncol = 2)
  ws_wide <- matrix(NA, nrow = A, ncol = 2)
  
  # stores proportion correct during experiment for top 1, 3, 5, 10, 20 arms
  pc_1 <- rep(NA, A)
  pc_3 <- rep(NA, A)
  pc_5 <- rep(NA, A)
  pc_10 <- rep(NA, A)
  pc_20 <- rep(NA, A)
  pc_top5_in_20 <- rep(NA, A)  # Percent of true top 5 that are in estimated top 20
  pc_top5_in_25 <- rep(NA, A)  # Percent of true top 5 that are in estimated top 25
  
  rank_order <- matrix(NA, nrow = A, ncol = K)
  
  sampled_matrix <- matrix(0, K, K)
  colnames(sampled_matrix) <- rownames(sampled_matrix) <- paste0("Arm", 1:K)
  update_times <- cumsum(batch_sizes)
  
  conf_lower <- matrix(0, nrow = K, ncol = K)
  conf_upper <- matrix(1, nrow = K, ncol= K)
  
  for (t in 1:A) {
    # 0. For first batch assign arms uniformly at random
    if (t <= update_times[1]) {
      arm1 <- sample(1:K, 1)
      arm2 <- sample(setdiff(1:K, arm1), 1)
    } else {
      
      # probs and idx is set below at the end of the batch
      choice <- sample.int(length(probs), 1L, prob = probs)
      out <- c(idx[choice, "row"], idx[choice, "col"])
      arm1 <- out[1]
      arm2 <- out[2]
    }
    # Compare results
    winner <- ifelse(runif(1) < P_win(arm1, arm2), arm1, arm2)
    loser  <- arm1 + arm2 - winner
    
    wins_running[winner, loser] <- wins_running[winner, loser] + 1
    losses_running[loser, winner] <- losses_running[loser, winner] + 1
    
    # update probability correct here
    # sum up the rows for wins_running and losses_running, make vector wins_running/ (wins_running + losses_running), sort, 
    row_wins_running <- rowSums(wins_running) - diag(wins_running)
    row_losses_running <- rowSums(losses_running) - diag(losses_running)
    phat_running <- wins_running / (wins_running + losses_running) # Beta–posterior means
    diag(phat_running) <- 0 # ignore self-duels
    borda_hat_running <- rowSums(phat_running) / (K - 1)
    ranked_running <- order(borda_hat_running, decreasing = TRUE)
    
    # compare ranking to rank_borda (truth)
    pc_1[t] <- length(intersect(ranked_running[1], rank_borda[1]))
    pc_3[t] <- length(intersect(ranked_running[1:3], rank_borda[1:3])) / 3
    pc_5[t] <- length(intersect(ranked_running[1:5], rank_borda[1:5])) / 5
    pc_10[t] <- length(intersect(ranked_running[1:10], rank_borda[1:10])) / 10
    pc_20[t] <- length(intersect(ranked_running[1:20], rank_borda[1:20])) / 20
    pc_top5_in_20[t] <- length(intersect(rank_borda[1:5], ranked_running[1:20])) / 5
    pc_top5_in_25[t] <- length(intersect(rank_borda[1:5], ranked_running[1:25])) / 5
    
    rank_order[t,] <- ranked_running
    
    ws_wide[t,] <- sort(c(arm1, arm2))
    yobs_wide[t,] <- as.integer(ws_wide[t, ] == winner)
    
    # IF2 Filter algorithm from 2009 Yue paper
    # begins here, after first batch assigned treatment uniformly at random
    if(algorithm == "if2"& t >= update_times[1]){
      if(!exists("W")){
        # 2: δ ← 1/(TK^2) (initialized above)
        # 3: Choose ˆb ∈ B randomly (this was done in the first batch)
        # 4: W ← {b1,...,bK} \ {ˆb}
        w1 <- sample(1:K, 1)
        W <- setdiff(1:K, w1)
      }
      # 5: ∀b ∈ W, maintain estimate ˆP_{ˆb,b} of P(ˆb, b)
      # (initialized above)
      # 6: ∀b ∈ W, maintain 1 − δ confidence interval Cˆ_{ˆb,b} of ˆP_{ˆb,b}
      # (initialized above)
      
      # 7: while W is not empty do
      # 8: for b ∈ W do
      # 9: compare ˆb and b
      wins_filter[winner, loser] <-  wins_filter[winner, loser] + 1
      losses_filter[loser, winner] <- losses_filter[loser, winner] + 1
      
      # 10: update Cˆ_{ˆb,b}, ˆP_{ˆb,b}
      CI_filter <- binom.confint(wins_filter[arm1, arm2], 
                                 n = wins_filter[arm1, arm2] 
                                 + losses_filter[arm1, arm2], 
                                 conf.level = 1-delta, methods = "wilson")
      conf_lower[arm1, arm2] = CI_filter$lower
      conf_upper[arm1, arm2] = CI_filter$upper
      
      # 11: end for
      w2 <- sample(W, 1)
      
      probs_mat <- matrix(0, K, K)
      # set treatment deterministically
      probs_mat[w1, w2] <- probs_mat[w2, w1] <- 1
      idx <- which(upper.tri(probs_mat), arr.ind = TRUE)
      probs <- probs_mat[upper.tri(probs_mat)] 
      
      # then add probability floor
      prob_floor <- 1/(K*(K-1)/2) * floor_mult
      probs <- pmax(probs, prob_floor)
      # renormalize
      row_sums <- sum(probs)
      probs <- probs / row_sums
    }
    
    # Update posteriors if batch completed
    if(t %in% update_times) {
      wins <- wins_running
      losses <- losses_running
      
      # save historical probabilities
      if(t == update_times[1]){
        probs_wide[1:t,] <- 1/(K*(K-1)/2)
        W <- setdiff(1:K, arm1)
      } else {
        t_0 <- update_times[max(which(update_times<t))]+1
        probs_wide[t_0:t,] <-  matrix(rep(probs, length(t_0:t)), 
                                      nrow =  length(t_0:t), 
                                      byrow = TRUE)
      }
      if(algorithm != "uniform") {  # Skip algorithm-specific logic for uniform
        if(algorithm == "if2"){
          # 12: while ∃b ∈ W s.t. (ˆP_{ˆb,b} > 1/2 ∧ 1/2 ∈/ Cˆ_{ˆb,b}) do:
          # Pruning
          # if bandit is empirically inferior to b-hat/arm1 with confidence, it is removed from W
          phat_filter <- wins_filter / (wins_filter + losses_filter)
          weak_arms <- intersect(which(phat_filter[w1,] > 0.5), W) # more than 50% chance of arm1 beating it, AND it is in W
          for (w in weak_arms){
            if ((0.5 < conf_lower[w1, w]) | (0.5 > conf_upper[w1, w])){
              # 13: W ← W \ {b}
              W <- setdiff(W, w)
            }
          }
          # 14: end while
          
          # 15: if ∃b' ∈ W s.t. (ˆP_{ˆb,b'} > 1/2 ∧ 1/2 ∈/ Cˆ_{ˆb,b'}) then
          strong_arms <- intersect(which(phat_filter[w1,] < 0.5), W)
          # 16: ˆb ← b', W ← W \ {b'} //new round
          if (length(strong_arms) != 0){
            w1 <- sample(strong_arms, 1)
            W <- setdiff(W, w1)
          }
          # if W becomes empty, refill it
          if (length(W) < 2){
            w1 <-  sample(1:K, 1)
            W <- setdiff(1:K, w1)
          }
          # 17: ∀b ∈ W, reset ˆP_{ˆb,b} and Cˆ_{ˆb,b}
          wins_filter <- losses_filter <- matrix(1, nrow = K, ncol = K)
          conf_lower <- matrix(0, nrow = K, ncol = K)
          conf_upper <- matrix(1, nrow = K, ncol= K)
          # 18: end if
          # 19: end while
          
          w2 <- sample(W, 1)
          probs_mat <- matrix(0, K, K)
          # set treatment deterministically
          probs_mat[w1, w2] <- probs_mat[w2, w1] <- 1
          idx <- which(upper.tri(probs_mat), arr.ind = TRUE)
          probs <- probs_mat[upper.tri(probs_mat)] 
        } else if(algorithm == "dts"){
          # recalculate probabilities
          probs_mat <- sample_probs_dts(wins, losses, t, K = K, alpha = alpha, n_mc = 1e3)
          idx <- which(upper.tri(probs_mat), arr.ind = TRUE)
          probs <- probs_mat[upper.tri(probs_mat)] 
        } else if(algorithm == "tt"){
          # recalculate probabilities
          probs_mat <- sample_probs_TT(wins, losses, K = K, n_mc = 1e3)
          idx <- which(upper.tri(probs_mat), arr.ind = TRUE)
          probs <- probs_mat[upper.tri(probs_mat)] 
        } else if(algorithm == "topk5"){
          # Top-K Thompson sampling (samples from top 5 arms)
          probs_mat <- sample_probs_TopK(wins, losses, K = K, n_mc = 1e3, top_k = 5)
          idx <- which(upper.tri(probs_mat), arr.ind = TRUE)
          probs <- probs_mat[upper.tri(probs_mat)] 
        } else if(algorithm == "topk10"){
          # Top-K Thompson sampling (samples from top 10 arms)
          probs_mat <- sample_probs_TopK(wins, losses, K = K, n_mc = 1e3, top_k = 10)
          idx <- which(upper.tri(probs_mat), arr.ind = TRUE)
          probs <- probs_mat[upper.tri(probs_mat)] 
        } else if(algorithm == "weighted_topk5"){
          # Weighted Top-K Thompson sampling (top 5)
          probs_mat <- sample_probs_WeightedTopK(wins, losses, K = K, n_mc = 1e3, top_k = 5, decay = 0.7)
          idx <- which(upper.tri(probs_mat), arr.ind = TRUE)
          probs <- probs_mat[upper.tri(probs_mat)] 
        } else if(algorithm == "weighted_topk10"){
          # Weighted Top-K Thompson sampling (top 10)
          probs_mat <- sample_probs_WeightedTopK(wins, losses, K = K, n_mc = 1e3, top_k = 10, decay = 0.7)
          idx <- which(upper.tri(probs_mat), arr.ind = TRUE)
          probs <- probs_mat[upper.tri(probs_mat)] 
        } 
        
        # add probability floor
        prob_floor <- 1/(K*(K-1)/2) * floor_mult
        probs <- pmax(probs, prob_floor)
        # renormalize
        row_sums <- sum(probs)
        probs <- probs / row_sums
      }
      
      # print(paste0("batch done, t = ", t))
    }
  }
  
  df_long <- tibble(
    Trial = rep(seq_len(A), each = 2),   # 1,1,2,2,3,3,…
    Arm   = as.vector(t(ws_wide))           # flatten the A×2 matrix
  ) |>
    filter(!is.na(Arm)) |>               # drop the NAs from pre-allocation
    mutate(Played = 1L)                  # indicator that this arm was played
  
  # 2.  Fill in the (Arm, Trial) combos where the arm was NOT chosen
  df_cumulative <- df_long |>
    complete(Arm = 1:K, Trial = 1:A,     # all combos you ever want to see
             fill   = list(Played = 0L)) |>
    arrange(Arm, Trial) |>
    group_by(Arm) |>
    mutate(Cumulative_Count = cumsum(Played)) |>
    ungroup()
  
  list(
    pc  = tibble(
      Row  = seq_len(A),
      pc1 = pc_1, 
      pc3 = pc_3, 
      pc5 = pc_5, 
      pc10 = pc_10,
      pc20 = pc_20,
      pc_top5_in_20 = pc_top5_in_20,
      pc_top5_in_25 = pc_top5_in_25
    ),
    cum = df_cumulative,
    rank_order = rank_order#,
    # ws = ws_wide,
    # probs = probs_wide,
    # yobs = yobs_wide
  )
}

1 Stage: Assess measures

Our goal is to:

Determine if two measures are correlated
Prune treatment arms

Treatment is assigned uniformly at random, both measures are ranked

set.seed(60637)

# Multiple true correlation coefficients in underlying Gaussian (including negative)
r_true_values <- c(0.95, 0.9, 0.8, 0.6, 0.3, 0.1, -0.6, -1)

# Calculate corresponding binomial correlations for each Gaussian correlation
cor_true_values <- map_dbl(r_true_values, function(r_val) {
  cormat <- t(sapply(sample(win_mat, 1e4, replace = TRUE), 
                     function(x) {bingen(r = r_val, p = x)}))
  cor(cormat[,1], cormat[,2])
})

# Create mapping for later use
correlation_mapping <- tibble(
  r_gaussian = r_true_values,
  r_binomial = cor_true_values
)

## 95% confidence interval for the correlation
aa  <- 0.05

# Simple uniform allocation simulation
run_stage1_sim <- function(sim_seed, K, A, r_true) {
  set.seed(sim_seed)
  
  # Storage for wins/losses for both measures
  wins1 <- losses1 <- wins2 <- losses2 <- matrix(1, nrow = K, ncol = K)  # Beta(1,1) prior
  Y1 <- Y2 <- matrix(0, nrow = A, ncol = 2)  # Outcomes for both measures
  
  # stores proportion correct during experiment for top 1, 3, 5, 10, 20 arms
  pc_1 <- rep(NA, A)
  pc_3 <- rep(NA, A)
  pc_5 <- rep(NA, A)
  pc_10 <- rep(NA, A)
  pc_20 <- rep(NA, A)
  pc_top5_in_20 <- rep(NA, A)  # Percent of true top 5 that are in estimated top 20
  pc_top5_in_25 <- rep(NA, A)  # Percent of true top 5 that are in estimated top 25
  
  # store two-sided confidence interval
  ci_r <- matrix(NA, nrow = A, ncol = 2)
  
  # store one-sided confidence interval
  # ci_low_r <- rep(NA, A)
  
  # Uniform allocation
  for (t in 1:A) {
    arm1 <- sample(1:K, 1)
    arm2 <- sample(setdiff(1:K, arm1), 1)
    # Generate correlated outcomes using Gaussian copula approach
    p <- P_win(arm1, arm2)
    out <- bingen(r = r_true, p = p)
    
    # Outcome for measure 1
    winner1 <- ifelse(out[1] == 1, arm1, arm2)
    loser1 <- arm1 + arm2 - winner1
    wins1[winner1, loser1] <- wins1[winner1, loser1] + 1
    losses1[loser1, winner1] <- losses1[loser1, winner1] + 1
    Y1[t, ] <- c(1*(arm1 == winner1), 1*(arm2 == winner1))
    
    # Outcome for measure 2  
    winner2 <- ifelse(out[2] == 1, arm1, arm2)
    loser2 <- arm1 + arm2 - winner2
    wins2[winner2, loser2] <- wins2[winner2, loser2] + 1
    losses2[loser2, winner2] <- losses2[loser2, winner2] + 1
    Y2[t, ] <- c(1*(arm1 == winner2), 1*(arm2 == winner2))
    
    # Compare results using just winner1
    
    # update probability correct here
    # sum up the rows for wins_running and losses_running, make vector wins_running/ (wins_running + losses_running), sort, 
    row_wins_running <- rowSums(wins1) - diag(wins1)
    row_losses_running <- rowSums(losses1) - diag(losses1)
    phat_running <- wins1 / (wins1 + losses1) # Beta–posterior means
    diag(phat_running) <- 0 # ignore self-duels
    borda_hat_running <- rowSums(phat_running) / (K - 1)
    ranked_running <- order(borda_hat_running, decreasing = TRUE)
    
    # compare ranking to rank_borda (truth)
    pc_1[t] <- length(intersect(ranked_running[1], rank_borda[1]))
    pc_3[t] <- length(intersect(ranked_running[1:3], rank_borda[1:3])) / 3
    pc_5[t] <- length(intersect(ranked_running[1:5], rank_borda[1:5])) / 5
    pc_10[t] <- length(intersect(ranked_running[1:10], rank_borda[1:10])) / 10
    pc_20[t] <- length(intersect(ranked_running[1:20], rank_borda[1:20])) / 20
    pc_top5_in_20[t] <- length(intersect(rank_borda[1:5], ranked_running[1:20])) / 5
    pc_top5_in_25[t] <- length(intersect(rank_borda[1:5], ranked_running[1:25])) / 5
    
    # warnings suppressed because there are issues with the first couple of obs
    r_hat  <- suppressWarnings(cor(Y1[1:t, 1], Y2[1:t, 1]))
    ## Fisher z‑transform
    z      <- atanh(r_hat)
    se_z   <- 1 / suppressWarnings(sqrt(t - 3))
    ## back‑transform: SE for r
    se_r   <- se_z * (1 - r_hat^2) # delta method: SE(r̂) = SE(ẑ)*(1−r̂²)
    
    # two-sided confidence interval
    z_crit <- qnorm(1 - aa/2)     # 1.96 for 95 %
    ci_r[t,]   <- tanh( z + c(-1, 1) * z_crit * se_z )
    
    # # one sided lower-confidence interval
    # z_crit <- qnorm(1-aa)
    # ci_low_r[t] <- tanh(z - z_crit * se_z)
  }
  
  list(
    ci_r  = ci_r,
    pc  = tibble(
      Row  = seq_len(A),
      pc1 = pc_1, 
      pc3 = pc_3, 
      pc5 = pc_5, 
      pc10 = pc_10,
      pc20 = pc_20,
      pc_top5_in_20 = pc_top5_in_20,
      pc_top5_in_25 = pc_top5_in_25
    )
  )
}

# Run multiple simulations across different correlation values
M_stage1 <- 100
A_stage1 <- 2.5e3  # Define here for clarity

# Run simulations for each correlation value
stage1_results <- map_dfr(seq_along(r_true_values), function(r_idx) {
  r_val <- r_true_values[r_idx]
  
  sim_results <- map(1:M_stage1, ~ run_stage1_sim(.x + 60637, K_stage1, A_stage1, r_val))
  
  # Add correlation info to each result
  map_dfr(seq_along(sim_results), ~ {
    result <- sim_results[[.x]]
    list(
      r_gaussian = r_val,
      r_binomial = cor_true_values[r_idx],
      sim = .x,
      ci_r = list(result$ci_r),
      pc = list(result$pc)
    )
  })
})

# Process results for width of CIs and proportion correct analysis

# Extract confidence intervals with correlation info
precision_data <- map_dfr(1:nrow(stage1_results), function(row_idx) {
  result <- stage1_results[row_idx, ]
  ci_matrix <- result$ci_r[[1]]
  
  tibble(
    # r_gaussian = result$r_gaussian,
    r_binomial = result$r_binomial,
    sim = result$sim,
    trial = 1:A_stage1,
    ci_low = ci_matrix[,1],
    ci_hgh = ci_matrix[,2],
    ci_width = ci_matrix[,2] - ci_matrix[,1]
  )
})

# Calculate precision curves for each correlation value
precision_curves <- precision_data |>
  group_by(r_binomial, trial) |>
  summarise(
    width = mean(ci_width, na.rm = TRUE),
    .groups = "drop"
  )|>
  mutate(
    r_binomial_rounded = round(r_binomial, 2),
    correlation_label = factor(paste0("r = ", r_binomial_rounded),
                              levels = paste0("r = ", sort(unique(round(r_binomial, 2)))))
  )

# Extract proportion correct data with correlation info
pc_data <- map_dfr(1:nrow(stage1_results), function(row_idx) {
  result <- stage1_results[row_idx, ]
  pc_df <- result$pc[[1]]
  
  pc_df |> mutate(
    r_gaussian = result$r_gaussian,
    r_binomial = result$r_binomial,
    sim = result$sim
  )
})

# Calculate average proportion correct at each time point for each correlation
pc_curves <- pc_data |>
  group_by(Row) |>
  summarise(
    pc1_mean = mean(pc1, na.rm = TRUE),
    pc3_mean = mean(pc3, na.rm = TRUE),
    pc5_mean = mean(pc5, na.rm = TRUE),
    pc10_mean = mean(pc10, na.rm = TRUE),
    pc20_mean = mean(pc20, na.rm = TRUE),
    pc_top5_in_20_mean = mean(pc_top5_in_20, na.rm = TRUE),
    pc_top5_in_25_mean = mean(pc_top5_in_25, na.rm = TRUE),
    .groups = "drop"
  )


# Plot 1: Precision curves colored by true binomial correlation
p1_stage1_precision <- ggplot(precision_curves, 
                              aes(x = trial, y = width, color = correlation_label)) +
  geom_line(linewidth = 1, na.rm = TRUE) +
  geom_hline(yintercept = 0.1, linetype = "dashed", color = "red", alpha = 0.7) +
  geom_hline(yintercept = 0.05, linetype = "dashed", color = "red", alpha = 0.7) +  
  geom_hline(yintercept = 0.02, linetype = "dashed", color = "red", alpha = 0.7) +
  # Add text labels for the reference lines
  annotate("text", x = A_stage1 * 0.05, y = 0.1, label = "0.1", 
           color = "red", vjust = -0.3, size = 3) +
  annotate("text", x = A_stage1 * 0.05, y = 0.05, label = "0.05", 
           color = "red", vjust = -0.3, size = 3) +
  annotate("text", x = A_stage1 * 0.05, y = 0.02, label = "0.02", 
           color = "red", vjust = -0.3, size = 3) +
  labs(
    title = "Stage 1: Precision of correlation estimates",
    subtitle = glue("M = {M_stage1}, K = {K_stage1}, sd = {sd_stage1}"),
    x = "Number of comparisons",
    y = "95% confidence interval width",
    color = "True binomial\ncorrelation"
  ) +
  coord_cartesian(ylim = c(0, 0.5), xlim = c(0, A_stage1)) +
  # Use regular even breaks for clean gridlines
  scale_y_continuous(breaks = seq(0, 0.5, 0.1), minor_breaks = NULL) +
  scale_color_manual(values = correlation_colors_new) +
  theme_minimal() +
  theme(
    panel.grid.minor = element_blank(),
    plot.subtitle = element_text(size = 10)
  )

print(p1_stage1_precision)

ggsave("../figures/stage1_precision_curves.png", p1_stage1_precision, width = 10, height = 6, dpi = 300)

# Plot 2: Proportion correct curves colored by correlation
pc_long <- pc_curves |>
  dplyr::select(Row, pc20_mean, pc_top5_in_20_mean, pc_top5_in_25_mean) |>
  pivot_longer(
    cols = c(pc20_mean, pc_top5_in_20_mean, pc_top5_in_25_mean),
    names_to = "metric",
    values_to = "proportion_correct",
    names_pattern = "pc(.+)_mean"
  ) |>
  mutate(
    metric = case_when(
      metric == "_top5_in_25" ~ "Best 5 in top 25",
      metric == "_top5_in_20" ~ "Best 5 in top 20", 
      metric == "20" ~ "Top 20"
    ),
    metric = factor(metric, levels = c("Top 20", "Best 5 in top 20", "Best 5 in top 25"))
  )

p2_stage1_pc <- ggplot(pc_long, aes(x = Row, y = proportion_correct, color = metric)) +
  geom_line(linewidth = 1) +
  labs(
    title = "Stage 1: Proportion correct over time",
    subtitle = glue("Uniform allocation, M = {M_stage1}, K = {K_stage1}, sd = {sd_stage1}"),
    x = "Number of comparisons",
    y = "Proportion correct",
    color = "Metric"
  ) +
  coord_cartesian(ylim = c(0, 1), xlim = c(0, A_stage1)) +
  scale_y_continuous(breaks = seq(0, 1, 0.2)) +
  scale_color_manual(values = ranking_colors[c("Top 20", "Best 5 in top 20", "Best 5 in top 25")]) +
  geom_hline(yintercept = 0.8, linetype = "dashed", color = "red", alpha = 0.7) +
  geom_hline(yintercept = 0.9, linetype = "dashed", color = "red", alpha = 0.7) +
  annotate("text", x = A_stage1 * 0.05, y = 0.8, label = "80%", 
           color = "red", vjust = -0.3, size = 3) +
  annotate("text", x = A_stage1 * 0.05, y = 0.9, label = "90%", 
           color = "red", vjust = -0.3, size = 3) +
  theme_minimal() +
  theme(
    plot.subtitle = element_text(size = 10)
  )

print(p2_stage1_pc)

ggsave("../figures/stage1_proportion_correct.png", p2_stage1_pc, width = 10, height = 6, dpi = 300)

# Additional Stage 1 analysis with K=50 arms
set.seed(60637 + 500)

# Parameters for K=50 analysis
K_stage1_50 <- 50

# Generate true arm strengths for K=50
true_theta_50 <- rnorm(K_stage1_50, mean = 0, sd = sd_stage1)

# Create win matrix and rankings for K=50
P_win_50 <- function(i, j) plogis(true_theta_50[i] - true_theta_50[j])
win_mat_50 <- matrix(0, nrow = K_stage1_50, ncol = K_stage1_50)
invisible(sapply(1:K_stage1_50, function(i) {
  sapply(1:K_stage1_50, function(j) {
    if (i != j) win_mat_50[i, j] <<- P_win_50(i, j)
  })
}))

true_strength_50 <- rowSums(win_mat_50) / (K_stage1_50 - 1)
rank_borda_50 <- order(true_strength_50, decreasing = TRUE)

# Run simulations (reuse existing function with modified globals)
P_win_temp <- P_win
rank_borda_temp <- rank_borda
P_win <- P_win_50
rank_borda <- rank_borda_50

sim_results_k50 <- map_dfr(1:M_stage1, function(sim_idx) {
  run_stage1_sim(60637 + 500 + sim_idx, K_stage1_50, A_stage1, r_true_values[1])$pc |>
    mutate(sim = sim_idx)
})

# Restore original globals
P_win <- P_win_temp
rank_borda <- rank_borda_temp

# Calculate average performance
pc_curves_k50 <- sim_results_k50 |>
  group_by(Row) |>
  summarise(across(starts_with("pc"), ~ mean(.x, na.rm = TRUE)), .groups = "drop")

# Create plot for K=50
pc_long_k50 <- pc_curves_k50 |>
  dplyr::select(Row, pc20, pc_top5_in_20, pc_top5_in_25) |>
  pivot_longer(
    cols = -Row,
    names_to = "metric",
    values_to = "proportion_correct"
  ) |>
  mutate(
    metric = case_when(
      metric == "pc_top5_in_25" ~ "Best 5 in top 25",
      metric == "pc_top5_in_20" ~ "Best 5 in top 20", 
      metric == "pc20" ~ "Top 20"
    ),
    metric = factor(metric, levels = c("Top 20", "Best 5 in top 20", "Best 5 in top 25"))
  )

p2_stage1_pc_k50 <- ggplot(pc_long_k50, aes(x = Row, y = proportion_correct, color = metric)) +
  geom_line(linewidth = 1) +
  labs(
    title = "Stage 1: Proportion correct over time (K = 50)",
    subtitle = glue("Uniform allocation, M = {M_stage1}, K = {K_stage1_50}, sd = {sd_stage1}"),
    x = "Number of comparisons",
    y = "Proportion correct",
    color = "Metric"
  ) +
  coord_cartesian(ylim = c(0, 1), xlim = c(0, A_stage1)) +
  scale_y_continuous(breaks = seq(0, 1, 0.2)) +
  scale_color_manual(values = ranking_colors[c("Top 20", "Best 5 in top 20", "Best 5 in top 25")]) +
  geom_hline(yintercept = 0.8, linetype = "dashed", color = "red", alpha = 0.7) +
  geom_hline(yintercept = 0.9, linetype = "dashed", color = "red", alpha = 0.7) +
  annotate("text", x = A_stage1 * 0.05, y = 0.8, label = "80%", 
           color = "red", vjust = -0.3, size = 3) +
  annotate("text", x = A_stage1 * 0.05, y = 0.9, label = "90%", 
           color = "red", vjust = -0.3, size = 3) +
  theme_minimal() +
  theme(plot.subtitle = element_text(size = 10))

print(p2_stage1_pc_k50)

ggsave("../figures/stage1_proportion_correct_k50.png", p2_stage1_pc_k50, width = 10, height = 6, dpi = 300)

2 Stage: Adaptive allocation

Our goal is to:

Compare performance of different bandit algorithms (DTS, TT, IF2, Top K) vs. uniform on a pruned set of arms.

Best arm identification
Regret

Treatment is assigned adaptively, both measures are ranked

# Stage 2 Parameters (defined once)
set.seed(60637 + 1000)  # Different seed for Stage 2

K_stage2 <- 20          # Focused set of arms (eventually from Stage 1 pruning)
A_stage2 <- 1e3       # Total comparisons 
M_stage2 <- 1e2          # Number of simulation replications
first_batch_stage2 <- 400  # Initial batch size for adaptive algorithms
nbatches_stage2 <- 5    # Batches for adaptive algorithms
sd_stage2 <- 0.5        # Moderate dispersion (harder than Stage 1)

# Algorithm comparison parameters
alpha_stage2 <- 0.5                           # DTS confidence parameter
delta_stage2 <- 1/(A_stage2 * K_stage2^2)     # IF2 confidence parameter  
floor_mult_stage2 <- 0.30                     # Probability floor multiplier
n_mc_stage2 <- 1e3                            # Monte Carlo samples

# Generate Stage 2 true arm strengths
true_theta_stage2 <- rnorm(K_stage2, mean = 0, sd = sd_stage2)

# Stage 2 win probabilities  
P_win_stage2 <- function(i, j) plogis(true_theta_stage2[i] - true_theta_stage2[j])

# Generate win matrix for Stage 2
win_mat_stage2 <- matrix(0, nrow = K_stage2, ncol = K_stage2)
for(i in 1:K_stage2) {
  for(j in 1:K_stage2) {
    if (i != j) {
      win_mat_stage2[i, j] <- P_win_stage2(i, j)
    }
  }
}

batch_sizes <- c(first_batch_stage2, rep((A_stage2-first_batch_stage2)/(nbatches_stage2-1), nbatches_stage2-1))

# True rankings for Stage 2
true_strength_stage2 <- rowSums(win_mat_stage2) / (K_stage2 - 1)
rank_borda_stage2 <- order(true_strength_stage2, decreasing = TRUE)

# Define algorithms and their settings for Stage 2
algorithms_stage2 <- list(
  dts = list(name = "dts", batch_sizes = batch_sizes),
  # tt = list(name = "tt", batch_sizes = batch_sizes),
  topk5 = list(name = "topk5", batch_sizes = batch_sizes),
  topk10 = list(name = "topk10", batch_sizes = batch_sizes),
  # weighted_topk5 = list(name = "weighted_topk5", batch_sizes = batch_sizes),
  # weighted_topk10 = list(name = "weighted_topk10", batch_sizes = batch_sizes),
  # if2 = list(name = "if2", batch_sizes = batch_sizes),
  uniform = list(name = "uniform", batch_sizes = A_stage2)  # Single batch = uniform allocation
)

set.seed(60637)

hist(win_mat_stage2[upper.tri(win_mat_stage2)], breaks = 20, 
     main = glue("Distribution of win probabilities,\nStage 2,\nsd = {sd_stage2}"),
     xlab = "Win probability", ylab = "Frequency")

# Start timing for Stage 2 analysis
stage2_start_time <- Sys.time()

# Set remaining global parameters for Stage 2
P_win <- P_win_stage2
true_theta <- true_theta_stage2
win_mat <- win_mat_stage2
true_strength <- true_strength_stage2
rank_borda <- rank_borda_stage2

# Memory-efficient sequential processing of algorithms
base_seed <- 60637

# Initialize storage for final results
all_pc_stage2 <- tibble()
all_rank_stage2 <- tibble()
all_cum_avg_stage2 <- tibble()
strength_avg_stage2 <- tibble()

# Algorithm name mapping
alg_name_map <- c(
  "dts" = "Double Thompson sampling",
  "tt" = "Top-two Thompson sampling", 
  "topk5" = "Top-K Thompson sampling (k=5)",
  "topk10" = "Top-K Thompson sampling (k=10)",
  "weighted_topk5" = "Weighted Top-K Thompson sampling (k=5)",
  "weighted_topk10" = "Weighted Top-K Thompson sampling (k=10)",
  "if2" = "Interleaved Filter2",
  "uniform" = "Uniform"
)

# Process each algorithm sequentially to avoid memory overload
for(alg_idx in seq_along(algorithms_stage2)) {
  alg <- algorithms_stage2[[alg_idx]]
  alg_name <- names(algorithms_stage2)[alg_idx]
  
  # Run simulations for this algorithm only
  sim_results_alg <- map(1:M_stage2, ~ {
    run_single_sim(base_seed + .x, alg$name, alg$batch_sizes, K_stage2, A_stage2, alpha_stage2, delta_stage2, floor_mult_stage2, n_mc_stage2)
  })
  
  # Immediately process into the summaries needed for plots
  
  # 1. Process PC data
  pc_data_alg <- bind_rows(map(seq_along(sim_results_alg), ~ {
    sim_results_alg[[.x]]$pc |> mutate(sim = .x)
  })) |>
    mutate(algorithm = alg_name_map[alg_name])
  all_pc_stage2 <- bind_rows(all_pc_stage2, pc_data_alg)
  
  # 2. Process rank data
  rank_data_alg <- map_dfr(seq_along(sim_results_alg), function(idx) {
    mat <- sim_results_alg[[idx]]$rank_order
    colnames(mat) <- paste0("V", seq_len(ncol(mat)))
    
    as_tibble(mat, .name_repair = "minimal") |> 
      mutate(Trial = row_number()) |> 
      pivot_longer(
        cols = -Trial,
        names_to = "RankPos",
        values_to = "Arm"
      ) |> mutate(
        RankPos = as.integer(sub("^V", "", RankPos)),
        sim = idx
      )
  }) |>
    mutate(algorithm = alg_name)
  all_rank_stage2 <- bind_rows(all_rank_stage2, rank_data_alg)
  
  # 3. Process cumulative data (averaged)
  combined_cum <- map_dfr(seq_along(sim_results_alg), function(sim_idx) {
    sim_results_alg[[sim_idx]]$cum |>
      mutate(sim = sim_idx)
  })
  
  count_col <- if("Cumulative_Count" %in% names(combined_cum)) "Cumulative_Count" else "Cumulative"
  
  cum_avg_alg <- combined_cum |>
    group_by(Trial, Arm) |>
    summarise(Cumulative = mean(.data[[count_col]]), .groups = "drop") |>
    mutate(
      algorithm = alg_name_map[alg_name],
      color = case_when(
        Arm %in% rank_borda_stage2[1] ~ "Top 1",
        Arm %in% rank_borda_stage2[2:3] ~ "Top 3",
        Arm %in% rank_borda_stage2[4:5] ~ "Top 5",
        TRUE ~ "Other"
      )
    )
  all_cum_avg_stage2 <- bind_rows(all_cum_avg_stage2, cum_avg_alg)
  
  # 4. Process strength/regret data
  trial_strengths <- map_dfr(1:A_stage2, function(trial) {
    all_rankings <- map(sim_results_alg, ~ .x$rank_order[trial, ])
    
    tibble(
      Trial = trial,
      strength1 = mean(map_dbl(all_rankings, ~ mean(true_strength_stage2[.x[1:1]]))),
      strength3 = mean(map_dbl(all_rankings, ~ mean(true_strength_stage2[.x[1:3]]))),
      strength5 = mean(map_dbl(all_rankings, ~ mean(true_strength_stage2[.x[1:5]])))
    )
  }) |>
    mutate(algorithm = alg_name_map[alg_name])
  
  strength_avg_alg <- trial_strengths |>
    pivot_longer(starts_with("strength"), names_prefix = "strength", names_to = "TopK") |>
    mutate(
      TopK_label = paste0("Top ", TopK),
      Regret = case_when(
        TopK == "1" ~ mean(true_strength_stage2[rank_borda_stage2[1:1]]) - value,
        TopK == "3" ~ mean(true_strength_stage2[rank_borda_stage2[1:3]]) - value,
        TopK == "5" ~ mean(true_strength_stage2[rank_borda_stage2[1:5]]) - value,
        TRUE ~ NA_real_
      )
    )
  strength_avg_stage2 <- bind_rows(strength_avg_stage2, strength_avg_alg)
  
  # Clear the raw results to free memory
  rm(sim_results_alg, pc_data_alg, rank_data_alg, combined_cum, cum_avg_alg, trial_strengths, strength_avg_alg)
  gc()  # Force garbage collection
  
}

# load data for interactive sessions (updated for lazy load)
# cache_files <- list.files("dueling_bandits_iterated_cache/html",
#                          pattern = "^stage2_run.*\\.RData$",
#                          full.names = TRUE)
# if (length(cache_files) > 0) {
#   load(cache_files[1])  # Load the first matching file
# } else {
#   warning("No stage2_run cache file found")
# }

# Calculate average performance across algorithms  
all_pc_long_avg_stage2 <-
  all_pc_stage2 |>
  pivot_longer(starts_with("pc"),
               names_prefix = "pc",
               names_to   = "Top",
               values_to  = "Value") |>
  group_by(algorithm, Row, Top) |>
  summarise(Value = mean(Value), .groups = "drop") |>
  mutate(Top = factor(Top, levels = c("1", "3", "5", "10", "20")))

# Stage 2 Algorithm Comparison Plots
# Data has already been processed sequentially in the previous chunk

## Algorithm comparison across top-k performance
p3_stage2_algorithms <- ggplot(all_pc_long_avg_stage2 |> filter(Top %in% c("1", "3", "5")),
                               aes(x = Row, y = Value, colour = algorithm, alpha = algorithm, linetype = algorithm)) +
  geom_line(linewidth = 0.5) +
  scale_alpha_manual(values = c("Uniform" = 1.0, "Double Thompson sampling" = 0.7, "Top-two Thompson sampling" = 0.7, "Top-K Thompson sampling (k=5)" = 0.7, "Top-K Thompson sampling (k=10)" = 0.7, "Weighted Top-K Thompson sampling (k=5)" = 0.7, "Weighted Top-K Thompson sampling (k=10)" = 0.7, "Interleaved Filter2" = 0.7)) +
  scale_linetype_manual(values = c("Uniform" = "dashed", "Double Thompson sampling" = "solid", "Top-two Thompson sampling" = "solid", "Top-K Thompson sampling (k=5)" = "solid", "Top-K Thompson sampling (k=10)" = "solid", "Weighted Top-K Thompson sampling (k=5)" = "solid", "Weighted Top-K Thompson sampling (k=10)" = "solid", "Interleaved Filter2" = "solid")) +
  facet_wrap(~ Top, scales = "free_y", 
             labeller = labeller(.cols = function(x) paste0("Top ", x))) +
  labs(title = "Stage 2: Best arm identification",
       subtitle = glue("M = {M_stage2}, K = {K_stage2}, sd = {sd_stage2}"),
       x = "Number of comparisons",
       y = "Proportion correct",
       colour = "Algorithm") + 
  coord_cartesian(ylim = c(0, 1)) + 
  scale_y_continuous(breaks = seq(0, 1, by = 0.2)) +
  theme_minimal() + 
  scale_color_manual(values = algorithm_colors) + 
  geom_hline(yintercept = 0.8, linetype = "dashed", color = "red", alpha = 0.7) +
  guides(alpha = "none", linetype = "none") +
  theme(
    plot.subtitle = element_text(size = 10),
    strip.text = element_text(size = 12)
  )

options(repr.plot.width = 16, repr.plot.height = 10)
print(p3_stage2_algorithms)

ggsave("../figures/stage2_algorithm_comparison.png", p3_stage2_algorithms, width = 12, height = 8, dpi = 300)

## Cumulative arm appearances faceted by algorithm
# all_cum_avg_stage2 computed above in optimized section

p4_stage2_cumulative <- ggplot(all_cum_avg_stage2,
                               aes(x = Trial, y = Cumulative, group = Arm, colour = factor(color))) +
  geom_line(alpha = 0.75) +
  facet_wrap(~ algorithm) +
  labs(title = "Stage 2: Average cumulative arm appearances",
       subtitle = glue("M = {M_stage2}, K = {K_stage2}, sd = {sd_stage2}"),
       x = "Number of comparisons",
       y = "Mean cumulative count",
       colour = "Arms") +
  theme_minimal() + 
  scale_color_manual(values = ranking_colors[c("Top 1", "Top 3", "Top 5", "Other")]) +
  theme(
    plot.subtitle = element_text(size = 10),
    strip.text = element_text(size = 12)
  )

options(repr.plot.width = 16, repr.plot.height = 10)
print(p4_stage2_cumulative)

ggsave("../figures/stage2_cumulative_appearances.png", p4_stage2_cumulative, width = 12, height = 8, dpi = 300)

# Duplicate computation removed - using working version above

p5_stage2_regret <- ggplot(strength_avg_stage2,
                           aes(Trial, Regret, colour = algorithm, alpha = algorithm, linetype = algorithm)) +
  geom_line(linewidth = 0.5) +
  scale_alpha_manual(values = c("Uniform" = 1.0, "Double Thompson sampling" = 0.7, "Top-two Thompson sampling" = 0.7, "Top-K Thompson sampling (k=5)" = 0.7, "Top-K Thompson sampling (k=10)" = 0.7, "Weighted Top-K Thompson sampling (k=5)" = 0.7, "Weighted Top-K Thompson sampling (k=10)" = 0.7, "Interleaved Filter2" = 0.7)) +
  scale_linetype_manual(values = c("Uniform" = "dashed", "Double Thompson sampling" = "solid", "Top-two Thompson sampling" = "solid", "Top-K Thompson sampling (k=5)" = "solid", "Top-K Thompson sampling (k=10)" = "solid", "Weighted Top-K Thompson sampling (k=5)" = "solid", "Weighted Top-K Thompson sampling (k=10)" = "solid", "Interleaved Filter2" = "solid")) +
  facet_wrap(~ TopK_label) +
  labs(title = "Stage 2: Average regret",
       subtitle = glue("M = {M_stage2}, K = {K_stage2}, sd = {sd_stage2}"),
       x = "Number of comparisons",
       y = "Average regret",
       colour = "Arms") +
  theme_minimal() + 
  scale_color_manual(values = algorithm_colors) + 
  guides(alpha = "none", linetype = "none") +
  theme(
    plot.subtitle = element_text(size = 10),
    strip.text = element_text(size = 12)
  )

options(repr.plot.width = 16, repr.plot.height = 10)
print(p5_stage2_regret)

ggsave("../figures/stage2_regret.png", p5_stage2_regret, width = 12, height = 8, dpi = 300)

Dueling bandits simulation

2025-07-21

0.1 Mathematical setup

0.2 Simulation data generating process

1 Stage: Assess measures

2 Stage: Adaptive allocation