WLS Bin Hyperparameter Tuning

Author

Gabriel Krotkov

This markdown file will implement hyperparameter tuning to optimize the weights estimated in this paper. First I will tune over the number of bins, fix that number, and then explore various stepwise optimization strategies to find a best weighting.

Timing note: running this code took my machine ~ 25 minutes.

Setup

Code
library(devtools)
load_all()
rm(list = ls())
load("../../data/district_quals_09_24.rda")

# normalize match scores so we can do cross-week and cross-season comparisons
# this simplifies a lot of later code
matches <- lapply(
    matches, 
    function(match_df){
        scaled <- scale(c(match_df$red_score, match_df$blue_score))
        match_df$red_score <- scaled[1:nrow(match_df)]
        match_df$blue_score <- scaled[(nrow(match_df) + 1):length(scaled)]
        return(match_df)
    })

library(extrafont)
library(gt)
gos_blue <- "#337DFC"
gos_red <- "#F7041A"

gos_theme <- theme_bw() + 
    theme(text = element_text(family = "Futura"))

# fits WLS on the match dataframes
weighted_fit <- function(match_df, w){
    return(
        fit_lineup_lm(
            match_df, 
            responses = list(
                red = match_df$red_score, blue = match_df$blue_score
            ), 
            w = w
        )
    )
}

# fits the standard opr model on a regular match dataframe from TheBlueAlliance
fit_opr <- function(match_df){
    return(
        fit_lineup_lm(
            match_df, responses = list(
                red = match_df$red_score, blue = match_df$blue_score)
        )
    )
}

normalize_match_number <- function(opr_residual){
    # we know we can divide the length by 2 because there are 2 alliances/match
    alliance_corrected <- ((1:length(opr_residual) - 1) %% 
                               (length(opr_residual) / 2)) + 1
    return((alliance_corrected / length(opr_residual)) * 2)
}

fits <- lapply(matches, fit_opr)

residuals <- lapply(fits, function(fit){return(residuals(fit))})
# match percentile is the match number divided by the number of matches
match_percentile <- unlist(lapply(residuals, normalize_match_number), 
                           use.names = FALSE)

Bin Number Optimization

The key numbers to cover for the optimization were 2 through 12. 2 bins represents splitting the tournament in half, the fewest divisions we can make, and 12 represents treating each “round” of matches differently. Because teams will not fit precisely into evenly divided rounds, we cannot guarantee with 12 bins that each team gets 1 match in each bin, but on average it represents that division.

Code
# assumes that residuals have been standardized
generate_weights <- function(n_bins, residuals, match_percentile){
    cuts <- cut(match_percentile, breaks = n_bins)
    bin_vars <- rep(0, length(unique(cuts)))
    for (i in 1:length(unique(cuts))){
        bin_vars[i] <- var(unlist(residuals)[cuts == levels(cuts)[i]])
    }
    return(1 / bin_vars)
}

mean_loocvs <- c() # define an empty vector
n_bins <- 2:15 # define n_bins
for (k in n_bins){
    bin_weight_fits <- lapply(
        matches, weighted_fit, 
        w = generate_weights(k, residuals, match_percentile)
    )
    mean_loocvs <- c(mean_loocvs, mean(sapply(bin_weight_fits, LOOCV)))
}
names(mean_loocvs) <- paste0("Bins = ", n_bins)

viz <- data.frame(bins = n_bins, mean_loocvs = mean_loocvs)

ggplot(viz, aes(x = n_bins, y = mean_loocvs)) + 
    geom_point(col = gos_red) + 
    labs(title = "Hyperparameter Tuning for Bins", 
         x = "Bin #", y = "LOOCV Mean") + 
    scale_x_continuous(breaks = n_bins) + 
    gos_theme

Code
loocv_min_bins <- mean_loocvs[which.min(mean_loocvs)]

cat("The minimum occurs at", names(which.min(mean_loocvs)), 
    "with LOOCV:", loocv_min_bins, "\n")
The minimum occurs at Bins = 12 with LOOCV: 0.6689429 

Takeaway: Really any number between 6 and 12 will be fine, but 12 does technically come out on top.

Stepwise Grid Optimization

To optimize over 12 bins, we could skip interaction effects and optimize in a stepwise fashion. This will give up some precision, but \(O(n^{12})\) is too much to optimize over exhaustively.

Single bin

The strategy only considers weightings that are identical to the originally estimated weights, except for a change in one bin.

Code
load("../../data/opr_weights.rda")

step_size <- 0.1
steps_radius <- 10

# store the CV errors
mean_loocvs <- matrix(
    0, nrow = length(w_bin_resid_vars), ncol = steps_radius * 2 + 1
)

# store the weights in an indexable way
w_lookup <- matrix(
    0, nrow = length(w_bin_resid_vars), ncol = steps_radius * 2 + 1
)

for (i in seq_along(w_bin_resid_vars)){ # i indexes the bins
    grid <- seq(
        w_bin_resid_vars[i] - (step_size * steps_radius), 
        w_bin_resid_vars[i] + (step_size * steps_radius), by = step_size
    )
    # store the grid in our lookup matrix
    w_lookup[i, ] <- grid
    # reset w_star to the estimated weighting
    w_star <- w_bin_resid_vars
    # optimization within a single bin
    for (j in seq_along(grid)){ # j indexes the grid
        w_star[i] <- grid[j]
        fits_star <- lapply(matches, weighted_fit, w = w_star)
        mean_loocvs[i, j] <- mean(sapply(fits_star, LOOCV))
    }
}

loocv_min_onebin <- min(mean_loocvs)

min_idx <- which(mean_loocvs == min(mean_loocvs), arr.ind = TRUE)

w_onebin <- w_bin_resid_vars
w_onebin[min_idx[1, 1]] <- w_lookup[min_idx[1, 1], min_idx[1, 2]]

cat("The minimum while changing only one bin occurs at",
    min_idx, "\n",
    "with LOOCV:", loocv_min_onebin, "\n", 
    "and weighting:", w_onebin, "\n")
The minimum while changing only one bin occurs at 1 4 
 with LOOCV: 0.6680777 
 and weighting: 1.900177 2.760391 2.842327 2.833273 2.843733 2.898895 2.871657 2.851502 2.823041 2.791074 2.76293 2.729227 

Stepwise Simultaneous

This version optimizes each bin in the context of the weights estimated by residual variance binning, then adopts the optimal weight for each individual bin as the final weighting.

Code
min_idx <- cbind(row = 1:12, col = apply(mean_loocvs, 1, which.min))

w_stepwise <- w_lookup[cbind(min_idx[, 1], min_idx[, 2])]

fits_stepwise <- lapply(matches, weighted_fit, w = w_stepwise)

loocv_stepwise <- mean(sapply(fits_stepwise, LOOCV))

cat("The minimum using the optimum from each bin occurs with weighting:", "\n", 
    w_stepwise, "\n", 
    "with LOOCV:", loocv_stepwise, "\n")
The minimum using the optimum from each bin occurs with weighting: 
 1.900177 2.560391 2.942327 3.133273 3.243733 3.198895 3.171657 3.151502 2.923041 2.791074 2.66293 2.429227 
 with LOOCV: 0.6674667 
Code
cat("Stepwise Optimization over all bins improves on the estimated binning by:", 
    loocv_min_bins - loocv_stepwise, "standard deviations.", "\n", 
    "Stepwise Optimization over a single bin improves on the estimated binning by:", 
    loocv_min_bins - loocv_min_onebin, "standard deviations.", "\n")
Stepwise Optimization over all bins improves on the estimated binning by: 0.001476205 standard deviations. 
 Stepwise Optimization over a single bin improves on the estimated binning by: 0.0008652703 standard deviations. 

Sequential Weight Fixing

This version optimizes in a single bin and then fixes that optimized value before evaluating the rest of the weights.

Code
step_size <- 0.1
steps_radius <- 10

mean_loocvs <- matrix(
    0, nrow = length(w_bin_resid_vars), ncol = steps_radius * 2 + 1
)

w_star <- w_bin_resid_vars
for (i in seq_along(w_bin_resid_vars)){
    grid <- seq(
        w_bin_resid_vars[i] - (step_size * steps_radius), 
        w_bin_resid_vars[i] + (step_size * steps_radius), by = step_size
    )
    # optimization within a single bin
    for (j in seq_along(grid)){
        w_star[i] <- grid[j]
        fits_star <- lapply(matches, weighted_fit, w = w_star)
        mean_loocvs[i, j] <- mean(sapply(fits_star, LOOCV))
    }
    # now fix the optimal value for bin i for the rest of the optimization
    w_star[i] <- grid[which.min(mean_loocvs[, i])]
}

w_sequential <- w_star

fits_star <- lapply(matches, weighted_fit, w_sequential)

cat("The minimum using fixing occurs at:\n", 
    w_sequential, "\n",
    "with LOOCV:", mean(sapply(fits_star, LOOCV)), "\n")
The minimum using fixing occurs at:
 1.700177 1.960391 2.142327 2.233273 2.343733 2.498895 2.571657 2.651502 2.723041 2.791074 2.86293 1.829227 
 with LOOCV: 0.6696457 

Results

Stepwise simultaneous weight optimization performed better than fixing and single bin optimization, so we’ll save that weighting.

Code
save(w_stepwise, file = "../../data/optimized_opr_weighting.rda")

Given the new weighting, let’s show the improvement over unweighted OPR.

Code
rm(list = ls())
library(devtools)
load_all()
rm(list = ls())
load("../../data/district_quals_09_24.rda")
load("../../data/optimized_opr_weighting.rda")

# normalize match scores so we can do cross-week and cross-season comparisons
# this simplifies a lot of later code
matches <- lapply(
    matches, 
    function(match_df){
        scaled <- scale(c(match_df$red_score, match_df$blue_score))
        match_df$red_score <- scaled[1:nrow(match_df)]
        match_df$blue_score <- scaled[(nrow(match_df) + 1):length(scaled)]
        return(match_df)
    })

library(extrafont)
library(gt)
gos_blue <- "#337DFC"
gos_red <- "#F7041A"

gos_theme <- theme_bw() + 
    theme(text = element_text(family = "Futura"))

# fits WLS on the match dataframes
weighted_fit <- function(match_df, w){
    return(
        fit_lineup_lm(
            match_df, 
            responses = list(
                red = match_df$red_score, blue = match_df$blue_score
            ), 
            w = w
        )
    )
}

# fits the standard opr model on a regular match dataframe from TheBlueAlliance
fit_opr <- function(match_df){
    return(
        fit_lineup_lm(
            match_df, responses = list(
                red = match_df$red_score, blue = match_df$blue_score)
        )
    )
}

normalize_match_number <- function(opr_residual){
    # we know we can divide the length by 2 because there are 2 alliances/match
    alliance_corrected <- ((1:length(opr_residual) - 1) %% 
                               (length(opr_residual) / 2)) + 1
    return((alliance_corrected / length(opr_residual)) * 2)
}

fits <- lapply(matches, fit_opr)

residuals <- lapply(fits, function(fit){return(residuals(fit))})
# match percentile is the match number divided by the number of matches
match_percentile <- unlist(lapply(residuals, normalize_match_number), 
                           use.names = FALSE)
Code
linear_weight_fits <- lapply(matches, weighted_fit, w = w_linear)
bin_weight_fits <- lapply(matches, weighted_fit, w = w_bin_resid_vars)
stepwise_fits <- lapply(matches, weighted_fit, w = w_stepwise)
raw_fits <- lapply(matches, weighted_fit, w = 1)

linear_weight_loocvs <- sapply(linear_weight_fits, LOOCV)
resid_weight_loocvs <- sapply(bin_weight_fits, LOOCV)
stepwise_loocvs <- sapply(stepwise_fits, LOOCV)
raw_loocvs <- sapply(raw_fits, LOOCV)

viz <- data.frame(linear_ratio = raw_loocvs / linear_weight_loocvs, 
                  residvar_ratio = raw_loocvs / resid_weight_loocvs,
                  stepwise_ratio = raw_loocvs / stepwise_loocvs, 
                  year = as.factor(substr(names(raw_loocvs), 1, 4)))

ggplot(viz, aes(x = stepwise_ratio)) + 
    geom_histogram(fill = gos_blue, color = "black", 
                   bins = ceiling(sqrt(nrow(viz)))) + 
    geom_vline(aes(xintercept = mean(stepwise_ratio)), color = gos_red) + 
    annotate("label", x = mean(viz$stepwise_ratio), y = 15, 
             label = paste0("Mean: ", 
                            round(mean(viz$stepwise_ratio), 3))) +
    labs(title = "OPR Weighting LOOCV Ratio", 
         subtitle = "District Event Qualifications, 2009 - 2024", 
         x = "Event-Normalized LOOCV Error Ratio (raw:stepwise-optimized)", 
         y = "# Events") +
    gos_theme

Code
ggplot(viz, aes(x = stepwise_ratio)) + 
    geom_density(alpha = 0.2, fill = gos_blue) + 
    facet_wrap(~year) +
    labs(title = "OPR Weighting LOOCV Ratio", x = "LOOCV Ratio (raw:stepwise-optimized", 
         y = "Density") + 
    gos_theme

Code
viz <- viz %>%
    group_by(year) %>%
    summarize(events = n(), 
              stepwise_ratio = mean(stepwise_ratio), 
              linear_ratio = mean(linear_ratio), 
              residvar_ratio = mean(residvar_ratio))

colnames(viz) <- c(
    "Year", "# Events", "Stepwise", "Linear", "Binned"
)

viz$Year <- as.numeric(as.character(viz$Year))
viz <- round(viz, digits = 4)

gt(viz) %>%
    tab_header(
        title = "Mean LOOCV Ratios", 
        subtitle = "unweighted:weighted"
    ) %>%
    tab_options(
        column_labels.background.color = gos_blue
    ) %>%
    tab_style(
        style = cell_text(weight = "bold"), 
        locations = cells_body(columns = "Linear", 
                               rows = which((viz$Linear > viz$Binned) & 
                                                viz$Linear > viz$Stepwise))
    ) %>%
    tab_style(
        style = cell_text(weight = "bold"), 
        locations = cells_body(columns = "Binned", 
                               rows = which((viz$Binned > viz$Linear) & 
                                                (viz$Binned > viz$Stepwise)))
    ) %>%
    tab_style(
        style = cell_text(weight = "bold"), 
        locations = cells_body(columns = "Stepwise", 
                               rows = which((viz$Stepwise >= viz$Binned) &
                                                (viz$Stepwise >= viz$Linear)))
    ) %>%
    tab_style(
        cell_borders(color = gos_blue), 
        cells_body()
    ) %>%
    opt_table_font(
        font = "Futura"
    ) %>%
    tab_footnote(
        footnote = "Higher values indicate the weighting is performing better", 
        locations = cells_title("subtitle")
    )
Mean LOOCV Ratios
unweighted:weighted1
Year # Events Stepwise Linear Binned
2009 7 0.9989 1.0005 1.0007
2010 8 0.9932 0.9987 0.9993
2011 10 1.0013 1.0001 1.0009
2012 17 1.0009 1.0002 1.0008
2013 19 1.0058 1.0011 1.0019
2014 43 0.9985 1.0003 1.0003
2015 50 1.0010 1.0004 1.0008
2016 72 1.0091 1.0021 1.0025
2017 81 1.0053 1.0016 1.0018
2018 94 0.9988 1.0004 1.0004
2019 109 1.0085 1.0021 1.0024
2022 90 1.0061 1.0016 1.0020
2023 109 1.0019 1.0008 1.0010
2024 112 1.0025 1.0009 1.0011
1 Higher values indicate the weighting is performing better
Code
viz$mean_pct_improvement <- (viz$`Stepwise` - 1)

ggplot(viz, aes(x = as.character(Year), y = mean_pct_improvement)) + 
    geom_point(aes(size = `# Events`), col = gos_red) + 
    geom_hline(linetype = "dotted", linewidth = 1.25, col = gos_blue, 
               yintercept = mean(viz$mean_pct_improvement)) + 
    geom_hline(yintercept = 0) +
    scale_y_continuous(labels = scales::percent) +
    labs(title = "Weighting Percent Improvement by Year", 
         subtitle = "By Stepwise Bin Optimization", 
         x = "Year", y = "Mean LOOCV Percent Improvement") + 
    gos_theme