WLS Bin Hyperparameter Tuning

Author

Gabriel Krotkov

This markdown file will implement hyperparameter tuning to optimize the weights estimated in this paper. First I will tune over the number of bins, fix that number, and then explore various stepwise optimization strategies to find a best weighting.

Timing note: running this code took my machine ~ 25 minutes.

Setup

Code

library(devtools)
load_all()
rm(list = ls())
load("../../data/district_quals_09_24.rda")

# normalize match scores so we can do cross-week and cross-season comparisons
# this simplifies a lot of later code
matches <- lapply(
    matches, 
    function(match_df){
        scaled <- scale(c(match_df$red_score, match_df$blue_score))
        match_df$red_score <- scaled[1:nrow(match_df)]
        match_df$blue_score <- scaled[(nrow(match_df) + 1):length(scaled)]
        return(match_df)
    })

library(extrafont)
library(gt)
gos_blue <- "#337DFC"
gos_red <- "#F7041A"

gos_theme <- theme_bw() + 
    theme(text = element_text(family = "Futura"))

# fits WLS on the match dataframes
weighted_fit <- function(match_df, w){
    return(
        fit_lineup_lm(
            match_df, 
            responses = list(
                red = match_df$red_score, blue = match_df$blue_score
            ), 
            w = w
        )
    )
}

# fits the standard opr model on a regular match dataframe from TheBlueAlliance
fit_opr <- function(match_df){
    return(
        fit_lineup_lm(
            match_df, responses = list(
                red = match_df$red_score, blue = match_df$blue_score)
        )
    )
}

normalize_match_number <- function(opr_residual){
    # we know we can divide the length by 2 because there are 2 alliances/match
    alliance_corrected <- ((1:length(opr_residual) - 1) %% 
                               (length(opr_residual) / 2)) + 1
    return((alliance_corrected / length(opr_residual)) * 2)
}

fits <- lapply(matches, fit_opr)

residuals <- lapply(fits, function(fit){return(residuals(fit))})
# match percentile is the match number divided by the number of matches
match_percentile <- unlist(lapply(residuals, normalize_match_number), 
                           use.names = FALSE)

Bin Number Optimization

The key numbers to cover for the optimization were 2 through 12. 2 bins represents splitting the tournament in half, the fewest divisions we can make, and 12 represents treating each “round” of matches differently. Because teams will not fit precisely into evenly divided rounds, we cannot guarantee with 12 bins that each team gets 1 match in each bin, but on average it represents that division.

Code

# assumes that residuals have been standardized
generate_weights <- function(n_bins, residuals, match_percentile){
    cuts <- cut(match_percentile, breaks = n_bins)
    bin_vars <- rep(0, length(unique(cuts)))
    for (i in 1:length(unique(cuts))){
        bin_vars[i] <- var(unlist(residuals)[cuts == levels(cuts)[i]])
    }
    return(1 / bin_vars)
}

mean_loocvs <- c() # define an empty vector
n_bins <- 2:15 # define n_bins
for (k in n_bins){
    bin_weight_fits <- lapply(
        matches, weighted_fit, 
        w = generate_weights(k, residuals, match_percentile)
    )
    mean_loocvs <- c(mean_loocvs, mean(sapply(bin_weight_fits, LOOCV)))
}
names(mean_loocvs) <- paste0("Bins = ", n_bins)

viz <- data.frame(bins = n_bins, mean_loocvs = mean_loocvs)

ggplot(viz, aes(x = n_bins, y = mean_loocvs)) + 
    geom_point(col = gos_red) + 
    labs(title = "Hyperparameter Tuning for Bins", 
         x = "Bin #", y = "LOOCV Mean") + 
    scale_x_continuous(breaks = n_bins) + 
    gos_theme

Code

loocv_min_bins <- mean_loocvs[which.min(mean_loocvs)]

cat("The minimum occurs at", names(which.min(mean_loocvs)), 
    "with LOOCV:", loocv_min_bins, "\n")

The minimum occurs at Bins = 12 with LOOCV: 0.6689429

Takeaway: Really any number between 6 and 12 will be fine, but 12 does technically come out on top.

Stepwise Grid Optimization

To optimize over 12 bins, we could skip interaction effects and optimize in a stepwise fashion. This will give up some precision, but \(O(n^{12})\) is too much to optimize over exhaustively.

Single bin

The strategy only considers weightings that are identical to the originally estimated weights, except for a change in one bin.

Code

load("../../data/opr_weights.rda")

step_size <- 0.1
steps_radius <- 10

# store the CV errors
mean_loocvs <- matrix(
    0, nrow = length(w_bin_resid_vars), ncol = steps_radius * 2 + 1
)

# store the weights in an indexable way
w_lookup <- matrix(
    0, nrow = length(w_bin_resid_vars), ncol = steps_radius * 2 + 1
)

for (i in seq_along(w_bin_resid_vars)){ # i indexes the bins
    grid <- seq(
        w_bin_resid_vars[i] - (step_size * steps_radius), 
        w_bin_resid_vars[i] + (step_size * steps_radius), by = step_size
    )
    # store the grid in our lookup matrix
    w_lookup[i, ] <- grid
    # reset w_star to the estimated weighting
    w_star <- w_bin_resid_vars
    # optimization within a single bin
    for (j in seq_along(grid)){ # j indexes the grid
        w_star[i] <- grid[j]
        fits_star <- lapply(matches, weighted_fit, w = w_star)
        mean_loocvs[i, j] <- mean(sapply(fits_star, LOOCV))
    }
}

loocv_min_onebin <- min(mean_loocvs)

min_idx <- which(mean_loocvs == min(mean_loocvs), arr.ind = TRUE)

w_onebin <- w_bin_resid_vars
w_onebin[min_idx[1, 1]] <- w_lookup[min_idx[1, 1], min_idx[1, 2]]

cat("The minimum while changing only one bin occurs at",
    min_idx, "\n",
    "with LOOCV:", loocv_min_onebin, "\n", 
    "and weighting:", w_onebin, "\n")

The minimum while changing only one bin occurs at 1 4 
 with LOOCV: 0.6680777 
 and weighting: 1.900177 2.760391 2.842327 2.833273 2.843733 2.898895 2.871657 2.851502 2.823041 2.791074 2.76293 2.729227

Stepwise Simultaneous

This version optimizes each bin in the context of the weights estimated by residual variance binning, then adopts the optimal weight for each individual bin as the final weighting.

Code

min_idx <- cbind(row = 1:12, col = apply(mean_loocvs, 1, which.min))

w_stepwise <- w_lookup[cbind(min_idx[, 1], min_idx[, 2])]

fits_stepwise <- lapply(matches, weighted_fit, w = w_stepwise)

loocv_stepwise <- mean(sapply(fits_stepwise, LOOCV))

cat("The minimum using the optimum from each bin occurs with weighting:", "\n", 
    w_stepwise, "\n", 
    "with LOOCV:", loocv_stepwise, "\n")

The minimum using the optimum from each bin occurs with weighting: 
 1.900177 2.560391 2.942327 3.133273 3.243733 3.198895 3.171657 3.151502 2.923041 2.791074 2.66293 2.429227 
 with LOOCV: 0.6674667

Code

cat("Stepwise Optimization over all bins improves on the estimated binning by:", 
    loocv_min_bins - loocv_stepwise, "standard deviations.", "\n", 
    "Stepwise Optimization over a single bin improves on the estimated binning by:", 
    loocv_min_bins - loocv_min_onebin, "standard deviations.", "\n")

Stepwise Optimization over all bins improves on the estimated binning by: 0.001476205 standard deviations. 
 Stepwise Optimization over a single bin improves on the estimated binning by: 0.0008652703 standard deviations.

Sequential Weight Fixing

This version optimizes in a single bin and then fixes that optimized value before evaluating the rest of the weights.

Code

step_size <- 0.1
steps_radius <- 10

mean_loocvs <- matrix(
    0, nrow = length(w_bin_resid_vars), ncol = steps_radius * 2 + 1
)

w_star <- w_bin_resid_vars
for (i in seq_along(w_bin_resid_vars)){
    grid <- seq(
        w_bin_resid_vars[i] - (step_size * steps_radius), 
        w_bin_resid_vars[i] + (step_size * steps_radius), by = step_size
    )
    # optimization within a single bin
    for (j in seq_along(grid)){
        w_star[i] <- grid[j]
        fits_star <- lapply(matches, weighted_fit, w = w_star)
        mean_loocvs[i, j] <- mean(sapply(fits_star, LOOCV))
    }
    # now fix the optimal value for bin i for the rest of the optimization
    w_star[i] <- grid[which.min(mean_loocvs[, i])]
}

w_sequential <- w_star

fits_star <- lapply(matches, weighted_fit, w_sequential)

cat("The minimum using fixing occurs at:\n", 
    w_sequential, "\n",
    "with LOOCV:", mean(sapply(fits_star, LOOCV)), "\n")

The minimum using fixing occurs at:
 1.700177 1.960391 2.142327 2.233273 2.343733 2.498895 2.571657 2.651502 2.723041 2.791074 2.86293 1.829227 
 with LOOCV: 0.6696457

Results

Stepwise simultaneous weight optimization performed better than fixing and single bin optimization, so we’ll save that weighting.

Code

save(w_stepwise, file = "../../data/optimized_opr_weighting.rda")

Given the new weighting, let’s show the improvement over unweighted OPR.

Code

rm(list = ls())
library(devtools)
load_all()
rm(list = ls())
load("../../data/district_quals_09_24.rda")
load("../../data/optimized_opr_weighting.rda")

# normalize match scores so we can do cross-week and cross-season comparisons
# this simplifies a lot of later code
matches <- lapply(
    matches, 
    function(match_df){
        scaled <- scale(c(match_df$red_score, match_df$blue_score))
        match_df$red_score <- scaled[1:nrow(match_df)]
        match_df$blue_score <- scaled[(nrow(match_df) + 1):length(scaled)]
        return(match_df)
    })

library(extrafont)
library(gt)
gos_blue <- "#337DFC"
gos_red <- "#F7041A"

gos_theme <- theme_bw() + 
    theme(text = element_text(family = "Futura"))

# fits WLS on the match dataframes
weighted_fit <- function(match_df, w){
    return(
        fit_lineup_lm(
            match_df, 
            responses = list(
                red = match_df$red_score, blue = match_df$blue_score
            ), 
            w = w
        )
    )
}

# fits the standard opr model on a regular match dataframe from TheBlueAlliance
fit_opr <- function(match_df){
    return(
        fit_lineup_lm(
            match_df, responses = list(
                red = match_df$red_score, blue = match_df$blue_score)
        )
    )
}

normalize_match_number <- function(opr_residual){
    # we know we can divide the length by 2 because there are 2 alliances/match
    alliance_corrected <- ((1:length(opr_residual) - 1) %% 
                               (length(opr_residual) / 2)) + 1
    return((alliance_corrected / length(opr_residual)) * 2)
}

fits <- lapply(matches, fit_opr)

residuals <- lapply(fits, function(fit){return(residuals(fit))})
# match percentile is the match number divided by the number of matches
match_percentile <- unlist(lapply(residuals, normalize_match_number), 
                           use.names = FALSE)

Code

linear_weight_fits <- lapply(matches, weighted_fit, w = w_linear)
bin_weight_fits <- lapply(matches, weighted_fit, w = w_bin_resid_vars)
stepwise_fits <- lapply(matches, weighted_fit, w = w_stepwise)
raw_fits <- lapply(matches, weighted_fit, w = 1)

linear_weight_loocvs <- sapply(linear_weight_fits, LOOCV)
resid_weight_loocvs <- sapply(bin_weight_fits, LOOCV)
stepwise_loocvs <- sapply(stepwise_fits, LOOCV)
raw_loocvs <- sapply(raw_fits, LOOCV)

viz <- data.frame(linear_ratio = raw_loocvs / linear_weight_loocvs, 
                  residvar_ratio = raw_loocvs / resid_weight_loocvs,
                  stepwise_ratio = raw_loocvs / stepwise_loocvs, 
                  year = as.factor(substr(names(raw_loocvs), 1, 4)))

ggplot(viz, aes(x = stepwise_ratio)) + 
    geom_histogram(fill = gos_blue, color = "black", 
                   bins = ceiling(sqrt(nrow(viz)))) + 
    geom_vline(aes(xintercept = mean(stepwise_ratio)), color = gos_red) + 
    annotate("label", x = mean(viz$stepwise_ratio), y = 15, 
             label = paste0("Mean: ", 
                            round(mean(viz$stepwise_ratio), 3))) +
    labs(title = "OPR Weighting LOOCV Ratio", 
         subtitle = "District Event Qualifications, 2009 - 2024", 
         x = "Event-Normalized LOOCV Error Ratio (raw:stepwise-optimized)", 
         y = "# Events") +
    gos_theme

Code

ggplot(viz, aes(x = stepwise_ratio)) + 
    geom_density(alpha = 0.2, fill = gos_blue) + 
    facet_wrap(~year) +
    labs(title = "OPR Weighting LOOCV Ratio", x = "LOOCV Ratio (raw:stepwise-optimized", 
         y = "Density") + 
    gos_theme

Code

viz <- viz %>%
    group_by(year) %>%
    summarize(events = n(), 
              stepwise_ratio = mean(stepwise_ratio), 
              linear_ratio = mean(linear_ratio), 
              residvar_ratio = mean(residvar_ratio))

colnames(viz) <- c(
    "Year", "# Events", "Stepwise", "Linear", "Binned"
)

viz$Year <- as.numeric(as.character(viz$Year))
viz <- round(viz, digits = 4)

gt(viz) %>%
    tab_header(
        title = "Mean LOOCV Ratios", 
        subtitle = "unweighted:weighted"
    ) %>%
    tab_options(
        column_labels.background.color = gos_blue
    ) %>%
    tab_style(
        style = cell_text(weight = "bold"), 
        locations = cells_body(columns = "Linear", 
                               rows = which((viz$Linear > viz$Binned) & 
                                                viz$Linear > viz$Stepwise))
    ) %>%
    tab_style(
        style = cell_text(weight = "bold"), 
        locations = cells_body(columns = "Binned", 
                               rows = which((viz$Binned > viz$Linear) & 
                                                (viz$Binned > viz$Stepwise)))
    ) %>%
    tab_style(
        style = cell_text(weight = "bold"), 
        locations = cells_body(columns = "Stepwise", 
                               rows = which((viz$Stepwise >= viz$Binned) &
                                                (viz$Stepwise >= viz$Linear)))
    ) %>%
    tab_style(
        cell_borders(color = gos_blue), 
        cells_body()
    ) %>%
    opt_table_font(
        font = "Futura"
    ) %>%
    tab_footnote(
        footnote = "Higher values indicate the weighting is performing better", 
        locations = cells_title("subtitle")
    )

Year	# Events	Stepwise	Linear	Binned
Mean LOOCV Ratios
unweighted:weighted¹
2009	7	0.9989	1.0005	1.0007
2010	8	0.9932	0.9987	0.9993
2011	10	1.0013	1.0001	1.0009
2012	17	1.0009	1.0002	1.0008
2013	19	1.0058	1.0011	1.0019
2014	43	0.9985	1.0003	1.0003
2015	50	1.0010	1.0004	1.0008
2016	72	1.0091	1.0021	1.0025
2017	81	1.0053	1.0016	1.0018
2018	94	0.9988	1.0004	1.0004
2019	109	1.0085	1.0021	1.0024
2022	90	1.0061	1.0016	1.0020
2023	109	1.0019	1.0008	1.0010
2024	112	1.0025	1.0009	1.0011
¹ Higher values indicate the weighting is performing better

Code

viz$mean_pct_improvement <- (viz$`Stepwise` - 1)

ggplot(viz, aes(x = as.character(Year), y = mean_pct_improvement)) + 
    geom_point(aes(size = `# Events`), col = gos_red) + 
    geom_hline(linetype = "dotted", linewidth = 1.25, col = gos_blue, 
               yintercept = mean(viz$mean_pct_improvement)) + 
    geom_hline(yintercept = 0) +
    scale_y_continuous(labels = scales::percent) +
    labs(title = "Weighting Percent Improvement by Year", 
         subtitle = "By Stepwise Bin Optimization", 
         x = "Year", y = "Mean LOOCV Percent Improvement") + 
    gos_theme