Possible ways to improve: * features * aggregation method for models

Notes on model: * The model is bias to the offensive side of the floor.

library(tidyverse)
library(dplyr)
library(lpSolve)
library(nbastatR)
library(jsonlite)
library(lubridate)
library(rsample)
library(parsnip)
library(caret)
library(class)
library(Metrics)
library(shiny)
library(rsconnect)
library(rvest)

Sys.setenv("VROOM_CONNECTION_SIZE" = 131072 * 2)

player stats

get_season_data <- function(season) {
  base_url <- "https://www.basketball-reference.com/leagues/NBA_"
  
  # URLs for the per-minute and advanced stats pages
  url_per_minute <- paste0(base_url, season, "_per_minute.html")
  url_advanced <- paste0(base_url, season, "_advanced.html")
  
  # Scrape per-minute stats
  stats <- read_html(url_per_minute) %>%
    html_node("table") %>%
    html_table(fill = TRUE) %>%
    filter(Player != "Player") %>%
    select(-Rk)
  
  # Scrape advanced stats
  adv_stats <- read_html(url_advanced) %>%
    html_node("table") %>%
    html_table(fill = TRUE) %>%
    select(2:19, 21:24, 26:29) %>%
    filter(Player != "Player")
  
  # Combine the stats based on Player, Position, and Team
  combined_stats <- right_join(stats, adv_stats, by = c("Player", "Pos", "Tm"))
  
  # Add a column identifying the season
  combined_stats <- combined_stats %>%
    mutate(Season = season)
  
  return(combined_stats)
}

# Loop through the seasons and get data
get_multiple_seasons_data <- function(start_year, end_year) {
  season_data <- list()
  
  for (season in start_year:end_year) {
    print(paste("Getting data for season:", season))
    season_data[[as.character(season)]] <- get_season_data(season)
  }
  
  return(season_data)
}

# Get data for seasons from 2015 to 2024
all_seasons_data <- get_multiple_seasons_data(2015, 2024)
## [1] "Getting data for season: 2015"
## [1] "Getting data for season: 2016"
## [1] "Getting data for season: 2017"
## [1] "Getting data for season: 2018"
## [1] "Getting data for season: 2019"
## [1] "Getting data for season: 2020"
## [1] "Getting data for season: 2021"
## [1] "Getting data for season: 2022"
## [1] "Getting data for season: 2023"
## [1] "Getting data for season: 2024"
combined_df <- bind_rows(all_seasons_data)
library(stringi)
combined_df$Player <- stri_trans_general(combined_df$Player, "Latin-ASCII")

combined_df$`3P%` <- as.numeric(combined_df$`3P%`)
combined_df$`FT%` <- as.numeric(combined_df$`FT%`)
combined_df$`TS%` <- as.numeric(combined_df$`TS%`)
combined_df$`3PAr` <- as.numeric(combined_df$`3PAr`)
combined_df$`FTr` <- as.numeric(combined_df$`FTr`)
combined_df$`TRB%` <- as.numeric(combined_df$`TRB%`)
combined_df$`BLK%` <- as.numeric(combined_df$`BLK%`)
combined_df$`AST%` <- as.numeric(combined_df$`AST%`)

player_pos <- combined_df %>%
  mutate(groupPosition = case_when(
    Pos == "PF" ~ 2,
    Pos == "SG" ~ 2,
    Pos == "C" ~ 3,
    Pos == "SF" ~ 2,
    Pos == "PG" ~ 1,
    Pos == "SG-PG" ~ 1,
    Pos == "PG-SG" ~ 1,
    Pos == "SF-PF" ~ 2,
    Pos == "SF-SG" ~ 2,
    Pos == "SG-SF" ~ 2,
    Pos == "PF-SF" ~ 2,
    Pos == "PF-C" ~ 1,
    Pos == "SG-PF" ~ 2,
    Pos == "C-PF" ~ 1,
    Pos == "SF-C" ~ 2,
    Pos == "SG-PG-SF" ~ 1,
    TRUE ~ 2
  )) %>%
  select(Player, Tm, Season, groupPosition)

combined_df$`USG%` <- as.numeric(combined_df$`USG%`)

player_df <- combined_df %>%
  mutate(`TRB%` = `TRB%` * .01,
         `AST%` = `AST%` * .01,
         `BLK%` = `BLK%` * .01,
         `USG%` = `USG%` * .01) %>%
  group_by(Player, Season) %>%
  mutate(avg_pct_fg3 = (`3P%` + ifelse(is.na(lag(`3P%`)), 0.32, lag(`3P%`))) / 2,
         avg_3pt_rate = (`3PAr` + ifelse(is.na(lag(`3PAr`)), 0.36, lag(`3PAr`))) / 2,
         avg_TS = (`TS%` + ifelse(is.na(lag(`TS%`)), 0.53, lag(`TS%`))) / 2,
         avg_ft_pct = (`FT%` + ifelse(is.na(lag(`FT%`)), 0.74, lag(`FT%`))) / 2,
         avg_ft_rate = (`FTr` + ifelse(is.na(lag(`FTr`)), 0.26, lag(`FTr`))) / 2,
         avg_treb = (`TRB%` + ifelse(is.na(lag(`TRB%`)), 0.09, lag(`TRB%`))) / 2,
         avg_ast_pct = (`AST%` + ifelse(is.na(lag(`AST%`)), 0.13, lag(`AST%`))) / 2,
         avg_blk_pct = (`BLK%` + ifelse(is.na(lag(`BLK%`)), 0.02, lag(`BLK%`))) / 2,
         avg_usage = (`USG%` + ifelse(is.na(lag(`USG%`)), 0.19, lag(`USG%`))) / 2
           ) %>%
  ungroup() %>%
  na.omit()

select_stats <- right_join(player_df, player_pos, by = c("Player", "Season")) %>%
  rename(namePlayer = Player,
         slugTeamBREF = Tm.x) %>%
  mutate(pred_season = Season + 1) %>%
  select(namePlayer,
         slugTeamBREF,
         avg_pct_fg3,
         avg_3pt_rate,
         avg_TS,
         avg_ft_pct,
         avg_ft_rate,
         avg_treb,
         avg_ast_pct,
         avg_blk_pct,
         groupPosition,
         pred_season)
stats <- right_join(player_df, player_pos, by = c("Player", "Season")) %>%
  rename(namePlayer = Player,
         teamAbbreviation = Tm.x,
         yearSeason = Season) %>%
  select(namePlayer,
         teamAbbreviation,
         avg_pct_fg3,
         avg_3pt_rate,
         avg_TS,
         avg_ft_pct,
         avg_ft_rate,
         avg_treb,
         avg_ast_pct,
         avg_blk_pct,
         groupPosition,
         yearSeason,
         avg_usage)

lineup stats

seasons <- c("2023-24", "2022-23", "2021-22", "2020-21", "2019-20", "2018-19", "2017-18", "2016-17", "2015-16", "2014-15")

# Initialize an empty list to store data frames
all_seasons_data <- list()

# Loop through each season and fetch the data
for (season in seasons) {
  url <- paste0("https://api.pbpstats.com/get-totals/nba?Season=", season, "&SeasonType=Regular%2BSeason&Type=Lineup")
  data <- fromJSON(url)
  lineup_data <- data[["multi_row_table_data"]]
  
  # Convert to data frame, add season column
  lineup_data <- as.data.frame(lineup_data)
  lineup_data$season <- season  # Add season year
  
  # Store the data frame in the list
  all_seasons_data[[season]] <- lineup_data
}

# Combine all seasons data into one data frame
combined_data <- bind_rows(all_seasons_data)
combined_data <- combined_data %>%
  filter(!is.na(PlusMinus))

lineup_data <- combined_data %>%
  filter(OffPoss > 150) %>%
  separate(Name, into = c("Player 1", "Player 2", "Player 3", "Player 4", "Player 5"), sep = ", ") %>%
  mutate(pm_per48 = (PlusMinus/Minutes) * 48) %>%
  rename(lineup_pm = PlusMinus) %>%
  select(TeamAbbreviation,
         season,
         pm_per48,
         `Player 1`,
         `Player 2`,
         `Player 3`,
         `Player 4`,
         `Player 5`) %>%
  mutate(pred_season = case_when(
    season == "2023-24" ~ "2024",
    season == "2022-23" ~ "2023",
    season == "2021-22" ~ "2022",
    season == "2020-21" ~ "2021",
    season == "2019-20" ~ "2020",
    season == "2018-19" ~ "2019",
    season == "2017-18" ~ "2018",
    season == "2016-17" ~ "2017",
    season == "2015-16" ~ "2016",
    TRUE ~ "2015"
  )) %>%
  select(-c(season))
lineup_data$pred_season <- as.numeric(lineup_data$pred_season)
df_if <- lineup_data %>% 
  pivot_longer(
    cols = starts_with("Player"),   # Select the player columns
    names_to = "Player",            # Create a new column for the player identifier
    values_to = "namePlayer"        # Create a new column for player names
  ) %>%
  arrange(pm_per48) %>%
  mutate(yearSeason = pred_season - 1) %>%
  select(-c("Player")) %>%
  distinct()
df_if2 <- right_join( stats, df_if, by = c("namePlayer", "yearSeason")) %>%
  distinct() %>%
  select(pm_per48,
         TeamAbbreviation,
         namePlayer,
         groupPosition,
         yearSeason,
         avg_usage) %>%
  group_by(pm_per48, TeamAbbreviation) %>%
  mutate(usg_rank = rank(desc(avg_usage))) %>%
  arrange(usg_rank, pm_per48) %>%
  ungroup() %>%
  mutate(pred_season = yearSeason + 1) %>%
  select(namePlayer,
         pm_per48,
         TeamAbbreviation,
         usg_rank,
         pred_season) %>%
  filter(usg_rank < 6) %>%
  mutate(`Player 1` = case_when(
    usg_rank == 1 ~ namePlayer,
    TRUE ~ "NA"
  ),
  `Player 2` = case_when(
    usg_rank == 2 ~ namePlayer,
    TRUE ~ "NA"
  ),
  `Player 3` = case_when(
    usg_rank == 3 ~ namePlayer,
    TRUE ~ "NA"
  ),
  `Player 4` = case_when(
    usg_rank == 4 ~ namePlayer,
    TRUE ~ "NA"
  ),
  `Player 5` = case_when(
    usg_rank == 5 ~ namePlayer,
    TRUE ~ "NA"
  ))

combining data frames

player_1 <- df_if2 %>%
  select(namePlayer,
         pm_per48,
         TeamAbbreviation,
         pred_season,
         `Player 1`) %>%
  filter( `Player 1` != "NA") %>%
  inner_join(select_stats, by = c("namePlayer", "pred_season")) %>%
  rename_with(.fn = ~ paste0(., ".1"), .cols = c(7:15)) %>%
  select(2:5, 7:14)
player_2 <- df_if2 %>%
  select(namePlayer,
         pm_per48,
         TeamAbbreviation,
         pred_season,
         `Player 2`) %>%
  filter( `Player 2` != "NA") %>%
  inner_join(select_stats, by = c("namePlayer", "pred_season")) %>%
  rename_with(.fn = ~ paste0(., ".2"), .cols = c(7:15)) %>%
  select(2:5, 7:14)
player_3 <- df_if2 %>%
  select(namePlayer,
         pm_per48,
         TeamAbbreviation,
         pred_season,
         `Player 3`) %>%
  filter( `Player 3` != "NA") %>%
  inner_join(select_stats, by = c("namePlayer", "pred_season")) %>%
  rename_with(.fn = ~ paste0(., ".3"), .cols = c(7:15)) %>%
  select(2:5, 7:14)
player_4 <- df_if2 %>%
  select(namePlayer,
         pm_per48,
         TeamAbbreviation,
         pred_season,
         `Player 4`) %>%
  filter( `Player 4` != "NA") %>%
  inner_join(select_stats, by = c("namePlayer", "pred_season")) %>%
  rename_with(.fn = ~ paste0(., ".4"), .cols = c(7:15)) %>%
  select(2:5, 7:14)
player_5 <- df_if2 %>%
  select(namePlayer,
         pm_per48,
         TeamAbbreviation,
         pred_season,
         `Player 5`) %>%
  filter( `Player 5` != "NA") %>%
  inner_join(select_stats, by = c("namePlayer", "pred_season")) %>%
  rename_with(.fn = ~ paste0(., ".5"), .cols = c(7:15)) %>%
  select(2:5, 7:14)


player_lineup <- inner_join(player_1, player_2, by = c("TeamAbbreviation", "pm_per48", "pred_season")) %>%
  inner_join(player_3, by = c("TeamAbbreviation", "pm_per48", "pred_season")) %>%
  inner_join(player_4, by = c("TeamAbbreviation", "pm_per48", "pred_season")) %>%
  inner_join(player_5, by = c("TeamAbbreviation", "pm_per48", "pred_season")) %>%
  distinct() %>%
  na.omit()

data splitting

model_df <- player_lineup %>%
  arrange()

set.seed(3445)
v_split <- initial_split(model_df, prop = .90, strata = pm_per48)

v_train <- v_split %>%
  training() 
v_train_c <- v_train %>%
  select(1, 5:12, 14:21, 23:30, 32:39, 41:48)

v_test <- v_split %>%
  testing() 
v_test_c <- v_test %>%
  select(1, 5:12, 14:21, 23:30, 32:39, 41:48)

sd(v_train$pm_per48)
## [1] 13.11273

SVM

svm_model <- svm_linear() %>%
  set_engine("kernlab") %>%
  set_mode("regression")

svm_fit <- svm_model %>%
  fit(pm_per48 ~ ., data = v_train_c)
##  Setting default kernel parameters
svm_preds <- predict(svm_fit, new_data = v_test_c) %>%
  as.data.frame() %>%
  cbind(v_test$pm_per48)

MAE(svm_preds$.pred, v_test$pm_per48, na.rm = FALSE)
## [1] 9.18055

rand forest

rf_model <- rand_forest() %>%
  set_engine("ranger") %>%
  set_mode("regression")
rf_fit <- rf_model %>%
  fit(pm_per48 ~ ., data = v_train_c)
rf_preds <- predict(rf_fit, new_data = v_test_c) %>%
  as.data.frame() %>%
  cbind(v_test$pm_per48)

MAE(rf_preds$.pred, v_test$pm_per48, na.rm = FALSE)
## [1] 3.934291

gbm

gbm_model <- boost_tree() %>%
  set_engine("xgboost") %>%
  set_mode("regression")

gbm_fit <- gbm_model %>%
  fit(pm_per48 ~ ., data = v_train_c)

gbm_preds <- predict(gbm_fit, new_data = v_test_c) %>%
  as.data.frame() %>%
  cbind(v_test$pm_per48)

MAE(gbm_preds$.pred, v_test$pm_per48, na.rm = FALSE)
## [1] 4.471482

data for predictions # adjust data for 2025 rosters, could not find data source for ’25 rosters

stats_24 <- select_stats %>%
  filter(pred_season == 2024)
player_pool <- stats_24 %>%
  select(-c(pred_season)) %>%
  rename(team = slugTeamBREF) %>%
  distinct(namePlayer, .keep_all = TRUE) %>%
  na.omit()

lineup function

lineup_function <- function(usage_1, usage_2, usage_3, usage_4, option) {
  starter_1 <- player_pool %>%
  filter(namePlayer == usage_1) %>%
  rename_with(.fn = ~ paste0(., ".1"), .cols = c(2:11)) %>%
  select(-c(1:2))
starter_2 <- player_pool %>%
  filter(namePlayer == usage_2) %>%
  rename_with(.fn = ~ paste0(., ".2"), .cols = c(2:11)) %>%
  select(-c(1:2))
starter_3 <- player_pool %>%
  filter(namePlayer == usage_3) %>%
  rename_with(.fn = ~ paste0(., ".3"), .cols = c(2:11)) %>%
  select(-c(1:2))

starter_4 <- player_pool %>%
  filter(namePlayer == usage_4) %>%
  rename_with(.fn = ~ paste0(., ".4"), .cols = c(2:11)) %>%
  select(-c(1:2))
starter_5 <- player_pool %>%
  filter(namePlayer == option) %>%
  rename_with(.fn = ~ paste0(., ".5"), .cols = c(2:11)) %>%
  select(-c(1:2))

pred_df <- cbind(starter_1, starter_2) %>%
  cbind(starter_3) %>%
  cbind(starter_4) %>%
  cbind(starter_5)

svm <- predict(svm_fit, new_data = pred_df)
rf <- predict(rf_fit, pred_df)
gbm <- predict(gbm_fit, pred_df)

return((svm + rf + gbm)/3)
}

lineup_function("Nikola Jokic","Jamal Murray", "Michael Porter Jr.", "Aaron Gordon", "Christian Braun")
##      .pred
## 1 13.14604

4 + 1 optimizer

set_players <- c("Nikola Jokic", "Jamal Murray","Michael Porter Jr.", "Aaron Gordon" )

# Possible player options from the player pool, excluding set players
possible_options <- player_pool %>%
  filter(team == "DEN", !namePlayer %in% set_players) %>%
  select(namePlayer) %>%
  pull(namePlayer)


# Function to evaluate a lineup
optimize_lineup <- function(index) {
  index <- round(index)
  index <- max(min(index, length(possible_options)), 1)
  
  option <- possible_options[index]
  
  score <- lineup_function("Nikola Jokic", "Jamal Murray","Michael Porter Jr.", "Aaron Gordon", option)
  
  return(as.numeric(score))
}

# Perform grid search over all possible options
scores <- sapply(1:length(possible_options), optimize_lineup)

# Find the index of the maximum score
best_index <- which.max(scores)

# Get the best player name
possible_options[best_index]
## [1] "Jack White"

3 + 2 optimizer

set_players <- c("Nikola Jokic", "Jamal Murray",  "Michael Porter Jr.")

# Possible player options from the player pool, excluding set players
possible_options <- player_pool %>%
  filter(team == "DEN", !namePlayer %in% set_players) %>%
  select(namePlayer) %>%
  pull(namePlayer)

# Function to evaluate a lineup with two additional players
optimize_lineup <- function(index1, index2) {
  index1 <- max(min(round(index1), length(possible_options)), 1)
  index2 <- max(min(round(index2), length(possible_options)), 1)
  
  option1 <- possible_options[index1]
  option2 <- possible_options[index2]
  
  # Ensure the two options are different
  if (option1 == option2) {
    return(-Inf) # Return a very low score if the same player is selected twice
  }
  
  # Calculate the score
  score <- lineup_function("Nikola Jokic", "Jamal Murray",  "Michael Porter Jr.", option1, option2)
  
  # Check if score is a list and extract numeric value
  if (is.list(score)) {
    score <- unlist(score)  # Convert list to vector
    score <- as.numeric(score)  # Ensure it’s numeric
  }
  
  # Print diagnostic information
  cat("Testing lineup:", option1, "&", option2, "- Score:", score, "\n")
  
  return(score)
}

# Initialize best score tracking
best_score <- -Inf
best_indices <- c(NA, NA)

# Perform grid search over all possible pairs of options
for (i in 1:length(possible_options)) {
  for (j in 1:length(possible_options)) {
    if (i != j) {
      score <- optimize_lineup(i, j)
      if (score > best_score) {
        best_score <- score
        best_indices <- c(i, j)
      }
    }
  }
}
## Testing lineup: Christian Braun & Bruce Brown - Score: 12.81038 
## Testing lineup: Christian Braun & Kentavious Caldwell-Pope - Score: 12.22376 
## Testing lineup: Christian Braun & Vlatko Cancar - Score: 13.01562 
## Testing lineup: Christian Braun & Aaron Gordon - Score: 10.26757 
## Testing lineup: Christian Braun & Jeff Green - Score: 10.61906 
## Testing lineup: Christian Braun & DeAndre Jordan - Score: 10.05918 
## Testing lineup: Christian Braun & Zeke Nnaji - Score: 11.19497 
## Testing lineup: Christian Braun & Ish Smith - Score: 12.30803 
## Testing lineup: Christian Braun & Peyton Watson - Score: 12.39207 
## Testing lineup: Christian Braun & Jack White - Score: 14.5304 
## Testing lineup: Bruce Brown & Christian Braun - Score: 11.42019 
## Testing lineup: Bruce Brown & Kentavious Caldwell-Pope - Score: 13.01836 
## Testing lineup: Bruce Brown & Vlatko Cancar - Score: 13.43407 
## Testing lineup: Bruce Brown & Aaron Gordon - Score: 11.00125 
## Testing lineup: Bruce Brown & Jeff Green - Score: 10.6682 
## Testing lineup: Bruce Brown & DeAndre Jordan - Score: 10.71295 
## Testing lineup: Bruce Brown & Zeke Nnaji - Score: 12.13993 
## Testing lineup: Bruce Brown & Ish Smith - Score: 12.7724 
## Testing lineup: Bruce Brown & Peyton Watson - Score: 13.20257 
## Testing lineup: Bruce Brown & Jack White - Score: 14.28003 
## Testing lineup: Kentavious Caldwell-Pope & Christian Braun - Score: 9.596965 
## Testing lineup: Kentavious Caldwell-Pope & Bruce Brown - Score: 10.91677 
## Testing lineup: Kentavious Caldwell-Pope & Vlatko Cancar - Score: 10.88597 
## Testing lineup: Kentavious Caldwell-Pope & Aaron Gordon - Score: 9.622495 
## Testing lineup: Kentavious Caldwell-Pope & Jeff Green - Score: 9.286749 
## Testing lineup: Kentavious Caldwell-Pope & DeAndre Jordan - Score: 9.56706 
## Testing lineup: Kentavious Caldwell-Pope & Zeke Nnaji - Score: 10.88296 
## Testing lineup: Kentavious Caldwell-Pope & Ish Smith - Score: 11.36598 
## Testing lineup: Kentavious Caldwell-Pope & Peyton Watson - Score: 11.6906 
## Testing lineup: Kentavious Caldwell-Pope & Jack White - Score: 12.83109 
## Testing lineup: Vlatko Cancar & Christian Braun - Score: 10.49234 
## Testing lineup: Vlatko Cancar & Bruce Brown - Score: 11.92766 
## Testing lineup: Vlatko Cancar & Kentavious Caldwell-Pope - Score: 12.01056 
## Testing lineup: Vlatko Cancar & Aaron Gordon - Score: 10.53862 
## Testing lineup: Vlatko Cancar & Jeff Green - Score: 10.30396 
## Testing lineup: Vlatko Cancar & DeAndre Jordan - Score: 10.34721 
## Testing lineup: Vlatko Cancar & Zeke Nnaji - Score: 11.78844 
## Testing lineup: Vlatko Cancar & Ish Smith - Score: 12.01256 
## Testing lineup: Vlatko Cancar & Peyton Watson - Score: 12.65775 
## Testing lineup: Vlatko Cancar & Jack White - Score: 13.46175 
## Testing lineup: Aaron Gordon & Christian Braun - Score: 13.14604 
## Testing lineup: Aaron Gordon & Bruce Brown - Score: 14.62781 
## Testing lineup: Aaron Gordon & Kentavious Caldwell-Pope - Score: 14.99774 
## Testing lineup: Aaron Gordon & Vlatko Cancar - Score: 15.45113 
## Testing lineup: Aaron Gordon & Jeff Green - Score: 12.46436 
## Testing lineup: Aaron Gordon & DeAndre Jordan - Score: 12.95941 
## Testing lineup: Aaron Gordon & Zeke Nnaji - Score: 13.15494 
## Testing lineup: Aaron Gordon & Ish Smith - Score: 14.00192 
## Testing lineup: Aaron Gordon & Peyton Watson - Score: 15.32453 
## Testing lineup: Aaron Gordon & Jack White - Score: 16.48719 
## Testing lineup: Jeff Green & Christian Braun - Score: 9.985766 
## Testing lineup: Jeff Green & Bruce Brown - Score: 11.80155 
## Testing lineup: Jeff Green & Kentavious Caldwell-Pope - Score: 11.13157 
## Testing lineup: Jeff Green & Vlatko Cancar - Score: 11.50021 
## Testing lineup: Jeff Green & Aaron Gordon - Score: 9.907702 
## Testing lineup: Jeff Green & DeAndre Jordan - Score: 9.68252 
## Testing lineup: Jeff Green & Zeke Nnaji - Score: 11.12452 
## Testing lineup: Jeff Green & Ish Smith - Score: 11.89651 
## Testing lineup: Jeff Green & Peyton Watson - Score: 11.94738 
## Testing lineup: Jeff Green & Jack White - Score: 13.26404 
## Testing lineup: DeAndre Jordan & Christian Braun - Score: 13.29611 
## Testing lineup: DeAndre Jordan & Bruce Brown - Score: 15.29853 
## Testing lineup: DeAndre Jordan & Kentavious Caldwell-Pope - Score: 13.63813 
## Testing lineup: DeAndre Jordan & Vlatko Cancar - Score: 14.25197 
## Testing lineup: DeAndre Jordan & Aaron Gordon - Score: 13.84443 
## Testing lineup: DeAndre Jordan & Jeff Green - Score: 13.13885 
## Testing lineup: DeAndre Jordan & Zeke Nnaji - Score: 13.37733 
## Testing lineup: DeAndre Jordan & Ish Smith - Score: 15.69764 
## Testing lineup: DeAndre Jordan & Peyton Watson - Score: 15.9682 
## Testing lineup: DeAndre Jordan & Jack White - Score: 17.0646 
## Testing lineup: Zeke Nnaji & Christian Braun - Score: 10.2177 
## Testing lineup: Zeke Nnaji & Bruce Brown - Score: 12.01631 
## Testing lineup: Zeke Nnaji & Kentavious Caldwell-Pope - Score: 11.61722 
## Testing lineup: Zeke Nnaji & Vlatko Cancar - Score: 12.17461 
## Testing lineup: Zeke Nnaji & Aaron Gordon - Score: 9.241953 
## Testing lineup: Zeke Nnaji & Jeff Green - Score: 9.859347 
## Testing lineup: Zeke Nnaji & DeAndre Jordan - Score: 9.188334 
## Testing lineup: Zeke Nnaji & Ish Smith - Score: 11.51874 
## Testing lineup: Zeke Nnaji & Peyton Watson - Score: 11.54159 
## Testing lineup: Zeke Nnaji & Jack White - Score: 13.83217 
## Testing lineup: Ish Smith & Christian Braun - Score: 12.73167 
## Testing lineup: Ish Smith & Bruce Brown - Score: 13.8487 
## Testing lineup: Ish Smith & Kentavious Caldwell-Pope - Score: 14.5614 
## Testing lineup: Ish Smith & Vlatko Cancar - Score: 14.7328 
## Testing lineup: Ish Smith & Aaron Gordon - Score: 13.02006 
## Testing lineup: Ish Smith & Jeff Green - Score: 12.15873 
## Testing lineup: Ish Smith & DeAndre Jordan - Score: 13.11353 
## Testing lineup: Ish Smith & Zeke Nnaji - Score: 12.87061 
## Testing lineup: Ish Smith & Peyton Watson - Score: 15.67459 
## Testing lineup: Ish Smith & Jack White - Score: 16.12249 
## Testing lineup: Peyton Watson & Christian Braun - Score: 10.66258 
## Testing lineup: Peyton Watson & Bruce Brown - Score: 12.44581 
## Testing lineup: Peyton Watson & Kentavious Caldwell-Pope - Score: 12.01607 
## Testing lineup: Peyton Watson & Vlatko Cancar - Score: 12.70016 
## Testing lineup: Peyton Watson & Aaron Gordon - Score: 10.69442 
## Testing lineup: Peyton Watson & Jeff Green - Score: 10.29612 
## Testing lineup: Peyton Watson & DeAndre Jordan - Score: 10.65856 
## Testing lineup: Peyton Watson & Zeke Nnaji - Score: 10.88338 
## Testing lineup: Peyton Watson & Ish Smith - Score: 11.98263 
## Testing lineup: Peyton Watson & Jack White - Score: 14.3618 
## Testing lineup: Jack White & Christian Braun - Score: 11.49973 
## Testing lineup: Jack White & Bruce Brown - Score: 12.83736 
## Testing lineup: Jack White & Kentavious Caldwell-Pope - Score: 12.44981 
## Testing lineup: Jack White & Vlatko Cancar - Score: 13.09586 
## Testing lineup: Jack White & Aaron Gordon - Score: 12.06232 
## Testing lineup: Jack White & Jeff Green - Score: 11.09656 
## Testing lineup: Jack White & DeAndre Jordan - Score: 12.00284 
## Testing lineup: Jack White & Zeke Nnaji - Score: 12.67209 
## Testing lineup: Jack White & Ish Smith - Score: 13.643 
## Testing lineup: Jack White & Peyton Watson - Score: 14.31534
# Get the best player names
best_player1 <- possible_options[best_indices[1]]
best_player2 <- possible_options[best_indices[2]]

shiny app - 4 + 1

ui <- fluidPage(
  titlePanel("Lineup Optimizer"),
  
  sidebarLayout(
    sidebarPanel(
      helpText("Note: The order of players matters. Usage Player 1 is expected to lead the lineup in usage and so forth with the last Player being the player with the lowest expected usage."),
      selectInput("team", "Select Team:", choices = unique(player_pool$team)),
      selectInput("set_player1", "Usage Player 1:", choices = NULL),
      selectInput("set_player2", "Usage Player 2:", choices = NULL),
      selectInput("set_player3", "Usage Player 3:", choices = NULL),
      selectInput("set_player4", "Usage Player 4:", choices = NULL),
      
      checkboxGroupInput("exclude_players", "Exclude Players:",
                         choices = NULL, 
                         selected = NULL),
      
      actionButton("optimize", "Optimize Lineup")
    ),
    
    mainPanel(
      h3("Set Players"),
      verbatimTextOutput("set_players"),
      h3("Best Additional Player"),
      verbatimTextOutput("best_player"),
      h3("Scores"),
      tableOutput("score_table")
    )
  )
)

server <- function(input, output, session) {
  
  observe({
    team_players <- player_pool %>% 
      filter(team == input$team) %>% 
      select(namePlayer) %>% 
      pull(namePlayer)
    
    updateSelectInput(session, "set_player1", choices = team_players, selected = team_players[1])
    updateSelectInput(session, "set_player2", choices = team_players, selected = team_players[2])
    updateSelectInput(session, "set_player3", choices = team_players, selected = team_players[3])
    updateSelectInput(session, "set_player4", choices = team_players, selected = team_players[4])
    
    updateCheckboxGroupInput(session, "exclude_players", choices = team_players, selected = NULL)
  })
  
  observeEvent(input$optimize, {
    set_players <- c(input$set_player1, input$set_player2, input$set_player3, input$set_player4)
    
    possible_options <- player_pool %>%
      filter(team == input$team, !namePlayer %in% set_players, !namePlayer %in% input$exclude_players) %>%
      select(namePlayer) %>%
      pull(namePlayer)
    
    optimize_lineup <- function(index) {
      index <- round(index)
      index <- max(min(index, length(possible_options)), 1)
      
      option <- possible_options[index]
      score <- lineup_function(input$set_player1, input$set_player2, input$set_player3, input$set_player4, option)
      
      return(as.numeric(score))
    }
    
    scores <- sapply(1:length(possible_options), optimize_lineup)
    
    best_index <- which.max(scores)
    
    best_player <- possible_options[best_index]
    
    output$set_players <- renderText({ paste(set_players, collapse = ", ") })
    output$best_player <- renderText({ best_player })
    output$score_table <- renderTable({
      data.frame(Player = possible_options, Score = scores)
    })
  })
}
shinyApp(ui = ui, server = server)
Shiny applications not supported in static R Markdown documents

shiny app - 3 + 2

ui_2 <- fluidPage(
  titlePanel("Lineup Optimization"),
  
  sidebarLayout(
    sidebarPanel(
      helpText("Note: The order of players matters. Usage Player 1 is expected to lead the lineup in usage and so forth with the last Player being the player with the lowest expected usage."),
      selectInput("team_filter", "Select Team:",
                  choices = unique(player_pool$team),
                  selected = NULL, 
                  multiple = FALSE),
      
      selectInput("set_player1", "Select Usage Player 1:",
                  choices = NULL, 
                  selected = NULL),
      
      selectInput("set_player2", "Select Usage Player 2:",
                  choices = NULL, 
                  selected = NULL),
      
      selectInput("set_player3", "Select Usage Player 3:",
                  choices = NULL, 
                  selected = NULL),
      
      checkboxGroupInput("exclude_players", "Exclude Players:",
                         choices = NULL, 
                         selected = NULL),
      
      actionButton("run_optimization", "Run Optimization")
    ),
    
    mainPanel(
      textOutput("best_players")
    )
  )
)

server_2 <- function(input, output, session) {
  
  observe({
    req(input$team_filter)
    
    team_players <- player_pool %>%
      filter(team == input$team_filter) %>%
      pull(namePlayer)
    
    updateSelectInput(session, "set_player1", choices = team_players, selected = NULL)
    updateSelectInput(session, "set_player2", choices = team_players, selected = NULL)
    updateSelectInput(session, "set_player3", choices = team_players, selected = NULL)
    updateCheckboxGroupInput(session, "exclude_players", choices = team_players, selected = NULL)
  })
  
  observeEvent(input$run_optimization, {
    req(input$set_player1, input$set_player2, input$set_player3)
    
    set_players <- c(input$set_player1, input$set_player2, input$set_player3)
    
    # Possible player options from the player pool, excluding set and excluded players
    possible_options <- player_pool %>%
      filter(team == input$team_filter, !namePlayer %in% set_players, !namePlayer %in% input$exclude_players) %>%
      pull(namePlayer)
    
    # Check if possible_options is empty
    if (length(possible_options) < 2) {
      output$best_players <- renderText({
        "Not enough players available for optimization."
      })
      return()
    }
    
    # Function to evaluate a lineup with two additional players
    optimize_lineup <- function(index1, index2) {
      index1 <- max(min(round(index1), length(possible_options)), 1)
      index2 <- max(min(round(index2), length(possible_options)), 1)
      
      option1 <- possible_options[index1]
      option2 <- possible_options[index2]
      
      # Ensure the two options are different
      if (option1 == option2) {
        return(-Inf) # Return a very low score if the same player is selected twice
      }
      
      # Calculate the score
      score <- lineup_function(set_players[1], set_players[2], set_players[3], option1, option2)
      
      # Ensure score is numeric
      if (is.list(score)) {
        score <- unlist(score)  # Convert list to vector
        score <- as.numeric(score)  # Ensure it’s numeric
      }
      
      # Print diagnostic information
      if (is.numeric(score)) {
        cat("Testing lineup:", option1, "&", option2, "- Score:", score, "\n")
      } else {
        cat("Testing lineup:", option1, "&", option2, "- Score: Error\n")
      }
      
      return(score)
    }
    
    # Initialize best score tracking
    best_score <- -Inf
    best_indices <- c(NA, NA)
    
    # Perform grid search over all possible pairs of options
    for (i in 1:length(possible_options)) {
      for (j in 1:length(possible_options)) {
        if (i != j) {
          score <- optimize_lineup(i, j)
          if (score > best_score) {
            best_score <- score
            best_indices <- c(i, j)
          }
        }
      }
    }
    
    # Get the best player names
    best_player1 <- possible_options[best_indices[1]]
    best_player2 <- possible_options[best_indices[2]]
    
    output$best_players <- renderText({
      paste("Best players to add:", best_player1, "and", best_player2)
    })
  })
}

shinyApp(ui = ui_2, server = server_2)
Shiny applications not supported in static R Markdown documents

model check for ’24

v_24 <- v_train %>%
  filter(pred_season == 2024) 
v_24c <- v_24 %>%
  select(1, 5:12, 14:21, 23:30, 32:39, 41:48)

svm_24 <- predict(svm_fit, new_data = v_24c) %>%
  as.data.frame() %>%
  rename(svm_pred = .pred)
rf_24 <- predict(rf_fit, new_data = v_24c) %>%
  as.data.frame() %>%
  rename(rf_pred = .pred)
gbm_24 <- predict(gbm_fit, new_data = v_24c) %>%
  as.data.frame() %>%
  rename(gbm_pred = .pred)

df_24 <- cbind(svm_24, rf_24) %>%
  cbind(gbm_24) %>%
  mutate(combined_pred = (svm_pred + rf_pred + gbm_pred)/3) %>%
  cbind(v_24$pm_per48)

RMSE(df_24$combined_pred, df_24$`v_24$pm_per48`)
## [1] 5.156495
mean(df_24$combined_pred)
## [1] 4.319881
mean(df_24$`v_24$pm_per48`)
## [1] 3.637195