Possible ways to improve: * features * aggregation method for models
Notes on model: * The model is bias to the offensive side of the floor.
library(tidyverse)
library(dplyr)
library(lpSolve)
library(nbastatR)
library(jsonlite)
library(lubridate)
library(rsample)
library(parsnip)
library(caret)
library(class)
library(Metrics)
library(shiny)
library(rsconnect)
library(rvest)
Sys.setenv("VROOM_CONNECTION_SIZE" = 131072 * 2)
player stats
get_season_data <- function(season) {
base_url <- "https://www.basketball-reference.com/leagues/NBA_"
# URLs for the per-minute and advanced stats pages
url_per_minute <- paste0(base_url, season, "_per_minute.html")
url_advanced <- paste0(base_url, season, "_advanced.html")
# Scrape per-minute stats
stats <- read_html(url_per_minute) %>%
html_node("table") %>%
html_table(fill = TRUE) %>%
filter(Player != "Player") %>%
select(-Rk)
# Scrape advanced stats
adv_stats <- read_html(url_advanced) %>%
html_node("table") %>%
html_table(fill = TRUE) %>%
select(2:19, 21:24, 26:29) %>%
filter(Player != "Player")
# Combine the stats based on Player, Position, and Team
combined_stats <- right_join(stats, adv_stats, by = c("Player", "Pos", "Tm"))
# Add a column identifying the season
combined_stats <- combined_stats %>%
mutate(Season = season)
return(combined_stats)
}
# Loop through the seasons and get data
get_multiple_seasons_data <- function(start_year, end_year) {
season_data <- list()
for (season in start_year:end_year) {
print(paste("Getting data for season:", season))
season_data[[as.character(season)]] <- get_season_data(season)
}
return(season_data)
}
# Get data for seasons from 2015 to 2024
all_seasons_data <- get_multiple_seasons_data(2015, 2024)
## [1] "Getting data for season: 2015"
## [1] "Getting data for season: 2016"
## [1] "Getting data for season: 2017"
## [1] "Getting data for season: 2018"
## [1] "Getting data for season: 2019"
## [1] "Getting data for season: 2020"
## [1] "Getting data for season: 2021"
## [1] "Getting data for season: 2022"
## [1] "Getting data for season: 2023"
## [1] "Getting data for season: 2024"
combined_df <- bind_rows(all_seasons_data)
library(stringi)
combined_df$Player <- stri_trans_general(combined_df$Player, "Latin-ASCII")
combined_df$`3P%` <- as.numeric(combined_df$`3P%`)
combined_df$`FT%` <- as.numeric(combined_df$`FT%`)
combined_df$`TS%` <- as.numeric(combined_df$`TS%`)
combined_df$`3PAr` <- as.numeric(combined_df$`3PAr`)
combined_df$`FTr` <- as.numeric(combined_df$`FTr`)
combined_df$`TRB%` <- as.numeric(combined_df$`TRB%`)
combined_df$`BLK%` <- as.numeric(combined_df$`BLK%`)
combined_df$`AST%` <- as.numeric(combined_df$`AST%`)
player_pos <- combined_df %>%
mutate(groupPosition = case_when(
Pos == "PF" ~ 2,
Pos == "SG" ~ 2,
Pos == "C" ~ 3,
Pos == "SF" ~ 2,
Pos == "PG" ~ 1,
Pos == "SG-PG" ~ 1,
Pos == "PG-SG" ~ 1,
Pos == "SF-PF" ~ 2,
Pos == "SF-SG" ~ 2,
Pos == "SG-SF" ~ 2,
Pos == "PF-SF" ~ 2,
Pos == "PF-C" ~ 1,
Pos == "SG-PF" ~ 2,
Pos == "C-PF" ~ 1,
Pos == "SF-C" ~ 2,
Pos == "SG-PG-SF" ~ 1,
TRUE ~ 2
)) %>%
select(Player, Tm, Season, groupPosition)
combined_df$`USG%` <- as.numeric(combined_df$`USG%`)
player_df <- combined_df %>%
mutate(`TRB%` = `TRB%` * .01,
`AST%` = `AST%` * .01,
`BLK%` = `BLK%` * .01,
`USG%` = `USG%` * .01) %>%
group_by(Player, Season) %>%
mutate(avg_pct_fg3 = (`3P%` + ifelse(is.na(lag(`3P%`)), 0.32, lag(`3P%`))) / 2,
avg_3pt_rate = (`3PAr` + ifelse(is.na(lag(`3PAr`)), 0.36, lag(`3PAr`))) / 2,
avg_TS = (`TS%` + ifelse(is.na(lag(`TS%`)), 0.53, lag(`TS%`))) / 2,
avg_ft_pct = (`FT%` + ifelse(is.na(lag(`FT%`)), 0.74, lag(`FT%`))) / 2,
avg_ft_rate = (`FTr` + ifelse(is.na(lag(`FTr`)), 0.26, lag(`FTr`))) / 2,
avg_treb = (`TRB%` + ifelse(is.na(lag(`TRB%`)), 0.09, lag(`TRB%`))) / 2,
avg_ast_pct = (`AST%` + ifelse(is.na(lag(`AST%`)), 0.13, lag(`AST%`))) / 2,
avg_blk_pct = (`BLK%` + ifelse(is.na(lag(`BLK%`)), 0.02, lag(`BLK%`))) / 2,
avg_usage = (`USG%` + ifelse(is.na(lag(`USG%`)), 0.19, lag(`USG%`))) / 2
) %>%
ungroup() %>%
na.omit()
select_stats <- right_join(player_df, player_pos, by = c("Player", "Season")) %>%
rename(namePlayer = Player,
slugTeamBREF = Tm.x) %>%
mutate(pred_season = Season + 1) %>%
select(namePlayer,
slugTeamBREF,
avg_pct_fg3,
avg_3pt_rate,
avg_TS,
avg_ft_pct,
avg_ft_rate,
avg_treb,
avg_ast_pct,
avg_blk_pct,
groupPosition,
pred_season)
stats <- right_join(player_df, player_pos, by = c("Player", "Season")) %>%
rename(namePlayer = Player,
teamAbbreviation = Tm.x,
yearSeason = Season) %>%
select(namePlayer,
teamAbbreviation,
avg_pct_fg3,
avg_3pt_rate,
avg_TS,
avg_ft_pct,
avg_ft_rate,
avg_treb,
avg_ast_pct,
avg_blk_pct,
groupPosition,
yearSeason,
avg_usage)
lineup stats
seasons <- c("2023-24", "2022-23", "2021-22", "2020-21", "2019-20", "2018-19", "2017-18", "2016-17", "2015-16", "2014-15")
# Initialize an empty list to store data frames
all_seasons_data <- list()
# Loop through each season and fetch the data
for (season in seasons) {
url <- paste0("https://api.pbpstats.com/get-totals/nba?Season=", season, "&SeasonType=Regular%2BSeason&Type=Lineup")
data <- fromJSON(url)
lineup_data <- data[["multi_row_table_data"]]
# Convert to data frame, add season column
lineup_data <- as.data.frame(lineup_data)
lineup_data$season <- season # Add season year
# Store the data frame in the list
all_seasons_data[[season]] <- lineup_data
}
# Combine all seasons data into one data frame
combined_data <- bind_rows(all_seasons_data)
combined_data <- combined_data %>%
filter(!is.na(PlusMinus))
lineup_data <- combined_data %>%
filter(OffPoss > 150) %>%
separate(Name, into = c("Player 1", "Player 2", "Player 3", "Player 4", "Player 5"), sep = ", ") %>%
mutate(pm_per48 = (PlusMinus/Minutes) * 48) %>%
rename(lineup_pm = PlusMinus) %>%
select(TeamAbbreviation,
season,
pm_per48,
`Player 1`,
`Player 2`,
`Player 3`,
`Player 4`,
`Player 5`) %>%
mutate(pred_season = case_when(
season == "2023-24" ~ "2024",
season == "2022-23" ~ "2023",
season == "2021-22" ~ "2022",
season == "2020-21" ~ "2021",
season == "2019-20" ~ "2020",
season == "2018-19" ~ "2019",
season == "2017-18" ~ "2018",
season == "2016-17" ~ "2017",
season == "2015-16" ~ "2016",
TRUE ~ "2015"
)) %>%
select(-c(season))
lineup_data$pred_season <- as.numeric(lineup_data$pred_season)
df_if <- lineup_data %>%
pivot_longer(
cols = starts_with("Player"), # Select the player columns
names_to = "Player", # Create a new column for the player identifier
values_to = "namePlayer" # Create a new column for player names
) %>%
arrange(pm_per48) %>%
mutate(yearSeason = pred_season - 1) %>%
select(-c("Player")) %>%
distinct()
df_if2 <- right_join( stats, df_if, by = c("namePlayer", "yearSeason")) %>%
distinct() %>%
select(pm_per48,
TeamAbbreviation,
namePlayer,
groupPosition,
yearSeason,
avg_usage) %>%
group_by(pm_per48, TeamAbbreviation) %>%
mutate(usg_rank = rank(desc(avg_usage))) %>%
arrange(usg_rank, pm_per48) %>%
ungroup() %>%
mutate(pred_season = yearSeason + 1) %>%
select(namePlayer,
pm_per48,
TeamAbbreviation,
usg_rank,
pred_season) %>%
filter(usg_rank < 6) %>%
mutate(`Player 1` = case_when(
usg_rank == 1 ~ namePlayer,
TRUE ~ "NA"
),
`Player 2` = case_when(
usg_rank == 2 ~ namePlayer,
TRUE ~ "NA"
),
`Player 3` = case_when(
usg_rank == 3 ~ namePlayer,
TRUE ~ "NA"
),
`Player 4` = case_when(
usg_rank == 4 ~ namePlayer,
TRUE ~ "NA"
),
`Player 5` = case_when(
usg_rank == 5 ~ namePlayer,
TRUE ~ "NA"
))
combining data frames
player_1 <- df_if2 %>%
select(namePlayer,
pm_per48,
TeamAbbreviation,
pred_season,
`Player 1`) %>%
filter( `Player 1` != "NA") %>%
inner_join(select_stats, by = c("namePlayer", "pred_season")) %>%
rename_with(.fn = ~ paste0(., ".1"), .cols = c(7:15)) %>%
select(2:5, 7:14)
player_2 <- df_if2 %>%
select(namePlayer,
pm_per48,
TeamAbbreviation,
pred_season,
`Player 2`) %>%
filter( `Player 2` != "NA") %>%
inner_join(select_stats, by = c("namePlayer", "pred_season")) %>%
rename_with(.fn = ~ paste0(., ".2"), .cols = c(7:15)) %>%
select(2:5, 7:14)
player_3 <- df_if2 %>%
select(namePlayer,
pm_per48,
TeamAbbreviation,
pred_season,
`Player 3`) %>%
filter( `Player 3` != "NA") %>%
inner_join(select_stats, by = c("namePlayer", "pred_season")) %>%
rename_with(.fn = ~ paste0(., ".3"), .cols = c(7:15)) %>%
select(2:5, 7:14)
player_4 <- df_if2 %>%
select(namePlayer,
pm_per48,
TeamAbbreviation,
pred_season,
`Player 4`) %>%
filter( `Player 4` != "NA") %>%
inner_join(select_stats, by = c("namePlayer", "pred_season")) %>%
rename_with(.fn = ~ paste0(., ".4"), .cols = c(7:15)) %>%
select(2:5, 7:14)
player_5 <- df_if2 %>%
select(namePlayer,
pm_per48,
TeamAbbreviation,
pred_season,
`Player 5`) %>%
filter( `Player 5` != "NA") %>%
inner_join(select_stats, by = c("namePlayer", "pred_season")) %>%
rename_with(.fn = ~ paste0(., ".5"), .cols = c(7:15)) %>%
select(2:5, 7:14)
player_lineup <- inner_join(player_1, player_2, by = c("TeamAbbreviation", "pm_per48", "pred_season")) %>%
inner_join(player_3, by = c("TeamAbbreviation", "pm_per48", "pred_season")) %>%
inner_join(player_4, by = c("TeamAbbreviation", "pm_per48", "pred_season")) %>%
inner_join(player_5, by = c("TeamAbbreviation", "pm_per48", "pred_season")) %>%
distinct() %>%
na.omit()
data splitting
model_df <- player_lineup %>%
arrange()
set.seed(3445)
v_split <- initial_split(model_df, prop = .90, strata = pm_per48)
v_train <- v_split %>%
training()
v_train_c <- v_train %>%
select(1, 5:12, 14:21, 23:30, 32:39, 41:48)
v_test <- v_split %>%
testing()
v_test_c <- v_test %>%
select(1, 5:12, 14:21, 23:30, 32:39, 41:48)
sd(v_train$pm_per48)
## [1] 13.11273
SVM
svm_model <- svm_linear() %>%
set_engine("kernlab") %>%
set_mode("regression")
svm_fit <- svm_model %>%
fit(pm_per48 ~ ., data = v_train_c)
## Setting default kernel parameters
svm_preds <- predict(svm_fit, new_data = v_test_c) %>%
as.data.frame() %>%
cbind(v_test$pm_per48)
MAE(svm_preds$.pred, v_test$pm_per48, na.rm = FALSE)
## [1] 9.18055
rand forest
rf_model <- rand_forest() %>%
set_engine("ranger") %>%
set_mode("regression")
rf_fit <- rf_model %>%
fit(pm_per48 ~ ., data = v_train_c)
rf_preds <- predict(rf_fit, new_data = v_test_c) %>%
as.data.frame() %>%
cbind(v_test$pm_per48)
MAE(rf_preds$.pred, v_test$pm_per48, na.rm = FALSE)
## [1] 3.934291
gbm
gbm_model <- boost_tree() %>%
set_engine("xgboost") %>%
set_mode("regression")
gbm_fit <- gbm_model %>%
fit(pm_per48 ~ ., data = v_train_c)
gbm_preds <- predict(gbm_fit, new_data = v_test_c) %>%
as.data.frame() %>%
cbind(v_test$pm_per48)
MAE(gbm_preds$.pred, v_test$pm_per48, na.rm = FALSE)
## [1] 4.471482
data for predictions # adjust data for 2025 rosters, could not find data source for ’25 rosters
stats_24 <- select_stats %>%
filter(pred_season == 2024)
player_pool <- stats_24 %>%
select(-c(pred_season)) %>%
rename(team = slugTeamBREF) %>%
distinct(namePlayer, .keep_all = TRUE) %>%
na.omit()
lineup function
lineup_function <- function(usage_1, usage_2, usage_3, usage_4, option) {
starter_1 <- player_pool %>%
filter(namePlayer == usage_1) %>%
rename_with(.fn = ~ paste0(., ".1"), .cols = c(2:11)) %>%
select(-c(1:2))
starter_2 <- player_pool %>%
filter(namePlayer == usage_2) %>%
rename_with(.fn = ~ paste0(., ".2"), .cols = c(2:11)) %>%
select(-c(1:2))
starter_3 <- player_pool %>%
filter(namePlayer == usage_3) %>%
rename_with(.fn = ~ paste0(., ".3"), .cols = c(2:11)) %>%
select(-c(1:2))
starter_4 <- player_pool %>%
filter(namePlayer == usage_4) %>%
rename_with(.fn = ~ paste0(., ".4"), .cols = c(2:11)) %>%
select(-c(1:2))
starter_5 <- player_pool %>%
filter(namePlayer == option) %>%
rename_with(.fn = ~ paste0(., ".5"), .cols = c(2:11)) %>%
select(-c(1:2))
pred_df <- cbind(starter_1, starter_2) %>%
cbind(starter_3) %>%
cbind(starter_4) %>%
cbind(starter_5)
svm <- predict(svm_fit, new_data = pred_df)
rf <- predict(rf_fit, pred_df)
gbm <- predict(gbm_fit, pred_df)
return((svm + rf + gbm)/3)
}
lineup_function("Nikola Jokic","Jamal Murray", "Michael Porter Jr.", "Aaron Gordon", "Christian Braun")
## .pred
## 1 13.14604
4 + 1 optimizer
set_players <- c("Nikola Jokic", "Jamal Murray","Michael Porter Jr.", "Aaron Gordon" )
# Possible player options from the player pool, excluding set players
possible_options <- player_pool %>%
filter(team == "DEN", !namePlayer %in% set_players) %>%
select(namePlayer) %>%
pull(namePlayer)
# Function to evaluate a lineup
optimize_lineup <- function(index) {
index <- round(index)
index <- max(min(index, length(possible_options)), 1)
option <- possible_options[index]
score <- lineup_function("Nikola Jokic", "Jamal Murray","Michael Porter Jr.", "Aaron Gordon", option)
return(as.numeric(score))
}
# Perform grid search over all possible options
scores <- sapply(1:length(possible_options), optimize_lineup)
# Find the index of the maximum score
best_index <- which.max(scores)
# Get the best player name
possible_options[best_index]
## [1] "Jack White"
3 + 2 optimizer
set_players <- c("Nikola Jokic", "Jamal Murray", "Michael Porter Jr.")
# Possible player options from the player pool, excluding set players
possible_options <- player_pool %>%
filter(team == "DEN", !namePlayer %in% set_players) %>%
select(namePlayer) %>%
pull(namePlayer)
# Function to evaluate a lineup with two additional players
optimize_lineup <- function(index1, index2) {
index1 <- max(min(round(index1), length(possible_options)), 1)
index2 <- max(min(round(index2), length(possible_options)), 1)
option1 <- possible_options[index1]
option2 <- possible_options[index2]
# Ensure the two options are different
if (option1 == option2) {
return(-Inf) # Return a very low score if the same player is selected twice
}
# Calculate the score
score <- lineup_function("Nikola Jokic", "Jamal Murray", "Michael Porter Jr.", option1, option2)
# Check if score is a list and extract numeric value
if (is.list(score)) {
score <- unlist(score) # Convert list to vector
score <- as.numeric(score) # Ensure it’s numeric
}
# Print diagnostic information
cat("Testing lineup:", option1, "&", option2, "- Score:", score, "\n")
return(score)
}
# Initialize best score tracking
best_score <- -Inf
best_indices <- c(NA, NA)
# Perform grid search over all possible pairs of options
for (i in 1:length(possible_options)) {
for (j in 1:length(possible_options)) {
if (i != j) {
score <- optimize_lineup(i, j)
if (score > best_score) {
best_score <- score
best_indices <- c(i, j)
}
}
}
}
## Testing lineup: Christian Braun & Bruce Brown - Score: 12.81038
## Testing lineup: Christian Braun & Kentavious Caldwell-Pope - Score: 12.22376
## Testing lineup: Christian Braun & Vlatko Cancar - Score: 13.01562
## Testing lineup: Christian Braun & Aaron Gordon - Score: 10.26757
## Testing lineup: Christian Braun & Jeff Green - Score: 10.61906
## Testing lineup: Christian Braun & DeAndre Jordan - Score: 10.05918
## Testing lineup: Christian Braun & Zeke Nnaji - Score: 11.19497
## Testing lineup: Christian Braun & Ish Smith - Score: 12.30803
## Testing lineup: Christian Braun & Peyton Watson - Score: 12.39207
## Testing lineup: Christian Braun & Jack White - Score: 14.5304
## Testing lineup: Bruce Brown & Christian Braun - Score: 11.42019
## Testing lineup: Bruce Brown & Kentavious Caldwell-Pope - Score: 13.01836
## Testing lineup: Bruce Brown & Vlatko Cancar - Score: 13.43407
## Testing lineup: Bruce Brown & Aaron Gordon - Score: 11.00125
## Testing lineup: Bruce Brown & Jeff Green - Score: 10.6682
## Testing lineup: Bruce Brown & DeAndre Jordan - Score: 10.71295
## Testing lineup: Bruce Brown & Zeke Nnaji - Score: 12.13993
## Testing lineup: Bruce Brown & Ish Smith - Score: 12.7724
## Testing lineup: Bruce Brown & Peyton Watson - Score: 13.20257
## Testing lineup: Bruce Brown & Jack White - Score: 14.28003
## Testing lineup: Kentavious Caldwell-Pope & Christian Braun - Score: 9.596965
## Testing lineup: Kentavious Caldwell-Pope & Bruce Brown - Score: 10.91677
## Testing lineup: Kentavious Caldwell-Pope & Vlatko Cancar - Score: 10.88597
## Testing lineup: Kentavious Caldwell-Pope & Aaron Gordon - Score: 9.622495
## Testing lineup: Kentavious Caldwell-Pope & Jeff Green - Score: 9.286749
## Testing lineup: Kentavious Caldwell-Pope & DeAndre Jordan - Score: 9.56706
## Testing lineup: Kentavious Caldwell-Pope & Zeke Nnaji - Score: 10.88296
## Testing lineup: Kentavious Caldwell-Pope & Ish Smith - Score: 11.36598
## Testing lineup: Kentavious Caldwell-Pope & Peyton Watson - Score: 11.6906
## Testing lineup: Kentavious Caldwell-Pope & Jack White - Score: 12.83109
## Testing lineup: Vlatko Cancar & Christian Braun - Score: 10.49234
## Testing lineup: Vlatko Cancar & Bruce Brown - Score: 11.92766
## Testing lineup: Vlatko Cancar & Kentavious Caldwell-Pope - Score: 12.01056
## Testing lineup: Vlatko Cancar & Aaron Gordon - Score: 10.53862
## Testing lineup: Vlatko Cancar & Jeff Green - Score: 10.30396
## Testing lineup: Vlatko Cancar & DeAndre Jordan - Score: 10.34721
## Testing lineup: Vlatko Cancar & Zeke Nnaji - Score: 11.78844
## Testing lineup: Vlatko Cancar & Ish Smith - Score: 12.01256
## Testing lineup: Vlatko Cancar & Peyton Watson - Score: 12.65775
## Testing lineup: Vlatko Cancar & Jack White - Score: 13.46175
## Testing lineup: Aaron Gordon & Christian Braun - Score: 13.14604
## Testing lineup: Aaron Gordon & Bruce Brown - Score: 14.62781
## Testing lineup: Aaron Gordon & Kentavious Caldwell-Pope - Score: 14.99774
## Testing lineup: Aaron Gordon & Vlatko Cancar - Score: 15.45113
## Testing lineup: Aaron Gordon & Jeff Green - Score: 12.46436
## Testing lineup: Aaron Gordon & DeAndre Jordan - Score: 12.95941
## Testing lineup: Aaron Gordon & Zeke Nnaji - Score: 13.15494
## Testing lineup: Aaron Gordon & Ish Smith - Score: 14.00192
## Testing lineup: Aaron Gordon & Peyton Watson - Score: 15.32453
## Testing lineup: Aaron Gordon & Jack White - Score: 16.48719
## Testing lineup: Jeff Green & Christian Braun - Score: 9.985766
## Testing lineup: Jeff Green & Bruce Brown - Score: 11.80155
## Testing lineup: Jeff Green & Kentavious Caldwell-Pope - Score: 11.13157
## Testing lineup: Jeff Green & Vlatko Cancar - Score: 11.50021
## Testing lineup: Jeff Green & Aaron Gordon - Score: 9.907702
## Testing lineup: Jeff Green & DeAndre Jordan - Score: 9.68252
## Testing lineup: Jeff Green & Zeke Nnaji - Score: 11.12452
## Testing lineup: Jeff Green & Ish Smith - Score: 11.89651
## Testing lineup: Jeff Green & Peyton Watson - Score: 11.94738
## Testing lineup: Jeff Green & Jack White - Score: 13.26404
## Testing lineup: DeAndre Jordan & Christian Braun - Score: 13.29611
## Testing lineup: DeAndre Jordan & Bruce Brown - Score: 15.29853
## Testing lineup: DeAndre Jordan & Kentavious Caldwell-Pope - Score: 13.63813
## Testing lineup: DeAndre Jordan & Vlatko Cancar - Score: 14.25197
## Testing lineup: DeAndre Jordan & Aaron Gordon - Score: 13.84443
## Testing lineup: DeAndre Jordan & Jeff Green - Score: 13.13885
## Testing lineup: DeAndre Jordan & Zeke Nnaji - Score: 13.37733
## Testing lineup: DeAndre Jordan & Ish Smith - Score: 15.69764
## Testing lineup: DeAndre Jordan & Peyton Watson - Score: 15.9682
## Testing lineup: DeAndre Jordan & Jack White - Score: 17.0646
## Testing lineup: Zeke Nnaji & Christian Braun - Score: 10.2177
## Testing lineup: Zeke Nnaji & Bruce Brown - Score: 12.01631
## Testing lineup: Zeke Nnaji & Kentavious Caldwell-Pope - Score: 11.61722
## Testing lineup: Zeke Nnaji & Vlatko Cancar - Score: 12.17461
## Testing lineup: Zeke Nnaji & Aaron Gordon - Score: 9.241953
## Testing lineup: Zeke Nnaji & Jeff Green - Score: 9.859347
## Testing lineup: Zeke Nnaji & DeAndre Jordan - Score: 9.188334
## Testing lineup: Zeke Nnaji & Ish Smith - Score: 11.51874
## Testing lineup: Zeke Nnaji & Peyton Watson - Score: 11.54159
## Testing lineup: Zeke Nnaji & Jack White - Score: 13.83217
## Testing lineup: Ish Smith & Christian Braun - Score: 12.73167
## Testing lineup: Ish Smith & Bruce Brown - Score: 13.8487
## Testing lineup: Ish Smith & Kentavious Caldwell-Pope - Score: 14.5614
## Testing lineup: Ish Smith & Vlatko Cancar - Score: 14.7328
## Testing lineup: Ish Smith & Aaron Gordon - Score: 13.02006
## Testing lineup: Ish Smith & Jeff Green - Score: 12.15873
## Testing lineup: Ish Smith & DeAndre Jordan - Score: 13.11353
## Testing lineup: Ish Smith & Zeke Nnaji - Score: 12.87061
## Testing lineup: Ish Smith & Peyton Watson - Score: 15.67459
## Testing lineup: Ish Smith & Jack White - Score: 16.12249
## Testing lineup: Peyton Watson & Christian Braun - Score: 10.66258
## Testing lineup: Peyton Watson & Bruce Brown - Score: 12.44581
## Testing lineup: Peyton Watson & Kentavious Caldwell-Pope - Score: 12.01607
## Testing lineup: Peyton Watson & Vlatko Cancar - Score: 12.70016
## Testing lineup: Peyton Watson & Aaron Gordon - Score: 10.69442
## Testing lineup: Peyton Watson & Jeff Green - Score: 10.29612
## Testing lineup: Peyton Watson & DeAndre Jordan - Score: 10.65856
## Testing lineup: Peyton Watson & Zeke Nnaji - Score: 10.88338
## Testing lineup: Peyton Watson & Ish Smith - Score: 11.98263
## Testing lineup: Peyton Watson & Jack White - Score: 14.3618
## Testing lineup: Jack White & Christian Braun - Score: 11.49973
## Testing lineup: Jack White & Bruce Brown - Score: 12.83736
## Testing lineup: Jack White & Kentavious Caldwell-Pope - Score: 12.44981
## Testing lineup: Jack White & Vlatko Cancar - Score: 13.09586
## Testing lineup: Jack White & Aaron Gordon - Score: 12.06232
## Testing lineup: Jack White & Jeff Green - Score: 11.09656
## Testing lineup: Jack White & DeAndre Jordan - Score: 12.00284
## Testing lineup: Jack White & Zeke Nnaji - Score: 12.67209
## Testing lineup: Jack White & Ish Smith - Score: 13.643
## Testing lineup: Jack White & Peyton Watson - Score: 14.31534
# Get the best player names
best_player1 <- possible_options[best_indices[1]]
best_player2 <- possible_options[best_indices[2]]
shiny app - 4 + 1
ui <- fluidPage(
titlePanel("Lineup Optimizer"),
sidebarLayout(
sidebarPanel(
helpText("Note: The order of players matters. Usage Player 1 is expected to lead the lineup in usage and so forth with the last Player being the player with the lowest expected usage."),
selectInput("team", "Select Team:", choices = unique(player_pool$team)),
selectInput("set_player1", "Usage Player 1:", choices = NULL),
selectInput("set_player2", "Usage Player 2:", choices = NULL),
selectInput("set_player3", "Usage Player 3:", choices = NULL),
selectInput("set_player4", "Usage Player 4:", choices = NULL),
checkboxGroupInput("exclude_players", "Exclude Players:",
choices = NULL,
selected = NULL),
actionButton("optimize", "Optimize Lineup")
),
mainPanel(
h3("Set Players"),
verbatimTextOutput("set_players"),
h3("Best Additional Player"),
verbatimTextOutput("best_player"),
h3("Scores"),
tableOutput("score_table")
)
)
)
server <- function(input, output, session) {
observe({
team_players <- player_pool %>%
filter(team == input$team) %>%
select(namePlayer) %>%
pull(namePlayer)
updateSelectInput(session, "set_player1", choices = team_players, selected = team_players[1])
updateSelectInput(session, "set_player2", choices = team_players, selected = team_players[2])
updateSelectInput(session, "set_player3", choices = team_players, selected = team_players[3])
updateSelectInput(session, "set_player4", choices = team_players, selected = team_players[4])
updateCheckboxGroupInput(session, "exclude_players", choices = team_players, selected = NULL)
})
observeEvent(input$optimize, {
set_players <- c(input$set_player1, input$set_player2, input$set_player3, input$set_player4)
possible_options <- player_pool %>%
filter(team == input$team, !namePlayer %in% set_players, !namePlayer %in% input$exclude_players) %>%
select(namePlayer) %>%
pull(namePlayer)
optimize_lineup <- function(index) {
index <- round(index)
index <- max(min(index, length(possible_options)), 1)
option <- possible_options[index]
score <- lineup_function(input$set_player1, input$set_player2, input$set_player3, input$set_player4, option)
return(as.numeric(score))
}
scores <- sapply(1:length(possible_options), optimize_lineup)
best_index <- which.max(scores)
best_player <- possible_options[best_index]
output$set_players <- renderText({ paste(set_players, collapse = ", ") })
output$best_player <- renderText({ best_player })
output$score_table <- renderTable({
data.frame(Player = possible_options, Score = scores)
})
})
}
shinyApp(ui = ui, server = server)
shiny app - 3 + 2
ui_2 <- fluidPage(
titlePanel("Lineup Optimization"),
sidebarLayout(
sidebarPanel(
helpText("Note: The order of players matters. Usage Player 1 is expected to lead the lineup in usage and so forth with the last Player being the player with the lowest expected usage."),
selectInput("team_filter", "Select Team:",
choices = unique(player_pool$team),
selected = NULL,
multiple = FALSE),
selectInput("set_player1", "Select Usage Player 1:",
choices = NULL,
selected = NULL),
selectInput("set_player2", "Select Usage Player 2:",
choices = NULL,
selected = NULL),
selectInput("set_player3", "Select Usage Player 3:",
choices = NULL,
selected = NULL),
checkboxGroupInput("exclude_players", "Exclude Players:",
choices = NULL,
selected = NULL),
actionButton("run_optimization", "Run Optimization")
),
mainPanel(
textOutput("best_players")
)
)
)
server_2 <- function(input, output, session) {
observe({
req(input$team_filter)
team_players <- player_pool %>%
filter(team == input$team_filter) %>%
pull(namePlayer)
updateSelectInput(session, "set_player1", choices = team_players, selected = NULL)
updateSelectInput(session, "set_player2", choices = team_players, selected = NULL)
updateSelectInput(session, "set_player3", choices = team_players, selected = NULL)
updateCheckboxGroupInput(session, "exclude_players", choices = team_players, selected = NULL)
})
observeEvent(input$run_optimization, {
req(input$set_player1, input$set_player2, input$set_player3)
set_players <- c(input$set_player1, input$set_player2, input$set_player3)
# Possible player options from the player pool, excluding set and excluded players
possible_options <- player_pool %>%
filter(team == input$team_filter, !namePlayer %in% set_players, !namePlayer %in% input$exclude_players) %>%
pull(namePlayer)
# Check if possible_options is empty
if (length(possible_options) < 2) {
output$best_players <- renderText({
"Not enough players available for optimization."
})
return()
}
# Function to evaluate a lineup with two additional players
optimize_lineup <- function(index1, index2) {
index1 <- max(min(round(index1), length(possible_options)), 1)
index2 <- max(min(round(index2), length(possible_options)), 1)
option1 <- possible_options[index1]
option2 <- possible_options[index2]
# Ensure the two options are different
if (option1 == option2) {
return(-Inf) # Return a very low score if the same player is selected twice
}
# Calculate the score
score <- lineup_function(set_players[1], set_players[2], set_players[3], option1, option2)
# Ensure score is numeric
if (is.list(score)) {
score <- unlist(score) # Convert list to vector
score <- as.numeric(score) # Ensure it’s numeric
}
# Print diagnostic information
if (is.numeric(score)) {
cat("Testing lineup:", option1, "&", option2, "- Score:", score, "\n")
} else {
cat("Testing lineup:", option1, "&", option2, "- Score: Error\n")
}
return(score)
}
# Initialize best score tracking
best_score <- -Inf
best_indices <- c(NA, NA)
# Perform grid search over all possible pairs of options
for (i in 1:length(possible_options)) {
for (j in 1:length(possible_options)) {
if (i != j) {
score <- optimize_lineup(i, j)
if (score > best_score) {
best_score <- score
best_indices <- c(i, j)
}
}
}
}
# Get the best player names
best_player1 <- possible_options[best_indices[1]]
best_player2 <- possible_options[best_indices[2]]
output$best_players <- renderText({
paste("Best players to add:", best_player1, "and", best_player2)
})
})
}
shinyApp(ui = ui_2, server = server_2)
model check for ’24
v_24 <- v_train %>%
filter(pred_season == 2024)
v_24c <- v_24 %>%
select(1, 5:12, 14:21, 23:30, 32:39, 41:48)
svm_24 <- predict(svm_fit, new_data = v_24c) %>%
as.data.frame() %>%
rename(svm_pred = .pred)
rf_24 <- predict(rf_fit, new_data = v_24c) %>%
as.data.frame() %>%
rename(rf_pred = .pred)
gbm_24 <- predict(gbm_fit, new_data = v_24c) %>%
as.data.frame() %>%
rename(gbm_pred = .pred)
df_24 <- cbind(svm_24, rf_24) %>%
cbind(gbm_24) %>%
mutate(combined_pred = (svm_pred + rf_pred + gbm_pred)/3) %>%
cbind(v_24$pm_per48)
RMSE(df_24$combined_pred, df_24$`v_24$pm_per48`)
## [1] 5.156495
mean(df_24$combined_pred)
## [1] 4.319881
mean(df_24$`v_24$pm_per48`)
## [1] 3.637195