Packages & Data Manipulation

library(tidyverse)
library(jsonlite)
library(janitor)
library(tidymodels)
library(ggplot2)
library(rsample)
library(parsnip)
library(BBmisc)
library(corrplot)
library(rvest)
library(reactable)
library(reactablefmtr)
pbp_play_23_url <- "https://api.pbpstats.com/get-totals/nba?Season=2022-23&SeasonType=Playoffs&Type=Player"
pbp_play_23_json <- read_json(pbp_play_23_url)
pbp_play_23 <- pbp_play_23_json[["multi_row_table_data"]] %>%
  bind_rows() %>%
  clean_names() %>%
  mutate( playoff_minutes_per_game = minutes/games_played) %>%
  select(
    name,
    playoff_minutes_per_game
  )
pbp_play_23[is.na(pbp_play_23)] = 0
pbp_reg_22_url <- "https://api.pbpstats.com/get-totals/nba?Season=2021-22&SeasonType=Regular%2BSeason&Type=Player"
pbp_reg_22_json <- read_json(pbp_reg_22_url)
pbp_reg_22 <- pbp_reg_22_json[["multi_row_table_data"]] %>%
  bind_rows() %>%
  clean_names() %>%
  select(
    name,
    games_played,
    minutes,
    plus_minus,
    on_off_rtg,
    on_def_rtg,
    assisted2s_pct,
    assisted3s_pct,
    fg3pct,
    second_chance_fg3pct,
    fg2pct,
    efg_pct,
    ts_pct,
    fg3a_pct,
    usage,
    live_ball_turnover_pct,
    at_rim_frequency,
    at_rim_accuracy,
    at_rim_pct_assisted,
    short_mid_range_frequency,
    short_mid_range_accuracy,
    long_mid_range_frequency,
    long_mid_range_accuracy,
    corner3frequency,
    corner3pct_assisted,
    arc3frequency,
    arc3accuracy,
    arc3pct_assisted,
    shot_quality_avg,
    shooting_fouls_drawn_pct
  )
pbp_reg_22[is.na(pbp_reg_22)] = 0

pbp_play_22_url <- "https://api.pbpstats.com/get-totals/nba?Season=2021-22&SeasonType=Playoffs&Type=Player"
pbp_play_22_json <- read_json(pbp_play_22_url)
pbp_play_22 <- pbp_play_22_json[["multi_row_table_data"]] %>%
  bind_rows() %>%
  clean_names() %>%
  mutate( playoff_minutes_per_game = minutes/games_played) %>%
  select(
    name,
    playoff_minutes_per_game
  )
pbp_play_22[is.na(pbp_play_22)] = 0

pbp_22 <- left_join(pbp_play_23, pbp_reg_22) %>%
  replace(is.na(.), 0) %>%
  mutate(season = 2022)
pbp_reg_21_url <- "https://api.pbpstats.com/get-totals/nba?Season=2020-21&SeasonType=Regular%2BSeason&FromDate=2020-12-23&StartType=All&ToDate=2021-03-08&Type=Player"
pbp_reg_21_json <- read_json(pbp_reg_21_url)
pbp_reg_21 <- pbp_reg_21_json[["multi_row_table_data"]] %>%
  bind_rows() %>%
  clean_names() %>%
  select(
    name,
    games_played,
    minutes,
    plus_minus,
    on_off_rtg,
    on_def_rtg,
    assisted2s_pct,
    assisted3s_pct,
    fg3pct,
    second_chance_fg3pct,
    fg2pct,
    efg_pct,
    ts_pct,
    fg3a_pct,
    usage,
    live_ball_turnover_pct,
    at_rim_frequency,
    at_rim_accuracy,
    at_rim_pct_assisted,
    short_mid_range_frequency,
    short_mid_range_accuracy,
    long_mid_range_frequency,
    long_mid_range_accuracy,
    corner3frequency,
    corner3pct_assisted,
    arc3frequency,
    arc3accuracy,
    arc3pct_assisted,
    shot_quality_avg,
    shooting_fouls_drawn_pct
  )
pbp_reg_21[is.na(pbp_reg_21)] = 0

pbp_play_21_url <- "https://api.pbpstats.com/get-totals/nba?Season=2021-22&SeasonType=Playoffs&Type=Player"
pbp_play_21_json <- read_json(pbp_play_21_url)
pbp_play_21 <- pbp_play_21_json[["multi_row_table_data"]] %>%
  bind_rows() %>%
  clean_names() %>%
  mutate( playoff_minutes_per_game = minutes/games_played) %>%
  select(
    name,
    playoff_minutes_per_game
  )
pbp_play_21[is.na(pbp_play_21)] = 0

pbp_21 <- left_join(pbp_play_22, pbp_reg_21) %>%
  replace(is.na(.), 0) %>%
  mutate(season = 2021)
player_df <- rbind(pbp_22, pbp_21) %>%
  mutate( playoff_roto = case_when(
    playoff_minutes_per_game > 5 ~ "Yes",
    TRUE ~ "No"
  )
  )
salaries_22 <- read_html("https://hoopshype.com/salaries/players/2021-2022/") %>% 
  html_nodes("table") %>%
  html_table(fill = TRUE)

salaries_22 <- salaries_22[[1]] %>%
  clean_names() %>%
  select(x2,
         x3) 

salaries_22$x3 <- as.numeric(gsub('[$,]','', salaries_22$x3))

salaries_22 <- salaries_22 %>%
  mutate( season = 2022) %>%
  mutate(minimums = case_when(
    x3 < 2700000 ~ "minimum", 
    TRUE ~ "other"
  )
  ) %>%
  filter( minimums == "minimum")
salaries_21 <- read_html("https://hoopshype.com/salaries/players/2020-2021/") %>% 
  html_nodes("table") %>%
  html_table(fill = TRUE)

salaries_21 <- salaries_21[[1]] %>%
  clean_names() %>%
  select(x2,
         x3) 

salaries_21$x3 <- as.numeric(gsub('[$,]','', salaries_21$x3))

salaries_21 <- salaries_21 %>%
  mutate( season = 2021) %>%
  mutate(minimums = case_when(
    x3 < 2600000 ~ "minimum", 
    TRUE ~ "other"
  )
  ) %>%
  filter( minimums == "minimum")
salaries_21_to_22 <- rbind(salaries_21, salaries_22) %>% 
  as_data_frame() %>%
  rename(name = x2)



f <- left_join(salaries_21_to_22, player_df, by = c("name", "season")) %>%
  select(-c(minimums,x3)) %>%
  filter(playoff_roto != "N/A")

f$playoff_roto <- as.factor(f$playoff_roto)

Data Splitting

set.seed(132)

f_split <- initial_split(f, prop = .75, strata = playoff_roto)

train <- f_split %>%
  training() 
train_clean <- train %>%
  select(-c(1:3))

test <- f_split %>%
  testing()
test_clean <- test %>%
  select(-c(1:3))

Recipe & Unused log_reg model

f_model <- logistic_reg() %>%
  set_engine('glm') %>%
  set_mode('classification')


f_recipe <- recipe(playoff_roto ~ .,
                   data = train_clean) %>%
  step_corr(all_numeric(), threshold = .50) %>%
  step_normalize(all_numeric())

f_recipe_prep <- f_recipe %>%
  prep(training = train_clean)

f_training_prep <- f_recipe_prep %>%
  bake(new_data = NULL)

f_test_prep <- f_recipe_prep %>%
  bake(new_data = test_clean)

f_fit <- f_model %>%
  fit(playoff_roto ~ .,
      data = f_training_prep)


f_workflow <- workflow() %>%
  add_model(f_model) %>%
  add_recipe(f_recipe)

f_wkfl_fit <- f_workflow %>%
  last_fit(split = f_split)

f_preds <- f_wkfl_fit %>%
  collect_predictions()

f_metrics <- metric_set(roc_auc, sens, spec)

Knn Model

library(class)

f_training_prep1 <- f_training_prep %>%
  mutate(factor = case_when(
    playoff_roto == "Yes" ~ 1,
    TRUE ~ 0
  )) %>%
  select(-playoff_roto)

f_test_prep1 <- f_test_prep %>%
  mutate(factor = case_when(
    playoff_roto == "Yes" ~ 1,
    TRUE ~ 0
  )) %>%
  select(-playoff_roto)

test_pred_knn <- knn(
  train = f_training_prep1,
  test = f_test_prep1,
  cl = f_training_prep1$factor,
  k = 10
)

actual <- f_test_prep1$factor

cm <- table(actual,test_pred_knn)
cm
##       test_pred_knn
## actual  0  1
##      0 10  1
##      1  1 18
accuracy <- sum(diag(cm))/length(actual)
sprintf("Accuracy: %.2f%%", accuracy*100)
## [1] "Accuracy: 93.33%"
prob_test  <- knn(
  train = f_training_prep1,
  test = f_test_prep1,
  cl = f_training_prep1$factor,
  k = 10,
  prob = TRUE
)
pbp_reg_23_url <- "https://api.pbpstats.com/get-totals/nba?Season=2022-23&SeasonType=Regular%2BSeason&Type=Player"
pbp_reg_23_json <- read_json(pbp_reg_23_url)
pbp_reg_23_2 <- pbp_reg_23_json[["multi_row_table_data"]] %>%
  bind_rows() %>%
  clean_names() 
pbp_reg_23 <- pbp_reg_23_2 %>%
  select(
    name,
    games_played,
    minutes,
    plus_minus,
    on_off_rtg,
    on_def_rtg,
    assisted2s_pct,
    assisted3s_pct,
    fg3pct,
    second_chance_fg3pct,
    fg2pct,
    efg_pct,
    ts_pct,
    fg3a_pct,
    usage,
    live_ball_turnover_pct,
    at_rim_frequency,
    at_rim_accuracy,
    at_rim_pct_assisted,
    short_mid_range_frequency,
    short_mid_range_accuracy,
    long_mid_range_frequency,
    long_mid_range_accuracy,
    corner3frequency,
    corner3pct_assisted,
    arc3frequency,
    arc3accuracy,
    arc3pct_assisted,
    shot_quality_avg,
    shooting_fouls_drawn_pct
  )

pbp_reg_23[is.na(pbp_reg_23)] = 0

name_team <- pbp_reg_23_2 %>%
  select(
    name,
    team_abbreviation
  ) %>%
  rename(Player = name)
salaries_24 <- read_html("https://hoopshype.com/salaries/players/") %>% 
  html_nodes("table") %>%
  html_table(fill = TRUE)

salaries_24 <- salaries_24[[1]] %>%
  clean_names() %>%
  select(x2,
         x3) 

salaries_24$x3 <- as.numeric(gsub('[$,]','', salaries_24$x3))

salaries_24 <- salaries_24 %>%
  mutate( season = 2024) %>%
  mutate(minimums = case_when(
    x3 < 3200000 ~ "minimum", 
    TRUE ~ "other"
  )
  ) %>%
  filter( minimums == "minimum") %>%
  rename(name = x2) %>% 
  select(name)
mins_24 <- right_join(pbp_reg_23, salaries_24) %>% drop_na()

t2 <- f_recipe_prep %>% bake(new_data = mins_24)

t3 <- knn(
  train = f_training_prep1[,-11],
  test = t2,
  cl = f_training_prep1$factor,
  k = 10,
  prob = TRUE
)

probs1 <- attributes(t3)

final_df <- cbind(mins_24$name, t3) %>%
  cbind(probs1$prob) %>%
  as.data.frame() %>%
  rename(Player = V1,
         Probability = V3) %>%
  mutate('Playoff Rotation' = case_when(
    t3 == 2 ~ "Yes",
    TRUE ~ "No"
  )) %>%
  select(
    Player,
    'Playoff Rotation',
    Probability
  )

Predictions for ’24 Playoffs

final_df %>%
  reactable( theme = espn(),
             searchable = TRUE,
             filterable = TRUE,
             sortable = TRUE) %>%
  add_title("All minimum players in 2023-2024 season")

All minimum players in 2023-2024 season

final_df %>%
  filter(final_df$`Playoff Rotation` == "Yes") %>%
  filter(Probability > .5) %>%
  reactable( theme = espn(),
             searchable = TRUE,
             filterable = TRUE,
             sortable = TRUE) %>%
  add_title("Minimum players projected to be in '24 Playoff rotations")

Minimum players projected to be in '24 Playoff rotations