Packages & Data Manipulation
library(tidyverse)
library(jsonlite)
library(janitor)
library(tidymodels)
library(ggplot2)
library(rsample)
library(parsnip)
library(BBmisc)
library(corrplot)
library(rvest)
library(reactable)
library(reactablefmtr)
pbp_play_23_url <- "https://api.pbpstats.com/get-totals/nba?Season=2022-23&SeasonType=Playoffs&Type=Player"
pbp_play_23_json <- read_json(pbp_play_23_url)
pbp_play_23 <- pbp_play_23_json[["multi_row_table_data"]] %>%
bind_rows() %>%
clean_names() %>%
mutate( playoff_minutes_per_game = minutes/games_played) %>%
select(
name,
playoff_minutes_per_game
)
pbp_play_23[is.na(pbp_play_23)] = 0
pbp_reg_22_url <- "https://api.pbpstats.com/get-totals/nba?Season=2021-22&SeasonType=Regular%2BSeason&Type=Player"
pbp_reg_22_json <- read_json(pbp_reg_22_url)
pbp_reg_22 <- pbp_reg_22_json[["multi_row_table_data"]] %>%
bind_rows() %>%
clean_names() %>%
select(
name,
games_played,
minutes,
plus_minus,
on_off_rtg,
on_def_rtg,
assisted2s_pct,
assisted3s_pct,
fg3pct,
second_chance_fg3pct,
fg2pct,
efg_pct,
ts_pct,
fg3a_pct,
usage,
live_ball_turnover_pct,
at_rim_frequency,
at_rim_accuracy,
at_rim_pct_assisted,
short_mid_range_frequency,
short_mid_range_accuracy,
long_mid_range_frequency,
long_mid_range_accuracy,
corner3frequency,
corner3pct_assisted,
arc3frequency,
arc3accuracy,
arc3pct_assisted,
shot_quality_avg,
shooting_fouls_drawn_pct
)
pbp_reg_22[is.na(pbp_reg_22)] = 0
pbp_play_22_url <- "https://api.pbpstats.com/get-totals/nba?Season=2021-22&SeasonType=Playoffs&Type=Player"
pbp_play_22_json <- read_json(pbp_play_22_url)
pbp_play_22 <- pbp_play_22_json[["multi_row_table_data"]] %>%
bind_rows() %>%
clean_names() %>%
mutate( playoff_minutes_per_game = minutes/games_played) %>%
select(
name,
playoff_minutes_per_game
)
pbp_play_22[is.na(pbp_play_22)] = 0
pbp_22 <- left_join(pbp_play_23, pbp_reg_22) %>%
replace(is.na(.), 0) %>%
mutate(season = 2022)
pbp_reg_21_url <- "https://api.pbpstats.com/get-totals/nba?Season=2020-21&SeasonType=Regular%2BSeason&FromDate=2020-12-23&StartType=All&ToDate=2021-03-08&Type=Player"
pbp_reg_21_json <- read_json(pbp_reg_21_url)
pbp_reg_21 <- pbp_reg_21_json[["multi_row_table_data"]] %>%
bind_rows() %>%
clean_names() %>%
select(
name,
games_played,
minutes,
plus_minus,
on_off_rtg,
on_def_rtg,
assisted2s_pct,
assisted3s_pct,
fg3pct,
second_chance_fg3pct,
fg2pct,
efg_pct,
ts_pct,
fg3a_pct,
usage,
live_ball_turnover_pct,
at_rim_frequency,
at_rim_accuracy,
at_rim_pct_assisted,
short_mid_range_frequency,
short_mid_range_accuracy,
long_mid_range_frequency,
long_mid_range_accuracy,
corner3frequency,
corner3pct_assisted,
arc3frequency,
arc3accuracy,
arc3pct_assisted,
shot_quality_avg,
shooting_fouls_drawn_pct
)
pbp_reg_21[is.na(pbp_reg_21)] = 0
pbp_play_21_url <- "https://api.pbpstats.com/get-totals/nba?Season=2021-22&SeasonType=Playoffs&Type=Player"
pbp_play_21_json <- read_json(pbp_play_21_url)
pbp_play_21 <- pbp_play_21_json[["multi_row_table_data"]] %>%
bind_rows() %>%
clean_names() %>%
mutate( playoff_minutes_per_game = minutes/games_played) %>%
select(
name,
playoff_minutes_per_game
)
pbp_play_21[is.na(pbp_play_21)] = 0
pbp_21 <- left_join(pbp_play_22, pbp_reg_21) %>%
replace(is.na(.), 0) %>%
mutate(season = 2021)
player_df <- rbind(pbp_22, pbp_21) %>%
mutate( playoff_roto = case_when(
playoff_minutes_per_game > 5 ~ "Yes",
TRUE ~ "No"
)
)
salaries_22 <- read_html("https://hoopshype.com/salaries/players/2021-2022/") %>%
html_nodes("table") %>%
html_table(fill = TRUE)
salaries_22 <- salaries_22[[1]] %>%
clean_names() %>%
select(x2,
x3)
salaries_22$x3 <- as.numeric(gsub('[$,]','', salaries_22$x3))
salaries_22 <- salaries_22 %>%
mutate( season = 2022) %>%
mutate(minimums = case_when(
x3 < 2700000 ~ "minimum",
TRUE ~ "other"
)
) %>%
filter( minimums == "minimum")
salaries_21 <- read_html("https://hoopshype.com/salaries/players/2020-2021/") %>%
html_nodes("table") %>%
html_table(fill = TRUE)
salaries_21 <- salaries_21[[1]] %>%
clean_names() %>%
select(x2,
x3)
salaries_21$x3 <- as.numeric(gsub('[$,]','', salaries_21$x3))
salaries_21 <- salaries_21 %>%
mutate( season = 2021) %>%
mutate(minimums = case_when(
x3 < 2600000 ~ "minimum",
TRUE ~ "other"
)
) %>%
filter( minimums == "minimum")
salaries_21_to_22 <- rbind(salaries_21, salaries_22) %>%
as_data_frame() %>%
rename(name = x2)
f <- left_join(salaries_21_to_22, player_df, by = c("name", "season")) %>%
select(-c(minimums,x3)) %>%
filter(playoff_roto != "N/A")
f$playoff_roto <- as.factor(f$playoff_roto)
Data Splitting
set.seed(132)
f_split <- initial_split(f, prop = .75, strata = playoff_roto)
train <- f_split %>%
training()
train_clean <- train %>%
select(-c(1:3))
test <- f_split %>%
testing()
test_clean <- test %>%
select(-c(1:3))
Recipe & Unused log_reg model
- Ultimately, I did not use the logistic regression model because it
had too low of a sensitivity.
f_model <- logistic_reg() %>%
set_engine('glm') %>%
set_mode('classification')
f_recipe <- recipe(playoff_roto ~ .,
data = train_clean) %>%
step_corr(all_numeric(), threshold = .50) %>%
step_normalize(all_numeric())
f_recipe_prep <- f_recipe %>%
prep(training = train_clean)
f_training_prep <- f_recipe_prep %>%
bake(new_data = NULL)
f_test_prep <- f_recipe_prep %>%
bake(new_data = test_clean)
f_fit <- f_model %>%
fit(playoff_roto ~ .,
data = f_training_prep)
f_workflow <- workflow() %>%
add_model(f_model) %>%
add_recipe(f_recipe)
f_wkfl_fit <- f_workflow %>%
last_fit(split = f_split)
f_preds <- f_wkfl_fit %>%
collect_predictions()
f_metrics <- metric_set(roc_auc, sens, spec)
Knn Model
library(class)
f_training_prep1 <- f_training_prep %>%
mutate(factor = case_when(
playoff_roto == "Yes" ~ 1,
TRUE ~ 0
)) %>%
select(-playoff_roto)
f_test_prep1 <- f_test_prep %>%
mutate(factor = case_when(
playoff_roto == "Yes" ~ 1,
TRUE ~ 0
)) %>%
select(-playoff_roto)
test_pred_knn <- knn(
train = f_training_prep1,
test = f_test_prep1,
cl = f_training_prep1$factor,
k = 10
)
actual <- f_test_prep1$factor
cm <- table(actual,test_pred_knn)
cm
## test_pred_knn
## actual 0 1
## 0 10 1
## 1 1 18
accuracy <- sum(diag(cm))/length(actual)
sprintf("Accuracy: %.2f%%", accuracy*100)
## [1] "Accuracy: 93.33%"
prob_test <- knn(
train = f_training_prep1,
test = f_test_prep1,
cl = f_training_prep1$factor,
k = 10,
prob = TRUE
)
pbp_reg_23_url <- "https://api.pbpstats.com/get-totals/nba?Season=2022-23&SeasonType=Regular%2BSeason&Type=Player"
pbp_reg_23_json <- read_json(pbp_reg_23_url)
pbp_reg_23_2 <- pbp_reg_23_json[["multi_row_table_data"]] %>%
bind_rows() %>%
clean_names()
pbp_reg_23 <- pbp_reg_23_2 %>%
select(
name,
games_played,
minutes,
plus_minus,
on_off_rtg,
on_def_rtg,
assisted2s_pct,
assisted3s_pct,
fg3pct,
second_chance_fg3pct,
fg2pct,
efg_pct,
ts_pct,
fg3a_pct,
usage,
live_ball_turnover_pct,
at_rim_frequency,
at_rim_accuracy,
at_rim_pct_assisted,
short_mid_range_frequency,
short_mid_range_accuracy,
long_mid_range_frequency,
long_mid_range_accuracy,
corner3frequency,
corner3pct_assisted,
arc3frequency,
arc3accuracy,
arc3pct_assisted,
shot_quality_avg,
shooting_fouls_drawn_pct
)
pbp_reg_23[is.na(pbp_reg_23)] = 0
name_team <- pbp_reg_23_2 %>%
select(
name,
team_abbreviation
) %>%
rename(Player = name)
salaries_24 <- read_html("https://hoopshype.com/salaries/players/") %>%
html_nodes("table") %>%
html_table(fill = TRUE)
salaries_24 <- salaries_24[[1]] %>%
clean_names() %>%
select(x2,
x3)
salaries_24$x3 <- as.numeric(gsub('[$,]','', salaries_24$x3))
salaries_24 <- salaries_24 %>%
mutate( season = 2024) %>%
mutate(minimums = case_when(
x3 < 3200000 ~ "minimum",
TRUE ~ "other"
)
) %>%
filter( minimums == "minimum") %>%
rename(name = x2) %>%
select(name)
mins_24 <- right_join(pbp_reg_23, salaries_24) %>% drop_na()
t2 <- f_recipe_prep %>% bake(new_data = mins_24)
t3 <- knn(
train = f_training_prep1[,-11],
test = t2,
cl = f_training_prep1$factor,
k = 10,
prob = TRUE
)
probs1 <- attributes(t3)
final_df <- cbind(mins_24$name, t3) %>%
cbind(probs1$prob) %>%
as.data.frame() %>%
rename(Player = V1,
Probability = V3) %>%
mutate('Playoff Rotation' = case_when(
t3 == 2 ~ "Yes",
TRUE ~ "No"
)) %>%
select(
Player,
'Playoff Rotation',
Probability
)
Predictions for ’24 Playoffs
final_df %>%
reactable( theme = espn(),
searchable = TRUE,
filterable = TRUE,
sortable = TRUE) %>%
add_title("All minimum players in 2023-2024 season")
All minimum players in 2023-2024 season
final_df %>%
filter(final_df$`Playoff Rotation` == "Yes") %>%
filter(Probability > .5) %>%
reactable( theme = espn(),
searchable = TRUE,
filterable = TRUE,
sortable = TRUE) %>%
add_title("Minimum players projected to be in '24 Playoff rotations")
Minimum players projected to be in '24 Playoff rotations