library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ----------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(openintro)
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
library(ggplot2)
library(dplyr)
library(infer)
## Warning: package 'infer' was built under R version 4.0.3
library(knitr)
library(htmlTable)
## Warning: package 'htmlTable' was built under R version 4.0.3
library(tinytex)
# Download the per-player RAPTOR table (path: nba-model/2021) as a data frame.
nba_pred <- read.csv(
  "https://projects.fivethirtyeight.com/nba-model/2021/latest_RAPTOR_by_player.csv"
)
FiveThirtyEight (538) has developed an empirical method for identifying the most valuable players in the NBA, determining each player’s incremental impact on the game, compared against when he is not in the game or is injured. The article can be found here: https://projects.fivethirtyeight.com/nba-player-ratings/
I first remove players who score high in the RAPTOR metrics but who don’t play meaningful minutes, and then split the data into two datasets, one ranking the players by defensive impact and the other by offensive impact.
# Inspect the column names and types of the raw download.
nba_pred %>% str()
## 'data.frame': 474 obs. of 21 variables:
## $ player_name : chr "Precious Achiuwa" "Jaylen Adams" "Steven Adams" "Bam Adebayo" ...
## $ player_id : chr "achiupr01" "adamsja01" "adamsst01" "adebaba01" ...
## $ season : int 2021 2021 2021 2021 2021 2021 2021 2021 2021 2021 ...
## $ poss : int 712 21 1176 1307 1005 800 15 557 1161 1019 ...
## $ mp : int 330 9 575 631 480 384 8 258 547 477 ...
## $ raptor_box_offense : num -1.6345 -14.6309 -0.672 -0.0164 -2.7683 ...
## $ raptor_box_defense : num 4.03 -14.3 1.94 2.73 -1.14 ...
## $ raptor_box_total : num 2.39 -28.93 1.27 2.71 -3.91 ...
## $ raptor_onoff_offense: num -3.1939 -27.6696 4.9715 -0.4581 0.0805 ...
## $ raptor_onoff_defense: num -2.4 -15.4 -1.63 1.21 -6.67 ...
## $ raptor_onoff_total : num -5.593 -43.065 3.339 0.747 -6.585 ...
## $ raptor_offense : num -1.9755 -18.4427 0.4692 -0.0151 -2.3478 ...
## $ raptor_defense : num 2.94 -15.5 1.33 2.58 -2.3 ...
## $ raptor_total : num 0.969 -33.943 1.795 2.565 -4.644 ...
## $ war_total : num 0.63 -0.143 1.31 1.704 -0.46 ...
## $ war_reg_season : num 0.63 -0.143 1.31 1.704 -0.46 ...
## $ war_playoffs : int 0 0 0 0 0 0 0 0 0 0 ...
## $ predator_offense : num -3.584 -13.843 0.747 -2.41 -1.916 ...
## $ predator_defense : num 2.83 -14.64 1.65 2.06 -1.82 ...
## $ predator_total : num -0.754 -28.483 2.396 -0.348 -3.731 ...
## $ pace_impact : num 0.6653 0.0182 -1.7515 -0.4044 -0.8591 ...
# Average of the `mp` column across every row of the raw data.
mean(nba_pred[["mp"]])
## [1] 325.1456
## find the average number of minutes played to ensure we're finding the most efficient players who actually play
Here we will cut down the dataset to only those columns and rows that provide value for our analysis.
# Columns kept for the analysis: the player's name, the RAPTOR and PREDATOR
# offense/defense ratings, and `mp` (used below as the playing-time filter).
keepvars <- c(
  "player_name",
  "raptor_defense",
  "raptor_offense",
  "predator_defense",
  "predator_offense",
  "mp"
)

# Playing-time cutoff: only rows with `mp` strictly greater than this are kept.
# The value sits just above the mean of `nba_pred$mp` printed earlier (~325).
min_minutes <- 330

# Keep only the analysis columns, then drop the low-minute players.
nba_df <- nba_pred[keepvars] %>%
  filter(mp > min_minutes)

# Two working copies of the same rows, each ordered by player name and then by
# its own RAPTOR rating. (The original call for `defense` carried a stray
# trailing comma inside arrange(); it is removed here.)
defense <- nba_df %>% arrange(player_name, raptor_defense)
offense <- nba_df %>% arrange(player_name, raptor_offense)
# Per-column summary statistics for the offense copy.
offense %>% summary()
## player_name raptor_defense raptor_offense predator_defense
## Length:228 Min. :-5.68203 Min. :-7.0290 Min. :-6.01323
## Class :character 1st Qu.:-1.65042 1st Qu.:-1.5408 1st Qu.:-1.81498
## Mode :character Median :-0.01702 Median :-0.1022 Median :-0.03331
## Mean : 0.04832 Mean : 0.2353 Mean : 0.12891
## 3rd Qu.: 1.54711 3rd Qu.: 1.7894 3rd Qu.: 1.72277
## Max. : 8.65404 Max. : 8.5179 Max. : 8.15452
## predator_offense mp
## Min. :-6.34237 Min. :331.0
## 1st Qu.:-1.68244 1st Qu.:421.2
## Median :-0.03474 Median :535.5
## Mean : 0.22195 Mean :531.5
## 3rd Qu.: 1.75949 3rd Qu.:644.0
## Max. : 9.44382 Max. :845.0
# Per-column summary statistics for the defense copy. It holds the same rows as
# `offense` in a different order, so the statistics printed are the same.
defense %>% summary()
## player_name raptor_defense raptor_offense predator_defense
## Length:228 Min. :-5.68203 Min. :-7.0290 Min. :-6.01323
## Class :character 1st Qu.:-1.65042 1st Qu.:-1.5408 1st Qu.:-1.81498
## Mode :character Median :-0.01702 Median :-0.1022 Median :-0.03331
## Mean : 0.04832 Mean : 0.2353 Mean : 0.12891
## 3rd Qu.: 1.54711 3rd Qu.: 1.7894 3rd Qu.: 1.72277
## Max. : 8.65404 Max. : 8.5179 Max. : 8.15452
## predator_offense mp
## Min. :-6.34237 Min. :331.0
## 1st Qu.:-1.68244 1st Qu.:421.2
## Median :-0.03474 Median :535.5
## Mean : 0.22195 Mean :531.5
## 3rd Qu.: 1.75949 3rd Qu.:644.0
## Max. : 9.44382 Max. :845.0
# Re-sort each copy so the highest rating on its own side of the ball is first.
defense <- defense %>%
  arrange(desc(raptor_defense))
offense <- offense %>%
  arrange(desc(raptor_offense))
# Show the six highest-rated defenders (6 is head()'s default row count).
head(defense, n = 6L)
## player_name raptor_defense raptor_offense predator_defense predator_offense
## 1 Jakob Poeltl 8.654041 -0.09931121 6.535604 0.088269625
## 2 Myles Turner 7.467195 -1.25560994 8.154517 -0.001981428
## 3 Clint Capela 7.260446 -1.63432841 6.811666 -2.362338434
## 4 Rudy Gobert 6.723990 -0.85862757 6.580522 -0.260675144
## 5 David Nwaba 6.399048 0.90348306 6.735025 -0.534508701
## 6 Mike Conley 6.148832 4.64603158 6.577255 5.125660526
## mp
## 1 457
## 2 638
## 3 564
## 4 655
## 5 384
## 6 654
# Show the six highest-rated offensive players (6 is head()'s default row count).
head(offense, n = 6L)
## player_name raptor_defense raptor_offense predator_defense
## 1 Damian Lillard -5.6820304 8.517921 -5.5710616
## 2 Kawhi Leonard 3.5204031 8.424753 3.8552239
## 3 Nikola Jokic 0.2802321 8.308312 0.1859807
## 4 CJ McCollum 0.0464274 7.279798 0.2860003
## 5 James Harden -3.4764291 7.176594 -3.1545813
## 6 Stephen Curry -1.2022500 7.015721 -1.1258199
## predator_offense mp
## 1 8.799776 721
## 2 7.382325 651
## 3 9.443817 750
## 4 7.572090 440
## 5 6.592863 653
## 6 6.649459 739
In general, we can see that a player who is valuable on the offensive end can still be a liability on the defensive end. Damian Lillard, for example, leads the league in offensive RAPTOR performance, but is an empirical minus player on defense, with a defensive RAPTOR of about -5.68. Therefore, given that NBA players play both offense and defense, a more useful metric is likely a combined RAPTOR score.