The classic five positions have long been inadequate for classifying and grouping players, and other common schemes such as guard/wing/big are far too simplistic. Instead, I would like to categorize players by their actual roles on each end of the floor. For example, a player like Jokic would be listed as a creator on offense and an anchor on defense. Each player is “defined” by their strongest role, but multiple roles can be used for analysis (e.g., spacer/creator).
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.4 ✓ dplyr 1.0.7
## ✓ tidyr 1.2.0 ✓ stringr 1.4.0
## ✓ readr 2.0.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(rvest)
##
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
##
## guess_encoding
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(crosstalk)
library(ggplot2)
library(reactable)
library(rpubs)
# Pulling and cleaning data ----
# Advanced stats table: the source for the spacing, creation and connector
# scores computed further down.
bball_url <- "https://www.basketball-reference.com/leagues/NBA_2022_advanced.html"
bballref <- read_html(bball_url) %>%
  html_elements("table") %>%
  html_table() %>%
  .[[1]] %>%
  clean_names() %>%
  # Columns 6 to 29 are scraped as text; coerce them to numeric.
  mutate(across(6:29, as.numeric)) %>%
  # Keep rows with more than 10 games and drop the "TOT" team rows.
  filter(tm != "TOT", g > 10)
# Shooting table: the assisted share of made twos and threes, which feeds the
# self-creation score.
bballref_url2 <- "https://www.basketball-reference.com/leagues/NBA_2022_shooting.html"
self_creation <- read_html(bballref_url2) %>%
  html_elements("#shooting_stats") %>%
  html_table() %>%
  .[[1]] %>%
  clean_names() %>%
  # Drop the first data row before coercing columns 6 to 35 to numeric
  # (presumably the second header row of the two-level table header).
  slice(-1) %>%
  mutate(across(6:35, as.numeric)) %>%
  # More than 10 games, and no "TOT" team rows.
  filter(x_6 > 10, x_5 != "TOT") %>%
  # Keep and rename only the columns used later.
  select(
    player = x_2,
    tm = x_5,
    g = x_6,
    assisted_twos = percent_of_fg_astd,
    assisted_threes = percent_of_fg_astd_2
  )
# Totals table: used to attach each player's points to the self-creation
# inputs.
bballref_url3 <- "https://www.basketball-reference.com/leagues/NBA_2022_totals.html"
box_score <- bballref_url3 %>%
  read_html() %>%
  html_elements("#totals_stats") %>%
  html_table() %>%
  .[[1]] %>%
  clean_names() %>%
  # Columns 6 to 30 are scraped as text; coerce them to numeric.
  mutate(across(6:30, as.numeric))

# Carry the team along with the points. box_score is not filtered, so a player
# can appear on several rows (the other tables in this script drop "TOT" rows,
# which suggests one row per team plus a combined row for traded players).
points <- box_score %>%
  select(player, tm, pts)

# Join on player AND team. Joining on player alone matches every self_creation
# row against all of that player's box_score rows, which duplicates rows and
# attaches points from the wrong team stint.
self_creation <- self_creation %>%
  inner_join(points, by = c("player", "tm"))
## Joining, by = "player"
# Shooting table again, this time for the rim-running score: keep one
# shot-distance share column per player.
bballref_url2 <- "https://www.basketball-reference.com/leagues/NBA_2022_shooting.html"
rim_run <- read_html(bballref_url2) %>%
  html_elements("#shooting_stats") %>%
  html_table() %>%
  .[[1]] %>%
  clean_names() %>%
  # Drop the first data row, then coerce columns 6 to 35 to numeric.
  slice(-1) %>%
  mutate(across(6:35, as.numeric)) %>%
  # More than 10 games, and no "TOT" team rows.
  filter(x_6 > 10, x_5 != "TOT") %>%
  # NOTE(review): the description of Rim Runners below suggests this column is
  # the share of attempts taken within 3 feet; confirm against the table.
  select(
    player = x_2,
    tm = x_5,
    g = x_6,
    `at_basket_fg%` = percent_of_fga_by_distance_2
  )
# Spacing ----
# Score = league-wide rank of true shooting + rank of three-point attempt rate,
# then bucketed into levels A (highest) to D with fixed cut-offs.
spacing <- bballref %>%
  mutate(
    ts_rank = rank(ts_percent),
    three_rank = rank(x3p_ar),
    spacing_score = ts_rank + three_rank
  ) %>%
  filter(player != "Player") %>%
  arrange(desc(spacing_score)) %>%
  mutate(
    spacing_level = case_when(
      spacing_score > 660 ~ "A",
      spacing_score > 544 ~ "B",
      spacing_score > 418 ~ "C",
      TRUE ~ "D"
    )
  )

# Quantiles of the score, kept for reference; the cut-offs above are
# hard-coded rather than read from this object.
sq <- spacing %>%
  pull(spacing_score) %>%
  quantile()
# Scatter of spacing score per player, coloured by level. Axis text and ticks
# are hidden.
g1 <- spacing %>%
  ggplot(aes(player, spacing_score, colour = spacing_level)) +
  geom_point() +
  labs(title = "Spacing Distribution") +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank()
  )
g1

# Same chart, one panel per team.
g1 + facet_wrap(vars(tm))
# Shot creator ----
# Score = league-wide rank of OBPM, bucketed into levels A (highest) to D with
# fixed cut-offs.
creation <- bballref %>%
  mutate(creation_score = rank(obpm)) %>%
  filter(player != "Player") %>%
  arrange(desc(creation_score)) %>%
  mutate(
    creation_level = case_when(
      creation_score > 471 ~ "A",
      creation_score > 310 ~ "B",
      creation_score > 157 ~ "C",
      TRUE ~ "D"
    )
  )

# Quantiles of the score, kept for reference; the cut-offs above are
# hard-coded rather than read from this object.
cq <- creation %>%
  pull(creation_score) %>%
  quantile()
# Scatter of creation score per player, coloured by level. Axis text and ticks
# are hidden.
g2 <- creation %>%
  ggplot(aes(player, creation_score, colour = creation_level)) +
  geom_point() +
  labs(title = "Creation Distribution") +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank()
  )

# Interactive version of the chart.
ggplotly(g2)

# Static version, one panel per team.
g2 + facet_wrap(vars(tm))
# Self creator ----
# Score = rank of points minus the ranks of the assisted shares on twos and
# threes, so high scoring with few assisted makes scores highest. Bucketed
# into levels A (highest) to D with fixed cut-offs.
self_creation <- self_creation %>%
  mutate(
    assist_3_rank = rank(assisted_threes),
    assist_2_rank = rank(assisted_twos),
    point_rank = rank(pts),
    self_creation_score = point_rank - assist_2_rank - assist_3_rank,
    self_creation_level = case_when(
      self_creation_score > 14.5 ~ "A",
      self_creation_score > -582 ~ "B",
      self_creation_score > -1061 ~ "C",
      TRUE ~ "D"
    )
  )

# Quantiles of the score, kept for reference; the cut-offs above are
# hard-coded rather than read from this object.
sfq <- self_creation %>%
  pull(self_creation_score) %>%
  quantile()
# Scatter of self-creation score per player, coloured by level. Axis text and
# ticks are hidden.
g3 <- self_creation %>%
  ggplot(aes(player, self_creation_score, colour = self_creation_level)) +
  geom_point() +
  labs(title = "Self Creation Distribution") +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank()
  )

# Interactive version of the chart.
ggplotly(g3)

# Static version, one panel per team.
g3 + facet_wrap(vars(tm))
self-creation quantile
# Rim running ----
# Score = league-wide rank of the `at_basket_fg%` column, bucketed into levels
# A (highest) to D with fixed cut-offs.
rim_run <- rim_run %>%
  mutate(
    rim_score = rank(`at_basket_fg%`),
    rim_level = case_when(
      rim_score > 405 ~ "A",
      rim_score > 270 ~ "B",
      rim_score > 134 ~ "C",
      TRUE ~ "D"
    )
  )

# Quantiles of the score, kept for reference; the cut-offs above are
# hard-coded rather than read from this object.
rmq <- rim_run %>%
  pull(rim_score) %>%
  quantile()
# Scatter of rim-running score per player, coloured by rim level. Axis text
# and ticks are hidden.
# Fix: this chart previously plotted the self_creation data (score and level)
# under the "Rim Running Distribution" title; it now plots rim_run.
g4 <- ggplot(rim_run, aes(x = player, y = rim_score, color = rim_level)) +
  geom_point() +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  ) +
  ggtitle("Rim Running Distribution")

# Interactive version of the chart.
ggplotly(g4)

# Static version, one panel per team.
g4 + facet_wrap(. ~ tm)
# Connectors ----
# connect_score is the league-wide rank of PER. The level, however, is
# bucketed on the raw PER value, not on the rank.
connectors <- bballref %>%
  mutate(
    connect_score = rank(per),
    connect_level = case_when(
      per > 16 ~ "A",
      per > 13.3 ~ "B",
      per > 10.57 ~ "C",
      TRUE ~ "D"
    )
  )

# Quantiles of the rank, kept for reference. This assignment reuses the name
# `cq`, replacing the creation quantiles stored under it earlier.
cq <- connectors %>%
  pull(connect_score) %>%
  quantile()
# Scatter of connector score per player, coloured by level. Axis text and
# ticks are hidden.
g5 <- connectors %>%
  ggplot(aes(player, connect_score, colour = connect_level)) +
  geom_point() +
  labs(title = "Connector Distribution") +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank()
  )

# Interactive version of the chart.
ggplotly(g5)

# Static version, one panel per team.
g5 + facet_wrap(vars(tm))
# Combined table ----
# Merge the five role tables (natural joins on their shared columns) and give
# each player one offensive role: the first matching rule in the case_when
# below wins, so rule order is the role priority.
combined <- spacing %>%
  inner_join(self_creation) %>%
  inner_join(creation) %>%
  inner_join(rim_run) %>%
  inner_join(connectors) %>%
  # Fix: creation_score is no longer re-ranked on the joined subset. The 471
  # cut-off below is the same one that defines creation_level "A" on the
  # original ranking; re-ranking after the joins shifted every score, so the
  # "Creator" role and creation_level could disagree.
  filter(player != "Player") %>%
  arrange(-creation_score) %>%
  mutate(
    offensive_role = case_when(
      creation_score > 471 ~ "Creator",
      self_creation_score > 14 ~ "Self Creator",
      spacing_score > 660 ~ "Spacer",
      rim_score > 405 ~ "Rim Runner",
      per > 14 ~ "Connector",
      TRUE ~ "other"
    )
  ) %>%
  # Keep only the role and the per-skill levels for display.
  select(
    player,
    offensive_role,
    connect_level,
    rim_level,
    self_creation_level,
    creation_level,
    spacing_level
  )
## Joining, by = c("player", "tm", "g")
## Joining, by = c("rk", "player", "pos", "age", "tm", "g", "mp", "per", "ts_percent", "x3p_ar", "f_tr", "orb_percent", "drb_percent", "trb_percent", "ast_percent", "stl_percent", "blk_percent", "tov_percent", "usg_percent", "x", "ows", "dws", "ws", "ws_48", "x_2", "obpm", "dbpm", "bpm", "vorp")
## Joining, by = c("player", "tm", "g")
## Joining, by = c("rk", "player", "pos", "age", "tm", "g", "mp", "per", "ts_percent", "x3p_ar", "f_tr", "orb_percent", "drb_percent", "trb_percent", "ast_percent", "stl_percent", "blk_percent", "tov_percent", "usg_percent", "x", "ows", "dws", "ws", "ws_48", "x_2", "obpm", "dbpm", "bpm", "vorp")
# Interactive table of roles and levels with sorting, per-column filters and a
# global search box.
combined %>%
  reactable(
    searchable = TRUE,
    filterable = TRUE,
    sortable = TRUE
  )
Offensive Roles: Roles are determined more by a player's function within the offense than by skill level
Creator - a player that creates shots for themselves and others, drives offense
Self Creator - a player able to get themselves a shot but less creation for teammates
Spacer - a player that the opposing defense needs to respect from three-point range
Rim Runner - a player that gets the vast majority of their shots from within 3 feet
Connector - a player that still contributes to good offense despite not fitting one of the other roles
other - players that do not fit into a role and have a PER below 14
The method I chose to categorize roles was to select the statistical fields relevant to a role, rank each field league-wide, and then aggregate the ranks as needed. Input from the coaching staff in the offseason would be needed to define the roles and judge which stats best represent each one. This is a rough outline: I constantly struggle to define the roles and to pick (and access) the most relevant stats, so I started with a template for offense. Ideally, all rotation-level players would fit into a role.
# Print the R version, platform and attached/loaded package versions used for
# this run.
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] rpubs_0.2.2 reactable_0.3.0 crosstalk_1.2.0 plotly_4.10.0
## [5] janitor_2.1.0 rvest_1.0.2 forcats_0.5.1 stringr_1.4.0
## [9] dplyr_1.0.7 purrr_0.3.4 readr_2.0.1 tidyr_1.2.0
## [13] tibble_3.1.4 ggplot2_3.3.5 tidyverse_1.3.1
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.7 lubridate_1.7.10 assertthat_0.2.1 digest_0.6.27
## [5] utf8_1.2.2 reactR_0.4.4 R6_2.5.1 cellranger_1.1.0
## [9] backports_1.2.1 reprex_2.0.0 evaluate_0.14 highr_0.9
## [13] httr_1.4.2 pillar_1.6.2 rlang_0.4.11 curl_4.3.2
## [17] lazyeval_0.2.2 readxl_1.3.1 rstudioapi_0.13 data.table_1.14.0
## [21] jquerylib_0.1.4 rmarkdown_2.14 labeling_0.4.2 selectr_0.4-2
## [25] htmlwidgets_1.5.4 munsell_0.5.0 broom_0.7.8 compiler_4.1.0
## [29] modelr_0.1.8 xfun_0.31 pkgconfig_2.0.3 htmltools_0.5.2
## [33] tidyselect_1.1.1 viridisLite_0.4.0 fansi_0.5.0 crayon_1.4.1
## [37] tzdb_0.1.2 dbplyr_2.1.1 withr_2.4.2 grid_4.1.0
## [41] jsonlite_1.7.2 gtable_0.3.0 lifecycle_1.0.0 DBI_1.1.1
## [45] magrittr_2.0.1 scales_1.1.1 cli_3.0.1 stringi_1.7.4
## [49] farver_2.1.0 fs_1.5.0 snakecase_0.11.0 xml2_1.3.2
## [53] bslib_0.3.1 ellipsis_0.3.2 generics_0.1.0 vctrs_0.3.8
## [57] tools_4.1.0 glue_1.4.2 hms_1.1.0 fastmap_1.1.0
## [61] yaml_2.2.1 colorspace_2.0-2 knitr_1.33 haven_2.4.1
## [65] sass_0.4.0