# Faith, Brian - MSDS 456 - Assignment #2
library(BasketballAnalyzeR)
## Loading required package: ggplot2
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
##
## If you want to reproduce the figures contained in the book of
## Zuccolotto and Manisera (2020) and
## if the version of your R machine is >= 3.6.0, you need to type
## RNGkind(sample.kind = "Rounding")
## at the beginning of your working session
RNGkind(sample.kind="Rounding")
## Warning in RNGkind(sample.kind = "Rounding"): non-uniform 'Rounding' sampler
## used
Sys.setenv("VROOM_CONNECTION_SIZE" = 131072 * 2)
library(nbastatR)
## Warning: replacing previous import 'dplyr::collapse' by 'glue::collapse' when
## loading 'nbastatR'
## Warning: replacing previous import 'curl::handle_reset' by 'httr::handle_reset'
## when loading 'nbastatR'
## Warning: replacing previous import 'httr::timeout' by 'memoise::timeout' when
## loading 'nbastatR'
## Warning: replacing previous import 'magrittr::set_names' by 'purrr::set_names'
## when loading 'nbastatR'
## Warning: replacing previous import 'jsonlite::flatten' by 'purrr::flatten' when
## loading 'nbastatR'
## Warning: replacing previous import 'curl::parse_date' by 'readr::parse_date'
## when loading 'nbastatR'
## Warning: replacing previous import 'purrr::list_along' by 'rlang::list_along'
## when loading 'nbastatR'
## Warning: replacing previous import 'purrr::invoke' by 'rlang::invoke' when
## loading 'nbastatR'
## Warning: replacing previous import 'purrr::flatten_raw' by 'rlang::flatten_raw'
## when loading 'nbastatR'
## Warning: replacing previous import 'purrr::modify' by 'rlang::modify' when
## loading 'nbastatR'
## Warning: replacing previous import 'purrr::as_function' by 'rlang::as_function'
## when loading 'nbastatR'
## Warning: replacing previous import 'purrr::flatten_dbl' by 'rlang::flatten_dbl'
## when loading 'nbastatR'
## Warning: replacing previous import 'jsonlite::unbox' by 'rlang::unbox' when
## loading 'nbastatR'
## Warning: replacing previous import 'purrr::flatten_lgl' by 'rlang::flatten_lgl'
## when loading 'nbastatR'
## Warning: replacing previous import 'purrr::flatten_int' by 'rlang::flatten_int'
## when loading 'nbastatR'
## Warning: replacing previous import 'purrr::%@%' by 'rlang::%@%' when loading
## 'nbastatR'
## Warning: replacing previous import 'purrr::flatten_chr' by 'rlang::flatten_chr'
## when loading 'nbastatR'
## Warning: replacing previous import 'purrr::splice' by 'rlang::splice' when
## loading 'nbastatR'
## Warning: replacing previous import 'purrr::flatten' by 'rlang::flatten' when
## loading 'nbastatR'
## Warning: replacing previous import 'purrr::prepend' by 'rlang::prepend' when
## loading 'nbastatR'
## Warning: replacing previous import 'readr::guess_encoding' by
## 'rvest::guess_encoding' when loading 'nbastatR'
## Warning: replacing previous import 'magrittr::extract' by 'tidyr::extract' when
## loading 'nbastatR'
## Warning: replacing previous import 'rlang::as_list' by 'xml2::as_list' when
## loading 'nbastatR'
library(tidyverse)
## Warning in (function (kind = NULL, normal.kind = NULL, sample.kind = NULL) :
## non-uniform 'Rounding' sampler used
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble 3.1.2 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.0.2 v forcats 0.5.1
## v purrr 0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(knitr)
library(rvest)
##
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
##
## guess_encoding
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
### 2021 Analysis Four Factors Analysis ###
# Team game logs for the 2021 regular season (one row per team per game).
# Warnings raised during the download are deliberately silenced.
season21 <- 2021
tlog_reg_21 <- suppressWarnings(
  game_logs(
    seasons = season21,
    league = "NBA",
    result_types = "team",
    season_types = "Regular Season"
  )
)
## Acquiring NBA basic team game logs for the 2020-21 Regular Season
# Team box score, 2021 regular season: one row per Season/Team holding totals
# summed over the team's own game-log rows.
tbox_reg_21 <- tlog_reg_21 %>%
  group_by(Season = yearSeason, Team = slugTeam) %>%
  summarise(
    GP = n(),
    MIN = sum(round(minutesTeam / 5)),
    PTS = sum(ptsTeam),
    W = sum(outcomeGame == "W"),
    L = sum(outcomeGame == "L"),
    P2M = sum(fg2mTeam),
    P2A = sum(fg2aTeam),
    P2p = P2M / P2A,
    P3M = sum(fg3mTeam),
    P3A = sum(fg3aTeam),
    P3p = P3M / P3A,
    FTM = sum(ftmTeam),
    FTA = sum(ftaTeam),
    FTp = FTM / FTA,
    OREB = sum(orebTeam),
    DREB = sum(drebTeam),
    AST = sum(astTeam),
    TOV = sum(tovTeam),
    STL = sum(stlTeam),
    BLK = sum(blkTeam),
    PF = sum(pfTeam),
    PM = sum(plusminusTeam)
  ) %>%
  as.data.frame()
## `summarise()` has grouped output by 'Season'. You can override using the `.groups` argument.
# Opponent box score, 2021 regular season. Rows are grouped by slugOpponent,
# so each team's row sums the statistics recorded by the teams it played.
# W and L are swapped so they read from the grouped team's point of view.
obox_reg_21 <- tlog_reg_21 %>%
  group_by(Season = yearSeason, Team = slugOpponent) %>%
  summarise(
    GP = n(),
    MIN = sum(round(minutesTeam / 5)),
    PTS = sum(ptsTeam),
    W = sum(outcomeGame == "L"),
    L = sum(outcomeGame == "W"),
    P2M = sum(fg2mTeam),
    P2A = sum(fg2aTeam),
    P2p = P2M / P2A,
    P3M = sum(fg3mTeam),
    P3A = sum(fg3aTeam),
    P3p = P3M / P3A,
    FTM = sum(ftmTeam),
    FTA = sum(ftaTeam),
    FTp = FTM / FTA,
    OREB = sum(orebTeam),
    DREB = sum(drebTeam),
    AST = sum(astTeam),
    TOV = sum(tovTeam),
    STL = sum(stlTeam),
    BLK = sum(blkTeam),
    PF = sum(pfTeam),
    PM = sum(plusminusTeam)
  ) %>%
  as.data.frame()
## `summarise()` has grouped output by 'Season'. You can override using the `.groups` argument.
# Four Factors - 2021 regular season, one row per team
FF21 <- fourfactors(tbox_reg_21, obox_reg_21)
# Append the win totals as a trailing column (named Wins_21 at this point).
# NOTE(review): assumes fourfactors() keeps the row order of tbox_reg_21 - confirm.
Wins_21 <- tbox_reg_21[["W"]]
FF21 <- cbind(FF21, Wins_21 = Wins_21)
View(FF21)
# Rank every team (1 = best) on the two ratings and on each of the eight
# Four Factors columns. A leading minus ranks "higher is better" statistics;
# the others are "lower is better".
#
# The ranks are created in the same order as the labels assigned below
# (all offensive factors first, then all defensive factors). Previously they
# were created in interleaved Off/Def order while the labels stayed grouped,
# so columns 20-25 carried the wrong names (e.g. "Rank TOV%" actually held
# the Opp eFG rank) and every later positional subset inherited the mix-up.
FF21 <- FF21 %>%
  mutate(
    `Rank ORtg` = rank(-ORtg),
    `Rank DRtg` = rank(DRtg),
    `Rank eFG` = rank(-F1.Off),
    `Rank TOV%` = rank(F2.Off),
    `Rank ORB%` = rank(-F3.Off),
    `Rank FTR` = rank(-F4.Off),
    `Rank Opp eFG` = rank(F1.Def),
    `Rank Opp TOV%` = rank(-F2.Def),
    # Label kept as in the original script; it ranks F3.Def, the column
    # labelled "Opp DRB%" below, with higher values ranked better.
    `Rank Opp ORB%` = rank(-F3.Def),
    `Rank Opp FTR` = rank(F4.Def)
  )
# Human-readable names for all 26 columns (fourfactors output, wins, ranks).
colnames(FF21) <- c("Team", "Off Poss", "Def Poss", "Off Pace", "Def Pace",
                    "ORtg", "DRtg", "eFG", "TOV%", "ORB%", "FTR",
                    "Opp eFG", "Opp TOV%", "Opp DRB%", "Opp FTR", "Wins",
                    "Rank ORtg", "Rank DRtg", "Rank eFG", "Rank TOV%",
                    "Rank ORB%", "Rank FTR", "Rank Opp eFG", "Rank Opp TOV%",
                    "Rank Opp ORB%", "Rank Opp FTR")
# Four Factors statistics table: Team and Wins followed by the ratings and
# each factor paired with its opponent counterpart, rounded to one decimal.
FF21_stats <- cbind(
  FF21[, c("Team", "Wins")],
  round(
    FF21[, c("ORtg", "DRtg", "eFG", "Opp eFG", "TOV%", "Opp TOV%",
             "ORB%", "Opp DRB%", "FTR", "Opp FTR")],
    1
  )
)
# League-wide means of every numeric column (everything except Team)
Mean_stats_21 <- round(colMeans(FF21_stats[, -1]), 1)
kable(Mean_stats_21, digits = 1)
| Wins |
36.0 |
| ORtg |
109.9 |
| DRtg |
109.9 |
| eFG |
53.8 |
| Opp eFG |
53.8 |
| TOV% |
13.6 |
| Opp TOV% |
13.6 |
| ORB% |
22.2 |
| Opp DRB% |
77.8 |
| FTR |
19.2 |
| Opp FTR |
19.2 |
# Rankings table: Team and Wins followed by the rank columns, each factor
# paired with its opponent counterpart. Ranks are rounded to whole numbers.
FF21_ranks <- cbind(
  FF21[, c("Team", "Wins")],
  round(
    FF21[, c("Rank ORtg", "Rank DRtg", "Rank eFG", "Rank Opp eFG",
             "Rank TOV%", "Rank Opp TOV%", "Rank ORB%", "Rank Opp ORB%",
             "Rank FTR", "Rank Opp FTR")]
  )
)
View(FF21_ranks)
### 2022 Analysis Four Factors Analysis ###
# Team game logs for the 2022 regular season (one row per team per game).
# Warnings raised during the download are deliberately silenced.
season22 <- 2022
tlog_reg_22 <- suppressWarnings(
  game_logs(
    seasons = season22,
    league = "NBA",
    result_types = "team",
    season_types = "Regular Season"
  )
)
## Acquiring NBA basic team game logs for the 2021-22 Regular Season
# Team box score, 2022 regular season: one row per Season/Team holding totals
# summed over the team's own game-log rows.
tbox_reg_22 <- tlog_reg_22 %>%
  group_by(Season = yearSeason, Team = slugTeam) %>%
  summarise(
    GP = n(),
    MIN = sum(round(minutesTeam / 5)),
    PTS = sum(ptsTeam),
    W = sum(outcomeGame == "W"),
    L = sum(outcomeGame == "L"),
    P2M = sum(fg2mTeam),
    P2A = sum(fg2aTeam),
    P2p = P2M / P2A,
    P3M = sum(fg3mTeam),
    P3A = sum(fg3aTeam),
    P3p = P3M / P3A,
    FTM = sum(ftmTeam),
    FTA = sum(ftaTeam),
    FTp = FTM / FTA,
    OREB = sum(orebTeam),
    DREB = sum(drebTeam),
    AST = sum(astTeam),
    TOV = sum(tovTeam),
    STL = sum(stlTeam),
    BLK = sum(blkTeam),
    PF = sum(pfTeam),
    PM = sum(plusminusTeam)
  ) %>%
  as.data.frame()
## `summarise()` has grouped output by 'Season'. You can override using the `.groups` argument.
# Opponent box score, 2022 regular season. Rows are grouped by slugOpponent,
# so each team's row sums the statistics recorded by the teams it played.
# W and L are swapped so they read from the grouped team's point of view.
obox_reg_22 <- tlog_reg_22 %>%
  group_by(Season = yearSeason, Team = slugOpponent) %>%
  summarise(
    GP = n(),
    MIN = sum(round(minutesTeam / 5)),
    PTS = sum(ptsTeam),
    W = sum(outcomeGame == "L"),
    L = sum(outcomeGame == "W"),
    P2M = sum(fg2mTeam),
    P2A = sum(fg2aTeam),
    P2p = P2M / P2A,
    P3M = sum(fg3mTeam),
    P3A = sum(fg3aTeam),
    P3p = P3M / P3A,
    FTM = sum(ftmTeam),
    FTA = sum(ftaTeam),
    FTp = FTM / FTA,
    OREB = sum(orebTeam),
    DREB = sum(drebTeam),
    AST = sum(astTeam),
    TOV = sum(tovTeam),
    STL = sum(stlTeam),
    BLK = sum(blkTeam),
    PF = sum(pfTeam),
    PM = sum(plusminusTeam)
  ) %>%
  as.data.frame()
## `summarise()` has grouped output by 'Season'. You can override using the `.groups` argument.
# Four Factors - 2022 regular season, one row per team
FF22 <- fourfactors(tbox_reg_22, obox_reg_22)
# Append the win totals as a trailing column (named Wins_22 at this point).
# NOTE(review): assumes fourfactors() keeps the row order of tbox_reg_22 - confirm.
Wins_22 <- tbox_reg_22[["W"]]
FF22 <- cbind(FF22, Wins_22 = Wins_22)
# Rank every team (1 = best) on the two ratings and on each of the eight
# Four Factors columns. A leading minus ranks "higher is better" statistics;
# the others are "lower is better".
#
# The ranks are created in the same order as the labels assigned below
# (all offensive factors first, then all defensive factors). Previously they
# were created in interleaved Off/Def order while the labels stayed grouped,
# so columns 20-25 carried the wrong names (e.g. "Rank TOV%" actually held
# the Opp eFG rank) and every later positional subset inherited the mix-up.
FF22 <- FF22 %>%
  mutate(
    `Rank ORtg` = rank(-ORtg),
    `Rank DRtg` = rank(DRtg),
    `Rank eFG` = rank(-F1.Off),
    `Rank TOV%` = rank(F2.Off),
    `Rank ORB%` = rank(-F3.Off),
    `Rank FTR` = rank(-F4.Off),
    `Rank Opp eFG` = rank(F1.Def),
    `Rank Opp TOV%` = rank(-F2.Def),
    # Label kept as in the original script; it ranks F3.Def, the column
    # labelled "Opp DRB%" below, with higher values ranked better.
    `Rank Opp ORB%` = rank(-F3.Def),
    `Rank Opp FTR` = rank(F4.Def)
  )
# Human-readable names for all 26 columns (fourfactors output, wins, ranks).
colnames(FF22) <- c("Team", "Off Poss", "Def Poss", "Off Pace", "Def Pace",
                    "ORtg", "DRtg", "eFG", "TOV%", "ORB%", "FTR",
                    "Opp eFG", "Opp TOV%", "Opp DRB%", "Opp FTR", "Wins",
                    "Rank ORtg", "Rank DRtg", "Rank eFG", "Rank TOV%",
                    "Rank ORB%", "Rank FTR", "Rank Opp eFG", "Rank Opp TOV%",
                    "Rank Opp ORB%", "Rank Opp FTR")
View(FF22)
# Four Factors statistics table: Team and Wins followed by the ratings and
# each factor paired with its opponent counterpart, rounded to one decimal.
FF22_stats <- cbind(
  FF22[, c("Team", "Wins")],
  round(
    FF22[, c("ORtg", "DRtg", "eFG", "Opp eFG", "TOV%", "Opp TOV%",
             "ORB%", "Opp DRB%", "FTR", "Opp FTR")],
    1
  )
)
View(FF22_stats)
# League-wide means of every numeric column (everything except Team)
Mean_stats_22 <- round(colMeans(FF22_stats[, -1]), 1)
kable(Mean_stats_22, digits = 1)
| Wins |
33.9 |
| ORtg |
108.6 |
| DRtg |
108.6 |
| eFG |
52.8 |
| Opp eFG |
52.8 |
| TOV% |
13.7 |
| Opp TOV% |
13.7 |
| ORB% |
23.2 |
| Opp DRB% |
76.8 |
| FTR |
19.0 |
| Opp FTR |
19.0 |
# Rankings table: Team and Wins followed by the rank columns, each factor
# paired with its opponent counterpart. Ranks are rounded to whole numbers.
FF22_ranks <- cbind(
  FF22[, c("Team", "Wins")],
  round(
    FF22[, c("Rank ORtg", "Rank DRtg", "Rank eFG", "Rank Opp eFG",
             "Rank TOV%", "Rank Opp TOV%", "Rank ORB%", "Rank Opp ORB%",
             "Rank FTR", "Rank Opp FTR")]
  )
)
View(FF22_ranks)
### New York Knicks Analysis ###
# Keep only the Knicks and the four other teams they are compared against,
# for both seasons' ranking tables.
FF21_div <- FF21_ranks %>%
  filter(Team %in% c("NYK", "BKN", "BOS", "PHI", "TOR"))
FF22_div <- FF22_ranks %>%
  filter(Team %in% c("NYK", "BKN", "BOS", "PHI", "TOR"))
View(FF21_div)
View(FF22_div)
### Correlation and Regression, 2021 ###
# Drop Wins, ORtg and DRtg (columns 2-4). The Team column is still present
# here and is excluded by the numeric-only selection below.
FF_corr <- FF21_stats[, -(2:4)]
# Four Factors correlation matrix over the numeric columns, 3 decimals
kable(round(cor(FF_corr[, vapply(FF_corr, is.numeric, logical(1))]), 3))
| eFG |
1.000 |
-0.413 |
-0.147 |
-0.213 |
0.077 |
0.249 |
-0.064 |
0.033 |
| Opp eFG |
-0.413 |
1.000 |
0.057 |
0.238 |
0.017 |
-0.359 |
-0.163 |
0.046 |
| TOV% |
-0.147 |
0.057 |
1.000 |
0.278 |
0.111 |
0.104 |
-0.023 |
-0.154 |
| Opp TOV% |
-0.213 |
0.238 |
0.278 |
1.000 |
-0.221 |
-0.359 |
0.076 |
0.474 |
| ORB% |
0.077 |
0.017 |
0.111 |
-0.221 |
1.000 |
0.452 |
0.148 |
-0.259 |
| Opp DRB% |
0.249 |
-0.359 |
0.104 |
-0.359 |
0.452 |
1.000 |
-0.067 |
-0.304 |
| FTR |
-0.064 |
-0.163 |
-0.023 |
0.076 |
0.148 |
-0.067 |
1.000 |
0.145 |
| Opp FTR |
0.033 |
0.046 |
-0.154 |
0.474 |
-0.259 |
-0.304 |
0.145 |
1.000 |
# Regression Analysis for Predicting Wins
# Each predictor is a "team minus opponent" differential for one factor,
# rescaled from percentage points to proportions.
eFG_diff <- (FF21_stats$eFG - FF21_stats$`Opp eFG`) / 100
TOV_diff <- (FF21_stats$`TOV%` - FF21_stats$`Opp TOV%`) / 100
# The column labelled `Opp DRB%` behaves like the team's OWN defensive
# rebounding percentage: its league mean is ~78 and it correlates positively
# with ORB% in the matrix above. The opponent's offensive rebounding
# percentage is therefore 100 minus that column. Subtracting the column
# directly (as before) produced "ORB% - DRB%", which is not a differential.
RB_diff <- (FF21_stats$`ORB%` - (100 - FF21_stats$`Opp DRB%`)) / 100
FTR_diff <- (FF21_stats$FTR - FF21_stats$`Opp FTR`) / 100
Regression_21 <- lm(FF21_stats$Wins ~ eFG_diff + TOV_diff + RB_diff + FTR_diff)
summary(Regression_21)
# NOTE: the '##' console output transcribed in this section was captured
# before RB_diff was corrected; re-run the script to refresh the figures.
##
## Call:
## lm(formula = FF21_stats$Wins ~ eFG_diff + TOV_diff + RB_diff +
## FTR_diff)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.0475 -1.8753 0.4269 1.4246 7.5482
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 52.78 23.97 2.202 0.03714 *
## eFG_diff 339.67 26.85 12.652 2.29e-12 ***
## TOV_diff -190.38 58.83 -3.236 0.00340 **
## RB_diff 30.11 43.09 0.699 0.49110
## FTR_diff 100.61 34.99 2.875 0.00814 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.826 on 25 degrees of freedom
## Multiple R-squared: 0.8741, Adjusted R-squared: 0.854
## F-statistic: 43.4 on 4 and 25 DF, p-value: 6.694e-11
# Correlation between Four Factors and Wins
cor(FF21_stats$Wins, eFG_diff)
## [1] 0.8927872
cor(FF21_stats$Wins, TOV_diff)
## [1] -0.06954477
cor(FF21_stats$Wins, RB_diff)
## [1] -0.1119886
cor(FF21_stats$Wins, FTR_diff)
## [1] 0.1595572
# Filter New York Knicks 2021 Stats and FF Differentials
# The differentials must be built exactly like the model's predictors above.
NYK21_stats <- FF21_stats %>%
  filter(Team == "NYK")
NYK_eFG_diff <- (NYK21_stats$eFG - NYK21_stats$`Opp eFG`) / 100
NYK_TOV_diff <- (NYK21_stats$`TOV%` - NYK21_stats$`Opp TOV%`) / 100
NYK_RB_diff <- (NYK21_stats$`ORB%` - (100 - NYK21_stats$`Opp DRB%`)) / 100
NYK_FTR_diff <- (NYK21_stats$FTR - NYK21_stats$`Opp FTR`) / 100
# Predict Knicks Number of Wins and Compare to Actual
NYK21_WPredict <- round(predict(
  Regression_21,
  newdata = data.frame(
    eFG_diff = NYK_eFG_diff,
    TOV_diff = NYK_TOV_diff,
    RB_diff = NYK_RB_diff,
    FTR_diff = NYK_FTR_diff
  )
))
summary(NYK21_WPredict)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 40 40 40 40 40 40
# Predicted minus actual wins (negative = the team won more than predicted)
NYK21_Windiff <- NYK21_WPredict - NYK21_stats$Wins
NYK21_Windiff
## 1
## -1
### Clusters ###
#' Scrape and merge one season of Basketball-Reference player statistics
#'
#' Downloads the "totals", "per_poss" and "advanced" player pages for the
#' given season, cleans each table the same way and joins them into one
#' tibble with one row per player.
#'
#' Cleaning per table: drop empty columns, normalise column names, remove the
#' repeated header rows (player == "Player"), convert every column except
#' player/tm/pos to numeric with NA replaced by 0, and keep only the first
#' row for each player name.
#' NOTE(review): the first row is presumably the combined "TOT" line for
#' players who changed teams - confirm against the site layout.
#'
#' @param season Season end year used in the page URL (e.g. 2021).
#' @return A tibble of totals, per-100-possession columns (suffixed "_p100")
#'   and advanced statistics.
scrape_stats <- function(season) {
  id_cols <- c("player", "tm", "pos")

  # Download one player page and apply the shared cleaning steps.
  # Assumes the first table on the page is the player table.
  fetch_table <- function(page) {
    url <- paste0(
      "https://www.basketball-reference.com/leagues/NBA_", season, "_",
      page, ".html"
    )
    raw <- html_table(read_html(url))[[1]]
    raw %>%
      remove_empty(which = "cols") %>%
      clean_names() %>%
      dplyr::filter(player != "Player") %>%
      # across() replaces the deprecated mutate_at()/funs() pair
      mutate(across(-all_of(id_cols), ~ {
        num <- as.numeric(.x)
        replace(num, is.na(num), 0)
      })) %>%
      as_tibble() %>%
      group_by(player) %>%
      slice(1) %>%
      ungroup()
  }

  # Player Total Stats
  player_stats_tot <- fetch_table("totals") %>%
    select(-rk)

  # Player Per 100 Poss Stats
  # Columns 9:29 are counted while `rk` is still present; they receive the
  # "_p100" suffix so they do not clash with the totals columns in the join.
  player_stats_p100 <- fetch_table("per_poss") %>%
    rename_with(~ paste0(.x, "_p100"), .cols = 9:29) %>%
    select(-rk)

  # Player Advanced Stats
  player_stats_adv <- fetch_table("advanced") %>%
    select(-rk)

  # The advanced table is joined without `gs`, unlike the per-possession one.
  full_join(
    player_stats_tot, player_stats_p100,
    by = c("player", "pos", "age", "tm", "g", "gs", "mp")
  ) %>%
    full_join(
      player_stats_adv,
      by = c("player", "pos", "age", "tm", "g", "mp")
    )
}
# Scrape seasons 2019 through 2021 and stack the per-season tables row-wise
player_stats <- map_dfr(.x = 2019:2021, .f = scrape_stats)
# Filter and summarise stats for the cluster analysis.
# Only season rows with at least 1500 minutes are kept. Each player is then
# collapsed to one row: games and minutes are summed, Team is taken from the
# player's last remaining row, and every rate statistic is the plain
# (unweighted) mean over the remaining rows.
player_stats_final <- player_stats %>%
  filter(mp >= 1500) %>%
  group_by(player) %>%
  summarise(
    Team    = last(tm),
    Games   = sum(g),
    Mins    = sum(mp),
    eFG     = mean(e_fg_percent),
    P3M_100 = mean(x3p_p100),
    P3p     = mean(x3p_percent_p100),
    FTp     = mean(ft_percent_p100),
    PTS_100 = mean(pts_p100),
    ORtg    = mean(o_rtg),
    DRtg    = mean(d_rtg),
    P3Ar    = mean(x3p_ar),
    FTR     = mean(f_tr),
    ORBp    = mean(orb_percent),
    DRBp    = mean(drb_percent),
    ASTp    = mean(ast_percent),
    STLp    = mean(stl_percent),
    BLKp    = mean(blk_percent),
    TOVp    = mean(tov_percent),
    USGp    = mean(usg_percent),
    PER     = mean(per),
    BPM     = mean(bpm),
    VORP    = mean(vorp)
  )
### Basketball Data Science Clustering Approach ###
# Statistics-only table: columns 5 to 23 of player_stats_final, i.e. the 19
# rate statistics after player, Team, Games and Mins
player_stats_clu <- player_stats_final[, 5:23]
# Run hclustering for up to 12 clusters and plot the result to choose k
findk <- hclustering(player_stats_clu, nclumax = 12)
plot(findk)

# Hierarchical clustering with k = 10, labelled by player name
radials <- hclustering(
  player_stats_clu,
  labels = player_stats_final$player,
  k = 10
)
# Radial plots of the cluster profiles
plot(radials, profiles = TRUE)

# Dendrogram with cluster rectangles and coloured branches; the small label
# size keeps the player names legible
plot(radials, rect = TRUE, colored.branches = TRUE, cex.labels = .25)

# Subset for Plots
# Build one table holding each player's label and cluster, the standardised
# statistics and total minutes, then draw one variability plot per cluster.
clu_subset <- subset(player_stats_final, Mins >= 1500)
Mins <- clu_subset$Mins
Scale_stats <- data.frame(radials$Subjects, scale(player_stats_clu), Mins)
dvar <- c("eFG", "P3M_100", "P3p", "FTp", "PTS_100", "ORtg", "DRtg", "P3Ar",
          "FTR", "ORBp", "DRBp", "ASTp", "STLp", "BLKp", "TOVp", "USGp",
          "PER", "BPM", "VORP")
svar <- "Mins"
# Common y-axis range so the ten panels are directly comparable
yRange <- range(Scale_stats[, dvar])
# Bubble-size limits. The upper limit used to be fixed at 7500 minutes; any
# player above it falls outside size.lim and is dropped from the plot. The
# knit log's "Removed 19 rows" (19 = number of plotted variables) suggests
# this happened to one player - TODO confirm. The limit now grows in steps
# of 1500 when needed and stays at 7500 otherwise.
sizeRange <- c(1500, max(7500, ceiling(max(Mins) / 1500) * 1500))
no.clu <- 10
p <- vector("list", no.clu)
for (k in seq_len(no.clu)) {
  Clusters <- subset(Scale_stats, Cluster == k)
  # Columns 3:22 are the 19 scaled statistics plus Mins
  vrb <- variability(Clusters[, 3:22], data.var = dvar,
                     size.var = svar, weight = FALSE, VC = FALSE)
  title <- paste("Cluster", k)
  p[[k]] <- plot(vrb, size.lim = sizeRange, ylim = yRange, title = title,
                 leg.pos = c(0, 1), leg.just = c(-0.5, 0),
                 leg.box = "vertical",
                 leg.brk = seq(sizeRange[1], sizeRange[2], 1500),
                 leg.title.pos = "left", leg.nrow = 1, max.circle = 7)
}
grid.arrange(grobs = p, ncol = 2)
## Warning: Removed 19 rows containing missing values (geom_point).

# Split the scaled table by cluster; for each cluster keep both the rows
# (C<k>) and the vector of player labels (C<k>_Players).
C1 <- filter(Scale_stats, Cluster == 1)
C1_Players <- C1[["Label"]]
C2 <- filter(Scale_stats, Cluster == 2)
C2_Players <- C2[["Label"]]
C3 <- filter(Scale_stats, Cluster == 3)
C3_Players <- C3[["Label"]]
C4 <- filter(Scale_stats, Cluster == 4)
C4_Players <- C4[["Label"]]
C5 <- filter(Scale_stats, Cluster == 5)
C5_Players <- C5[["Label"]]
C6 <- filter(Scale_stats, Cluster == 6)
C6_Players <- C6[["Label"]]
C7 <- filter(Scale_stats, Cluster == 7)
C7_Players <- C7[["Label"]]
C8 <- filter(Scale_stats, Cluster == 8)
C8_Players <- C8[["Label"]]
C9 <- filter(Scale_stats, Cluster == 9)
C9_Players <- C9[["Label"]]
C10 <- filter(Scale_stats, Cluster == 10)
C10_Players <- C10[["Label"]]
# Cluster 1 roster as a one-column table
kable(x = C1_Players, col.names = "Cluster 1")
| Aaron Gordon |
| Al-Farouq Aminu |
| Al Horford |
| Andrew Wiggins |
| Brook Lopez |
| Carmelo Anthony |
| Dario Šaric |
| DeMarre Carroll |
| Dewayne Dedmon |
| Eric Paschall |
| Glenn Robinson III |
| Harrison Barnes |
| Jabari Parker |
| Jae’Sean Tate |
| Jaren Jackson Jr. |
| Jaylen Brown |
| Jeff Green |
| Jerami Grant |
| Jonathan Isaac |
| Justin Jackson |
| Keldon Johnson |
| Kelly Olynyk |
| Kelly Oubre Jr. |
| Kristaps Porzingis |
| Kyle Anderson |
| Kyle Kuzma |
| Marc Gasol |
| Maxi Kleber |
| Miles Bridges |
| Myles Turner |
| Nemanja Bjelica |
| Noah Vonleh |
| P.J. Washington |
| Patrick Williams |
| Rodney Hood |
| Rui Hachimura |
| T.J. Warren |
| Torrey Craig |
# Cluster 2 roster as a one-column table
kable(x = C2_Players, col.names = "Cluster 2")
| Aaron Holiday |
| Anthony Edwards |
| Avery Bradley |
| Cam Reddish |
| Cedi Osman |
| Coby White |
| Collin Sexton |
| Damyean Dotson |
| Darius Bazley |
| Darius Garland |
| De’Andre Hunter |
| Dennis Schröder |
| Dillon Brooks |
| Dwayne Bacon |
| Emmanuel Mudiay |
| Gary Harris |
| Isaac Okoro |
| Jarrett Culver |
| Josh Jackson |
| Josh Okogie |
| Josh Richardson |
| Kent Bazemore |
| Kevin Huerter |
| Kevin Knox |
| Lonnie Walker IV |
| Luguentz Dort |
| RJ Barrett |
| Taurean Prince |
| Théo Maledon |
| Trevor Ariza |
| Tyler Herro |
| Tyler Johnson |
| Will Barton |
# Cluster 3 roster as a one-column table
kable(x = C3_Players, col.names = "Cluster 3")
| Alec Burks |
| Bogdan Bogdanovic |
| Bojan Bogdanovic |
| Buddy Hield |
| CJ McCollum |
| Danilo Gallinari |
| Devonte’ Graham |
| Evan Fournier |
| Gordon Hayward |
| Jamal Murray |
| Jayson Tatum |
| Jeremy Lamb |
| Jordan Clarkson |
| Kendrick Nunn |
| Kevin Love |
| Khris Middleton |
| Klay Thompson |
| Lauri Markkanen |
| Malcolm Brogdon |
| Marcus Morris |
| Michael Porter Jr. |
| Norman Powell |
| Pascal Siakam |
| Reggie Jackson |
| Rudy Gay |
| Terrence Ross |
| Terry Rozier |
| Tobias Harris |
# Cluster 4 roster as a one-column table
kable(x = C4_Players, col.names = "Cluster 4")
| Alex Len |
| Daniel Theis |
| Deandre Ayton |
| Dwight Powell |
| Enes Freedom |
| Ivica Zubac |
| Jarrett Allen |
| John Collins |
| Jonas Valanciunas |
| Khem Birch |
| LaMarcus Aldridge |
| Marvin Bagley III |
| Richaun Holmes |
| Robin Lopez |
| Serge Ibaka |
| Taj Gibson |
| Tristan Thompson |
# Cluster 5 roster as a one-column table
kable(x = C5_Players, col.names = "Cluster 5")
| Andre Drummond |
| Bam Adebayo |
| Ben Simmons |
| Clint Capela |
| DeAndre Jordan |
| Derrick Favors |
| Hassan Whiteside |
| Jakob Poeltl |
| JaVale McGee |
| Jusuf Nurkic |
| Mason Plumlee |
| Montrezl Harrell |
| Nerlens Noel |
| Rudy Gobert |
| Steven Adams |
# Cluster 6 roster as a one-column table
kable(x = C6_Players, col.names = "Cluster 6")
| Andre Iguodala |
| Austin Rivers |
| Danny Green |
| Danuel House Jr. |
| Darius Miller |
| Donte DiVincenzo |
| Dorian Finney-Smith |
| Garrett Temple |
| Jaden McDaniels |
| Jae Crowder |
| Josh Hart |
| Justin Holiday |
| Kentavious Caldwell-Pope |
| Marvin Williams |
| Nicolas Batum |
| OG Anunoby |
| P.J. Tucker |
| Pat Connaughton |
| Patrick Beverley |
| Robert Covington |
| Rodney McGruder |
| Royce O’Neale |
| Solomon Hill |
| Terrance Ferguson |
| Wesley Matthews |
# Cluster 7 roster as a one-column table
kable(x = C7_Players, col.names = "Cluster 7")
| Anthony Davis |
| Blake Griffin |
| Damian Lillard |
| Domantas Sabonis |
| Giannis Antetokounmpo |
| James Harden |
| Jimmy Butler |
| Joel Embiid |
| Julius Randle |
| Karl-Anthony Towns |
| Kawhi Leonard |
| Kevin Durant |
| Kyrie Irving |
| LeBron James |
| Luka Doncic |
| Mike Conley |
| Nikola Jokic |
| Nikola Vucevic |
| Paul George |
| Russell Westbrook |
| Stephen Curry |
| Zion Williamson |
# Cluster 8 roster as a one-column table
kable(x = C8_Players, col.names = "Cluster 8")
| Ben McLemore |
| Bryn Forbes |
| Davis Bertans |
| Desmond Bane |
| Doug McDermott |
| Duncan Robinson |
| Eric Gordon |
| Furkan Korkmaz |
| Gary Trent Jr. |
| J.J. Redick |
| Joe Harris |
| Landry Shamet |
| Langston Galloway |
| Malik Beasley |
| Marco Belinelli |
| Patty Mills |
| Reggie Bullock |
| Saddiq Bey |
| Seth Curry |
| Tim Hardaway Jr. |
| Tony Snell |
# Cluster 9 roster as a one-column table
kable(x = C9_Players, col.names = "Cluster 9")
| Bradley Beal |
| Brandon Ingram |
| D’Angelo Russell |
| De’Aaron Fox |
| DeMar DeRozan |
| Devin Booker |
| Donovan Mitchell |
| Goran Dragic |
| Ja Morant |
| Kemba Walker |
| Lou Williams |
| Spencer Dinwiddie |
| Trae Young |
| Zach LaVine |
# Cluster 10 roster as a one-column table
kable(x = C10_Players, col.names = "Cluster 10")
| Bruce Brown |
| Chris Paul |
| Cory Joseph |
| D.J. Augustin |
| Darren Collison |
| DeAndre’ Bembry |
| Dejounte Murray |
| Delon Wright |
| Dennis Smith Jr. |
| Derrick White |
| Draymond Green |
| Dwyane Wade |
| Eric Bledsoe |
| Evan Turner |
| Fred VanVleet |
| Ish Smith |
| Jalen Brunson |
| Joe Ingles |
| Jrue Holiday |
| Justise Winslow |
| Kyle Lowry |
| Larry Nance Jr. |
| Lonzo Ball |
| Marcus Smart |
| Markelle Fultz |
| Mikal Bridges |
| Monte Morris |
| Otto Porter Jr. |
| Paul Millsap |
| Ricky Rubio |
| Ryan Arcidiacono |
| Shai Gilgeous-Alexander |
| T.J. McConnell |
| Thaddeus Young |
| Tomáš Satoranský |
| Troy Brown Jr. |
| Tyrese Haliburton |
| Tyus Jones |
| Willie Cauley-Stein |
# Attach each player's cluster assignment (second column of Scale_stats) to
# the unscaled per-player statistics
Player_Cluster <- Scale_stats[, 2]
player_stats_all <- cbind(player_stats_final, Player_Cluster)

# Boxplot Comparisons by Cluster
# Every panel is the same boxplot of one statistic split by cluster, so the
# calls go through one local helper. The panel order and the par(mfrow)
# layout changes follow the original sequence exactly.
invisible(local({
  # Draw one boxplot of `stat` by cluster; the title defaults to
  # "<label> by Cluster" and can be overridden through `main`.
  cluster_box <- function(stat, label, main = paste(label, "by Cluster")) {
    boxplot(as.formula(paste(stat, "~ Player_Cluster")),
            data = player_stats_all, col = rainbow(12),
            main = main, xlab = "Cluster", ylab = label)
  }

  # Volume and scoring
  par(mfrow = c(1, 3))
  cluster_box("Mins", "Minutes Played")
  cluster_box("eFG", "eFG%", main = "Effective FG% by Cluster")
  cluster_box("PTS_100", "Points Per 100 Poss.")

  # Three-point shooting
  cluster_box("P3Ar", "3-Pt Att. Rate")
  cluster_box("P3M_100", "3s Made Per 100 Poss.")
  cluster_box("P3p", "3-Pt %")

  # Free throws (two panels per row)
  par(mfrow = c(1, 2))
  cluster_box("FTR", "Free Throw Rate")
  cluster_box("FTp", "Free Throw %")

  # Ratings and usage
  par(mfrow = c(1, 3))
  cluster_box("ORtg", "Off. Rating")
  cluster_box("DRtg", "Def. Rating")
  cluster_box("USGp", "Usage %")

  # Rebounding and blocks
  cluster_box("ORBp", "Off. Rebound %")
  cluster_box("DRBp", "Def. Rebound %")
  cluster_box("BLKp", "Block Rate")

  # Assists, steals and turnovers
  cluster_box("ASTp", "Assist Rate")
  cluster_box("STLp", "Steal Rate")
  cluster_box("TOVp", "Turnover Rate")

  # Composite metrics
  cluster_box("PER", "PER")
  cluster_box("BPM", "BPM")
  cluster_box("VORP", "VORP")

  # Back to a single-panel layout
  par(mfrow = c(1, 1))
}))