# Faith, Brian - MSDS 456 - Assignment #2
library(BasketballAnalyzeR)
## Loading required package: ggplot2
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## 
## If you want to reproduce the figures contained in the book of
## Zuccolotto and Manisera (2020) and
## if the version of your R machine is >= 3.6.0, you need to type
## RNGkind(sample.kind = "Rounding")
## at the beginning of your working session
RNGkind(sample.kind="Rounding")
## Warning in RNGkind(sample.kind = "Rounding"): non-uniform 'Rounding' sampler
## used
Sys.setenv("VROOM_CONNECTION_SIZE" = 131072 * 2)
library(nbastatR)
## Warning: replacing previous import 'dplyr::collapse' by 'glue::collapse' when
## loading 'nbastatR'
## Warning: replacing previous import 'curl::handle_reset' by 'httr::handle_reset'
## when loading 'nbastatR'
## Warning: replacing previous import 'httr::timeout' by 'memoise::timeout' when
## loading 'nbastatR'
## Warning: replacing previous import 'magrittr::set_names' by 'purrr::set_names'
## when loading 'nbastatR'
## Warning: replacing previous import 'jsonlite::flatten' by 'purrr::flatten' when
## loading 'nbastatR'
## Warning: replacing previous import 'curl::parse_date' by 'readr::parse_date'
## when loading 'nbastatR'
## Warning: replacing previous import 'purrr::list_along' by 'rlang::list_along'
## when loading 'nbastatR'
## Warning: replacing previous import 'purrr::invoke' by 'rlang::invoke' when
## loading 'nbastatR'
## Warning: replacing previous import 'purrr::flatten_raw' by 'rlang::flatten_raw'
## when loading 'nbastatR'
## Warning: replacing previous import 'purrr::modify' by 'rlang::modify' when
## loading 'nbastatR'
## Warning: replacing previous import 'purrr::as_function' by 'rlang::as_function'
## when loading 'nbastatR'
## Warning: replacing previous import 'purrr::flatten_dbl' by 'rlang::flatten_dbl'
## when loading 'nbastatR'
## Warning: replacing previous import 'jsonlite::unbox' by 'rlang::unbox' when
## loading 'nbastatR'
## Warning: replacing previous import 'purrr::flatten_lgl' by 'rlang::flatten_lgl'
## when loading 'nbastatR'
## Warning: replacing previous import 'purrr::flatten_int' by 'rlang::flatten_int'
## when loading 'nbastatR'
## Warning: replacing previous import 'purrr::%@%' by 'rlang::%@%' when loading
## 'nbastatR'
## Warning: replacing previous import 'purrr::flatten_chr' by 'rlang::flatten_chr'
## when loading 'nbastatR'
## Warning: replacing previous import 'purrr::splice' by 'rlang::splice' when
## loading 'nbastatR'
## Warning: replacing previous import 'purrr::flatten' by 'rlang::flatten' when
## loading 'nbastatR'
## Warning: replacing previous import 'purrr::prepend' by 'rlang::prepend' when
## loading 'nbastatR'
## Warning: replacing previous import 'readr::guess_encoding' by
## 'rvest::guess_encoding' when loading 'nbastatR'
## Warning: replacing previous import 'magrittr::extract' by 'tidyr::extract' when
## loading 'nbastatR'
## Warning: replacing previous import 'rlang::as_list' by 'xml2::as_list' when
## loading 'nbastatR'
library(tidyverse)
## Warning in (function (kind = NULL, normal.kind = NULL, sample.kind = NULL) :
## non-uniform 'Rounding' sampler used
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble  3.1.2     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.0.2     v forcats 0.5.1
## v purrr   0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(knitr)
library(rvest)      
## 
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
## 
##     guess_encoding
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
### 2021 Analysis Four Factors Analysis ###
# Team game logs, 2021 regular season (reported by nbastatR as 2020-21).
season21 <- 2021
# Warnings raised while downloading are silenced for this call only.
tlog_reg_21 <- suppressWarnings(
  game_logs(
    seasons = season21,
    league = "NBA",
    result_types = "team",
    season_types = "Regular Season"
  )
)
## Acquiring NBA basic team game logs for the 2020-21 Regular Season
# Team box score, 2021 regular season: one row per (Season, Team) holding
# season totals built from the per-game team logs.
#   MIN        per-game team minutes divided by 5 and rounded, then summed
#   W / L      number of games whose outcomeGame is "W" / "L"
#   P2p, P3p,  season percentages computed from the summed makes and
#   FTp        attempts (not an average of per-game percentages)
tbox_reg_21 <- tlog_reg_21 %>%
  group_by(Season = yearSeason, Team = slugTeam) %>%
  summarise(
    GP = n(),
    MIN = sum(round(minutesTeam / 5)),
    PTS = sum(ptsTeam),
    W = sum(outcomeGame == "W"),
    L = sum(outcomeGame == "L"),
    P2M = sum(fg2mTeam),
    P2A = sum(fg2aTeam),
    P2p = P2M / P2A,
    P3M = sum(fg3mTeam),
    P3A = sum(fg3aTeam),
    P3p = P3M / P3A,
    FTM = sum(ftmTeam),
    FTA = sum(ftaTeam),
    FTp = FTM / FTA,
    OREB = sum(orebTeam),
    DREB = sum(drebTeam),
    AST = sum(astTeam),
    TOV = sum(tovTeam),
    STL = sum(stlTeam),
    BLK = sum(blkTeam),
    PF = sum(pfTeam),
    PM = sum(plusminusTeam)
  ) %>%
  as.data.frame()
## `summarise()` has grouped output by 'Season'. You can override using the `.groups` argument.
# Opponent box score, 2021 regular season. The same game logs are grouped by
# slugOpponent, so each row totals what was recorded against that team.
# W and L are swapped relative to the team box score: a logged "L" is counted
# as a W for the grouping team (presumably outcomeGame is from slugTeam's
# point of view -- confirm against nbastatR).
obox_reg_21 <- tlog_reg_21 %>%
  group_by(Season = yearSeason, Team = slugOpponent) %>%
  summarise(
    GP = n(),
    MIN = sum(round(minutesTeam / 5)),
    PTS = sum(ptsTeam),
    W = sum(outcomeGame == "L"),
    L = sum(outcomeGame == "W"),
    P2M = sum(fg2mTeam),
    P2A = sum(fg2aTeam),
    P2p = P2M / P2A,
    P3M = sum(fg3mTeam),
    P3A = sum(fg3aTeam),
    P3p = P3M / P3A,
    FTM = sum(ftmTeam),
    FTA = sum(ftaTeam),
    FTp = FTM / FTA,
    OREB = sum(orebTeam),
    DREB = sum(drebTeam),
    AST = sum(astTeam),
    TOV = sum(tovTeam),
    STL = sum(stlTeam),
    BLK = sum(blkTeam),
    PF = sum(pfTeam),
    PM = sum(plusminusTeam)
  ) %>%
  as.data.frame()
## `summarise()` has grouped output by 'Season'. You can override using the `.groups` argument.
# Four Factors for every team, 2021 regular season, computed by
# BasketballAnalyzeR from the team and opponent box scores.
FF21 <- fourfactors(tbox_reg_21, obox_reg_21)
# Append each team's win total as an extra column (named Wins_21 here).
# The wins are bound by row position, not joined on team name.
# NOTE(review): assumes fourfactors() returns its rows in the same team order
# as tbox_reg_21 -- confirm.
Wins_21 <- tbox_reg_21$W
FF21 <- cbind(FF21,Wins_21)
# Opens the table in the interactive data viewer.
View(FF21)
# Rank every team on the two ratings and on each of the four factors.
# rank(-x) ranks descending (largest value gets rank 1); rank(x) ranks
# ascending (smallest value gets rank 1).
#
# The rank columns are appended in the SAME order as the "Rank ..." labels
# assigned below: ORtg, DRtg, the four offensive factors, then the four
# defensive factors. Previously they were appended interleaved
# (F1.Off, F1.Def, F2.Off, ...) while the labels were offence-first, so most
# rank columns carried the wrong label (e.g. "Rank TOV%" held the Opp eFG
# rank). The positional subset that builds FF21_ranks depends on this layout.
FF21 <- FF21 %>%
  mutate(
    rank_ortg = rank(-ORtg),
    rank_drtg = rank(DRtg),
    rank_f1_off = rank(-F1.Off),
    rank_f2_off = rank(F2.Off),
    rank_f3_off = rank(-F3.Off),
    rank_f4_off = rank(-F4.Off),
    rank_f1_def = rank(F1.Def),
    rank_f2_def = rank(-F2.Def),
    rank_f3_def = rank(-F3.Def),
    rank_f4_def = rank(F4.Def)
  )
# Presentation names for all 26 columns, assigned by position: 15 columns
# from fourfactors(), the wins column, then the 10 rank columns.
colnames(FF21) <- c("Team", "Off Poss", "Def Poss", "Off Pace", "Def Pace",
                         "ORtg", "DRtg", "eFG", "TOV%", "ORB%", "FTR",
                         "Opp eFG", "Opp TOV%", "Opp DRB%", "Opp FTR", "Wins",
                         "Rank ORtg", "Rank DRtg", "Rank eFG", "Rank TOV%", 
                         "Rank ORB%", "Rank FTR", "Rank Opp eFG", "Rank Opp TOV%",
                         "Rank Opp ORB%", "Rank Opp FTR")

# Four Factors stats table: Team and Wins (columns 1 and 16) followed by the
# ratings and factors rounded to one decimal. The position vector places each
# team stat next to its opponent counterpart.
FF21_stats <- cbind(
  FF21[, c(1, 16)],
  round(FF21[, c(6, 7, 8, 12, 9, 13, 10, 14, 11, 15)], 1)
)

# League-wide mean of every numeric column (Wins plus the ten stats).
Mean_stats_21 <- round(colMeans(FF21_stats[, 2:12]), 1)
kable(Mean_stats_21, digits = 1)
x
Wins 36.0
ORtg 109.9
DRtg 109.9
eFG 53.8
Opp eFG 53.8
TOV% 13.6
Opp TOV% 13.6
ORB% 22.2
Opp DRB% 77.8
FTR 19.2
Opp FTR 19.2
# Rankings table: Team and Wins (columns 1 and 16) followed by the ten rank
# columns, rounded to whole numbers. Columns are picked by position so that,
# going by the labels assigned to FF21, each team rank is followed by its
# opponent counterpart.
FF21_ranks <- cbind(
  FF21[, c(1, 16)],
  round(FF21[, c(17, 18, 19, 23, 20, 24, 21, 25, 22, 26)])
)
View(FF21_ranks)

### 2022 Analysis Four Factors Analysis ###
# Team game logs, 2022 regular season (reported by nbastatR as 2021-22).
season22 <- 2022
# Warnings raised while downloading are silenced for this call only.
tlog_reg_22 <- suppressWarnings(
  game_logs(
    seasons = season22,
    league = "NBA",
    result_types = "team",
    season_types = "Regular Season"
  )
)
## Acquiring NBA basic team game logs for the 2021-22 Regular Season
# Team box score, 2022 regular season: season totals per (Season, Team).
# MIN sums the per-game team minutes after dividing by 5 and rounding; the
# shooting percentages are ratios of the summed makes to summed attempts.
tbox_reg_22 <- tlog_reg_22 %>%
  group_by(Season = yearSeason, Team = slugTeam) %>%
  summarise(
    GP = n(),
    MIN = sum(round(minutesTeam / 5)),
    PTS = sum(ptsTeam),
    W = sum(outcomeGame == "W"),
    L = sum(outcomeGame == "L"),
    P2M = sum(fg2mTeam),
    P2A = sum(fg2aTeam),
    P2p = P2M / P2A,
    P3M = sum(fg3mTeam),
    P3A = sum(fg3aTeam),
    P3p = P3M / P3A,
    FTM = sum(ftmTeam),
    FTA = sum(ftaTeam),
    FTp = FTM / FTA,
    OREB = sum(orebTeam),
    DREB = sum(drebTeam),
    AST = sum(astTeam),
    TOV = sum(tovTeam),
    STL = sum(stlTeam),
    BLK = sum(blkTeam),
    PF = sum(pfTeam),
    PM = sum(plusminusTeam)
  ) %>%
  as.data.frame()
## `summarise()` has grouped output by 'Season'. You can override using the `.groups` argument.
# Opponent box score, 2022 regular season: the game logs grouped by
# slugOpponent, i.e. totals recorded against each team. W and L are swapped
# relative to the team box score (a logged "L" counts as a W for the grouping
# team; presumably outcomeGame is from slugTeam's point of view -- confirm).
obox_reg_22 <- tlog_reg_22 %>%
  group_by(Season = yearSeason, Team = slugOpponent) %>%
  summarise(
    GP = n(),
    MIN = sum(round(minutesTeam / 5)),
    PTS = sum(ptsTeam),
    W = sum(outcomeGame == "L"),
    L = sum(outcomeGame == "W"),
    P2M = sum(fg2mTeam),
    P2A = sum(fg2aTeam),
    P2p = P2M / P2A,
    P3M = sum(fg3mTeam),
    P3A = sum(fg3aTeam),
    P3p = P3M / P3A,
    FTM = sum(ftmTeam),
    FTA = sum(ftaTeam),
    FTp = FTM / FTA,
    OREB = sum(orebTeam),
    DREB = sum(drebTeam),
    AST = sum(astTeam),
    TOV = sum(tovTeam),
    STL = sum(stlTeam),
    BLK = sum(blkTeam),
    PF = sum(pfTeam),
    PM = sum(plusminusTeam)
  ) %>%
  as.data.frame()
## `summarise()` has grouped output by 'Season'. You can override using the `.groups` argument.
# Four Factors for every team, 2022 regular season, computed by
# BasketballAnalyzeR from the team and opponent box scores.
FF22 <- fourfactors(tbox_reg_22, obox_reg_22)
# Append each team's win total as an extra column (named Wins_22 here).
# The wins are bound by row position, not joined on team name.
# NOTE(review): assumes fourfactors() returns its rows in the same team order
# as tbox_reg_22 -- confirm.
Wins_22 <- tbox_reg_22$W
FF22 <- cbind(FF22,Wins_22)

# Rank every team on the two ratings and on each of the four factors.
# rank(-x) ranks descending (largest value gets rank 1); rank(x) ranks
# ascending (smallest value gets rank 1).
#
# The rank columns are appended in the SAME order as the "Rank ..." labels
# assigned below: ORtg, DRtg, the four offensive factors, then the four
# defensive factors. Previously they were appended interleaved
# (F1.Off, F1.Def, F2.Off, ...) while the labels were offence-first, so most
# rank columns carried the wrong label (e.g. "Rank TOV%" held the Opp eFG
# rank). The positional subset that builds FF22_ranks depends on this layout.
FF22 <- FF22 %>%
  mutate(
    rank_ortg = rank(-ORtg),
    rank_drtg = rank(DRtg),
    rank_f1_off = rank(-F1.Off),
    rank_f2_off = rank(F2.Off),
    rank_f3_off = rank(-F3.Off),
    rank_f4_off = rank(-F4.Off),
    rank_f1_def = rank(F1.Def),
    rank_f2_def = rank(-F2.Def),
    rank_f3_def = rank(-F3.Def),
    rank_f4_def = rank(F4.Def)
  )
# Presentation names for all 26 columns, assigned by position: 15 columns
# from fourfactors(), the wins column, then the 10 rank columns.
colnames(FF22) <- c("Team", "Off Poss", "Def Poss", "Off Pace", "Def Pace",
                    "ORtg", "DRtg", "eFG", "TOV%", "ORB%", "FTR",
                    "Opp eFG", "Opp TOV%", "Opp DRB%", "Opp FTR", "Wins",
                    "Rank ORtg", "Rank DRtg", "Rank eFG", "Rank TOV%", 
                    "Rank ORB%", "Rank FTR", "Rank Opp eFG", "Rank Opp TOV%",
                    "Rank Opp ORB%", "Rank Opp FTR")
View(FF22)
# Four Factors stats table: Team and Wins (columns 1 and 16) followed by the
# ratings and factors rounded to one decimal, each team stat placed next to
# its opponent counterpart.
FF22_stats <- cbind(
  FF22[, c(1, 16)],
  round(FF22[, c(6, 7, 8, 12, 9, 13, 10, 14, 11, 15)], 1)
)
View(FF22_stats)

# League-wide mean of every numeric column (Wins plus the ten stats).
Mean_stats_22 <- round(colMeans(FF22_stats[, 2:12]), 1)
kable(Mean_stats_22, digits = 1)
x
Wins 33.9
ORtg 108.6
DRtg 108.6
eFG 52.8
Opp eFG 52.8
TOV% 13.7
Opp TOV% 13.7
ORB% 23.2
Opp DRB% 76.8
FTR 19.0
Opp FTR 19.0
# Rankings table: Team and Wins (columns 1 and 16) followed by the ten rank
# columns, rounded to whole numbers and picked by position so that, going by
# the labels assigned to FF22, each team rank precedes its opponent
# counterpart.
FF22_ranks <- cbind(
  FF22[, c(1, 16)],
  round(FF22[, c(17, 18, 19, 23, 20, 24, 21, 25, 22, 26)])
)

View(FF22_ranks)

### New York Knicks Analysis ###
# Rankings for the Knicks and four comparison teams in each season
# (presumably their division rivals, going by the `_div` suffix).
FF21_div <- filter(FF21_ranks, Team %in% c("NYK", "BKN", "BOS", "PHI", "TOR"))
FF22_div <- filter(FF22_ranks, Team %in% c("NYK", "BKN", "BOS", "PHI", "TOR"))

View(FF21_div)
View(FF22_div)

### Correlation and Regression, 2021 ### 
# Drop Wins, ORtg and DRtg (columns 2-4) so only Team and the eight factor
# columns remain; Team is excluded below because it is not numeric.
FF_corr <- FF21_stats[, -(2:4)]

# Correlation matrix of the eight Four Factors columns, to three decimals.
numeric_cols <- vapply(FF_corr, is.numeric, logical(1))
kable(round(cor(FF_corr[, numeric_cols]), 3))
eFG Opp eFG TOV% Opp TOV% ORB% Opp DRB% FTR Opp FTR
eFG 1.000 -0.413 -0.147 -0.213 0.077 0.249 -0.064 0.033
Opp eFG -0.413 1.000 0.057 0.238 0.017 -0.359 -0.163 0.046
TOV% -0.147 0.057 1.000 0.278 0.111 0.104 -0.023 -0.154
Opp TOV% -0.213 0.238 0.278 1.000 -0.221 -0.359 0.076 0.474
ORB% 0.077 0.017 0.111 -0.221 1.000 0.452 0.148 -0.259
Opp DRB% 0.249 -0.359 0.104 -0.359 0.452 1.000 -0.067 -0.304
FTR -0.064 -0.163 -0.023 0.076 0.148 -0.067 1.000 0.145
Opp FTR 0.033 0.046 -0.154 0.474 -0.259 -0.304 0.145 1.000
# Regression Analysis for Predicting Wins
# Team-minus-opponent differential for each factor, rescaled from percentage
# points to proportions. These are free-standing vectors in team row order;
# the model formula and the later predict() call refer to them by name.
eFG_diff <- (FF21_stats[["eFG"]] - FF21_stats[["Opp eFG"]]) / 100
TOV_diff <- (FF21_stats[["TOV%"]] - FF21_stats[["Opp TOV%"]]) / 100
# NOTE(review): "ORB%" and "Opp DRB%" have league means of 22.2 and 77.8 in
# the table above, so this differential is strongly negative for every team.
# Confirm that ORB% minus this column is the intended rebounding measure.
RB_diff <- (FF21_stats[["ORB%"]] - FF21_stats[["Opp DRB%"]]) / 100
FTR_diff <- (FF21_stats[["FTR"]] - FF21_stats[["Opp FTR"]]) / 100

# Linear model of 2021 wins on the four differentials.
Regression_21 <- lm(FF21_stats$Wins ~ eFG_diff + TOV_diff + RB_diff + FTR_diff)
summary(Regression_21)
## 
## Call:
## lm(formula = FF21_stats$Wins ~ eFG_diff + TOV_diff + RB_diff + 
##     FTR_diff)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.0475 -1.8753  0.4269  1.4246  7.5482 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    52.78      23.97   2.202  0.03714 *  
## eFG_diff      339.67      26.85  12.652 2.29e-12 ***
## TOV_diff     -190.38      58.83  -3.236  0.00340 ** 
## RB_diff        30.11      43.09   0.699  0.49110    
## FTR_diff      100.61      34.99   2.875  0.00814 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.826 on 25 degrees of freedom
## Multiple R-squared:  0.8741, Adjusted R-squared:  0.854 
## F-statistic:  43.4 on 4 and 25 DF,  p-value: 6.694e-11
# Correlation between 2021 wins and each Four Factors differential.
# Each call uses cor() with its default method (Pearson) on the same
# differential vectors that serve as predictors in Regression_21.
cor(FF21_stats$Wins, eFG_diff)
## [1] 0.8927872
cor(FF21_stats$Wins, TOV_diff)
## [1] -0.06954477
cor(FF21_stats$Wins, RB_diff)
## [1] -0.1119886
cor(FF21_stats$Wins, FTR_diff)
## [1] 0.1595572
## [1] 0.1595572
# Knicks row of the 2021 stats table.
NYK21_stats <- filter(FF21_stats, Team == "NYK")

# Knicks differentials, built the same way as the league-wide predictors.
NYK_eFG_diff <- (NYK21_stats[["eFG"]] - NYK21_stats[["Opp eFG"]]) / 100
NYK_TOV_diff <- (NYK21_stats[["TOV%"]] - NYK21_stats[["Opp TOV%"]]) / 100
NYK_RB_diff <- (NYK21_stats[["ORB%"]] - NYK21_stats[["Opp DRB%"]]) / 100
NYK_FTR_diff <- (NYK21_stats[["FTR"]] - NYK21_stats[["Opp FTR"]]) / 100

# Predicted Knicks wins from the fitted model, rounded to a whole number.
# The list element names must match the predictor names in the formula.
nyk_inputs <- list(
  eFG_diff = NYK_eFG_diff,
  TOV_diff = NYK_TOV_diff,
  RB_diff = NYK_RB_diff,
  FTR_diff = NYK_FTR_diff
)
NYK21_WPredict <- round(predict(Regression_21, newdata = nyk_inputs))
summary(NYK21_WPredict)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      40      40      40      40      40      40
# Predicted minus actual Knicks wins; a negative value means the model
# predicted fewer wins than the team recorded.
NYK21_Windiff <- NYK21_WPredict - NYK21_stats[["Wins"]]
NYK21_Windiff
##  1 
## -1
### Clusters ### 
# Scrape one season of player statistics from Basketball Reference.
#
# Downloads the league-wide "totals", "per_poss" (per 100 possessions) and
# "advanced" player tables for `season`, cleans each one the same way, and
# joins them into a single table with one row per player.
#
# Args:
#   season: value spliced into the page URL after "NBA_" (e.g. 2021).
#
# Returns:
#   A tibble combining the three tables. The per-100-possession stat columns
#   carry a "_p100" suffix. No season column is added, so callers that stack
#   several seasons cannot tell the rows apart by season.
#
# Requires network access; read_html() errors propagate to the caller.
scrape_stats <- function(season) {
  # Download and clean the first HTML table on one league page.
  # `table_name` is the URL fragment between the season and ".html".
  read_player_table <- function(table_name) {
    page_url <- paste0("https://www.basketball-reference.com/leagues/NBA_",
                       season, "_", table_name, ".html")
    raw_table <- page_url %>%
      read_html() %>%
      html_table() %>%
      .[[1]]

    raw_table %>%
      remove_empty(which = "cols") %>%
      clean_names() %>%
      # Drop rows whose player field is the literal text "Player"
      # (presumably header rows repeated inside the table body).
      dplyr::filter(player != "Player") %>%
      # Everything except the three text columns becomes numeric; values
      # that cannot be converted (NA after coercion) are set to 0.
      mutate(across(-c(player, tm, pos), as.numeric)) %>%
      mutate(across(-c(player, tm, pos), ~ replace(.x, is.na(.x), 0))) %>%
      as_tibble() %>%
      # Keep only the first row listed for each player.
      group_by(player) %>%
      slice(1) %>%
      ungroup()
  }

  # Player total stats
  player_stats_tot <- read_player_table("totals") %>%
    select(-rk)

  # Player per-100-possession stats. Columns 9 to 29 (counted while `rk` is
  # still present) get the "_p100" suffix so they do not clash with the
  # totals columns of the same name.
  player_stats_p100 <- read_player_table("per_poss") %>%
    rename_with(~ paste0(.x, "_p100"), .cols = 9:29) %>%
    select(-rk)

  # Player advanced stats
  player_stats_adv <- read_player_table("advanced") %>%
    select(-rk)

  # The advanced table is joined without `gs` in the key.
  full_join(player_stats_tot, player_stats_p100,
            by = c("player", "pos", "age", "tm", "g", "gs", "mp")) %>%
    full_join(player_stats_adv,
              by = c("player", "pos", "age", "tm", "g", "mp"))
}

# Scrape the 2019, 2020 and 2021 player tables and stack them row-wise.
player_stats <- map_dfr(2019:2021, scrape_stats)

# Per-player summary for clustering. Only rows with at least 1500 minutes are
# kept (the threshold applies to each scraped row, before aggregation). Games
# and minutes are summed over the remaining rows, every rate or rating is
# averaged, and Team is taken from the player's last remaining row.
# Column order matters: later code selects the stat columns by position.
player_stats_final <- player_stats %>%
  filter(mp >= 1500) %>%
  group_by(player) %>%
  summarise(
    Team    = last(tm),
    Games   = sum(g),
    Mins    = sum(mp),
    eFG     = mean(e_fg_percent),
    P3M_100 = mean(x3p_p100),
    P3p     = mean(x3p_percent_p100),
    FTp     = mean(ft_percent_p100),
    PTS_100 = mean(pts_p100),
    ORtg    = mean(o_rtg),
    DRtg    = mean(d_rtg),
    P3Ar    = mean(x3p_ar),
    FTR     = mean(f_tr),
    ORBp    = mean(orb_percent),
    DRBp    = mean(drb_percent),
    ASTp    = mean(ast_percent),
    STLp    = mean(stl_percent),
    BLKp    = mean(blk_percent),
    TOVp    = mean(tov_percent),
    USGp    = mean(usg_percent),
    PER     = mean(per),
    BPM     = mean(bpm),
    VORP    = mean(vorp)
  )

### Basketball Data Science Clustering Approach ### 

# Stats-only table for clustering: columns 5 to 23 of the summary (eFG
# through VORP), leaving out player, Team, Games and Mins.
player_stats_clu <- player_stats_final[, 5:23]

# Diagnostic run for up to 12 clusters, plotted to help choose k.
findk <- hclustering(player_stats_clu, nclumax = 12)
plot(findk)

# Final hierarchical clustering with 10 clusters, labelled by player name,
# shown as one radial profile per cluster.
radials <- hclustering(
  player_stats_clu,
  labels = player_stats_final[["player"]],
  k = 10
)
plot(radials, profiles = TRUE)

# Dendrogram with cluster rectangles and coloured branches.
plot(radials, rect = TRUE, colored.branches = TRUE, cex.labels = 0.25)

# Minutes for every player with at least 1500 total minutes.
clu_subset <- subset(player_stats_final, Mins >= 1500)

Mins <- clu_subset$Mins

# One row per player: label and cluster from the clustering result, the
# standardised clustering stats, and total minutes.
Scale_stats <- data.frame(radials$Subjects, scale(player_stats_clu), Mins)

# Stats shown in each bubble plot, and the column that sets bubble size.
dvar <- c("eFG", "P3M_100", "P3p", "FTp", "PTS_100", "ORtg", "DRtg", "P3Ar",
          "FTR", "ORBp", "DRBp", "ASTp", "STLp", "BLKp", "TOVp", "USGp",
          "PER", "BPM", "VORP")
svar <- "Mins"
# Shared y-axis limits across all clusters, and the bubble-size scale.
yRange <- range(Scale_stats[, dvar])
sizeRange <- c(1500, 7500)
no.clu <- 10

# One variability plot per cluster, collected for a two-column grid.
p <- vector(mode = "list", length = no.clu)

for (k in seq_len(no.clu)) {
  Clusters <- subset(Scale_stats, Cluster == k)
  # Columns 3 to 22 are the 19 standardised stats followed by Mins.
  vrb <- variability(Clusters[, 3:22], data.var = dvar,
                     size.var = svar, weight = FALSE, VC = FALSE)
  title <- paste("Cluster", k)
  p[[k]] <- plot(vrb, size.lim = sizeRange, ylim = yRange, title = title,
                 leg.pos = c(0, 1), leg.just = c(-0.5, 0),
                 leg.box = "vertical", leg.brk = seq(1500, 7500, by = 1500),
                 leg.title.pos = "left", leg.nrow = 1, max.circle = 7)
}

grid.arrange(grobs = p, ncol = 2)
## Warning: Removed 19 rows containing missing values (geom_point).

# Rows of Scale_stats belonging to each of the ten clusters, plus the player
# labels in each cluster for the membership tables printed below.
C1 <- filter(Scale_stats, Cluster == 1)
C1_Players <- C1[["Label"]]

C2 <- filter(Scale_stats, Cluster == 2)
C2_Players <- C2[["Label"]]

C3 <- filter(Scale_stats, Cluster == 3)
C3_Players <- C3[["Label"]]

C4 <- filter(Scale_stats, Cluster == 4)
C4_Players <- C4[["Label"]]

C5 <- filter(Scale_stats, Cluster == 5)
C5_Players <- C5[["Label"]]

C6 <- filter(Scale_stats, Cluster == 6)
C6_Players <- C6[["Label"]]

C7 <- filter(Scale_stats, Cluster == 7)
C7_Players <- C7[["Label"]]

C8 <- filter(Scale_stats, Cluster == 8)
C8_Players <- C8[["Label"]]

C9 <- filter(Scale_stats, Cluster == 9)
C9_Players <- C9[["Label"]]

C10 <- filter(Scale_stats, Cluster == 10)
C10_Players <- C10[["Label"]]

kable(C1_Players, col.names = "Cluster 1")
Cluster 1
Aaron Gordon
Al-Farouq Aminu
Al Horford
Andrew Wiggins
Brook Lopez
Carmelo Anthony
Dario Šaric
DeMarre Carroll
Dewayne Dedmon
Eric Paschall
Glenn Robinson III
Harrison Barnes
Jabari Parker
Jae’Sean Tate
Jaren Jackson Jr.
Jaylen Brown
Jeff Green
Jerami Grant
Jonathan Isaac
Justin Jackson
Keldon Johnson
Kelly Olynyk
Kelly Oubre Jr.
Kristaps Porzingis
Kyle Anderson
Kyle Kuzma
Marc Gasol
Maxi Kleber
Miles Bridges
Myles Turner
Nemanja Bjelica
Noah Vonleh
P.J. Washington
Patrick Williams
Rodney Hood
Rui Hachimura
T.J. Warren
Torrey Craig
kable(C2_Players, col.names = "Cluster 2")
Cluster 2
Aaron Holiday
Anthony Edwards
Avery Bradley
Cam Reddish
Cedi Osman
Coby White
Collin Sexton
Damyean Dotson
Darius Bazley
Darius Garland
De’Andre Hunter
Dennis Schröder
Dillon Brooks
Dwayne Bacon
Emmanuel Mudiay
Gary Harris
Isaac Okoro
Jarrett Culver
Josh Jackson
Josh Okogie
Josh Richardson
Kent Bazemore
Kevin Huerter
Kevin Knox
Lonnie Walker IV
Luguentz Dort
RJ Barrett
Taurean Prince
Théo Maledon
Trevor Ariza
Tyler Herro
Tyler Johnson
Will Barton
kable(C3_Players, col.names = "Cluster 3")
Cluster 3
Alec Burks
Bogdan Bogdanovic
Bojan Bogdanovic
Buddy Hield
CJ McCollum
Danilo Gallinari
Devonte’ Graham
Evan Fournier
Gordon Hayward
Jamal Murray
Jayson Tatum
Jeremy Lamb
Jordan Clarkson
Kendrick Nunn
Kevin Love
Khris Middleton
Klay Thompson
Lauri Markkanen
Malcolm Brogdon
Marcus Morris
Michael Porter Jr.
Norman Powell
Pascal Siakam
Reggie Jackson
Rudy Gay
Terrence Ross
Terry Rozier
Tobias Harris
kable(C4_Players, col.names = "Cluster 4")
Cluster 4
Alex Len
Daniel Theis
Deandre Ayton
Dwight Powell
Enes Freedom
Ivica Zubac
Jarrett Allen
John Collins
Jonas Valanciunas
Khem Birch
LaMarcus Aldridge
Marvin Bagley III
Richaun Holmes
Robin Lopez
Serge Ibaka
Taj Gibson
Tristan Thompson
kable(C5_Players, col.names = "Cluster 5")
Cluster 5
Andre Drummond
Bam Adebayo
Ben Simmons
Clint Capela
DeAndre Jordan
Derrick Favors
Hassan Whiteside
Jakob Poeltl
JaVale McGee
Jusuf Nurkic
Mason Plumlee
Montrezl Harrell
Nerlens Noel
Rudy Gobert
Steven Adams
kable(C6_Players, col.names = "Cluster 6")
Cluster 6
Andre Iguodala
Austin Rivers
Danny Green
Danuel House Jr.
Darius Miller
Donte DiVincenzo
Dorian Finney-Smith
Garrett Temple
Jaden McDaniels
Jae Crowder
Josh Hart
Justin Holiday
Kentavious Caldwell-Pope
Marvin Williams
Nicolas Batum
OG Anunoby
P.J. Tucker
Pat Connaughton
Patrick Beverley
Robert Covington
Rodney McGruder
Royce O’Neale
Solomon Hill
Terrance Ferguson
Wesley Matthews
kable(C7_Players, col.names = "Cluster 7")
Cluster 7
Anthony Davis
Blake Griffin
Damian Lillard
Domantas Sabonis
Giannis Antetokounmpo
James Harden
Jimmy Butler
Joel Embiid
Julius Randle
Karl-Anthony Towns
Kawhi Leonard
Kevin Durant
Kyrie Irving
LeBron James
Luka Doncic
Mike Conley
Nikola Jokic
Nikola Vucevic
Paul George
Russell Westbrook
Stephen Curry
Zion Williamson
kable(C8_Players, col.names = "Cluster 8")
Cluster 8
Ben McLemore
Bryn Forbes
Davis Bertans
Desmond Bane
Doug McDermott
Duncan Robinson
Eric Gordon
Furkan Korkmaz
Gary Trent Jr.
J.J. Redick
Joe Harris
Landry Shamet
Langston Galloway
Malik Beasley
Marco Belinelli
Patty Mills
Reggie Bullock
Saddiq Bey
Seth Curry
Tim Hardaway Jr.
Tony Snell
kable(C9_Players, col.names = "Cluster 9")
Cluster 9
Bradley Beal
Brandon Ingram
D’Angelo Russell
De’Aaron Fox
DeMar DeRozan
Devin Booker
Donovan Mitchell
Goran Dragic
Ja Morant
Kemba Walker
Lou Williams
Spencer Dinwiddie
Trae Young
Zach LaVine
kable(C10_Players, col.names = "Cluster 10")
Cluster 10
Bruce Brown
Chris Paul
Cory Joseph
D.J. Augustin
Darren Collison
DeAndre’ Bembry
Dejounte Murray
Delon Wright
Dennis Smith Jr.
Derrick White
Draymond Green
Dwyane Wade
Eric Bledsoe
Evan Turner
Fred VanVleet
Ish Smith
Jalen Brunson
Joe Ingles
Jrue Holiday
Justise Winslow
Kyle Lowry
Larry Nance Jr.
Lonzo Ball
Marcus Smart
Markelle Fultz
Mikal Bridges
Monte Morris
Otto Porter Jr.
Paul Millsap
Ricky Rubio
Ryan Arcidiacono
Shai Gilgeous-Alexander
T.J. McConnell
Thaddeus Young
Tomáš Satoranský
Troy Brown Jr.
Tyrese Haliburton
Tyus Jones
Willie Cauley-Stein
# Cluster assignment per player, taken from the second column of Scale_stats
# (presumably the Cluster column contributed by radials$Subjects -- confirm).
Player_Cluster <-Scale_stats[,2] 
# Attach the assignment to the unscaled summary. Rows are matched by position,
# which relies on Scale_stats and player_stats_final sharing the same order.
player_stats_all <- cbind(player_stats_final, Player_Cluster)

# Boxplot comparisons by cluster.
# Draws one box per cluster for a single column of player_stats_all.
#   stat        column name to plot on the y-axis
#   title       panel heading without its " by Cluster" suffix
#   axis_label  y-axis text; defaults to the same wording as `title`
cluster_boxplot <- function(stat, title, axis_label = title) {
  panel_formula <- reformulate("Player_Cluster", response = stat)
  invisible(
    boxplot(panel_formula, data = player_stats_all, col = rainbow(12),
            main = paste(title, "by Cluster"), xlab = "Cluster",
            ylab = axis_label)
  )
}

# Three panels per page: playing time and scoring, then three-point shooting.
par(mfrow = c(1, 3))
cluster_boxplot("Mins", "Minutes Played")
cluster_boxplot("eFG", "Effective FG%", axis_label = "eFG%")
cluster_boxplot("PTS_100", "Points Per 100 Poss.")

cluster_boxplot("P3Ar", "3-Pt Att. Rate")
cluster_boxplot("P3M_100", "3s Made Per 100 Poss.")
cluster_boxplot("P3p", "3-Pt %")

# Two panels per page: free throws.
par(mfrow = c(1, 2))
cluster_boxplot("FTR", "Free Throw Rate")
cluster_boxplot("FTp", "Free Throw %")

# Back to three panels per page for the remaining groups.
par(mfrow = c(1, 3))
cluster_boxplot("ORtg", "Off. Rating")
cluster_boxplot("DRtg", "Def. Rating")
cluster_boxplot("USGp", "Usage %")

cluster_boxplot("ORBp", "Off. Rebound %")
cluster_boxplot("DRBp", "Def. Rebound %")
cluster_boxplot("BLKp", "Block Rate")

cluster_boxplot("ASTp", "Assist Rate")
cluster_boxplot("STLp", "Steal Rate")
cluster_boxplot("TOVp", "Turnover Rate")

cluster_boxplot("PER", "PER")
cluster_boxplot("BPM", "BPM")
cluster_boxplot("VORP", "VORP")

# Restore the single-panel layout.
par(mfrow = c(1, 1))