library(tidyverse)
## ── Attaching packages ────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.3.0.9000     ✔ purrr   0.3.2     
## ✔ tibble  2.1.3          ✔ dplyr   0.8.3     
## ✔ tidyr   0.8.3          ✔ stringr 1.4.0     
## ✔ readr   1.3.1          ✔ forcats 0.4.0
## ── Conflicts ───────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(cluster)
library(factoextra)
## Warning: package 'factoextra' was built under R version 3.6.2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(dendextend)
## 
## ---------------------
## Welcome to dendextend version 1.12.0
## Type citation('dendextend') for how to cite the package.
## 
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
## 
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## Or contact: <tal.galili@gmail.com>
## 
##  To suppress this message use:  suppressPackageStartupMessages(library(dendextend))
## ---------------------
## 
## Attaching package: 'dendextend'
## The following object is masked from 'package:stats':
## 
##     cutree
library(ggplot2)
# NOTE: the former `rm(list = ls())` call was removed on purpose. Wiping the
# global environment is a hidden side effect on whoever sources this script
# and does not reset loaded packages or options anyway; restart the R session
# when a clean workspace is required.

# Read the nba_2019.csv table from the remote repository into a tibble.
nba19 <- read_csv(
  file = "https://raw.githubusercontent.com/jdumalig/DataBank/master/nba_2019.csv"
)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   Player = col_character(),
##   Pos = col_character(),
##   Tm = col_character()
## )
## See spec(...) for full column specifications.
# Discard every row that has a missing value in ANY column of the raw table
# (the NA filter runs before the column selection), then keep only the player
# name plus the statistic columns used for clustering.
nba19 <- select(
  na.omit(nba19),
  Player,
  `FG%`, FGA,
  `3P%`, `3PA`,
  `FT%`,
  ORB, DRB, AST, STL, BLK, TOV, PTS
)

# Min-max rescale a numeric vector onto [0, 1].
#
# x: numeric vector.
# Returns a numeric vector of the same length as `x`. A vector whose values
# are all identical has zero range; it is mapped to all zeros instead of the
# NaN that 0 / 0 would produce (NaN would later break dist()/hclust()).
min_max_scale <- function(x) {
  rng <- range(x)
  span <- rng[2] - rng[1]
  # isTRUE() keeps an NA span (NA in the input) from raising an error in `if`;
  # such input simply propagates NA, as the plain formula would.
  if (isTRUE(span == 0)) {
    return(rep(0, length(x)))
  }
  (x - rng[1]) / span
}

# Columns that get rescaled. The percentage columns (`FG%`, `3P%`, `FT%`) are
# deliberately left untouched, as in the original script.
# NOTE(review): presumably they are already fractions in [0, 1] -- confirm.
scaled_cols <- c("FGA", "3PA", "ORB", "DRB", "AST", "STL", "BLK", "TOV", "PTS")

# Apply the same transformation to every listed column in one step instead of
# repeating the formula once per column.
nba19[scaled_cols] <- lapply(nba19[scaled_cols], min_max_scale)

# Build the clustering input once, as a base data.frame keyed by player name.
# Setting row names on a tibble is deprecated (it raised a warning here), so
# the features are converted to a plain data.frame first.
# `Hierarchical` is excluded as well so that re-running this section after the
# label column exists does not feed the labels back in as a feature.
feature_cols <- setdiff(names(nba19), c("Player", "Hierarchical"))
features <- as.data.frame(nba19[feature_cols])
# make.unique() leaves already-unique names untouched and only disambiguates
# repeated player names, which data.frame row names do not allow.
rownames(features) <- make.unique(nba19$Player)

# Complete-linkage agglomerative clustering on Euclidean distances.
d <- dist(features, method = "euclidean")
hc1 <- hclust(d, method = "complete")
plot(hc1, cex = 0.6, hang = -1)

# Cut the dendrogram into 10 clusters and store the label for each player.
clust <- cutree(hc1, k = 10)
nba19$Hierarchical <- unname(clust)

# Visualise the clusters using the SAME feature table that was clustered.
# The previous code re-selected from `nba19` after `Hierarchical` had been
# added, so the cluster label itself leaked into the plotted projection.
fviz_cluster(list(data = features, cluster = clust))

#write_csv(nba19, path = "/Users/jerduma/Downloads/hierarchical.csv")