library(tidyverse)
## ── Attaching packages ────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.3.0.9000 ✔ purrr 0.3.2
## ✔ tibble 2.1.3 ✔ dplyr 0.8.3
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ───────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(cluster)
library(factoextra)
## Warning: package 'factoextra' was built under R version 3.6.2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(dendextend)
##
## ---------------------
## Welcome to dendextend version 1.12.0
## Type citation('dendextend') for how to cite the package.
##
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
##
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## Or contact: <tal.galili@gmail.com>
##
## To suppress this message use: suppressPackageStartupMessages(library(dendextend))
## ---------------------
##
## Attaching package: 'dendextend'
## The following object is masked from 'package:stats':
##
## cutree
library(ggplot2)
# NOTE: the `rm(list = ls())` that used to sit here was removed on purpose.
# It wiped every object in the caller's global environment whenever this
# script was sourced, and it does not reset attached packages or options, so
# it never gave a truly clean session anyway. Restart R for a clean run.

# Download the 2019 NBA table. readr guesses the column types: `Player`,
# `Pos` and `Tm` are read as character, every other column as double.
nba19 <- read_csv(
  file = "https://raw.githubusercontent.com/jdumalig/DataBank/master/nba_2019.csv"
)
## Parsed with column specification:
## cols(
## .default = col_double(),
## Player = col_character(),
## Pos = col_character(),
## Tm = col_character()
## )
## See spec(...) for full column specifications.
# Keep only complete rows, then narrow the table to the player name plus the
# statistics used for clustering.
# The NA filter runs on the full table *before* the column selection, so a
# missing value in any original column (selected or not) removes that row.
nba19 <- select(
  na.omit(nba19),
  Player,
  `FG%`, FGA,
  `3P%`, `3PA`,
  `FT%`,
  ORB, DRB,
  AST, STL, BLK, TOV,
  PTS
)
# Min-max rescale a numeric vector onto the [0, 1] interval.
#
# x: numeric vector.
# Returns a numeric vector of the same length as `x`, where the smallest
# input maps to 0 and the largest to 1. A constant vector maps to all zeros
# instead of the NaN that 0 / 0 would produce.
rescale_min_max <- function(x) {
  bounds <- range(x)
  span <- bounds[2] - bounds[1]
  # Exact comparison is intended: span is exactly 0 only when max == min.
  # isTRUE() keeps an NA span from turning the `if` into an error.
  if (isTRUE(span == 0)) {
    return(rep(0, length(x)))
  }
  (x - bounds[1]) / span
}

# Columns that get rescaled. The percentage columns (`FG%`, `3P%`, `FT%`)
# are left untouched -- presumably because they already lie in [0, 1];
# confirm against the data.
scaled_cols <- c("FGA", "3PA", "ORB", "DRB", "AST", "STL", "BLK", "TOV", "PTS")

# Put every selected column on the same scale so that no single statistic
# dominates the Euclidean distances computed later.
nba19[scaled_cols] <- lapply(nba19[scaled_cols], rescale_min_max)
# Label every row with the player's name so that later steps which read row
# names can use them as labels.
# Setting row names directly on a tibble is deprecated and emits a warning,
# so convert to a plain data frame first. `Player` stays as a regular column
# because the code below still refers to it.
nba19 <- as.data.frame(nba19)
rownames(nba19) <- nba19$Player
## Warning: Setting row names on a tibble is deprecated.
# Pairwise Euclidean distances between players, computed on every column
# except the name.
d <- nba19 %>%
  select(-Player) %>%
  dist(method = "euclidean")

# Agglomerative clustering with complete linkage.
hc1 <- d %>% hclust(method = "complete")

# Dendrogram: shrink the labels and use a negative `hang` so that all leaf
# labels are drawn from the same baseline.
plot(hc1, hang = -1, cex = 0.6)

# Cut the dendrogram into 10 groups; `clust` holds one cluster id per row.
clust <- cutree(hc1, k = 10)

# Store the assignment next to the player statistics.
nba19$Hierarchical <- clust

# Visualise the clusters. The freshly added `Hierarchical` column must be
# excluded along with `Player`: otherwise the cluster label itself is passed
# to the plot as if it were one of the player statistics and distorts the
# projection.
fviz_cluster(
  list(
    data = select(nba19, -Player, -Hierarchical),
    cluster = clust
  )
)

#write_csv(nba19, path = "/Users/jerduma/Downloads/hierarchical.csv")