# Import Data ----
library(rio)
library("cluster")
# Read the raw project data. stringsAsFactors is set explicitly so text
# columns stay character on every R version; converting a factor with
# as.numeric() would silently return level codes instead of the values.
Fulldata <- read.csv(
  "/Users/Lorraine/Desktop/Project 5(1).csv",
  stringsAsFactors = FALSE
)
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Keep only the six variables used for clustering.
Uni <- subset(Fulldata, select = c(YEAR, TYPE, UNDER, VARS, NCAA, MASCOT))
# UNDER does not arrive as a numeric column (dist() on this data frame warns
# "NAs introduced by coercion"), presumably because the values carry
# thousands separators -- TODO confirm against the CSV. Parse it once here so
# that every clustering step below works on real numbers.
if (!is.numeric(Uni$UNDER)) {
  Uni$UNDER <- as.numeric(gsub("[,[:space:]]", "", as.character(Uni$UNDER)))
}
# Hierarchical Cluster ----
# Pairwise Euclidean distances between universities.
#
# dist() converts the data frame to a matrix; a text column then turns into
# NA ("NAs introduced by coercion") and that variable is silently left out of
# every distance. Build a numeric copy first so UNDER actually contributes.
# Presumably UNDER is text because of thousands separators -- TODO confirm.
uni_num <- Uni
if (!is.numeric(uni_num$UNDER)) {
  uni_num$UNDER <- as.numeric(
    gsub("[,[:space:]]", "", as.character(uni_num$UNDER))
  )
}
# NOTE(review): the columns are not standardised, so the variable with the
# largest range dominates the distances; consider scale() if that is not
# intended.
dist.eucl <- dist(uni_num, method = "euclidean")
# The matrix echoed below in this transcript was produced before UNDER was
# parsed; re-run to refresh it.
dist.eucl
## 1 2 3 4 5 6
## 2 31.407006
## 3 28.982753 56.794366
## 4 21.661025 42.213742 17.355115
## 5 28.607691 5.366563 53.171421 38.277931
## 6 22.636254 42.099881 23.773935 18.000000 40.114835
## 7 69.713700 39.587877 95.912460 81.188669 44.063590 78.376017
## 8 87.963629 114.797213 59.819729 73.818697 111.509641 74.578817
## 9 27.121947 7.014271 54.254954 41.133928 9.099451 39.889848
## 10 50.414284 77.907638 21.577766 36.545862 74.215901 41.323117
## 11 14.979987 19.068298 38.930708 25.408660 14.979987 29.678275
## 12 35.445733 60.527680 9.859006 18.718974 56.423399 29.718681
## 13 20.287927 28.670542 33.424542 20.079841 26.765650 14.449913
## 14 53.001887 78.894867 25.099801 38.042082 75.569835 39.648455
## 15 29.939940 51.252317 18.428239 17.731328 48.707289 10.620734
## 16 60.329097 85.479822 34.170162 46.294708 82.595399 44.389188
## 17 21.661025 41.698921 21.661025 14.696938 39.329378 3.464102
## 18 20.493902 44.145215 15.798734 11.009087 41.221354 8.831761
## 19 18.718974 41.655732 17.111400 6.099180 37.469988 21.326040
## 20 22.899782 34.205263 39.023070 32.236625 34.415113 18.264720
## 21 35.156792 50.864526 32.422215 29.738863 49.815660 13.371612
## 22 13.813037 21.990907 35.445733 21.410278 17.899721 26.381812
## 23 31.349641 51.158577 20.985709 19.036806 48.756538 10.733126
## 7 8 9 10 11 12
## 2
## 3
## 4
## 5
## 6
## 7
## 8 152.585714
## 9 42.694262 112.649900
## 10 116.869158 39.038443 75.649190
## 11 58.491025 98.144791 17.181385 60.209634
## 12 99.745677 57.456070 58.950827 19.318385 42.680206
## 13 65.038450 87.560265 27.756080 52.865868 19.411337 37.629775
## 14 116.956402 35.949965 76.931138 9.979980 62.373071 23.443549
## 15 88.038628 64.631262 49.501515 32.255232 37.373788 22.768399
## 16 122.449990 30.731092 83.455377 19.989997 70.022853 33.728326
## 17 78.696887 73.981079 39.708941 39.814570 28.270126 27.099815
## 18 82.282440 70.806779 41.928511 35.088460 28.628657 21.605555
## 19 81.107336 75.641259 40.054962 37.341666 23.571169 19.193749
## 20 66.118076 90.126578 30.436820 57.664547 28.354894 46.346521
## 21 83.642095 72.315973 49.002041 44.819639 41.612498 37.773006
## 22 61.530480 94.392796 20.493902 56.593286 4.098780 38.976916
## 23 87.409382 65.461439 49.646752 34.064644 38.042082 24.787093
## 13 14 15 16 17 18
## 2
## 3
## 4
## 5
## 6
## 7
## 8
## 9
## 10
## 11
## 12
## 13
## 14 52.007692
## 15 23.134390 29.597297
## 16 57.570826 10.954451 34.467376
## 17 14.028542 38.605699 9.919677 44.063590
## 18 18.231840 35.105555 9.859006 41.698921 6.292853
## 19 22.742032 40.234314 21.990907 49.050994 18.297541 13.769532
## 20 17.320508 56.370205 28.523674 60.119880 20.493902 24.955961
## 21 23.622024 40.264128 15.059880 41.698921 15.987495 20.668817
## 22 16.613248 58.562787 33.621422 66.217822 24.738634 24.955961
## 23 22.715633 30.789609 2.683282 35.088460 10.392305 11.644741
## 19 20 21 22
## 2
## 3
## 4
## 5
## 6
## 7
## 8
## 9
## 10
## 11
## 12
## 13
## 14
## 15
## 16
## 17
## 18
## 19
## 20 33.603571
## 21 33.905752 22.423202
## 22 19.929877 27.232334 38.559046
## 23 23.672769 28.354894 13.371612 34.292856
# Hierarchical clustering -- agglomerative, with dendrograms.
# Ward's minimum-variance linkage ("ward.D2") on the Euclidean distances.
res.hc <- hclust(d = dist.eucl, method = "ward.D2")

# Full tree, no cut.
fviz_dend(res.hc, cex = 0.5)

# Same tree cut into 3 groups, coloured with the "simpsons" palette.
# The former `kcolors = 3` was dropped: fviz_dend() has no such argument
# (its name is `k_colors`), and `palette` already sets the group colours.
fviz_dend(res.hc, k = 3, palette = "simpsons", cex = 0.5)

# K-Means cluster analysis ----
# K-means: estimate the number of clusters, then fit and plot k = 5, 3 and 4.

# kmeans() needs an all-numeric table. A bare as.numeric() on UNDER is unsafe:
# on a factor it returns level codes, on formatted text it returns NA.
# Parse the labels instead (presumably thousands separators -- TODO confirm).
if (!is.numeric(Uni$UNDER)) {
  Uni$UNDER <- as.numeric(gsub("[,[:space:]]", "", as.character(Uni$UNDER)))
}

# The gap statistic is bootstrapped and kmeans() uses random starts; fix the
# seed so the chosen k and the cluster labels are reproducible.
set.seed(123)
fviz_nbclust(Uni, kmeans, k.max = 5, method = "gap_stat")

# `k`, `kcolors` and `cex` were removed from the fviz_cluster() calls: they
# are not arguments of that function (the number of clusters comes from the
# kmeans fit), and `cex` is an alias of `size` in ggplot2, which raised
# "Duplicated aesthetics after name standardisation: size".
# Use `pointsize` / `labelsize` to change sizes.

# 5 clusters
km.res <- kmeans(Uni, centers = 5, nstart = 25)
fviz_cluster(km.res, data = Uni, palette = "uchicago")

# 3 clusters
km.res1 <- kmeans(Uni, centers = 3, nstart = 25)
fviz_cluster(km.res1, data = Uni, palette = "uchicago")

# 4 clusters
km.res2 <- kmeans(Uni, centers = 4, nstart = 25)
fviz_cluster(km.res2, data = Uni, palette = "uchicago")
