# Implementation of k-means clustering with animation
# Load the Universities data set from disk. Column 1 is the university name;
# columns 2-7 are the numeric measures used for clustering.
input <- read.csv(
  file = "C:\\Users\\RISHI RAHUL\\Desktop\\Dataset\\3 Data Sets\\6 Hierarchial Clustering\\Universities.csv"
)

# Working copy of the raw data.
mydata <- input

# Standardise columns 2-7 with scale()'s defaults (centre each column, then
# divide by its standard deviation) so no single measure dominates the
# distance computations used by k-means.
normalized_data <- scale(x = mydata[, 2:7])
# Fix the RNG seed so the random initial centres -- and therefore the cluster
# labels and centres -- are the same on every run of the script.
set.seed(123)

# 5-cluster solution. nstart = 25 runs k-means from 25 random sets of initial
# centres and keeps the best fit, which guards against a poor local optimum
# from a single unlucky start.
fit <- kmeans(normalized_data, centers = 5, nstart = 25)

# Inspect the structure of the fitted object.
str(fit)
# Sample output below was captured from an earlier, unseeded run; the exact
# labels and numbers will differ from what the seeded call above produces.
## List of 9
## $ cluster : int [1:25] 3 4 1 3 3 2 2 3 2 4 ...
## $ centers : num [1:5, 1:6] -0.367 0.8964 0.0739 0.8634 -1.8913 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:5] "1" "2" "3" "4" ...
## .. ..$ : chr [1:6] "SAT" "Top10" "Accept" "SFRatio" ...
## $ totss : num 144
## $ withinss : num [1:5] 6.74 2.83 6.63 2.11 7.09
## $ tot.withinss: num 25.4
## $ betweenss : num 119
## $ size : int [1:5] 3 7 9 2 4
## $ iter : int 2
## $ ifault : int 0
## - attr(*, "class")= chr "kmeans"

# Cluster centres, expressed in the standardised (scaled) units.
fit$centers
## SAT Top10 Accept SFRatio Expenses GradRate
## 1 -0.36704889 -0.1276120 0.8347143 0.31470124 -0.6130148 -0.9259078
## 2 0.89637905 0.7692006 -0.9008542 -0.52824852 0.5606384 0.8668162
## 3 0.07386915 0.1811267 -0.2185352 -0.06774818 -0.2143826 0.4357213
## 4 0.86342006 0.5670502 -0.2382484 -1.52925136 2.3393604 -0.3002944
## 5 -1.89129229 -1.9414523 1.5612876 1.60546806 -1.2086753 -1.6527233
# Append each row's cluster label to the original data. data.frame() names
# the new column `fit.cluster` (see the str() output below).
final2 <- data.frame(mydata, fit$cluster)
str(final2)
# Sample output from an earlier run; cluster labels depend on the k-means fit.
## 'data.frame': 25 obs. of 8 variables:
## $ Univ : Factor w/ 25 levels "Brown","CalTech",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ SAT : int 1310 1415 1260 1310 1280 1340 1315 1255 1400 1305 ...
## $ Top10 : int 89 100 62 76 83 89 90 74 91 75 ...
## $ Accept : int 22 25 59 24 33 23 30 24 14 44 ...
## $ SFRatio : int 13 6 9 12 13 10 12 12 11 7 ...
## $ Expenses : int 22704 63575 25026 31510 21864 32162 31585 20126 39525 58691 ...
## $ GradRate : int 94 81 72 88 90 95 95 92 97 87 ...
## $ fit.cluster: int 3 4 1 3 3 2 2 3 2 4 ...

# View() opens a data viewer and needs an interactive session; when the
# script is run non-interactively (e.g. via Rscript), print instead so the
# script does not abort and the result is still shown.
if (interactive()) {
  View(final2)
} else {
  print(final2)
}

# Per-cluster means of the original (unscaled) measures in columns 2-7.
x <- aggregate(mydata[, 2:7], by = list(fit$cluster), FUN = mean)
if (interactive()) {
  View(x)
} else {
  print(x)
}
# install.packages("animation")
library(animation)

# Animate the k-means iterations for a 5-cluster solution.
# pch/col are passed explicitly: kmeans.ani() defaults to three plotting
# symbols and three colours and recycles them across centres, so with five
# clusters, clusters 4 and 5 would be drawn like clusters 1 and 2.
# NOTE(review): the kmeans.ani() documentation describes `x` as a 2-column
# matrix, but six scaled columns are passed here; presumably only the first
# two are drawn. Confirm, and consider passing a 2-D projection instead.
x1 <- kmeans.ani(normalized_data, centers = 5, pch = 1:5, col = 1:5)















