K-Means Clustering

University Dataset

Implementation with ANIMATION clustering

# Load the Universities data set. The first column is the university name
# (Univ); columns 2-7 are the numeric measures that get clustered.
input <- read.csv("C:\\Users\\RISHI RAHUL\\Desktop\\Dataset\\3 Data Sets\\6 Hierarchial Clustering\\Universities.csv")

# Keep the raw import in `input`; `mydata` is the working copy.
mydata <- input

# Standardise each measure (mean 0, sd 1) so that large-valued columns such as
# Expenses do not dominate the distances used by k-means.
normalized_data <- scale(mydata[, 2:7])

# kmeans() chooses its starting centres at random, so cluster numbering (and
# possibly membership) changes from run to run unless the RNG is seeded.
# NOTE(review): console output recorded elsewhere in this file came from an
# unseeded run and may not match the seeded result.
set.seed(123)
fit <- kmeans(normalized_data, centers = 5) # 5 cluster solution
str(fit)

## List of 9
##  $ cluster     : int [1:25] 3 4 1 3 3 2 2 3 2 4 ...
##  $ centers     : num [1:5, 1:6] -0.367 0.8964 0.0739 0.8634 -1.8913 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:5] "1" "2" "3" "4" ...
##   .. ..$ : chr [1:6] "SAT" "Top10" "Accept" "SFRatio" ...
##  $ totss       : num 144
##  $ withinss    : num [1:5] 6.74 2.83 6.63 2.11 7.09
##  $ tot.withinss: num 25.4
##  $ betweenss   : num 119
##  $ size        : int [1:5] 3 7 9 2 4
##  $ iter        : int 2
##  $ ifault      : int 0
##  - attr(*, "class")= chr "kmeans"

# Show the cluster centres (one row per cluster, in standardised units).
fit[["centers"]]

##           SAT      Top10     Accept     SFRatio   Expenses   GradRate
## 1 -0.36704889 -0.1276120  0.8347143  0.31470124 -0.6130148 -0.9259078
## 2  0.89637905  0.7692006 -0.9008542 -0.52824852  0.5606384  0.8668162
## 3  0.07386915  0.1811267 -0.2185352 -0.06774818 -0.2143826  0.4357213
## 4  0.86342006  0.5670502 -0.2382484 -1.52925136  2.3393604 -0.3002944
## 5 -1.89129229 -1.9414523  1.5612876  1.60546806 -1.2086753 -1.6527233

# Attach each row's cluster label to the original (unscaled) data; the new
# column is called `fit.cluster`.
final2 <- data.frame(mydata, fit.cluster = fit$cluster)
str(final2)

## 'data.frame':    25 obs. of  8 variables:
##  $ Univ       : Factor w/ 25 levels "Brown","CalTech",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ SAT        : int  1310 1415 1260 1310 1280 1340 1315 1255 1400 1305 ...
##  $ Top10      : int  89 100 62 76 83 89 90 74 91 75 ...
##  $ Accept     : int  22 25 59 24 33 23 30 24 14 44 ...
##  $ SFRatio    : int  13 6 9 12 13 10 12 12 11 7 ...
##  $ Expenses   : int  22704 63575 25026 31510 21864 32162 31585 20126 39525 58691 ...
##  $ GradRate   : int  94 81 72 88 90 95 95 92 97 87 ...
##  $ fit.cluster: int  3 4 1 3 3 2 2 3 2 4 ...

# Open the labelled data in the spreadsheet viewer. View() needs a GUI, so it
# is skipped when the script runs non-interactively (e.g. via Rscript).
if (interactive()) {
  View(final2)
}

# Per-cluster mean of every original (unscaled) measure, for profiling the
# clusters in the data's natural units.
x <- aggregate(mydata[, 2:7], by = list(fit$cluster), FUN = mean)
if (interactive()) {
  View(x)
}

# install.packages("animation")
library(animation)

# Animated k-means on the standardised data. kmeans.ani() also starts from
# randomly chosen centres, so seed the RNG to make the animation repeatable.
# This run is independent of `fit`, so its cluster numbering need not match.
# NOTE(review): the plot presumably shows only the first two columns of the
# input -- confirm against the animation package documentation.
set.seed(123)
x1 <- kmeans.ani(normalized_data, centers = 5)