# Load the universities data set.
# NOTE(review): this is a machine-specific absolute path; adjust it when
# running the script on another machine.
input <- read.csv("/home/vijay/Documents/xcelR banglore/datasets from excelr website/Hierarchial Clustering/Universities.csv")

# Standardise columns 2-7 (zero mean, unit variance) so that every variable
# contributes on a comparable scale to the distances used by k-means.
normalized_data <- scale(input[, 2:7])

# k-means starts from randomly chosen centres. Fix the seed so the result is
# reproducible, and try several random starts (the best one is kept) so the
# solution is less likely to be a poor local optimum.
# Cluster labels (1, 2, 3) are arbitrary and may be numbered differently
# from one seed to another.
set.seed(123)
fit <- kmeans(normalized_data, centers = 3, nstart = 25) # cluster solution

# Append cluster membership as the first column (named `fit.cluster`).
final2 <- data.frame(fit$cluster, input)

# Per-cluster means of the original (unscaled) variables.
aggregate(input[, 2:7], by = list(fit$cluster), FUN = mean)
##   Group.1      SAT    Top10   Accept   SFRatio Expenses GradRate
## 1       1 1262.500 78.50000 40.08333 12.833333 22858.00 87.58333
## 2       2 1362.778 90.55556 24.33333  9.666667 41176.89 92.22222
## 3       3 1061.500 38.75000 70.00000 19.250000  9953.00 71.75000
library(readr)
# Elbow plot to help determine the number of clusters: total within-group
# sum of squares for k = 1, ..., max_k clusters.
max_k <- 8L
wss <- numeric(max_k) # preallocate instead of growing inside the loop

# With a single cluster the within-group sum of squares is the total sum of
# squares about the column means: (n - 1) * sum of the column variances.
wss[1] <- (nrow(normalized_data) - 1) * sum(apply(normalized_data, 2, var))

# Fix the seed and use several random starts per k so the curve is
# reproducible and not distorted by an unlucky initialisation.
set.seed(123)
for (i in 2:max_k) {
  # `tot.withinss` is the sum of the per-cluster `withinss` values.
  wss[i] <- kmeans(normalized_data, centers = i, nstart = 25)$tot.withinss
}

plot(seq_len(max_k), wss, type = "b", xlab = "number of clusters", ylab = "within groups sum of squares")
# NOTE(review): "Screen-Plot" is probably meant to read "Scree plot"; the
# label is left unchanged here.
title(sub = " K -means clustering Screen-Plot")