# Hierarchical Clustering
# load package 'readxl' to load data from xlsx file
library(readxl)
## Warning: package 'readxl' was built under R version 3.4.4
# Read the university data set from the Excel workbook.
input <- read_excel(
  "C:/Users/yogesh.thimmegowda/Desktop/SKY/R Codes/Clustering/University_Clustering.xlsx"
)
# View(input)

# Keep the first 25 rows, and columns 1 and 3 through 8 (7 columns in total).
mydata <- input[seq_len(25), c(1, 3:8)]
# View(mydata)

# Standardise every column except the first one (the university name), so
# that each remaining variable is centred and scaled before distances are
# computed.
normalized_data <- scale(mydata[, -1])

# Pairwise Euclidean distances between the standardised rows.
d <- dist(normalized_data, method = "euclidean")
d
## 1 2 3 4 5 6 7
## 2 3.7857938
## 3 3.5430636 4.1848461
## 4 1.1531123 3.1908062 2.7831649
## 5 0.8254086 3.8460831 2.8112483 0.9853405
## 6 1.0321529 2.9846624 3.5437922 1.1695165 1.4203093
## 7 0.7879842 3.2655674 3.4195166 1.1003951 1.0375485 0.6520549
## 8 1.0023943 4.0861104 3.0109877 1.0421418 0.7743085 1.5037735 1.3482156
## 9 1.6050500 2.8206003 4.2390400 1.7026482 2.1590538 0.9460827 1.2992183
## 10 3.2805612 2.0559324 3.1030668 2.4711581 3.0557018 2.5422276 2.6325334
## 11 1.4188069 2.5317720 3.3203915 1.3317046 1.6013507 0.7474162 0.9438530
## 12 1.2968620 3.3913483 2.4973647 1.0656666 0.7583965 1.3511703 1.0445978
## 13 1.3157492 4.4925822 3.0064051 1.7164830 0.8279045 1.9141752 1.5054133
## 14 4.3171337 6.6524863 3.3246017 3.9678543 3.6404851 4.8991316 4.5191196
## 15 1.5263446 2.9392629 3.8980026 1.6688113 2.0042855 0.7635247 1.3939767
## 16 6.3517374 8.0714998 4.2949603 5.9159998 5.5800729 6.8296647 6.4227353
## 17 1.0984372 2.8379459 3.6581824 1.0931552 1.5171256 0.6623822 0.7693644
## 18 5.7162085 7.7424127 4.5372101 5.4290964 5.0591391 6.4025826 5.9297396
## 19 2.3890592 4.6851664 2.9363624 2.4594499 1.8928988 3.0929822 2.6712584
## 20 2.0846640 3.3231017 2.3019346 1.4398677 1.5301082 2.0285523 1.6550634
## 21 3.1913126 5.4826103 2.4975374 3.0079455 2.4295076 3.6936140 3.2173864
## 22 1.1494113 3.3976259 2.5479464 0.8062669 0.6681441 1.1720119 0.9366030
## 23 1.6643553 4.7822435 2.8844732 1.9151260 1.0590191 2.3277861 1.8949020
## 24 4.8579911 6.5384195 2.6773901 4.3438724 4.0727756 5.2769065 4.9465303
## 25 1.6882067 2.5461162 4.1262865 1.7067347 2.1416714 0.9657764 1.2000969
## 8 9 10 11 12 13 14
## 2
## 3
## 4
## 5
## 6
## 7
## 8
## 9 2.2330469
## 10 3.1948284 2.7767763
## 11 1.9444887 1.1485169 2.3247902
## 12 1.1716344 2.1870609 2.4553871 1.4076914
## 13 1.0917100 2.6987736 3.4949551 2.1624417 1.1906306
## 14 3.5677474 5.5531073 5.2320055 5.0664881 3.8261280 3.4440555
## 15 1.9532207 1.0305916 2.8537614 1.1065707 1.9578142 2.4747924 5.4430301
## 16 5.6671223 7.5181213 6.5908909 6.8378478 5.5946826 5.3526678 2.3723788
## 17 1.7177675 0.7296074 2.5808399 0.7974530 1.5620122 2.1577457 4.9305912
## 18 5.2560351 6.9474483 6.6425435 6.3807336 5.2827001 4.9864409 2.4062530
## 19 2.4216283 3.6597256 4.1961141 2.9700953 2.1802719 2.1531056 3.3891744
## 20 1.9520641 2.5505847 2.0664907 1.8632703 1.2064514 1.8870687 3.6489650
## 21 2.7086609 4.4104528 4.1228945 3.7104626 2.4964381 2.1037634 1.9872874
## 22 0.9572291 1.9941031 2.4497464 1.3076787 0.3948852 1.1624396 3.8311793
## 23 1.2612810 3.0953932 3.7017207 2.5534614 1.4250197 0.5035883 2.9699570
## 24 4.1198754 5.9996772 5.1479941 5.2889154 4.0570101 3.9403360 1.4611148
## 25 2.3114020 0.4984911 2.4786797 1.0197810 2.0408670 2.6961591 5.5256172
## 15 16 17 18 19 20 21
## 2
## 3
## 4
## 5
## 6
## 7
## 8
## 9
## 10
## 11
## 12
## 13
## 14
## 15
## 16 7.3969180
## 17 1.1475009 6.8621046
## 18 6.9859561 2.2737148 6.2609603
## 19 3.5837419 4.9416046 2.9591368 3.9162571
## 20 2.6897220 5.2089193 1.9603148 4.9276322 2.4655789
## 21 4.3190653 3.3672088 3.7679123 3.2687492 2.3156778 2.2897764
## 22 1.7711099 5.6758983 1.3972163 5.3640129 2.3411752 1.2193516 2.5852501
## 23 2.8985394 4.8952748 2.5096574 4.5152111 1.9663610 1.9593956 1.7013168
## 24 5.7814732 1.7643346 5.3459976 2.5535840 3.6204225 3.8069831 2.2222077
## 25 1.2297027 7.4131967 0.7056109 6.8636551 3.5608604 2.3451126 4.3077487
## 22 23 24
## 2
## 3
## 4
## 5
## 6
## 7
## 8
## 9
## 10
## 11
## 12
## 13
## 14
## 15
## 16
## 17
## 18
## 19
## 20
## 21
## 22
## 23 1.4343185
## 24 4.1222095 3.5137427
## 25 1.9212796 3.0809361 5.9158514
# Complete-linkage clustering on the distance matrix, with its dendrogram.
fit <- hclust(d, method = "complete")
plot(fit)

# Single-linkage clustering on the same distances; hang = -1 draws all
# labels from the same baseline.
fit1 <- hclust(d, method = "single")
plot(fit1, hang = -1)

# Cut the complete-linkage tree into 5 clusters and show the assignment
# (one integer cluster id per row of the data).
groups <- cutree(fit, k = 5)
groups
## [1] 1 2 3 1 1 2 2 1 2 2 2 1 1 4 2 5 2 5 1 1 4 1 1 4 2
#?cutree
# Redraw the complete-linkage dendrogram and outline its clusters in red.
# plot() has to be its own call: rect.hclust() only adds rectangles to an
# existing dendrogram, and its second positional slot is not a plot argument.
# The number of clusters is taken from `groups` so the rectangles agree with
# the cutree() assignment that is exported in `final`.
plot(fit, hang = -1)
rect.hclust(fit, k = length(unique(groups)), border = "red")

#?rect.hclust
# Turn the cluster assignment into a one-column matrix so that it becomes a
# column named `membership` when combined with the data.
membership <- as.matrix(groups)

# Cluster id first, followed by the original (unscaled) columns of mydata.
final <- data.frame(membership, mydata)
#View(final)
# Load the package for writing the data into xlsx file format
#library(xlsx)
#write.csv(final,file="final123.csv")
#getwd()