# Dendrogram using Complete Linkage
library(readxl)

# Load the university workbook, standardise columns 2 to 7 and build the
# Euclidean distance matrix `d` that the clustering steps below consume.

# The workbook is read through an absolute path, so the working directory is
# left untouched (no setwd()); edit `data_dir` to point at another location.
data_dir <- "C:\\Users\\RISHI RAHUL\\Desktop\\Dataset\\3 Data Sets\\6 Hierarchial Clustering"
data_file <- file.path(data_dir, "University.xlsx")
if (!file.exists(data_file)) {
  stop("Data file not found: ", data_file, call. = FALSE)
}

mydata <- read_excel(data_file)
input <- mydata # copy of the data as read from the workbook

normalized_data <- scale(mydata[, 2:7]) # Excluding the university name
# View() needs a data viewer, so only call it when a user is at the console.
if (interactive()) {
  View(normalized_data)
}

# Calculate Distance
d <- dist(normalized_data, method = "euclidean") # Distance matrix
d
## 1 2 3 4 5 6 7
## 2 3.7857938
## 3 3.5430636 4.1848461
## 4 1.1531123 3.1908062 2.7831649
## 5 0.8254086 3.8460831 2.8112483 0.9853405
## 6 1.0321529 2.9846624 3.5437922 1.1695165 1.4203093
## 7 0.7879842 3.2655674 3.4195166 1.1003951 1.0375485 0.6520549
## 8 1.0023943 4.0861104 3.0109877 1.0421418 0.7743085 1.5037735 1.3482156
## 9 1.6050500 2.8206003 4.2390400 1.7026482 2.1590538 0.9460827 1.2992183
## 10 3.2805612 2.0559324 3.1030668 2.4711581 3.0557018 2.5422276 2.6325334
## 11 1.4188069 2.5317720 3.3203915 1.3317046 1.6013507 0.7474162 0.9438530
## 12 1.2968620 3.3913483 2.4973647 1.0656666 0.7583965 1.3511703 1.0445978
## 13 1.3157492 4.4925822 3.0064051 1.7164830 0.8279045 1.9141752 1.5054133
## 14 4.3171337 6.6524863 3.3246017 3.9678543 3.6404851 4.8991316 4.5191196
## 15 1.5263446 2.9392629 3.8980026 1.6688113 2.0042855 0.7635247 1.3939767
## 16 6.3517374 8.0714998 4.2949603 5.9159998 5.5800729 6.8296647 6.4227353
## 17 1.0984372 2.8379459 3.6581824 1.0931552 1.5171256 0.6623822 0.7693644
## 18 5.7162085 7.7424127 4.5372101 5.4290964 5.0591391 6.4025826 5.9297396
## 19 2.3890592 4.6851664 2.9363624 2.4594499 1.8928988 3.0929822 2.6712584
## 20 2.0846640 3.3231017 2.3019346 1.4398677 1.5301082 2.0285523 1.6550634
## 21 3.1913126 5.4826103 2.4975374 3.0079455 2.4295076 3.6936140 3.2173864
## 22 1.1494113 3.3976259 2.5479464 0.8062669 0.6681441 1.1720119 0.9366030
## 23 1.6643553 4.7822435 2.8844732 1.9151260 1.0590191 2.3277861 1.8949020
## 24 4.8579911 6.5384195 2.6773901 4.3438724 4.0727756 5.2769065 4.9465303
## 25 1.6882067 2.5461162 4.1262865 1.7067347 2.1416714 0.9657764 1.2000969
## 8 9 10 11 12 13 14
## 2
## 3
## 4
## 5
## 6
## 7
## 8
## 9 2.2330469
## 10 3.1948284 2.7767763
## 11 1.9444887 1.1485169 2.3247902
## 12 1.1716344 2.1870609 2.4553871 1.4076914
## 13 1.0917100 2.6987736 3.4949551 2.1624417 1.1906306
## 14 3.5677474 5.5531073 5.2320055 5.0664881 3.8261280 3.4440555
## 15 1.9532207 1.0305916 2.8537614 1.1065707 1.9578142 2.4747924 5.4430301
## 16 5.6671223 7.5181213 6.5908909 6.8378478 5.5946826 5.3526678 2.3723788
## 17 1.7177675 0.7296074 2.5808399 0.7974530 1.5620122 2.1577457 4.9305912
## 18 5.2560351 6.9474483 6.6425435 6.3807336 5.2827001 4.9864409 2.4062530
## 19 2.4216283 3.6597256 4.1961141 2.9700953 2.1802719 2.1531056 3.3891744
## 20 1.9520641 2.5505847 2.0664907 1.8632703 1.2064514 1.8870687 3.6489650
## 21 2.7086609 4.4104528 4.1228945 3.7104626 2.4964381 2.1037634 1.9872874
## 22 0.9572291 1.9941031 2.4497464 1.3076787 0.3948852 1.1624396 3.8311793
## 23 1.2612810 3.0953932 3.7017207 2.5534614 1.4250197 0.5035883 2.9699570
## 24 4.1198754 5.9996772 5.1479941 5.2889154 4.0570101 3.9403360 1.4611148
## 25 2.3114020 0.4984911 2.4786797 1.0197810 2.0408670 2.6961591 5.5256172
## 15 16 17 18 19 20 21
## 2
## 3
## 4
## 5
## 6
## 7
## 8
## 9
## 10
## 11
## 12
## 13
## 14
## 15
## 16 7.3969180
## 17 1.1475009 6.8621046
## 18 6.9859561 2.2737148 6.2609603
## 19 3.5837419 4.9416046 2.9591368 3.9162571
## 20 2.6897220 5.2089193 1.9603148 4.9276322 2.4655789
## 21 4.3190653 3.3672088 3.7679123 3.2687492 2.3156778 2.2897764
## 22 1.7711099 5.6758983 1.3972163 5.3640129 2.3411752 1.2193516 2.5852501
## 23 2.8985394 4.8952748 2.5096574 4.5152111 1.9663610 1.9593956 1.7013168
## 24 5.7814732 1.7643346 5.3459976 2.5535840 3.6204225 3.8069831 2.2222077
## 25 1.2297027 7.4131967 0.7056109 6.8636551 3.5608604 2.3451126 4.3077487
## 22 23 24
## 2
## 3
## 4
## 5
## 6
## 7
## 8
## 9
## 10
## 11
## 12
## 13
## 14
## 15
## 16
## 17
## 18
## 19
## 20
## 21
## 22
## 23 1.4343185
## 24 4.1222095 3.5137427
## 25 1.9212796 3.0809361 5.9158514
str(d)
## 'dist' num [1:300] 3.786 3.543 1.153 0.825 1.032 ...
## - attr(*, "Size")= int 25
## - attr(*, "Diag")= logi FALSE
## - attr(*, "Upper")= logi FALSE
## - attr(*, "method")= chr "euclidean"
## - attr(*, "call")= language dist(x = normalized_data, method = "euclidean")

# Agglomerative clusterings of the same observations under four linkage rules.
# `fit` (complete linkage) is the tree that is plotted and cut further down.
fit <- hclust(d, method = "complete")
fit1 <- hclust(d, method = "single")
fit2 <- hclust(d, method = "average")
# Centroid linkage is meant to be used with squared Euclidean distances (see
# the example in ?hclust), so the distance matrix is squared for this fit only.
fit3 <- hclust(d^2, method = "centroid")

# Plot Dendrogram
plot(fit)

# hang = -1 makes every label hang down from height 0.
# plot() on an hclust object returns NULL, so the result is not stored.
plot(fit, hang = -1)

# Cut the complete-linkage tree into four clusters; `groups` holds one
# integer cluster label per observation.
groups <- cutree(tree = fit, k = 4)
print(groups)
## [1] 1 2 3 1 1 2 2 1 2 2 2 1 1 3 2 4 2 4 1 1 3 1 1 3 2
str(object = groups)
## int [1:25] 1 2 3 1 1 2 2 1 2 2 ...

# Redraw the dendrogram and box the four clusters in blue.
plot(x = fit)
rect.hclust(tree = fit, k = 4, border = "blue")

# Attach each observation's cluster label (column `groups`) to the data.
clustno <- as.data.frame(groups)
# A preceding data.frame(clustno, mydata) assignment was dropped: its result
# was overwritten by this cbind() before ever being used.
final <- cbind(clustno, mydata)
# View() needs a data viewer, so only call it when a user is at the console.
if (interactive()) {
  View(final)
}
# Per-cluster mean of columns 3 to 8 of `final`.
aggregate(final[, 3:8], by = list(final$groups), FUN = mean)
## Group.1 Z-SAT Z-Top10 Z-Accept Z-SFRatio Z-Expenses
## 1 1 0.04208204 0.2583114 -0.1926264 0.04425486 -0.2778532
## 2 2 0.88905483 0.7242783 -0.7536085 -0.75069359 0.9559099
## 3 3 -1.06072575 -1.2982465 1.1811039 0.43763142 -0.8148073
## 4 4 -2.08970541 -1.9543164 1.9921624 2.28158402 -1.2827140
## ZGradRate
## 1 0.2958783
## 2 0.6074583
## 3 -1.0731109
## 4 -2.0667322