library(MASS)
library(fpc)
library(cluster)
library(dendextend)
##
## ---------------------
## Welcome to dendextend version 1.16.0
## Type citation('dendextend') for how to cite the package.
##
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
##
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## You may ask questions at stackoverflow, use the r and dendextend tags:
## https://stackoverflow.com/questions/tagged/dendextend
##
## To suppress this message use: suppressPackageStartupMessages(library(dendextend))
## ---------------------
##
## Attaching package: 'dendextend'
## The following object is masked from 'package:stats':
##
## cutree
library(mclust)
## Package 'mclust' version 5.4.10
## Type 'citation("mclust")' for citing this R package in publications.
library(EMCluster)
## Loading required package: Matrix
D1<- rbind(cbind(rnorm(15,0,0.5), rnorm(15,0,0.5)),
cbind(rnorm(15,5,0.5), rnorm(15,5,0.5)),
cbind(rnorm(15,5,0.5), rnorm(15,0,0.5)),
cbind(rnorm(15,0,0.5), rnorm(15,5,0.5)))
D1full=rbind(cbind(rnorm(15,0,0.5), rnorm(15,0,0.5), rep(1,15)),
cbind(rnorm(15,5,0.5), rnorm(15,5,0.5),rep(2,15)),
cbind(rnorm(15,5,0.5), rnorm(15,0,0.5),rep(3,15)),
cbind(rnorm(15,0,0.5), rnorm(15,5,0.5),rep(4,15)))
D2<- rbind(cbind(rnorm(10,0,2), rnorm(10,0,2)),
cbind(rnorm(15,5,2), rnorm(15,5,2)),
cbind(rnorm(15,5,2), rnorm(15,0,2)),
cbind(rnorm(15,0,2), rnorm(15,5,2)))
D2full=rbind(cbind(rnorm(15,0,2), rnorm(15,0,2), rep(1,15)),
cbind(rnorm(15,5,2), rnorm(15,5,2),rep(2,15)),
cbind(rnorm(15,5,2), rnorm(15,0,2),rep(3,15)),
cbind(rnorm(15,0,2), rnorm(15,5,2),rep(4,15)))
kmD2 <- kmeans(D2full[,1:2], 4)
plot(D2full[,1:2], col = kmD2$cluster, cex=2)
points(D2full[,1:2], pch=4, cex=0.3, col=D2full[,3])

table(D2full[,3], kmD2$cluster)
##
## 1 2 3 4
## 1 1 0 4 10
## 2 14 1 0 0
## 3 2 0 13 0
## 4 0 14 0 1
pamD1 <- pam(D1, 4)
pamD1
## Medoids:
## ID
## [1,] 11 -0.11479072 0.08974015
## [2,] 19 5.19069752 4.90067324
## [3,] 31 4.74114505 -0.11205244
## [4,] 58 0.04494387 4.81189654
## Clustering vector:
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3
## [39] 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## Objective function:
## build swap
## 1.0025105 0.6523814
##
## Available components:
## [1] "medoids" "id.med" "clustering" "objective" "isolation"
## [6] "clusinfo" "silinfo" "diss" "call" "data"
pamD2 <- pam(D2full[,1:2], 4)
pamD2
## Medoids:
## ID
## [1,] 9 0.5627189 -0.1360652
## [2,] 20 6.2662297 5.6887306
## [3,] 51 0.1766644 5.3909433
## [4,] 42 4.9811221 -2.1597856
## Clustering vector:
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 2 2 2 2 3 2 2 2 2 2 2 2 3 2 2 4 4 4 4 1 1 4
## [39] 4 4 2 4 4 4 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## Objective function:
## build swap
## 2.239968 2.025144
##
## Available components:
## [1] "medoids" "id.med" "clustering" "objective" "isolation"
## [6] "clusinfo" "silinfo" "diss" "call" "data"
summary(pamD1)
## Medoids:
## ID
## [1,] 11 -0.11479072 0.08974015
## [2,] 19 5.19069752 4.90067324
## [3,] 31 4.74114505 -0.11205244
## [4,] 58 0.04494387 4.81189654
## Clustering vector:
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3
## [39] 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## Objective function:
## build swap
## 1.0025105 0.6523814
##
## Numerical information per cluster:
## size max_diss av_diss diameter separation
## [1,] 15 1.281663 0.6747804 2.261776 2.871446
## [2,] 15 1.800909 0.7197072 2.833866 2.806656
## [3,] 15 1.430770 0.5933567 1.994104 2.806656
## [4,] 15 1.338423 0.6216813 2.070500 2.835471
##
## Isolated clusters:
## L-clusters: [1] 2
## L*-clusters: [1] 1 3 4
##
## Silhouette plot information:
## cluster neighbor sil_width
## 1 1 4 0.8507908
## 11 1 4 0.8503591
## 4 1 4 0.8420419
## 10 1 3 0.8349749
## 12 1 4 0.8190246
## 6 1 4 0.8124547
## 15 1 3 0.7980802
## 9 1 4 0.7898401
## 5 1 4 0.7802310
## 8 1 3 0.7557605
## 3 1 3 0.7404718
## 7 1 4 0.7327528
## 2 1 3 0.7306409
## 14 1 3 0.6714613
## 13 1 4 0.6341489
## 24 2 3 0.8455856
## 19 2 3 0.8445278
## 29 2 4 0.8355563
## 25 2 3 0.8355097
## 20 2 3 0.8301493
## 16 2 3 0.8223442
## 26 2 4 0.8176193
## 18 2 3 0.8146331
## 22 2 4 0.7831433
## 30 2 3 0.7830590
## 23 2 3 0.7673720
## 17 2 3 0.7579896
## 27 2 4 0.7103154
## 21 2 3 0.5245342
## 28 2 4 0.4837699
## 44 3 1 0.8680662
## 43 3 1 0.8679866
## 31 3 1 0.8656572
## 42 3 1 0.8594783
## 37 3 1 0.8544116
## 35 3 1 0.8460447
## 32 3 2 0.8444881
## 45 3 1 0.8096819
## 36 3 2 0.8019857
## 33 3 2 0.7731579
## 38 3 1 0.7640664
## 40 3 2 0.7613132
## 34 3 1 0.7587327
## 39 3 2 0.7555163
## 41 3 2 0.7469703
## 54 4 1 0.8627075
## 58 4 1 0.8609427
## 60 4 2 0.8523907
## 55 4 1 0.8446875
## 57 4 1 0.8319548
## 51 4 1 0.8268495
## 46 4 1 0.8251184
## 52 4 2 0.8248433
## 47 4 2 0.8235835
## 59 4 2 0.8095711
## 49 4 1 0.8005677
## 56 4 1 0.7959372
## 48 4 2 0.7823767
## 53 4 2 0.7687029
## 50 4 1 0.6321738
## Average silhouette width per cluster:
## [1] 0.7762022 0.7637406 0.8118371 0.8094938
## Average silhouette width of total data set:
## [1] 0.7903184
##
## 1770 dissimilarities, summarized :
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.05605 3.54800 5.07020 4.59570 6.20620 9.24430
## Metric : euclidean
## Number of objects : 60
##
## Available components:
## [1] "medoids" "id.med" "clustering" "objective" "isolation"
## [6] "clusinfo" "silinfo" "diss" "call" "data"
summary(pamD2)
## Medoids:
## ID
## [1,] 9 0.5627189 -0.1360652
## [2,] 20 6.2662297 5.6887306
## [3,] 51 0.1766644 5.3909433
## [4,] 42 4.9811221 -2.1597856
## Clustering vector:
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 2 2 2 2 3 2 2 2 2 2 2 2 3 2 2 4 4 4 4 1 1 4
## [39] 4 4 2 4 4 4 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## Objective function:
## build swap
## 2.239968 2.025144
##
## Numerical information per cluster:
## size max_diss av_diss diameter separation
## [1,] 16 4.434664 1.919371 6.846250 0.4675720
## [2,] 16 4.880355 2.150056 7.868150 0.9400962
## [3,] 17 4.008736 2.103055 7.246507 0.9400962
## [4,] 11 3.693022 1.876899 5.860279 0.4675720
##
## Isolated clusters:
## L-clusters: character(0)
## L*-clusters: character(0)
##
## Silhouette plot information:
## cluster neighbor sil_width
## 4 1 3 0.62493949
## 15 1 4 0.62319259
## 12 1 3 0.61804646
## 3 1 4 0.59948639
## 9 1 4 0.59471075
## 1 1 3 0.57842738
## 13 1 3 0.50138542
## 8 1 4 0.39980511
## 5 1 4 0.34500052
## 10 1 4 0.29064358
## 37 1 4 0.25234036
## 7 1 4 0.24755335
## 6 1 3 0.22991495
## 11 1 3 0.19835608
## 2 1 4 0.14158198
## 36 1 4 -0.05599211
## 20 2 3 0.64437271
## 26 2 3 0.63868345
## 19 2 3 0.62999255
## 24 2 3 0.62151018
## 25 2 3 0.60492529
## 16 2 3 0.56700186
## 23 2 3 0.50635572
## 18 2 3 0.50306908
## 28 2 3 0.49312709
## 30 2 3 0.44687110
## 22 2 4 0.42030601
## 27 2 3 0.37906766
## 14 2 3 0.26902379
## 41 2 3 0.26189858
## 17 2 3 0.25217888
## 31 2 4 0.24460127
## 59 3 2 0.60736306
## 51 3 2 0.59259495
## 53 3 1 0.56501123
## 57 3 2 0.55165181
## 49 3 1 0.54913064
## 54 3 1 0.54272364
## 60 3 1 0.52288945
## 48 3 2 0.51956540
## 56 3 1 0.47154532
## 50 3 1 0.34544079
## 21 3 2 0.29315596
## 58 3 2 0.28481801
## 46 3 2 0.26217857
## 47 3 2 0.18943647
## 55 3 2 0.18262066
## 52 3 1 0.04908075
## 29 3 2 -0.14236469
## 40 4 1 0.62007218
## 42 4 1 0.60821068
## 35 4 1 0.55535410
## 33 4 1 0.54088336
## 32 4 1 0.53145719
## 34 4 1 0.45168524
## 38 4 1 0.44088294
## 39 4 1 0.41655237
## 43 4 1 0.36896746
## 45 4 2 0.34418443
## 44 4 1 0.12390263
## Average silhouette width per cluster:
## [1] 0.3868370 0.4676866 0.3756966 0.4547411
## Average silhouette width of total data set:
## [1] 0.4176895
##
## 1770 dissimilarities, summarized :
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1815 3.7752 5.8956 5.9844 7.9648 15.0370
## Metric : euclidean
## Number of objects : 60
##
## Available components:
## [1] "medoids" "id.med" "clustering" "objective" "isolation"
## [6] "clusinfo" "silinfo" "diss" "call" "data"
plot(pamD1)


plot(pamD2)


table(D2full[,3], pamD2$clustering)
##
## 1 2 3 4
## 1 14 1 0 0
## 2 0 13 2 0
## 3 2 2 0 11
## 4 0 0 15 0
| Model Based Clustering using EM Algorithm |
modD2 <- Mclust(D2full[,1:2], G = 4)
summary(modD2, parameters = TRUE)
## ----------------------------------------------------
## Gaussian finite mixture model fitted by EM algorithm
## ----------------------------------------------------
##
## Mclust EII (spherical, equal volume) model with 4 components:
##
## log-likelihood n df BIC ICL
## -300.5249 60 12 -650.1819 -664.4548
##
## Clustering table:
## 1 2 3 4
## 11 17 17 15
##
## Mixing probabilities:
## 1 2 3 4
## 0.1990388 0.2710637 0.2845334 0.2453641
##
## Means:
## [,1] [,2] [,3] [,4]
## [1,] -0.5868774 4.096657 5.051400 0.02478723
## [2,] 0.5165496 -1.352826 5.647688 5.53166212
##
## Variances:
## [,,1]
## [,1] [,2]
## [1,] 2.901474 0.000000
## [2,] 0.000000 2.901474
## [,,2]
## [,1] [,2]
## [1,] 2.901474 0.000000
## [2,] 0.000000 2.901474
## [,,3]
## [,1] [,2]
## [1,] 2.901474 0.000000
## [2,] 0.000000 2.901474
## [,,4]
## [,1] [,2]
## [1,] 2.901474 0.000000
## [2,] 0.000000 2.901474
table(D2full[,3], modD2$classification)
##
## 1 2 3 4
## 1 10 4 1 0
## 2 0 0 14 1
## 3 0 13 2 0
## 4 1 0 0 14
plot(modD2, what=c("classification"), main = "Model based Clustering using EM")

plot(modD2, "density")

D2H <- as.data.frame(scale(D2full[,1:2]))
#summary(D2H)
dist_m <- dist(D2H, method = 'euclidean')
hcl_avg <- hclust(dist_m, method = 'average')
plot(hcl_avg)

dist_m <- dist(D2H, method = 'euclidean')
hcl_mcquitty <- hclust(dist_m, method = 'mcquitty')
plot(hcl_mcquitty)

c_avg <- cutree(hcl_avg, k = 4)
plot(hcl_avg)
rect.hclust(hcl_avg , k = 4, border = 2:6)
abline(h = 4, col = 'red')

c_avg <- cutree(hcl_avg, k = 4)
plot(hcl_mcquitty)
rect.hclust(hcl_mcquitty , k = 4, border = 2:6)
abline(h = 4, col = 'red')
