#toInstall <- c("cluster", "fpc", "mclust")
#install.packages(toInstall, dependencies=TRUE)
#set working directory
#setwd("surfdrive/publications 2023/review paper UPM")
## Binary Matrix of Objectives
A Binary Matrix has been prepared with 140 observations and 11 objectives, with presence/absence values 1/0.
# Load the presence/absence matrix from CSV; the first row holds column names.
mydata <- read.csv(file = "datasetforR.csv", header = TRUE)
Using the ‘binary’ method, the distance matrix is calculated.
# Pairwise distances between the rows of mydata, computed with dist()'s
# "binary" method; this distance object is the input to the clustering.
d_binary <- dist(x = mydata, method = "binary")
With the distance matrix, the clustering is done using the complete-linkage method, for balanced and tighter clusters.
# Agglomerative hierarchical clustering of the distance object using
# complete linkage.
fit <- hclust(d = d_binary, method = "complete")
We first need to select a number of clusters to explore different dendrograms.
# Number of clusters (k) used to cut the dendrogram and to evaluate cluster
# stability. Stored as an integer because it is a count.
clusters <- 8L
Then we can draw the dendrogram.
# Use a single plotting panel (1 row, 1 column).
par(mfrow = c(1, 1))
# Draw the dendrogram; cex scales the label font size.
plot(x = fit, cex = 0.8)
# Outline the groups obtained when the tree is cut into k = clusters clusters.
rect.hclust(tree = fit, k = clusters)
To evaluate if the clusters are similar enough, we calculate the average Jaccard similarity index. We bootstrap 1000 times.
library(fpc)

# Number of bootstrap resamples drawn by clusterboot().
bsamples <- 1000

# Seed the random number generator BEFORE resampling. clusterboot() draws
# random bootstrap samples, so seeding after the call (as was done previously)
# does not make the reported values reproducible.
set.seed(8675309)

# NOTE(review): `dmethod` and `cmethod` are forwarded through `...` to the
# clustering interface. They do not appear to be documented arguments of
# hclustCBI, so confirm against the fpc documentation that they take effect
# and that the bootstrap really uses the same binary distance as `d_binary`.
clus.boot <- clusterboot(
  mydata,
  B = bsamples,               # number of bootstrap resamples
  clustermethod = hclustCBI,  # interface for hierarchical clustering
  method = "complete",        # same linkage as the hclust() call
  dmethod = "binary",
  cmethod = "complete",
  k = clusters,               # number of clusters to evaluate
  count = FALSE               # show progress on screen?
)

# Per-cluster stability summary:
#   AvgJaccard  - mean Jaccard similarity of each cluster across resamples
#   Instability - bootbrd divided by the number of resamples (per the fpc
#                 documentation, bootbrd counts how often a cluster dissolved)
AvgJaccard <- clus.boot$bootmean
Instability <- clus.boot$bootbrd / bsamples
Clusters <- seq_len(clusters)

# Column names of the printed matrix come from the variable names above.
Eval <- cbind(Clusters, AvgJaccard, Instability)
Eval
## Clusters AvgJaccard Instability
## [1,] 1 0.3930995 0.854
## [2,] 2 0.4241156 0.762
## [3,] 3 0.5579577 0.461
## [4,] 4 0.6140642 0.451
## [5,] 5 0.2746869 0.985
## [6,] 6 0.1926094 0.995
## [7,] 7 0.9401069 0.017
## [8,] 8 0.3627914 0.858