#
#Hierarchical Clustering in R.
#PREPARED BY Richel alabado
homedir <- "C:/Users/LENOVO/OneDrive/Documents/thesis"
setwd(homedir)
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(cluster)
#load data
df <- USArrests
#remove rows with missing values
df <- na.omit(df)
#scale each variable to have a mean of 0 and sd of 1
df <- scale(df)
#define linkage methods
m <- c( "average", "single", "complete", "ward")
names(m) <- c( "average", "single", "complete", "ward")
#function to compute agglomerative coefficient
ac <- function(x) {
agnes(df, method = x)$ac
}
#calculate agglomerative coefficient for each clustering linkage method
sapply(m, ac)
## average single complete ward
## 0.7379371 0.6276128 0.8531583 0.9346210
#perform hierarchical clustering using Ward's minimum variance
clust <- agnes(df, method = "ward")
#produce dendrogram
pltree(clust, cex = 0.6, hang = -1, main = "Dendrogram")

#calculate gap statistic for each number of clusters (up to 10 clusters)
gap_stat <- clusGap(df, FUN = hcut, nstart = 25, K.max = 10, B = 50)
#produce plot of clusters vs. gap statistic
fviz_gap_stat(gap_stat)

#compute distance matrix
d <- dist(df, method = "euclidean")
#perform hierarchical clustering using Ward's method
final_clust <- hclust(d, method = "ward.D2" )
#cut the dendrogram into 4 clusters
groups <- cutree(final_clust, k=4)
# Number of members in each cluster
table(groups)
## groups
## 1 2 3 4
## 7 12 19 12
#append cluster labels to original data
final_data <- cbind(USArrests, cluster = groups)
#display first six rows of final data
head(final_data)
## Murder Assault UrbanPop Rape cluster
## Alabama 13.2 236 58 21.2 1
## Alaska 10.0 263 48 44.5 2
## Arizona 8.1 294 80 31.0 2
## Arkansas 8.8 190 50 19.5 3
## California 9.0 276 91 40.6 2
## Colorado 7.9 204 78 38.7 2
#find mean values for each cluster
aggregate(final_data, by=list(cluster=final_data$cluster), mean)
## cluster Murder Assault UrbanPop Rape cluster
## 1 1 14.671429 251.2857 54.28571 21.68571 1
## 2 2 10.966667 264.0000 76.50000 33.60833 2
## 3 3 6.210526 142.0526 71.26316 19.18421 3
## 4 4 3.091667 76.0000 52.08333 11.83333 4