Importing the Data
# Read the data set; the `Kota` column is used as the row label for every
# observation so that later plots and dendrograms are labelled by it.
cities <- read.csv("cities.csv")
row.names(cities) <- cities$Kota
# Drop the label column by name rather than by position, so the right column
# is removed wherever `Kota` sits in the CSV.
# Assumes every remaining column is numeric (cor() requires it) -- confirm.
cities$Kota <- NULL

# Correlation heat map: upper triangle only, coefficient printed in each cell.
# No trailing comma after the last argument: a dangling empty argument is
# matched positionally to the next free formal, which is fragile.
corrplot::corrplot(
  cor(cities),
  method = "color",
  type = "upper",
  order = "original",
  addCoef.col = "black", # Add coefficient of correlation
  diag = FALSE
)

# Boxplots of the centred and scaled variables, to compare spread and spot
# outliers on a common scale.
boxplot(scale(cities), col = "lightblue")

Assessing Cluster Tendency
# install.packages(c("factoextra", "hopkins"))
# Attach factoextra explicitly: fviz_pca_ind() below is called unqualified,
# and the load messages recorded underneath belong to this call.
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.2.3
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# PCA on the indicators as-is. prcomp() centres but does not scale by default,
# so variables with larger variance dominate the components.
res.pca <- prcomp(cities)
# Plot the observations on the first two components.
# NOTE(review): `theme = "minimal"` is only forwarded through `...`; the
# results plot later in this script uses `ggtheme =` instead -- confirm which
# was intended.
fviz_pca_ind(res.pca, theme = "minimal", repel = TRUE, col.ind = "steelblue")

## Warning: package 'hopkins' was built under R version 4.2.3
# Attach hopkins explicitly: hopkins() and hopkins.pval() are called unqualified.
library(hopkins)
# The Hopkins statistic is based on a random sample of m points, so fix the
# seed and compute it exactly once.
set.seed(123)
hopkins_stat <- hopkins(cities, m = 2)
hopkins_stat
## [1] 0.02318285
# p-value for the statistic reported above; `n` is the number of sampled
# points and therefore matches `m`. Reusing `hopkins_stat` avoids testing a
# second, independent random draw.
hopkins.pval(hopkins_stat, n = 2)
## NOTE: the previously recorded p-value (0.005683091) came from a separate
## hopkins(cities) draw; re-run to refresh this output.
# Ordered dissimilarity image of the Euclidean distances between observations.
fviz_dist(dist(cities), show_labels = TRUE)

Single Linkage Clustering
# `jarak` is the dissimilarity object shared by every clustering and
# validation step below, but it is not defined anywhere in the visible script.
# Define it only when missing so an existing definition is left untouched.
# NOTE(review): Euclidean distance on the unscaled data is assumed here because
# fviz_dist(dist(cities)) is what the script uses earlier -- confirm.
if (!exists("jarak")) {
  jarak <- dist(cities)
}

# Agglomerative clustering with single (nearest-neighbour) linkage.
clustsingle <- hclust(jarak, method = "single")
clustsingle$labels <- row.names(cities)
plot(clustsingle, cex = 0.6, hang = -1)

# Choose the number of clusters: within-cluster sum of squares (elbow).
# NOTE(review): hcut() is called with its default linkage here, not
# method = "single" -- confirm that this is the intended basis for k = 2.
fviz_nbclust(cities, hcut, method = "wss") +
  geom_vline(xintercept = 2, linetype = 2) +
  labs(subtitle = "Elbow method")

# The gap statistic is simulation-based; seed it so the plot is reproducible.
set.seed(123)
fviz_nbclust(cities, hcut, method = "gap_stat") +
  geom_vline(xintercept = 2, linetype = 2)

library(dendextend); library(tidyverse);
## Warning: package 'dendextend' was built under R version 4.2.3
##
## ---------------------
## Welcome to dendextend version 1.17.1
## Type citation('dendextend') for how to cite the package.
##
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
##
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## You may ask questions at stackoverflow, use the r and dendextend tags:
## https://stackoverflow.com/questions/tagged/dendextend
##
## To suppress this message use: suppressPackageStartupMessages(library(dendextend))
## ---------------------
##
## Attaching package: 'dendextend'
## The following object is masked from 'package:stats':
##
## cutree
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.3.0 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ✔ purrr 1.0.1
## Warning: package 'tidyr' was built under R version 4.2.3
## Warning: package 'purrr' was built under R version 4.2.3
## Warning: package 'stringr' was built under R version 4.2.3
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Convert the single-linkage tree to a dendrogram and colour both the
# branches and the leaf labels by the 2-cluster cut.
dend <- clustsingle %>%
  as.dendrogram() %>%
  color_branches(k = 2) %>%
  color_labels(k = 2)
# Equivalent way to colour the labels:
# labels_colors(dend) <- get_leaves_branches_col(dend)
plot(dend, lwd = 5, cex = 5, edge.root = TRUE, horiz = FALSE)

Complete Linkage Clustering
# Agglomerative clustering on `jarak` with complete (farthest-neighbour) linkage.
clustcomp <- hclust(d = jarak, method = "complete")
clustcomp[["labels"]] <- row.names(cities)
plot(clustcomp, hang = -1)

library(dendextend)
library(tidyverse)
# Dendrogram with branches and leaf labels coloured by the 3-cluster cut.
dend2 <- color_labels(
  color_branches(as.dendrogram(clustcomp), k = 3),
  k = 3
)
# Equivalent way to colour the labels:
# labels_colors(dend2) <- get_leaves_branches_col(dend2)
plot(dend2, lwd = 5, cex = 5, edge.root = TRUE, horiz = FALSE)

Ward Method
# "ward" is a deprecated alias that hclust() maps to "ward.D" with a message on
# every run; naming "ward.D" directly gives the identical tree without it.
# NOTE(review): per the hclust() documentation, only "ward.D2" implements
# Ward's criterion when given unsquared distances. Switching would change the
# clusters, so it is left as a decision for the analyst.
clustward <- hclust(jarak, method = "ward.D")
clustward$labels <- row.names(cities)
plot(clustward, hang = -1)

# Dendrogram with branches and leaf labels coloured by the 4-cluster cut.
dend3 <- clustward %>%
  as.dendrogram() %>%
  color_branches(k = 4) %>%
  color_labels(k = 4)
# Equivalent way to colour the labels:
# labels_colors(dend3) <- get_leaves_branches_col(dend3)
plot(dend3, lwd = 5, cex = 5, edge.root = TRUE, horiz = FALSE)

Cluster Validation
CH Index (Calinski–Harabasz; higher is better)
## Warning: package 'clusterSim' was built under R version 4.2.3
## Loading required package: cluster
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
# Attach clusterSim explicitly: index.G1() (and index.DB() further down) are
# called unqualified. It also attaches cluster and MASS, as the load messages
# recorded above show.
library(clusterSim)
# Calinski-Harabasz pseudo-F for each candidate solution; larger is better.
index.G1(cities, cl = cutree(clustsingle, k = 2), d = jarak)
## [1] 1.354056
index.G1(cities, cl = cutree(clustcomp, k = 3), d = jarak)
## [1] 2.209869
index.G1(cities, cl = cutree(clustward, k = 4), d = jarak)
## [1] 8.953063
DB Index (Davies–Bouldin; lower is better)
# Full index.DB() result for the 2-cluster single-linkage cut; the printed
# list is reproduced below as recorded.
index.DB(
  cities,
  cl = cutree(clustsingle, k = 2),
  d = jarak
)
## $DB
## [1] 0.7844965
##
## $r
## [1] 0.7844965 0.7844965
##
## $R
## [,1] [,2]
## [1,] Inf 0.7844965
## [2,] 0.7844965 NaN
##
## $d
## 1 2
## 1 0.0000 27.4936
## 2 27.4936 0.0000
##
## $S
## [1] 21.56863 0.00000
##
## $centers
## [,1] [,2] [,3] [,4] [,5]
## [1,] 74.09091 6.718182 7.818182 73.81818 45.63636
## [2,] 56.00000 7.100000 6.000000 86.00000 29.00000
# For the other two solutions only the scalar DB component is printed.
index.DB(cities, cl = cutree(clustcomp, k = 3), d = jarak)[["DB"]]
## [1] 1.176063
index.DB(cities, cl = cutree(clustward, k = 4), d = jarak)[["DB"]]
## [1] 0.7797654
ASW (Average Silhouette Width; higher is better)
# Silhouette of the 2-cluster single-linkage cut: computed once, stored, and
# plotted from the stored object. The cutree() expressions are kept exactly as
# written because plot() builds its title from the recorded call.
sil.clustsingle <- silhouette(cutree(clustsingle, 2), dist = jarak)
plot(sil.clustsingle, col = "lightblue")

# Silhouette of the 3-cluster complete-linkage cut.
plot(
  silhouette(cutree(clustcomp, 3), dist = jarak),
  col = "lightblue"
)

# Silhouette of the 4-cluster Ward cut.
plot(
  silhouette(cutree(clustward, 4), dist = jarak),
  col = "lightblue"
)

The Results
# Final solution: attach the 4-cluster Ward assignment to the data.
cities$cluster <- cutree(clustward, 4)

# Run the PCA on the indicator columns only. Including `cluster` would treat
# the integer labels as one more measured variable and distort the projection
# they are meant to colour.
indicator_cols <- setdiff(names(cities), "cluster")
fviz_pca_ind(
  prcomp(cities[, indicator_cols, drop = FALSE]),
  title = "Clustering Results",
  habillage = cities$cluster, palette = "jco",
  ggtheme = theme_classic(), addEllipse = FALSE, repel = TRUE,
  legend = "bottom"
)

# Cluster profiles: mean of every indicator within each cluster.
# across() replaces the superseded summarise_all(); column names are unchanged.
cities %>%
  group_by(cluster) %>%
  summarise(across(everything(), mean))
## # A tibble: 4 × 6
## cluster Pendapatan Pinjaman Dana.Hibah Konsumsi Produk
## <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 60.2 5.75 7.5 59 37.2
## 2 2 79.8 6.85 7.25 79.5 42.2
## 3 3 85 7.83 9 86 61.3
## 4 4 56 7.1 6 86 29