# Importing the Data ----

# Read the data set. The `Kota` column supplies the row names and is removed
# from the data by name (not by position), so the remaining columns are the
# variables that are analysed below.
cities <- read.csv("cities.csv", row.names = "Kota")

# Upper-triangle correlation heat map of the variables.
corrplot::corrplot(
  cor(cities),
  method = "color",
  type = "upper",
  order = "original",
  addCoef.col = "black", # print the correlation coefficient in each cell
  diag = FALSE
)

# Box plots of the standardised variables (scale() centres and scales each
# column), so that all variables can be compared on one axis.
boxplot(scale(cities), col = "lightblue")

# Assessing Cluster Tendency ----

#install.packages(c("factoextra", "hopkins"))
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.2.3
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# PCA of the variables, used only to visualise the observations in two
# dimensions.
# NOTE(review): prcomp() is run without `scale. = TRUE`, so variables with
# larger variance dominate the components - confirm this is intended.
res.pca <- prcomp(cities)

# `ggtheme` is the argument that sets the plot theme; the former
# `theme = "minimal"` is not an argument of fviz_pca_ind().
fviz_pca_ind(
  res.pca,
  ggtheme = theme_minimal(),
  repel = TRUE,
  col.ind = "steelblue"
)

library(hopkins)
## Warning: package 'hopkins' was built under R version 4.2.3
# Hopkins statistic for cluster tendency, based on `n_sampled` randomly
# sampled points. The seed makes the random sample reproducible.
set.seed(123)
n_sampled <- 2
hopkins_stat <- hopkins(cities, m = n_sampled)
hopkins_stat
## [1] 0.02318285
# p-value of the statistic printed above. hopkins() must not be called again
# here: every call draws a new random sample, so a second call would test a
# different statistic than the one that is reported.
# (Re-run to obtain the p-value that belongs to the statistic above.)
hopkins.pval(hopkins_stat, n = n_sampled)

# Heat map of the pairwise Euclidean distances between the observations.
fviz_dist(dist(cities), show_labels = TRUE)

# Calculating Distance ----

# Pairwise Mahalanobis distances between the rows of X.
#
# X: numeric matrix or data frame, one observation per row.
# Returns a "dist" object holding the distances between all pairs of rows,
# using the sample covariance of X.
cholMaha <- function(X) {
  # chol() returns the upper factor R with cov(X) = R'R, so t(R) is the
  # lower-triangular factor L.
  lower_factor <- t(chol(cov(X)))
  # Solve L Z = X': each column of Z is one observation in whitened
  # coordinates.
  whitened <- forwardsolve(lower_factor, t(X))
  # Euclidean distance between whitened observations is the Mahalanobis
  # distance between the original ones.
  dist(t(whitened))
}

# Distance object produced by cholMaha() for the city data; it is the input
# of the hclust() calls in the sections that follow.
jarak <- cholMaha(X = cities)

# Single Linkage Clustering ----

# Agglomerative clustering on `jarak` with single linkage.
clustsingle <- hclust(d = jarak, method = "single")
# Use the row names of the data as leaf labels of the tree.
clustsingle[["labels"]] <- row.names(cities)
# Dendrogram with all leaves aligned at the bottom (hang = -1).
plot(x = clustsingle, hang = -1, cex = 0.6)

# Number of clusters: elbow plot of the within-cluster sum of squares, with a
# reference line at k = 2.
# NOTE(review): hcut is called with its defaults on the raw `cities` data,
# not on `jarak` or with the linkage used in this section - confirm that
# this is the clustering that should be evaluated here.
fviz_nbclust(cities, hcut, method = "wss") +
  geom_vline(xintercept = 2, linetype = 2) +
  labs(subtitle = "Elbow method")

# Number of clusters: gap statistic. It is estimated from randomly generated
# reference data, so the seed is fixed to make the figure reproducible.
set.seed(123)
fviz_nbclust(cities, hcut, method = "gap_stat") +
  geom_vline(xintercept = 2, linetype = 2)

library(dendextend); library(tidyverse);
## Warning: package 'dendextend' was built under R version 4.2.3
## 
## ---------------------
## Welcome to dendextend version 1.17.1
## Type citation('dendextend') for how to cite the package.
## 
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
## 
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## You may ask questions at stackoverflow, use the r and dendextend tags: 
##   https://stackoverflow.com/questions/tagged/dendextend
## 
##  To suppress this message use:  suppressPackageStartupMessages(library(dendextend))
## ---------------------
## 
## Attaching package: 'dendextend'
## The following object is masked from 'package:stats':
## 
##     cutree
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.3.0      ✔ stringr 1.5.0 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ✔ purrr   1.0.1
## Warning: package 'tidyr' was built under R version 4.2.3
## Warning: package 'purrr' was built under R version 4.2.3
## Warning: package 'stringr' was built under R version 4.2.3
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# Dendrogram of the single-linkage tree with branches and labels coloured by
# the k = 2 cut.
dend <- clustsingle %>%
  as.dendrogram() %>%
  color_branches(k = 2) %>%
  color_labels(k = 2)
# The color_labels() step is the same as:
# labels_colors(dend) <- get_leaves_branches_col(dend)
plot(dend, lwd = 5, cex = 5, edge.root = TRUE, horiz = FALSE)

# Complete Linkage Clustering ----

# Agglomerative clustering on `jarak` with complete linkage.
clustcomp <- hclust(d = jarak, method = "complete")
# Use the row names of the data as leaf labels of the tree.
clustcomp[["labels"]] <- row.names(cities)
# Dendrogram with all leaves aligned at the bottom (hang = -1).
plot(x = clustcomp, hang = -1)

library(dendextend); library(tidyverse);

# Dendrogram of the complete-linkage tree with branches and labels coloured
# by the k = 3 cut.
dend2 <- clustcomp %>%
  as.dendrogram() %>%
  color_branches(k = 3) %>%
  color_labels(k = 3)
# The color_labels() step is the same as:
# labels_colors(dend2) <- get_leaves_branches_col(dend2)
plot(dend2, lwd = 5, cex = 5, edge.root = TRUE, horiz = FALSE)

# Ward Method ----

# Agglomerative clustering on `jarak` with Ward linkage. "ward.D" is the
# current name of the method formerly called "ward"; using it gives the same
# fit without the rename message.
# NOTE(review): hclust() also offers "ward.D2"; "ward.D" is kept so that the
# results recorded in this document stay valid - confirm which variant is
# wanted.
clustward <- hclust(jarak, method = "ward.D")
# Use the row names of the data as leaf labels of the tree.
clustward$labels <- row.names(cities)
# Dendrogram with all leaves aligned at the bottom (hang = -1).
plot(clustward, hang = -1)

# Dendrogram of the Ward tree with branches and labels coloured by the k = 4
# cut.
dend3 <- clustward %>%
  as.dendrogram() %>%
  color_branches(k = 4) %>%
  color_labels(k = 4)
# The color_labels() step is the same as:
# labels_colors(dend3) <- get_leaves_branches_col(dend3)
plot(dend3, lwd = 5, cex = 5, edge.root = TRUE, horiz = FALSE)

# Cluster Validation ----

# Calinski-Harabasz (CH) Index ----

library(clusterSim)
## Warning: package 'clusterSim' was built under R version 4.2.3
## Loading required package: cluster
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
# CH index (clusterSim::index.G1) of the three candidate solutions:
# single linkage cut at k = 2, complete linkage cut at k = 3 and Ward cut at
# k = 4. Of the three recorded values the Ward solution has the largest one.
# NOTE(review): `d=jarak` is passed, but per the clusterSim documentation `d`
# appears to be used only with centrotypes="medoids"; with the default the
# index may be computed from the raw `cities` values - confirm.
index.G1(cities, cl=cutree(clustsingle,2), d=jarak)
## [1] 1.354056
index.G1(cities, cl=cutree(clustcomp,3), d=jarak)
## [1] 2.209869
index.G1(cities, cl=cutree(clustward,4), d=jarak)
## [1] 8.953063

# Davies-Bouldin (DB) Index ----

# DB index (clusterSim::index.DB) of the single-linkage solution cut at
# k = 2; the complete result list is printed.
# In the recorded output, $S is 0 for cluster 2 and $R contains Inf/NaN -
# presumably cluster 2 holds a single observation; confirm.
# The recorded $d (27.4936) equals the Euclidean distance between the two rows
# of $centers, which are on the raw variable scale, so this index is evaluated
# in raw Euclidean space rather than from `jarak`.
# NOTE(review): per the clusterSim documentation `d` appears to be used only
# with centrotypes="medoids" - confirm whether the index should instead be
# based on the same distance as the clustering.
index.DB(cities, cl=cutree(clustsingle,2), d=jarak)
## $DB
## [1] 0.7844965
## 
## $r
## [1] 0.7844965 0.7844965
## 
## $R
##           [,1]      [,2]
## [1,]       Inf 0.7844965
## [2,] 0.7844965       NaN
## 
## $d
##         1       2
## 1  0.0000 27.4936
## 2 27.4936  0.0000
## 
## $S
## [1] 21.56863  0.00000
## 
## $centers
##          [,1]     [,2]     [,3]     [,4]     [,5]
## [1,] 74.09091 6.718182 7.818182 73.81818 45.63636
## [2,] 56.00000 7.100000 6.000000 86.00000 29.00000
# For the complete-linkage (k = 3) and Ward (k = 4) solutions only the scalar
# index ($DB) is extracted and printed.
index.DB(cities, cl=cutree(clustcomp,3), d=jarak)$DB
## [1] 1.176063
index.DB(cities, cl=cutree(clustward,4), d=jarak)$DB
## [1] 0.7797654

# Average Silhouette Width (ASW) ----

# Silhouette plots of the three candidate solutions, all based on `jarak`.

# Single linkage, k = 2: the silhouette object is stored once and reused for
# the plot instead of being computed a second time.
sil.clustsingle <- silhouette(cutree(clustsingle, 2), dist = jarak)
plot(sil.clustsingle, col = "lightblue")

# Complete linkage, k = 3.
plot(silhouette(cutree(clustcomp, 3), dist = jarak), col = "lightblue")

# Ward, k = 4.
plot(silhouette(cutree(clustward, 4), dist = jarak), col = "lightblue")

# The Results ----

# Store the chosen solution (Ward tree cut at k = 4) as a column of the data.
cities$cluster <- cutree(clustward, k = 4)

# PCA map of the observations coloured by cluster. The PCA is computed on the
# original variables only: the `cluster` column added above is a label, not a
# measurement, and must not enter prcomp().
feature_cols <- setdiff(names(cities), "cluster")
fviz_pca_ind(
  prcomp(cities[, feature_cols, drop = FALSE]),
  title = "Clustering Results",
  habillage = factor(cities$cluster),
  palette = "jco",
  ggtheme = theme_classic(),
  addEllipses = FALSE,
  repel = TRUE,
  legend = "bottom"
)

# Mean of every variable within each cluster.
cities %>%
  group_by(cluster) %>%
  summarise(across(everything(), mean))
## # A tibble: 4 × 6
##   cluster Pendapatan Pinjaman Dana.Hibah Konsumsi Produk
##     <int>      <dbl>    <dbl>      <dbl>    <dbl>  <dbl>
## 1       1       60.2     5.75       7.5      59     37.2
## 2       2       79.8     6.85       7.25     79.5   42.2
## 3       3       85       7.83       9        86     61.3
## 4       4       56       7.1        6        86     29