1. Persiapan Data

# Import the spreadsheet and coerce it to a plain data frame
datauplm <- as.data.frame(
  read_excel("C:/Users/WORKPLUS/Documents/UPLM.xlsx")
)
# The first column holds the labels that will become the row names
names1 <- datauplm[[1]]
# Drop that first column, then attach the labels as row names
datauplm <- datauplm[, -1]
rownames(datauplm) <- names1
# Variables (components) that enter the analysis
names(datauplm)
## [1] "Pemerataan Layanan Perpustakaan"                                
## [2] "Ketercukupan Koleksi Perpustakaan"                              
## [3] "Rasio Ketercukupan Tenaga Perpustakaan"                         
## [4] "Tingkat Kunjungan Masyarakat per hari"                          
## [5] "Keterlibatan Masyarakat dalam Kegiatan Sosialisasi Perpustakaan"
# Check the dimensions of the data
dim(datauplm)
## [1] 35  5

2. Penentuan Jumlah Klaster

# Elbow method: total within-cluster sum of squares (WSS) for k = 1 .. n - 1.
# kmeans() starts from randomly chosen centres, so the seed is fixed to make
# the curve reproducible.
set.seed(123)
max_k <- nrow(datauplm) - 1
wss <- numeric(max_k)
# For k = 1 the WSS is the total sum of squares:
# (n - 1) * sum of the column variances.
wss[1] <- (nrow(datauplm) - 1) * sum(vapply(datauplm, var, numeric(1)))
for (i in seq_len(max_k)[-1]) {
  wss[i] <- sum(kmeans(datauplm, centers = i)$withinss)
}
# The plot is drawn once, after the loop has filled every entry of wss.
plot(seq_len(max_k), wss, type = "b", main = "Optimal Number of Clusters",
     xlab = "number of clusters", ylab = "within clusters sum of squares")

3. Analisis Klastering untuk k = 3

A. Klasterisasi Hierarkis

a. Single Linkage

# Single-linkage hierarchical clustering on Euclidean distances, cut at k = 3
duplm <- dist(datauplm, method = "euclidean")
fitsingle3 <- hclust(duplm, method = "single")
# `labels` is written out in full rather than relying on partial matching.
plot(fitsingle3, labels = rownames(datauplm), hang = -1, sub = "",
     main = "Dendrogram Metode Single Linkage k = 3",
     xlab = "Kabupaten/Kota", ylab = "Jarak")
cuts3 <- cutree(fitsingle3, k = 3)
rect.hclust(fitsingle3, k = 3, border = "red")

# Silhouette plot for the single-linkage partition
silsingle3 <- silhouette(cuts3, duplm)
plot(silsingle3, main = "Silhoutte Plot Single Linkage k=3",
     col = c("darkblue", "skyblue", "pink"))

# Total within-cluster sum of squares (WCSS)
wcss_total <- 0
for (i in 1:3) {
  # Numeric matrix of the cluster members; drop = FALSE keeps it 2-D.
  cluster_data <- as.matrix(datauplm[cuts3 == i, , drop = FALSE])
  centroid <- colMeans(cluster_data)
  # sweep() subtracts each column's mean from that column. A bare
  # `data_frame - centroid` recycles the centroid down the rows instead,
  # pairing values with the wrong column means.
  ss <- sum(sweep(cluster_data, 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## NOTE: the previously recorded output (22.0678) predates the centroid fix;
## re-run this chunk to obtain the corrected WCSS.

b. Complete Linkage

# Complete-linkage hierarchical clustering, cut at k = 3
fitcom3 <- hclust(duplm, method = "complete")
# `labels` is written out in full rather than relying on partial matching.
plot(fitcom3, labels = rownames(datauplm), hang = -1, sub = "",
     main = "Dendrogram Metode Complete Linkage k = 3",
     xlab = "Kabupaten/Kota", ylab = "Jarak")
cutcom3 <- cutree(fitcom3, k = 3)
rect.hclust(fitcom3, k = 3, border = "red")

# Silhouette plot for the complete-linkage partition
silcom3 <- silhouette(cutcom3, duplm)
plot(silcom3, main = "Silhoutte Plot Complete Linkage k=3",
     col = c("darkblue", "skyblue", "pink"))

# Total within-cluster sum of squares (WCSS)
wcss_total <- 0
for (i in 1:3) {
  # Numeric matrix of the cluster members; drop = FALSE keeps it 2-D.
  cluster_data <- as.matrix(datauplm[cutcom3 == i, , drop = FALSE])
  centroid <- colMeans(cluster_data)
  # sweep() subtracts each column's mean from that column. A bare
  # `data_frame - centroid` recycles the centroid down the rows instead,
  # pairing values with the wrong column means.
  ss <- sum(sweep(cluster_data, 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## NOTE: the previously recorded output (18.08883) predates the centroid fix;
## re-run this chunk to obtain the corrected WCSS.

c. Average Linkage

# Average-linkage hierarchical clustering, cut at k = 3
fitavg3 <- hclust(duplm, method = "average")
# `labels` is written out in full rather than relying on partial matching.
plot(fitavg3, labels = rownames(datauplm), hang = -1, sub = "",
     main = "Dendrogram Metode Average Linkage k = 3",
     xlab = "Kabupaten/Kota", ylab = "Jarak")
cutavg3 <- cutree(fitavg3, k = 3)
rect.hclust(fitavg3, k = 3, border = "red")

# Silhouette plot for the average-linkage partition
silavg3 <- silhouette(cutavg3, duplm)
plot(silavg3, main = "Silhoutte Plot Average Linkage k=3",
     col = c("darkblue", "skyblue", "pink"))

# Total within-cluster sum of squares (WCSS)
wcss_total <- 0
for (i in 1:3) {
  # Numeric matrix of the cluster members; drop = FALSE keeps it 2-D.
  cluster_data <- as.matrix(datauplm[cutavg3 == i, , drop = FALSE])
  centroid <- colMeans(cluster_data)
  # sweep() subtracts each column's mean from that column. A bare
  # `data_frame - centroid` recycles the centroid down the rows instead,
  # pairing values with the wrong column means.
  ss <- sum(sweep(cluster_data, 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## NOTE: the previously recorded output (18.08883) predates the centroid fix;
## re-run this chunk to obtain the corrected WCSS.

d. Ward’s Method

# Ward's method (ward.D2) hierarchical clustering, cut at k = 3
fitward3 <- hclust(duplm, method = "ward.D2")
# `labels` is written out in full rather than relying on partial matching.
plot(fitward3, labels = rownames(datauplm), hang = -1, sub = "",
     main = "Dendrogram Metode Wards k = 3",
     xlab = "Kabupaten/Kota", ylab = "Jarak")
cutward3 <- cutree(fitward3, k = 3)
rect.hclust(fitward3, k = 3, border = "red")

# Silhouette plot for the Ward partition
silward3 <- silhouette(cutward3, duplm)
plot(silward3, main = "Silhoutte Plot Ward's Method k=3",
     col = c("darkblue", "skyblue", "pink"))

# Total within-cluster sum of squares (WCSS)
wcss_total <- 0
for (i in 1:3) {
  # Numeric matrix of the cluster members; drop = FALSE keeps it 2-D.
  cluster_data <- as.matrix(datauplm[cutward3 == i, , drop = FALSE])
  centroid <- colMeans(cluster_data)
  # sweep() subtracts each column's mean from that column. A bare
  # `data_frame - centroid` recycles the centroid down the rows instead,
  # pairing values with the wrong column means.
  ss <- sum(sweep(cluster_data, 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## NOTE: the previously recorded output (18.08883) predates the centroid fix;
## re-run this chunk to obtain the corrected WCSS.

B. Klasterisasi Non-Hierarkis (K-Means)

# K-means clustering with k = 3; the seed fixes the random starting centres
set.seed(123)
fitkm3 <- kmeans(datauplm, centers = 3)
# Row names grouped by their assigned cluster
clusters3 <- split(rownames(datauplm), fitkm3$cluster)
clusters3
## $`1`
## [1] "Kota Magelang"   "Kota Surakarta"  "Kota Salatiga"   "Kota Pekalongan"
## 
## $`2`
## [1] "Magelang"    "Wonogiri"    "Karanganyar" "Grobogan"    "Pemalang"   
## [6] "Kota Tegal" 
## 
## $`3`
##  [1] "Cilacap"       "Banyumas"      "Purbalingga"   "Banjarnegara" 
##  [5] "Kebumen"       "Purworejo"     "Wonosobo"      "Boyolali"     
##  [9] "Klaten"        "Sukoharjo"     "Sragen"        "Blora"        
## [13] "Rembang"       "Pati"          "Kudus"         "Jepara"       
## [17] "Demak"         "Semarang"      "Temanggung"    "Kendal"       
## [21] "Batang"        "Pekalongan"    "Tegal"         "Brebes"       
## [25] "Kota Semarang"
# Cluster visualisation
fviz_cluster(fitkm3, data = datauplm,
             main = "Visualisasi Klaster K-Means k = 3",
             geom = "point",
             ellipse.type = "convex",
             palette = "jco",
             ggtheme = theme_minimal())

# Silhouette plot for the k-means partition
silkm3 <- silhouette(fitkm3$cluster, duplm)
plot(silkm3, col = c("darkblue", "skyblue", "pink"))

# Total within-cluster sum of squares (WCSS); as a cross-check this should
# agree with fitkm3$tot.withinss.
wcss_total <- 0
for (i in 1:3) {
  # Numeric matrix of the cluster members; drop = FALSE keeps it 2-D.
  cluster_data <- as.matrix(datauplm[fitkm3$cluster == i, , drop = FALSE])
  centroid <- colMeans(cluster_data)
  # sweep() subtracts each column's mean from that column. A bare
  # `data_frame - centroid` recycles the centroid down the rows instead,
  # pairing values with the wrong column means.
  ss <- sum(sweep(cluster_data, 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## NOTE: the previously recorded output (18.08883) predates the centroid fix;
## re-run this chunk to obtain the corrected WCSS.

4. Analisis Klastering untuk k = 4

A. Klasterisasi Hierarkis

a. Single Linkage

# Single-linkage hierarchical clustering on Euclidean distances, cut at k = 4
duplm <- dist(datauplm, method = "euclidean")
fitsingle4 <- hclust(duplm, method = "single")
# `labels` is written out in full rather than relying on partial matching.
plot(fitsingle4, labels = rownames(datauplm), hang = -1, sub = "",
     main = "Dendrogram Metode Single Linkage k = 4",
     xlab = "Kabupaten/Kota", ylab = "Jarak")
cuts4 <- cutree(fitsingle4, k = 4)
rect.hclust(fitsingle4, k = 4, border = "red")

# Silhouette plot for the single-linkage partition
silsingle4 <- silhouette(cuts4, duplm)
plot(silsingle4, main = "Silhoutte Plot Single Linkage k=4",
     col = c("darkblue", "skyblue", "pink", "violet"))

# Total within-cluster sum of squares (WCSS)
wcss_total <- 0
for (i in 1:4) {
  # Numeric matrix of the cluster members; drop = FALSE keeps it 2-D.
  cluster_data <- as.matrix(datauplm[cuts4 == i, , drop = FALSE])
  centroid <- colMeans(cluster_data)
  # sweep() subtracts each column's mean from that column. A bare
  # `data_frame - centroid` recycles the centroid down the rows instead,
  # pairing values with the wrong column means.
  ss <- sum(sweep(cluster_data, 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## NOTE: the previously recorded output (16.87361) predates the centroid fix;
## re-run this chunk to obtain the corrected WCSS.

b. Complete Linkage

# Complete-linkage hierarchical clustering, cut at k = 4
fitcom4 <- hclust(duplm, method = "complete")
# `labels` is written out in full rather than relying on partial matching.
plot(fitcom4, labels = rownames(datauplm), hang = -1, sub = "",
     main = "Dendrogram Metode Complete Linkage k = 4",
     xlab = "Kabupaten/Kota", ylab = "Jarak")
# Cut the tree fitted in this section (fitcom4), not the k = 3 object, so
# this chunk does not depend on fitcom3 existing.
cutcom4 <- cutree(fitcom4, k = 4)
rect.hclust(fitcom4, k = 4, border = "red")

# Silhouette plot for the complete-linkage partition
silcom4 <- silhouette(cutcom4, duplm)
plot(silcom4, main = "Silhoutte Plot Complete Linkage k=4",
     col = c("darkblue", "skyblue", "pink", "violet"))

# Total within-cluster sum of squares (WCSS)
wcss_total <- 0
for (i in 1:4) {
  # Numeric matrix of the cluster members; drop = FALSE keeps it 2-D.
  cluster_data <- as.matrix(datauplm[cutcom4 == i, , drop = FALSE])
  centroid <- colMeans(cluster_data)
  # sweep() subtracts each column's mean from that column. A bare
  # `data_frame - centroid` recycles the centroid down the rows instead,
  # pairing values with the wrong column means.
  ss <- sum(sweep(cluster_data, 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## NOTE: the previously recorded output (21.22551) predates the centroid fix;
## re-run this chunk to obtain the corrected WCSS.

c. Average Linkage

# Average-linkage hierarchical clustering, cut at k = 4
fitavg4 <- hclust(duplm, method = "average")
# `labels` is written out in full rather than relying on partial matching.
plot(fitavg4, labels = rownames(datauplm), hang = -1, sub = "",
     main = "Dendrogram Metode Average Linkage k = 4",
     xlab = "Kabupaten/Kota", ylab = "Jarak")
cutavg4 <- cutree(fitavg4, k = 4)
rect.hclust(fitavg4, k = 4, border = "red")

# Silhouette plot for the average-linkage partition
silavg4 <- silhouette(cutavg4, duplm)
plot(silavg4, main = "Silhoutte Plot Average Linkage k=4",
     col = c("darkblue", "skyblue", "pink", "violet"))

# Total within-cluster sum of squares (WCSS)
wcss_total <- 0
for (i in 1:4) {
  # Numeric matrix of the cluster members; drop = FALSE keeps it 2-D.
  cluster_data <- as.matrix(datauplm[cutavg4 == i, , drop = FALSE])
  centroid <- colMeans(cluster_data)
  # sweep() subtracts each column's mean from that column. A bare
  # `data_frame - centroid` recycles the centroid down the rows instead,
  # pairing values with the wrong column means.
  ss <- sum(sweep(cluster_data, 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## NOTE: the previously recorded output (16.87361) predates the centroid fix;
## re-run this chunk to obtain the corrected WCSS.

d. Ward’s Method

# Ward's method (ward.D2) hierarchical clustering, cut at k = 4
fitward4 <- hclust(duplm, method = "ward.D2")
# `labels` is written out in full rather than relying on partial matching.
plot(fitward4, labels = rownames(datauplm), hang = -1, sub = "",
     main = "Dendrogram Metode Wards k = 4",
     xlab = "Kabupaten/Kota", ylab = "Jarak")
cutward4 <- cutree(fitward4, k = 4)
rect.hclust(fitward4, k = 4, border = "red")

# Silhouette plot for the Ward partition
silward4 <- silhouette(cutward4, duplm)
plot(silward4, main = "Silhoutte Plot Ward's Method k=4",
     col = c("darkblue", "skyblue", "pink", "violet"))

# Total within-cluster sum of squares (WCSS)
wcss_total <- 0
for (i in 1:4) {
  # Numeric matrix of the cluster members; drop = FALSE keeps it 2-D.
  cluster_data <- as.matrix(datauplm[cutward4 == i, , drop = FALSE])
  centroid <- colMeans(cluster_data)
  # sweep() subtracts each column's mean from that column. A bare
  # `data_frame - centroid` recycles the centroid down the rows instead,
  # pairing values with the wrong column means.
  ss <- sum(sweep(cluster_data, 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## NOTE: the previously recorded output (20.90209) predates the centroid fix;
## re-run this chunk to obtain the corrected WCSS.

B. Klasterisasi Non-Hierarkis (K-Means)

# K-means clustering with k = 4; the seed fixes the random starting centres
set.seed(123)
fitkm4 <- kmeans(datauplm, centers = 4)
# Row names grouped by their assigned cluster
clusters4 <- split(rownames(datauplm), fitkm4$cluster)
clusters4
## $`1`
## [1] "Purworejo"       "Kota Magelang"   "Kota Surakarta"  "Kota Salatiga"  
## [5] "Kota Semarang"   "Kota Pekalongan"
## 
## $`2`
## [1] "Magelang"    "Wonogiri"    "Karanganyar" "Grobogan"    "Pemalang"   
## [6] "Kota Tegal" 
## 
## $`3`
##  [1] "Cilacap"     "Banyumas"    "Purbalingga" "Wonosobo"    "Boyolali"   
##  [6] "Klaten"      "Sragen"      "Rembang"     "Pati"        "Jepara"     
## [11] "Demak"       "Temanggung"  "Kendal"      "Batang"      "Pekalongan" 
## [16] "Tegal"       "Brebes"     
## 
## $`4`
## [1] "Banjarnegara" "Kebumen"      "Sukoharjo"    "Blora"        "Kudus"       
## [6] "Semarang"
# Cluster visualisation
fviz_cluster(fitkm4, data = datauplm,
             main = "Visualisasi Klaster K-Means k = 4",
             geom = "point",
             ellipse.type = "convex",
             palette = "jco",
             ggtheme = theme_minimal())

# Silhouette plot for the k-means partition
silkm4 <- silhouette(fitkm4$cluster, duplm)
plot(silkm4, col = c("darkblue", "skyblue", "pink", "purple"))

# Total within-cluster sum of squares (WCSS); as a cross-check this should
# agree with fitkm4$tot.withinss.
wcss_total <- 0
for (i in 1:4) {
  # Numeric matrix of the cluster members; drop = FALSE keeps it 2-D.
  cluster_data <- as.matrix(datauplm[fitkm4$cluster == i, , drop = FALSE])
  centroid <- colMeans(cluster_data)
  # sweep() subtracts each column's mean from that column. A bare
  # `data_frame - centroid` recycles the centroid down the rows instead,
  # pairing values with the wrong column means.
  ss <- sum(sweep(cluster_data, 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## NOTE: the previously recorded output (19.98086) predates the centroid fix;
## re-run this chunk to obtain the corrected WCSS.

5. Analisis Klastering untuk k = 5

A. Klasterisasi Hierarkis

a. Single Linkage

# Single-linkage hierarchical clustering on Euclidean distances, cut at k = 5
duplm <- dist(datauplm, method = "euclidean")
fitsingle5 <- hclust(duplm, method = "single")
# `labels` is written out in full rather than relying on partial matching.
plot(fitsingle5, labels = rownames(datauplm), hang = -1, sub = "",
     main = "Dendrogram Metode Single Linkage k = 5",
     xlab = "Kabupaten/Kota", ylab = "Jarak")
cuts5 <- cutree(fitsingle5, k = 5)
rect.hclust(fitsingle5, k = 5, border = "red")

# Silhouette plot for the single-linkage partition
silsingle5 <- silhouette(cuts5, duplm)
plot(silsingle5, main = "Silhoutte Plot Single Linkage k=5",
     col = c("darkblue", "skyblue", "pink", "violet", "purple"))

# Total within-cluster sum of squares (WCSS)
wcss_total <- 0
for (i in 1:5) {
  # Numeric matrix of the cluster members; drop = FALSE keeps it 2-D.
  cluster_data <- as.matrix(datauplm[cuts5 == i, , drop = FALSE])
  centroid <- colMeans(cluster_data)
  # sweep() subtracts each column's mean from that column. A bare
  # `data_frame - centroid` recycles the centroid down the rows instead,
  # pairing values with the wrong column means.
  ss <- sum(sweep(cluster_data, 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## NOTE: the previously recorded output (19.68673) predates the centroid fix;
## re-run this chunk to obtain the corrected WCSS.

b. Complete Linkage

# Complete-linkage hierarchical clustering, cut at k = 5
fitcom5 <- hclust(duplm, method = "complete")
# `labels` is written out in full rather than relying on partial matching.
plot(fitcom5, labels = rownames(datauplm), hang = -1, sub = "",
     main = "Dendrogram Metode Complete Linkage k = 5",
     xlab = "Kabupaten/Kota", ylab = "Jarak")
cutcom5 <- cutree(fitcom5, k = 5)
rect.hclust(fitcom5, k = 5, border = "red")

# Silhouette plot for the complete-linkage partition
silcom5 <- silhouette(cutcom5, duplm)
plot(silcom5, main = "Silhoutte Plot Complete Linkage k=5",
     col = c("darkblue", "skyblue", "pink", "violet", "purple"))

# Total within-cluster sum of squares (WCSS)
wcss_total <- 0
for (i in 1:5) {
  # Numeric matrix of the cluster members; drop = FALSE keeps it 2-D.
  cluster_data <- as.matrix(datauplm[cutcom5 == i, , drop = FALSE])
  centroid <- colMeans(cluster_data)
  # sweep() subtracts each column's mean from that column. A bare
  # `data_frame - centroid` recycles the centroid down the rows instead,
  # pairing values with the wrong column means.
  ss <- sum(sweep(cluster_data, 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## NOTE: the previously recorded output (20.01029) predates the centroid fix;
## re-run this chunk to obtain the corrected WCSS.

c. Average Linkage

# Average-linkage hierarchical clustering, cut at k = 5
fitavg5 <- hclust(duplm, method = "average")
# `labels` is written out in full rather than relying on partial matching.
plot(fitavg5, labels = rownames(datauplm), hang = -1, sub = "",
     main = "Dendrogram Metode Average Linkage k = 5",
     xlab = "Kabupaten/Kota", ylab = "Jarak")
cutavg5 <- cutree(fitavg5, k = 5)
rect.hclust(fitavg5, k = 5, border = "red")

# Silhouette plot for the average-linkage partition
silavg5 <- silhouette(cutavg5, duplm)
plot(silavg5, main = "Silhoutte Plot Average Linkage k=5",
     col = c("darkblue", "skyblue", "pink", "violet", "purple"))

# Total within-cluster sum of squares (WCSS)
wcss_total <- 0
for (i in 1:5) {
  # Numeric matrix of the cluster members; drop = FALSE keeps it 2-D.
  cluster_data <- as.matrix(datauplm[cutavg5 == i, , drop = FALSE])
  centroid <- colMeans(cluster_data)
  # sweep() subtracts each column's mean from that column. A bare
  # `data_frame - centroid` recycles the centroid down the rows instead,
  # pairing values with the wrong column means.
  ss <- sum(sweep(cluster_data, 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## NOTE: the previously recorded output (19.53867) predates the centroid fix;
## re-run this chunk to obtain the corrected WCSS.

d. Ward’s Method

# Ward's method (ward.D2) hierarchical clustering, cut at k = 5
fitward5 <- hclust(duplm, method = "ward.D2")
# `labels` is written out in full rather than relying on partial matching.
plot(fitward5, labels = rownames(datauplm), hang = -1, sub = "",
     main = "Dendrogram Metode Wards k = 5",
     xlab = "Kabupaten/Kota", ylab = "Jarak")
cutward5 <- cutree(fitward5, k = 5)
rect.hclust(fitward5, k = 5, border = "red")

# Silhouette plot for the Ward partition
silward5 <- silhouette(cutward5, duplm)
plot(silward5, main = "Silhoutte Plot Ward's Method k=5",
     col = c("darkblue", "skyblue", "pink", "violet", "purple"))

# Total within-cluster sum of squares (WCSS)
wcss_total <- 0
for (i in 1:5) {
  # Numeric matrix of the cluster members; drop = FALSE keeps it 2-D.
  cluster_data <- as.matrix(datauplm[cutward5 == i, , drop = FALSE])
  centroid <- colMeans(cluster_data)
  # sweep() subtracts each column's mean from that column. A bare
  # `data_frame - centroid` recycles the centroid down the rows instead,
  # pairing values with the wrong column means.
  ss <- sum(sweep(cluster_data, 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## NOTE: the previously recorded output (21.45072) predates the centroid fix;
## re-run this chunk to obtain the corrected WCSS.

B. Klasterisasi Non-Hierarkis (K-Means)

# K-means clustering with k = 5; the seed fixes the random starting centres
set.seed(123)
fitkm5 <- kmeans(datauplm, centers = 5)
# Row names grouped by their assigned cluster
clusters5 <- split(rownames(datauplm), fitkm5$cluster)
clusters5
## $`1`
## [1] "Kota Magelang"   "Kota Surakarta"  "Kota Salatiga"   "Kota Pekalongan"
## 
## $`2`
## [1] "Magelang"    "Wonogiri"    "Karanganyar" "Grobogan"    "Pemalang"   
## [6] "Kota Tegal" 
## 
## $`3`
## [1] "Purbalingga"   "Purworejo"     "Wonosobo"      "Sragen"       
## [5] "Pati"          "Kota Semarang"
## 
## $`4`
## [1] "Banjarnegara" "Kebumen"      "Sukoharjo"    "Blora"        "Kudus"       
## [6] "Semarang"    
## 
## $`5`
##  [1] "Cilacap"    "Banyumas"   "Boyolali"   "Klaten"     "Rembang"   
##  [6] "Jepara"     "Demak"      "Temanggung" "Kendal"     "Batang"    
## [11] "Pekalongan" "Tegal"      "Brebes"
# Cluster visualisation
fviz_cluster(fitkm5, data = datauplm,
             main = "Visualisasi Klaster K-Means k = 5",
             geom = "point",
             ellipse.type = "convex",
             palette = "jco",
             ggtheme = theme_minimal())

# Silhouette plot for the k-means partition
silkm5 <- silhouette(fitkm5$cluster, duplm)
plot(silkm5, col = c("darkblue", "skyblue", "pink", "violet", "purple"))

# Total within-cluster sum of squares (WCSS); as a cross-check this should
# agree with fitkm5$tot.withinss.
wcss_total <- 0
for (i in 1:5) {
  # Numeric matrix of the cluster members; drop = FALSE keeps it 2-D.
  cluster_data <- as.matrix(datauplm[fitkm5$cluster == i, , drop = FALSE])
  centroid <- colMeans(cluster_data)
  # sweep() subtracts each column's mean from that column. A bare
  # `data_frame - centroid` recycles the centroid down the rows instead,
  # pairing values with the wrong column means.
  ss <- sum(sweep(cluster_data, 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## NOTE: the previously recorded output (20.36219) predates the centroid fix;
## re-run this chunk to obtain the corrected WCSS.
# Per-cluster variable means for the complete-linkage solution (k = 3)
datacom3 <- data.frame(datauplm, Cluster = cutcom3)
meancluscom <- aggregate(
  . ~ Cluster,
  data = datacom3,
  FUN = mean
)
print("Rata-rata per variabel untuk complete linkage k = 3")
## [1] "Rata-rata per variabel untuk complete linkage k = 3"
print(meancluscom)
##   Cluster Pemerataan.Layanan.Perpustakaan Ketercukupan.Koleksi.Perpustakaan
## 1       1                       0.5766760                          0.354392
## 2       2                       0.6405667                          0.223550
## 3       3                       1.0000000                          0.901525
##   Rasio.Ketercukupan.Tenaga.Perpustakaan Tingkat.Kunjungan.Masyarakat.per.hari
## 1                               0.522416                             0.3653720
## 2                               0.325900                             0.1290167
## 3                               1.000000                             0.9974000
##   Keterlibatan.Masyarakat.dalam.Kegiatan.Sosialisasi.Perpustakaan
## 1                                                       0.9910360
## 2                                                       0.1432833
## 3                                                       1.0000000
# Per-cluster variable means for the average-linkage solution (k = 3)
dataavg3 <- data.frame(datauplm, Cluster = cutavg3)
meanclusavg <- aggregate(
  . ~ Cluster,
  data = dataavg3,
  FUN = mean
)
print("Rata-rata per variabel untuk average linkage k = 3")
## [1] "Rata-rata per variabel untuk average linkage k = 3"
print(meanclusavg)
##   Cluster Pemerataan.Layanan.Perpustakaan Ketercukupan.Koleksi.Perpustakaan
## 1       1                       0.5766760                          0.354392
## 2       2                       0.6405667                          0.223550
## 3       3                       1.0000000                          0.901525
##   Rasio.Ketercukupan.Tenaga.Perpustakaan Tingkat.Kunjungan.Masyarakat.per.hari
## 1                               0.522416                             0.3653720
## 2                               0.325900                             0.1290167
## 3                               1.000000                             0.9974000
##   Keterlibatan.Masyarakat.dalam.Kegiatan.Sosialisasi.Perpustakaan
## 1                                                       0.9910360
## 2                                                       0.1432833
## 3                                                       1.0000000
# Per-cluster variable means for the Ward solution (k = 3)
dataward3 <- data.frame(datauplm, Cluster = cutward3)
meanclusward <- aggregate(
  . ~ Cluster,
  data = dataward3,
  FUN = mean
)
print("Rata-rata per variabel untuk metode ward k = 3")
## [1] "Rata-rata per variabel untuk metode ward k = 3"
print(meanclusward)
##   Cluster Pemerataan.Layanan.Perpustakaan Ketercukupan.Koleksi.Perpustakaan
## 1       1                       0.5766760                          0.354392
## 2       2                       0.6405667                          0.223550
## 3       3                       1.0000000                          0.901525
##   Rasio.Ketercukupan.Tenaga.Perpustakaan Tingkat.Kunjungan.Masyarakat.per.hari
## 1                               0.522416                             0.3653720
## 2                               0.325900                             0.1290167
## 3                               1.000000                             0.9974000
##   Keterlibatan.Masyarakat.dalam.Kegiatan.Sosialisasi.Perpustakaan
## 1                                                       0.9910360
## 2                                                       0.1432833
## 3                                                       1.0000000
# Per-cluster variable means for the k-means solution (k = 3)
datakm3 <- data.frame(datauplm, Cluster = fitkm3$cluster)
meancluskm <- aggregate(
  . ~ Cluster,
  data = datakm3,
  FUN = mean
)
print("Rata-rata per variabel untuk metode k-means k = 3")
## [1] "Rata-rata per variabel untuk metode k-means k = 3"
print(meancluskm)
##   Cluster Pemerataan.Layanan.Perpustakaan Ketercukupan.Koleksi.Perpustakaan
## 1       1                       1.0000000                          0.901525
## 2       2                       0.6405667                          0.223550
## 3       3                       0.5766760                          0.354392
##   Rasio.Ketercukupan.Tenaga.Perpustakaan Tingkat.Kunjungan.Masyarakat.per.hari
## 1                               1.000000                             0.9974000
## 2                               0.325900                             0.1290167
## 3                               0.522416                             0.3653720
##   Keterlibatan.Masyarakat.dalam.Kegiatan.Sosialisasi.Perpustakaan
## 1                                                       1.0000000
## 2                                                       0.1432833
## 3                                                       0.9910360