1. Persiapan Data
# Import the UPLM workbook and convert it to a plain data frame
datauplm <- as.data.frame(
  read_excel("C:/Users/WORKPLUS/Documents/UPLM.xlsx")
)
# The first column holds the labels that become the row names
names1 <- datauplm[[1]]
# Drop that label column, then attach the labels as row names
datauplm <- datauplm[, -1]
rownames(datauplm) <- names1
# Variables (components) that enter the analysis
colnames(datauplm)
## [1] "Pemerataan Layanan Perpustakaan"
## [2] "Ketercukupan Koleksi Perpustakaan"
## [3] "Rasio Ketercukupan Tenaga Perpustakaan"
## [4] "Tingkat Kunjungan Masyarakat per hari"
## [5] "Keterlibatan Masyarakat dalam Kegiatan Sosialisasi Perpustakaan"
# Check the data size: number of rows, then number of columns
c(nrow(datauplm), ncol(datauplm))
## [1] 35 5
2. Penentuan Jumlah Klaster
# Elbow plot: total within-cluster sum of squares (WSS) for k = 1 .. nrow - 1.
# kmeans() starts from randomly chosen centres, so the seed is fixed to make
# the curve reproducible, and several starts are used per k so that a single
# poor start does not produce a spurious bump in the curve.
set.seed(123)
max_k <- nrow(datauplm) - 1L
# k = 1: total sum of squares around the overall column means
wss <- (nrow(datauplm) - 1) * sum(vapply(datauplm, var, numeric(1)))
# seq_len(max_k)[-1] is 2..max_k and is empty (no iterations) when max_k < 2
for (i in seq_len(max_k)[-1]) {
  # tot.withinss is the sum of the per-cluster withinss values
  wss[i] <- kmeans(datauplm, centers = i, nstart = 25)$tot.withinss
}
plot(seq_len(max_k), wss, type = "b",
     main = "Optimal Number of Clusters",
     xlab = "number of clusters",
     ylab = "within clusters sum of squares")
3. Analisis Klastering untuk k = 3
A. Klasterisasi Hierarkis
a. Single Linkage
# Single-linkage hierarchical clustering on Euclidean distances, cut at k = 3
duplm <- dist(datauplm, method = "euclidean")
fitsingle3 <- hclust(duplm, method = "single")
# `labels` is spelled out instead of relying on partial matching of `label`
plot(fitsingle3, labels = rownames(datauplm), hang = -1, sub = "",
     main = "Dendrogram Metode Single Linkage k = 3",
     xlab = "Kabupaten/Kota", ylab = "Jarak")
cuts3 <- cutree(fitsingle3, k = 3)
# Outline the 3 clusters on the dendrogram drawn above
rect.hclust(fitsingle3, k = 3, border = "red")
# Silhouette of the single-linkage partition
silsingle3 <- silhouette(cuts3, duplm)
plot(silsingle3, main = "Silhoutte Plot Single Linkage k=3",
     col = c("darkblue", "skyblue", "pink"))
# Within-cluster sum of squares (WCSS), summed over the 3 clusters.
# The centroid has to be subtracted column by column. Writing
# `cluster_data - centroid` recycles the centroid down each column in element
# order, pairing its values with the wrong variables, so sweep() over
# margin 2 is used instead.
# NOTE: the WCSS value recorded after this chunk predates this fix; re-run
# the script to refresh it.
wcss_total <- 0
for (i in seq_len(3)) {
  cluster_data <- datauplm[cuts3 == i, , drop = FALSE]
  centroid <- colMeans(cluster_data)
  ss <- sum(sweep(as.matrix(cluster_data), 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## [1] 22.0678
b. Complete Linkage
# Complete-linkage hierarchical clustering on the distances in duplm, k = 3
fitcom3 <- hclust(duplm, method = "complete")
# `labels` is spelled out instead of relying on partial matching of `label`
plot(fitcom3, labels = rownames(datauplm), hang = -1, sub = "",
     main = "Dendrogram Metode Complete Linkage k = 3",
     xlab = "Kabupaten/Kota", ylab = "Jarak")
cutcom3 <- cutree(fitcom3, k = 3)
# Outline the 3 clusters on the dendrogram drawn above
rect.hclust(fitcom3, k = 3, border = "red")
# Silhouette of the complete-linkage partition
silcom3 <- silhouette(cutcom3, duplm)
plot(silcom3, main = "Silhoutte Plot Complete Linkage k=3",
     col = c("darkblue", "skyblue", "pink"))
# Within-cluster sum of squares (WCSS), summed over the 3 clusters.
# The centroid has to be subtracted column by column. Writing
# `cluster_data - centroid` recycles the centroid down each column in element
# order, pairing its values with the wrong variables, so sweep() over
# margin 2 is used instead.
# NOTE: the WCSS value recorded after this chunk predates this fix; re-run
# the script to refresh it.
wcss_total <- 0
for (i in seq_len(3)) {
  cluster_data <- datauplm[cutcom3 == i, , drop = FALSE]
  centroid <- colMeans(cluster_data)
  ss <- sum(sweep(as.matrix(cluster_data), 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## [1] 18.08883
c. Average Linkage
# Average-linkage hierarchical clustering on the distances in duplm, k = 3
fitavg3 <- hclust(duplm, method = "average")
# `labels` is spelled out instead of relying on partial matching of `label`
plot(fitavg3, labels = rownames(datauplm), hang = -1, sub = "",
     main = "Dendrogram Metode Average Linkage k = 3",
     xlab = "Kabupaten/Kota", ylab = "Jarak")
cutavg3 <- cutree(fitavg3, k = 3)
# Outline the 3 clusters on the dendrogram drawn above
rect.hclust(fitavg3, k = 3, border = "red")
# Silhouette of the average-linkage partition
silavg3 <- silhouette(cutavg3, duplm)
plot(silavg3, main = "Silhoutte Plot Average Linkage k=3",
     col = c("darkblue", "skyblue", "pink"))
# Within-cluster sum of squares (WCSS), summed over the 3 clusters.
# The centroid has to be subtracted column by column. Writing
# `cluster_data - centroid` recycles the centroid down each column in element
# order, pairing its values with the wrong variables, so sweep() over
# margin 2 is used instead.
# NOTE: the WCSS value recorded after this chunk predates this fix; re-run
# the script to refresh it.
wcss_total <- 0
for (i in seq_len(3)) {
  cluster_data <- datauplm[cutavg3 == i, , drop = FALSE]
  centroid <- colMeans(cluster_data)
  ss <- sum(sweep(as.matrix(cluster_data), 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## [1] 18.08883
d. Ward’s Method
# Ward (ward.D2) hierarchical clustering on the distances in duplm, k = 3
fitward3 <- hclust(duplm, method = "ward.D2")
# `labels` is spelled out instead of relying on partial matching of `label`
plot(fitward3, labels = rownames(datauplm), hang = -1, sub = "",
     main = "Dendrogram Metode Wards k = 3",
     xlab = "Kabupaten/Kota", ylab = "Jarak")
cutward3 <- cutree(fitward3, k = 3)
# Outline the 3 clusters on the dendrogram drawn above
rect.hclust(fitward3, k = 3, border = "red")
# Silhouette of the Ward partition
silward3 <- silhouette(cutward3, duplm)
plot(silward3, main = "Silhoutte Plot Ward's Method k=3",
     col = c("darkblue", "skyblue", "pink"))
# Within-cluster sum of squares (WCSS), summed over the 3 clusters.
# The centroid has to be subtracted column by column. Writing
# `cluster_data - centroid` recycles the centroid down each column in element
# order, pairing its values with the wrong variables, so sweep() over
# margin 2 is used instead.
# NOTE: the WCSS value recorded after this chunk predates this fix; re-run
# the script to refresh it.
wcss_total <- 0
for (i in seq_len(3)) {
  cluster_data <- datauplm[cutward3 == i, , drop = FALSE]
  centroid <- colMeans(cluster_data)
  ss <- sum(sweep(as.matrix(cluster_data), 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## [1] 18.08883
B. Klasterisasi Non-Hierarkis (K-Means)
# K-means with 3 centres; the seed fixes the random choice of initial centres
set.seed(123)
fitkm3 <- kmeans(x = datauplm, centers = 3)
# Row names grouped by assigned cluster; the outer parentheses print the list
(clusters3 <- split(x = rownames(datauplm), f = fitkm3$cluster))
## $`1`
## [1] "Kota Magelang" "Kota Surakarta" "Kota Salatiga" "Kota Pekalongan"
##
## $`2`
## [1] "Magelang" "Wonogiri" "Karanganyar" "Grobogan" "Pemalang"
## [6] "Kota Tegal"
##
## $`3`
## [1] "Cilacap" "Banyumas" "Purbalingga" "Banjarnegara"
## [5] "Kebumen" "Purworejo" "Wonosobo" "Boyolali"
## [9] "Klaten" "Sukoharjo" "Sragen" "Blora"
## [13] "Rembang" "Pati" "Kudus" "Jepara"
## [17] "Demak" "Semarang" "Temanggung" "Kendal"
## [21] "Batang" "Pekalongan" "Tegal" "Brebes"
## [25] "Kota Semarang"
# Cluster visualisation of the k = 3 k-means fit
fviz_cluster(
  fitkm3,
  data = datauplm,
  main = "Visualisasi Klaster K-Means k = 3",
  geom = "point",
  ellipse.type = "convex",
  palette = "jco",
  ggtheme = theme_minimal()
)
# Silhouette of the k-means partition
silkm3 <- silhouette(fitkm3$cluster, duplm)
plot(silkm3, col = c("darkblue", "skyblue", "pink"))
# Within-cluster sum of squares (WCSS), summed over the 3 clusters.
# The centroid has to be subtracted column by column. Writing
# `cluster_data - centroid` recycles the centroid down each column in element
# order, pairing its values with the wrong variables, so sweep() over
# margin 2 is used instead.
# kmeans() also reports a total as fitkm3$tot.withinss, usable as a cross-check.
# NOTE: the WCSS value recorded after this chunk predates this fix; re-run
# the script to refresh it.
wcss_total <- 0
for (i in seq_len(3)) {
  cluster_data <- datauplm[fitkm3$cluster == i, , drop = FALSE]
  centroid <- colMeans(cluster_data)
  ss <- sum(sweep(as.matrix(cluster_data), 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## [1] 18.08883
4. Analisis Klastering untuk k = 4
A. Klasterisasi Hierarkis
a. Single Linkage
# Single-linkage hierarchical clustering on Euclidean distances, cut at k = 4
duplm <- dist(datauplm, method = "euclidean")
fitsingle4 <- hclust(duplm, method = "single")
# `labels` is spelled out instead of relying on partial matching of `label`
plot(fitsingle4, labels = rownames(datauplm), hang = -1, sub = "",
     main = "Dendrogram Metode Single Linkage k = 4",
     xlab = "Kabupaten/Kota", ylab = "Jarak")
cuts4 <- cutree(fitsingle4, k = 4)
# Outline the 4 clusters on the dendrogram drawn above
rect.hclust(fitsingle4, k = 4, border = "red")
# Silhouette of the single-linkage partition
silsingle4 <- silhouette(cuts4, duplm)
plot(silsingle4, main = "Silhoutte Plot Single Linkage k=4",
     col = c("darkblue", "skyblue", "pink", "violet"))
# Within-cluster sum of squares (WCSS), summed over the 4 clusters.
# The centroid has to be subtracted column by column. Writing
# `cluster_data - centroid` recycles the centroid down each column in element
# order, pairing its values with the wrong variables, so sweep() over
# margin 2 is used instead.
# NOTE: the WCSS value recorded after this chunk predates this fix; re-run
# the script to refresh it.
wcss_total <- 0
for (i in seq_len(4)) {
  cluster_data <- datauplm[cuts4 == i, , drop = FALSE]
  centroid <- colMeans(cluster_data)
  ss <- sum(sweep(as.matrix(cluster_data), 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## [1] 16.87361
b. Complete Linkage
# Complete-linkage hierarchical clustering on the distances in duplm, k = 4
fitcom4 <- hclust(duplm, method = "complete")
# `labels` is spelled out instead of relying on partial matching of `label`
plot(fitcom4, labels = rownames(datauplm), hang = -1, sub = "",
     main = "Dendrogram Metode Complete Linkage k = 4",
     xlab = "Kabupaten/Kota", ylab = "Jarak")
# Cut the tree fitted in this section. The earlier code cut `fitcom3`, a
# copy-paste slip that made this section depend on the k = 3 object.
cutcom4 <- cutree(fitcom4, k = 4)
# Outline the 4 clusters on the dendrogram drawn above
rect.hclust(fitcom4, k = 4, border = "red")
# Silhouette of the complete-linkage partition
silcom4 <- silhouette(cutcom4, duplm)
plot(silcom4, main = "Silhoutte Plot Complete Linkage k=4",
     col = c("darkblue", "skyblue", "pink", "violet"))
# Within-cluster sum of squares (WCSS), summed over the 4 clusters.
# The centroid has to be subtracted column by column. Writing
# `cluster_data - centroid` recycles the centroid down each column in element
# order, pairing its values with the wrong variables, so sweep() over
# margin 2 is used instead.
# NOTE: the WCSS value recorded after this chunk predates this fix; re-run
# the script to refresh it.
wcss_total <- 0
for (i in seq_len(4)) {
  cluster_data <- datauplm[cutcom4 == i, , drop = FALSE]
  centroid <- colMeans(cluster_data)
  ss <- sum(sweep(as.matrix(cluster_data), 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## [1] 21.22551
c. Average Linkage
# Average-linkage hierarchical clustering on the distances in duplm, k = 4
fitavg4 <- hclust(duplm, method = "average")
# `labels` is spelled out instead of relying on partial matching of `label`
plot(fitavg4, labels = rownames(datauplm), hang = -1, sub = "",
     main = "Dendrogram Metode Average Linkage k = 4",
     xlab = "Kabupaten/Kota", ylab = "Jarak")
cutavg4 <- cutree(fitavg4, k = 4)
# Outline the 4 clusters on the dendrogram drawn above
rect.hclust(fitavg4, k = 4, border = "red")
# Silhouette of the average-linkage partition
silavg4 <- silhouette(cutavg4, duplm)
plot(silavg4, main = "Silhoutte Plot Average Linkage k=4",
     col = c("darkblue", "skyblue", "pink", "violet"))
# Within-cluster sum of squares (WCSS), summed over the 4 clusters.
# The centroid has to be subtracted column by column. Writing
# `cluster_data - centroid` recycles the centroid down each column in element
# order, pairing its values with the wrong variables, so sweep() over
# margin 2 is used instead.
# NOTE: the WCSS value recorded after this chunk predates this fix; re-run
# the script to refresh it.
wcss_total <- 0
for (i in seq_len(4)) {
  cluster_data <- datauplm[cutavg4 == i, , drop = FALSE]
  centroid <- colMeans(cluster_data)
  ss <- sum(sweep(as.matrix(cluster_data), 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## [1] 16.87361
d. Ward’s Method
# Ward (ward.D2) hierarchical clustering on the distances in duplm, k = 4
fitward4 <- hclust(duplm, method = "ward.D2")
# `labels` is spelled out instead of relying on partial matching of `label`
plot(fitward4, labels = rownames(datauplm), hang = -1, sub = "",
     main = "Dendrogram Metode Wards k = 4",
     xlab = "Kabupaten/Kota", ylab = "Jarak")
cutward4 <- cutree(fitward4, k = 4)
# Outline the 4 clusters on the dendrogram drawn above
rect.hclust(fitward4, k = 4, border = "red")
# Silhouette of the Ward partition
silward4 <- silhouette(cutward4, duplm)
plot(silward4, main = "Silhoutte Plot Ward's Method k=4",
     col = c("darkblue", "skyblue", "pink", "violet"))
# Within-cluster sum of squares (WCSS), summed over the 4 clusters.
# The centroid has to be subtracted column by column. Writing
# `cluster_data - centroid` recycles the centroid down each column in element
# order, pairing its values with the wrong variables, so sweep() over
# margin 2 is used instead.
# NOTE: the WCSS value recorded after this chunk predates this fix; re-run
# the script to refresh it.
wcss_total <- 0
for (i in seq_len(4)) {
  cluster_data <- datauplm[cutward4 == i, , drop = FALSE]
  centroid <- colMeans(cluster_data)
  ss <- sum(sweep(as.matrix(cluster_data), 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## [1] 20.90209
B. Klasterisasi Non-Hierarkis (K-Means)
# K-means with 4 centres; the seed fixes the random choice of initial centres
set.seed(123)
fitkm4 <- kmeans(x = datauplm, centers = 4)
# Row names grouped by assigned cluster; the outer parentheses print the list
(clusters4 <- split(x = rownames(datauplm), f = fitkm4$cluster))
## $`1`
## [1] "Purworejo" "Kota Magelang" "Kota Surakarta" "Kota Salatiga"
## [5] "Kota Semarang" "Kota Pekalongan"
##
## $`2`
## [1] "Magelang" "Wonogiri" "Karanganyar" "Grobogan" "Pemalang"
## [6] "Kota Tegal"
##
## $`3`
## [1] "Cilacap" "Banyumas" "Purbalingga" "Wonosobo" "Boyolali"
## [6] "Klaten" "Sragen" "Rembang" "Pati" "Jepara"
## [11] "Demak" "Temanggung" "Kendal" "Batang" "Pekalongan"
## [16] "Tegal" "Brebes"
##
## $`4`
## [1] "Banjarnegara" "Kebumen" "Sukoharjo" "Blora" "Kudus"
## [6] "Semarang"
# Cluster visualisation of the k = 4 k-means fit
fviz_cluster(
  fitkm4,
  data = datauplm,
  main = "Visualisasi Klaster K-Means k = 4",
  geom = "point",
  ellipse.type = "convex",
  palette = "jco",
  ggtheme = theme_minimal()
)
# Silhouette of the k-means partition
silkm4 <- silhouette(fitkm4$cluster, duplm)
plot(silkm4, col = c("darkblue", "skyblue", "pink", "purple"))
# Within-cluster sum of squares (WCSS), summed over the 4 clusters.
# The centroid has to be subtracted column by column. Writing
# `cluster_data - centroid` recycles the centroid down each column in element
# order, pairing its values with the wrong variables, so sweep() over
# margin 2 is used instead.
# kmeans() also reports a total as fitkm4$tot.withinss, usable as a cross-check.
# NOTE: the WCSS value recorded after this chunk predates this fix; re-run
# the script to refresh it.
wcss_total <- 0
for (i in seq_len(4)) {
  cluster_data <- datauplm[fitkm4$cluster == i, , drop = FALSE]
  centroid <- colMeans(cluster_data)
  ss <- sum(sweep(as.matrix(cluster_data), 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## [1] 19.98086
5. Analisis Klastering untuk k = 5
A. Klasterisasi Hierarkis
a. Single Linkage
# Single-linkage hierarchical clustering on Euclidean distances, cut at k = 5
duplm <- dist(datauplm, method = "euclidean")
fitsingle5 <- hclust(duplm, method = "single")
# `labels` is spelled out instead of relying on partial matching of `label`
plot(fitsingle5, labels = rownames(datauplm), hang = -1, sub = "",
     main = "Dendrogram Metode Single Linkage k = 5",
     xlab = "Kabupaten/Kota", ylab = "Jarak")
cuts5 <- cutree(fitsingle5, k = 5)
# Outline the 5 clusters on the dendrogram drawn above
rect.hclust(fitsingle5, k = 5, border = "red")
# Silhouette of the single-linkage partition
silsingle5 <- silhouette(cuts5, duplm)
plot(silsingle5, main = "Silhoutte Plot Single Linkage k=5",
     col = c("darkblue", "skyblue", "pink", "violet", "purple"))
# Within-cluster sum of squares (WCSS), summed over the 5 clusters.
# The centroid has to be subtracted column by column. Writing
# `cluster_data - centroid` recycles the centroid down each column in element
# order, pairing its values with the wrong variables, so sweep() over
# margin 2 is used instead.
# NOTE: the WCSS value recorded after this chunk predates this fix; re-run
# the script to refresh it.
wcss_total <- 0
for (i in seq_len(5)) {
  cluster_data <- datauplm[cuts5 == i, , drop = FALSE]
  centroid <- colMeans(cluster_data)
  ss <- sum(sweep(as.matrix(cluster_data), 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## [1] 19.68673
b. Complete Linkage
# Complete-linkage hierarchical clustering on the distances in duplm, k = 5
fitcom5 <- hclust(duplm, method = "complete")
# `labels` is spelled out instead of relying on partial matching of `label`
plot(fitcom5, labels = rownames(datauplm), hang = -1, sub = "",
     main = "Dendrogram Metode Complete Linkage k = 5",
     xlab = "Kabupaten/Kota", ylab = "Jarak")
cutcom5 <- cutree(fitcom5, k = 5)
# Outline the 5 clusters on the dendrogram drawn above
rect.hclust(fitcom5, k = 5, border = "red")
# Silhouette of the complete-linkage partition
silcom5 <- silhouette(cutcom5, duplm)
plot(silcom5, main = "Silhoutte Plot Complete Linkage k=5",
     col = c("darkblue", "skyblue", "pink", "violet", "purple"))
# Within-cluster sum of squares (WCSS), summed over the 5 clusters.
# The centroid has to be subtracted column by column. Writing
# `cluster_data - centroid` recycles the centroid down each column in element
# order, pairing its values with the wrong variables, so sweep() over
# margin 2 is used instead.
# NOTE: the WCSS value recorded after this chunk predates this fix; re-run
# the script to refresh it.
wcss_total <- 0
for (i in seq_len(5)) {
  cluster_data <- datauplm[cutcom5 == i, , drop = FALSE]
  centroid <- colMeans(cluster_data)
  ss <- sum(sweep(as.matrix(cluster_data), 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## [1] 20.01029
c. Average Linkage
# Average-linkage hierarchical clustering on the distances in duplm, k = 5
fitavg5 <- hclust(duplm, method = "average")
# `labels` is spelled out instead of relying on partial matching of `label`
plot(fitavg5, labels = rownames(datauplm), hang = -1, sub = "",
     main = "Dendrogram Metode Average Linkage k = 5",
     xlab = "Kabupaten/Kota", ylab = "Jarak")
cutavg5 <- cutree(fitavg5, k = 5)
# Outline the 5 clusters on the dendrogram drawn above
rect.hclust(fitavg5, k = 5, border = "red")
# Silhouette of the average-linkage partition
silavg5 <- silhouette(cutavg5, duplm)
plot(silavg5, main = "Silhoutte Plot Average Linkage k=5",
     col = c("darkblue", "skyblue", "pink", "violet", "purple"))
# Within-cluster sum of squares (WCSS), summed over the 5 clusters.
# The centroid has to be subtracted column by column. Writing
# `cluster_data - centroid` recycles the centroid down each column in element
# order, pairing its values with the wrong variables, so sweep() over
# margin 2 is used instead.
# NOTE: the WCSS value recorded after this chunk predates this fix; re-run
# the script to refresh it.
wcss_total <- 0
for (i in seq_len(5)) {
  cluster_data <- datauplm[cutavg5 == i, , drop = FALSE]
  centroid <- colMeans(cluster_data)
  ss <- sum(sweep(as.matrix(cluster_data), 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## [1] 19.53867
d. Ward’s Method
# Ward (ward.D2) hierarchical clustering on the distances in duplm, k = 5
fitward5 <- hclust(duplm, method = "ward.D2")
# `labels` is spelled out instead of relying on partial matching of `label`
plot(fitward5, labels = rownames(datauplm), hang = -1, sub = "",
     main = "Dendrogram Metode Wards k = 5",
     xlab = "Kabupaten/Kota", ylab = "Jarak")
cutward5 <- cutree(fitward5, k = 5)
# Outline the 5 clusters on the dendrogram drawn above
rect.hclust(fitward5, k = 5, border = "red")
# Silhouette of the Ward partition
silward5 <- silhouette(cutward5, duplm)
plot(silward5, main = "Silhoutte Plot Ward's Method k=5",
     col = c("darkblue", "skyblue", "pink", "violet", "purple"))
# Within-cluster sum of squares (WCSS), summed over the 5 clusters.
# The centroid has to be subtracted column by column. Writing
# `cluster_data - centroid` recycles the centroid down each column in element
# order, pairing its values with the wrong variables, so sweep() over
# margin 2 is used instead.
# NOTE: the WCSS value recorded after this chunk predates this fix; re-run
# the script to refresh it.
wcss_total <- 0
for (i in seq_len(5)) {
  cluster_data <- datauplm[cutward5 == i, , drop = FALSE]
  centroid <- colMeans(cluster_data)
  ss <- sum(sweep(as.matrix(cluster_data), 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## [1] 21.45072
B. Klasterisasi Non-Hierarkis (K-Means)
# K-means with 5 centres; the seed fixes the random choice of initial centres
set.seed(123)
fitkm5 <- kmeans(x = datauplm, centers = 5)
# Row names grouped by assigned cluster; the outer parentheses print the list
(clusters5 <- split(x = rownames(datauplm), f = fitkm5$cluster))
## $`1`
## [1] "Kota Magelang" "Kota Surakarta" "Kota Salatiga" "Kota Pekalongan"
##
## $`2`
## [1] "Magelang" "Wonogiri" "Karanganyar" "Grobogan" "Pemalang"
## [6] "Kota Tegal"
##
## $`3`
## [1] "Purbalingga" "Purworejo" "Wonosobo" "Sragen"
## [5] "Pati" "Kota Semarang"
##
## $`4`
## [1] "Banjarnegara" "Kebumen" "Sukoharjo" "Blora" "Kudus"
## [6] "Semarang"
##
## $`5`
## [1] "Cilacap" "Banyumas" "Boyolali" "Klaten" "Rembang"
## [6] "Jepara" "Demak" "Temanggung" "Kendal" "Batang"
## [11] "Pekalongan" "Tegal" "Brebes"
# Cluster visualisation of the k = 5 k-means fit
fviz_cluster(
  fitkm5,
  data = datauplm,
  main = "Visualisasi Klaster K-Means k = 5",
  geom = "point",
  ellipse.type = "convex",
  palette = "jco",
  ggtheme = theme_minimal()
)
# Silhouette of the k-means partition
silkm5 <- silhouette(fitkm5$cluster, duplm)
plot(silkm5, col = c("darkblue", "skyblue", "pink", "violet", "purple"))
# Within-cluster sum of squares (WCSS), summed over the 5 clusters.
# The centroid has to be subtracted column by column. Writing
# `cluster_data - centroid` recycles the centroid down each column in element
# order, pairing its values with the wrong variables, so sweep() over
# margin 2 is used instead.
# kmeans() also reports a total as fitkm5$tot.withinss, usable as a cross-check.
# NOTE: the WCSS value recorded after this chunk predates this fix; re-run
# the script to refresh it.
wcss_total <- 0
for (i in seq_len(5)) {
  cluster_data <- datauplm[fitkm5$cluster == i, , drop = FALSE]
  centroid <- colMeans(cluster_data)
  ss <- sum(sweep(as.matrix(cluster_data), 2, centroid)^2)
  wcss_total <- wcss_total + ss
}
print(wcss_total)
## [1] 20.36219
# Complete linkage, k = 3: attach the cluster labels, then average every
# variable within each cluster
datacom3 <- data.frame(
  datauplm,
  Cluster = cutcom3
)
meancluscom <- aggregate(. ~ Cluster, data = datacom3, FUN = mean)
print("Rata-rata per variabel untuk complete linkage k = 3")
## [1] "Rata-rata per variabel untuk complete linkage k = 3"
print(meancluscom)
## Cluster Pemerataan.Layanan.Perpustakaan Ketercukupan.Koleksi.Perpustakaan
## 1 1 0.5766760 0.354392
## 2 2 0.6405667 0.223550
## 3 3 1.0000000 0.901525
## Rasio.Ketercukupan.Tenaga.Perpustakaan Tingkat.Kunjungan.Masyarakat.per.hari
## 1 0.522416 0.3653720
## 2 0.325900 0.1290167
## 3 1.000000 0.9974000
## Keterlibatan.Masyarakat.dalam.Kegiatan.Sosialisasi.Perpustakaan
## 1 0.9910360
## 2 0.1432833
## 3 1.0000000
# Average linkage, k = 3: attach the cluster labels, then average every
# variable within each cluster
dataavg3 <- data.frame(
  datauplm,
  Cluster = cutavg3
)
meanclusavg <- aggregate(. ~ Cluster, data = dataavg3, FUN = mean)
print("Rata-rata per variabel untuk average linkage k = 3")
## [1] "Rata-rata per variabel untuk average linkage k = 3"
print(meanclusavg)
## Cluster Pemerataan.Layanan.Perpustakaan Ketercukupan.Koleksi.Perpustakaan
## 1 1 0.5766760 0.354392
## 2 2 0.6405667 0.223550
## 3 3 1.0000000 0.901525
## Rasio.Ketercukupan.Tenaga.Perpustakaan Tingkat.Kunjungan.Masyarakat.per.hari
## 1 0.522416 0.3653720
## 2 0.325900 0.1290167
## 3 1.000000 0.9974000
## Keterlibatan.Masyarakat.dalam.Kegiatan.Sosialisasi.Perpustakaan
## 1 0.9910360
## 2 0.1432833
## 3 1.0000000
# Ward's method, k = 3: attach the cluster labels, then average every
# variable within each cluster
dataward3 <- data.frame(
  datauplm,
  Cluster = cutward3
)
meanclusward <- aggregate(. ~ Cluster, data = dataward3, FUN = mean)
print("Rata-rata per variabel untuk metode ward k = 3")
## [1] "Rata-rata per variabel untuk metode ward k = 3"
print(meanclusward)
## Cluster Pemerataan.Layanan.Perpustakaan Ketercukupan.Koleksi.Perpustakaan
## 1 1 0.5766760 0.354392
## 2 2 0.6405667 0.223550
## 3 3 1.0000000 0.901525
## Rasio.Ketercukupan.Tenaga.Perpustakaan Tingkat.Kunjungan.Masyarakat.per.hari
## 1 0.522416 0.3653720
## 2 0.325900 0.1290167
## 3 1.000000 0.9974000
## Keterlibatan.Masyarakat.dalam.Kegiatan.Sosialisasi.Perpustakaan
## 1 0.9910360
## 2 0.1432833
## 3 1.0000000
# K-means, k = 3: attach the cluster labels, then average every variable
# within each cluster
datakm3 <- data.frame(
  datauplm,
  Cluster = fitkm3$cluster
)
meancluskm <- aggregate(. ~ Cluster, data = datakm3, FUN = mean)
print("Rata-rata per variabel untuk metode k-means k = 3")
## [1] "Rata-rata per variabel untuk metode k-means k = 3"
print(meancluskm)
## Cluster Pemerataan.Layanan.Perpustakaan Ketercukupan.Koleksi.Perpustakaan
## 1 1 1.0000000 0.901525
## 2 2 0.6405667 0.223550
## 3 3 0.5766760 0.354392
## Rasio.Ketercukupan.Tenaga.Perpustakaan Tingkat.Kunjungan.Masyarakat.per.hari
## 1 1.000000 0.9974000
## 2 0.325900 0.1290167
## 3 0.522416 0.3653720
## Keterlibatan.Masyarakat.dalam.Kegiatan.Sosialisasi.Perpustakaan
## 1 1.0000000
## 2 0.1432833
## 3 0.9910360