# Library
library(ggplot2)
library(cluster)
library(factoextra)
library(tidyverse)
library(readxl)
# Memanggil Data
datapkl<-read_excel("C:/Data/data_pkl.xlsx")
datapkl<-data.frame(datapkl)
datapkl
## KAPANEWON p_0_14 p_15_64 p_65_up pend_rendah pend_menengah
## 1 SRANDAKAN 17.68631 67.70396 14.609733 40.76546 49.34394
## 2 SANDEN 17.22648 67.75395 15.019565 40.27547 47.89169
## 3 KRETEK 17.38470 67.85818 14.757118 38.66714 50.09857
## 4 PUNDONG 18.67158 68.10478 13.223641 47.47414 45.03082
## 5 BAMBANGLIPURO 17.77268 69.15382 13.073503 43.05884 46.25390
## 6 PANDAK 18.03994 69.33701 12.623047 47.48805 43.89631
## 7 PAJANGAN 19.53318 69.26936 11.197463 48.02849 44.56892
## 8 BANTUL 19.03868 69.99076 10.970558 41.81268 44.20555
## 9 JETIS 19.28785 69.54183 11.170319 44.14343 45.80511
## 10 IMOGIRI 19.32576 67.86399 12.810253 54.75939 38.60706
## 11 DLINGO 18.18137 67.88089 13.937737 51.55657 45.24654
## 12 BANGUNTAPAN 20.09485 70.81340 9.091751 40.51654 41.07588
## 13 PLERET 20.93161 70.07876 8.989631 50.79547 40.16345
## 14 PIYUNGAN 19.89010 69.75817 10.351730 41.15581 48.23953
## 15 SEWON 19.21957 70.60383 10.176599 42.35092 42.94665
## 16 KASIHAN 19.39730 70.79124 9.811462 40.60416 44.41511
## 17 SEDAYU 18.93049 69.68466 11.384853 41.64470 46.79206
## pend_tinggi
## 1 9.890603
## 2 11.832838
## 3 11.234285
## 4 7.495047
## 5 10.687258
## 6 8.615640
## 7 7.402594
## 8 13.981763
## 9 10.051461
## 10 6.633553
## 11 3.196887
## 12 18.407575
## 13 9.041080
## 14 10.604655
## 15 14.702423
## 16 14.980726
## 17 11.563237
# Statistika Deskriptif
statdes <-summary(datapkl)
statdes
## KAPANEWON p_0_14 p_15_64 p_65_up
## Length:17 Min. :17.23 Min. :67.70 Min. : 8.99
## Class :character 1st Qu.:18.04 1st Qu.:67.88 1st Qu.:10.35
## Mode :character Median :19.04 Median :69.34 Median :11.38
## Mean :18.86 Mean :69.19 Mean :11.95
## 3rd Qu.:19.40 3rd Qu.:69.99 3rd Qu.:13.22
## Max. :20.93 Max. :70.81 Max. :15.02
## pend_rendah pend_menengah pend_tinggi
## Min. :38.67 Min. :38.61 Min. : 3.197
## 1st Qu.:40.77 1st Qu.:43.90 1st Qu.: 8.616
## Median :42.35 Median :45.03 Median :10.605
## Mean :44.42 Mean :44.98 Mean :10.607
## 3rd Qu.:47.49 3rd Qu.:46.79 3rd Qu.:11.833
## Max. :54.76 Max. :50.10 Max. :18.408
# Uji Asumsi
## Uji Non-Multikolinieritas
ujikorelasi <- cor(datapkl[,2:7], method = 'pearson')
ujikorelasi
## p_0_14 p_15_64 p_65_up pend_rendah pend_menengah
## p_0_14 1.0000000 0.6891428 -0.9112745540 0.2824600177 -0.65622624
## p_15_64 0.6891428 1.0000000 -0.9263986292 -0.2567023310 -0.38792700
## p_65_up -0.9112746 -0.9263986 1.0000000000 -0.0008954291 0.56145731
## pend_rendah 0.2824600 -0.2567023 -0.0008954291 1.0000000000 -0.64198407
## pend_menengah -0.6562262 -0.3879270 0.5614573057 -0.6419840690 1.00000000
## pend_tinggi 0.1978916 0.6693869 -0.4832399538 -0.7499453463 -0.02573082
## pend_tinggi
## p_0_14 0.19789157
## p_15_64 0.66938690
## p_65_up -0.48323995
## pend_rendah -0.74994535
## pend_menengah -0.02573082
## pend_tinggi 1.00000000
# Standardisasi
datastandardisasi <- scale(datapkl[,2:7])
datastandardisasi
## p_0_14 p_15_64 p_65_up pend_rendah pend_menengah
## [1,] -1.14637671 -1.32552528 1.3489892 -0.7752822 1.40149751
## [2,] -1.59567410 -1.28085604 1.5570768 -0.8792994 0.93559611
## [3,] -1.44108055 -1.18773240 1.4238223 -1.2207291 1.64359360
## [4,] -0.18366889 -0.96741289 0.6452152 0.6488900 0.01779135
## [5,] -1.06198568 -0.03015059 0.5689844 -0.2884242 0.41017299
## [6,] -0.80084445 0.13352385 0.3402700 0.6518437 -0.34617304
## [7,] 0.65819602 0.07307891 -0.3835550 0.7665721 -0.13039156
## [8,] 0.17502082 0.71761579 -0.4987635 -0.5529685 -0.24696339
## [9,] 0.41848386 0.31652089 -0.3973373 -0.0581796 0.26619558
## [10,] 0.45552706 -1.18254578 0.4353220 2.1954609 -2.04303236
## [11,] -0.66265475 -1.16744143 1.0077901 1.5155424 0.08699805
## [12,] 1.20700538 1.45259663 -1.4527079 -0.8281230 -1.25100306
## [13,] 2.02460428 0.79623331 -1.5045584 1.3539698 -1.54372344
## [14,] 1.00693900 0.50981096 -0.8129674 -0.6924140 1.04718892
## [15,] 0.35177125 1.26535845 -0.9018882 -0.4387066 -0.65083493
## [16,] 0.52542852 1.43279870 -1.0872825 -0.8095229 -0.17973457
## [17,] 0.06930895 0.44412690 -0.2884097 -0.5886293 0.58282222
## pend_tinggi
## [1,] -0.1983320724
## [2,] 0.3392533979
## [3,] 0.1735815991
## [4,] -0.8613912102
## [5,] 0.0221715538
## [6,] -0.5512254502
## [7,] -0.8869810947
## [8,] 0.9340482755
## [9,] -0.1538088181
## [10,] -1.0998418861
## [11,] -2.0510667739
## [12,] 2.1590560624
## [13,] -0.4334693444
## [14,] -0.0006919067
## [15,] 1.1335178360
## [16,] 1.2105484192
## [17,] 0.2646314128
## attr(,"scaled:center")
## p_0_14 p_15_64 p_65_up pend_rendah pend_menengah
## 18.85956 69.18756 11.95288 44.41749 44.97536
## pend_tinggi
## 10.60715
## attr(,"scaled:scale")
## p_0_14 p_15_64 p_65_up pend_rendah pend_menengah
## 1.023438 1.119260 1.969514 4.710583 3.117081
## pend_tinggi
## 3.612885
# Jarak Euclidean
jarak <- dist(datastandardisasi, method = "euclidean")
jarak
## 1 2 3 4 5 6 7
## 2 0.8740971
## 3 0.7118922 0.8340322
## 4 2.4357556 2.7464109 3.0733908
## 5 1.8873001 1.8844516 2.1512555 1.8625027
## 6 2.9122681 2.9821347 3.3646324 1.3837054 1.3900509
## 7 3.6623111 4.0034980 4.1814355 1.6988791 2.4715984 1.6819131
## 8 3.6583929 3.6351684 3.8127275 3.0024705 2.1360609 2.3801064 2.3943858
## 9 3.1622602 3.4341848 3.5204135 2.0396850 1.8304262 1.7610742 1.2214558
## 10 4.9924090 5.0865311 5.6086944 2.6822833 4.1346754 2.9803707 2.8343351
## 11 3.2838706 3.6543290 3.9545762 1.6037778 3.0500001 2.3109147 2.6800539
## 12 5.8053170 5.6992189 5.8934761 5.0106098 4.3647629 4.3978209 4.0527169
## 13 5.9969195 6.2069270 6.4559337 3.9657317 4.6078526 3.7103519 2.4889533
## 14 3.5847939 3.9699036 3.8135112 3.0537834 2.6554318 2.9649963 2.1903416
## 15 4.4858389 4.4344111 4.6301556 3.6434987 2.8677398 2.8762837 2.7550082
## 16 4.5634543 4.5676712 4.6485379 3.9653879 3.0716606 3.2790490 3.0410258
## 17 2.8653166 3.0607290 3.0674184 2.4590118 1.5551878 2.0775724 2.0408267
## 8 9 10 11 12 13 14
## 2
## 3
## 4
## 5
## 6
## 7
## 8
## 9 1.3863693
## 10 4.4134171 3.7746924
## 11 4.4526424 3.3843298 2.7354023
## 12 2.2582402 3.3571252 5.6092012 6.4360258
## 13 3.4098193 3.0633100 3.3973777 4.7635338 3.5595465
## 14 1.8444536 1.2617597 4.8809602 4.3493622 3.3619569 3.5614307
## 15 0.8424538 1.9499412 4.6536093 5.0099413 1.6227944 3.1381599 2.2895606
## 16 1.0617998 2.0876730 5.1964360 5.3551217 1.6266849 3.4743090 2.0359682
## 17 1.1261237 0.8407099 4.4527603 3.8534265 3.2674259 3.7695370 1.2064118
## 15 16
## 2
## 3
## 4
## 5
## 6
## 7
## 8
## 9
## 10
## 11
## 12
## 13
## 14
## 15
## 16 0.6767112
## 17 1.8520023 1.8299619
# Koefisien Korelasi Cophenetic
d1 <- dist(datastandardisasi)
## Single Linkage
hc1 <- hclust(d1, "single")
d2 <- cophenetic(hc1)
cors <- cor(d1,d2)
cors
## [1] 0.636194
## Average Linkage
hc2 <- hclust(d1, "average")
d3 <- cophenetic(hc2)
corave <- cor(d1,d3)
corave
## [1] 0.7152892
## Complete Linkage
hc3 <- hclust(d1, "complete")
d4 <- cophenetic(hc3)
corcomp <- cor(d1,d4)
corcomp
## [1] 0.6354055
## Centorid Linkage
hc4 <- hclust(d1, "centroid")
d5 <- cophenetic(hc4)
corcen <- cor(d1,d5)
corcen
## [1] 0.7081911
## Ward
hc5 <- hclust(d1,"ward.D2")
d6 <- cophenetic(hc5)
corward <- cor(d1,d6)
corward
## [1] 0.6157763
KorCop<-data.frame(cors,corave,corcomp,corcen,corward)
KorCop
## cors corave corcomp corcen corward
## 1 0.636194 0.7152892 0.6354055 0.7081911 0.6157763
Koefisien cophenetic digunakan untuk menentukan metode linkage terbaik. Hasil menunjukkan bahwa average linkage memiliki nilai cophenetic terbesar, sehingga dipilih sebagai metode pengelompokan.
# Average Linkage
hc_avg <- hclust(jarak, method = "average")
# Indeks Validitas (Silhouette)
sil_score <- data.frame(k = 2:7, silhouette = NA)
for(i in 2:7){
cl <- cutree(hc_avg, k = i)
sil <- silhouette(cl, jarak)
sil_score[sil_score$k == i, "silhouette"] <- mean(sil[,3])
}
sil_score
## k silhouette
## 1 2 0.2568525
## 2 3 0.3396909
## 3 4 0.2859193
## 4 5 0.3311082
## 5 6 0.2926477
## 6 7 0.3181887
plot(sil_score$k, sil_score$silhouette, type="b",
xlab="Jumlah Cluster (k)",
ylab="Rata-rata Silhouette",
main="Silhouette Method")
Metode silhouette digunakan untuk menentukan jumlah cluster optimal. Nilai silhouette tertinggi diperoleh pada k = 3, sehingga jumlah cluster terbaik adalah 3 cluster.
# Dendrogram
plot(hc_avg, main="Cluster Dendrogram", xlab="Kapanewon", ylab="Jarak")
fviz_dend(hc_avg, k = 3, rect = TRUE, cex = 0.5)
# Cluster final
idclus <- cutree(hc_avg, k=3)
idclus
## [1] 1 1 1 2 2 2 2 2 2 3 3 2 2 2 2 2 2
# Rata-rata tiap cluster
aggregate(datapkl[,2:7], list(idclus), mean)
## Group.1 p_0_14 p_15_64 p_65_up pend_rendah pend_menengah pend_tinggi
## 1 1 17.43250 67.77203 14.79547 39.90269 49.11140 10.98591
## 2 2 19.23399 69.76063 11.00538 44.08944 44.44944 11.46112
## 3 3 18.75357 67.87244 13.37400 53.15798 41.92680 4.91522