# Library
library(ggplot2)
library(cluster)
library(factoextra)
library(tidyverse)
library(readxl)

# Memanggil Data
datapkl<-read_excel("C:/Data/data_pkl.xlsx")
datapkl<-data.frame(datapkl)
datapkl
##        KAPANEWON   p_0_14  p_15_64   p_65_up pend_rendah pend_menengah
## 1      SRANDAKAN 17.68631 67.70396 14.609733    40.76546      49.34394
## 2         SANDEN 17.22648 67.75395 15.019565    40.27547      47.89169
## 3         KRETEK 17.38470 67.85818 14.757118    38.66714      50.09857
## 4        PUNDONG 18.67158 68.10478 13.223641    47.47414      45.03082
## 5  BAMBANGLIPURO 17.77268 69.15382 13.073503    43.05884      46.25390
## 6         PANDAK 18.03994 69.33701 12.623047    47.48805      43.89631
## 7       PAJANGAN 19.53318 69.26936 11.197463    48.02849      44.56892
## 8         BANTUL 19.03868 69.99076 10.970558    41.81268      44.20555
## 9          JETIS 19.28785 69.54183 11.170319    44.14343      45.80511
## 10       IMOGIRI 19.32576 67.86399 12.810253    54.75939      38.60706
## 11        DLINGO 18.18137 67.88089 13.937737    51.55657      45.24654
## 12   BANGUNTAPAN 20.09485 70.81340  9.091751    40.51654      41.07588
## 13        PLERET 20.93161 70.07876  8.989631    50.79547      40.16345
## 14      PIYUNGAN 19.89010 69.75817 10.351730    41.15581      48.23953
## 15         SEWON 19.21957 70.60383 10.176599    42.35092      42.94665
## 16       KASIHAN 19.39730 70.79124  9.811462    40.60416      44.41511
## 17        SEDAYU 18.93049 69.68466 11.384853    41.64470      46.79206
##    pend_tinggi
## 1     9.890603
## 2    11.832838
## 3    11.234285
## 4     7.495047
## 5    10.687258
## 6     8.615640
## 7     7.402594
## 8    13.981763
## 9    10.051461
## 10    6.633553
## 11    3.196887
## 12   18.407575
## 13    9.041080
## 14   10.604655
## 15   14.702423
## 16   14.980726
## 17   11.563237
# Statistika Deskriptif
statdes <-summary(datapkl)
statdes
##   KAPANEWON             p_0_14         p_15_64         p_65_up     
##  Length:17          Min.   :17.23   Min.   :67.70   Min.   : 8.99  
##  Class :character   1st Qu.:18.04   1st Qu.:67.88   1st Qu.:10.35  
##  Mode  :character   Median :19.04   Median :69.34   Median :11.38  
##                     Mean   :18.86   Mean   :69.19   Mean   :11.95  
##                     3rd Qu.:19.40   3rd Qu.:69.99   3rd Qu.:13.22  
##                     Max.   :20.93   Max.   :70.81   Max.   :15.02  
##   pend_rendah    pend_menengah    pend_tinggi    
##  Min.   :38.67   Min.   :38.61   Min.   : 3.197  
##  1st Qu.:40.77   1st Qu.:43.90   1st Qu.: 8.616  
##  Median :42.35   Median :45.03   Median :10.605  
##  Mean   :44.42   Mean   :44.98   Mean   :10.607  
##  3rd Qu.:47.49   3rd Qu.:46.79   3rd Qu.:11.833  
##  Max.   :54.76   Max.   :50.10   Max.   :18.408
# Uji Asumsi
## Uji Non-Multikolinieritas
ujikorelasi <- cor(datapkl[,2:7], method = 'pearson')
ujikorelasi
##                   p_0_14    p_15_64       p_65_up   pend_rendah pend_menengah
## p_0_14         1.0000000  0.6891428 -0.9112745540  0.2824600177   -0.65622624
## p_15_64        0.6891428  1.0000000 -0.9263986292 -0.2567023310   -0.38792700
## p_65_up       -0.9112746 -0.9263986  1.0000000000 -0.0008954291    0.56145731
## pend_rendah    0.2824600 -0.2567023 -0.0008954291  1.0000000000   -0.64198407
## pend_menengah -0.6562262 -0.3879270  0.5614573057 -0.6419840690    1.00000000
## pend_tinggi    0.1978916  0.6693869 -0.4832399538 -0.7499453463   -0.02573082
##               pend_tinggi
## p_0_14         0.19789157
## p_15_64        0.66938690
## p_65_up       -0.48323995
## pend_rendah   -0.74994535
## pend_menengah -0.02573082
## pend_tinggi    1.00000000
# Standardisasi
datastandardisasi <- scale(datapkl[,2:7])
datastandardisasi
##            p_0_14     p_15_64    p_65_up pend_rendah pend_menengah
##  [1,] -1.14637671 -1.32552528  1.3489892  -0.7752822    1.40149751
##  [2,] -1.59567410 -1.28085604  1.5570768  -0.8792994    0.93559611
##  [3,] -1.44108055 -1.18773240  1.4238223  -1.2207291    1.64359360
##  [4,] -0.18366889 -0.96741289  0.6452152   0.6488900    0.01779135
##  [5,] -1.06198568 -0.03015059  0.5689844  -0.2884242    0.41017299
##  [6,] -0.80084445  0.13352385  0.3402700   0.6518437   -0.34617304
##  [7,]  0.65819602  0.07307891 -0.3835550   0.7665721   -0.13039156
##  [8,]  0.17502082  0.71761579 -0.4987635  -0.5529685   -0.24696339
##  [9,]  0.41848386  0.31652089 -0.3973373  -0.0581796    0.26619558
## [10,]  0.45552706 -1.18254578  0.4353220   2.1954609   -2.04303236
## [11,] -0.66265475 -1.16744143  1.0077901   1.5155424    0.08699805
## [12,]  1.20700538  1.45259663 -1.4527079  -0.8281230   -1.25100306
## [13,]  2.02460428  0.79623331 -1.5045584   1.3539698   -1.54372344
## [14,]  1.00693900  0.50981096 -0.8129674  -0.6924140    1.04718892
## [15,]  0.35177125  1.26535845 -0.9018882  -0.4387066   -0.65083493
## [16,]  0.52542852  1.43279870 -1.0872825  -0.8095229   -0.17973457
## [17,]  0.06930895  0.44412690 -0.2884097  -0.5886293    0.58282222
##         pend_tinggi
##  [1,] -0.1983320724
##  [2,]  0.3392533979
##  [3,]  0.1735815991
##  [4,] -0.8613912102
##  [5,]  0.0221715538
##  [6,] -0.5512254502
##  [7,] -0.8869810947
##  [8,]  0.9340482755
##  [9,] -0.1538088181
## [10,] -1.0998418861
## [11,] -2.0510667739
## [12,]  2.1590560624
## [13,] -0.4334693444
## [14,] -0.0006919067
## [15,]  1.1335178360
## [16,]  1.2105484192
## [17,]  0.2646314128
## attr(,"scaled:center")
##        p_0_14       p_15_64       p_65_up   pend_rendah pend_menengah 
##      18.85956      69.18756      11.95288      44.41749      44.97536 
##   pend_tinggi 
##      10.60715 
## attr(,"scaled:scale")
##        p_0_14       p_15_64       p_65_up   pend_rendah pend_menengah 
##      1.023438      1.119260      1.969514      4.710583      3.117081 
##   pend_tinggi 
##      3.612885
# Jarak Euclidean 
jarak <- dist(datastandardisasi, method = "euclidean")
jarak
##            1         2         3         4         5         6         7
## 2  0.8740971                                                            
## 3  0.7118922 0.8340322                                                  
## 4  2.4357556 2.7464109 3.0733908                                        
## 5  1.8873001 1.8844516 2.1512555 1.8625027                              
## 6  2.9122681 2.9821347 3.3646324 1.3837054 1.3900509                    
## 7  3.6623111 4.0034980 4.1814355 1.6988791 2.4715984 1.6819131          
## 8  3.6583929 3.6351684 3.8127275 3.0024705 2.1360609 2.3801064 2.3943858
## 9  3.1622602 3.4341848 3.5204135 2.0396850 1.8304262 1.7610742 1.2214558
## 10 4.9924090 5.0865311 5.6086944 2.6822833 4.1346754 2.9803707 2.8343351
## 11 3.2838706 3.6543290 3.9545762 1.6037778 3.0500001 2.3109147 2.6800539
## 12 5.8053170 5.6992189 5.8934761 5.0106098 4.3647629 4.3978209 4.0527169
## 13 5.9969195 6.2069270 6.4559337 3.9657317 4.6078526 3.7103519 2.4889533
## 14 3.5847939 3.9699036 3.8135112 3.0537834 2.6554318 2.9649963 2.1903416
## 15 4.4858389 4.4344111 4.6301556 3.6434987 2.8677398 2.8762837 2.7550082
## 16 4.5634543 4.5676712 4.6485379 3.9653879 3.0716606 3.2790490 3.0410258
## 17 2.8653166 3.0607290 3.0674184 2.4590118 1.5551878 2.0775724 2.0408267
##            8         9        10        11        12        13        14
## 2                                                                       
## 3                                                                       
## 4                                                                       
## 5                                                                       
## 6                                                                       
## 7                                                                       
## 8                                                                       
## 9  1.3863693                                                            
## 10 4.4134171 3.7746924                                                  
## 11 4.4526424 3.3843298 2.7354023                                        
## 12 2.2582402 3.3571252 5.6092012 6.4360258                              
## 13 3.4098193 3.0633100 3.3973777 4.7635338 3.5595465                    
## 14 1.8444536 1.2617597 4.8809602 4.3493622 3.3619569 3.5614307          
## 15 0.8424538 1.9499412 4.6536093 5.0099413 1.6227944 3.1381599 2.2895606
## 16 1.0617998 2.0876730 5.1964360 5.3551217 1.6266849 3.4743090 2.0359682
## 17 1.1261237 0.8407099 4.4527603 3.8534265 3.2674259 3.7695370 1.2064118
##           15        16
## 2                     
## 3                     
## 4                     
## 5                     
## 6                     
## 7                     
## 8                     
## 9                     
## 10                    
## 11                    
## 12                    
## 13                    
## 14                    
## 15                    
## 16 0.6767112          
## 17 1.8520023 1.8299619
# Koefisien Korelasi Cophenetic
d1 <- dist(datastandardisasi)
## Single Linkage
hc1 <- hclust(d1, "single")
d2 <- cophenetic(hc1)
cors <- cor(d1,d2)
cors
## [1] 0.636194
## Average Linkage
hc2 <- hclust(d1, "average")
d3 <- cophenetic(hc2)
corave <- cor(d1,d3)
corave
## [1] 0.7152892
## Complete Linkage
hc3 <- hclust(d1, "complete")
d4 <- cophenetic(hc3)
corcomp <- cor(d1,d4)
corcomp
## [1] 0.6354055
## Centorid Linkage
hc4 <- hclust(d1, "centroid")
d5 <- cophenetic(hc4)
corcen <- cor(d1,d5)
corcen
## [1] 0.7081911
## Ward
hc5 <- hclust(d1,"ward.D2")
d6 <- cophenetic(hc5)
corward <- cor(d1,d6)
corward
## [1] 0.6157763
KorCop<-data.frame(cors,corave,corcomp,corcen,corward)
KorCop
##       cors    corave   corcomp    corcen   corward
## 1 0.636194 0.7152892 0.6354055 0.7081911 0.6157763

Koefisien cophenetic digunakan untuk menentukan metode linkage terbaik. Hasil menunjukkan bahwa average linkage memiliki nilai cophenetic terbesar, sehingga dipilih sebagai metode pengelompokan.

# Average Linkage
hc_avg <- hclust(jarak, method = "average")

# Indeks Validitas (Silhouette)
sil_score <- data.frame(k = 2:7, silhouette = NA)

for(i in 2:7){
  cl <- cutree(hc_avg, k = i)
  sil <- silhouette(cl, jarak)
  sil_score[sil_score$k == i, "silhouette"] <- mean(sil[,3])
}

sil_score
##   k silhouette
## 1 2  0.2568525
## 2 3  0.3396909
## 3 4  0.2859193
## 4 5  0.3311082
## 5 6  0.2926477
## 6 7  0.3181887
plot(sil_score$k, sil_score$silhouette, type="b",
     xlab="Jumlah Cluster (k)",
     ylab="Rata-rata Silhouette",
     main="Silhouette Method")

Metode silhouette digunakan untuk menentukan jumlah cluster optimal. Nilai silhouette tertinggi diperoleh pada k = 3, sehingga jumlah cluster terbaik adalah 3 cluster.

# Dendrogram
plot(hc_avg, main="Cluster Dendrogram", xlab="Kapanewon", ylab="Jarak")

fviz_dend(hc_avg, k = 3, rect = TRUE, cex = 0.5)

# Cluster final
idclus <- cutree(hc_avg, k=3)
idclus
##  [1] 1 1 1 2 2 2 2 2 2 3 3 2 2 2 2 2 2
# Rata-rata tiap cluster
aggregate(datapkl[,2:7], list(idclus), mean)
##   Group.1   p_0_14  p_15_64  p_65_up pend_rendah pend_menengah pend_tinggi
## 1       1 17.43250 67.77203 14.79547    39.90269      49.11140    10.98591
## 2       2 19.23399 69.76063 11.00538    44.08944      44.44944    11.46112
## 3       3 18.75357 67.87244 13.37400    53.15798      41.92680     4.91522