Pada tugas ini dilakukan analisis clustering menggunakan metode unsupervised learning untuk mengelompokkan data berdasarkan kemiripan karakteristik. Dataset yang digunakan adalah dataset Parkinson’s Disease yang terdiri atas 24 kolom: 22 fitur numerik, kolom name, dan kolom status. Dua kolom yaitu name dan status tidak digunakan dalam proses clustering, karena name bukan variabel numerik dan status merupakan label. Tahapan yang dilakukan meliputi eksplorasi data, preprocessing, serta penerapan lima metode clustering yaitu K-Means, K-Medians (diimplementasikan dengan PAM/K-Medoids), DBSCAN, Mean Shift, dan Fuzzy C-Means. Hasil clustering kemudian dievaluasi menggunakan metrik internal seperti Silhouette dan Dunn Index, serta metrik eksternal Adjusted Rand Index (ARI) yang membandingkan hasil cluster dengan label status.
install.packages("flexclust")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
install.packages("dbscan")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
install.packages("meanShiftR")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
install.packages("e1071")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
install.packages("cluster")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
install.packages("fpc")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
install.packages("mclust")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
install.packages("dplyr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
install.packages("psych")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
install.packages("factoextra")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
library(flexclust)
library(dbscan)
##
## Attaching package: 'dbscan'
## The following object is masked from 'package:stats':
##
## as.dendrogram
library(meanShiftR)
library(e1071)
##
## Attaching package: 'e1071'
## The following object is masked from 'package:flexclust':
##
## bclust
library(cluster)
library(fpc)
##
## Attaching package: 'fpc'
## The following object is masked from 'package:dbscan':
##
## dbscan
library(mclust)
## Package 'mclust' version 6.1.2
## Type 'citation("mclust")' for citing this R package in publications.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:mclust':
##
## count
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:mclust':
##
## sim
library(factoextra)
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
## The following object is masked from 'package:e1071':
##
## element
## Welcome to factoextra!
## Want to learn more? See two factoextra-related books at https://www.datanovia.com/en/product/practical-guide-to-principal-component-methods-in-r/
# Load the Parkinson's dataset from the working directory and print its
# structure (column names, types and first values).
data <- read.csv(file = "parkinsons.csv")
str(object = data)
## 'data.frame': 195 obs. of 24 variables:
## $ name : chr "phon_R01_S01_1" "phon_R01_S01_2" "phon_R01_S01_3" "phon_R01_S01_4" ...
## $ MDVP.Fo.Hz. : num 120 122 117 117 116 ...
## $ MDVP.Fhi.Hz. : num 157 149 131 138 142 ...
## $ MDVP.Flo.Hz. : num 75 114 112 111 111 ...
## $ MDVP.Jitter... : num 0.00784 0.00968 0.0105 0.00997 0.01284 ...
## $ MDVP.Jitter.Abs.: num 0.00007 0.00008 0.00009 0.00009 0.00011 0.00008 0.00003 0.00003 0.00006 0.00006 ...
## $ MDVP.RAP : num 0.0037 0.00465 0.00544 0.00502 0.00655 0.00463 0.00155 0.00144 0.00293 0.00268 ...
## $ MDVP.PPQ : num 0.00554 0.00696 0.00781 0.00698 0.00908 0.0075 0.00202 0.00182 0.00332 0.00332 ...
## $ Jitter.DDP : num 0.0111 0.0139 0.0163 0.015 0.0197 ...
## $ MDVP.Shimmer : num 0.0437 0.0613 0.0523 0.0549 0.0643 ...
## $ MDVP.Shimmer.dB.: num 0.426 0.626 0.482 0.517 0.584 0.456 0.14 0.134 0.191 0.255 ...
## $ Shimmer.APQ3 : num 0.0218 0.0313 0.0276 0.0292 0.0349 ...
## $ Shimmer.APQ5 : num 0.0313 0.0452 0.0386 0.0401 0.0483 ...
## $ MDVP.APQ : num 0.0297 0.0437 0.0359 0.0377 0.0447 ...
## $ Shimmer.DDA : num 0.0654 0.094 0.0827 0.0877 0.1047 ...
## $ NHR : num 0.0221 0.0193 0.0131 0.0135 0.0177 ...
## $ HNR : num 21 19.1 20.7 20.6 19.6 ...
## $ status : int 1 1 1 1 1 1 1 1 1 1 ...
## $ RPDE : num 0.415 0.458 0.43 0.435 0.417 ...
## $ DFA : num 0.815 0.82 0.825 0.819 0.823 ...
## $ spread1 : num -4.81 -4.08 -4.44 -4.12 -3.75 ...
## $ spread2 : num 0.266 0.336 0.311 0.334 0.235 ...
## $ D2 : num 2.3 2.49 2.34 2.41 2.33 ...
## $ PPE : num 0.285 0.369 0.333 0.369 0.41 ...
# Keep only the numeric columns; this drops the character column `name`.
data_numeric <- data[vapply(data, is.numeric, logical(1))]

# `status` is the class label: set it aside for external validation and
# exclude it from the feature set used for clustering.
label <- data_numeric[["status"]]
data_clustering <- data_numeric[, !(colnames(data_numeric) %in% "status")]

# Number of missing values per numeric column.
colSums(is.na(data_numeric))
## MDVP.Fo.Hz. MDVP.Fhi.Hz. MDVP.Flo.Hz. MDVP.Jitter...
## 0 0 0 0
## MDVP.Jitter.Abs. MDVP.RAP MDVP.PPQ Jitter.DDP
## 0 0 0 0
## MDVP.Shimmer MDVP.Shimmer.dB. Shimmer.APQ3 Shimmer.APQ5
## 0 0 0 0
## MDVP.APQ Shimmer.DDA NHR HNR
## 0 0 0 0
## status RPDE DFA spread1
## 0 0 0 0
## spread2 D2 PPE
## 0 0 0
# Count the rows that are exact duplicates of an earlier row.
sum(duplicated(data_numeric))
## [1] 0
# Descriptive statistics (psych::describe) for every clustering feature.
describe(data_clustering)
## vars n mean sd median trimmed mad min max
## MDVP.Fo.Hz. 1 195 154.23 41.39 148.79 150.80 47.13 88.33 260.10
## MDVP.Fhi.Hz. 2 195 197.10 91.49 175.83 180.97 62.99 102.14 592.03
## MDVP.Flo.Hz. 3 195 116.32 43.52 104.32 109.94 35.11 65.48 239.17
## MDVP.Jitter... 4 195 0.01 0.00 0.00 0.01 0.00 0.00 0.03
## MDVP.Jitter.Abs. 5 195 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## MDVP.RAP 6 195 0.00 0.00 0.00 0.00 0.00 0.00 0.02
## MDVP.PPQ 7 195 0.00 0.00 0.00 0.00 0.00 0.00 0.02
## Jitter.DDP 8 195 0.01 0.01 0.01 0.01 0.00 0.00 0.06
## MDVP.Shimmer 9 195 0.03 0.02 0.02 0.03 0.01 0.01 0.12
## MDVP.Shimmer.dB. 10 195 0.28 0.19 0.22 0.25 0.13 0.09 1.30
## Shimmer.APQ3 11 195 0.02 0.01 0.01 0.01 0.01 0.00 0.06
## Shimmer.APQ5 12 195 0.02 0.01 0.01 0.02 0.01 0.01 0.08
## MDVP.APQ 13 195 0.02 0.02 0.02 0.02 0.01 0.01 0.14
## Shimmer.DDA 14 195 0.05 0.03 0.04 0.04 0.02 0.01 0.17
## NHR 15 195 0.02 0.04 0.01 0.02 0.01 0.00 0.31
## HNR 16 195 21.89 4.43 22.09 22.17 4.37 8.44 33.05
## RPDE 17 195 0.50 0.10 0.50 0.50 0.12 0.26 0.69
## DFA 18 195 0.72 0.06 0.72 0.72 0.06 0.57 0.83
## spread1 19 195 -5.68 1.09 -5.72 -5.74 1.07 -7.96 -2.43
## spread2 20 195 0.23 0.08 0.22 0.23 0.07 0.01 0.45
## D2 21 195 2.38 0.38 2.36 2.36 0.40 1.42 3.67
## PPE 22 195 0.21 0.09 0.19 0.20 0.09 0.04 0.53
## range skew kurtosis se
## MDVP.Fo.Hz. 171.77 0.58 -0.67 2.96
## MDVP.Fhi.Hz. 489.88 2.50 7.30 6.55
## MDVP.Flo.Hz. 173.69 1.20 0.57 3.12
## MDVP.Jitter... 0.03 3.04 11.54 0.00
## MDVP.Jitter.Abs. 0.00 2.61 10.42 0.00
## MDVP.RAP 0.02 3.31 13.65 0.00
## MDVP.PPQ 0.02 3.03 11.48 0.00
## Jitter.DDP 0.06 3.31 13.66 0.00
## MDVP.Shimmer 0.11 1.64 3.06 0.00
## MDVP.Shimmer.dB. 1.22 1.97 4.89 0.01
## Shimmer.APQ3 0.05 1.56 2.56 0.00
## Shimmer.APQ5 0.07 1.77 3.68 0.00
## MDVP.APQ 0.13 2.58 10.71 0.00
## Shimmer.DDA 0.16 1.56 2.56 0.00
## NHR 0.31 4.16 21.15 0.00
## HNR 24.61 -0.51 0.53 0.32
## RPDE 0.43 -0.14 -0.95 0.01
## DFA 0.25 -0.03 -0.72 0.00
## spread1 5.53 0.43 -0.11 0.08
## spread2 0.44 0.14 -0.14 0.01
## D2 2.25 0.42 0.15 0.03
## PPE 0.48 0.79 0.45 0.01
# Pairwise correlation matrix of the clustering features
# (cor() uses Pearson correlation by default).
cor(data_clustering)
## MDVP.Fo.Hz. MDVP.Fhi.Hz. MDVP.Flo.Hz. MDVP.Jitter...
## MDVP.Fo.Hz. 1.00000000 0.400984676 0.59654552 -0.11800263
## MDVP.Fhi.Hz. 0.40098468 1.000000000 0.08495125 0.10208635
## MDVP.Flo.Hz. 0.59654552 0.084951251 1.00000000 -0.13991889
## MDVP.Jitter... -0.11800263 0.102086349 -0.13991889 1.00000000
## MDVP.Jitter.Abs. -0.38202665 -0.029198333 -0.27781524 0.93571398
## MDVP.RAP -0.07619381 0.097176619 -0.10051932 0.99027559
## MDVP.PPQ -0.11216543 0.091126194 -0.09582837 0.97425639
## Jitter.DDP -0.07621269 0.097149867 -0.10048768 0.99027622
## MDVP.Shimmer -0.09837375 0.002281231 -0.14454332 0.76906323
## MDVP.Shimmer.dB. -0.07374246 0.043465164 -0.11908865 0.80428928
## Shimmer.APQ3 -0.09471706 -0.003743251 -0.15074650 0.74662520
## Shimmer.APQ5 -0.07068178 -0.009996776 -0.10109462 0.72556100
## MDVP.APQ -0.07777382 0.004936985 -0.10729342 0.75825526
## Shimmer.DDA -0.09473157 -0.003732893 -0.15073674 0.74663517
## NHR -0.02198078 0.163766404 -0.10867045 0.90695860
## HNR 0.05914439 -0.024893140 0.21085090 -0.72816507
## RPDE -0.38389390 -0.112403513 -0.40014307 0.36067348
## DFA -0.44601329 -0.343096724 -0.05040631 0.09857244
## spread1 -0.41373817 -0.076657801 -0.39485736 0.69357671
## spread2 -0.24945005 -0.002953614 -0.24382940 0.38512347
## D2 0.17797951 0.176322818 -0.10062909 0.43343402
## PPE -0.37235638 -0.069543002 -0.34007134 0.72154293
## MDVP.Jitter.Abs. MDVP.RAP MDVP.PPQ Jitter.DDP
## MDVP.Fo.Hz. -0.38202665 -0.07619381 -0.11216543 -0.07621269
## MDVP.Fhi.Hz. -0.02919833 0.09717662 0.09112619 0.09714987
## MDVP.Flo.Hz. -0.27781524 -0.10051932 -0.09582837 -0.10048768
## MDVP.Jitter... 0.93571398 0.99027559 0.97425639 0.99027622
## MDVP.Jitter.Abs. 1.00000000 0.92291097 0.89777790 0.92291303
## MDVP.RAP 0.92291097 1.00000000 0.95731689 0.99999960
## MDVP.PPQ 0.89777790 0.95731689 1.00000000 0.95731916
## Jitter.DDP 0.92291303 0.99999960 0.95731916 1.00000000
## MDVP.Shimmer 0.70332237 0.75958052 0.79782603 0.75955470
## MDVP.Shimmer.dB. 0.71660128 0.79065155 0.83923894 0.79062060
## Shimmer.APQ3 0.69715303 0.74491236 0.76357992 0.74489376
## Shimmer.APQ5 0.64896070 0.70992682 0.78678047 0.70990709
## MDVP.APQ 0.64879344 0.73745467 0.80413930 0.73743867
## Shimmer.DDA 0.69716971 0.74491923 0.76359216 0.74490062
## NHR 0.83497218 0.91952071 0.84460353 0.91954818
## HNR -0.65680959 -0.72154316 -0.73151047 -0.72149437
## RPDE 0.44183856 0.34213980 0.33327415 0.34207948
## DFA 0.17503599 0.06408302 0.19630142 0.06402640
## spread1 0.73577923 0.64832777 0.71648864 0.64832755
## spread2 0.38854284 0.32440718 0.40760548 0.32437685
## D2 0.31069445 0.42660538 0.41252368 0.42655576
## PPE 0.74816165 0.67099905 0.76964729 0.67100525
## MDVP.Shimmer MDVP.Shimmer.dB. Shimmer.APQ3 Shimmer.APQ5
## MDVP.Fo.Hz. -0.098373748 -0.07374246 -0.094717062 -0.070681785
## MDVP.Fhi.Hz. 0.002281231 0.04346516 -0.003743251 -0.009996776
## MDVP.Flo.Hz. -0.144543324 -0.11908865 -0.150746503 -0.101094616
## MDVP.Jitter... 0.769063235 0.80428928 0.746625201 0.725561003
## MDVP.Jitter.Abs. 0.703322372 0.71660128 0.697153027 0.648960696
## MDVP.RAP 0.759580521 0.79065155 0.744912357 0.709926823
## MDVP.PPQ 0.797826025 0.83923894 0.763579916 0.786780474
## Jitter.DDP 0.759554700 0.79062060 0.744893765 0.709907085
## MDVP.Shimmer 1.000000000 0.98725781 0.987625052 0.982835448
## MDVP.Shimmer.dB. 0.987257815 1.00000000 0.963198085 0.973750561
## Shimmer.APQ3 0.987625052 0.96319809 1.000000000 0.960069764
## Shimmer.APQ5 0.982835448 0.97375056 0.960069764 1.000000000
## MDVP.APQ 0.950082893 0.96097672 0.896644519 0.949146111
## Shimmer.DDA 0.987625653 0.96320170 0.999999964 0.960071593
## NHR 0.722194454 0.74447728 0.716206735 0.658079759
## HNR -0.835270676 -0.82780535 -0.827123311 -0.813752789
## RPDE 0.447423645 0.41068380 0.435242425 0.399902849
## DFA 0.159953618 0.16515658 0.151124464 0.213873102
## spread1 0.654734300 0.65254666 0.610967362 0.646808933
## spread2 0.452024826 0.45431437 0.402242954 0.457194945
## D2 0.507088281 0.51223317 0.467265430 0.502174433
## PPE 0.693770720 0.69505813 0.645376686 0.702455664
## MDVP.APQ Shimmer.DDA NHR HNR RPDE
## MDVP.Fo.Hz. -0.077773819 -0.094731567 -0.02198078 0.059144387 -0.3838939
## MDVP.Fhi.Hz. 0.004936985 -0.003732893 0.16376640 -0.024893140 -0.1124035
## MDVP.Flo.Hz. -0.107293423 -0.150736736 -0.10867045 0.210850898 -0.4001431
## MDVP.Jitter... 0.758255255 0.746635172 0.90695860 -0.728165074 0.3606735
## MDVP.Jitter.Abs. 0.648793442 0.697169706 0.83497218 -0.656809593 0.4418386
## MDVP.RAP 0.737454666 0.744919228 0.91952071 -0.721543160 0.3421398
## MDVP.PPQ 0.804139303 0.763592161 0.84460353 -0.731510466 0.3332741
## Jitter.DDP 0.737438675 0.744900620 0.91954818 -0.721494370 0.3420795
## MDVP.Shimmer 0.950082893 0.987625653 0.72219445 -0.835270676 0.4474236
## MDVP.Shimmer.dB. 0.960976723 0.963201704 0.74447728 -0.827805346 0.4106838
## Shimmer.APQ3 0.896644519 0.999999964 0.71620674 -0.827123311 0.4352424
## Shimmer.APQ5 0.949146111 0.960071593 0.65807976 -0.813752789 0.3999028
## MDVP.APQ 1.000000000 0.896646766 0.69401905 -0.800406561 0.4513791
## Shimmer.DDA 0.896646766 1.000000000 0.71621454 -0.827130202 0.4352373
## NHR 0.694019049 0.716214539 1.00000000 -0.714072430 0.3708905
## HNR -0.800406561 -0.827130202 -0.71407243 1.000000000 -0.5987363
## RPDE 0.451379050 0.435237302 0.37089049 -0.598736337 1.0000000
## DFA 0.157275784 0.151132394 -0.13188174 -0.008664939 -0.1109500
## spread1 0.673158141 0.610971213 0.54086514 -0.673209818 0.5911170
## spread2 0.502187886 0.402223092 0.31809896 -0.431563748 0.4799045
## D2 0.536869028 0.467261374 0.47094876 -0.601401006 0.2369314
## PPE 0.721693953 0.645388954 0.55259127 -0.692875853 0.5458857
## DFA spread1 spread2 D2 PPE
## MDVP.Fo.Hz. -0.446013292 -0.4137382 -0.249450046 0.1779795 -0.3723564
## MDVP.Fhi.Hz. -0.343096724 -0.0766578 -0.002953614 0.1763228 -0.0695430
## MDVP.Flo.Hz. -0.050406307 -0.3948574 -0.243829403 -0.1006291 -0.3400713
## MDVP.Jitter... 0.098572442 0.6935767 0.385123473 0.4334340 0.7215429
## MDVP.Jitter.Abs. 0.175035992 0.7357792 0.388542835 0.3106944 0.7481617
## MDVP.RAP 0.064083019 0.6483278 0.324407179 0.4266054 0.6709990
## MDVP.PPQ 0.196301424 0.7164886 0.407605477 0.4125237 0.7696473
## Jitter.DDP 0.064026401 0.6483276 0.324376852 0.4265558 0.6710053
## MDVP.Shimmer 0.159953618 0.6547343 0.452024826 0.5070883 0.6937707
## MDVP.Shimmer.dB. 0.165156580 0.6525467 0.454314373 0.5122332 0.6950581
## Shimmer.APQ3 0.151124464 0.6109674 0.402242954 0.4672654 0.6453767
## Shimmer.APQ5 0.213873102 0.6468089 0.457194945 0.5021744 0.7024557
## MDVP.APQ 0.157275784 0.6731581 0.502187886 0.5368690 0.7216940
## Shimmer.DDA 0.151132394 0.6109712 0.402223092 0.4672614 0.6453890
## NHR -0.131881743 0.5408651 0.318098962 0.4709488 0.5525913
## HNR -0.008664939 -0.6732098 -0.431563748 -0.6014010 -0.6928759
## RPDE -0.110949960 0.5911170 0.479904528 0.2369314 0.5458857
## DFA 1.000000000 0.1956684 0.166548111 -0.1653810 0.2704447
## spread1 0.195668403 1.0000000 0.652357812 0.4951227 0.9624353
## spread2 0.166548111 0.6523578 1.000000000 0.5235317 0.6447110
## D2 -0.165381035 0.4951227 0.523531735 1.0000000 0.4805845
## PPE 0.270444747 0.9624353 0.644710959 0.4805845 1.0000000
# Correlation heat map over all numeric columns (this includes `status`).
corrplot::corrplot(
  cor(data_numeric),
  type = "full",
  tl.col = "black",
  tl.srt = 40,
  tl.cex = 0.5
)

# Boxplots of the unscaled features; las = 2 draws the axis labels
# perpendicular to the axis so the long column names stay readable.
boxplot(data_clustering, las = 2, main = "Boxplot Variabel Penelitian")
# Standardise every feature (centre and scale each column) so that no
# variable dominates the Euclidean distances used further down.
df <- scale(data_clustering)

# Elbow method: total within-cluster sum of squares for K = 1..10.
set.seed(123)
wss <- vapply(
  seq_len(10),
  function(k) kmeans(df, centers = k, nstart = 20)$tot.withinss,
  numeric(1)
)
plot(
  seq_len(10), wss,
  type = "b", pch = 19,
  xlab = "Jumlah Cluster (K)",
  ylab = "Total Within Sum of Squares",
  main = "Elbow Method"
)
# Average silhouette width of a K-Means partition of the scaled data `df`.
#
# k: number of clusters to fit.
# Returns the mean silhouette width over all observations.
avg_sil <- function(k) {
  fit <- kmeans(df, centers = k, nstart = 25)
  sil <- silhouette(fit$cluster, dist(df))
  # Column 3 of a silhouette object is named "sil_width".
  mean(sil[, "sil_width"])
}
# Silhouette scan for K-Means over K = 2..10.
k_values <- 2:10
avg_sil_values <- vapply(k_values, avg_sil, numeric(1))
plot(
  k_values, avg_sil_values,
  type = "b", pch = 19,
  xlab = "Jumlah Cluster",
  ylab = "Average Silhouette Width",
  main = "Silhouette Analysis"
)
# Final K-Means model with K = 2.
# nstart = 25 (the value already used in the silhouette scan) keeps the best
# of 25 random initialisations, so the reported partition no longer depends
# on a single random start. Re-knit to refresh the recorded metrics below.
km_res <- kmeans(df, centers = 2, nstart = 25)

# plot() on a matrix only uses its first two columns; fviz_cluster() projects
# the data onto the first two principal components instead.
plot(df, col = km_res$cluster, main = "K-Means")
fviz_cluster(list(data = df, cluster = km_res$cluster), main = "K-Means")

# Average silhouette width of the final partition.
mean(silhouette(km_res$cluster, dist(df))[, 3])
## [1] 0.3936954
# Internal validation of the K-Means partition (fpc::cluster.stats).
stats <- cluster.stats(d = dist(df), clustering = km_res$cluster)
paste("Dunn Index:", stats[["dunn"]])
## [1] "Dunn Index: 0.0827622816538801"
paste("Within-cluster SS:", stats[["within.cluster.ss"]])
## [1] "Within-cluster SS: 2804.86392622146"
# External validation: agreement between the clusters and the `status` label.
ari_score <- adjustedRandIndex(km_res$cluster, label)
print(paste("Adjusted Rand Index:", ari_score))
## [1] "Adjusted Rand Index: -0.0910349268611108"
# Average silhouette width of a PAM (k-medoids) partition of the scaled
# data `df`.
#
# k: number of clusters to fit.
# Returns the mean silhouette width over all observations.
avg_sil_kmed <- function(k) {
  medoid_fit <- pam(df, k = k)
  widths <- silhouette(medoid_fit$clustering, dist(df))[, 3]
  mean(widths)
}
# Silhouette scan for PAM over K = 2..10.
k_values <- 2:10
sil_values <- vapply(k_values, avg_sil_kmed, numeric(1))
plot(
  k_values, sil_values,
  type = "b", pch = 19,
  xlab = "Jumlah Cluster",
  ylab = "Average Silhouette",
  main = "Silhouette Analysis"
)
# Final PAM model with K = 2.
# NOTE: cluster::pam() is k-medoids (Partitioning Around Medoids); it is used
# here as the "K-Median" method.
kmed_res <- pam(df, k = 2)

# The pam object stores the assignment in `clustering`. The previous
# `kmed_res$cluster` only worked through `$` partial matching, so the full
# component name is spelled out.
plot(df, col = kmed_res$clustering, main = "K-Median (PAM)")
fviz_cluster(
  list(data = df, cluster = kmed_res$clustering),
  main = "K-Median"
)

# Average silhouette width of the final partition.
mean(silhouette(kmed_res$clustering, dist(df))[, 3])
## [1] 0.25392
# Internal validation of the PAM partition (fpc::cluster.stats).
# `clustering` is the actual component name of a pam object; `$cluster`
# relied on partial matching.
stats_kmed <- cluster.stats(dist(df), kmed_res$clustering)
paste("Dunn Index:", stats_kmed$dunn)
## [1] "Dunn Index: 0.06765155782393"
paste("Within-cluster SS:", stats_kmed$within.cluster.ss)
## [1] "Within-cluster SS: 3026.86259287097"
# External validation: agreement between the clusters and the `status` label.
ari_kmed <- adjustedRandIndex(kmed_res$clustering, label)
print(paste("Adjusted Rand Index:", ari_kmed))
## [1] "Adjusted Rand Index: 0.144604822351554"
# Density-based clustering on the scaled features.
# Both the dbscan and fpc packages export dbscan(); the bare name resolved to
# fpc's version only because fpc is attached last. `MinPts` is the fpc
# spelling of the argument, so the call is namespace-qualified to stop it
# depending on the library() order.
db_res <- fpc::dbscan(df, eps = 1.0, MinPts = 3)

# Cluster id 0 marks noise; "+ 1" shifts the ids so noise gets a valid colour.
plot(df, col = db_res$cluster + 1, main = "DBSCAN (0 = Noise)")
fviz_cluster(list(data = df, cluster = db_res$cluster), main = "DBSCAN")

# Evaluate only the observations that were assigned to a cluster.
valid <- db_res$cluster != 0
cluster_db <- db_res$cluster[valid]
# drop = FALSE keeps a matrix even if a single row survives the filter.
data_db <- df[valid, , drop = FALSE]
label_db <- label[valid]

# The silhouette needs at least two clusters.
if (length(unique(cluster_db)) > 1) {
  sil_db <- silhouette(cluster_db, dist(data_db))
  mean(sil_db[, 3])
} else {
  print("Cluster tidak cukup untuk dihitung")
}
## [1] 0.6621296
# Dunn index and within-cluster sum of squares for the non-noise DBSCAN
# clusters. Both are skipped when fewer than two clusters remain.
if (length(unique(cluster_db)) <= 1) {
  print("Cluster tidak cukup untuk dihitung")
} else {
  # Renumber the cluster ids to consecutive integers 1..K before passing
  # them to cluster.stats().
  stats_db <- cluster.stats(
    d = dist(data_db),
    clustering = as.integer(factor(cluster_db))
  )
  print(paste("Dunn Index:", stats_db[["dunn"]]))
  print(paste("Within-cluster SS:", stats_db[["within.cluster.ss"]]))
}
## [1] "Dunn Index: 1.09871117938903"
## [1] "Within-cluster SS: 6.93090845354017"
# External validation of DBSCAN, computed on the non-noise observations only
# (`cluster_db` and `label_db` were both filtered with `valid`).
ari_db <- adjustedRandIndex(cluster_db, label_db)
print(paste("Adjusted Rand Index:", ari_db))
## [1] "Adjusted Rand Index: 0.280155642023346"
# Mean Shift clustering with the same bandwidth (3) for every feature.
bw <- rep(3, times = ncol(df))
ms_res <- meanShift(df, bandwidth = bw)
plot(df, col = ms_res$assignment, main = "Mean Shift")

# Average silhouette width of the Mean Shift partition.
mean(silhouette(ms_res$assignment, dist(df))[, "sil_width"])
## [1] 0.5918753
# Number of observations assigned to each Mean Shift cluster.
table(ms_res$assignment)
##
## 1 2 3 4 5
## 191 1 1 1 1
# Internal validation of the Mean Shift partition (fpc::cluster.stats).
stats_ms <- cluster.stats(d = dist(df), clustering = ms_res$assignment)
paste("Dunn Index:", stats_ms[["dunn"]])
## [1] "Dunn Index: 0.36052736079939"
paste("Within-cluster SS:", stats_ms[["within.cluster.ss"]])
## [1] "Within-cluster SS: 3153.13229791174"
# External validation: agreement between the clusters and the `status` label.
ari_ms <- adjustedRandIndex(ms_res$assignment, label)
print(paste("Adjusted Rand Index:", ari_ms))
## [1] "Adjusted Rand Index: -0.0262672281902019"
# Average silhouette width of a Fuzzy C-Means partition of the scaled
# data `df`, using the hard assignment stored in `$cluster`.
#
# k: number of clusters to fit.
# Returns the mean silhouette width over all observations.
avg_sil_fcm <- function(k) {
  fuzzy_fit <- cmeans(df, centers = k, m = 2)
  widths <- silhouette(fuzzy_fit$cluster, dist(df))[, 3]
  mean(widths)
}
# Silhouette scan for Fuzzy C-Means over K = 2..10.
k_values_fcm <- 2:10
avg_sil_values_fcm <- vapply(k_values_fcm, avg_sil_fcm, numeric(1))
plot(
  k_values_fcm, avg_sil_values_fcm,
  type = "b", pch = 19,
  xlab = "Jumlah Cluster",
  ylab = "Average Silhouette Width",
  main = "Silhouette Analysis"
)
# Final Fuzzy C-Means model with two clusters and fuzzifier m = 2.
fcm_res <- cmeans(df, centers = 2, m = 2)

# Both plots colour the points by the hard assignment in `fcm_res$cluster`.
plot(df, col = fcm_res$cluster, main = "Fuzzy C-Means")
fviz_cluster(
  object = list(data = df, cluster = fcm_res$cluster),
  main = "Fuzzy C-Means"
)

# Average silhouette width of the hard partition.
mean(silhouette(fcm_res$cluster, dist(df))[, "sil_width"])
## [1] 0.3078738
# Internal validation of the Fuzzy C-Means hard partition
# (fpc::cluster.stats).
stats_fcm <- cluster.stats(dist(df), fcm_res$cluster)
paste("Dunn Index:", stats_fcm$dunn)
## [1] "Dunn Index: 0.0461779989979158"
paste("Within-cluster SS:", stats_fcm$within.cluster.ss)
## [1] "Within-cluster SS: 2869.53525011602"
# External validation: agreement between the clusters and the `status` label.
# Stored in `ari_fcm` (matching ari_kmed / ari_db / ari_ms) so the K-Means
# value held in `ari_score` is not overwritten.
ari_fcm <- adjustedRandIndex(fcm_res$cluster, label)
print(paste("Adjusted Rand Index:", ari_fcm))
## [1] "Adjusted Rand Index: 0.0266435046556967"