DESKRIPSI TUGAS

Pada tugas ini dilakukan analisis clustering menggunakan metode unsupervised learning untuk mengelompokkan data berdasarkan kemiripan karakteristik. Dataset yang digunakan adalah dataset Parkinson’s Disease yang terdiri atas 24 kolom: 22 fitur numerik, kolom name, dan kolom status. Kolom name dan status tidak digunakan dalam proses clustering, karena name bukan variabel numerik dan status merupakan label. Tahapan yang dilakukan meliputi eksplorasi data, preprocessing, serta penerapan lima metode clustering, yaitu K-Means, K-Medians, DBSCAN, Mean Shift, dan Fuzzy C-Means. Hasil clustering kemudian dievaluasi menggunakan metrik internal (Silhouette dan Dunn Index) serta metrik eksternal Adjusted Rand Index (ARI), yang membandingkan hasil cluster dengan label status.

# Packages used by this analysis. corrplot is called via corrplot::corrplot()
# further down, so it has to be installed as well.
required_packages <- c(
  "flexclust", "dbscan", "meanShiftR", "e1071", "cluster", "fpc",
  "mclust", "dplyr", "psych", "factoextra", "corrplot"
)
# Install only what is missing instead of reinstalling everything on each run.
missing_packages <- required_packages[
  !vapply(required_packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
library(flexclust)
library(dbscan)
## 
## Attaching package: 'dbscan'
## The following object is masked from 'package:stats':
## 
##     as.dendrogram
library(meanShiftR)
library(e1071)
## 
## Attaching package: 'e1071'
## The following object is masked from 'package:flexclust':
## 
##     bclust
library(cluster)
library(fpc)
## 
## Attaching package: 'fpc'
## The following object is masked from 'package:dbscan':
## 
##     dbscan
library(mclust)
## Package 'mclust' version 6.1.2
## Type 'citation("mclust")' for citing this R package in publications.
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:mclust':
## 
##     count
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(psych)
## 
## Attaching package: 'psych'
## The following object is masked from 'package:mclust':
## 
##     sim
library(factoextra)
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
## The following object is masked from 'package:e1071':
## 
##     element
## Welcome to factoextra!
## Want to learn more? See two factoextra-related books at https://www.datanovia.com/en/product/practical-guide-to-principal-component-methods-in-r/

LOAD DATA DAN EKSPLORASI DATA

# Load the Parkinson's dataset from a CSV file in the working directory.
data <- read.csv("parkinsons.csv")
# Inspect the column names and types.
str(data)
## 'data.frame':    195 obs. of  24 variables:
##  $ name            : chr  "phon_R01_S01_1" "phon_R01_S01_2" "phon_R01_S01_3" "phon_R01_S01_4" ...
##  $ MDVP.Fo.Hz.     : num  120 122 117 117 116 ...
##  $ MDVP.Fhi.Hz.    : num  157 149 131 138 142 ...
##  $ MDVP.Flo.Hz.    : num  75 114 112 111 111 ...
##  $ MDVP.Jitter...  : num  0.00784 0.00968 0.0105 0.00997 0.01284 ...
##  $ MDVP.Jitter.Abs.: num  0.00007 0.00008 0.00009 0.00009 0.00011 0.00008 0.00003 0.00003 0.00006 0.00006 ...
##  $ MDVP.RAP        : num  0.0037 0.00465 0.00544 0.00502 0.00655 0.00463 0.00155 0.00144 0.00293 0.00268 ...
##  $ MDVP.PPQ        : num  0.00554 0.00696 0.00781 0.00698 0.00908 0.0075 0.00202 0.00182 0.00332 0.00332 ...
##  $ Jitter.DDP      : num  0.0111 0.0139 0.0163 0.015 0.0197 ...
##  $ MDVP.Shimmer    : num  0.0437 0.0613 0.0523 0.0549 0.0643 ...
##  $ MDVP.Shimmer.dB.: num  0.426 0.626 0.482 0.517 0.584 0.456 0.14 0.134 0.191 0.255 ...
##  $ Shimmer.APQ3    : num  0.0218 0.0313 0.0276 0.0292 0.0349 ...
##  $ Shimmer.APQ5    : num  0.0313 0.0452 0.0386 0.0401 0.0483 ...
##  $ MDVP.APQ        : num  0.0297 0.0437 0.0359 0.0377 0.0447 ...
##  $ Shimmer.DDA     : num  0.0654 0.094 0.0827 0.0877 0.1047 ...
##  $ NHR             : num  0.0221 0.0193 0.0131 0.0135 0.0177 ...
##  $ HNR             : num  21 19.1 20.7 20.6 19.6 ...
##  $ status          : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ RPDE            : num  0.415 0.458 0.43 0.435 0.417 ...
##  $ DFA             : num  0.815 0.82 0.825 0.819 0.823 ...
##  $ spread1         : num  -4.81 -4.08 -4.44 -4.12 -3.75 ...
##  $ spread2         : num  0.266 0.336 0.311 0.334 0.235 ...
##  $ D2              : num  2.3 2.49 2.34 2.41 2.33 ...
##  $ PPE             : num  0.285 0.369 0.333 0.369 0.41 ...
# Keep only the numeric columns (this drops the character column `name`).
data_numeric <- data[vapply(data, is.numeric, logical(1))]
# `status` is the ground-truth label: keep it aside for external validation
# and exclude it from the features that are clustered.
label <- data_numeric[["status"]]
data_clustering <- data_numeric[, names(data_numeric) != "status"]
# Number of missing values per numeric column.
colSums(is.na(data_numeric))
##      MDVP.Fo.Hz.     MDVP.Fhi.Hz.     MDVP.Flo.Hz.   MDVP.Jitter... 
##                0                0                0                0 
## MDVP.Jitter.Abs.         MDVP.RAP         MDVP.PPQ       Jitter.DDP 
##                0                0                0                0 
##     MDVP.Shimmer MDVP.Shimmer.dB.     Shimmer.APQ3     Shimmer.APQ5 
##                0                0                0                0 
##         MDVP.APQ      Shimmer.DDA              NHR              HNR 
##                0                0                0                0 
##           status             RPDE              DFA          spread1 
##                0                0                0                0 
##          spread2               D2              PPE 
##                0                0                0
# Count fully duplicated rows among the numeric columns.
sum(duplicated(data_numeric))
## [1] 0
# Descriptive statistics (mean, sd, skew, kurtosis, ...) for every feature.
describe(data_clustering)
##                  vars   n   mean    sd median trimmed   mad    min    max
## MDVP.Fo.Hz.         1 195 154.23 41.39 148.79  150.80 47.13  88.33 260.10
## MDVP.Fhi.Hz.        2 195 197.10 91.49 175.83  180.97 62.99 102.14 592.03
## MDVP.Flo.Hz.        3 195 116.32 43.52 104.32  109.94 35.11  65.48 239.17
## MDVP.Jitter...      4 195   0.01  0.00   0.00    0.01  0.00   0.00   0.03
## MDVP.Jitter.Abs.    5 195   0.00  0.00   0.00    0.00  0.00   0.00   0.00
## MDVP.RAP            6 195   0.00  0.00   0.00    0.00  0.00   0.00   0.02
## MDVP.PPQ            7 195   0.00  0.00   0.00    0.00  0.00   0.00   0.02
## Jitter.DDP          8 195   0.01  0.01   0.01    0.01  0.00   0.00   0.06
## MDVP.Shimmer        9 195   0.03  0.02   0.02    0.03  0.01   0.01   0.12
## MDVP.Shimmer.dB.   10 195   0.28  0.19   0.22    0.25  0.13   0.09   1.30
## Shimmer.APQ3       11 195   0.02  0.01   0.01    0.01  0.01   0.00   0.06
## Shimmer.APQ5       12 195   0.02  0.01   0.01    0.02  0.01   0.01   0.08
## MDVP.APQ           13 195   0.02  0.02   0.02    0.02  0.01   0.01   0.14
## Shimmer.DDA        14 195   0.05  0.03   0.04    0.04  0.02   0.01   0.17
## NHR                15 195   0.02  0.04   0.01    0.02  0.01   0.00   0.31
## HNR                16 195  21.89  4.43  22.09   22.17  4.37   8.44  33.05
## RPDE               17 195   0.50  0.10   0.50    0.50  0.12   0.26   0.69
## DFA                18 195   0.72  0.06   0.72    0.72  0.06   0.57   0.83
## spread1            19 195  -5.68  1.09  -5.72   -5.74  1.07  -7.96  -2.43
## spread2            20 195   0.23  0.08   0.22    0.23  0.07   0.01   0.45
## D2                 21 195   2.38  0.38   2.36    2.36  0.40   1.42   3.67
## PPE                22 195   0.21  0.09   0.19    0.20  0.09   0.04   0.53
##                   range  skew kurtosis   se
## MDVP.Fo.Hz.      171.77  0.58    -0.67 2.96
## MDVP.Fhi.Hz.     489.88  2.50     7.30 6.55
## MDVP.Flo.Hz.     173.69  1.20     0.57 3.12
## MDVP.Jitter...     0.03  3.04    11.54 0.00
## MDVP.Jitter.Abs.   0.00  2.61    10.42 0.00
## MDVP.RAP           0.02  3.31    13.65 0.00
## MDVP.PPQ           0.02  3.03    11.48 0.00
## Jitter.DDP         0.06  3.31    13.66 0.00
## MDVP.Shimmer       0.11  1.64     3.06 0.00
## MDVP.Shimmer.dB.   1.22  1.97     4.89 0.01
## Shimmer.APQ3       0.05  1.56     2.56 0.00
## Shimmer.APQ5       0.07  1.77     3.68 0.00
## MDVP.APQ           0.13  2.58    10.71 0.00
## Shimmer.DDA        0.16  1.56     2.56 0.00
## NHR                0.31  4.16    21.15 0.00
## HNR               24.61 -0.51     0.53 0.32
## RPDE               0.43 -0.14    -0.95 0.01
## DFA                0.25 -0.03    -0.72 0.00
## spread1            5.53  0.43    -0.11 0.08
## spread2            0.44  0.14    -0.14 0.01
## D2                 2.25  0.42     0.15 0.03
## PPE                0.48  0.79     0.45 0.01
# Pairwise correlation matrix of the clustering features (label excluded).
cor(data_clustering)
##                  MDVP.Fo.Hz. MDVP.Fhi.Hz. MDVP.Flo.Hz. MDVP.Jitter...
## MDVP.Fo.Hz.       1.00000000  0.400984676   0.59654552    -0.11800263
## MDVP.Fhi.Hz.      0.40098468  1.000000000   0.08495125     0.10208635
## MDVP.Flo.Hz.      0.59654552  0.084951251   1.00000000    -0.13991889
## MDVP.Jitter...   -0.11800263  0.102086349  -0.13991889     1.00000000
## MDVP.Jitter.Abs. -0.38202665 -0.029198333  -0.27781524     0.93571398
## MDVP.RAP         -0.07619381  0.097176619  -0.10051932     0.99027559
## MDVP.PPQ         -0.11216543  0.091126194  -0.09582837     0.97425639
## Jitter.DDP       -0.07621269  0.097149867  -0.10048768     0.99027622
## MDVP.Shimmer     -0.09837375  0.002281231  -0.14454332     0.76906323
## MDVP.Shimmer.dB. -0.07374246  0.043465164  -0.11908865     0.80428928
## Shimmer.APQ3     -0.09471706 -0.003743251  -0.15074650     0.74662520
## Shimmer.APQ5     -0.07068178 -0.009996776  -0.10109462     0.72556100
## MDVP.APQ         -0.07777382  0.004936985  -0.10729342     0.75825526
## Shimmer.DDA      -0.09473157 -0.003732893  -0.15073674     0.74663517
## NHR              -0.02198078  0.163766404  -0.10867045     0.90695860
## HNR               0.05914439 -0.024893140   0.21085090    -0.72816507
## RPDE             -0.38389390 -0.112403513  -0.40014307     0.36067348
## DFA              -0.44601329 -0.343096724  -0.05040631     0.09857244
## spread1          -0.41373817 -0.076657801  -0.39485736     0.69357671
## spread2          -0.24945005 -0.002953614  -0.24382940     0.38512347
## D2                0.17797951  0.176322818  -0.10062909     0.43343402
## PPE              -0.37235638 -0.069543002  -0.34007134     0.72154293
##                  MDVP.Jitter.Abs.    MDVP.RAP    MDVP.PPQ  Jitter.DDP
## MDVP.Fo.Hz.           -0.38202665 -0.07619381 -0.11216543 -0.07621269
## MDVP.Fhi.Hz.          -0.02919833  0.09717662  0.09112619  0.09714987
## MDVP.Flo.Hz.          -0.27781524 -0.10051932 -0.09582837 -0.10048768
## MDVP.Jitter...         0.93571398  0.99027559  0.97425639  0.99027622
## MDVP.Jitter.Abs.       1.00000000  0.92291097  0.89777790  0.92291303
## MDVP.RAP               0.92291097  1.00000000  0.95731689  0.99999960
## MDVP.PPQ               0.89777790  0.95731689  1.00000000  0.95731916
## Jitter.DDP             0.92291303  0.99999960  0.95731916  1.00000000
## MDVP.Shimmer           0.70332237  0.75958052  0.79782603  0.75955470
## MDVP.Shimmer.dB.       0.71660128  0.79065155  0.83923894  0.79062060
## Shimmer.APQ3           0.69715303  0.74491236  0.76357992  0.74489376
## Shimmer.APQ5           0.64896070  0.70992682  0.78678047  0.70990709
## MDVP.APQ               0.64879344  0.73745467  0.80413930  0.73743867
## Shimmer.DDA            0.69716971  0.74491923  0.76359216  0.74490062
## NHR                    0.83497218  0.91952071  0.84460353  0.91954818
## HNR                   -0.65680959 -0.72154316 -0.73151047 -0.72149437
## RPDE                   0.44183856  0.34213980  0.33327415  0.34207948
## DFA                    0.17503599  0.06408302  0.19630142  0.06402640
## spread1                0.73577923  0.64832777  0.71648864  0.64832755
## spread2                0.38854284  0.32440718  0.40760548  0.32437685
## D2                     0.31069445  0.42660538  0.41252368  0.42655576
## PPE                    0.74816165  0.67099905  0.76964729  0.67100525
##                  MDVP.Shimmer MDVP.Shimmer.dB. Shimmer.APQ3 Shimmer.APQ5
## MDVP.Fo.Hz.      -0.098373748      -0.07374246 -0.094717062 -0.070681785
## MDVP.Fhi.Hz.      0.002281231       0.04346516 -0.003743251 -0.009996776
## MDVP.Flo.Hz.     -0.144543324      -0.11908865 -0.150746503 -0.101094616
## MDVP.Jitter...    0.769063235       0.80428928  0.746625201  0.725561003
## MDVP.Jitter.Abs.  0.703322372       0.71660128  0.697153027  0.648960696
## MDVP.RAP          0.759580521       0.79065155  0.744912357  0.709926823
## MDVP.PPQ          0.797826025       0.83923894  0.763579916  0.786780474
## Jitter.DDP        0.759554700       0.79062060  0.744893765  0.709907085
## MDVP.Shimmer      1.000000000       0.98725781  0.987625052  0.982835448
## MDVP.Shimmer.dB.  0.987257815       1.00000000  0.963198085  0.973750561
## Shimmer.APQ3      0.987625052       0.96319809  1.000000000  0.960069764
## Shimmer.APQ5      0.982835448       0.97375056  0.960069764  1.000000000
## MDVP.APQ          0.950082893       0.96097672  0.896644519  0.949146111
## Shimmer.DDA       0.987625653       0.96320170  0.999999964  0.960071593
## NHR               0.722194454       0.74447728  0.716206735  0.658079759
## HNR              -0.835270676      -0.82780535 -0.827123311 -0.813752789
## RPDE              0.447423645       0.41068380  0.435242425  0.399902849
## DFA               0.159953618       0.16515658  0.151124464  0.213873102
## spread1           0.654734300       0.65254666  0.610967362  0.646808933
## spread2           0.452024826       0.45431437  0.402242954  0.457194945
## D2                0.507088281       0.51223317  0.467265430  0.502174433
## PPE               0.693770720       0.69505813  0.645376686  0.702455664
##                      MDVP.APQ  Shimmer.DDA         NHR          HNR       RPDE
## MDVP.Fo.Hz.      -0.077773819 -0.094731567 -0.02198078  0.059144387 -0.3838939
## MDVP.Fhi.Hz.      0.004936985 -0.003732893  0.16376640 -0.024893140 -0.1124035
## MDVP.Flo.Hz.     -0.107293423 -0.150736736 -0.10867045  0.210850898 -0.4001431
## MDVP.Jitter...    0.758255255  0.746635172  0.90695860 -0.728165074  0.3606735
## MDVP.Jitter.Abs.  0.648793442  0.697169706  0.83497218 -0.656809593  0.4418386
## MDVP.RAP          0.737454666  0.744919228  0.91952071 -0.721543160  0.3421398
## MDVP.PPQ          0.804139303  0.763592161  0.84460353 -0.731510466  0.3332741
## Jitter.DDP        0.737438675  0.744900620  0.91954818 -0.721494370  0.3420795
## MDVP.Shimmer      0.950082893  0.987625653  0.72219445 -0.835270676  0.4474236
## MDVP.Shimmer.dB.  0.960976723  0.963201704  0.74447728 -0.827805346  0.4106838
## Shimmer.APQ3      0.896644519  0.999999964  0.71620674 -0.827123311  0.4352424
## Shimmer.APQ5      0.949146111  0.960071593  0.65807976 -0.813752789  0.3999028
## MDVP.APQ          1.000000000  0.896646766  0.69401905 -0.800406561  0.4513791
## Shimmer.DDA       0.896646766  1.000000000  0.71621454 -0.827130202  0.4352373
## NHR               0.694019049  0.716214539  1.00000000 -0.714072430  0.3708905
## HNR              -0.800406561 -0.827130202 -0.71407243  1.000000000 -0.5987363
## RPDE              0.451379050  0.435237302  0.37089049 -0.598736337  1.0000000
## DFA               0.157275784  0.151132394 -0.13188174 -0.008664939 -0.1109500
## spread1           0.673158141  0.610971213  0.54086514 -0.673209818  0.5911170
## spread2           0.502187886  0.402223092  0.31809896 -0.431563748  0.4799045
## D2                0.536869028  0.467261374  0.47094876 -0.601401006  0.2369314
## PPE               0.721693953  0.645388954  0.55259127 -0.692875853  0.5458857
##                           DFA    spread1      spread2         D2        PPE
## MDVP.Fo.Hz.      -0.446013292 -0.4137382 -0.249450046  0.1779795 -0.3723564
## MDVP.Fhi.Hz.     -0.343096724 -0.0766578 -0.002953614  0.1763228 -0.0695430
## MDVP.Flo.Hz.     -0.050406307 -0.3948574 -0.243829403 -0.1006291 -0.3400713
## MDVP.Jitter...    0.098572442  0.6935767  0.385123473  0.4334340  0.7215429
## MDVP.Jitter.Abs.  0.175035992  0.7357792  0.388542835  0.3106944  0.7481617
## MDVP.RAP          0.064083019  0.6483278  0.324407179  0.4266054  0.6709990
## MDVP.PPQ          0.196301424  0.7164886  0.407605477  0.4125237  0.7696473
## Jitter.DDP        0.064026401  0.6483276  0.324376852  0.4265558  0.6710053
## MDVP.Shimmer      0.159953618  0.6547343  0.452024826  0.5070883  0.6937707
## MDVP.Shimmer.dB.  0.165156580  0.6525467  0.454314373  0.5122332  0.6950581
## Shimmer.APQ3      0.151124464  0.6109674  0.402242954  0.4672654  0.6453767
## Shimmer.APQ5      0.213873102  0.6468089  0.457194945  0.5021744  0.7024557
## MDVP.APQ          0.157275784  0.6731581  0.502187886  0.5368690  0.7216940
## Shimmer.DDA       0.151132394  0.6109712  0.402223092  0.4672614  0.6453890
## NHR              -0.131881743  0.5408651  0.318098962  0.4709488  0.5525913
## HNR              -0.008664939 -0.6732098 -0.431563748 -0.6014010 -0.6928759
## RPDE             -0.110949960  0.5911170  0.479904528  0.2369314  0.5458857
## DFA               1.000000000  0.1956684  0.166548111 -0.1653810  0.2704447
## spread1           0.195668403  1.0000000  0.652357812  0.4951227  0.9624353
## spread2           0.166548111  0.6523578  1.000000000  0.5235317  0.6447110
## D2               -0.165381035  0.4951227  0.523531735  1.0000000  0.4805845
## PPE               0.270444747  0.9624353  0.644710959  0.4805845  1.0000000
# Correlation heat map of all numeric columns; unlike the matrix printed
# above, this one also includes the `status` label.
corrplot::corrplot(
  cor(data_numeric),
  type = "full",
  tl.col = "black",
  tl.srt = 40,
  tl.cex = 0.5
)

# Box plots of the raw (unscaled) features; las = 2 rotates the axis labels.
boxplot(data_clustering, main = "Boxplot Variabel Penelitian", las = 2)

Preprocessing

# Standardize every feature (centre and scale) so that distance-based methods
# are not dominated by the features with the largest numeric range.
# scale() returns a numeric matrix.
df <- scale(data_clustering)
# Seed the RNG once; the randomized steps below draw from this stream in order.
set.seed(123)

Lima Metode Clustering

1. K-Means

Clustering

Elbow
# Elbow method: total within-cluster sum of squares for K = 1..10.
# vapply() enforces exactly one numeric value per K, whereas sapply() would
# silently change its return type on unexpected output.
wss <- vapply(
  1:10,
  function(k) kmeans(df, centers = k, nstart = 20)$tot.withinss,
  numeric(1)
)

plot(1:10, wss, type = "b", pch = 19,
     xlab = "Jumlah Cluster (K)",
     ylab = "Total Within Sum of Squares",
     main = "Elbow Method")

Silhouette Analysis
# Average silhouette width of a K-Means partition.
#
# k       Number of clusters (the silhouette needs at least two clusters).
# data    Numeric matrix or data frame to cluster; defaults to the scaled
#         data `df`, so existing calls of the form avg_sil(k) still work.
# nstart  Number of random starts passed to kmeans().
#
# Returns a single numeric value: the mean silhouette width over all rows.
avg_sil <- function(k, data = df, nstart = 25) {
  fit <- kmeans(data, centers = k, nstart = nstart)
  sil <- silhouette(fit$cluster, dist(data))
  # Select the width column by name rather than by position.
  mean(sil[, "sil_width"])
}

# Average silhouette width for K = 2..10; vapply() guarantees a numeric vector.
k_values <- 2:10
avg_sil_values <- vapply(k_values, avg_sil, numeric(1))

plot(k_values, avg_sil_values, type = "b", pch = 19,
     xlab = "Jumlah Cluster",
     ylab = "Average Silhouette Width",
     main = "Silhouette Analysis")

Visualisasi

# Final K-Means fit with K = 2. Several random starts are used, as in the
# tuning steps, so the result does not hinge on a single initialization.
km_res <- kmeans(df, centers = 2, nstart = 25)
# `df` is a matrix, so plot() uses only its first two columns as x and y.
plot(df, col = km_res$cluster, main = "K-Means")

# Cluster plot drawn by factoextra.
fviz_cluster(list(data = df, cluster = km_res$cluster), main="K-Means")

Evaluasi Model

Silhouette
# Mean silhouette width of the final K-Means partition.
mean(silhouette(km_res$cluster, dist(df))[, "sil_width"])
## [1] 0.3936954
Dunn-Index
# Internal validation statistics for the K-Means partition.
stats <- cluster.stats(d = dist(df), clustering = km_res$cluster)
paste("Dunn Index:", stats[["dunn"]])
## [1] "Dunn Index: 0.0827622816538801"
# Total within-cluster sum of squares of the K-Means partition.
paste("Within-cluster SS:", stats[["within.cluster.ss"]])
## [1] "Within-cluster SS: 2804.86392622146"
Ari Score
# External validation: agreement between the K-Means clusters and `status`.
ari_score <- adjustedRandIndex(x = km_res$cluster, y = label)
print(paste("Adjusted Rand Index:", ari_score))
## [1] "Adjusted Rand Index: -0.0910349268611108"

2. K-Median

Clustering

Silhouette
# Mean silhouette width of a PAM partition with `k` clusters on the scaled
# data `df`.
# NOTE(review): pam() is Partitioning Around Medoids (k-medoids), which is not
# the same algorithm as k-medians — confirm this is the intended method.
avg_sil_kmed <- function(k) {
  partition <- pam(df, k = k)$clustering
  widths <- silhouette(partition, dist(df))[, "sil_width"]
  mean(widths)
}

# Average silhouette width of PAM for K = 2..10; vapply() guarantees a
# numeric vector with one value per K.
k_values <- 2:10
sil_values <- vapply(k_values, avg_sil_kmed, numeric(1))

plot(k_values, sil_values, type = "b", pch = 19,
     xlab = "Jumlah Cluster",
     ylab = "Average Silhouette",
     main = "Silhouette Analysis")

Visualisasi

# Final PAM fit with K = 2.
# NOTE(review): pam() implements k-medoids, not k-medians — confirm that this
# is the method the section title refers to.
kmed_res <- pam(df, k = 2)
# `df` is a matrix, so plot() uses only its first two columns as x and y.
plot(df, col = kmed_res$clustering, main = "K-Median (PAM)")

# Use the full component name `clustering`; `$cluster` only worked through
# partial matching of list names.
fviz_cluster(list(data = df, cluster = kmed_res$clustering), main="K-Median")

Evaluasi Model

Silhouette
# Mean silhouette width of the final PAM partition. The component is named
# `clustering`; `$cluster` relied on partial matching.
mean(silhouette(kmed_res$clustering, dist(df))[, "sil_width"])
## [1] 0.25392
Dunn-Index
# Internal validation statistics for the PAM partition (full component name
# `clustering` instead of the partially matched `$cluster`).
stats_kmed <- cluster.stats(dist(df), kmed_res$clustering)
paste("Dunn Index:", stats_kmed$dunn)
## [1] "Dunn Index: 0.06765155782393"
# Total within-cluster sum of squares of the PAM partition.
paste("Within-cluster SS:", stats_kmed[["within.cluster.ss"]])
## [1] "Within-cluster SS: 3026.86259287097"
Ari Score
# External validation: agreement between the PAM clusters and `status`
# (full component name `clustering` instead of the partially matched one).
ari_kmed <- adjustedRandIndex(kmed_res$clustering, label)
print(paste("Adjusted Rand Index:", ari_kmed))
## [1] "Adjusted Rand Index: 0.144604822351554"

3. DBSCAN

Clustering

# Both the dbscan and fpc packages export dbscan(); fpc is attached later and
# masks the other one, and `MinPts` is fpc's argument name. Qualify the call
# so the result does not depend on the order of the library() calls.
db_res <- fpc::dbscan(df, eps = 1.0, MinPts = 3)

Visualisasi

# Shift the labels by one so that noise points (label 0) get colour 1.
plot(
  df,
  col = db_res$cluster + 1,
  main = "DBSCAN (0 = Noise)"
)

fviz_cluster(
  list(data = df, cluster = db_res$cluster),
  main = "DBSCAN"
)

Evaluasi

# Drop noise points (DBSCAN label 0) before computing validation metrics.
# NOTE(review): the metrics below therefore describe only the non-noise
# subset and are not directly comparable with methods scored on all rows.
valid <- db_res$cluster != 0
cluster_db <- db_res$cluster[valid]
# drop = FALSE keeps a matrix even when a single row remains.
data_db <- df[valid, , drop = FALSE]
label_db <- label[valid]
Silhouette
# The silhouette needs at least two clusters among the non-noise points.
if (length(unique(cluster_db)) <= 1) {
  print("Cluster tidak cukup untuk dihitung")
} else {
  sil_db <- silhouette(cluster_db, dist(data_db))
  mean(sil_db[, "sil_width"])
}
## [1] 0.6621296
Dunn-Index
# Dunn index and within-cluster SS for the non-noise DBSCAN clusters.
if (length(unique(cluster_db)) <= 1) {
  print("Cluster tidak cukup untuk dihitung")
} else {
  # Re-code the labels as consecutive integers before passing them on.
  stats_db <- cluster.stats(
    d = dist(data_db),
    clustering = as.integer(as.factor(cluster_db))
  )

  print(paste("Dunn Index:", stats_db$dunn))
  print(paste("Within-cluster SS:", stats_db$within.cluster.ss))
}
## [1] "Dunn Index: 1.09871117938903"
## [1] "Within-cluster SS: 6.93090845354017"
Ari Score
# External validation on the non-noise points only.
ari_db <- adjustedRandIndex(x = cluster_db, y = label_db)
print(paste("Adjusted Rand Index:", ari_db))
## [1] "Adjusted Rand Index: 0.280155642023346"

4. Mean Shift

Clustering

# Use the same kernel bandwidth (3) for every feature dimension.
bw <- rep(3, ncol(df))
# NOTE(review): with this bandwidth the recorded run puts 191 observations in
# one cluster and leaves four singletons, so the partition is close to
# degenerate — consider tuning the bandwidth before interpreting the metrics.
ms_res <- meanShift(df, bandwidth = bw)

Visualisasi

# `df` is a matrix, so plot() uses only its first two columns as x and y;
# points are coloured by their Mean Shift assignment.
plot(df, col = ms_res$assignment, main = "Mean Shift")

Evaluasi Model

Silhouette
# Mean silhouette width of the Mean Shift partition.
mean(silhouette(ms_res$assignment, dist(df))[, "sil_width"])
## [1] 0.5918753
# Cluster sizes; read these together with the silhouette value above.
table(ms_res$assignment)
## 
##   1   2   3   4   5 
## 191   1   1   1   1
Dunn-Index
# Internal validation statistics for the Mean Shift partition.
stats_ms <- cluster.stats(d = dist(df), clustering = ms_res$assignment)
paste("Dunn Index:", stats_ms[["dunn"]])
## [1] "Dunn Index: 0.36052736079939"
# Total within-cluster sum of squares of the Mean Shift partition.
paste("Within-cluster SS:", stats_ms[["within.cluster.ss"]])
## [1] "Within-cluster SS: 3153.13229791174"
Ari Score
# External validation: agreement between Mean Shift clusters and `status`.
ari_ms <- adjustedRandIndex(x = ms_res$assignment, y = label)
print(paste("Adjusted Rand Index:", ari_ms))
## [1] "Adjusted Rand Index: -0.0262672281902019"

5. Fuzzy C-Means

Clustering

Silhouette Analysis
# Average silhouette width of the hard partition derived from Fuzzy C-Means.
#
# k     Number of cluster centres requested from cmeans().
# data  Numeric matrix or data frame to cluster; defaults to the scaled data
#       `df`, so existing calls of the form avg_sil_fcm(k) still work.
# m     Fuzzifier passed to cmeans().
#
# Returns the mean silhouette width, or NA when the hard labels do not form a
# partition for which silhouette() yields a width matrix.
avg_sil_fcm <- function(k, data = df, m = 2) {
  fit <- cmeans(data, centers = k, m = m)
  sil <- silhouette(fit$cluster, dist(data))
  # silhouette() gives a bare NA instead of a matrix when the labels collapse
  # to a single cluster; indexing that by column would raise an error.
  if (!is.matrix(sil)) {
    return(NA_real_)
  }
  mean(sil[, "sil_width"])
}

# Average silhouette width of Fuzzy C-Means for K = 2..10; vapply()
# guarantees a numeric vector with one value per K.
k_values_fcm <- 2:10
avg_sil_values_fcm <- vapply(k_values_fcm, avg_sil_fcm, numeric(1))

plot(k_values_fcm, avg_sil_values_fcm, type = "b", pch = 19,
     xlab = "Jumlah Cluster",
     ylab = "Average Silhouette Width",
     main = "Silhouette Analysis")

Visualisasi

# Final Fuzzy C-Means fit with two centres and fuzzifier m = 2.
fcm_res <- cmeans(x = df, centers = 2, m = 2)
# `df` is a matrix, so plot() uses only its first two columns as x and y;
# points are coloured by their hard cluster label.
plot(
  df,
  col = fcm_res$cluster,
  main = "Fuzzy C-Means"
)

fviz_cluster(
  list(data = df, cluster = fcm_res$cluster),
  main = "Fuzzy C-Means"
)

Evaluasi Model

Silhouette
# Mean silhouette width of the hard Fuzzy C-Means partition.
mean(silhouette(fcm_res$cluster, dist(df))[, "sil_width"])
## [1] 0.3078738
Dunn-Index
# Internal validation statistics for the hard Fuzzy C-Means partition.
stats_fcm <- cluster.stats(d = dist(df), clustering = fcm_res$cluster)
paste("Dunn Index:", stats_fcm[["dunn"]])
## [1] "Dunn Index: 0.0461779989979158"
# Total within-cluster sum of squares of the Fuzzy C-Means partition.
paste("Within-cluster SS:", stats_fcm[["within.cluster.ss"]])
## [1] "Within-cluster SS: 2869.53525011602"
Ari Score
# External validation: agreement between Fuzzy C-Means clusters and `status`.
# Stored under its own name so the K-Means result in `ari_score` is not
# overwritten; this matches ari_kmed, ari_db and ari_ms.
ari_fcm <- adjustedRandIndex(fcm_res$cluster, label)
print(paste("Adjusted Rand Index:", ari_fcm))
## [1] "Adjusted Rand Index: 0.0266435046556967"