# Packages required by this analysis. corrplot is only ever called through
# corrplot::corrplot(), so it must be installed even though it is never
# attached with library().
required_packages <- c(
  "flexclust", "dbscan", "meanShiftR", "e1071", "cluster", "fpc",
  "mclust", "dplyr", "psych", "factoextra", "corrplot"
)

# Install only what is missing, so re-running the script does not download
# and reinstall every package each time. system.file() returns "" for a
# package that is not installed and does not load anything.
is_installed <- vapply(
  required_packages,
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1)
)
if (!all(is_installed)) {
  install.packages(required_packages[!is_installed])
}
library(flexclust)
library(dbscan)
## 
## Attaching package: 'dbscan'
## The following object is masked from 'package:stats':
## 
##     as.dendrogram
library(meanShiftR)
library(e1071)
## 
## Attaching package: 'e1071'
## The following object is masked from 'package:flexclust':
## 
##     bclust
library(cluster)
library(fpc)
## 
## Attaching package: 'fpc'
## The following object is masked from 'package:dbscan':
## 
##     dbscan
library(mclust)
## Package 'mclust' version 6.1.2
## Type 'citation("mclust")' for citing this R package in publications.
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:mclust':
## 
##     count
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(psych)
## 
## Attaching package: 'psych'
## The following object is masked from 'package:mclust':
## 
##     sim
library(factoextra)
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
## The following object is masked from 'package:e1071':
## 
##     element
## Welcome to factoextra!
## Want to learn more? See two factoextra-related books at https://www.datanovia.com/en/product/practical-guide-to-principal-component-methods-in-r/
# Read the raw table from parkinsons.csv and print its column types.
data <- read.csv(file = "parkinsons.csv")
str(object = data)
## 'data.frame':    195 obs. of  24 variables:
##  $ name            : chr  "phon_R01_S01_1" "phon_R01_S01_2" "phon_R01_S01_3" "phon_R01_S01_4" ...
##  $ MDVP.Fo.Hz.     : num  120 122 117 117 116 ...
##  $ MDVP.Fhi.Hz.    : num  157 149 131 138 142 ...
##  $ MDVP.Flo.Hz.    : num  75 114 112 111 111 ...
##  $ MDVP.Jitter...  : num  0.00784 0.00968 0.0105 0.00997 0.01284 ...
##  $ MDVP.Jitter.Abs.: num  0.00007 0.00008 0.00009 0.00009 0.00011 0.00008 0.00003 0.00003 0.00006 0.00006 ...
##  $ MDVP.RAP        : num  0.0037 0.00465 0.00544 0.00502 0.00655 0.00463 0.00155 0.00144 0.00293 0.00268 ...
##  $ MDVP.PPQ        : num  0.00554 0.00696 0.00781 0.00698 0.00908 0.0075 0.00202 0.00182 0.00332 0.00332 ...
##  $ Jitter.DDP      : num  0.0111 0.0139 0.0163 0.015 0.0197 ...
##  $ MDVP.Shimmer    : num  0.0437 0.0613 0.0523 0.0549 0.0643 ...
##  $ MDVP.Shimmer.dB.: num  0.426 0.626 0.482 0.517 0.584 0.456 0.14 0.134 0.191 0.255 ...
##  $ Shimmer.APQ3    : num  0.0218 0.0313 0.0276 0.0292 0.0349 ...
##  $ Shimmer.APQ5    : num  0.0313 0.0452 0.0386 0.0401 0.0483 ...
##  $ MDVP.APQ        : num  0.0297 0.0437 0.0359 0.0377 0.0447 ...
##  $ Shimmer.DDA     : num  0.0654 0.094 0.0827 0.0877 0.1047 ...
##  $ NHR             : num  0.0221 0.0193 0.0131 0.0135 0.0177 ...
##  $ HNR             : num  21 19.1 20.7 20.6 19.6 ...
##  $ status          : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ RPDE            : num  0.415 0.458 0.43 0.435 0.417 ...
##  $ DFA             : num  0.815 0.82 0.825 0.819 0.823 ...
##  $ spread1         : num  -4.81 -4.08 -4.44 -4.12 -3.75 ...
##  $ spread2         : num  0.266 0.336 0.311 0.334 0.235 ...
##  $ D2              : num  2.3 2.49 2.34 2.41 2.33 ...
##  $ PPE             : num  0.285 0.369 0.333 0.369 0.41 ...
# Keep only the numeric columns (this drops the character `name` column).
# vapply() guarantees a logical mask whatever the input looks like.
data_numeric <- data[vapply(data, is.numeric, logical(1))]

# Ground-truth label, set aside so it is not used as a clustering feature.
# `[[` matches the column name exactly (no partial matching).
label <- data_numeric[["status"]]

# Feature table: every numeric column except the label. drop = FALSE keeps a
# data frame even if only one feature column were left.
data_clustering <- data_numeric[, colnames(data_numeric) != "status", drop = FALSE]

# Missing-value count per column.
colSums(is.na(data_numeric))
##      MDVP.Fo.Hz.     MDVP.Fhi.Hz.     MDVP.Flo.Hz.   MDVP.Jitter... 
##                0                0                0                0 
## MDVP.Jitter.Abs.         MDVP.RAP         MDVP.PPQ       Jitter.DDP 
##                0                0                0                0 
##     MDVP.Shimmer MDVP.Shimmer.dB.     Shimmer.APQ3     Shimmer.APQ5 
##                0                0                0                0 
##         MDVP.APQ      Shimmer.DDA              NHR              HNR 
##                0                0                0                0 
##           status             RPDE              DFA          spread1 
##                0                0                0                0 
##          spread2               D2              PPE 
##                0                0                0
# Number of rows that exactly repeat an earlier row.
nrow(data_numeric[duplicated(data_numeric), , drop = FALSE])
## [1] 0
# Per-column descriptive statistics for the clustering features.
describe(data_clustering)
##                  vars   n   mean    sd median trimmed   mad    min    max
## MDVP.Fo.Hz.         1 195 154.23 41.39 148.79  150.80 47.13  88.33 260.10
## MDVP.Fhi.Hz.        2 195 197.10 91.49 175.83  180.97 62.99 102.14 592.03
## MDVP.Flo.Hz.        3 195 116.32 43.52 104.32  109.94 35.11  65.48 239.17
## MDVP.Jitter...      4 195   0.01  0.00   0.00    0.01  0.00   0.00   0.03
## MDVP.Jitter.Abs.    5 195   0.00  0.00   0.00    0.00  0.00   0.00   0.00
## MDVP.RAP            6 195   0.00  0.00   0.00    0.00  0.00   0.00   0.02
## MDVP.PPQ            7 195   0.00  0.00   0.00    0.00  0.00   0.00   0.02
## Jitter.DDP          8 195   0.01  0.01   0.01    0.01  0.00   0.00   0.06
## MDVP.Shimmer        9 195   0.03  0.02   0.02    0.03  0.01   0.01   0.12
## MDVP.Shimmer.dB.   10 195   0.28  0.19   0.22    0.25  0.13   0.09   1.30
## Shimmer.APQ3       11 195   0.02  0.01   0.01    0.01  0.01   0.00   0.06
## Shimmer.APQ5       12 195   0.02  0.01   0.01    0.02  0.01   0.01   0.08
## MDVP.APQ           13 195   0.02  0.02   0.02    0.02  0.01   0.01   0.14
## Shimmer.DDA        14 195   0.05  0.03   0.04    0.04  0.02   0.01   0.17
## NHR                15 195   0.02  0.04   0.01    0.02  0.01   0.00   0.31
## HNR                16 195  21.89  4.43  22.09   22.17  4.37   8.44  33.05
## RPDE               17 195   0.50  0.10   0.50    0.50  0.12   0.26   0.69
## DFA                18 195   0.72  0.06   0.72    0.72  0.06   0.57   0.83
## spread1            19 195  -5.68  1.09  -5.72   -5.74  1.07  -7.96  -2.43
## spread2            20 195   0.23  0.08   0.22    0.23  0.07   0.01   0.45
## D2                 21 195   2.38  0.38   2.36    2.36  0.40   1.42   3.67
## PPE                22 195   0.21  0.09   0.19    0.20  0.09   0.04   0.53
##                   range  skew kurtosis   se
## MDVP.Fo.Hz.      171.77  0.58    -0.67 2.96
## MDVP.Fhi.Hz.     489.88  2.50     7.30 6.55
## MDVP.Flo.Hz.     173.69  1.20     0.57 3.12
## MDVP.Jitter...     0.03  3.04    11.54 0.00
## MDVP.Jitter.Abs.   0.00  2.61    10.42 0.00
## MDVP.RAP           0.02  3.31    13.65 0.00
## MDVP.PPQ           0.02  3.03    11.48 0.00
## Jitter.DDP         0.06  3.31    13.66 0.00
## MDVP.Shimmer       0.11  1.64     3.06 0.00
## MDVP.Shimmer.dB.   1.22  1.97     4.89 0.01
## Shimmer.APQ3       0.05  1.56     2.56 0.00
## Shimmer.APQ5       0.07  1.77     3.68 0.00
## MDVP.APQ           0.13  2.58    10.71 0.00
## Shimmer.DDA        0.16  1.56     2.56 0.00
## NHR                0.31  4.16    21.15 0.00
## HNR               24.61 -0.51     0.53 0.32
## RPDE               0.43 -0.14    -0.95 0.01
## DFA                0.25 -0.03    -0.72 0.00
## spread1            5.53  0.43    -0.11 0.08
## spread2            0.44  0.14    -0.14 0.01
## D2                 2.25  0.42     0.15 0.03
## PPE                0.48  0.79     0.45 0.01
# Pairwise Pearson correlations between the clustering features.
cor(x = data_clustering, method = "pearson")
##                  MDVP.Fo.Hz. MDVP.Fhi.Hz. MDVP.Flo.Hz. MDVP.Jitter...
## MDVP.Fo.Hz.       1.00000000  0.400984676   0.59654552    -0.11800263
## MDVP.Fhi.Hz.      0.40098468  1.000000000   0.08495125     0.10208635
## MDVP.Flo.Hz.      0.59654552  0.084951251   1.00000000    -0.13991889
## MDVP.Jitter...   -0.11800263  0.102086349  -0.13991889     1.00000000
## MDVP.Jitter.Abs. -0.38202665 -0.029198333  -0.27781524     0.93571398
## MDVP.RAP         -0.07619381  0.097176619  -0.10051932     0.99027559
## MDVP.PPQ         -0.11216543  0.091126194  -0.09582837     0.97425639
## Jitter.DDP       -0.07621269  0.097149867  -0.10048768     0.99027622
## MDVP.Shimmer     -0.09837375  0.002281231  -0.14454332     0.76906323
## MDVP.Shimmer.dB. -0.07374246  0.043465164  -0.11908865     0.80428928
## Shimmer.APQ3     -0.09471706 -0.003743251  -0.15074650     0.74662520
## Shimmer.APQ5     -0.07068178 -0.009996776  -0.10109462     0.72556100
## MDVP.APQ         -0.07777382  0.004936985  -0.10729342     0.75825526
## Shimmer.DDA      -0.09473157 -0.003732893  -0.15073674     0.74663517
## NHR              -0.02198078  0.163766404  -0.10867045     0.90695860
## HNR               0.05914439 -0.024893140   0.21085090    -0.72816507
## RPDE             -0.38389390 -0.112403513  -0.40014307     0.36067348
## DFA              -0.44601329 -0.343096724  -0.05040631     0.09857244
## spread1          -0.41373817 -0.076657801  -0.39485736     0.69357671
## spread2          -0.24945005 -0.002953614  -0.24382940     0.38512347
## D2                0.17797951  0.176322818  -0.10062909     0.43343402
## PPE              -0.37235638 -0.069543002  -0.34007134     0.72154293
##                  MDVP.Jitter.Abs.    MDVP.RAP    MDVP.PPQ  Jitter.DDP
## MDVP.Fo.Hz.           -0.38202665 -0.07619381 -0.11216543 -0.07621269
## MDVP.Fhi.Hz.          -0.02919833  0.09717662  0.09112619  0.09714987
## MDVP.Flo.Hz.          -0.27781524 -0.10051932 -0.09582837 -0.10048768
## MDVP.Jitter...         0.93571398  0.99027559  0.97425639  0.99027622
## MDVP.Jitter.Abs.       1.00000000  0.92291097  0.89777790  0.92291303
## MDVP.RAP               0.92291097  1.00000000  0.95731689  0.99999960
## MDVP.PPQ               0.89777790  0.95731689  1.00000000  0.95731916
## Jitter.DDP             0.92291303  0.99999960  0.95731916  1.00000000
## MDVP.Shimmer           0.70332237  0.75958052  0.79782603  0.75955470
## MDVP.Shimmer.dB.       0.71660128  0.79065155  0.83923894  0.79062060
## Shimmer.APQ3           0.69715303  0.74491236  0.76357992  0.74489376
## Shimmer.APQ5           0.64896070  0.70992682  0.78678047  0.70990709
## MDVP.APQ               0.64879344  0.73745467  0.80413930  0.73743867
## Shimmer.DDA            0.69716971  0.74491923  0.76359216  0.74490062
## NHR                    0.83497218  0.91952071  0.84460353  0.91954818
## HNR                   -0.65680959 -0.72154316 -0.73151047 -0.72149437
## RPDE                   0.44183856  0.34213980  0.33327415  0.34207948
## DFA                    0.17503599  0.06408302  0.19630142  0.06402640
## spread1                0.73577923  0.64832777  0.71648864  0.64832755
## spread2                0.38854284  0.32440718  0.40760548  0.32437685
## D2                     0.31069445  0.42660538  0.41252368  0.42655576
## PPE                    0.74816165  0.67099905  0.76964729  0.67100525
##                  MDVP.Shimmer MDVP.Shimmer.dB. Shimmer.APQ3 Shimmer.APQ5
## MDVP.Fo.Hz.      -0.098373748      -0.07374246 -0.094717062 -0.070681785
## MDVP.Fhi.Hz.      0.002281231       0.04346516 -0.003743251 -0.009996776
## MDVP.Flo.Hz.     -0.144543324      -0.11908865 -0.150746503 -0.101094616
## MDVP.Jitter...    0.769063235       0.80428928  0.746625201  0.725561003
## MDVP.Jitter.Abs.  0.703322372       0.71660128  0.697153027  0.648960696
## MDVP.RAP          0.759580521       0.79065155  0.744912357  0.709926823
## MDVP.PPQ          0.797826025       0.83923894  0.763579916  0.786780474
## Jitter.DDP        0.759554700       0.79062060  0.744893765  0.709907085
## MDVP.Shimmer      1.000000000       0.98725781  0.987625052  0.982835448
## MDVP.Shimmer.dB.  0.987257815       1.00000000  0.963198085  0.973750561
## Shimmer.APQ3      0.987625052       0.96319809  1.000000000  0.960069764
## Shimmer.APQ5      0.982835448       0.97375056  0.960069764  1.000000000
## MDVP.APQ          0.950082893       0.96097672  0.896644519  0.949146111
## Shimmer.DDA       0.987625653       0.96320170  0.999999964  0.960071593
## NHR               0.722194454       0.74447728  0.716206735  0.658079759
## HNR              -0.835270676      -0.82780535 -0.827123311 -0.813752789
## RPDE              0.447423645       0.41068380  0.435242425  0.399902849
## DFA               0.159953618       0.16515658  0.151124464  0.213873102
## spread1           0.654734300       0.65254666  0.610967362  0.646808933
## spread2           0.452024826       0.45431437  0.402242954  0.457194945
## D2                0.507088281       0.51223317  0.467265430  0.502174433
## PPE               0.693770720       0.69505813  0.645376686  0.702455664
##                      MDVP.APQ  Shimmer.DDA         NHR          HNR       RPDE
## MDVP.Fo.Hz.      -0.077773819 -0.094731567 -0.02198078  0.059144387 -0.3838939
## MDVP.Fhi.Hz.      0.004936985 -0.003732893  0.16376640 -0.024893140 -0.1124035
## MDVP.Flo.Hz.     -0.107293423 -0.150736736 -0.10867045  0.210850898 -0.4001431
## MDVP.Jitter...    0.758255255  0.746635172  0.90695860 -0.728165074  0.3606735
## MDVP.Jitter.Abs.  0.648793442  0.697169706  0.83497218 -0.656809593  0.4418386
## MDVP.RAP          0.737454666  0.744919228  0.91952071 -0.721543160  0.3421398
## MDVP.PPQ          0.804139303  0.763592161  0.84460353 -0.731510466  0.3332741
## Jitter.DDP        0.737438675  0.744900620  0.91954818 -0.721494370  0.3420795
## MDVP.Shimmer      0.950082893  0.987625653  0.72219445 -0.835270676  0.4474236
## MDVP.Shimmer.dB.  0.960976723  0.963201704  0.74447728 -0.827805346  0.4106838
## Shimmer.APQ3      0.896644519  0.999999964  0.71620674 -0.827123311  0.4352424
## Shimmer.APQ5      0.949146111  0.960071593  0.65807976 -0.813752789  0.3999028
## MDVP.APQ          1.000000000  0.896646766  0.69401905 -0.800406561  0.4513791
## Shimmer.DDA       0.896646766  1.000000000  0.71621454 -0.827130202  0.4352373
## NHR               0.694019049  0.716214539  1.00000000 -0.714072430  0.3708905
## HNR              -0.800406561 -0.827130202 -0.71407243  1.000000000 -0.5987363
## RPDE              0.451379050  0.435237302  0.37089049 -0.598736337  1.0000000
## DFA               0.157275784  0.151132394 -0.13188174 -0.008664939 -0.1109500
## spread1           0.673158141  0.610971213  0.54086514 -0.673209818  0.5911170
## spread2           0.502187886  0.402223092  0.31809896 -0.431563748  0.4799045
## D2                0.536869028  0.467261374  0.47094876 -0.601401006  0.2369314
## PPE               0.721693953  0.645388954  0.55259127 -0.692875853  0.5458857
##                           DFA    spread1      spread2         D2        PPE
## MDVP.Fo.Hz.      -0.446013292 -0.4137382 -0.249450046  0.1779795 -0.3723564
## MDVP.Fhi.Hz.     -0.343096724 -0.0766578 -0.002953614  0.1763228 -0.0695430
## MDVP.Flo.Hz.     -0.050406307 -0.3948574 -0.243829403 -0.1006291 -0.3400713
## MDVP.Jitter...    0.098572442  0.6935767  0.385123473  0.4334340  0.7215429
## MDVP.Jitter.Abs.  0.175035992  0.7357792  0.388542835  0.3106944  0.7481617
## MDVP.RAP          0.064083019  0.6483278  0.324407179  0.4266054  0.6709990
## MDVP.PPQ          0.196301424  0.7164886  0.407605477  0.4125237  0.7696473
## Jitter.DDP        0.064026401  0.6483276  0.324376852  0.4265558  0.6710053
## MDVP.Shimmer      0.159953618  0.6547343  0.452024826  0.5070883  0.6937707
## MDVP.Shimmer.dB.  0.165156580  0.6525467  0.454314373  0.5122332  0.6950581
## Shimmer.APQ3      0.151124464  0.6109674  0.402242954  0.4672654  0.6453767
## Shimmer.APQ5      0.213873102  0.6468089  0.457194945  0.5021744  0.7024557
## MDVP.APQ          0.157275784  0.6731581  0.502187886  0.5368690  0.7216940
## Shimmer.DDA       0.151132394  0.6109712  0.402223092  0.4672614  0.6453890
## NHR              -0.131881743  0.5408651  0.318098962  0.4709488  0.5525913
## HNR              -0.008664939 -0.6732098 -0.431563748 -0.6014010 -0.6928759
## RPDE             -0.110949960  0.5911170  0.479904528  0.2369314  0.5458857
## DFA               1.000000000  0.1956684  0.166548111 -0.1653810  0.2704447
## spread1           0.195668403  1.0000000  0.652357812  0.4951227  0.9624353
## spread2           0.166548111  0.6523578  1.000000000  0.5235317  0.6447110
## D2               -0.165381035  0.4951227  0.523531735  1.0000000  0.4805845
## PPE               0.270444747  0.9624353  0.644710959  0.4805845  1.0000000
# Correlation plot over all numeric columns, `status` included.
corrplot::corrplot(
  corr = cor(data_numeric),
  type = "full",
  tl.col = "black",
  tl.cex = 0.5,
  tl.srt = 40
)

# Kaiser-Meyer-Olkin factor adequacy, computed from the correlation matrix
# of the clustering features only.
r <- cor(data_clustering)
KMO(r = r)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = r)
## Overall MSA =  0.89
## MSA for each item = 
##      MDVP.Fo.Hz.     MDVP.Fhi.Hz.     MDVP.Flo.Hz.   MDVP.Jitter... 
##             0.53             0.65             0.68             0.91 
## MDVP.Jitter.Abs.         MDVP.RAP         MDVP.PPQ       Jitter.DDP 
##             0.88             0.90             0.88             0.90 
##     MDVP.Shimmer MDVP.Shimmer.dB.     Shimmer.APQ3     Shimmer.APQ5 
##             0.89             0.95             0.90             0.90 
##         MDVP.APQ      Shimmer.DDA              NHR              HNR 
##             0.91             0.90             0.96             0.93 
##             RPDE              DFA          spread1          spread2 
##             0.81             0.49             0.89             0.87 
##               D2              PPE 
##             0.85             0.87
# Bartlett's test of sphericity: tests whether the correlation matrix `r`
# differs from an identity matrix. It is given the correlation matrix and the
# number of observations it was computed from.
#
# stats::bartlett.test() is a different procedure (homogeneity of variances
# across groups); applied to the raw feature table it only compares the
# column variances and says nothing about the correlation structure.
cortest.bartlett(r, n = nrow(data_clustering))
# NOTE: re-run this line to record its output; the result previously pasted
# here came from the homogeneity-of-variances test and no longer applies.
# Center and scale every feature column before computing distances.
df <- scale(data_clustering)

# Elbow curve: total within-cluster sum of squares for K = 1..10, taking the
# best of 20 random starts for each K.
set.seed(123)
wss <- vapply(
  seq_len(10),
  function(n_clusters) {
    fit <- kmeans(df, centers = n_clusters, nstart = 20)
    fit$tot.withinss
  },
  numeric(1)
)

plot(
  x = seq_len(10),
  y = wss,
  type = "b",
  pch = 19,
  xlab = "Jumlah Cluster (K)",
  ylab = "Total Within Sum of Squares",
  main = "Elbow Method"
)

# Mean silhouette width of a k-means partition of the global matrix `df`.
#
# k: number of clusters to fit.
# Returns a single number: the average of the per-observation silhouette
# widths, using Euclidean distances between the rows of `df` and the best of
# 25 random k-means starts.
avg_sil <- function(k) {
  fit <- kmeans(df, centers = k, nstart = 25)
  widths <- silhouette(fit$cluster, dist(df))[, "sil_width"]
  mean(widths)
}

# Average silhouette width for each candidate number of clusters.
k_values <- 2:10
avg_sil_values <- vapply(k_values, avg_sil, numeric(1))

plot(
  x = k_values,
  y = avg_sil_values,
  type = "b",
  pch = 19,
  xlab = "Jumlah Cluster",
  ylab = "Average Silhouette Width",
  main = "Silhouette Analysis"
)

# Fit the five clustering models that are compared in the plots below, all on
# the standardized matrix `df`.
#
# NOTE: the result names `kmeans` and `dbscan` shadow the functions of the
# same name. Function calls still resolve (R skips non-function objects when
# looking up a function), and the names are kept because the plotting code
# reads `kmeans$cluster` and `dbscan$cluster`.

# Fixed seed so the randomly initialised fits (k-means, fuzzy c-means) give
# the same result when this section is run on its own.
set.seed(123)

# nstart = 25 matches the silhouette search above; a single random start can
# stop in a poor local optimum.
kmeans <- stats::kmeans(df, centers = 2, nstart = 25)

# pam() is Partitioning Around Medoids (k-medoids); the object keeps the
# name `kmedian` because later code refers to it.
kmedian <- cluster::pam(df, k = 2)

# Both the dbscan and fpc packages export dbscan(); fpc is attached later and
# masks the other. `MinPts` is fpc's spelling of the argument, so the call is
# pinned to fpc instead of depending on the library() order.
dbscan <- fpc::dbscan(df, eps = 0.7, MinPts = 5)

ms <- meanShiftR::meanShift(df)
fcm <- e1071::cmeans(df, centers = 2, m = 2)
# One base-graphics scatter plot per labelling, drawn in the order listed.
# plot() on a matrix uses its first two columns as the x and y axes.
# DBSCAN labels and `status` are shifted by one so that 0 maps to colour 1.
invisible(Map(
  function(point_colours, plot_title) {
    plot(df, col = point_colours, main = plot_title)
  },
  list(
    kmeans$cluster,
    kmedian$clustering,
    dbscan$cluster + 1,
    ms$assignment,
    fcm$cluster,
    label + 1
  ),
  c(
    "K-Means",
    "K-Median (PAM)",
    "DBSCAN (0 = Noise)",
    "Mean Shift",
    "Fuzzy C-Means",
    "Status"
  )
))

# factoextra cluster plots for four of the fitted models, drawn in the order
# listed. The ggplot objects are printed explicitly because they are created
# inside a function rather than at top level.
invisible(Map(
  function(assignment, plot_title) {
    print(fviz_cluster(list(data = df, cluster = assignment), main = plot_title))
  },
  list(
    kmeans$cluster,
    kmedian$clustering,
    dbscan$cluster,
    fcm$cluster
  ),
  c("K-Means", "K-Median", "DBSCAN", "Fuzzy C-Means")
))