1. Load Library
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(flexclust)
library(dbscan)
## 
## Attaching package: 'dbscan'
## 
## The following object is masked from 'package:stats':
## 
##     as.dendrogram
library(meanShiftR)
library(e1071)
## 
## Attaching package: 'e1071'
## 
## The following object is masked from 'package:flexclust':
## 
##     bclust
## 
## The following object is masked from 'package:ggplot2':
## 
##     element
library(cluster)
library(fpc)
## 
## Attaching package: 'fpc'
## 
## The following object is masked from 'package:dbscan':
## 
##     dbscan
library(psych)
## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(readxl)
  1. Import Dataset

    df <- read_excel("Dry_Bean_Dataset.xlsx")
    head(df)
    ## # A tibble: 6 × 17
    ##    Area Perimeter MajorAxisLength MinorAxisLength AspectRation Eccentricity
    ##   <dbl>     <dbl>           <dbl>           <dbl>        <dbl>        <dbl>
    ## 1 28395      610.            208.            174.         1.20        0.550
    ## 2 28734      638.            201.            183.         1.10        0.412
    ## 3 29380      624.            213.            176.         1.21        0.563
    ## 4 30008      646.            211.            183.         1.15        0.499
    ## 5 30140      620.            202.            190.         1.06        0.334
    ## 6 30279      635.            213.            182.         1.17        0.520
    ## # ℹ 11 more variables: ConvexArea <dbl>, EquivDiameter <dbl>, Extent <dbl>,
    ## #   Solidity <dbl>, roundness <dbl>, Compactness <dbl>, ShapeFactor1 <dbl>,
    ## #   ShapeFactor2 <dbl>, ShapeFactor3 <dbl>, ShapeFactor4 <dbl>, Class <chr>
  2. Deskripsi Data

    describe(df)
    ##                 vars     n     mean       sd   median  trimmed      mad
    ## Area               1 13611 53048.28 29324.10 44652.00 48011.59 15398.28
    ## Perimeter          2 13611   855.28   214.29   794.94   828.15   174.76
    ## MajorAxisLength    3 13611   320.14    85.69   296.88   310.31    81.50
    ## MinorAxisLength    4 13611   202.27    44.97   192.43   195.85    28.26
    ## AspectRation       5 13611     1.58     0.25     1.55     1.57     0.20
    ## Eccentricity       6 13611     0.75     0.09     0.76     0.76     0.07
    ## ConvexArea         7 13611 53768.20 29774.92 45178.00 48652.37 15665.15
    ## EquivDiameter      8 13611   253.06    59.18   238.44   245.18    42.02
    ## Extent             9 13611     0.75     0.05     0.76     0.75     0.05
    ## Solidity          10 13611     0.99     0.00     0.99     0.99     0.00
    ## roundness         11 13611     0.87     0.06     0.88     0.88     0.06
    ## Compactness       12 13611     0.80     0.06     0.80     0.80     0.05
    ## ShapeFactor1      13 13611     0.01     0.00     0.01     0.01     0.00
    ## ShapeFactor2      14 13611     0.00     0.00     0.00     0.00     0.00
    ## ShapeFactor3      15 13611     0.64     0.10     0.64     0.64     0.08
    ## ShapeFactor4      16 13611     1.00     0.00     1.00     1.00     0.00
    ## Class*            17 13611     4.53     1.83     4.00     4.66     1.48
    ##                      min       max     range  skew kurtosis     se
    ## Area            20420.00 254616.00 234196.00  2.95    10.79 251.35
    ## Perimeter         524.74   1985.37   1460.63  1.63     3.59   1.84
    ## MajorAxisLength   183.60    738.86    555.26  1.36     2.53   0.73
    ## MinorAxisLength   122.51    460.20    337.69  2.24     6.65   0.39
    ## AspectRation        1.02      2.43      1.41  0.58     0.11   0.00
    ## Eccentricity        0.22      0.91      0.69 -1.06     1.39   0.00
    ## ConvexArea      20684.00 263261.00 242577.00  2.94    10.74 255.21
    ## EquivDiameter     161.24    569.37    408.13  1.95     5.19   0.51
    ## Extent              0.56      0.87      0.31 -0.90     0.64   0.00
    ## Solidity            0.92      0.99      0.08 -2.55    12.79   0.00
    ## roundness           0.49      0.99      0.50 -0.64     0.37   0.00
    ## Compactness         0.64      0.99      0.35  0.04    -0.22   0.00
    ## ShapeFactor1        0.00      0.01      0.01 -0.53     0.71   0.00
    ## ShapeFactor2        0.00      0.00      0.00  0.30    -0.86   0.00
    ## ShapeFactor3        0.41      0.97      0.56  0.24    -0.15   0.00
    ## ShapeFactor4        0.95      1.00      0.05 -2.76    13.03   0.00
    ## Class*              1.00      7.00      6.00 -0.32    -0.74   0.02
  3. Seleksi Variabel

    df <- df %>%
      select(-matches("class", ignore.case = TRUE))
  4. Cek Missing Value

    colSums(is.na(df))
    ##            Area       Perimeter MajorAxisLength MinorAxisLength    AspectRation 
    ##               0               0               0               0               0 
    ##    Eccentricity      ConvexArea   EquivDiameter          Extent        Solidity 
    ##               0               0               0               0               0 
    ##       roundness     Compactness    ShapeFactor1    ShapeFactor2    ShapeFactor3 
    ##               0               0               0               0               0 
    ##    ShapeFactor4 
    ##               0
  5. Tabel Missing Value

missing_table <- df %>%
  summarise(across(everything(), ~sum(is.na(.)))) %>%
  pivot_longer(cols = everything(),
               names_to = "variable",
               values_to = "missing_values")

missing_table
## # A tibble: 16 × 2
##    variable        missing_values
##    <chr>                    <int>
##  1 Area                         0
##  2 Perimeter                    0
##  3 MajorAxisLength              0
##  4 MinorAxisLength              0
##  5 AspectRation                 0
##  6 Eccentricity                 0
##  7 ConvexArea                   0
##  8 EquivDiameter                0
##  9 Extent                       0
## 10 Solidity                     0
## 11 roundness                    0
## 12 Compactness                  0
## 13 ShapeFactor1                 0
## 14 ShapeFactor2                 0
## 15 ShapeFactor3                 0
## 16 ShapeFactor4                 0
  1. Handling Missing Value

    df <- df %>%
      mutate(across(everything(),
                    ~ifelse(is.na(.), median(., na.rm = TRUE), .)))
  2. Normalisasi Data

    df_scaled <- scale(df)
    summary(df_scaled)
    ##       Area           Perimeter       MajorAxisLength   MinorAxisLength  
    ##  Min.   :-1.1127   Min.   :-1.5425   Min.   :-1.5933   Min.   :-1.7736  
    ##  1st Qu.:-0.5702   1st Qu.:-0.7082   1st Qu.:-0.7800   1st Qu.:-0.5876  
    ##  Median :-0.2863   Median :-0.2816   Median :-0.2714   Median :-0.2188  
    ##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
    ##  3rd Qu.: 0.2825   3rd Qu.: 0.5690   3rd Qu.: 0.6576   3rd Qu.: 0.3282  
    ##  Max.   : 6.8738   Max.   : 5.2736   Max.   : 4.8862   Max.   : 5.7355  
    ##   AspectRation      Eccentricity       ConvexArea      EquivDiameter    
    ##  Min.   :-2.2636   Min.   :-5.7819   Min.   :-1.1111   Min.   :-1.5516  
    ##  1st Qu.:-0.6119   1st Qu.:-0.3801   1st Qu.:-0.5728   1st Qu.:-0.6421  
    ##  Median :-0.1302   Median : 0.1472   Median :-0.2885   Median :-0.2472  
    ##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
    ##  3rd Qu.: 0.5021   3rd Qu.: 0.6475   3rd Qu.: 0.2863   3rd Qu.: 0.4458  
    ##  Max.   : 3.4339   Max.   : 1.7448   Max.   : 7.0359   Max.   : 5.3451  
    ##      Extent           Solidity          roundness        Compactness     
    ##  Min.   :-3.9607   Min.   :-14.5689   Min.   :-6.4460   Min.   :-2.5811  
    ##  1st Qu.:-0.6336   1st Qu.: -0.3159   1st Qu.:-0.6920   1st Qu.:-0.6059  
    ##  Median : 0.2063   Median :  0.2446   Median : 0.1659   Median : 0.0229  
    ##  Mean   : 0.0000   Mean   :  0.0000   Mean   : 0.0000   Mean   : 0.0000  
    ##  3rd Qu.: 0.7562   3rd Qu.:  0.6159   3rd Qu.: 0.7323   3rd Qu.: 0.5575  
    ##  Max.   : 2.3726   Max.   :  1.6167   Max.   : 1.9725   Max.   : 3.0373  
    ##   ShapeFactor1       ShapeFactor2       ShapeFactor3       ShapeFactor4     
    ##  Min.   :-3.35603   Min.   :-1.93292   Min.   :-2.35617   Min.   :-10.8500  
    ##  1st Qu.:-0.58838   1st Qu.:-0.94387   1st Qu.:-0.62863   1st Qu.: -0.3116  
    ##  Median : 0.07231   Median :-0.03762   Median :-0.01562   Median :  0.3029  
    ##  Mean   : 0.00000   Mean   : 0.00000   Mean   : 0.00000   Mean   :  0.0000  
    ##  3rd Qu.: 0.62749   3rd Qu.: 0.76244   3rd Qu.: 0.52948   3rd Qu.:  0.6457  
    ##  Max.   : 3.44642   Max.   : 3.27086   Max.   : 3.34535   Max.   :  1.0693
  3. Sampling Data

    set.seed(123)
    df_sample <- df_scaled[sample(1:nrow(df_scaled), 500), ]
    
    cat("Jumlah data:", nrow(df_sample), "\n")
    ## Jumlah data: 500
    cat("Jumlah variabel:", ncol(df_sample))
    ## Jumlah variabel: 16
  4. Elbow Method

    wss <- sapply(1:10, function(k){
      kmeans(df_sample, centers = k, nstart = 20)$tot.withinss
    })
    
    plot(1:10, wss, type="b", pch=19,
         xlab="Jumlah Cluster",
         ylab="WSS",
         main="Elbow Method")

  5. Silhouette Method

    avg_sil <- function(k){
      km <- kmeans(df_sample, centers = k, nstart = 25)
      ss <- silhouette(km$cluster, dist(df_sample))
      mean(ss[,3])
    }
    
    k_values <- 2:10
    sil_values <- sapply(k_values, avg_sil)
    ## Warning: did not converge in 10 iterations
    plot(k_values, sil_values, type="b", pch=19,
         xlab="Jumlah Cluster",
         ylab="Silhouette",
         main="Silhouette Method")

  6. K Optimal

    k <- k_values[which.max(sil_values)]
    k
    ## [1] 2
  7. K-Means

    km_res <- kmeans(df_sample, centers = k, nstart = 25)
    km_res$centers      # pusat cluster
    ##         Area  Perimeter MajorAxisLength MinorAxisLength AspectRation
    ## 1 -0.4950036 -0.5888755      -0.6133563      -0.4398724   -0.4479450
    ## 2  0.7557036  0.9223304       0.9618134       0.6498142    0.7542531
    ##   Eccentricity ConvexArea EquivDiameter     Extent   Solidity  roundness
    ## 1   -0.3346927 -0.4963798    -0.5533037  0.1825227  0.2348752  0.5035383
    ## 2    0.6994287  0.7595342     0.8529195 -0.1871576 -0.5860549 -0.9101130
    ##   Compactness ShapeFactor1 ShapeFactor2 ShapeFactor3 ShapeFactor4
    ## 1   0.4201909    0.4683987    0.5561958    0.4058195    0.3671445
    ## 2  -0.7629544   -0.6861022   -0.9764030   -0.7567268   -0.6758116
    km_res$cluster      # hasil cluster tiap data
    ##   [1] 2 1 1 1 1 2 1 1 2 1 1 1 1 2 2 1 2 2 1 1 1 2 1 2 2 2 2 2 1 2 1 1 2 1 1 1 1
    ##  [38] 2 1 1 2 2 1 2 1 2 1 1 1 1 1 1 2 1 1 2 1 2 2 2 1 1 1 1 1 2 2 2 1 1 2 2 1 1
    ##  [75] 1 1 1 1 1 1 2 1 2 2 1 2 2 1 1 1 2 1 1 1 2 2 1 1 2 1 2 1 1 2 2 1 1 2 1 1 1
    ## [112] 1 1 1 1 2 2 1 2 1 1 1 2 2 2 1 2 1 2 1 1 2 1 1 2 1 1 1 1 1 1 1 1 1 1 1 2 2
    ## [149] 2 1 2 1 2 1 1 1 2 1 1 1 1 2 2 2 1 2 1 1 2 1 1 1 1 2 1 2 1 2 1 2 1 2 1 2 1
    ## [186] 1 1 1 2 2 1 1 1 2 1 2 2 1 1 1 2 1 1 2 1 1 2 1 1 1 1 1 1 1 1 2 2 2 1 1 1 1
    ## [223] 2 1 1 2 2 1 1 1 2 1 2 2 1 2 2 2 1 1 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2
    ## [260] 2 1 2 2 2 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 2 2 2 1 1 1
    ## [297] 1 2 1 1 1 2 1 1 1 1 2 1 1 2 2 2 2 2 1 2 1 1 1 2 1 1 2 2 1 2 2 1 1 1 2 1 2
    ## [334] 1 2 1 1 1 2 2 1 2 2 2 2 2 2 1 2 2 1 1 1 1 2 1 1 2 2 1 2 1 1 1 1 1 1 1 2 2
    ## [371] 2 2 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 2 1 2 1 2 1 2 1 1 2 2 1 1 2 1 1 2 2 2 2
    ## [408] 1 1 2 2 2 1 2 2 1 1 1 1 2 1 1 1 2 2 2 2 1 1 2 2 2 1 1 2 2 2 1 2 2 1 1 2 1
    ## [445] 1 1 1 2 1 1 1 1 1 2 2 1 2 1 2 1 1 1 1 2 1 2 2 1 1 2 2 1 2 1 2 1 2 1 1 2 1
    ## [482] 1 1 2 1 1 1 2 2 1 1 2 1 1 1 1 1 2 2 1
    km_res$tot.withinss # total variasi dalam cluster
    ## [1] 4623.542
  1. K-Median

    kmed_res <- kcca(df_sample, k = k, family = kccaFamily("kmedians"))
    ## Found more than one class "kcca" in cache; using the first, from namespace 'flexclust'
    ## Also defined by 'kernlab'
    ## Found more than one class "kcca" in cache; using the first, from namespace 'flexclust'
    ## Also defined by 'kernlab'
    clusters(kmed_res)   # hasil cluster
    ##   [1] 2 2 1 1 1 2 1 1 2 1 1 1 1 2 2 1 2 2 1 1 1 2 1 2 2 2 2 2 1 2 1 1 2 2 1 1 1
    ##  [38] 2 1 1 2 2 1 2 1 2 1 1 1 1 1 1 2 1 1 2 1 2 2 2 1 1 1 1 1 2 2 2 1 1 2 2 1 1
    ##  [75] 1 1 1 1 2 1 2 1 2 2 1 2 2 1 1 1 2 1 1 1 2 2 1 1 2 1 2 1 1 2 2 1 1 2 1 1 1
    ## [112] 1 1 1 1 2 2 1 2 1 1 1 2 2 2 1 2 1 2 1 1 2 1 1 2 2 1 1 1 1 1 1 1 1 1 1 2 2
    ## [149] 2 1 2 1 2 1 2 1 2 1 1 1 1 2 2 2 1 2 1 1 2 1 1 1 1 2 1 2 1 2 1 2 1 2 1 2 1
    ## [186] 1 2 1 2 2 1 1 1 2 1 2 2 1 1 1 2 1 1 2 1 1 2 1 1 1 1 1 1 1 1 2 2 2 1 1 1 1
    ## [223] 2 1 1 2 2 1 1 1 2 1 2 2 1 2 2 2 1 1 2 2 2 2 2 1 1 1 1 1 1 1 2 2 2 2 2 2 2
    ## [260] 2 1 2 2 2 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 2 2 2 1 1 1
    ## [297] 1 2 1 2 1 2 1 1 1 1 2 1 1 2 2 2 2 2 1 2 1 1 1 2 1 1 2 2 1 2 2 1 1 1 2 1 2
    ## [334] 1 2 1 1 1 2 2 1 2 2 2 2 2 2 1 2 2 1 1 1 1 2 1 1 2 2 1 2 1 1 1 1 1 1 1 2 2
    ## [371] 2 2 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 2 1 2 1 2 1 2 1 1 2 2 1 1 2 1 1 2 2 2 2
    ## [408] 1 1 2 2 2 1 2 2 1 1 2 1 2 1 1 1 2 2 2 2 2 1 2 2 2 1 1 2 2 2 1 2 2 1 1 2 1
    ## [445] 1 1 1 2 1 1 1 1 1 2 2 1 2 1 2 1 1 1 1 2 1 2 2 1 1 2 2 1 2 1 2 1 2 1 1 2 1
    ## [482] 1 1 2 1 1 1 2 2 1 1 2 1 1 1 1 1 2 2 1
    parameters(kmed_res) # pusat cluster
    ##            Area  Perimeter MajorAxisLength MinorAxisLength AspectRation
    ## [1,] -0.4993942 -0.6061815      -0.6580358      -0.3956746   -0.3932004
    ## [2,]  0.4267724  0.6406680       0.7529436       0.5218694    0.5780800
    ##      Eccentricity ConvexArea EquivDiameter      Extent   Solidity  roundness
    ## [1,]   -0.1206780 -0.5001257    -0.5396756  0.31241754  0.3566702  0.5535950
    ## [2,]    0.6970405  0.4333782     0.6059833 -0.01362707 -0.2264801 -0.8240637
    ##      Compactness ShapeFactor1 ShapeFactor2 ShapeFactor3 ShapeFactor4
    ## [1,]   0.3113895    0.3268573    0.4735203    0.2757972    0.4879563
    ## [2,]  -0.6801757   -0.7722140   -1.0016005   -0.6989811   -0.4369522

    Jumlah anggota tiap cluster

    table(km_res$cluster)
    ## 
    ##   1   2 
    ## 301 199
  1. kNN Distance Plot

    kNNdistplot(df_sample, k = 5)
    abline(h = 1.5, col = "red", lwd = 2)
    title("kNN Distance Plot")

  2. DBSCAN

    db_res <- dbscan::dbscan(df_sample, eps = 0.8, minPts = 5)
    table(db_res$cluster)
    ## 
    ##   0   1   2   3   4   5   6   7 
    ## 292 143  34   6   6   8   3   8
  3. PCA + Mean Shift

    df_pca <- prcomp(df_sample)$x[,1:2]
    
    ms_res <- meanShiftR::meanShift(
      as.matrix(df_pca),
      bandwidth = c(1.5, 1.5)
    )
    
    ms_cluster <- ms_res$assignment
    table(ms_cluster)
    ## ms_cluster
    ##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20 
    ##   1   5 159  27   8  55   2   7   5   1   6   3   4   4   7   5  40   4  10   3 
    ##  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39  40 
    ##   2   4   3   4   4   3   3   2   3   3   5   1   1   1   7   4   3   1   1   2 
    ##  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60 
    ##   3   1   1   2   4   3   3   1   1   7   2   4   1   5   1   3   1   2   2   1 
    ##  61  62  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77  78  79  80 
    ##   4   2   2   3   1   1   2   2   2   1   1   1   1   3   1   2   2   1   1   1 
    ##  81  82  83  84  85 
    ##   1   1   1   1   1
  4. Fuzzy C-Means

    fcm_res <- cmeans(df_sample, centers = k, m = 2)
    head(fcm_res$membership)
    ##              1         2
    ## [1,] 0.2365864 0.7634136
    ## [2,] 0.5007008 0.4992992
    ## [3,] 0.8354073 0.1645927
    ## [4,] 0.6152742 0.3847258
    ## [5,] 0.8145180 0.1854820
    ## [6,] 0.2253815 0.7746185
  5. Distance

    dist_sample <- dist(df_sample)
    dist_pca <- dist(df_pca)
  6. Silhouette

    sil_km   <- mean(silhouette(km_res$cluster, dist_sample)[,3])
    sil_kmed <- mean(silhouette(clusters(kmed_res), dist_sample)[,3])
    sil_fcm  <- mean(silhouette(fcm_res$cluster, dist_sample)[,3])
  7. Silhouette DBSCAN

    db_cluster <- db_res$cluster
    valid <- db_cluster != 0
    
    if(length(unique(db_cluster[valid])) > 1){
      d <- as.matrix(dist(df_sample))
      sil_db <- mean(silhouette(db_cluster[valid], d[valid, valid])[,3])
    } else {
      sil_db <- NA
    }
  8. Silhouette Mean Shift

    if(length(unique(ms_cluster)) > 1){
      sil_ms <- mean(silhouette(ms_cluster, dist_pca)[,3])
    } else {
      sil_ms <- NA
    }
  9. Dunn Index

    dunn_km   <- cluster.stats(dist_sample, km_res$cluster)$dunn
    dunn_kmed <- cluster.stats(dist_sample, clusters(kmed_res))$dunn
    dunn_fcm  <- cluster.stats(dist_sample, fcm_res$cluster)$dunn
  10. Dunn DBSCAN

    if(length(unique(db_cluster[valid])) > 1){
      dunn_db <- cluster.stats(dist(df_sample[valid,]), db_cluster[valid])$dunn
    } else {
      dunn_db <- NA
    }
  11. Dunn Mean Shift

    if(length(unique(ms_cluster)) > 1){
      dunn_ms <- cluster.stats(dist_pca, ms_cluster)$dunn
    } else {
      dunn_ms <- NA
    }
  12. Tabel Evaluasi

    evaluation_table <- data.frame(
      Method = c("K-Means", "K-Median", "DBSCAN", "Mean Shift", "Fuzzy C-Means"),
      Silhouette = c(sil_km, sil_kmed, sil_db, sil_ms, sil_fcm),
      Dunn_Index = c(dunn_km, dunn_kmed, dunn_db, dunn_ms, dunn_fcm)
    )
    
    evaluation_table
    ##          Method  Silhouette  Dunn_Index
    ## 1       K-Means  0.37757267 0.041532335
    ## 2      K-Median  0.36990395 0.030220443
    ## 3        DBSCAN  0.11091725 0.130895545
    ## 4    Mean Shift -0.09059167 0.006812513
    ## 5 Fuzzy C-Means  0.37288334 0.034605573
  13. Output Evaluasi

    cat("SILHOUETTE\n")
    ## SILHOUETTE
    cat("K-Means :", sil_km, "\n")
    ## K-Means : 0.3775727
    cat("K-Median :", sil_kmed, "\n")
    ## K-Median : 0.369904
    cat("DBSCAN :", sil_db, "\n")
    ## DBSCAN : 0.1109172
    cat("Mean Shift :", sil_ms, "\n")
    ## Mean Shift : -0.09059167
    cat("Fuzzy C-Means :", sil_fcm, "\n")
    ## Fuzzy C-Means : 0.3728833
    cat("\nDUNN INDEX\n")
    ## 
    ## DUNN INDEX
    cat("K-Means :", dunn_km, "\n")
    ## K-Means : 0.04153233
    cat("K-Median :", dunn_kmed, "\n")
    ## K-Median : 0.03022044
    cat("DBSCAN :", dunn_db, "\n")
    ## DBSCAN : 0.1308955
    cat("Mean Shift :", dunn_ms, "\n")
    ## Mean Shift : 0.006812513
    cat("Fuzzy C-Means :", dunn_fcm, "\n")
    ## Fuzzy C-Means : 0.03460557
  14. Interpretasi K-Means

    df_result <- as.data.frame(df_sample)
    df_result$Cluster_KMeans <- km_res$cluster
    
    aggregate(df_result,
              by=list(df_result$Cluster_KMeans),
              mean)
    ##   Group.1       Area  Perimeter MajorAxisLength MinorAxisLength AspectRation
    ## 1       1 -0.4950036 -0.5888755      -0.6133563      -0.4398724   -0.4479450
    ## 2       2  0.7557036  0.9223304       0.9618134       0.6498142    0.7542531
    ##   Eccentricity ConvexArea EquivDiameter     Extent   Solidity  roundness
    ## 1   -0.3346927 -0.4963798    -0.5533037  0.1825227  0.2348752  0.5035383
    ## 2    0.6994287  0.7595342     0.8529195 -0.1871576 -0.5860549 -0.9101130
    ##   Compactness ShapeFactor1 ShapeFactor2 ShapeFactor3 ShapeFactor4
    ## 1   0.4201909    0.4683987    0.5561958    0.4058195    0.3671445
    ## 2  -0.7629544   -0.6861022   -0.9764030   -0.7567268   -0.6758116
    ##   Cluster_KMeans
    ## 1              1
    ## 2              2