Package yang diperlukan

library(MASS)
library(cluster)
library(mclust)
library(dplyr)
library(effectsize)
library(ggplot2)

Fungsi Pembangkit Data

#' Simulate a two-dimensional data set made of three Gaussian clusters.
#'
#' Draws the same number of points from three bivariate normal distributions
#' centred at (0, 0), (5, 5) and (10, 0), all sharing the covariance matrix
#' sigma^2 * I, and stacks them in that order.
#'
#' @param n Total number of observations. Must be a positive multiple of 3 so
#'   that the three clusters have equal size.
#' @param sigma Common standard deviation of both coordinates (non-negative).
#' @return A list with `X`, an n x 2 numeric matrix, and `label_true`, a
#'   numeric vector of length n holding the generating cluster (1, 2 or 3).
generate_data <- function(n, sigma) {
  is_scalar <- function(v) is.numeric(v) && length(v) == 1L && is.finite(v)

  # Fail early with a readable message instead of an obscure matrix error
  # raised from inside mvrnorm() when n / 3 is not a whole number.
  if (!is_scalar(n) || n < 3 || n %% 3 != 0) {
    stop("`n` must be a positive multiple of 3.", call. = FALSE)
  }
  if (!is_scalar(sigma) || sigma < 0) {
    stop("`sigma` must be a single non-negative number.", call. = FALSE)
  }

  n_per_cluster <- n %/% 3
  centres <- list(c(0, 0), c(5, 5), c(10, 0))
  Sigma <- diag(sigma^2, nrow = 2)

  # One draw per centre, in order, so the random-number stream is consumed in
  # the same sequence as three explicit mvrnorm() calls.
  draws <- lapply(centres, function(mu) {
    # matrix() keeps a single draw as a 1 x 2 row instead of a bare vector
    matrix(mvrnorm(n = n_per_cluster, mu = mu, Sigma = Sigma), ncol = 2)
  })

  list(
    X = do.call(rbind, draws),
    label_true = rep(c(1, 2, 3), each = n_per_cluster)
  )
}

Fungsi menghitung parameter alpha (a) dan beta (b) Silhouette

#' Compute the two silhouette components for every observation.
#'
#' For observation i, `a[i]` is the mean distance to the other members of its
#' own cluster (0 when it is the only member) and `b[i]` is the smallest mean
#' distance to the members of any other cluster.
#'
#' @param X Data matrix with one row per observation. Only used to build the
#'   distance matrix when `D` is NULL.
#' @param cluster_labels Cluster assignment of each observation.
#' @param D Optional precomputed distances (a `dist` object or a square
#'   matrix). When NULL, Euclidean distances are computed from `X`.
#' @return A list with numeric vectors `a` and `b`, one entry per observation.
#'   When there is only one cluster, every `b` is `Inf`.
compute_ab <- function(X, cluster_labels, D = NULL) {
  if (is.null(D)) {
    D <- dist(X) # Euclidean distance by default
  }
  D <- as.matrix(D)

  n <- nrow(D)
  if (length(cluster_labels) != n) {
    stop("`cluster_labels` must have one entry per observation.", call. = FALSE)
  }
  if (anyNA(cluster_labels)) {
    stop("`cluster_labels` must not contain missing values.", call. = FALSE)
  }

  # Member indices of every cluster, computed once instead of once per point
  unique_clusters <- sort(unique(cluster_labels))
  own_pos <- match(cluster_labels, unique_clusters)
  members <- lapply(
    seq_along(unique_clusters),
    function(j) which(own_pos == j)
  )

  a <- numeric(n)
  b <- numeric(n)

  for (i in seq_len(n)) {
    # a: mean distance to the rest of the point's own cluster
    peers <- setdiff(members[[own_pos[i]]], i)
    a[i] <- if (length(peers) > 0) mean(D[i, peers]) else 0

    # b: closest other cluster, measured by mean distance to its members
    rivals <- members[-own_pos[i]]
    b[i] <- if (length(rivals) > 0) {
      min(vapply(rivals, function(idx) mean(D[i, idx]), numeric(1)))
    } else {
      Inf # no other cluster exists; avoids min() on an empty vector
    }
  }

  list(
    a = a,
    b = b
  )
}

Fungsi Setiap Replikasi Dalam Kombinasi Simulasi Tertentu

#' Run one replication of the simulation for a single design cell.
#'
#' Generates a data set, clusters it with k-means (25 random starts) and
#' evaluates the partition with the adjusted Rand index, the mean silhouette
#' width and the mean / standard deviation of the silhouette components a, b.
#'
#' @param n Total sample size passed to `generate_data()`.
#' @param sigma Noise level passed to `generate_data()`.
#' @param k Number of clusters requested from `kmeans()`.
#' @return A one-row data frame with columns ARI, Mean_Silhouette, Mean_a,
#'   SD_a, Mean_b and SD_b.
run_single_simulation <- function(n, sigma, k) {
  sim <- generate_data(n = n, sigma = sigma)
  points <- sim$X
  truth <- sim$label_true

  fit <- kmeans(points, centers = k, nstart = 25)
  assigned <- fit$cluster

  # The distance object is built once and shared by both silhouette routines
  pair_dist <- dist(points)
  sil_obj <- silhouette(assigned, pair_dist)
  parts <- compute_ab(points, assigned, D = pair_dist)

  data.frame(
    ARI = adjustedRandIndex(truth, assigned),
    Mean_Silhouette = mean(sil_obj[, "sil_width"]),
    Mean_a = mean(parts$a),
    SD_a = sd(parts$a),
    Mean_b = mean(parts$b),
    SD_b = sd(parts$b)
  )
}

Desain Simulasi

# Sample sizes
sample_sizes <- c(45, 180, 450)

# Noise levels
noise_levels <- c(0.5, 1.5, 3.0)

# Numbers of clusters requested from the k-means algorithm
k_values <- c(2, 3, 4)

# Number of replications
R <- 1000

Simulasi Monte Carlo

# Fix the random seed so the whole study can be reproduced exactly; the
# particular value is arbitrary.
set.seed(123)

counter <- 1

# Number of rows the final results data frame will have
total_runs <-
  length(sample_sizes) *
  length(noise_levels) *
  length(k_values) *
  R

results_list <- vector("list", total_runs)  # allocated once

for (n in sample_sizes) {
  for (noise in noise_levels) {
    for (k in k_values) {
      # seq_len() runs zero times when R is 0, whereas 1:R would yield c(1, 0)
      for (rep in seq_len(R)) {

        # One replication, tagged with the design cell it belongs to
        temp <- run_single_simulation(n = n, sigma = noise, k = k)
        temp$n <- n
        temp$noise <- noise
        temp$k <- k
        temp$replication <- rep

        results_list[[counter]] <- temp

        # Report progress every 100 runs
        if (counter %% 100 == 0) {
          cat("Progress:", counter, "/", total_runs, "\n")
        }

        counter <- counter + 1
      }
    }
  }
}
## Progress: 100 / 27000 
## Progress: 200 / 27000 
## Progress: 300 / 27000 
## Progress: 400 / 27000 
## Progress: 500 / 27000 
## Progress: 600 / 27000 
## Progress: 700 / 27000 
## Progress: 800 / 27000 
## Progress: 900 / 27000 
## Progress: 1000 / 27000 
## Progress: 1100 / 27000 
## Progress: 1200 / 27000 
## Progress: 1300 / 27000 
## Progress: 1400 / 27000 
## Progress: 1500 / 27000 
## Progress: 1600 / 27000 
## Progress: 1700 / 27000 
## Progress: 1800 / 27000 
## Progress: 1900 / 27000 
## Progress: 2000 / 27000 
## Progress: 2100 / 27000 
## Progress: 2200 / 27000 
## Progress: 2300 / 27000 
## Progress: 2400 / 27000 
## Progress: 2500 / 27000 
## Progress: 2600 / 27000 
## Progress: 2700 / 27000 
## Progress: 2800 / 27000 
## Progress: 2900 / 27000 
## Progress: 3000 / 27000 
## Progress: 3100 / 27000 
## Progress: 3200 / 27000 
## Progress: 3300 / 27000 
## Progress: 3400 / 27000 
## Progress: 3500 / 27000 
## Progress: 3600 / 27000 
## Progress: 3700 / 27000 
## Progress: 3800 / 27000 
## Progress: 3900 / 27000 
## Progress: 4000 / 27000 
## Progress: 4100 / 27000 
## Progress: 4200 / 27000 
## Progress: 4300 / 27000 
## Progress: 4400 / 27000 
## Progress: 4500 / 27000 
## Progress: 4600 / 27000 
## Progress: 4700 / 27000 
## Progress: 4800 / 27000 
## Progress: 4900 / 27000 
## Progress: 5000 / 27000 
## Progress: 5100 / 27000 
## Progress: 5200 / 27000 
## Progress: 5300 / 27000 
## Progress: 5400 / 27000 
## Progress: 5500 / 27000 
## Progress: 5600 / 27000 
## Progress: 5700 / 27000 
## Progress: 5800 / 27000 
## Progress: 5900 / 27000 
## Progress: 6000 / 27000 
## Progress: 6100 / 27000 
## Progress: 6200 / 27000 
## Progress: 6300 / 27000 
## Progress: 6400 / 27000 
## Progress: 6500 / 27000 
## Progress: 6600 / 27000 
## Progress: 6700 / 27000 
## Progress: 6800 / 27000 
## Progress: 6900 / 27000 
## Progress: 7000 / 27000 
## Progress: 7100 / 27000 
## Progress: 7200 / 27000 
## Progress: 7300 / 27000 
## Progress: 7400 / 27000 
## Progress: 7500 / 27000 
## Progress: 7600 / 27000 
## Progress: 7700 / 27000 
## Progress: 7800 / 27000 
## Progress: 7900 / 27000 
## Progress: 8000 / 27000 
## Progress: 8100 / 27000 
## Progress: 8200 / 27000 
## Progress: 8300 / 27000 
## Progress: 8400 / 27000 
## Progress: 8500 / 27000 
## Progress: 8600 / 27000 
## Progress: 8700 / 27000 
## Progress: 8800 / 27000 
## Progress: 8900 / 27000 
## Progress: 9000 / 27000 
## Progress: 9100 / 27000 
## Progress: 9200 / 27000 
## Progress: 9300 / 27000 
## Progress: 9400 / 27000 
## Progress: 9500 / 27000 
## Progress: 9600 / 27000 
## Progress: 9700 / 27000 
## Progress: 9800 / 27000 
## Progress: 9900 / 27000 
## Progress: 10000 / 27000 
## Progress: 10100 / 27000 
## Progress: 10200 / 27000 
## Progress: 10300 / 27000 
## Progress: 10400 / 27000 
## Progress: 10500 / 27000 
## Progress: 10600 / 27000 
## Progress: 10700 / 27000 
## Progress: 10800 / 27000 
## Progress: 10900 / 27000 
## Progress: 11000 / 27000 
## Progress: 11100 / 27000 
## Progress: 11200 / 27000 
## Progress: 11300 / 27000 
## Progress: 11400 / 27000 
## Progress: 11500 / 27000 
## Progress: 11600 / 27000 
## Progress: 11700 / 27000 
## Progress: 11800 / 27000 
## Progress: 11900 / 27000 
## Progress: 12000 / 27000 
## Progress: 12100 / 27000 
## Progress: 12200 / 27000 
## Progress: 12300 / 27000 
## Progress: 12400 / 27000 
## Progress: 12500 / 27000 
## Progress: 12600 / 27000 
## Progress: 12700 / 27000 
## Progress: 12800 / 27000 
## Progress: 12900 / 27000 
## Progress: 13000 / 27000 
## Progress: 13100 / 27000 
## Progress: 13200 / 27000 
## Progress: 13300 / 27000 
## Progress: 13400 / 27000 
## Progress: 13500 / 27000 
## Progress: 13600 / 27000 
## Progress: 13700 / 27000 
## Progress: 13800 / 27000 
## Progress: 13900 / 27000 
## Progress: 14000 / 27000 
## Progress: 14100 / 27000 
## Progress: 14200 / 27000 
## Progress: 14300 / 27000 
## Progress: 14400 / 27000 
## Progress: 14500 / 27000 
## Progress: 14600 / 27000 
## Progress: 14700 / 27000 
## Progress: 14800 / 27000 
## Progress: 14900 / 27000 
## Progress: 15000 / 27000 
## Progress: 15100 / 27000 
## Progress: 15200 / 27000 
## Progress: 15300 / 27000 
## Progress: 15400 / 27000 
## Progress: 15500 / 27000 
## Progress: 15600 / 27000 
## Progress: 15700 / 27000 
## Progress: 15800 / 27000 
## Progress: 15900 / 27000 
## Progress: 16000 / 27000 
## Progress: 16100 / 27000 
## Progress: 16200 / 27000 
## Progress: 16300 / 27000 
## Progress: 16400 / 27000 
## Progress: 16500 / 27000 
## Progress: 16600 / 27000 
## Progress: 16700 / 27000 
## Progress: 16800 / 27000 
## Progress: 16900 / 27000 
## Progress: 17000 / 27000 
## Progress: 17100 / 27000 
## Progress: 17200 / 27000 
## Progress: 17300 / 27000 
## Progress: 17400 / 27000 
## Progress: 17500 / 27000 
## Progress: 17600 / 27000 
## Progress: 17700 / 27000 
## Progress: 17800 / 27000 
## Progress: 17900 / 27000 
## Progress: 18000 / 27000 
## Progress: 18100 / 27000 
## Progress: 18200 / 27000 
## Progress: 18300 / 27000 
## Progress: 18400 / 27000 
## Progress: 18500 / 27000 
## Progress: 18600 / 27000 
## Progress: 18700 / 27000 
## Progress: 18800 / 27000 
## Progress: 18900 / 27000 
## Progress: 19000 / 27000 
## Progress: 19100 / 27000 
## Progress: 19200 / 27000 
## Progress: 19300 / 27000 
## Progress: 19400 / 27000 
## Progress: 19500 / 27000 
## Progress: 19600 / 27000 
## Progress: 19700 / 27000 
## Progress: 19800 / 27000 
## Progress: 19900 / 27000 
## Progress: 20000 / 27000 
## Progress: 20100 / 27000 
## Progress: 20200 / 27000 
## Progress: 20300 / 27000 
## Progress: 20400 / 27000 
## Progress: 20500 / 27000 
## Progress: 20600 / 27000 
## Progress: 20700 / 27000 
## Progress: 20800 / 27000 
## Progress: 20900 / 27000 
## Progress: 21000 / 27000 
## Progress: 21100 / 27000 
## Progress: 21200 / 27000 
## Progress: 21300 / 27000 
## Progress: 21400 / 27000 
## Progress: 21500 / 27000 
## Progress: 21600 / 27000 
## Progress: 21700 / 27000 
## Progress: 21800 / 27000 
## Progress: 21900 / 27000 
## Progress: 22000 / 27000 
## Progress: 22100 / 27000 
## Progress: 22200 / 27000 
## Progress: 22300 / 27000 
## Progress: 22400 / 27000 
## Progress: 22500 / 27000 
## Progress: 22600 / 27000 
## Progress: 22700 / 27000 
## Progress: 22800 / 27000 
## Progress: 22900 / 27000 
## Progress: 23000 / 27000 
## Progress: 23100 / 27000 
## Progress: 23200 / 27000 
## Progress: 23300 / 27000 
## Progress: 23400 / 27000 
## Progress: 23500 / 27000 
## Progress: 23600 / 27000 
## Progress: 23700 / 27000 
## Progress: 23800 / 27000 
## Progress: 23900 / 27000 
## Progress: 24000 / 27000 
## Progress: 24100 / 27000 
## Progress: 24200 / 27000 
## Progress: 24300 / 27000 
## Progress: 24400 / 27000 
## Progress: 24500 / 27000 
## Progress: 24600 / 27000 
## Progress: 24700 / 27000 
## Progress: 24800 / 27000 
## Progress: 24900 / 27000 
## Progress: 25000 / 27000 
## Progress: 25100 / 27000 
## Progress: 25200 / 27000 
## Progress: 25300 / 27000 
## Progress: 25400 / 27000 
## Progress: 25500 / 27000 
## Progress: 25600 / 27000 
## Progress: 25700 / 27000 
## Progress: 25800 / 27000 
## Progress: 25900 / 27000 
## Progress: 26000 / 27000 
## Progress: 26100 / 27000 
## Progress: 26200 / 27000 
## Progress: 26300 / 27000 
## Progress: 26400 / 27000 
## Progress: 26500 / 27000 
## Progress: 26600 / 27000 
## Progress: 26700 / 27000 
## Progress: 26800 / 27000 
## Progress: 26900 / 27000 
## Progress: 27000 / 27000
# Combine the per-run data frames once the loop has finished. bind_rows()
# stacks the whole list in a single pass, which scales far better than
# do.call(rbind, ...) over tens of thousands of one-row data frames.
results <- dplyr::bind_rows(results_list)

Data Hasil Simulasi

# First five rows
head(results, n = 5)
##    ARI Mean_Silhouette   Mean_a     SD_a   Mean_b     SD_b  n noise k
## 1 0.56       0.6343822 3.001019 1.332255 8.364889 1.127164 45   0.5 2
## 2 0.56       0.6490691 3.038576 1.674560 8.900508 1.280366 45   0.5 2
## 3 0.56       0.6407476 3.097378 1.658248 8.887228 1.316755 45   0.5 2
## 4 0.56       0.6434983 3.028490 1.414859 8.741211 1.331052 45   0.5 2
## 5 0.56       0.6455947 2.985892 1.566584 8.665588 1.351188 45   0.5 2
##   replication
## 1           1
## 2           2
## 3           3
## 4           4
## 5           5
# Last five rows
tail(results, n = 5)
##             ARI Mean_Silhouette   Mean_a     SD_a   Mean_b     SD_b   n noise k
## 26996 0.4681075       0.3661214 4.297360 1.226547 7.081494 2.034390 450     3 4
## 26997 0.4423655       0.3577192 4.286901 1.090775 6.923746 1.913432 450     3 4
## 26998 0.3798746       0.3589400 4.075811 1.108540 6.556188 1.697817 450     3 4
## 26999 0.4406424       0.3606018 4.194952 1.087018 6.839994 1.837484 450     3 4
## 27000 0.4609846       0.3653984 4.299116 1.065736 7.074528 1.856457 450     3 4
##       replication
## 26996         996
## 26997         997
## 26998         998
## 26999         999
## 27000        1000

Data Ringkasan Kombinasi

# Per design cell (n x noise x k): mean and SD of ARI across replications,
# plus the averages of the per-run mean silhouette, mean a and mean b.
summary_results <- results %>%
  group_by(n, noise, k) %>%
  summarise(
    Mean_ARI = mean(ARI),
    SD_ARI = sd(ARI),
    `Mean_mean(Silhouette)` = mean(Mean_Silhouette),
    `Mean_mean(a)` = mean(Mean_a),
    `Mean_mean(b)` = mean(Mean_b),
    .groups = "drop"
  )

# Export the table as CSV, then display it
readr::write_excel_csv(
  summary_results,
  file = "C:/Users/LENOVO/Documents/Output R/Ringkasan Kombinasi Perlakuan.csv"
)
summary_results
## # A tibble: 27 × 8
##        n noise     k Mean_ARI SD_ARI `Mean_mean(Silhouette)` `Mean_mean(a)`
##    <dbl> <dbl> <dbl>    <dbl>  <dbl>                   <dbl>          <dbl>
##  1    45   0.5     2    0.56  0                        0.643          2.99 
##  2    45   0.5     3    1     0                        0.873          0.886
##  3    45   0.5     4    0.877 0.0187                   0.725          0.797
##  4    45   1.5     2    0.519 0.0422                   0.512          4.18 
##  5    45   1.5     3    0.958 0.0544                   0.613          2.64 
##  6    45   1.5     4    0.824 0.0551                   0.537          2.36 
##  7    45   3       2    0.368 0.0773                   0.410          5.84 
##  8    45   3       3    0.528 0.135                    0.422          4.63 
##  9    45   3       4    0.432 0.109                    0.403          4.04 
## 10   180   0.5     2    0.569 0                        0.646          2.96 
## # ℹ 17 more rows
## # ℹ 1 more variable: `Mean_mean(b)` <dbl>

ANOVA dan Asumsinya

1. ANOVA ARI

# Three-way factorial ANOVA of ARI on sample size, noise level and k,
# including every two-way and three-way interaction.
anova_ari <- aov(
  ARI ~ factor(n) * factor(noise) * factor(k),
  data = results
)

# Header line, then the ANOVA table
cat("Hasil ANOVA ARI: \n")
## Hasil ANOVA ARI:
summary(anova_ari)
##                                      Df Sum Sq Mean Sq   F value Pr(>F)    
## factor(n)                             2    0.3     0.2     76.67 <2e-16 ***
## factor(noise)                         2  696.0   348.0 161044.61 <2e-16 ***
## factor(k)                             2  572.6   286.3 132506.54 <2e-16 ***
## factor(n):factor(noise)               4    0.2     0.1     27.54 <2e-16 ***
## factor(n):factor(k)                   4    0.1     0.0     10.20  3e-08 ***
## factor(noise):factor(k)               4   78.6    19.7   9095.19 <2e-16 ***
## factor(n):factor(noise):factor(k)     8    0.4     0.1     23.62 <2e-16 ***
## Residuals                         26973   58.3     0.0                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Separator line, then the partial eta-squared effect sizes of the ARI model
cat("\n ", strrep("=", 80), "\n Partial eta squared: \n", sep = "")
## 
##  ================================================================================
##  Partial eta squared:
eta_squared(anova_ari, partial = TRUE)
## # Effect Size for ANOVA (Type I)
## 
## Parameter                         | Eta2 (partial) |       95% CI
## -----------------------------------------------------------------
## factor(n)                         |       5.65e-03 | [0.00, 1.00]
## factor(noise)                     |           0.92 | [0.92, 1.00]
## factor(k)                         |           0.91 | [0.91, 1.00]
## factor(n):factor(noise)           |       4.07e-03 | [0.00, 1.00]
## factor(n):factor(k)               |       1.51e-03 | [0.00, 1.00]
## factor(noise):factor(k)           |           0.57 | [0.57, 1.00]
## factor(n):factor(noise):factor(k) |       6.96e-03 | [0.01, 1.00]
## 
## - One-sided CIs: upper bound fixed at [1.00].

Uji Asumsi ANOVA ARI

# Draw the diagnostic plots of the fitted ARI model to check ANOVA assumptions
plot(anova_ari)

2. ANOVA Silhouette

# Three-way factorial ANOVA of the mean silhouette width on sample size,
# noise level and k, including every two-way and three-way interaction.
anova_sil <- aov(
  Mean_Silhouette ~
    factor(n) *
    factor(noise) *
    factor(k),
  data = results
)

# Header line (spelling of "Silhouette" corrected), then the ANOVA table
cat("Hasil ANOVA Silhouette: \n")
## Hasil ANOVA Silhouette:
summary(anova_sil)
##                                      Df Sum Sq Mean Sq   F value Pr(>F)    
## factor(n)                             2    1.3    0.63   1604.49 <2e-16 ***
## factor(noise)                         2  536.3  268.13 678998.89 <2e-16 ***
## factor(k)                             2   70.2   35.10  88889.59 <2e-16 ***
## factor(n):factor(noise)               4    0.2    0.05    123.65 <2e-16 ***
## factor(n):factor(k)                   4    0.8    0.20    508.60 <2e-16 ***
## factor(noise):factor(k)               4   35.4    8.85  22419.64 <2e-16 ***
## factor(n):factor(noise):factor(k)     8    0.0    0.01     13.35 <2e-16 ***
## Residuals                         26973   10.7    0.00                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Separator line, then the partial eta-squared effect sizes of the
# silhouette model
cat("\n ", strrep("=", 80), "\n Partial eta squared: \n", sep = "")
## 
##  ================================================================================
##  Partial eta squared:
eta_squared(anova_sil, partial = TRUE)
## # Effect Size for ANOVA (Type I)
## 
## Parameter                         | Eta2 (partial) |       95% CI
## -----------------------------------------------------------------
## factor(n)                         |           0.11 | [0.10, 1.00]
## factor(noise)                     |           0.98 | [0.98, 1.00]
## factor(k)                         |           0.87 | [0.87, 1.00]
## factor(n):factor(noise)           |           0.02 | [0.02, 1.00]
## factor(n):factor(k)               |           0.07 | [0.07, 1.00]
## factor(noise):factor(k)           |           0.77 | [0.77, 1.00]
## factor(n):factor(noise):factor(k) |       3.94e-03 | [0.00, 1.00]
## 
## - One-sided CIs: upper bound fixed at [1.00].

Uji Asumsi ANOVA Silhouette

# Draw the diagnostic plots of the fitted silhouette model to check ANOVA
# assumptions
plot(anova_sil)

Plot

# Turn the three design variables into factors so they can be plotted as
# categories
results <- results %>%
  mutate(across(c(n, noise, k), factor))

Box-plot dan line chart pengaruh ukuran sampel terhadap ARI dan Silhouette

Boxplot

  1. ARI
# Box plot of ARI for each sample size, saved as 1.png
ggplot(data = results, mapping = aes(x = n, y = ARI)) +
  geom_boxplot() +
  labs(
    title = "Distribusi ARI Berdasarkan Ukuran Sampel",
    x = "Ukuran Sampel",
    y = "ARI"
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

ggsave(
  filename = "C:/Users/LENOVO/Documents/Output R/1.png",
  width = 8, height = 6, dpi = 300
)
  2. Silhouette
# Box plot of the mean silhouette width for each sample size, saved as 2.png
ggplot(data = results, mapping = aes(x = n, y = Mean_Silhouette)) +
  geom_boxplot() +
  labs(
    title = "Distribusi Mean Silhouette Berdasarkan Ukuran Sampel",
    x = "Ukuran Sampel",
    y = expression(bar(Silhouette))
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

ggsave(
  filename = "C:/Users/LENOVO/Documents/Output R/2.png",
  width = 8, height = 6, dpi = 300
)

Line Chart

  1. ARI
# Line chart of the average ARI per sample size, saved as 3.png
results %>%
  group_by(n) %>%
  summarise(Mean_ARI = mean(ARI)) %>%
  ggplot(aes(x = n, y = Mean_ARI, group = 1)) +
  geom_line() +
  geom_point(size = 3) +
  labs(
    title = "Rata-rata ARI Berdasarkan Ukuran Sampel",
    x = "Ukuran Sampel",
    y = expression(bar(ARI))
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

ggsave(
  filename = "C:/Users/LENOVO/Documents/Output R/3.png",
  width = 8, height = 6, dpi = 300
)
  2. Silhouette
# Line chart of the average mean silhouette per sample size, saved as 4.png
results %>%
  group_by(n) %>%
  summarise(Mean_mean_Silhoutte = mean(Mean_Silhouette)) %>%
  ggplot(aes(x = n, y = Mean_mean_Silhoutte, group = 1)) +
  geom_line() +
  geom_point(size = 3) +
  labs(
    title = expression(bold(paste(bar(bar(Silhouette)), " Berdasarkan Ukuran Sampel"))),
    x = "Ukuran Sampel",
    y = expression(bar(bar(Silhouette)))
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))

ggsave(
  filename = "C:/Users/LENOVO/Documents/Output R/4.png",
  width = 8, height = 6, dpi = 300
)
# Summary table: average of the per-run mean a and mean b per sample size
results %>%
  group_by(n) %>%
  summarise(Mean_mean_a = mean(Mean_a), Mean_mean_b = mean(Mean_b))
## # A tibble: 3 × 3
##   n     Mean_mean_a Mean_mean_b
##   <fct>       <dbl>       <dbl>
## 1 45           3.15        7.56
## 2 180          3.18        7.50
## 3 450          3.20        7.48

Box-plot dan line chart pengaruh tingkat noise terhadap ARI dan Silhouette

Box-plot

  1. ARI
# Box plot of ARI for each noise level, saved as 5.png
ggplot(data = results, mapping = aes(x = noise, y = ARI)) +
  geom_boxplot() +
  labs(
    title = "Distribusi ARI Berdasarkan Tingkat Noise",
    x = "Tingkat Noise",
    y = "ARI"
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

ggsave(
  filename = "C:/Users/LENOVO/Documents/Output R/5.png",
  width = 8, height = 6, dpi = 300
)
  2. Silhouette
# Box plot of the mean silhouette width for each noise level, saved as 6.png
ggplot(data = results, mapping = aes(x = noise, y = Mean_Silhouette)) +
  geom_boxplot() +
  labs(
    title = "Distribusi Mean Silhouette Berdasarkan Tingkat Noise",
    x = "Tingkat Noise",
    y = expression(bar(Silhouette))
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

ggsave(
  filename = "C:/Users/LENOVO/Documents/Output R/6.png",
  width = 8, height = 6, dpi = 300
)

Line Chart

  1. ARI
# Line chart of the average ARI per noise level, saved as 7.png
results %>%
  group_by(noise) %>%
  summarise(Mean_ARI = mean(ARI)) %>%
  ggplot(aes(x = noise, y = Mean_ARI, group = 1)) +
  geom_line() +
  geom_point(size = 3) +
  labs(
    title = "Rata-rata ARI Berdasarkan Tingkat Noise",
    x = "Tingkat Noise",
    y = expression(bar(ARI))
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

ggsave(
  filename = "C:/Users/LENOVO/Documents/Output R/7.png",
  width = 8, height = 6, dpi = 300
)
  2. Silhouette
# Line chart of the average mean silhouette per noise level, saved as 8.png
results %>%
  group_by(noise) %>%
  summarise(Mean_mean_Silhoutte = mean(Mean_Silhouette)) %>%
  ggplot(aes(x = noise, y = Mean_mean_Silhoutte, group = 1)) +
  geom_line() +
  geom_point(size = 3) +
  labs(
    title = expression(bold(paste(bar(bar(Silhouette)), " Berdasarkan Tingkat Noise"))),
    x = "Tingkat Noise",
    y = expression(bar(bar(Silhouette)))
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))

ggsave(
  filename = "C:/Users/LENOVO/Documents/Output R/8.png",
  width = 8, height = 6, dpi = 300
)
# Summary table: average of the per-run mean a and mean b per noise level
results %>%
  group_by(noise) %>%
  summarise(Mean_mean_a = mean(Mean_a), Mean_mean_b = mean(Mean_b))
## # A tibble: 3 × 3
##   noise Mean_mean_a Mean_mean_b
##   <fct>       <dbl>       <dbl>
## 1 0.5          1.56        6.86
## 2 1.5          3.08        7.18
## 3 3            4.90        8.50

Box-plot dan line chart pengaruh jumlah klaster pada algoritma K-Means terhadap ARI dan Silhouette

Box-plot

  1. ARI
# Box plot of ARI for each number of k-means clusters, saved as 9.png
ggplot(data = results, mapping = aes(x = k, y = ARI)) +
  geom_boxplot() +
  labs(
    title = "Distribusi ARI Berdasarkan Jumlah Klaster Algoritma K-Means",
    x = "Jumlah Klaster",
    y = "ARI"
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

ggsave(
  filename = "C:/Users/LENOVO/Documents/Output R/9.png",
  width = 8, height = 6, dpi = 300
)
  2. Silhouette
# Box plot of the mean silhouette width for each number of k-means clusters,
# saved as 10.png
ggplot(data = results, mapping = aes(x = k, y = Mean_Silhouette)) +
  geom_boxplot() +
  labs(
    title = "Distribusi Mean Silhouette Berdasarkan Jumlah Klaster Algoritma K-means",
    x = "Jumlah Klaster",
    y = expression(bar(Silhouette))
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

ggsave(
  filename = "C:/Users/LENOVO/Documents/Output R/10.png",
  width = 8, height = 6, dpi = 300
)

Line Chart

  1. ARI
# Line chart of the average ARI per number of k-means clusters, saved as
# 11.png
results %>%
  group_by(k) %>%
  summarise(Mean_ARI = mean(ARI)) %>%
  ggplot(aes(x = k, y = Mean_ARI, group = 1)) +
  geom_line() +
  geom_point(size = 3) +
  labs(
    title = "Rata-rata ARI Berdasarkan Jumlah Klaster Algoritma K-Means",
    x = "Jumlah Klaster",
    y = expression(bar(ARI))
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

ggsave(
  filename = "C:/Users/LENOVO/Documents/Output R/11.png",
  width = 8, height = 6, dpi = 300
)
  2. Silhouette
# Line chart of the average mean silhouette per number of k-means clusters,
# saved as 12.png
results %>%
  group_by(k) %>%
  summarise(Mean_mean_Silhoutte = mean(Mean_Silhouette)) %>%
  ggplot(aes(x = k, y = Mean_mean_Silhoutte, group = 1)) +
  geom_line() +
  geom_point(size = 3) +
  labs(
    title = expression(bold(paste(bar(bar(Silhouette)), " Berdasarkan Jumlah Klaster Algoritma K-Means"))),
    x = "Jumlah Klaster",
    y = expression(bar(bar(Silhouette)))
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))

ggsave(
  filename = "C:/Users/LENOVO/Documents/Output R/12.png",
  width = 8, height = 6, dpi = 300
)
# Summary table: average of the per-run mean a and mean b per number of
# clusters
results %>%
  group_by(k) %>%
  summarise(Mean_mean_a = mean(Mean_a), Mean_mean_b = mean(Mean_b))
## # A tibble: 3 × 3
##   k     Mean_mean_a Mean_mean_b
##   <fct>       <dbl>       <dbl>
## 1 2            4.35        9.21
## 2 3            2.73        7.50
## 3 4            2.45        5.83

Box-plot dan line chart pengaruh interaksi antara ukuran sampel dan tingkat noise terhadap ARI dan Silhouette

Box-plot

  1. ARI
# Box plot of ARI by sample size, with one box per noise level; saved as
# 13.png
ggplot(data = results, mapping = aes(x = n, y = ARI, fill = noise)) +
  geom_boxplot() +
  labs(
    title = "Distribusi ARI Berdasarkan Ukuran Sampel dan Tingkat Noise",
    x = "Ukuran Sampel",
    y = "ARI"
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

ggsave(
  filename = "C:/Users/LENOVO/Documents/Output R/13.png",
  width = 8, height = 6, dpi = 300
)
  2. Silhouette
# Box plot of the mean silhouette width by sample size, with one box per
# noise level; saved as 14.png
ggplot(
  data = results,
  mapping = aes(x = n, y = Mean_Silhouette, fill = noise)
) +
  geom_boxplot() +
  labs(
    title = "Distribusi Mean Silhouette Berdasarkan Ukuran Sampel dan Tingkat Noise",
    x = "Ukuran Sampel",
    y = expression(bar(Silhouette))
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

ggsave(
  filename = "C:/Users/LENOVO/Documents/Output R/14.png",
  width = 8, height = 6, dpi = 300
)

Line Chart

  1. ARI
# Interaction plot: average ARI per sample size, one line per noise level;
# saved as 15.png
results %>%
  group_by(n, noise) %>%
  summarise(
    Mean_ARI = mean(ARI),
    # Return an ungrouped table and silence the regrouping message, as the
    # summary tables in this file do
    .groups = "drop"
  ) %>%
  ggplot(
    aes(
      x = n,
      y = Mean_ARI,
      group = noise,
      colour = noise
    )
  ) +
  geom_line() +
  geom_point(size = 3) +
  labs(
    x = "Ukuran Sampel",
    y = expression(bar(ARI)),
    title = "Rata-rata ARI Berdasarkan Interaksi Ukuran Sampel dan Tingkat Noise"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5, face = 'bold'))

ggsave("C:/Users/LENOVO/Documents/Output R/15.png", width = 8, height = 6, dpi = 300)
  2. Silhouette
# Interaction plot: average mean silhouette per sample size, one line per
# noise level; saved as 16.png
results %>%
  group_by(n, noise) %>%
  summarise(
    Mean_mean_Silhoutte = mean(Mean_Silhouette),
    # Return an ungrouped table and silence the regrouping message, as the
    # summary tables in this file do
    .groups = "drop"
  ) %>%
  ggplot(
    aes(
      x = n,
      y = Mean_mean_Silhoutte,
      group = noise,
      colour = noise
    )
  ) +
  geom_line() +
  geom_point(size = 3) +
  labs(
    x = "Ukuran Sampel",
    y = expression(bar(bar(Silhouette))),
    title = expression(bold(paste(bar(bar(Silhouette))," Interaksi Ukuran Sampel dan Tingkat Noise")))
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))

ggsave("C:/Users/LENOVO/Documents/Output R/16.png", width = 8, height = 6, dpi = 300)
# Summary table: average of the per-run mean a and mean b for every
# combination of sample size and noise level
results %>%
  group_by(n, noise) %>%
  summarise(
    Mean_mean_a = mean(Mean_a), Mean_mean_b = mean(Mean_b),
    .groups = "drop"
  )
## # A tibble: 9 × 4
##   n     noise Mean_mean_a Mean_mean_b
##   <fct> <fct>       <dbl>       <dbl>
## 1 45    0.5          1.56        6.87
## 2 45    1.5          3.06        7.22
## 3 45    3            4.84        8.58
## 4 180   0.5          1.55        6.85
## 5 180   1.5          3.08        7.16
## 6 180   3            4.92        8.48
## 7 450   0.5          1.55        6.85
## 8 450   1.5          3.09        7.15
## 9 450   3            4.95        8.45

Box-plot dan line chart pengaruh interaksi antara tingkat noise dan jumlah klaster pada algoritma K-Means terhadap ARI dan Silhouette

Box-plot

  1. ARI
# Box plot of ARI by noise level, with one box per number of clusters; saved
# as 17.png
ggplot(data = results, mapping = aes(x = noise, y = ARI, fill = k)) +
  geom_boxplot() +
  labs(
    title = "Distribusi ARI Berdasarkan Tingkat Noise dan Jumlah Klaster K-Means",
    x = "Tingkat Noise",
    y = "ARI"
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

ggsave(
  filename = "C:/Users/LENOVO/Documents/Output R/17.png",
  width = 8, height = 6, dpi = 300
)
  2. Silhouette
# Box plot of the mean silhouette width by noise level, with one box per
# number of clusters; saved as 18.png
ggplot(results,
       aes(x = noise,
           y = Mean_Silhouette,
           fill = k)) +
  geom_boxplot() +
  labs(
    x = "Tingkat Noise",
    y = expression(bar(Silhouette)),
    # Spelling corrected to "Silhouette" to match the other plot titles
    title = "Distribusi Mean Silhouette Berdasarkan Tingkat Noise dan Jumlah Klaster"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5, face = 'bold'))

ggsave("C:/Users/LENOVO/Documents/Output R/18.png", width = 8, height = 6, dpi = 300)

Line Chart

  1. ARI
# Interaction plot: average ARI per noise level, one line per number of
# clusters; saved as 19.png
results %>%
  group_by(noise, k) %>%
  summarise(
    Mean_ARI = mean(ARI),
    # Return an ungrouped table and silence the regrouping message, as the
    # summary tables in this file do
    .groups = "drop"
  ) %>%
  ggplot(
    aes(
      x = noise,
      y = Mean_ARI,
      group = k,
      colour = k
    )
  ) +
  geom_line() +
  geom_point(size = 3) +
  labs(
    x = "Tingkat Noise",
    y = expression(bar(ARI)),
    title = "Rata-rata ARI Berdasarkan Interaksi Tingkat Noise dan Jumlah Klaster"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5, face = 'bold'))

ggsave("C:/Users/LENOVO/Documents/Output R/19.png", width = 8, height = 6, dpi = 300)
  2. Silhouette
# Interaction plot: average mean silhouette per noise level, one line per
# number of clusters; saved as 20.png
results %>%
  group_by(noise, k) %>%
  summarise(
    Mean_mean_Silhoutte = mean(Mean_Silhouette),
    # Return an ungrouped table and silence the regrouping message, as the
    # summary tables in this file do
    .groups = "drop"
  ) %>%
  ggplot(
    aes(
      x = noise,
      y = Mean_mean_Silhoutte,
      group = k,
      colour = k
    )
  ) +
  geom_line() +
  geom_point(size = 3) +
  labs(
    x = "Tingkat Noise",
    y = expression(bar(bar(Silhouette))),
    title = expression(bold(paste(bar(bar(Silhouette))," Interaksi Tingkat Noise dan Jumlah Klaster K-Means")))
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))

ggsave("C:/Users/LENOVO/Documents/Output R/20.png", width = 8, height = 6, dpi = 300)
# Summary table: average of the per-run mean a and mean b for every
# combination of noise level and number of clusters
results %>%
  group_by(noise, k) %>%
  summarise(
    Mean_mean_a = mean(Mean_a), Mean_mean_b = mean(Mean_b),
    .groups = "drop"
  )
## # A tibble: 9 × 4
##   noise k     Mean_mean_a Mean_mean_b
##   <fct> <fct>       <dbl>       <dbl>
## 1 0.5   2           2.97         8.60
## 2 0.5   3           0.886        7.01
## 3 0.5   4           0.811        4.96
## 4 1.5   2           4.20         8.85
## 5 1.5   3           2.64         7.14
## 6 1.5   4           2.40         5.54
## 7 3     2           5.88        10.2 
## 8 3     3           4.67         8.35
## 9 3     4           4.15         6.98

Box-plot dan line chart pengaruh interaksi antara ukuran sampel dan jumlah klaster pada algoritma K-Means terhadap ARI dan Silhouette

Box-Plot

  1. ARI
# Box plot of ARI by sample size, with one box per number of clusters; saved
# as 21.png
ggplot(data = results, mapping = aes(x = n, y = ARI, fill = k)) +
  geom_boxplot() +
  labs(
    title = "Distribusi ARI Berdasarkan Ukuran Sampel dan Jumlah Klaster K-Means",
    x = "Ukuran Sampel",
    y = "ARI"
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

ggsave(
  filename = "C:/Users/LENOVO/Documents/Output R/21.png",
  width = 8, height = 6, dpi = 300
)
  2. Silhouette
# Box plot of the mean silhouette width by sample size, with one box per
# number of clusters; saved as 22.png
ggplot(data = results, mapping = aes(x = n, y = Mean_Silhouette, fill = k)) +
  geom_boxplot() +
  labs(
    title = "Distribusi Mean Silhouette Berdasarkan Ukuran Sampel dan Jumlah Klaster",
    x = "Ukuran Sampel",
    y = expression(bar(Silhouette))
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

ggsave(
  filename = "C:/Users/LENOVO/Documents/Output R/22.png",
  width = 8, height = 6, dpi = 300
)

Line Chart

  1. ARI
# Interaction plot: average ARI per sample size, one line per number of
# clusters; saved as 23.png
results %>%
  group_by(n, k) %>%
  summarise(
    Mean_ARI = mean(ARI),
    # Return an ungrouped table and silence the regrouping message, as the
    # summary tables in this file do
    .groups = "drop"
  ) %>%
  ggplot(
    aes(
      x = n,
      y = Mean_ARI,
      group = k,
      colour = k
    )
  ) +
  geom_line() +
  geom_point(size = 3) +
  labs(
    x = "Ukuran Sampel",
    y = expression(bar(ARI)),
    title = "Rata-rata ARI Berdasarkan Interaksi Ukuran Sampel dan Jumlah Klaster"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5, face = 'bold'))

ggsave("C:/Users/LENOVO/Documents/Output R/23.png", width = 8, height = 6, dpi = 300)
  2. Silhouette
# Line chart of the average Mean_Silhouette per sample size, one line per
# cluster count.
results %>%
  group_by(n, k) %>%
  summarise(
    # Average of the Mean_Silhouette column within each (n, k) cell.
    Mean_mean_Silhouette = mean(Mean_Silhouette),
    # Drop the grouping explicitly: without `.groups`, summarise() keeps the
    # result grouped by `n` and prints a regrouping message. The other
    # summaries in this script already use `.groups = "drop"`.
    .groups = "drop"
  ) %>%
  ggplot(
    aes(
      x = n,
      y = Mean_mean_Silhouette,
      # `group` tells geom_line() which points to connect; `colour` gives
      # each cluster count its own line colour.
      group = k,
      colour = k
    )
  ) +
  geom_line() +
  geom_point(size = 3) +
  labs(
    x = "Ukuran Sampel",
    y = expression(bar(bar(Silhouette))),
    # The title is a plotmath expression so it can contain the double-bar
    # symbol; bold() is applied inside the expression itself.
    title = expression(bold(paste(bar(bar(Silhouette))," Interaksi Ukuran Sampel dan Jumlah Klaster K-Means")))
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))

# Save the plot displayed above (ggsave defaults to the last plot).
ggsave("C:/Users/LENOVO/Documents/Output R/24.png", width = 8, height = 6, dpi = 300)
# Summary table: average of the Mean_a and Mean_b columns of `results`
# for every combination of sample size (n) and cluster count (k).
group_by(results, n, k) %>%
  summarise(
    Mean_mean_a = mean(Mean_a),
    Mean_mean_b = mean(Mean_b),
    .groups = "drop"
  )
## # A tibble: 9 × 4
##   n     k     Mean_mean_a Mean_mean_b
##   <fct> <fct>       <dbl>       <dbl>
## 1 45    2            4.34        9.28
## 2 45    3            2.72        7.52
## 3 45    4            2.40        5.87
## 4 180   2            4.35        9.19
## 5 180   3            2.74        7.49
## 6 180   4            2.47        5.81
## 7 450   2            4.35        9.16
## 8 450   3            2.75        7.49
## 9 450   4            2.50        5.79

Box-plot dan line chart pengaruh interaksi antara ukuran sampel, noise, dan jumlah klaster pada algoritma K-Means terhadap ARI dan Silhouette

Box-Plot

  1. ARI
# Box plots of ARI by sample size and cluster count, one facet per noise level.
ggplot(data = results, mapping = aes(x = n, y = ARI, fill = k)) +
  geom_boxplot(alpha = 0.8, outlier.size = 1) +
  # Split the figure into one panel per noise level
  facet_wrap(~ noise, labeller = label_both) +
  labs(
    title = "Distribusi ARI berdasarkan Ukuran Sampel, Tingkat Noise, dan Jumlah Klaster",
    x = "Ukuran Sampel (n)",
    y = "ARI",
    fill = "Jumlah Klaster (k)"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    strip.text = element_text(size = 10, face = "bold"),
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5)
  )

# Save the plot displayed above (ggsave defaults to the last plot).
ggsave(
  filename = "C:/Users/LENOVO/Documents/Output R/25.png",
  width = 8,
  height = 6,
  dpi = 300
)
  2. Silhouette
# Box plots of the Mean_Silhouette column by sample size and cluster count,
# one facet per noise level.
ggplot(results,
       aes(x = n,
           y = Mean_Silhouette,
           fill = k)) +
  geom_boxplot(outlier.size = 1, alpha = 0.8) +
  # Split the figure into one panel per noise level
  facet_wrap(~ noise, labeller = label_both) +
  labs(
    x = "Ukuran Sampel (n)",
    # The boxes show the raw Mean_Silhouette column, so the label uses a
    # single bar, matching the non-faceted Mean_Silhouette box plot. The
    # double bar is used only where Mean_Silhouette is averaged again.
    y = expression(bar(Silhouette)),
    fill = "Jumlah Klaster (k)",
    title = expression(bold(paste("Distribusi ", bar(Silhouette), " berdasarkan Ukuran Sampel, Tingkat Noise, dan Jumlah Klaster")))
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 12),
    strip.text = element_text(face = "bold", size = 10),
    legend.position = "bottom"
  )

# Save the plot displayed above (ggsave defaults to the last plot).
ggsave("C:/Users/LENOVO/Documents/Output R/26.png", width = 8, height = 6, dpi = 300)

Line Chart

  1. ARI
# Line chart of the average ARI per (n, noise, k) cell: sample size on the
# x axis, one line per cluster count, one facet per noise level.
group_by(results, n, noise, k) %>%
  summarise(Mean_ARI = mean(ARI), .groups = "drop") %>%
  ggplot(mapping = aes(x = n, y = Mean_ARI, group = k, color = k)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  # Split the figure into one panel per noise level
  facet_wrap(~ noise, labeller = label_both) +
  labs(
    title = expression(bold(paste("Interaksi n, Noise, dan Jumlah Klaster terhadap ", bar(ARI)))),
    x = "Ukuran Sampel (n)",
    y = expression(bar(ARI)),
    color = "Jumlah Klaster (k)"
  ) +
  theme_minimal() +
  theme(
    # Bold facet strip labels (the noise panels)
    strip.text = element_text(size = 10, face = "bold"),
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5)
  )

# Save the plot displayed above (ggsave defaults to the last plot).
ggsave(
  filename = "C:/Users/LENOVO/Documents/Output R/27.png",
  width = 8,
  height = 6,
  dpi = 300
)
  2. Silhouette
# Line chart of the average Mean_Silhouette per (n, noise, k) cell: sample
# size on the x axis, one line per cluster count, one facet per noise level.
group_by(results, n, noise, k) %>%
  summarise(Avg_Sil = mean(Mean_Silhouette), .groups = "drop") %>%
  ggplot(mapping = aes(x = n, y = Avg_Sil, group = k, color = k)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  # Split the figure into one panel per noise level
  facet_wrap(~ noise, labeller = label_both) +
  labs(
    title = expression(bold(paste("Interaksi n, Noise, dan Jumlah Klaster terhadap ", bar(bar(Silhouette))))),
    x = "Ukuran Sampel (n)",
    y = expression(bar(bar(Silhouette))),
    color = "Jumlah Klaster (k)"
  ) +
  theme_minimal() +
  theme(
    # Bold facet strip labels (the noise panels)
    strip.text = element_text(size = 10, face = "bold"),
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5)
  )

# Save the plot displayed above (ggsave defaults to the last plot).
ggsave(
  filename = "C:/Users/LENOVO/Documents/Output R/28.png",
  width = 8,
  height = 6,
  dpi = 300
)
# Summary table: average of the Mean_a and Mean_b columns of `results`
# for every combination of sample size (n), noise level and cluster count (k).
group_by(results, n, noise, k) %>%
  summarise(
    Mean_mean_a = mean(Mean_a),
    Mean_mean_b = mean(Mean_b),
    .groups = "drop"
  )
## # A tibble: 27 × 5
##    n     noise k     Mean_mean_a Mean_mean_b
##    <fct> <fct> <fct>       <dbl>       <dbl>
##  1 45    0.5   2           2.99         8.62
##  2 45    0.5   3           0.886        7.01
##  3 45    0.5   4           0.797        4.97
##  4 45    1.5   2           4.18         8.91
##  5 45    1.5   3           2.64         7.15
##  6 45    1.5   4           2.36         5.59
##  7 45    3     2           5.84        10.3 
##  8 45    3     3           4.63         8.39
##  9 45    3     4           4.04         7.06
## 10 180   0.5   2           2.96         8.59
## # ℹ 17 more rows