Package yang diperlukan
library(MASS)
library(cluster)
library(mclust)
library(dplyr)
library(effectsize)
library(ggplot2)
Fungsi Pembangkit Data
# Simulate a two-dimensional data set made of three spherical Gaussian
# clusters centred at (0, 0), (5, 5) and (10, 0).
#
# Arguments:
#   n     Total number of observations (a whole number >= 3). When n is not
#         a multiple of 3 the leftover points go to the first cluster(s), so
#         the result always has exactly n rows.
#   sigma Common standard deviation of both coordinates in every cluster
#         (the covariance matrix is sigma^2 * I).
#
# Returns a list with
#   X          an n x 2 numeric matrix, rows ordered cluster 1, 2, 3;
#   label_true a numeric vector of length n holding the generating cluster
#              (1, 2 or 3) of each row.
generate_data <- function(n, sigma) {
  stopifnot(
    is.numeric(n), length(n) == 1, is.finite(n), n >= 3, n == round(n),
    is.numeric(sigma), length(sigma) == 1, is.finite(sigma), sigma >= 0
  )

  centers <- list(c(0, 0), c(5, 5), c(10, 0))

  # Integer cluster sizes that sum to n; identical to n / 3 each whenever
  # n is divisible by 3.
  sizes <- rep(n %/% 3, 3) + (seq_len(3) <= n %% 3)

  # Isotropic covariance shared by all clusters.
  Sigma <- diag(sigma^2, nrow = 2)

  # Draw the clusters in order 1, 2, 3 so the random-number stream is
  # consumed exactly as in three consecutive mvrnorm() calls.
  clusters <- lapply(seq_along(centers), function(j) {
    mvrnorm(n = sizes[j], mu = centers[[j]], Sigma = Sigma)
  })

  list(
    X = do.call(rbind, clusters),
    label_true = rep(c(1, 2, 3), times = sizes)
  )
}
Fungsi menghitung parameter alpha dan beta Silhouette
# Compute the two ingredients of the silhouette width for every observation.
#
# Arguments:
#   X              Data matrix (one observation per row). Only used to build
#                  the Euclidean distance matrix when D is NULL.
#   cluster_labels Cluster assignment of each observation; must contain no
#                  NA and have one entry per observation.
#   D              Optional precomputed distances (a "dist" object or a
#                  square matrix). When supplied, X is not inspected.
#
# Returns a list with
#   a  mean distance from each point to the other members of its own
#      cluster (0 for a point that is alone in its cluster);
#   b  the smallest mean distance from each point to the members of any
#      other cluster (Inf, with a single warning, when there is only one
#      cluster).
compute_ab <- function(X, cluster_labels, D = NULL) {
  # Euclidean distance is the default when no distances are supplied.
  D <- if (is.null(D)) as.matrix(dist(X)) else as.matrix(D)

  # Take the size from D so the function also works when only D is given.
  n <- nrow(D)
  if (length(cluster_labels) != n) {
    stop("cluster_labels must have one entry per observation", call. = FALSE)
  }
  if (anyNA(cluster_labels)) {
    stop("cluster_labels must not contain NA", call. = FALSE)
  }

  a <- numeric(n)
  b <- numeric(n)

  unique_clusters <- sort(unique(cluster_labels))
  # Membership of every cluster is loop-invariant: compute it once.
  members <- lapply(unique_clusters, function(cl) which(cluster_labels == cl))

  if (n > 0 && length(unique_clusters) < 2) {
    warning("only one cluster present; b is set to Inf", call. = FALSE)
  }

  # seq_len() keeps the loop empty for n == 0 (1:n would visit 1 and 0).
  for (i in seq_len(n)) {
    own <- match(cluster_labels[i], unique_clusters)

    # a: average distance to the other members of the point's own cluster.
    peers <- setdiff(members[[own]], i)
    a[i] <- if (length(peers) > 0) mean(D[i, peers]) else 0

    # b: average distance to each foreign cluster, then the nearest one.
    rival_means <- vapply(
      members[-own],
      function(idx) mean(D[i, idx]),
      numeric(1)
    )
    b[i] <- if (length(rival_means) > 0) min(rival_means) else Inf
  }

  list(a = a, b = b)
}
Fungsi Setiap Replikasi Dalam Kombinasi Simulasi Tertentu
# Run one Monte Carlo replication for a single design cell.
#
# Arguments:
#   n      total number of simulated observations (passed to generate_data);
#   sigma  noise level (passed to generate_data);
#   k      number of centres requested from kmeans().
#
# Returns a one-row data frame with the adjusted Rand index against the
# generating labels, the mean silhouette width, and the mean and standard
# deviation of the per-point a and b values from compute_ab().
run_single_simulation <- function(n, sigma, k) {
  sim <- generate_data(n = n, sigma = sigma)

  # k-means with 25 random starts; only the assignments are used afterwards.
  fit <- kmeans(sim$X, centers = k, nstart = 25)
  assigned <- fit$cluster

  # One distance object shared by silhouette() and compute_ab().
  pair_dist <- dist(sim$X)
  sil_obj <- silhouette(assigned, pair_dist)
  parts <- compute_ab(sim$X, assigned, D = pair_dist)

  data.frame(
    ARI = adjustedRandIndex(sim$label_true, assigned),
    Mean_Silhouette = mean(sil_obj[, "sil_width"]),
    Mean_a = mean(parts$a),
    SD_a = sd(parts$a),
    Mean_b = mean(parts$b),
    SD_b = sd(parts$b)
  )
}
Desain Simulasi
# Sample sizes (total number of observations per simulated data set)
sample_sizes <- c(45, 180, 450)
# Noise levels (passed as sigma to the simulation)
noise_levels <- c(0.5, 1.5, 3.0)
# Number of clusters requested from the k-means algorithm
k_values <- c(2, 3, 4)
# Number of replications per design cell
R <- 1000
Simulasi Monte Carlo
# Monte Carlo driver: run R replications for every combination of sample
# size, noise level and k, storing one result data frame per replication.

# Fix the random-number state so the whole experiment is reproducible.
# The particular value is arbitrary; outputs recorded from an earlier,
# unseeded run will differ in their last digits.
set.seed(123)

counter <- 1
# Total number of records that will end up in the results data frame
total_runs <-
  length(sample_sizes) * length(noise_levels) * length(k_values) * R
results_list <- vector("list", total_runs) # allocate once, fill by index

for (n in sample_sizes) {
  for (noise in noise_levels) {
    for (k in k_values) {
      # rep_id rather than rep: avoids shadowing base::rep()
      for (rep_id in seq_len(R)) {
        temp <- run_single_simulation(n = n, sigma = noise, k = k)
        # Tag the row with its design cell and replication number.
        temp$n <- n
        temp$noise <- noise
        temp$k <- k
        temp$replication <- rep_id
        results_list[[counter]] <- temp
        if (counter %% 100 == 0) {
          cat("Progress:", counter, "/", total_runs, "\n")
        }
        counter <- counter + 1
      }
    }
  }
}
## Progress: 100 / 27000
## Progress: 200 / 27000
## Progress: 300 / 27000
## Progress: 400 / 27000
## Progress: 500 / 27000
## Progress: 600 / 27000
## Progress: 700 / 27000
## Progress: 800 / 27000
## Progress: 900 / 27000
## Progress: 1000 / 27000
## Progress: 1100 / 27000
## Progress: 1200 / 27000
## Progress: 1300 / 27000
## Progress: 1400 / 27000
## Progress: 1500 / 27000
## Progress: 1600 / 27000
## Progress: 1700 / 27000
## Progress: 1800 / 27000
## Progress: 1900 / 27000
## Progress: 2000 / 27000
## Progress: 2100 / 27000
## Progress: 2200 / 27000
## Progress: 2300 / 27000
## Progress: 2400 / 27000
## Progress: 2500 / 27000
## Progress: 2600 / 27000
## Progress: 2700 / 27000
## Progress: 2800 / 27000
## Progress: 2900 / 27000
## Progress: 3000 / 27000
## Progress: 3100 / 27000
## Progress: 3200 / 27000
## Progress: 3300 / 27000
## Progress: 3400 / 27000
## Progress: 3500 / 27000
## Progress: 3600 / 27000
## Progress: 3700 / 27000
## Progress: 3800 / 27000
## Progress: 3900 / 27000
## Progress: 4000 / 27000
## Progress: 4100 / 27000
## Progress: 4200 / 27000
## Progress: 4300 / 27000
## Progress: 4400 / 27000
## Progress: 4500 / 27000
## Progress: 4600 / 27000
## Progress: 4700 / 27000
## Progress: 4800 / 27000
## Progress: 4900 / 27000
## Progress: 5000 / 27000
## Progress: 5100 / 27000
## Progress: 5200 / 27000
## Progress: 5300 / 27000
## Progress: 5400 / 27000
## Progress: 5500 / 27000
## Progress: 5600 / 27000
## Progress: 5700 / 27000
## Progress: 5800 / 27000
## Progress: 5900 / 27000
## Progress: 6000 / 27000
## Progress: 6100 / 27000
## Progress: 6200 / 27000
## Progress: 6300 / 27000
## Progress: 6400 / 27000
## Progress: 6500 / 27000
## Progress: 6600 / 27000
## Progress: 6700 / 27000
## Progress: 6800 / 27000
## Progress: 6900 / 27000
## Progress: 7000 / 27000
## Progress: 7100 / 27000
## Progress: 7200 / 27000
## Progress: 7300 / 27000
## Progress: 7400 / 27000
## Progress: 7500 / 27000
## Progress: 7600 / 27000
## Progress: 7700 / 27000
## Progress: 7800 / 27000
## Progress: 7900 / 27000
## Progress: 8000 / 27000
## Progress: 8100 / 27000
## Progress: 8200 / 27000
## Progress: 8300 / 27000
## Progress: 8400 / 27000
## Progress: 8500 / 27000
## Progress: 8600 / 27000
## Progress: 8700 / 27000
## Progress: 8800 / 27000
## Progress: 8900 / 27000
## Progress: 9000 / 27000
## Progress: 9100 / 27000
## Progress: 9200 / 27000
## Progress: 9300 / 27000
## Progress: 9400 / 27000
## Progress: 9500 / 27000
## Progress: 9600 / 27000
## Progress: 9700 / 27000
## Progress: 9800 / 27000
## Progress: 9900 / 27000
## Progress: 10000 / 27000
## Progress: 10100 / 27000
## Progress: 10200 / 27000
## Progress: 10300 / 27000
## Progress: 10400 / 27000
## Progress: 10500 / 27000
## Progress: 10600 / 27000
## Progress: 10700 / 27000
## Progress: 10800 / 27000
## Progress: 10900 / 27000
## Progress: 11000 / 27000
## Progress: 11100 / 27000
## Progress: 11200 / 27000
## Progress: 11300 / 27000
## Progress: 11400 / 27000
## Progress: 11500 / 27000
## Progress: 11600 / 27000
## Progress: 11700 / 27000
## Progress: 11800 / 27000
## Progress: 11900 / 27000
## Progress: 12000 / 27000
## Progress: 12100 / 27000
## Progress: 12200 / 27000
## Progress: 12300 / 27000
## Progress: 12400 / 27000
## Progress: 12500 / 27000
## Progress: 12600 / 27000
## Progress: 12700 / 27000
## Progress: 12800 / 27000
## Progress: 12900 / 27000
## Progress: 13000 / 27000
## Progress: 13100 / 27000
## Progress: 13200 / 27000
## Progress: 13300 / 27000
## Progress: 13400 / 27000
## Progress: 13500 / 27000
## Progress: 13600 / 27000
## Progress: 13700 / 27000
## Progress: 13800 / 27000
## Progress: 13900 / 27000
## Progress: 14000 / 27000
## Progress: 14100 / 27000
## Progress: 14200 / 27000
## Progress: 14300 / 27000
## Progress: 14400 / 27000
## Progress: 14500 / 27000
## Progress: 14600 / 27000
## Progress: 14700 / 27000
## Progress: 14800 / 27000
## Progress: 14900 / 27000
## Progress: 15000 / 27000
## Progress: 15100 / 27000
## Progress: 15200 / 27000
## Progress: 15300 / 27000
## Progress: 15400 / 27000
## Progress: 15500 / 27000
## Progress: 15600 / 27000
## Progress: 15700 / 27000
## Progress: 15800 / 27000
## Progress: 15900 / 27000
## Progress: 16000 / 27000
## Progress: 16100 / 27000
## Progress: 16200 / 27000
## Progress: 16300 / 27000
## Progress: 16400 / 27000
## Progress: 16500 / 27000
## Progress: 16600 / 27000
## Progress: 16700 / 27000
## Progress: 16800 / 27000
## Progress: 16900 / 27000
## Progress: 17000 / 27000
## Progress: 17100 / 27000
## Progress: 17200 / 27000
## Progress: 17300 / 27000
## Progress: 17400 / 27000
## Progress: 17500 / 27000
## Progress: 17600 / 27000
## Progress: 17700 / 27000
## Progress: 17800 / 27000
## Progress: 17900 / 27000
## Progress: 18000 / 27000
## Progress: 18100 / 27000
## Progress: 18200 / 27000
## Progress: 18300 / 27000
## Progress: 18400 / 27000
## Progress: 18500 / 27000
## Progress: 18600 / 27000
## Progress: 18700 / 27000
## Progress: 18800 / 27000
## Progress: 18900 / 27000
## Progress: 19000 / 27000
## Progress: 19100 / 27000
## Progress: 19200 / 27000
## Progress: 19300 / 27000
## Progress: 19400 / 27000
## Progress: 19500 / 27000
## Progress: 19600 / 27000
## Progress: 19700 / 27000
## Progress: 19800 / 27000
## Progress: 19900 / 27000
## Progress: 20000 / 27000
## Progress: 20100 / 27000
## Progress: 20200 / 27000
## Progress: 20300 / 27000
## Progress: 20400 / 27000
## Progress: 20500 / 27000
## Progress: 20600 / 27000
## Progress: 20700 / 27000
## Progress: 20800 / 27000
## Progress: 20900 / 27000
## Progress: 21000 / 27000
## Progress: 21100 / 27000
## Progress: 21200 / 27000
## Progress: 21300 / 27000
## Progress: 21400 / 27000
## Progress: 21500 / 27000
## Progress: 21600 / 27000
## Progress: 21700 / 27000
## Progress: 21800 / 27000
## Progress: 21900 / 27000
## Progress: 22000 / 27000
## Progress: 22100 / 27000
## Progress: 22200 / 27000
## Progress: 22300 / 27000
## Progress: 22400 / 27000
## Progress: 22500 / 27000
## Progress: 22600 / 27000
## Progress: 22700 / 27000
## Progress: 22800 / 27000
## Progress: 22900 / 27000
## Progress: 23000 / 27000
## Progress: 23100 / 27000
## Progress: 23200 / 27000
## Progress: 23300 / 27000
## Progress: 23400 / 27000
## Progress: 23500 / 27000
## Progress: 23600 / 27000
## Progress: 23700 / 27000
## Progress: 23800 / 27000
## Progress: 23900 / 27000
## Progress: 24000 / 27000
## Progress: 24100 / 27000
## Progress: 24200 / 27000
## Progress: 24300 / 27000
## Progress: 24400 / 27000
## Progress: 24500 / 27000
## Progress: 24600 / 27000
## Progress: 24700 / 27000
## Progress: 24800 / 27000
## Progress: 24900 / 27000
## Progress: 25000 / 27000
## Progress: 25100 / 27000
## Progress: 25200 / 27000
## Progress: 25300 / 27000
## Progress: 25400 / 27000
## Progress: 25500 / 27000
## Progress: 25600 / 27000
## Progress: 25700 / 27000
## Progress: 25800 / 27000
## Progress: 25900 / 27000
## Progress: 26000 / 27000
## Progress: 26100 / 27000
## Progress: 26200 / 27000
## Progress: 26300 / 27000
## Progress: 26400 / 27000
## Progress: 26500 / 27000
## Progress: 26600 / 27000
## Progress: 26700 / 27000
## Progress: 26800 / 27000
## Progress: 26900 / 27000
## Progress: 27000 / 27000
# Once the loop has finished, stack the stored data frames into one table.
results <- do.call(what = rbind, args = results_list)
Data Hasil Simulasi
# First five rows of the simulation results
head(results, n = 5)
## ARI Mean_Silhouette Mean_a SD_a Mean_b SD_b n noise k
## 1 0.56 0.6343822 3.001019 1.332255 8.364889 1.127164 45 0.5 2
## 2 0.56 0.6490691 3.038576 1.674560 8.900508 1.280366 45 0.5 2
## 3 0.56 0.6407476 3.097378 1.658248 8.887228 1.316755 45 0.5 2
## 4 0.56 0.6434983 3.028490 1.414859 8.741211 1.331052 45 0.5 2
## 5 0.56 0.6455947 2.985892 1.566584 8.665588 1.351188 45 0.5 2
## replication
## 1 1
## 2 2
## 3 3
## 4 4
## 5 5
# Last five rows of the simulation results
tail(results, n = 5)
## ARI Mean_Silhouette Mean_a SD_a Mean_b SD_b n noise k
## 26996 0.4681075 0.3661214 4.297360 1.226547 7.081494 2.034390 450 3 4
## 26997 0.4423655 0.3577192 4.286901 1.090775 6.923746 1.913432 450 3 4
## 26998 0.3798746 0.3589400 4.075811 1.108540 6.556188 1.697817 450 3 4
## 26999 0.4406424 0.3606018 4.194952 1.087018 6.839994 1.837484 450 3 4
## 27000 0.4609846 0.3653984 4.299116 1.065736 7.074528 1.856457 450 3 4
## replication
## 26996 996
## 26997 997
## 26998 998
## 26999 999
## 27000 1000
Data Ringkasan Kombinasi
# Summarise every (n, noise, k) design cell: mean and SD of ARI plus the
# means of the per-replication silhouette, a and b averages.
summary_results <- results %>%
  group_by(n, noise, k) %>%
  summarise(
    Mean_ARI = mean(ARI),
    SD_ARI = sd(ARI),
    `Mean_mean(Silhouette)` = mean(Mean_Silhouette),
    `Mean_mean(a)` = mean(Mean_a),
    `Mean_mean(b)` = mean(Mean_b),
    .groups = "drop"
  )

# Export the summary as CSV, then print it.
readr::write_excel_csv(
  summary_results,
  file = "C:/Users/LENOVO/Documents/Output R/Ringkasan Kombinasi Perlakuan.csv"
)
summary_results
## # A tibble: 27 × 8
## n noise k Mean_ARI SD_ARI `Mean_mean(Silhouette)` `Mean_mean(a)`
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 45 0.5 2 0.56 0 0.643 2.99
## 2 45 0.5 3 1 0 0.873 0.886
## 3 45 0.5 4 0.877 0.0187 0.725 0.797
## 4 45 1.5 2 0.519 0.0422 0.512 4.18
## 5 45 1.5 3 0.958 0.0544 0.613 2.64
## 6 45 1.5 4 0.824 0.0551 0.537 2.36
## 7 45 3 2 0.368 0.0773 0.410 5.84
## 8 45 3 3 0.528 0.135 0.422 4.63
## 9 45 3 4 0.432 0.109 0.403 4.04
## 10 180 0.5 2 0.569 0 0.646 2.96
## # ℹ 17 more rows
## # ℹ 1 more variable: `Mean_mean(b)` <dbl>
ANOVA dan Asumsinya
1. ANOVA ARI
# Three-way factorial ANOVA of ARI on sample size, noise level and k,
# with all two-way and three-way interactions.
anova_ari <- aov(
  ARI ~ factor(n) * factor(noise) * factor(k),
  data = results
)

cat("Hasil ANOVA ARI: \n")
## Hasil ANOVA ARI:
summary(anova_ari)
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(n) 2 0.3 0.2 76.67 <2e-16 ***
## factor(noise) 2 696.0 348.0 161044.61 <2e-16 ***
## factor(k) 2 572.6 286.3 132506.54 <2e-16 ***
## factor(n):factor(noise) 4 0.2 0.1 27.54 <2e-16 ***
## factor(n):factor(k) 4 0.1 0.0 10.20 3e-08 ***
## factor(noise):factor(k) 4 78.6 19.7 9095.19 <2e-16 ***
## factor(n):factor(noise):factor(k) 8 0.4 0.1 23.62 <2e-16 ***
## Residuals 26973 58.3 0.0
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Print an 80-character separator and a header, then the partial eta
# squared effect sizes of the ARI model.
cat("\n ", strrep("=", 80), "\n Partial eta squared: \n", sep = "")
##
## ================================================================================
## Partial eta squared:
eta_squared(anova_ari, partial = TRUE)
## # Effect Size for ANOVA (Type I)
##
## Parameter | Eta2 (partial) | 95% CI
## -----------------------------------------------------------------
## factor(n) | 5.65e-03 | [0.00, 1.00]
## factor(noise) | 0.92 | [0.92, 1.00]
## factor(k) | 0.91 | [0.91, 1.00]
## factor(n):factor(noise) | 4.07e-03 | [0.00, 1.00]
## factor(n):factor(k) | 1.51e-03 | [0.00, 1.00]
## factor(noise):factor(k) | 0.57 | [0.57, 1.00]
## factor(n):factor(noise):factor(k) | 6.96e-03 | [0.01, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
2. ANOVA Silhouette
# Three-way factorial ANOVA of the mean silhouette width on sample size,
# noise level and k, with all two-way and three-way interactions.
anova_sil <- aov(
  Mean_Silhouette ~ factor(n) * factor(noise) * factor(k),
  data = results
)

# The printed label previously misspelled the metric as "Silhoutte".
cat("Hasil ANOVA Silhouette: \n")
## Hasil ANOVA Silhouette:
summary(anova_sil)
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(n) 2 1.3 0.63 1604.49 <2e-16 ***
## factor(noise) 2 536.3 268.13 678998.89 <2e-16 ***
## factor(k) 2 70.2 35.10 88889.59 <2e-16 ***
## factor(n):factor(noise) 4 0.2 0.05 123.65 <2e-16 ***
## factor(n):factor(k) 4 0.8 0.20 508.60 <2e-16 ***
## factor(noise):factor(k) 4 35.4 8.85 22419.64 <2e-16 ***
## factor(n):factor(noise):factor(k) 8 0.0 0.01 13.35 <2e-16 ***
## Residuals 26973 10.7 0.00
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Print an 80-character separator and a header, then the partial eta
# squared effect sizes of the silhouette model.
cat("\n ", strrep("=", 80), "\n Partial eta squared: \n", sep = "")
##
## ================================================================================
## Partial eta squared:
eta_squared(anova_sil, partial = TRUE)
## # Effect Size for ANOVA (Type I)
##
## Parameter | Eta2 (partial) | 95% CI
## -----------------------------------------------------------------
## factor(n) | 0.11 | [0.10, 1.00]
## factor(noise) | 0.98 | [0.98, 1.00]
## factor(k) | 0.87 | [0.87, 1.00]
## factor(n):factor(noise) | 0.02 | [0.02, 1.00]
## factor(n):factor(k) | 0.07 | [0.07, 1.00]
## factor(noise):factor(k) | 0.77 | [0.77, 1.00]
## factor(n):factor(noise):factor(k) | 3.94e-03 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
Plot
# Convert the three design variables to factors so they can be used as
# discrete axes, fills and colours in the plots.
results <- mutate(
  results,
  n = factor(n),
  noise = factor(noise),
  k = factor(k)
)
Box-plot dan line chart pengaruh ukuran sampel terhadap ARI dan
Silhouette
Boxplot
- ARI
# Box plot of per-replication ARI for each sample size.
ggplot(results, aes(x = n, y = ARI)) +
  geom_boxplot() +
  labs(
    title = "Distribusi ARI Berdasarkan Ukuran Sampel",
    x = "Ukuran Sampel",
    y = "ARI"
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

# No plot argument is given, so ggsave() writes ggplot2's last plot.
ggsave(
  "C:/Users/LENOVO/Documents/Output R/1.png",
  width = 8, height = 6, dpi = 300
)
- Silhouette
# Box plot of per-replication mean silhouette width for each sample size.
ggplot(results, aes(x = n, y = Mean_Silhouette)) +
  geom_boxplot() +
  labs(
    title = "Distribusi Mean Silhouette Berdasarkan Ukuran Sampel",
    x = "Ukuran Sampel",
    y = expression(bar(Silhouette))
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

# No plot argument is given, so ggsave() writes ggplot2's last plot.
ggsave(
  "C:/Users/LENOVO/Documents/Output R/2.png",
  width = 8, height = 6, dpi = 300
)
Line Chart
- ARI
# Line chart of the average ARI per sample size. group = 1 joins the
# points of the discrete x axis into a single line.
results %>%
  group_by(n) %>%
  summarise(Mean_ARI = mean(ARI)) %>%
  ggplot(aes(x = n, y = Mean_ARI, group = 1)) +
  geom_line() +
  geom_point(size = 3) +
  labs(
    title = "Rata-rata ARI Berdasarkan Ukuran Sampel",
    x = "Ukuran Sampel",
    y = expression(bar(ARI))
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

# No plot argument is given, so ggsave() writes ggplot2's last plot.
ggsave(
  "C:/Users/LENOVO/Documents/Output R/3.png",
  width = 8, height = 6, dpi = 300
)
- Silhouette
# Line chart of the average mean-silhouette per sample size. The title is
# a plotmath expression, so bold is applied inside the expression itself.
results %>%
  group_by(n) %>%
  summarise(Mean_mean_Silhoutte = mean(Mean_Silhouette)) %>%
  ggplot(aes(x = n, y = Mean_mean_Silhoutte, group = 1)) +
  geom_line() +
  geom_point(size = 3) +
  labs(
    title = expression(bold(paste(bar(bar(Silhouette))," Berdasarkan Ukuran Sampel"))),
    x = "Ukuran Sampel",
    y = expression(bar(bar(Silhouette)))
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))

# No plot argument is given, so ggsave() writes ggplot2's last plot.
ggsave(
  "C:/Users/LENOVO/Documents/Output R/4.png",
  width = 8, height = 6, dpi = 300
)
# Summary table: average of the per-replication mean a and mean b for
# each sample size.
results %>%
  group_by(n) %>%
  summarise(Mean_mean_a = mean(Mean_a), Mean_mean_b = mean(Mean_b))
## # A tibble: 3 × 3
## n Mean_mean_a Mean_mean_b
## <fct> <dbl> <dbl>
## 1 45 3.15 7.56
## 2 180 3.18 7.50
## 3 450 3.20 7.48
Box-plot dan line chart pengaruh tingkat noise terhadap ARI dan
Silhouette
Box-plot
- ARI
# Box plot of per-replication ARI for each noise level.
ggplot(results, aes(x = noise, y = ARI)) +
  geom_boxplot() +
  labs(
    title = "Distribusi ARI Berdasarkan Tingkat Noise",
    x = "Tingkat Noise",
    y = "ARI"
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

# No plot argument is given, so ggsave() writes ggplot2's last plot.
ggsave(
  "C:/Users/LENOVO/Documents/Output R/5.png",
  width = 8, height = 6, dpi = 300
)
- Silhouette
# Box plot of per-replication mean silhouette width for each noise level.
ggplot(results, aes(x = noise, y = Mean_Silhouette)) +
  geom_boxplot() +
  labs(
    title = "Distribusi Mean Silhouette Berdasarkan Tingkat Noise",
    x = "Tingkat Noise",
    y = expression(bar(Silhouette))
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

# No plot argument is given, so ggsave() writes ggplot2's last plot.
ggsave(
  "C:/Users/LENOVO/Documents/Output R/6.png",
  width = 8, height = 6, dpi = 300
)
Line Chart
- ARI
# Line chart of the average ARI per noise level. group = 1 joins the
# points of the discrete x axis into a single line.
results %>%
  group_by(noise) %>%
  summarise(Mean_ARI = mean(ARI)) %>%
  ggplot(aes(x = noise, y = Mean_ARI, group = 1)) +
  geom_line() +
  geom_point(size = 3) +
  labs(
    title = "Rata-rata ARI Berdasarkan Tingkat Noise",
    x = "Tingkat Noise",
    y = expression(bar(ARI))
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

# No plot argument is given, so ggsave() writes ggplot2's last plot.
ggsave(
  "C:/Users/LENOVO/Documents/Output R/7.png",
  width = 8, height = 6, dpi = 300
)
- Silhouette
# Line chart of the average mean-silhouette per noise level. The title is
# a plotmath expression, so bold is applied inside the expression itself.
results %>%
  group_by(noise) %>%
  summarise(Mean_mean_Silhoutte = mean(Mean_Silhouette)) %>%
  ggplot(aes(x = noise, y = Mean_mean_Silhoutte, group = 1)) +
  geom_line() +
  geom_point(size = 3) +
  labs(
    title = expression(bold(paste(bar(bar(Silhouette))," Berdasarkan Tingkat Noise"))),
    x = "Tingkat Noise",
    y = expression(bar(bar(Silhouette)))
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))

# No plot argument is given, so ggsave() writes ggplot2's last plot.
ggsave(
  "C:/Users/LENOVO/Documents/Output R/8.png",
  width = 8, height = 6, dpi = 300
)
# Summary table: average of the per-replication mean a and mean b for
# each noise level.
results %>%
  group_by(noise) %>%
  summarise(Mean_mean_a = mean(Mean_a), Mean_mean_b = mean(Mean_b))
## # A tibble: 3 × 3
## noise Mean_mean_a Mean_mean_b
## <fct> <dbl> <dbl>
## 1 0.5 1.56 6.86
## 2 1.5 3.08 7.18
## 3 3 4.90 8.50
Box-plot dan line chart pengaruh jumlah klaster pada algoritma
K-Means terhadap ARI dan Silhouette
Box-plot
- ARI
# Box plot of per-replication ARI for each number of k-means clusters.
ggplot(results, aes(x = k, y = ARI)) +
  geom_boxplot() +
  labs(
    title = "Distribusi ARI Berdasarkan Jumlah Klaster Algoritma K-Means",
    x = "Jumlah Klaster",
    y = "ARI"
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

# No plot argument is given, so ggsave() writes ggplot2's last plot.
ggsave(
  "C:/Users/LENOVO/Documents/Output R/9.png",
  width = 8, height = 6, dpi = 300
)
- Silhouette
# Box plot of per-replication mean silhouette width for each number of
# k-means clusters.
ggplot(results, aes(x = k, y = Mean_Silhouette)) +
  geom_boxplot() +
  labs(
    title = "Distribusi Mean Silhouette Berdasarkan Jumlah Klaster Algoritma K-means",
    x = "Jumlah Klaster",
    y = expression(bar(Silhouette))
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

# No plot argument is given, so ggsave() writes ggplot2's last plot.
ggsave(
  "C:/Users/LENOVO/Documents/Output R/10.png",
  width = 8, height = 6, dpi = 300
)
Line Chart
- ARI
# Line chart of the average ARI per number of k-means clusters. group = 1
# joins the points of the discrete x axis into a single line.
results %>%
  group_by(k) %>%
  summarise(Mean_ARI = mean(ARI)) %>%
  ggplot(aes(x = k, y = Mean_ARI, group = 1)) +
  geom_line() +
  geom_point(size = 3) +
  labs(
    title = "Rata-rata ARI Berdasarkan Jumlah Klaster Algoritma K-Means",
    x = "Jumlah Klaster",
    y = expression(bar(ARI))
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

# No plot argument is given, so ggsave() writes ggplot2's last plot.
ggsave(
  "C:/Users/LENOVO/Documents/Output R/11.png",
  width = 8, height = 6, dpi = 300
)
- Silhouette
# Line chart of the average mean-silhouette per number of k-means
# clusters. The title is a plotmath expression, so bold is applied inside
# the expression itself.
results %>%
  group_by(k) %>%
  summarise(Mean_mean_Silhoutte = mean(Mean_Silhouette)) %>%
  ggplot(aes(x = k, y = Mean_mean_Silhoutte, group = 1)) +
  geom_line() +
  geom_point(size = 3) +
  labs(
    title = expression(bold(paste(bar(bar(Silhouette))," Berdasarkan Jumlah Klaster Algoritma K-Means"))),
    x = "Jumlah Klaster",
    y = expression(bar(bar(Silhouette)))
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))

# No plot argument is given, so ggsave() writes ggplot2's last plot.
ggsave(
  "C:/Users/LENOVO/Documents/Output R/12.png",
  width = 8, height = 6, dpi = 300
)
# Summary table: average of the per-replication mean a and mean b for
# each number of k-means clusters.
results %>%
  group_by(k) %>%
  summarise(Mean_mean_a = mean(Mean_a), Mean_mean_b = mean(Mean_b))
## # A tibble: 3 × 3
## k Mean_mean_a Mean_mean_b
## <fct> <dbl> <dbl>
## 1 2 4.35 9.21
## 2 3 2.73 7.50
## 3 4 2.45 5.83
Box-plot dan line chart pengaruh interaksi antara ukuran sampel dan
tingkat noise terhadap ARI dan Silhouette
Box-plot
- ARI
# Box plot of per-replication ARI by sample size, one box per noise level.
ggplot(results, aes(x = n, y = ARI, fill = noise)) +
  geom_boxplot() +
  labs(
    title = "Distribusi ARI Berdasarkan Ukuran Sampel dan Tingkat Noise",
    x = "Ukuran Sampel",
    y = "ARI"
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

# No plot argument is given, so ggsave() writes ggplot2's last plot.
ggsave(
  "C:/Users/LENOVO/Documents/Output R/13.png",
  width = 8, height = 6, dpi = 300
)
- Silhouette
# Box plot of per-replication mean silhouette width by sample size, one
# box per noise level.
ggplot(results, aes(x = n, y = Mean_Silhouette, fill = noise)) +
  geom_boxplot() +
  labs(
    title = "Distribusi Mean Silhouette Berdasarkan Ukuran Sampel dan Tingkat Noise",
    x = "Ukuran Sampel",
    y = expression(bar(Silhouette))
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

# No plot argument is given, so ggsave() writes ggplot2's last plot.
ggsave(
  "C:/Users/LENOVO/Documents/Output R/14.png",
  width = 8, height = 6, dpi = 300
)
Line Chart
- ARI
# Interaction line chart: average ARI per sample size, one line per noise
# level.
results %>%
  group_by(n, noise) %>%
  summarise(
    Mean_ARI = mean(ARI),
    # Drop the grouping explicitly: with two grouping variables a bare
    # summarise() prints a regrouping message and leaves the result
    # grouped by n.
    .groups = "drop"
  ) %>%
  ggplot(aes(x = n, y = Mean_ARI, group = noise, colour = noise)) +
  geom_line() +
  geom_point(size = 3) +
  labs(
    title = "Rata-rata ARI Berdasarkan Interaksi Ukuran Sampel dan Tingkat Noise",
    x = "Ukuran Sampel",
    y = expression(bar(ARI))
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

# No plot argument is given, so ggsave() writes ggplot2's last plot.
ggsave(
  "C:/Users/LENOVO/Documents/Output R/15.png",
  width = 8, height = 6, dpi = 300
)
- Silhouette
# Interaction line chart: average mean-silhouette per sample size, one
# line per noise level.
results %>%
  group_by(n, noise) %>%
  summarise(
    Mean_mean_Silhoutte = mean(Mean_Silhouette),
    # Drop the grouping explicitly: with two grouping variables a bare
    # summarise() prints a regrouping message and leaves the result
    # grouped by n.
    .groups = "drop"
  ) %>%
  ggplot(aes(x = n, y = Mean_mean_Silhoutte, group = noise, colour = noise)) +
  geom_line() +
  geom_point(size = 3) +
  labs(
    title = expression(bold(paste(bar(bar(Silhouette))," Interaksi Ukuran Sampel dan Tingkat Noise"))),
    x = "Ukuran Sampel",
    y = expression(bar(bar(Silhouette)))
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))

# No plot argument is given, so ggsave() writes ggplot2's last plot.
ggsave(
  "C:/Users/LENOVO/Documents/Output R/16.png",
  width = 8, height = 6, dpi = 300
)
# Summary table: average of the per-replication mean a and mean b for
# every sample size x noise level combination.
results %>%
  group_by(n, noise) %>%
  summarise(
    Mean_mean_a = mean(Mean_a), Mean_mean_b = mean(Mean_b),
    .groups = "drop"
  )
## # A tibble: 9 × 4
## n noise Mean_mean_a Mean_mean_b
## <fct> <fct> <dbl> <dbl>
## 1 45 0.5 1.56 6.87
## 2 45 1.5 3.06 7.22
## 3 45 3 4.84 8.58
## 4 180 0.5 1.55 6.85
## 5 180 1.5 3.08 7.16
## 6 180 3 4.92 8.48
## 7 450 0.5 1.55 6.85
## 8 450 1.5 3.09 7.15
## 9 450 3 4.95 8.45
Box-plot dan line chart pengaruh interaksi antara tingkat noise dan
jumlah klaster pada algoritma K-Means terhadap ARI dan Silhouette
Box-plot
- ARI
# Box plot of per-replication ARI by noise level, one box per k.
ggplot(results, aes(x = noise, y = ARI, fill = k)) +
  geom_boxplot() +
  labs(
    title = "Distribusi ARI Berdasarkan Tingkat Noise dan Jumlah Klaster K-Means",
    x = "Tingkat Noise",
    y = "ARI"
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

# No plot argument is given, so ggsave() writes ggplot2's last plot.
ggsave(
  "C:/Users/LENOVO/Documents/Output R/17.png",
  width = 8, height = 6, dpi = 300
)
- Silhouette
# Box plot of per-replication mean silhouette width by noise level, one
# box per k.
ggplot(results, aes(x = noise, y = Mean_Silhouette, fill = k)) +
  geom_boxplot() +
  labs(
    # Title spelling corrected ("Silhoutte" -> "Silhouette") so it matches
    # the other plot titles.
    title = "Distribusi Mean Silhouette Berdasarkan Tingkat Noise dan Jumlah Klaster",
    x = "Tingkat Noise",
    y = expression(bar(Silhouette))
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

# No plot argument is given, so ggsave() writes ggplot2's last plot.
ggsave(
  "C:/Users/LENOVO/Documents/Output R/18.png",
  width = 8, height = 6, dpi = 300
)
Line Chart
- ARI
# Interaction line chart: average ARI per noise level, one line per k.
results %>%
  group_by(noise, k) %>%
  summarise(
    Mean_ARI = mean(ARI),
    # Drop the grouping explicitly: with two grouping variables a bare
    # summarise() prints a regrouping message and leaves the result
    # grouped by noise.
    .groups = "drop"
  ) %>%
  ggplot(aes(x = noise, y = Mean_ARI, group = k, colour = k)) +
  geom_line() +
  geom_point(size = 3) +
  labs(
    title = "Rata-rata ARI Berdasarkan Interaksi Tingkat Noise dan Jumlah Klaster",
    x = "Tingkat Noise",
    y = expression(bar(ARI))
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

# No plot argument is given, so ggsave() writes ggplot2's last plot.
ggsave(
  "C:/Users/LENOVO/Documents/Output R/19.png",
  width = 8, height = 6, dpi = 300
)
- Silhouette
# Interaction line chart: average mean-silhouette per noise level, one
# line per k.
results %>%
  group_by(noise, k) %>%
  summarise(
    Mean_mean_Silhoutte = mean(Mean_Silhouette),
    # Drop the grouping explicitly: with two grouping variables a bare
    # summarise() prints a regrouping message and leaves the result
    # grouped by noise.
    .groups = "drop"
  ) %>%
  ggplot(aes(x = noise, y = Mean_mean_Silhoutte, group = k, colour = k)) +
  geom_line() +
  geom_point(size = 3) +
  labs(
    title = expression(bold(paste(bar(bar(Silhouette))," Interaksi Tingkat Noise dan Jumlah Klaster K-Means"))),
    x = "Tingkat Noise",
    y = expression(bar(bar(Silhouette)))
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))

# No plot argument is given, so ggsave() writes ggplot2's last plot.
ggsave(
  "C:/Users/LENOVO/Documents/Output R/20.png",
  width = 8, height = 6, dpi = 300
)
# Summary table: average of the per-replication mean a and mean b for
# every noise level x k combination.
results %>%
  group_by(noise, k) %>%
  summarise(
    Mean_mean_a = mean(Mean_a), Mean_mean_b = mean(Mean_b),
    .groups = "drop"
  )
## # A tibble: 9 × 4
## noise k Mean_mean_a Mean_mean_b
## <fct> <fct> <dbl> <dbl>
## 1 0.5 2 2.97 8.60
## 2 0.5 3 0.886 7.01
## 3 0.5 4 0.811 4.96
## 4 1.5 2 4.20 8.85
## 5 1.5 3 2.64 7.14
## 6 1.5 4 2.40 5.54
## 7 3 2 5.88 10.2
## 8 3 3 4.67 8.35
## 9 3 4 4.15 6.98
Box-plot dan line chart pengaruh interaksi antara ukuran sampel dan
jumlah klaster pada algoritma K-Means terhadap ARI dan Silhouette
Box-Plot
- ARI
# Box plot of per-replication ARI by sample size, one box per k.
ggplot(results, aes(x = n, y = ARI, fill = k)) +
  geom_boxplot() +
  labs(
    title = "Distribusi ARI Berdasarkan Ukuran Sampel dan Jumlah Klaster K-Means",
    x = "Ukuran Sampel",
    y = "ARI"
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

# No plot argument is given, so ggsave() writes ggplot2's last plot.
ggsave(
  "C:/Users/LENOVO/Documents/Output R/21.png",
  width = 8, height = 6, dpi = 300
)
- Silhouette
# Box plot of per-replication mean silhouette width by sample size, one
# box per k.
ggplot(results, aes(x = n, y = Mean_Silhouette, fill = k)) +
  geom_boxplot() +
  labs(
    title = "Distribusi Mean Silhouette Berdasarkan Ukuran Sampel dan Jumlah Klaster",
    x = "Ukuran Sampel",
    y = expression(bar(Silhouette))
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

# No plot argument is given, so ggsave() writes ggplot2's last plot.
ggsave(
  "C:/Users/LENOVO/Documents/Output R/22.png",
  width = 8, height = 6, dpi = 300
)
Line Chart
- ARI
# Interaction line chart: average ARI per sample size, one line per k.
results %>%
  group_by(n, k) %>%
  summarise(
    Mean_ARI = mean(ARI),
    # Drop the grouping explicitly: with two grouping variables a bare
    # summarise() prints a regrouping message and leaves the result
    # grouped by n.
    .groups = "drop"
  ) %>%
  ggplot(aes(x = n, y = Mean_ARI, group = k, colour = k)) +
  geom_line() +
  geom_point(size = 3) +
  labs(
    title = "Rata-rata ARI Berdasarkan Interaksi Ukuran Sampel dan Jumlah Klaster",
    x = "Ukuran Sampel",
    y = expression(bar(ARI))
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

# No plot argument is given, so ggsave() writes ggplot2's last plot.
ggsave(
  "C:/Users/LENOVO/Documents/Output R/23.png",
  width = 8, height = 6, dpi = 300
)
- Silhouette
# Interaction line chart: average mean-silhouette per sample size, one
# line per k.
results %>%
  group_by(n, k) %>%
  summarise(
    Mean_mean_Silhoutte = mean(Mean_Silhouette),
    # Drop the grouping explicitly: with two grouping variables a bare
    # summarise() prints a regrouping message and leaves the result
    # grouped by n.
    .groups = "drop"
  ) %>%
  ggplot(aes(x = n, y = Mean_mean_Silhoutte, group = k, colour = k)) +
  geom_line() +
  geom_point(size = 3) +
  labs(
    title = expression(bold(paste(bar(bar(Silhouette))," Interaksi Ukuran Sampel dan Jumlah Klaster K-Means"))),
    x = "Ukuran Sampel",
    y = expression(bar(bar(Silhouette)))
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))

# No plot argument is given, so ggsave() writes ggplot2's last plot.
ggsave(
  "C:/Users/LENOVO/Documents/Output R/24.png",
  width = 8, height = 6, dpi = 300
)
# Summary table: average of the per-replication mean a and mean b for
# every sample size x k combination.
results %>%
  group_by(n, k) %>%
  summarise(
    Mean_mean_a = mean(Mean_a), Mean_mean_b = mean(Mean_b),
    .groups = "drop"
  )
## # A tibble: 9 × 4
## n k Mean_mean_a Mean_mean_b
## <fct> <fct> <dbl> <dbl>
## 1 45 2 4.34 9.28
## 2 45 3 2.72 7.52
## 3 45 4 2.40 5.87
## 4 180 2 4.35 9.19
## 5 180 3 2.74 7.49
## 6 180 4 2.47 5.81
## 7 450 2 4.35 9.16
## 8 450 3 2.75 7.49
## 9 450 4 2.50 5.79
Box-plot dan line chart pengaruh interaksi antara ukuran sampel,
noise, dan jumlah klaster pada algoritma K-Means terhadap ARI dan
Silhouette
Box-Plot
- ARI
# Box plot of per-replication ARI by sample size, filled by k, with one
# panel per noise level.
ggplot(results, aes(x = n, y = ARI, fill = k)) +
  geom_boxplot(alpha = 0.8, outlier.size = 1) +
  # One facet per noise level
  facet_wrap(~ noise, labeller = label_both) +
  labs(
    title = "Distribusi ARI berdasarkan Ukuran Sampel, Tingkat Noise, dan Jumlah Klaster",
    x = "Ukuran Sampel (n)",
    y = "ARI",
    fill = "Jumlah Klaster (k)"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(face = "bold", hjust = 0.5, size = 12),
    strip.text = element_text(face = "bold", size = 10)
  )

# No plot argument is given, so ggsave() writes ggplot2's last plot.
ggsave(
  "C:/Users/LENOVO/Documents/Output R/25.png",
  width = 8, height = 6, dpi = 300
)
- Silhouette
# Box plot of per-replication mean silhouette width by sample size,
# filled by k, with one panel per noise level.
ggplot(results, aes(x = n, y = Mean_Silhouette, fill = k)) +
  geom_boxplot(alpha = 0.8, outlier.size = 1) +
  # One facet per noise level
  facet_wrap(~ noise, labeller = label_both) +
  labs(
    # The plotted values are per-replication means, not means of means,
    # so a single bar is used here, as in the other box plots.
    title = expression(bold(paste("Distribusi ", bar(Silhouette), " berdasarkan Ukuran Sampel, Tingkat Noise, dan Jumlah Klaster"))),
    x = "Ukuran Sampel (n)",
    y = expression(bar(Silhouette)),
    fill = "Jumlah Klaster (k)"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(face = "bold", hjust = 0.5, size = 12),
    strip.text = element_text(face = "bold", size = 10)
  )

# No plot argument is given, so ggsave() writes ggplot2's last plot.
ggsave(
  "C:/Users/LENOVO/Documents/Output R/26.png",
  width = 8, height = 6, dpi = 300
)
Line Chart
- ARI
# Three-way interaction chart: average ARI per sample size, one line per
# k, one panel per noise level.
results %>%
  group_by(n, noise, k) %>%
  summarise(Mean_ARI = mean(ARI), .groups = "drop") %>%
  ggplot(aes(x = n, y = Mean_ARI, group = k, color = k)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  # One facet per noise level
  facet_wrap(~ noise, labeller = label_both) +
  labs(
    title = expression(bold(paste("Interaksi n, Noise, dan Jumlah Klaster terhadap ", bar(ARI)))),
    x = "Ukuran Sampel (n)",
    y = expression(bar(ARI)),
    color = "Jumlah Klaster (k)"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5, size = 12),
    # Bold facet strip labels (the noise panel titles)
    strip.text = element_text(face = "bold", size = 10)
  )

# No plot argument is given, so ggsave() writes ggplot2's last plot.
ggsave(
  "C:/Users/LENOVO/Documents/Output R/27.png",
  width = 8, height = 6, dpi = 300
)
- Silhouette
# Three-way interaction chart: average mean-silhouette per sample size,
# one line per k, one panel per noise level.
results %>%
  group_by(n, noise, k) %>%
  summarise(Mean_Sil = mean(Mean_Silhouette), .groups = "drop") %>%
  ggplot(aes(x = n, y = Mean_Sil, group = k, color = k)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  # One facet per noise level
  facet_wrap(~ noise, labeller = label_both) +
  labs(
    title = expression(bold(paste("Interaksi n, Noise, dan Jumlah Klaster terhadap ", bar(bar(Silhouette))))),
    x = "Ukuran Sampel (n)",
    y = expression(bar(bar(Silhouette))),
    color = "Jumlah Klaster (k)"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5, size = 12),
    # Bold facet strip labels (the noise panel titles)
    strip.text = element_text(face = "bold", size = 10)
  )

# No plot argument is given, so ggsave() writes ggplot2's last plot.
ggsave(
  "C:/Users/LENOVO/Documents/Output R/28.png",
  width = 8, height = 6, dpi = 300
)
# Summary table: average of the per-replication mean a and mean b for
# every sample size x noise level x k combination.
results %>%
  group_by(n, noise, k) %>%
  summarise(
    Mean_mean_a = mean(Mean_a), Mean_mean_b = mean(Mean_b),
    .groups = "drop"
  )
## # A tibble: 27 × 5
## n noise k Mean_mean_a Mean_mean_b
## <fct> <fct> <fct> <dbl> <dbl>
## 1 45 0.5 2 2.99 8.62
## 2 45 0.5 3 0.886 7.01
## 3 45 0.5 4 0.797 4.97
## 4 45 1.5 2 4.18 8.91
## 5 45 1.5 3 2.64 7.15
## 6 45 1.5 4 2.36 5.59
## 7 45 3 2 5.84 10.3
## 8 45 3 3 4.63 8.39
## 9 45 3 4 4.04 7.06
## 10 180 0.5 2 2.96 8.59
## # ℹ 17 more rows