This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

MODIFIED FUZZY POSSIBILISTIC C-MEANS (MFPCM)

# Memuat library
library(ppclust)
library(cluster)
library(fpc)
## 
## Attaching package: 'fpc'
## The following object is masked from 'package:ppclust':
## 
##     plotcluster
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(ggplot2)
library(clusterSim)
## Loading required package: MASS
# Read the preprocessed dataset, preview its first rows, and keep columns 3 to 8
# as the clustering features.
data <- read.csv(
  file = "D:/E/PENS/Skripsi Proyek Akhir/Code/kode terbaru - data format new/datahasilolah.csv",
  sep = ","
)
head(data)
x <- data[, 3:8]

# Min-max normalisation: linearly rescale a numeric vector to [0, 1].
#
# x: numeric vector (one feature column).
# Returns a numeric vector of the same length as `x`.
#   - NA entries stay NA and are ignored when the range is computed.
#   - A constant vector (max == min) maps to 0 rather than NaN from 0 / 0.
#   - An empty or all-NA vector is returned as all-NA of the same length.
min_max_norm <- function(x) {
  if (all(is.na(x))) {
    return(rep(NA_real_, length(x)))
  }
  rng <- range(x, na.rm = TRUE)
  span <- rng[2] - rng[1]
  shifted <- x - rng[1]
  # Zero range: every non-NA value equals the minimum, so `shifted` is already 0.
  if (isTRUE(span == 0)) {
    return(shifted)
  }
  shifted / span
}

# Rescale each of the six feature columns of `x` to [0, 1] and preview the result.
x_scaled <- as.data.frame(lapply(x, min_max_norm))
head(x_scaled)
# 2D scatter plot of the scaled data on its first two principal components.
# The data are centred but not rescaled before the PCA.
pca_result <- prcomp(x_scaled, center = TRUE, scale. = FALSE)

# Scores on the first two components (columns PC1 and PC2).
df_pca <- data.frame(pca_result$x[, 1:2])

with(df_pca, plot(
  PC1, PC2,
  main = "Scatter Plot 2D (PCA)",
  xlab = "PC1",
  ylab = "PC2",
  pch = 19,     # point shape
  col = "blue"  # point colour
))

grid()

PARAMETER MFPCM

# Number of clusters for the default-parameter run.
k <- 3

# MFPCM settings; each variable feeds the mfpcm() argument of the same name.
m <- 2.0                  # fuzziness parameter
eta <- 2.0                # typicality parameter
dmetric <- "sqeuclidean"  # distance metric
pw <- 2                   # power parameter for the distance metric
alginitv <- "kmpp"        # centre initialisation algorithm (k-means++)
alginitu <- "imembrand"   # membership initialisation algorithm
nstart <- 1               # number of runs with different initialisations
iter.max <- 1000          # maximum number of iterations
con.val <- 1e-9           # convergence value
fixcent <- FALSE          # cluster centres are not fixed
fixmemb <- FALSE          # memberships are not fixed
stand <- FALSE            # no further standardisation (data already scaled)
numseed <- 123            # seed for reproducibility

# Run the MFPCM algorithm on the scaled features.
mfpcm_result <- mfpcm(
  x = x_scaled, centers = k,
  m = m, eta = eta,
  dmetric = dmetric, pw = pw,
  alginitv = alginitv, alginitu = alginitu,
  nstart = nstart, iter.max = iter.max, con.val = con.val,
  fixcent = fixcent, fixmemb = fixmemb,
  stand = stand, numseed = numseed
)

PLOT 2D HASIL MFPCM

plot 2D area dan tanpa centroid

# 2D cluster-area plot (no centroids) of the MFPCM result in PCA space.
#
# Hard labels: each row is assigned to the column of `u` holding its largest
# value. ties.method = "first" keeps the labels deterministic; the max.col()
# default breaks ties at random, so repeated calls could label a row differently.
cluster_labels <- max.col(mfpcm_result$u, ties.method = "first")
pca_result <- prcomp(x_scaled, center = TRUE, scale. = FALSE)
df_pca <- data.frame(pca_result$x[, 1:2], Cluster = as.factor(cluster_labels))

fviz_cluster(list(data = df_pca[, 1:2], cluster = cluster_labels),
             ellipse.type = "norm",
             geom = "point",
             palette = "jco",
             ggtheme = theme_minimal())

plot 2D dengan centroid

# 2D PCA plot of the MFPCM clusters together with their centroids.

# Hard labels from the largest entry per row of `u`. ties.method = "first"
# makes the result deterministic (the max.col() default breaks ties at random).
cluster_labels <- max.col(mfpcm_result$u, ties.method = "first")

pca_result <- prcomp(x_scaled, center = TRUE, scale. = FALSE)
df_pca <- data.frame(
  PC1 = pca_result$x[, 1],
  PC2 = pca_result$x[, 2],
  Cluster = as.factor(cluster_labels)
)

centroids <- mfpcm_result$v

# Project the centroids into the same PCA space as the data: subtract the PCA
# centre, then multiply by the loadings of the first two components.
centroids_pca <- as.matrix(scale(centroids,
                                 center = pca_result$center,
                                 scale = FALSE)) %*% pca_result$rotation[, 1:2]

centroids_df <- data.frame(
  PC1 = centroids_pca[, 1],
  PC2 = centroids_pca[, 2],
  # seq_len() stays correct even if there are zero centroid rows.
  Cluster = as.factor(seq_len(nrow(centroids_pca)))
)

plot_hasil_mfpcm <- ggplot(df_pca, aes(PC1, PC2, color = Cluster)) +
  geom_point(alpha = 0.7) +
  stat_ellipse(type = "norm", linetype = 2) +
  # Centroid markers and their cluster-number labels.
  geom_point(data = centroids_df, aes(PC1, PC2),
             color = "black", fill = "yellow",
             size = 5, shape = 21, stroke = 1.2) +
  geom_label(data = centroids_df, aes(PC1, PC2, label = Cluster),
             color = "black", fill = "white", fontface = "bold") +
  theme_minimal() +
  ggtitle("MFPCM Clustering with Centroids")

plot_hasil_mfpcm

# Save the default-parameter cluster plot as an 8 x 6 PNG at 300 dpi.
ggsave(
  "D:/E/PENS/Skripsi Proyek Akhir/Code/kode terbaru - data format new/plot_MFPCM_k3_deafult.png",
  plot_hasil_mfpcm,
  dpi = 300, width = 8, height = 6
)

EVALUASI CLUSTER

BSS/TSS: rasio BSS/TSS yang mendekati 1 menunjukkan bahwa centroid antar-cluster semakin terpisah. Semakin terpisah centroidnya, semakin tinggi nilai BSS/TSS.

# BSS/TSS ratio for the hard cluster labels.

# Total sum of squares: squared deviations from the overall column means.
tss <- sum(scale(x_scaled, scale = FALSE)^2)

# Within-cluster sum of squares around each cluster's own column means.
# The loop variable is `k_idx`, not `k`, so the cluster count stored in `k`
# is not overwritten by the last cluster label.
wss <- 0
for (k_idx in unique(cluster_labels)) {
  cluster_data <- x_scaled[cluster_labels == k_idx, , drop = FALSE]
  center <- colMeans(cluster_data)
  wss <- wss + sum(scale(cluster_data, center = center, scale = FALSE)^2)
}

# Between-cluster sum of squares is the remainder.
bss <- tss - wss

bss_tss_ratio <- bss / tss
bss_tss_ratio
## [1] 0.7584972

SILHOUETTE SCORE: semakin mendekati 1, semakin baik. Nilai silhouette yang tinggi menunjukkan semakin sedikit overlapping antar-cluster.

# Mean silhouette width of the hard labels, using Euclidean distances.
d <- dist(x_scaled, method = "euclidean")
sil <- silhouette(x = cluster_labels, dist = d)

# Average the per-observation silhouette widths.
silhouette_score <- mean(sil[, "sil_width"])
silhouette_score
## [1] 0.5051579

DB INDEX (Davies-Bouldin Index): cluster dikatakan lebih baik jika nilai DB Index semakin kecil. Indeks ini mengukur seberapa kompak setiap cluster dan seberapa jauh jarak antar-centroid cluster.

# Davies-Bouldin index of the hard labels on the scaled features.
db_index <- clusterSim::index.DB(x_scaled, cluster_labels)[["DB"]]
db_index
## [1] 1.507572

BSS/TSS = 0.76 (tinggi)
Silhouette = 0.51 (kurang)
DB Index = 1.51 (buruk)

Interpretasi:

  • centroid cluster jauh (makanya BSS/TSS tinggi)

  • tetapi data masing-masing cluster overlap (makanya silhouette rendah)

  • cluster juga tidak kompak / beda ukuran (makanya DB jelek)

→ artinya cluster “terpisah secara centroid”, tapi titik-titiknya saling campur.

Hyperparameter Tuning

# Reload the preprocessed dataset for the tuning run and keep columns 3 to 8
# as the clustering features.
data <- read.csv(
  file = "D:/E/PENS/Skripsi Proyek Akhir/Code/kode terbaru - data format new/datahasilolah.csv",
  sep = ","
)

x <- data[, 3:8]
# Min-max normalisation: linearly rescale a numeric vector to [0, 1].
#
# x: numeric vector (one feature column).
# Returns a numeric vector of the same length as `x`.
#   - NA entries stay NA and are ignored when the range is computed.
#   - A constant vector (max == min) maps to 0 rather than NaN from 0 / 0.
#   - An empty or all-NA vector is returned as all-NA of the same length.
min_max_norm <- function(x) {
  if (all(is.na(x))) {
    return(rep(NA_real_, length(x)))
  }
  rng <- range(x, na.rm = TRUE)
  span <- rng[2] - rng[1]
  shifted <- x - rng[1]
  # Zero range: every non-NA value equals the minimum, so `shifted` is already 0.
  if (isTRUE(span == 0)) {
    return(shifted)
  }
  shifted / span
}
# Rescale every feature column of `x` to [0, 1]. All six features are kept so
# the tuning grid is evaluated on the same feature space as the other model
# fits in this notebook; selecting `x[-1]` would drop the first feature.
x_scaled <- as.data.frame(lapply(x, min_max_norm))


# Hyperparameter tuning setup.
library(inaparc)

# Candidate values for the number of clusters, m and eta.
k_values <- 2:6
m_values <- c(1.8, 2.0, 2.2)
eta_values <- c(1.5, 2.0, 2.5)


# Number of (k, m, eta) combinations in the full grid.
total_iter <- nrow(expand.grid(k_values, m_values, eta_values))


# Text progress bar sized to the number of combinations.
pb <- txtProgressBar(min = 0, max = total_iter, style = 3)
##   |                                                                              |                                                                      |   0%
# Grid search over (k, m, eta): fit MFPCM for every combination and record the
# mean silhouette width and the Davies-Bouldin index of the hard labels.

# Fixed seed so the initialisations drawn inside the loop, and therefore the
# resulting table, are repeatable between runs.
set.seed(123)

# Pairwise distances do not depend on the hyperparameters; compute them once.
d_scaled <- dist(x_scaled)

# One result row per combination, stored in a preallocated list and bound once
# after the loop instead of growing a data frame with rbind() on every pass.
tuning_rows <- vector("list", total_iter)
iter <- 0

# Loop variables carry a `_val` suffix so the globals `k`, `m` and `eta` are
# not overwritten; the fit is kept in `fit` so `mfpcm_result` is left intact.
for (k_val in k_values) {
  for (m_val in m_values) {
    for (eta_val in eta_values) {

      iter <- iter + 1
      setTxtProgressBar(pb, iter)

      # Initial centres (kmpp) and initial memberships (imembrand).
      v <- inaparc::kmpp(x_scaled, k = k_val)$v
      u <- inaparc::imembrand(nrow(x_scaled), k = k_val)$u

      fit <- mfpcm(
        x = x_scaled,
        centers = v,
        memberships = u,
        m = m_val,
        eta = eta_val
      )

      # Deterministic hard labels (max.col() breaks ties at random by default),
      # renumbered 1..n so a cluster that received no rows leaves no gap.
      cluster_label <- as.integer(
        factor(max.col(fit$u, ties.method = "first"))
      )

      if (length(unique(cluster_label)) >= 2) {
        # --- Silhouette ---
        sil <- silhouette(cluster_label, d_scaled)
        sil_score <- mean(sil[, 3])

        # --- DBI ---
        dbi <- clusterSim::index.DB(x_scaled, cluster_label)$DB
      } else {
        # Fewer than two non-empty clusters: record NA for both indices rather
        # than aborting the whole grid.
        sil_score <- NA_real_
        dbi <- NA_real_
      }

      tuning_rows[[iter]] <- data.frame(
        K = k_val,
        m = m_val,
        eta = eta_val,
        Silhouette = sil_score,
        DBI = dbi
      )
    }
  }
}

hasil_tuning_mfpcm <- do.call(rbind, tuning_rows)
##   |                                                                              |==                                                                    |   2%  |                                                                              |===                                                                   |   4%  |                                                                              |=====                                                                 |   7%  |                                                                              |======                                                                |   9%  |                                                                              |========                                                              |  11%  |                                                                              |=========                                                             |  13%  |                                                                              |===========                                                           |  16%  |                                                                              |============                                                          |  18%  |                                                                              |==============                                                        |  20%  |                                                                              |================                                                      |  22%  |                                                                              |=================                                                     |  24%  |                                                                              |===================                                                   |  27%  |                                                                              
|====================                                                  |  29%  |                                                                              |======================                                                |  31%  |                                                                              |=======================                                               |  33%  |                                                                              |=========================                                             |  36%  |                                                                              |==========================                                            |  38%  |                                                                              |============================                                          |  40%  |                                                                              |==============================                                        |  42%  |                                                                              |===============================                                       |  44%  |                                                                              |=================================                                     |  47%  |                                                                              |==================================                                    |  49%  |                                                                              |====================================                                  |  51%  |                                                                              |=====================================                                 |  53%  |                                                                              |=======================================                               |  56%  |                        
                                                      |========================================                              |  58%  |                                                                              |==========================================                            |  60%  |                                                                              |============================================                          |  62%  |                                                                              |=============================================                         |  64%  |                                                                              |===============================================                       |  67%  |                                                                              |================================================                      |  69%  |                                                                              |==================================================                    |  71%  |                                                                              |===================================================                   |  73%  |                                                                              |=====================================================                 |  76%  |                                                                              |======================================================                |  78%  |                                                                              |========================================================              |  80%  |                                                                              |==========================================================            |  82%  |                                                                              
|===========================================================           |  84%  |                                                                              |=============================================================         |  87%  |                                                                              |==============================================================        |  89%  |                                                                              |================================================================      |  91%  |                                                                              |=================================================================     |  93%  |                                                                              |===================================================================   |  96%  |                                                                              |====================================================================  |  98%  |                                                                              |======================================================================| 100%
# Finish the progress bar, show the tuning table, and export it as CSV.
close(pb)
# View() opens a data viewer and may fail outside an interactive session;
# skip it there so the CSV export below still runs.
if (interactive()) {
  View(hasil_tuning_mfpcm)
}
write.csv(hasil_tuning_mfpcm, "D:/E/PENS/Skripsi Proyek Akhir/Code/kode terbaru - data format new/parameter_hasil_tuning_mfpcm(minmaxscaler).csv", row.names = FALSE)
# Refit the model with the parameters chosen by tuning: reload the dataset and
# keep columns 3 to 8 as the clustering features.
data <- read.csv(
  file = "D:/E/PENS/Skripsi Proyek Akhir/Code/kode terbaru - data format new/datahasilolah.csv",
  sep = ","
)
x <- data[, 3:8]
# Min-max normalisation: linearly rescale a numeric vector to [0, 1].
#
# x: numeric vector (one feature column).
# Returns a numeric vector of the same length as `x`.
#   - NA entries stay NA and are ignored when the range is computed.
#   - A constant vector (max == min) maps to 0 rather than NaN from 0 / 0.
#   - An empty or all-NA vector is returned as all-NA of the same length.
min_max_norm <- function(x) {
  if (all(is.na(x))) {
    return(rep(NA_real_, length(x)))
  }
  rng <- range(x, na.rm = TRUE)
  span <- rng[2] - rng[1]
  shifted <- x - rng[1]
  # Zero range: every non-NA value equals the minimum, so `shifted` is already 0.
  if (isTRUE(span == 0)) {
    return(shifted)
  }
  shifted / span
}

# Rescale each of the six feature columns of `x` to [0, 1].
x_scaled <- as.data.frame(lapply(x, min_max_norm))
# Fit MFPCM with the tuned parameters.
set.seed(123)

# Tuned values.
best_k <- 3      # number of clusters
best_m <- 1.8    # fuzziness parameter
best_eta <- 2.0  # typicality parameter

# Remaining settings; each variable feeds the mfpcm() argument of the same name.
dmetric <- "sqeuclidean"  # distance metric
pw <- 2                   # power parameter for the distance metric
alginitv <- "kmpp"        # centre initialisation algorithm (k-means++)
alginitu <- "imembrand"   # membership initialisation algorithm
nstart <- 1               # number of runs with different initialisations
iter.max <- 1000          # maximum number of iterations
con.val <- 1e-9           # convergence value
fixcent <- FALSE          # cluster centres are not fixed
fixmemb <- FALSE          # memberships are not fixed
stand <- FALSE            # no further standardisation (data already scaled)
numseed <- 123            # seed for reproducibility

# Run the MFPCM algorithm on the scaled features.
mfpcm_best_result <- mfpcm(
  x = x_scaled, centers = best_k,
  m = best_m, eta = best_eta,
  dmetric = dmetric, pw = pw,
  alginitv = alginitv, alginitu = alginitu,
  nstart = nstart, iter.max = iter.max, con.val = con.val,
  fixcent = fixcent, fixmemb = fixmemb,
  stand = stand, numseed = numseed
)
# BSS/TSS ratio for the tuned model, using the labels stored in the fit.
# NOTE(review): this uses `$cluster`, whereas the plots of this model derive
# their labels from max.col() on `$u` - confirm the two always agree.
best_cluster_label <- mfpcm_best_result$cluster

# Total sum of squares: squared deviations from the overall column means.
tss <- sum(scale(x_scaled, scale = FALSE)^2)

# Within-cluster sum of squares, accumulated cluster by cluster in order of
# first appearance of each label.
wss <- Reduce(
  function(acc, lab) {
    members <- x_scaled[best_cluster_label == lab, ]
    acc + sum(scale(members, center = colMeans(members), scale = FALSE)^2)
  },
  unique(best_cluster_label),
  0
)

# Between-cluster sum of squares is the remainder.
bss <- tss - wss

bss_tss_ratio <- bss / tss
bss_tss_ratio
## [1] 0.7336564
# Check that the number of distinct cluster labels matches best_k.
length(unique(best_cluster_label))
## [1] 3
# Check the m value stored in the fitted object.
mfpcm_best_result[["m"]]
## [1] 1.8
# Check the eta value stored in the fitted object.
mfpcm_best_result[["eta"]]
## [1] 2
# 2D PCA plot of the tuned MFPCM clusters together with their centroids.

# Hard labels from the largest entry per row of `u`. ties.method = "first"
# makes the result deterministic (the max.col() default breaks ties at random).
best_cluster_labels <- max.col(mfpcm_best_result$u, ties.method = "first")

pca_result <- prcomp(x_scaled, center = TRUE, scale. = FALSE)
df_pca <- data.frame(
  PC1 = pca_result$x[, 1],
  PC2 = pca_result$x[, 2],
  Cluster = as.factor(best_cluster_labels)
)

centroids <- mfpcm_best_result$v

# Project the centroids into the same PCA space as the data: subtract the PCA
# centre, then multiply by the loadings of the first two components.
centroids_pca <- as.matrix(scale(centroids,
                                 center = pca_result$center,
                                 scale = FALSE)) %*% pca_result$rotation[, 1:2]

centroids_df <- data.frame(
  PC1 = centroids_pca[, 1],
  PC2 = centroids_pca[, 2],
  # seq_len() stays correct even if there are zero centroid rows.
  Cluster = as.factor(seq_len(nrow(centroids_pca)))
)

plot_hasil_best_mfpcm <- ggplot(df_pca, aes(PC1, PC2, color = Cluster)) +
  geom_point(alpha = 0.7) +
  stat_ellipse(type = "norm", linetype = 2) +
  # Centroid markers and their cluster-number labels.
  geom_point(data = centroids_df, aes(PC1, PC2),
             color = "black", fill = "yellow",
             size = 5, shape = 21, stroke = 1.2) +
  geom_label(data = centroids_df, aes(PC1, PC2, label = Cluster),
             color = "black", fill = "white", fontface = "bold") +
  theme_minimal() +
  ggtitle("MFPCM Clustering with Centroids (Best m=1.8; eta=2.0 )")

print(plot_hasil_best_mfpcm)

# Save the tuned-model plot as an 8 x 6 PNG at 300 dpi.
# The plot object is `plot_hasil_best_mfpcm`; passing `plot_hasil_mfpcm` would
# write the default-parameter figure under the "BEST" file name.
ggsave(
  filename = "D:/E/PENS/Skripsi Proyek Akhir/Code/kode terbaru - data format new/plot_MFPCM_k3_BEST.png",
  plot = plot_hasil_best_mfpcm,
  width = 8,
  height = 6,
  dpi = 300
)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:MASS':
## 
##     select
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Interactive 3D scatter of the tuned MFPCM clusters on the first three
# principal components.

# Hard labels from the largest entry per row of `u`. ties.method = "first"
# makes the result deterministic (the max.col() default breaks ties at random).
best_cluster_labels <- max.col(mfpcm_best_result$u, ties.method = "first")

pca_result <- prcomp(x_scaled, center = TRUE, scale. = FALSE)

df_pca <- data.frame(
  PC1 = pca_result$x[, 1],
  PC2 = pca_result$x[, 2],
  PC3 = pca_result$x[, 3],
  Cluster = as.factor(best_cluster_labels)
)

# One marker per observation, coloured by cluster.
plot_ly(df_pca,
        x = ~PC1,
        y = ~PC2,
        z = ~PC3,
        color = ~Cluster,
        colors = "Set1",
        type = "scatter3d",
        mode = "markers",
        marker = list(size = 4)) %>%
  layout(scene = list(
    xaxis = list(title = "PC1"),
    yaxis = list(title = "PC2"),
    zaxis = list(title = "PC3")
  ))

Hasil Parameter Tuning

Parameter hasil tuning terbaik selain K = 2 adalah:

| K | m | Eta | Silhouette | DBI | BSS/TSS |
|---|---|---|---|---|---|
| 3 | 1.8 | 2.0 | 0.7512844 | 0.6089492 | 0.7336564 |
| 4 | 1.8 | 2.0 | 0.7547028 | 0.6842738 | - |
| 3 | 2.0 | 1.5, 2, 2.5 | 0.5139118 | 1.4856 | - |

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.