# Normalisasi data menggunakan fungsi custom
# Apply the custom minMax() scaler to every column of `dataa` and collect the
# rescaled columns back into a data frame.
data <- as.data.frame(lapply(X = dataa, FUN = minMax))
# Preview the first six rows of the normalised data.
head(data, n = 6L)
## overweight Poverty sanitation stunted undernourished wasting
## 1 0.8469388 0.3970588 0.00000000 0.0000000 0.08571429 0.0000000
## 2 0.3877551 0.6715686 0.24175824 0.5233918 0.12755102 0.5342466
## 3 1.0000000 0.2794118 0.92307692 0.5877193 0.16326531 1.0000000
## 4 0.3061224 0.6568627 0.61538462 0.3333333 0.10714286 0.9178082
## 5 0.3265306 0.6813725 1.00000000 0.4912281 0.10204082 0.8356164
## 6 0.5000000 0.1960784 0.06593407 0.3216374 0.00000000 0.9315068
# Per-column summary statistics of the normalised data.
summary(data)
## overweight Poverty sanitation stunted
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.3112 1st Qu.:0.2169 1st Qu.:0.06868 1st Qu.:0.2646
## Median :0.4439 Median :0.5270 Median :0.42857 Median :0.3596
## Mean :0.4959 Mean :0.4658 Mean :0.46703 Mean :0.3906
## 3rd Qu.:0.7832 3rd Qu.:0.6681 3rd Qu.:0.87637 3rd Qu.:0.5154
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.0000
## undernourished wasting
## Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0898 1st Qu.:0.5548
## Median :0.1122 Median :0.6986
## Mean :0.1887 Mean :0.6479
## 3rd Qu.:0.1276 3rd Qu.:0.8973
## Max. :1.0000 Max. :1.0000
# Correlation matrix across every column of the normalised data. `data` is
# passed to cor() whole, so no leading column is skipped.
corr <- cor(data)
# Lower-triangle correlogram with the coefficient printed in each cell.
# The title is kept on a single line so no stray line break ends up in the plot.
ggcorrplot(
  corr,
  type = "lower",
  lab = TRUE,
  lab_size = 5,
  title = "Matriks Korelasi Data",
  ggtheme = theme_bw
)

# Scatter-plot matrix of every pair of variables, using Pearson correlation.
pairs.panels(x = data, method = "pearson")

# Multicollinearity check via variance inflation factors (VIF).
q <- data # data to analyse; adjust the columns to match your own data
f <- cor(q) # correlation matrix
vif <- diag(solve(f)) # VIF = diagonal of the inverse correlation matrix
print(vif)
## overweight Poverty sanitation stunted undernourished
## 4.760005 6.082476 4.931250 5.403900 3.551688
## wasting
## 2.463725
# A VIF above 10 is commonly read as a sign of strong multicollinearity;
# report whether any variable crosses that threshold.
cat(
  if (any(vif > 10)) {
    "Ada multikolinearitas di antara beberapa variabel.\n"
  } else {
    "Tidak ada multikolinearitas yang signifikan.\n"
  }
)
## Tidak ada multikolinearitas yang signifikan.
# Pairwise Euclidean distances between the rows (observations) of `data`.
d <- dist(x = data, method = "euclidean")
print(d)
## 1 2 3 4 5 6 7
## 2 0.9517148
## 3 1.4969194 1.1023456
## 4 1.3010076 0.5745738 0.8906331
## 5 1.5137653 0.8193193 0.8131000 0.4250394
## 6 1.0693968 0.6959841 1.0458416 0.7507401 1.0892426
## 7 1.4672316 0.7304112 1.3241792 0.5676820 0.5821773 1.2050675
## 8 0.7197933 0.8828406 1.1247793 1.0062943 1.2992232 0.5269185 1.4208282
## 9 1.9996568 1.2862065 1.4080846 1.2060176 1.0736695 1.6444028 1.2046251
## 10 0.5441613 0.8750688 1.2433493 1.1714460 1.3780061 0.7663051 1.4662362
## 8 9
## 2
## 3
## 4
## 5
## 6
## 7
## 8
## 9 1.8416859
## 10 0.4877851 1.8106489
# Heat map of the distance matrix on a red -> white -> black colour gradient.
fviz_dist(
  dist.obj = d,
  gradient = list(low = "red", mid = "white", high = "black")
)

# Number of observations available for clustering.
nrow(data)
## [1] 10
# Pick the number of clusters with the silhouette method, using k-means
# partitions and trying at most 5 clusters.
fviz_nbclust(
  x = data,
  FUNcluster = kmeans,
  method = "silhouette",
  k.max = 5
) +
  labs(subtitle = "Menentukan Jumlah Klaster Optimal dengan Metode Silhouette")

# Number of clusters, entered by hand from the silhouette plot of the
# previous step.
optimal_k <- 2
# Fuzzy c-means on the normalised data, run from 5 starts.
res.fcm <- fcm(x = data, centers = optimal_k, nstart = 5)
summary(object = res.fcm)
## Summary for 'res.fcm'
##
## Number of data objects: 10
##
## Number of clusters: 2
##
## Crisp clustering vector:
## [1] 1 2 2 2 2 1 2 1 2 1
##
## Initial cluster prototypes:
## overweight Poverty sanitation stunted undernourished wasting
## Cluster 1 0.05102041 0.6578431 0.95604396 1.00000000 1.000000 0.7397260
## Cluster 2 0.79591837 0.1176471 0.05494505 0.01754386 0.127551 0.6575342
##
## Final cluster prototypes:
## overweight Poverty sanitation stunted undernourished wasting
## Cluster 1 0.7112850 0.2143818 0.1206429 0.1944470 0.1111401 0.4729956
## Cluster 2 0.2956172 0.6828418 0.7695536 0.5206095 0.2375043 0.7842739
##
## Distance between the final cluster prototypes
## Cluster 1
## Cluster 2 1.032564
##
## Difference between the initial and final cluster prototypes
## overweight Poverty sanitation stunted undernourished wasting
## Cluster 1 0.6602646 -0.4434613 -0.8354011 -0.8055530 -0.8888599 -0.2667304
## Cluster 2 -0.5003012 0.5651947 0.7146086 0.5030657 0.1099533 0.1267396
##
## Root Mean Squared Deviations (RMSD): 1.44953
## Mean Absolute Deviation (MAD): 19.2604
##
## Membership degrees matrix (top and bottom 5 rows):
## Cluster 1 Cluster 2
## 1 0.85172320 0.1482768
## 2 0.45090211 0.5490979
## 3 0.38781519 0.6121848
## 4 0.10307555 0.8969244
## 5 0.05290276 0.9470972
## ...
## Cluster 1 Cluster 2
## 6 0.7566357 0.24336432
## 7 0.1465492 0.85345078
## 8 0.9402973 0.05970269
## 9 0.2423085 0.75769146
## 10 0.9368554 0.06314457
##
## Descriptive statistics for the membership degrees by clusters
## Size Min Q1 Mean Median Q3 Max
## Cluster 1 4 0.7566357 0.8279513 0.8713779 0.8942893 0.9377159 0.9402973
## Cluster 2 6 0.5490979 0.6485615 0.7694078 0.8055711 0.8860560 0.9470972
##
## Dunn's Fuzziness Coefficients:
## dunn_coeff normalized
## 0.7276098 0.4552195
##
## Within cluster sum of squares by cluster:
## 1 2
## 0.7651561 2.4158570
## (between_SS / total_SS = 43.79%)
##
## Available components:
## [1] "u" "v" "v0" "d" "x"
## [6] "cluster" "csize" "sumsqrs" "k" "m"
## [11] "iter" "best.start" "func.val" "comp.time" "inpargs"
## [16] "algorithm" "call"
# Show the fuzzy membership degrees of every observation, one column per
# cluster.
as.data.frame(res.fcm[["u"]])
## Cluster 1 Cluster 2
## 1 0.85172321 0.14827679
## 2 0.45090211 0.54909789
## 3 0.38781519 0.61218481
## 4 0.10307555 0.89692445
## 5 0.05290276 0.94709724
## 6 0.75663568 0.24336432
## 7 0.14654922 0.85345078
## 8 0.94029731 0.05970269
## 9 0.24230854 0.75769146
## 10 0.93685543 0.06314457
# Convert the FCM result to a kmeans-style object so factoextra can plot it.
res.fcm_ppclust <- ppclust2(res.fcm, "kmeans")
# Cluster plot with a convex hull drawn around each cluster.
fviz_cluster(
  object = res.fcm_ppclust,
  data = data,
  ellipse.type = "convex",
  palette = "jco",
  repel = TRUE
)

# Run FCM with 2 clusters (nstart is left at its default here).
res.fcm <- fcm(x = data, centers = 2)
# Membership matrix for the first 10 observations.
head(as.data.frame(res.fcm[["u"]]), n = 10)
## Cluster 1 Cluster 2
## 1 0.85172321 0.14827679
## 2 0.45090211 0.54909789
## 3 0.38781519 0.61218481
## 4 0.10307555 0.89692445
## 5 0.05290276 0.94709724
## 6 0.75663568 0.24336432
## 7 0.14654922 0.85345078
## 8 0.94029731 0.05970269
## 9 0.24230854 0.75769146
## 10 0.93685543 0.06314457
# Convert the FCM result to a kmeans-style object.
res.fcm_ppclust <- ppclust2(res.fcm, "kmeans")
# Attach each observation's crisp cluster label to the data frame.
# NOTE(review): from here on `data` carries a `cluster` column, so any later
# call that passes `data` whole will treat the labels as an extra variable.
data[["cluster"]] <- res.fcm_ppclust$cluster
# Count how many observations fall into each cluster.
cluster_counts <- table(data[["cluster"]])
print(cluster_counts)
##
## 1 2
## 4 6
# Show the cluster prototypes (centres) at the start and at the end of the fit.
res.fcm[["v0"]] # initial prototypes
## overweight Poverty sanitation stunted undernourished wasting
## Cluster 1 1.00000000 0.2794118 0.9230769 0.5877193 0.1632653 1.000000
## Cluster 2 0.05102041 0.6578431 0.9560440 1.0000000 1.0000000 0.739726
res.fcm[["v"]] # final prototypes
## overweight Poverty sanitation stunted undernourished wasting
## Cluster 1 0.7112850 0.2143818 0.1206429 0.1944470 0.1111401 0.4729956
## Cluster 2 0.2956172 0.6828418 0.7695536 0.5206095 0.2375043 0.7842739
summary(object = res.fcm) # full summary of the FCM result
## Summary for 'res.fcm'
##
## Number of data objects: 10
##
## Number of clusters: 2
##
## Crisp clustering vector:
## [1] 1 2 2 2 2 1 2 1 2 1
##
## Initial cluster prototypes:
## overweight Poverty sanitation stunted undernourished wasting
## Cluster 1 1.00000000 0.2794118 0.9230769 0.5877193 0.1632653 1.000000
## Cluster 2 0.05102041 0.6578431 0.9560440 1.0000000 1.0000000 0.739726
##
## Final cluster prototypes:
## overweight Poverty sanitation stunted undernourished wasting
## Cluster 1 0.7112850 0.2143818 0.1206429 0.1944470 0.1111401 0.4729956
## Cluster 2 0.2956172 0.6828418 0.7695536 0.5206095 0.2375043 0.7842739
##
## Distance between the final cluster prototypes
## Cluster 1
## Cluster 2 1.032564
##
## Difference between the initial and final cluster prototypes
## overweight Poverty sanitation stunted undernourished
## Cluster 1 -0.2887150 -0.06502997 -0.8024340 -0.3932723 -0.05212517
## Cluster 2 0.2445968 0.02499866 -0.1864903 -0.4793905 -0.76249567
## wasting
## Cluster 1 -0.52700436
## Cluster 2 0.04454784
##
## Root Mean Squared Deviations (RMSD): 1.018585
## Mean Absolute Deviation (MAD): 11.6133
##
## Membership degrees matrix (top and bottom 5 rows):
## Cluster 1 Cluster 2
## 1 0.85172320 0.1482768
## 2 0.45090211 0.5490979
## 3 0.38781519 0.6121848
## 4 0.10307555 0.8969244
## 5 0.05290276 0.9470972
## ...
## Cluster 1 Cluster 2
## 6 0.7566357 0.24336432
## 7 0.1465492 0.85345078
## 8 0.9402973 0.05970269
## 9 0.2423085 0.75769146
## 10 0.9368554 0.06314457
##
## Descriptive statistics for the membership degrees by clusters
## Size Min Q1 Mean Median Q3 Max
## Cluster 1 4 0.7566357 0.8279513 0.8713779 0.8942893 0.9377159 0.9402973
## Cluster 2 6 0.5490979 0.6485615 0.7694078 0.8055711 0.8860560 0.9470972
##
## Dunn's Fuzziness Coefficients:
## dunn_coeff normalized
## 0.7276098 0.4552195
##
## Within cluster sum of squares by cluster:
## 1 2
## 0.7651561 2.4158570
## (between_SS / total_SS = 43.79%)
##
## Available components:
## [1] "u" "v" "v0" "d" "x"
## [6] "cluster" "csize" "sumsqrs" "k" "m"
## [11] "iter" "best.start" "func.val" "comp.time" "inpargs"
## [16] "algorithm" "call"
# Run FCM from several starts (nstart = 5) for a more stable solution.
# Cluster on the feature columns only: once a `cluster` label column has been
# attached to `data`, passing `data` whole makes FCM treat the earlier labels
# as an additional variable, which pulls the prototypes towards the old
# partition and inflates the fit and validity statistics.
# NOTE(review): output recorded below this call came from a run that still
# included the `cluster` column; re-knit to refresh it.
res.fcm <- fcm(
  data[, setdiff(names(data), "cluster"), drop = FALSE],
  centers = 2,
  nstart = 5
)
res.fcm[["func.val"]] # objective function value reached by each start
## [1] 2.680743 2.680743 2.680743 2.680743 2.680743
res.fcm[["iter"]] # iterations needed by each start to converge
## [1] 17 16 17 17 16
res.fcm[["best.start"]] # index of the start reported as best
## [1] 2
summary(object = res.fcm)
## Summary for 'res.fcm'
##
## Number of data objects: 10
##
## Number of clusters: 2
##
## Crisp clustering vector:
## [1] 2 1 1 1 1 2 1 2 1 2
##
## Initial cluster prototypes:
## overweight Poverty sanitation stunted undernourished wasting
## Cluster 1 0.3265306 0.6813725 1.00000000 0.4912281 0.1020408 0.8356164
## Cluster 2 0.7448980 0.0000000 0.07692308 0.2456140 0.1173469 0.2465753
## cluster
## Cluster 1 2
## Cluster 2 1
##
## Final cluster prototypes:
## overweight Poverty sanitation stunted undernourished wasting
## Cluster 1 0.3199186 0.6750937 0.7441243 0.5289705 0.23465186 0.7743921
## Cluster 2 0.7225500 0.1831765 0.0784540 0.1616056 0.09825264 0.4621842
## cluster
## Cluster 1 1.992852
## Cluster 2 1.040235
##
## Distance between the final cluster prototypes
## Cluster 1
## Cluster 2 2.005726
##
## Difference between the initial and final cluster prototypes
## overweight Poverty sanitation stunted undernourished
## Cluster 1 -0.006612011 -0.006278843 -0.255875690 0.03774239 0.1326110
## Cluster 2 -0.022347999 0.183176517 0.001530919 -0.08400843 -0.0190943
## wasting cluster
## Cluster 1 -0.06122438 -0.007148195
## Cluster 2 0.21560889 0.040235078
##
## Root Mean Squared Deviations (RMSD): 0.2982857
## Mean Absolute Deviation (MAD): 3.757231
##
## Membership degrees matrix (top and bottom 5 rows):
## Cluster 1 Cluster 2
## 1 0.09947554 0.90052446
## 2 0.81482054 0.18517946
## 3 0.75550784 0.24449216
## 4 0.95236693 0.04763307
## 5 0.96482149 0.03517851
## ...
## Cluster 1 Cluster 2
## 6 0.14363736 0.85636263
## 7 0.90176871 0.09823129
## 8 0.03039546 0.96960454
## 9 0.81043916 0.18956084
## 10 0.03541554 0.96458446
##
## Descriptive statistics for the membership degrees by clusters
## Size Min Q1 Mean Median Q3 Max
## Cluster 1 6 0.7555078 0.8115345 0.8666208 0.8582946 0.9397174 0.9648215
## Cluster 2 4 0.8563626 0.8894840 0.9227690 0.9325545 0.9658395 0.9696045
##
## Dunn's Fuzziness Coefficients:
## dunn_coeff normalized
## 0.8133326 0.6266652
##
## Within cluster sum of squares by cluster:
## 1 2
## 2.4158570 0.7651561
## (between_SS / total_SS = 60.21%)
##
## Available components:
## [1] "u" "v" "v0" "d" "x"
## [6] "cluster" "csize" "sumsqrs" "k" "m"
## [11] "iter" "best.start" "func.val" "comp.time" "inpargs"
## [16] "algorithm" "call"
# Scatter plot used to inspect the clusters on selected variables.
plotcluster(
  res.fcm,
  cp = 1,
  trans = TRUE
)

library(factoextra)
# Convert the FCM result to a kmeans-style object for visualisation.
res.fcm2 <- ppclust2(res.fcm, "kmeans")
# Cluster plot with a convex hull around each cluster.
# Only the feature columns are plotted: a `cluster` label column attached to
# `data` would otherwise be projected as if it were a measured variable.
# The cluster count in the title is taken from the fitted prototypes (one row
# per cluster) instead of being hard-coded, so it always matches the model.
fviz_cluster(
  res.fcm2,
  data = data[, setdiff(names(data), "cluster"), drop = FALSE],
  ellipse.type = "convex",
  palette = "jco",
  repel = TRUE
) +
  labs(
    title = paste(
      "Fuzzy C-Means Clustering dengan", nrow(res.fcm$v), "Klaster"
    )
  )

library(cluster)
# Draw the clusters with cluster::clusplot, via a fanny-style object.
res.fcm3 <- ppclust2(res.fcm, "fanny")
# Scale and plot the feature columns only: a `cluster` label column attached
# to `data` must not be treated as a measured variable.
# The cluster count in the title comes from the fitted prototypes (one row per
# cluster) so it cannot disagree with the model.
cluster::clusplot(
  scale(data[, setdiff(names(data), "cluster"), drop = FALSE]),
  res.fcm3$cluster,
  main = paste("Cluster Plot of Data Set with", nrow(res.fcm$v), "Clusters"),
  color = TRUE, labels = 2, lines = 2, cex = 1
)

# Cluster validation
# Convert the FCM result to an fclust-style object so the validity indices
# can be computed from it.
res.fcm4 <- ppclust2(res.fcm, "fclust")
# Indices that need only the membership matrix.
idxpc <- PC(res.fcm4$U) # partition coefficient
idxmpc <- MPC(res.fcm4$U) # modified partition coefficient
idxpe <- PE(res.fcm4$U) # partition entropy
# The fuzzy silhouette index also takes the data matrix stored in the object.
idxsf <- SIL.F(res.fcm4$Xca, res.fcm4$U, alpha = 1)
# Report the indices.
cat("Partition Entropy: ", idxpe, "\n", sep = " ")
## Partition Entropy: 0.3210562
cat("Partition Coefficient: ", idxpc, "\n", sep = " ")
## Partition Coefficient: 0.8133326
cat("Modified Partition Coefficient: ", idxmpc, "\n", sep = " ")
## Modified Partition Coefficient: 0.6266652
cat("Fuzzy Silhouette Index: ", idxsf, "\n", sep = " ")
## Fuzzy Silhouette Index: 0.7280901