Fuzzy C-Means merupakan metode pengelompokan data yang termasuk jenis soft clustering: setiap data dapat menjadi anggota beberapa cluster sekaligus dengan derajat keanggotaan antara 0 dan 1. Fuzzy C-Means adalah salah satu metode fuzzy clustering untuk mengelompokkan dataset menjadi K cluster.
Jumlah cluster (K) harus lebih besar dari 1. Fuzzy C-Means adalah pengembangan dari metode k-means dengan beberapa modifikasi.
Berikut ini adalah tahapan algoritma Fuzzy C-Means:
1. Menentukan banyak cluster (K) yang akan dibuat.
2. Menentukan nilai proporsi (derajat keanggotaan) setiap titik data secara acak untuk masuk ke dalam suatu cluster.
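3. Menghitung nilai centroid. Nilai centroid cluster ke-j dihitung menggunakan formula berikut, dengan \(u_{ij}\) derajat keanggotaan data ke-i pada cluster ke-j dan \(m > 1\) parameter pembobot (fuzzifier): \[c_j = \frac{\sum_{i=1}^{n} u_{ij}^{m} x_i}{\sum_{i=1}^{n} u_{ij}^{m}}\]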
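4. Menghitung ulang nilai proporsi setiap data poin untuk masuk pada setiap cluster, menggunakan formula berikut: \[u_{ij} = \frac{1}{\sum_{k=1}^{K} \left(\frac{\|x_i - c_j\|}{\|x_i - c_k\|}\right)^{\frac{2}{m-1}}}\] Langkah 3 dan 4 diulang sampai nilai proporsi tidak lagi berubah secara berarti (konvergen).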
library (ppclust)
library (factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library (dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library (cluster)
library(fclust)
Sebelum melakukan clustering, impor terlebih dahulu data yang dibutuhkan. Kali ini kita menggunakan dataset Penguin.
library(readxl)
# Read the penguin workbook from disk and display it.
# NOTE(review): the absolute Windows path only resolves on the author's
# machine -- confirm where the file should live before re-running.
data <- read_excel("D:/dataset/Data Penguin.xlsx")
data
# Keep every column except the first one as the clustering input.
# NOTE(review): the first column is presumably a non-numeric label -- confirm.
data_penguin <- data[, -1]
data_penguin
library (psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## The following object is masked from 'package:ppclust':
##
## pca
library(ppclust)
# Fit fuzzy c-means with five clusters on the penguin measurements.
# NOTE(review): Body.Mass is in the thousands while the culmen columns are in
# the tens (see the prototypes printed below), so distances are presumably
# dominated by Body.Mass; consider standardising the data first -- confirm.
res.fcm <- fcm(data_penguin, centers = 5)
# Membership degrees of the first ten observations.
as.data.frame(res.fcm[["u"]])[seq_len(10), ]
# Initial cluster prototypes.
res.fcm[["v0"]]
## Culmen.Length Culmen.Depth Flipper.Length Body.Mass
## Cluster 1 48.5 15.0 219 4850
## Cluster 2 38.1 17.0 181 3175
## Cluster 3 55.9 17.0 228 5600
## Cluster 4 42.7 18.3 196 4075
## Cluster 5 46.8 16.5 189 3650
# Final cluster prototypes.
res.fcm[["v"]]
## Culmen.Length Culmen.Depth Flipper.Length Body.Mass
## Cluster 1 45.91966 15.21606 213.0069 4847.306
## Cluster 2 39.59229 17.54392 187.8470 3231.889
## Cluster 3 50.11423 15.80701 222.3897 5606.411
## Cluster 4 44.01479 17.74764 200.8494 4263.106
## Cluster 5 42.54310 18.34748 191.2722 3722.744
# Full report of the five-cluster fit.
summary(res.fcm)
## Summary for 'res.fcm'
##
## Number of data objects: 342
##
## Number of clusters: 5
##
## Crisp clustering vector:
## [1] 5 5 2 2 5 5 1 2 4 2 5 2 5 4 5 2 4 2 4 2 5 5 5 5 5 5 2 2 5 2 5 2 5 2 4 5 5
## [38] 2 1 2 5 2 4 2 1 2 2 2 4 5 4 2 4 2 5 5 5 2 5 2 4 5 4 2 5 2 4 2 4 5 5 5 4 5
## [75] 4 5 5 5 4 2 1 5 4 2 5 5 5 5 5 5 4 2 4 2 4 5 4 2 4 5 1 2 4 2 5 5 5 2 1 5 1
## [112] 2 4 5 4 2 5 2 2 2 5 2 5 2 4 2 4 2 4 2 5 5 4 2 5 2 5 2 4 2 2 2 5 2 5 4 2 2
## [149] 5 5 4 5 5 5 5 5 5 2 5 4 5 5 5 5 4 5 4 2 5 2 4 5 2 2 5 2 4 2 5 5 4 2 4 2 4
## [186] 5 5 5 1 2 4 5 5 5 5 5 4 2 4 2 5 2 5 5 4 2 2 2 4 5 5 5 5 5 4 2 5 4 5 4 3 4
## [223] 3 3 4 1 1 4 1 1 3 1 3 4 3 4 3 1 3 3 1 4 1 1 1 4 3 1 3 3 1 1 3 1 3 1 3 4 3
## [260] 5 3 4 1 3 1 4 3 1 3 1 3 4 1 1 1 4 1 4 3 4 3 4 3 1 3 1 3 1 3 1 1 1 1 1 3 1
## [297] 3 1 3 1 3 1 3 1 3 1 3 1 3 1 3 1 3 1 1 1 1 1 3 1 3 1 3 1 3 1 3 1 3 1 3 1 3
## [334] 4 3 1 3 1 1 3 1 3
##
## Initial cluster prototypes:
## Culmen.Length Culmen.Depth Flipper.Length Body.Mass
## Cluster 1 48.5 15.0 219 4850
## Cluster 2 38.1 17.0 181 3175
## Cluster 3 55.9 17.0 228 5600
## Cluster 4 42.7 18.3 196 4075
## Cluster 5 46.8 16.5 189 3650
##
## Final cluster prototypes:
## Culmen.Length Culmen.Depth Flipper.Length Body.Mass
## Cluster 1 45.91966 15.21606 213.0069 4847.306
## Cluster 2 39.59229 17.54392 187.8470 3231.889
## Cluster 3 50.11423 15.80701 222.3897 5606.411
## Cluster 4 44.01479 17.74764 200.8494 4263.106
## Cluster 5 42.54310 18.34748 191.2722 3722.744
##
## Distance between the final cluster prototypes
## Cluster 1 Cluster 2 Cluster 3 Cluster 4
## Cluster 2 2610251.1
## Cluster 3 576347.3 5639665.3
## Cluster 4 341446.8 1063598.8 1804973.2
## Cluster 5 1265133.8 240959.7 3549236.5 292086.3
##
## Difference between the initial and final cluster prototypes
## Culmen.Length Culmen.Depth Flipper.Length Body.Mass
## Cluster 1 -2.580337 0.2160584 -5.993145 -2.694207
## Cluster 2 1.492292 0.5439221 6.846952 56.888612
## Cluster 3 -5.785767 -1.1929917 -5.610251 6.411371
## Cluster 4 1.314789 -0.5523628 4.849409 188.106417
## Cluster 5 -4.256901 1.8474850 2.272209 72.743568
##
## Root Mean Squared Deviations (RMSD): 93.9904
## Mean Absolute Deviation (MAD): 297.7592
##
## Membership degrees matrix (top and bottom 5 rows):
## Cluster 1 Cluster 2 Cluster 3 Cluster 4 Cluster 5
## 1 0.000708671 0.003181015 0.000247691 0.003238805 0.992623818
## 2 0.005192422 0.017658436 0.001745818 0.026544496 0.948858828
## 3 0.000148537 0.997718099 0.000068250 0.000369270 0.001695844
## 4 0.013960078 0.572636354 0.005861659 0.041229723 0.366312185
## 5 0.003526914 0.028931337 0.001321089 0.013450693 0.952769967
## ...
## Cluster 1 Cluster 2 Cluster 3 Cluster 4 Cluster 5
## 338 0.96798199 0.002039450 0.012592211 0.013342114 0.004044234
## 339 0.99992528 0.000004904 0.000022447 0.000037267 0.000010103
## 340 0.02425893 0.003117231 0.958874264 0.008940266 0.004809307
## 341 0.50420497 0.016189767 0.379426944 0.071443495 0.028734824
## 342 0.11675218 0.007586244 0.835395400 0.027590316 0.012675862
##
## Descriptive statistics for the membership degrees by clusters
## Size Min Q1 Mean Median Q3 Max
## Cluster 1 64 0.5037126 0.7183871 0.8197986 0.8692388 0.9456157 0.9999388
## Cluster 2 71 0.4756544 0.7521029 0.8345414 0.8879665 0.9673164 0.9977756
## Cluster 3 49 0.5001229 0.8210079 0.8629821 0.9108374 0.9803829 0.9997904
## Cluster 4 64 0.4600043 0.6692306 0.8075202 0.8607078 0.9671469 0.9987311
## Cluster 5 94 0.5023993 0.7251313 0.8254332 0.8627523 0.9758493 0.9998970
##
## Dunn's Fuzziness Coefficients:
## dunn_coeff normalized
## 0.7451125 0.6813907
##
## Within cluster sum of squares by cluster:
## 1 2 3 4 5
## 2135028 2498762 2807058 1626772 1906951
## (between_SS / total_SS = 95.08%)
##
## Available components:
## [1] "u" "v" "v0" "d" "x"
## [6] "cluster" "csize" "sumsqrs" "k" "m"
## [11] "iter" "best.start" "func.val" "comp.time" "inpargs"
## [16] "algorithm" "call"
# Refit with four clusters, passing nstart = 10; this overwrites the earlier
# five-cluster result stored in res.fcm.
res.fcm <- fcm(data_penguin, centers = 4, nstart = 10)
# One func.val entry per start (ten values are printed below).
res.fcm[["func.val"]]
## [1] 11739109 11739109 11739109 11739109 11739109 11739109 11739109 11739109
## [9] 11739109 11739109
# One iter entry per start.
res.fcm[["iter"]]
## [1] 130 127 132 132 129 132 134 135 90 131
# Index of the start reported as best.
res.fcm[["best.start"]]
## [1] 1
# Full report of the four-cluster fit.
summary(res.fcm)
## Summary for 'res.fcm'
##
## Number of data objects: 342
##
## Number of clusters: 4
##
## Crisp clustering vector:
## [1] 1 1 4 4 1 1 3 4 1 4 1 4 1 3 1 4 3 4 1 4 4 1 1 1 1 4 4 4 1 4 1 4 1 4 1 1 4
## [38] 4 3 4 1 4 3 4 3 4 4 4 1 4 3 4 1 4 1 4 1 4 1 4 3 4 1 4 1 4 1 4 3 4 1 4 1 1
## [75] 1 1 1 4 1 4 3 1 1 4 4 1 4 1 4 4 3 4 3 4 3 1 3 4 1 1 3 4 1 4 4 1 1 4 3 1 3
## [112] 4 1 1 1 4 1 4 4 4 4 4 1 4 1 4 3 4 1 4 4 4 3 4 1 4 1 4 1 4 4 4 1 4 1 1 4 4
## [149] 1 1 1 4 1 1 4 1 1 4 1 1 1 1 1 1 1 4 1 4 1 4 3 4 4 4 1 4 1 4 1 1 3 4 3 4 1
## [186] 4 1 1 3 4 3 1 1 4 4 1 3 4 3 4 1 4 1 4 1 4 4 4 1 1 4 1 1 1 1 4 1 1 1 3 2 3
## [223] 2 2 3 3 2 3 2 3 2 3 2 1 2 1 2 3 2 2 3 3 3 3 3 1 2 3 2 2 3 3 2 2 2 3 2 3 2
## [260] 1 2 3 3 2 3 1 2 3 2 3 2 3 3 3 3 3 3 3 2 1 2 3 2 3 2 3 2 3 2 3 3 3 2 3 2 3
## [297] 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 3 3 2 3 2 3 2 3 2 3 2 3 2 3 2 3 2
## [334] 3 2 3 2 3 3 2 2 2
##
## Initial cluster prototypes:
## Culmen.Length Culmen.Depth Flipper.Length Body.Mass
## Cluster 1 41.3 21.1 195 4400
## Cluster 2 48.7 15.7 208 5350
## Cluster 3 37.0 16.9 185 3000
## Cluster 4 52.7 19.8 197 3725
##
## Final cluster prototypes:
## Culmen.Length Culmen.Depth Flipper.Length Body.Mass
## Cluster 1 43.27426 18.45995 194.5110 3895.212
## Cluster 2 49.82006 15.74988 221.8621 5557.892
## Cluster 3 45.29771 15.78941 209.1899 4688.798
## Cluster 4 40.01420 17.70322 188.5384 3307.929
##
## Distance between the final cluster prototypes
## Cluster 1 Cluster 2 Cluster 3
## Cluster 2 2765305.4
## Cluster 3 630005.6 755506.4
## Cluster 4 344947.4 5063544.0 1907255.7
##
## Difference between the initial and final cluster prototypes
## Culmen.Length Culmen.Depth Flipper.Length Body.Mass
## Cluster 1 1.974261 -2.64004585 -0.4890197 -504.7884
## Cluster 2 1.120056 0.04988389 13.8620970 207.8923
## Cluster 3 8.297707 -1.11058594 24.1898972 1688.7977
## Cluster 4 -12.685795 -2.09678437 -8.4616403 -417.0707
##
## Root Mean Squared Deviations (RMSD): 911.7446
## Mean Absolute Deviation (MAD): 2895.527
##
## Membership degrees matrix (top and bottom 5 rows):
## Cluster 1 Cluster 2 Cluster 3 Cluster 4
## 1 0.877555766 0.005712083 0.021174800 0.09555735
## 2 0.950263601 0.002813389 0.011002442 0.03592057
## 3 0.008077252 0.000631219 0.001624163 0.98966736
## 4 0.091073867 0.004062859 0.011763285 0.89309999
## 5 0.630347092 0.010416243 0.035133544 0.32410312
## ...
## Cluster 1 Cluster 2 Cluster 3 Cluster 4
## 338 0.04336857 0.11484274 0.82419871 0.017589978
## 339 0.02614376 0.04757857 0.91625347 0.010024193
## 340 0.01021753 0.95267286 0.03121532 0.005894287
## 341 0.04700803 0.62434521 0.30629103 0.022355727
## 342 0.01035685 0.93791483 0.04636996 0.005358357
##
## Descriptive statistics for the membership degrees by clusters
## Size Min Q1 Mean Median Q3 Max
## Cluster 1 102 0.4799810 0.7487436 0.8423650 0.8882931 0.9517204 0.9998587
## Cluster 2 55 0.5162548 0.8184940 0.8831996 0.9379148 0.9866536 0.9998800
## Cluster 3 88 0.4597556 0.6853438 0.8094714 0.8620170 0.9775407 0.9995412
## Cluster 4 97 0.4820959 0.7864087 0.8511891 0.8930451 0.9694579 0.9997757
##
## Dunn's Fuzziness Coefficients:
## dunn_coeff normalized
## 0.7673983 0.6898644
##
## Within cluster sum of squares by cluster:
## 1 2 3 4
## 3416139 3707131 4904659 4361364
## (between_SS / total_SS = 92.73%)
##
## Available components:
## [1] "u" "v" "v0" "d" "x"
## [6] "cluster" "csize" "sumsqrs" "k" "m"
## [11] "iter" "best.start" "func.val" "comp.time" "inpargs"
## [16] "algorithm" "call"
# Plot the current clustering result stored in res.fcm.
# NOTE(review): cp presumably selects a colour palette and trans presumably
# turns on transparent colours -- confirm against the ppclust documentation.
plotcluster(res.fcm, cp = 3, trans = TRUE)
library(clusterSim)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Registered S3 method overwritten by 'e1071':
## method from
## print.fclust fclust
# Partition the first four columns into five clusters with PAM, then print
# the index.DB result computed from that partition.
# NOTE(review): this scores a PAM partition with k = 5, not the fuzzy c-means
# result held in res.fcm -- confirm that this comparison is intended.
cl2 <- pam(data_penguin[, 1:4], k = 5)
print(
  index.DB(
    data_penguin[, 1:4],
    cl = cl2$clustering,
    centrotypes = "centroids"
  )
)
## $DB
## [1] 0.6054911
##
## $r
## [1] 0.6707952 0.6707952 0.5626484 0.5626484 0.5605683
##
## $R
## [,1] [,2] [,3] [,4] [,5]
## [1,] Inf 0.6707952 0.2912618 0.5544725 0.2169387
## [2,] 0.6707952 Inf 0.2353416 0.3367806 0.1953817
## [3,] 0.2912618 0.2353416 Inf 0.5626484 0.5605683
## [4,] 0.5544725 0.3367806 0.5626484 Inf 0.3027583
## [5,] 0.2169387 0.1953817 0.5605683 0.3027583 Inf
##
## $d
## 1 2 3 4 5
## 1 0.0000 496.7568 1047.8005 500.6397 1797.3615
## 2 496.7568 0.0000 1544.5279 997.3800 2294.0980
## 3 1047.8005 1544.5279 0.0000 547.1632 749.5729
## 4 500.6397 997.3800 547.1632 0.0000 1296.7225
## 5 1797.3615 2294.0980 749.5729 1296.7225 0.0000
##
## $S
## [1] 137.4574 195.7647 167.7269 140.1336 252.4599
##
## $centers
## [,1] [,2] [,3] [,4]
## [1,] 42.61000 18.39889 191.9556 3766.111
## [2,] 39.79375 17.67375 188.4750 3269.375
## [3,] 45.79516 15.25968 212.0323 4813.710
## [4,] 44.34035 17.80877 200.9474 4266.667
## [5,] 49.73962 15.74906 221.8679 5563.208