require(ppclust)
## Loading required package: ppclust
## Warning: package 'ppclust' was built under R version 3.5.1
require(factoextra)
## Loading required package: factoextra
## Warning: package 'factoextra' was built under R version 3.5.1
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.5.1
## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ
require(dplyr)
## Loading required package: dplyr
## Warning: package 'dplyr' was built under R version 3.5.1
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
require(cluster)
## Loading required package: cluster
## Warning: package 'cluster' was built under R version 3.5.1
require(fclust)
## Loading required package: fclust
# Load the built-in iris data set and keep only its four numeric measurement
# columns; the fifth column (Species) is used solely for colouring the plot.
data("iris")
x <- iris[, 1:4]
# Preview the first five observations.
head(x, n = 5)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1 5.1 3.5 1.4 0.2
## 2 4.9 3.0 1.4 0.2
## 3 4.7 3.2 1.3 0.2
## 4 4.6 3.1 1.5 0.2
## 5 5.0 3.6 1.4 0.2
# Scatter-plot matrix of the measurements, coloured by species.
pairs(x, col = iris$Species)
# Correlation matrix of the four measurements (cor() defaults to Pearson).
cor(x)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length 1.0000000 -0.1175698 0.8717538 0.8179411
## Sepal.Width -0.1175698 1.0000000 -0.4284401 -0.3661259
## Petal.Length 0.8717538 -0.4284401 1.0000000 0.9628654
## Petal.Width 0.8179411 -0.3661259 0.9628654 1.0000000
require(psych)
## Loading required package: psych
## Warning: package 'psych' was built under R version 3.5.1
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## The following object is masked from 'package:ppclust':
##
## pca
# Pairwise panel plot of the four numeric iris columns (Species excluded),
# using Pearson correlations.
pairs.panels(iris[, 1:4], method = "pearson")
Fuzzy C-Means (FCM) is a soft clustering algorithm proposed by Bezdek (1974; 1981). Unlike the K-means algorithm, in which each data object is a member of only one cluster, in FCM a data object is a member of all clusters with varying degrees of fuzzy membership between 0 and 1. Hence, data objects closer to the cluster centers have higher degrees of membership than objects scattered near the borders of clusters.
# Run fuzzy c-means on the numeric iris columns, asking for three clusters.
res.fcm <- fcm(x, centers = 3)
# First six rows of the membership degrees matrix.
head(as.data.frame(res.fcm$u), n = 6)
## Cluster 1 Cluster 2 Cluster 3
## 1 0.002304380 0.9966236 0.001072034
## 2 0.016649509 0.9758525 0.007497947
## 3 0.013759500 0.9798259 0.006414579
## 4 0.022465031 0.9674274 0.010107523
## 5 0.003761709 0.9944704 0.001767935
## 6 0.044806233 0.9345741 0.020619654
# Initial cluster prototypes.
res.fcm$v0
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Cluster 1 5.5 2.5 4.0 1.3
## Cluster 2 4.8 3.4 1.9 0.2
## Cluster 3 6.7 3.1 5.6 2.4
# Final cluster prototypes.
res.fcm$v
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Cluster 1 5.888932 2.761069 4.363952 1.3973150
## Cluster 2 5.003966 3.414089 1.482816 0.2535463
## Cluster 3 6.775011 3.052382 5.646782 2.0535467
# Full summary of the clustering result.
summary(res.fcm)
## Summary for 'res.fcm'
##
## Number of data objects: 150
##
## Number of clusters: 3
##
## Crisp clustering vector:
## [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [36] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [71] 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 3 3 3
## [106] 3 1 3 3 3 3 3 3 1 3 3 3 3 3 1 3 1 3 1 3 3 1 1 3 3 3 3 3 1 3 3 3 3 1 3
## [141] 3 3 1 3 3 3 1 3 3 1
##
## Initial cluster prototypes:
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Cluster 1 5.5 2.5 4.0 1.3
## Cluster 2 4.8 3.4 1.9 0.2
## Cluster 3 6.7 3.1 5.6 2.4
##
## Final cluster prototypes:
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Cluster 1 5.888932 2.761069 4.363952 1.3973150
## Cluster 2 5.003966 3.414089 1.482816 0.2535463
## Cluster 3 6.775011 3.052382 5.646782 2.0535467
##
## Distance between the final cluster prototypes
## Cluster 1 Cluster 2
## Cluster 2 10.818752
## Cluster 3 2.946292 23.846049
##
## Difference between the initial and final cluster prototypes
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Cluster 1 0.38893236 0.26106936 0.36395164 0.09731504
## Cluster 2 0.20396596 0.01408886 -0.41718447 0.05354632
## Cluster 3 0.07501122 -0.04761773 0.04678178 -0.34645334
##
## Root Mean Squared Deviations (RMSD): 0.4865456
## Mean Absolute Deviation (MAD): 3.087891
##
## Membership degrees matrix (top and bottom 5 rows):
## Cluster 1 Cluster 2 Cluster 3
## 1 0.002304380 0.9966236 0.001072034
## 2 0.016649509 0.9758525 0.007497947
## 3 0.013759500 0.9798259 0.006414579
## 4 0.022465031 0.9674274 0.010107523
## 5 0.003761709 0.9944704 0.001767935
## ...
## Cluster 1 Cluster 2 Cluster 3
## 146 0.1063871 0.01126223 0.8823507
## 147 0.5075252 0.02579593 0.4666788
## 148 0.1564396 0.01211367 0.8314467
## 149 0.1890364 0.02158126 0.7893823
## 150 0.5817811 0.02691888 0.3913000
##
## Descriptive statistics for the membership degrees by clusters
## Size Min Q1 Mean Median Q3 Max
## Cluster 1 60 0.5075252 0.6697398 0.7826035 0.7963157 0.9164202 0.9737972
## Cluster 2 50 0.8413450 0.9541261 0.9645018 0.9763228 0.9850474 0.9995473
## Cluster 3 40 0.5006317 0.7807561 0.8351480 0.8604619 0.9122633 0.9888134
##
## Dunn's Fuzziness Coefficients:
## dunn_coeff normalized
## 0.7833975 0.6750962
##
## Within cluster sum of squares by cluster:
## 1 2 3
## 36.81767 15.15100 27.05750
## (between_SS / total_SS = 88.04%)
##
## Available components:
## [1] "u" "v" "v0" "d" "x"
## [6] "cluster" "csize" "sumsqrs" "k" "m"
## [11] "iter" "best.start" "func.val" "comp.time" "inpargs"
## [16] "algorithm" "call"
# Re-run fuzzy c-means with five starts; this replaces the earlier res.fcm.
res.fcm <- fcm(x, centers = 3, nstart = 5)
# One value per start (by its name, presumably the objective function value).
res.fcm$func.val
## [1] 60.50571 60.50571 60.50571 60.50571 60.50571
# One iteration count per start.
res.fcm$iter
## [1] 47 49 49 49 48
# Index of the start reported as best.
res.fcm$best.start
## [1] 1
# Full summary of the multi-start result.
summary(res.fcm)
## Summary for 'res.fcm'
##
## Number of data objects: 150
##
## Number of clusters: 3
##
## Crisp clustering vector:
## [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [36] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [71] 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 3 3 3
## [106] 3 1 3 3 3 3 3 3 1 3 3 3 3 3 1 3 1 3 1 3 3 1 1 3 3 3 3 3 1 3 3 3 3 1 3
## [141] 3 3 1 3 3 3 1 3 3 1
##
## Initial cluster prototypes:
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Cluster 1 6.7 3.1 5.6 2.4
## Cluster 2 5.0 3.2 1.2 0.2
## Cluster 3 7.7 2.6 6.9 2.3
##
## Final cluster prototypes:
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Cluster 1 5.888932 2.761069 4.363952 1.3973150
## Cluster 2 5.003966 3.414089 1.482816 0.2535463
## Cluster 3 6.775011 3.052382 5.646782 2.0535467
##
## Distance between the final cluster prototypes
## Cluster 1 Cluster 2
## Cluster 2 10.818752
## Cluster 3 2.946292 23.846049
##
## Difference between the initial and final cluster prototypes
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Cluster 1 -0.811067639 -0.3389306 -1.2360484 -1.00268496
## Cluster 2 0.003965961 0.2140889 0.2828155 0.05354632
## Cluster 3 -0.924988776 0.4523823 -1.2532182 -0.24645334
##
## Root Mean Squared Deviations (RMSD): 1.429003
## Mean Absolute Deviation (MAD): 9.093588
##
## Membership degrees matrix (top and bottom 5 rows):
## Cluster 1 Cluster 2 Cluster 3
## 1 0.002304380 0.9966236 0.001072034
## 2 0.016649509 0.9758525 0.007497947
## 3 0.013759500 0.9798259 0.006414579
## 4 0.022465031 0.9674274 0.010107523
## 5 0.003761709 0.9944704 0.001767935
## ...
## Cluster 1 Cluster 2 Cluster 3
## 146 0.1063871 0.01126223 0.8823507
## 147 0.5075252 0.02579593 0.4666788
## 148 0.1564396 0.01211367 0.8314467
## 149 0.1890364 0.02158126 0.7893823
## 150 0.5817811 0.02691888 0.3913000
##
## Descriptive statistics for the membership degrees by clusters
## Size Min Q1 Mean Median Q3 Max
## Cluster 1 60 0.5075252 0.6697398 0.7826035 0.7963157 0.9164202 0.9737972
## Cluster 2 50 0.8413450 0.9541261 0.9645018 0.9763228 0.9850474 0.9995473
## Cluster 3 40 0.5006317 0.7807561 0.8351480 0.8604619 0.9122633 0.9888134
##
## Dunn's Fuzziness Coefficients:
## dunn_coeff normalized
## 0.7833975 0.6750962
##
## Within cluster sum of squares by cluster:
## 1 2 3
## 36.81767 15.15100 27.05750
## (between_SS / total_SS = 88.04%)
##
## Available components:
## [1] "u" "v" "v0" "d" "x"
## [6] "cluster" "csize" "sumsqrs" "k" "m"
## [11] "iter" "best.start" "func.val" "comp.time" "inpargs"
## [16] "algorithm" "call"
# Plot the clustering result with ppclust's own plotting helper.
plotcluster(res.fcm, cp = 1, trans = TRUE)
# Convert the result to a "kmeans"-style object so that factoextra can draw it.
res.fcm2 <- ppclust2(res.fcm, "kmeans")
fviz_cluster(
  res.fcm2,
  data = x,
  ellipse.type = "convex",
  palette = "jco",
  repel = TRUE
)
# Convert the result to a "fanny"-style object and draw it with
# cluster::clusplot on the standardised data.
res.fcm3 <- ppclust2(res.fcm, "fanny")
cluster::clusplot(
  scale(x),
  res.fcm3$cluster,
  main = "Cluster plot of Iris data set",
  color = TRUE,
  labels = 2,
  lines = 2,
  cex = 1
)
# Convert the ppclust result to an "fclust"-style object so that the validity
# index functions below can be applied to it.
res.fcm4 <- ppclust2(res.fcm, "fclust")
# Fuzzy Silhouette Index:
idxsf <- SIL.F(res.fcm4$Xca, res.fcm4$U, alpha = 1)
paste("Fuzzy Silhouette Index: ", idxsf)
## [1] "Fuzzy Silhouette Index: 0.809144574010203"
# Partition Entropy:
# Stored under its own name (idxpe) so that it no longer overwrites the
# fuzzy silhouette index kept in idxsf.
idxpe <- PE(res.fcm4$U)
paste("Partition Entropy: ", idxpe)
## [1] "Partition Entropy: 0.395491581126072"
# Partition Coefficient:
idxpc <- PC(res.fcm4$U)
paste("Partition Coefficient : ", idxpc)
## [1] "Partition Coefficient : 0.783397486906811"
# Modified Partition Coefficient:
idxmpc <- MPC(res.fcm4$U)
paste("Modified Partition Coefficient :", idxmpc)
## [1] "Modified Partition Coefficient : 0.675096230360217"
library(clusterSim)
## Warning: package 'clusterSim' was built under R version 3.5.1
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
##
## This is package 'modeest' written by P. PONCET.
## For a complete list of functions, use 'library(help = "modeest")' or 'help.start()'.
# Partition the four numeric iris columns with PAM for k = 4 and k = 5, then
# pass both clustering vectors (as columns of one matrix) to index.Gap.
cl1 <- pam(iris[, 1:4], 4)
cl2 <- pam(iris[, 1:4], 5)
clall <- cbind(cl1$clustering, cl2$clustering)
g <- index.Gap(
  iris[, 1:4],
  clall,
  reference.distribution = "unif",
  B = 10,
  method = "pam"
)
print(g)
## $gap
## [1] 1.640108
##
## $diffu
## [1] -0.01264678
# Five-cluster PAM partition of the numeric iris columns, evaluated with
# index.DB using centroids as the cluster representatives.
cl2 <- pam(iris[, 1:4], 5)
print(
  index.DB(
    iris[, 1:4],
    cl2$clustering,
    centrotypes = "centroids"
  )
)
## $DB
## [1] 0.8521864
##
## $r
## [1] 0.4022061 1.0097546 0.9862158 1.0097546 0.8530008
##
## $R
## [,1] [,2] [,3] [,4] [,5]
## [1,] Inf 0.3035383 0.4022061 0.2205134 0.1948721
## [2,] 0.3035383 Inf 0.9862158 1.0097546 0.5224259
## [3,] 0.4022061 0.9862158 Inf 0.4755973 0.3381286
## [4,] 0.2205134 1.0097546 0.4755973 Inf 0.8530008
## [5,] 0.1948721 0.5224259 0.3381286 0.8530008 Inf
##
## $d
## 1 2 3 4 5
## 1 0.000000 3.770374 2.779774 4.786721 5.910060
## 2 3.770374 0.000000 1.177785 1.088427 2.287814
## 3 2.779774 1.177785 0.000000 2.255340 3.456682
## 4 4.786721 1.088427 2.255340 0.000000 1.296947
## 5 5.910060 2.287814 3.456682 1.296947 0.000000
##
## $S
## [1] 0.5504725 0.5939802 0.5675697 0.5050635 0.6012333
##
## $centers
## [,1] [,2] [,3] [,4]
## [1,] 5.006000 3.428000 1.462000 0.246000
## [2,] 6.165000 2.852500 4.742500 1.580000
## [3,] 5.512500 2.583333 3.883333 1.191667
## [4,] 6.633333 3.066667 5.548148 2.100000
## [5,] 7.577778 3.144444 6.433333 2.122222
# Ten-cluster PAM partition of the numeric iris columns, scored with index.G1.
# NOTE(review): the variable name `c` is the same as base::c; it is kept here
# because code outside this view may refer to it.
c <- pam(iris[, 1:4], 10)
index.G1(iris[, 1:4], c$clustering)
## [1] 376.7972