# R code for K-clustering practice (k-means and k-medoids).
# Fix the RNG seed so the simulated data set is the same on every run.
set.seed(1234)

# Simulate 24 points. The mean vectors are shorter than 24, so they are
# recycled: the points are drawn in runs of four around (1, 1), (2, 2) and
# (3, 1), each coordinate with standard deviation 0.2.
x <- rnorm(n = 24, mean = rep(1:3, each = 4), sd = 0.2)
y <- rnorm(n = 24, mean = rep(c(1, 2, 1), each = 4), sd = 0.2)
data <- data.frame(x = x, y = y)

# Scatter plot of the raw points, each labelled with its index
# (labels are nudged up and to the right so they do not cover the markers).
plot(x, y, col = "blue", pch = 19, cex = 1)
text(x + 0.05, y + 0.05, labels = as.character(seq_len(24)))
# ---- k-means with three clusters ----
# Partition the rows of `data` into three groups.
kmeansObj <- kmeans(x = data, centers = 3)

# Components available on the fitted object.
names(kmeansObj)
## [1] "cluster" "centers" "totss" "withinss"
## [5] "tot.withinss" "betweenss" "size" "iter"
## [9] "ifault"

# Within-cluster sum of squares, one value per cluster.
# (The values below are recorded output from one run.)
kmeansObj[["withinss"]]
## [1] 0.09936525 0.56531349 0.59692116

# Cluster label assigned to each point, in row order.
kmeansObj[["cluster"]]
## [1] 2 2 2 2 3 3 3 3 1 1 1 1 2 2 2 2 3 3 3 3 1 1 1 1

# Re-draw the scatter plot coloured by cluster, then overlay the three
# fitted cluster centres as large crosses in the matching colours.
plot(x, y, col = kmeansObj[["cluster"]], pch = 19, cex = 1)
points(kmeansObj[["centers"]], col = seq_len(3), pch = 4, cex = 3, lwd = 3)
# ---- Repeated k-means fits with four clusters ----
# Draw a 2 x 2 grid of panels; each panel shows a fresh four-cluster k-means
# fit of the same data. kmeans() is re-run for every panel, so the partitions
# may differ from panel to panel.
par(mfrow = c(2, 2), mar = c(3, 3, 3, 3))
for (i in seq_len(4)) {
  chartName <- paste("Chart", i, sep = "_")
  kmeansObj <- kmeans(data, centers = 4)
  plot(x, y, col = kmeansObj$cluster, pch = 19, cex = 1, main = chartName)
  # One colour per centre: four centres, so four colours (was 1:5), matching
  # the cluster colours used for the points above.
  points(kmeansObj$centers, col = seq_len(4), pch = 4, cex = 3, lwd = 3)
}
# ---- k-medoids (PAM) with four clusters ----
# pam() ("Partitioning Around Medoids") comes from the "cluster" package.
library(cluster)

kmedoidObj <- pam(x = data, k = 4)

# Components available on the fitted object.
names(kmedoidObj)
## [1] "medoids" "id.med" "clustering" "objective" "isolation"
## [6] "clusinfo" "silinfo" "diss" "call" "data"

# Objective function value after the build phase and after the swap phase.
# (The values below are recorded output from one run.)
kmedoidObj[["objective"]]
## build swap
## 0.1722613 0.1704448
# ---- Repeated PAM fits with four clusters ----
# Draw a 2 x 2 grid of panels, mirroring the k-means grid. pam() is re-fitted
# for every panel (previously the same precomputed fit was simply redrawn four
# times), so the grid actually compares repeated runs. pam() uses no random
# starting values, so the panels are expected to be identical.
par(mfrow = c(2, 2), mar = c(3, 3, 3, 3))
for (i in seq_len(4)) {
  chartName <- paste("PAM", i, sep = "_")
  kmedoidObj <- pam(x = data, k = 4)
  plot(x, y, col = kmedoidObj$clustering, pch = 19, cex = 1, main = chartName)
  # Overlay the four medoids as large crosses in the cluster colours.
  points(kmedoidObj$medoids, col = seq_len(4), pch = 4, cex = 3, lwd = 3)
}