# A 2-dimensional example: two groups of 50 points each, drawn from normal
# distributions with means 0 and 1 (sd = 0.3) in both coordinates.
x <- rbind(
  matrix(rnorm(100, sd = 0.3), ncol = 2),
  matrix(rnorm(100, mean = 1, sd = 0.3), ncol = 2)
)
colnames(x) <- c("x", "y")

# Run k-means repeatedly and record, for every run, the total within-cluster
# sum of squares together with the coordinates of both cluster centres.
n_runs <- 50L
n_starts <- 20L

# Preallocate with NA so that a cell that was never filled is easy to spot.
kmeans_dist <- data.frame(
  num = seq_len(n_runs),
  error = NA_real_,
  x1 = NA_real_,
  y1 = NA_real_,
  x2 = NA_real_,
  y2 = NA_real_
)

# Warnings are deliberately NOT disabled globally: the previous
# `options(warn = -1)` only hid a length-mismatch warning caused by storing the
# length-2 `withinss` vector in a single cell.
for (i in seq_len(n_runs)) {
  cl <- kmeans(x, centers = 2, nstart = n_starts)
  # `withinss` has one value per cluster; the scalar quality measure of the
  # whole partition is `tot.withinss`.
  kmeans_dist$error[i] <- cl$tot.withinss
  # Index the centres by cluster row and coordinate name so that (x1, y1) is
  # really cluster 1 and (x2, y2) is really cluster 2.
  kmeans_dist$x1[i] <- cl$centers[1, "x"]
  kmeans_dist$y1[i] <- cl$centers[1, "y"]
  kmeans_dist$x2[i] <- cl$centers[2, "x"]
  kmeans_dist$y2[i] <- cl$centers[2, "y"]
}

# Keep the run with the smallest total within-cluster sum of squares.
optimize_center <- kmeans_dist[which.min(kmeans_dist$error), ]

# One row per cluster, one column per coordinate.
center <- rbind(
  row1 = c(x = optimize_center$x1, y = optimize_center$y1),
  row2 = c(x = optimize_center$x2, y = optimize_center$y2)
)

# Refit starting from the best centres. The number of clusters is implied by
# the rows of `center`; a stray positional `2` would be taken as `iter.max`.
cl_best <- kmeans(x, centers = center)
cl_best
## K-means clustering with 2 clusters of sizes 49, 51
##
## Cluster means:
## x y
## 1 -0.007721516 -0.02078729
## 2 0.991086783 1.03941709
##
## Clustering vector:
## [1] 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [36] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [71] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
##
## Within cluster sum of squares by cluster:
## [1] 8.471624 10.733281
## (between_SS / total_SS = 73.4 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss"
## [5] "tot.withinss" "betweenss" "size" "iter"
## [9] "ifault"