library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Keep the true species of every flower so the clusters found later can be
# compared against them, and show how many observations each species has.
iris.labels <- iris$Species
table(iris.labels)
## iris.labels
## setosa versicolor virginica
## 50 50 50
# Keep only the first four columns of iris (the numeric measurements).
iris_data <- iris[, 1:4]
# Standardise every column with scale() (default centring and scaling) so no
# single measurement dominates the clustering.
iris_data_scale <- scale(iris_data)
# Pairwise distances between the standardised observations (dist() default
# method).
# NOTE(review): this reuses the name iris_data, replacing the data frame
# assigned above; the k-means steps visible in this script only read
# iris_data_scale -- confirm whether the distance object is still needed.
iris_data <- dist(iris_data_scale)
# Choose the number of clusters: draw the within-cluster sum of squares
# ("wss") curve for k-means on the scaled data and look for the elbow.
fviz_nbclust(x = iris_data_scale, FUNcluster = kmeans, method = "wss") +
  labs(subtitle = "Elbow Method")

# K-means on the scaled data with 3 clusters.
# kmeans() picks its starting centres at random, so fix the RNG seed to make
# the fit -- and the arbitrary cluster numbering -- reproducible between runs.
# nstart = 100 tries 100 random starts and keeps the best solution.
set.seed(123)
km.out <- kmeans(iris_data_scale, centers = 3, nstart = 100)
print(km.out)
## K-means clustering with 3 clusters of sizes 53, 47, 50
##
## Cluster means:
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1 -0.05005221 -0.88042696 0.3465767 0.2805873
## 2 1.13217737 0.08812645 0.9928284 1.0141287
## 3 -1.01119138 0.85041372 -1.3006301 -1.2507035
##
## Clustering vector:
## [1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [38] 3 3 3 3 3 3 3 3 3 3 3 3 3 2 2 2 1 1 1 2 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 1
## [75] 1 2 2 2 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 2 2 2 1 2 2 2 2
## [112] 2 2 1 1 2 2 2 2 1 2 1 2 1 2 2 1 2 2 2 2 2 2 1 1 2 2 2 1 2 2 2 1 2 2 2 1 2
## [149] 2 1
##
## Within cluster sum of squares by cluster:
## [1] 44.08754 47.45019 47.35062
## (between_SS / total_SS = 76.7 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# Visualize the clustering algorithm results.
km.clusters <- km.out$cluster
# Name every row "<species>_<row number>" so the points in the plot can be
# traced back to their observation; seq_len(nrow()) is safe for any row count.
rownames(iris_data_scale) <- paste(iris$Species, seq_len(nrow(iris)), sep = "_")
fviz_cluster(list(data = iris_data_scale, cluster = km.clusters))

# Cross-tabulate the assigned cluster against the true species to see how
# well the clusters recover the labels.
table(km.clusters, iris$Species)
##
## km.clusters setosa versicolor virginica
## 1 0 39 14
## 2 0 11 36
## 3 50 0 0