## Load the built-in iris data set and print summary statistics for each column.
data("iris")
summary(object = iris)
## Output of summary(iris):
##   Sepal.Length   Sepal.Width    Petal.Length   Petal.Width        Species
##  Min.   :4.30   Min.   :2.00   Min.   :1.00   Min.   :0.1   setosa    :50
##  1st Qu.:5.10   1st Qu.:2.80   1st Qu.:1.60   1st Qu.:0.3   versicolor:50
##  Median :5.80   Median :3.00   Median :4.35   Median :1.3   virginica :50
##  Mean   :5.84   Mean   :3.06   Mean   :3.76   Mean   :1.2
##  3rd Qu.:6.40   3rd Qu.:3.30   3rd Qu.:5.10   3rd Qu.:1.8
##  Max.   :7.90   Max.   :4.40   Max.   :6.90   Max.   :2.5
## Scatter plot of sepal width against sepal length, first with base graphics ...
plot(Sepal.Width ~ Sepal.Length, data = iris, pch = 16)
## ... and then the same view drawn with ggplot2.
library(ggplot2)
ggplot(data = iris, mapping = aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point()
## Fit k-means on the two sepal measurements for k = 1, ..., 10 clusters.
## kmeans() draws its starting centres at random, so the seed is fixed to make
## the fits repeatable, and each fit keeps the best of several random starts
## (nstart) so that one unlucky start cannot leave it in a poor local optimum.
set.seed(42)
res.kmeans <- lapply(1:10, function(i) {
  kmeans(iris[, c("Sepal.Length", "Sepal.Width")], centers = i, nstart = 25)
})
## SS for each cluster (1 cluster to 10 clusters)
lapply(res.kmeans, function(x) x$withinss)
## Example output from one run (k-means starts at random, so exact values vary):
## [[1]]
## [1] 130.48
## [[2]]
## [1] 30.044 28.404
## [[3]]
## [1] 13.129 12.622 11.300
## [[4]]
## [1] 10.6341 4.4517 4.6300 8.2506
## [[5]]
## [1] 3.8373 3.6096 3.1929 4.8527 6.0237
## [[6]]
## [1] 3.1440 3.8262 4.3523 1.2296 3.1762 2.3745
## [[7]]
## [1] 2.30828 2.54500 1.55520 3.09286 0.64875 2.28316 3.05517
## [[8]]
## [1] 1.42960 3.13687 0.58571 0.74800 1.31048 1.99529 1.72375 2.27067
## [[9]]
## [1] 0.83524 1.99529 2.37455 0.47429 0.90476 0.39333 0.42200 3.13687 1.50000
## [[10]]
## [1] 1.16917 1.00381 0.53143 0.60278 0.48800 0.81882 1.11130 2.37455 1.01444 1.51389
## Sum up SS
## Total within-cluster SS of every fit. vapply() is used instead of sapply()
## so the result is always a plain numeric vector with one entry per fit.
res.within.ss <- vapply(res.kmeans, function(x) sum(x$withinss), numeric(1))
## Elbow plot with base graphics. The x positions are derived from the number
## of fits rather than a hard-coded 1:10, so they cannot drift out of sync.
plot(seq_along(res.within.ss), res.within.ss, type = "b",
     xlab = "Number of clusters", ylab = "Within SS")
## The same elbow plot with ggplot2, with one axis break per cluster count.
ggplot(data.frame(cluster = seq_along(res.within.ss), within.ss = res.within.ss),
       aes(cluster, within.ss)) +
  geom_point() + geom_line() +
  scale_x_continuous(breaks = 0:length(res.within.ss))
## Cluster assignment of every observation, one vector per k-means fit.
cluster.colors <- lapply(res.kmeans, function(fit) fit$cluster)
library(plyr)
## Base graphics: one scatter plot per fit, points coloured by cluster id and
## titled with the number of clusters present in that fit.
l_ply(cluster.colors, function(assignment) {
  n.clusters <- length(unique(assignment))
  plot(Sepal.Width ~ Sepal.Length, data = iris, col = assignment,
       main = as.character(n.clusters), pch = 16)
})
## ggplot2: the same series of plots, with the cluster id mapped to colour.
l_ply(cluster.colors, function(assignment) {
  plot.dat <- data.frame(iris, cluster = factor(assignment))
  scatter <- ggplot(plot.dat, aes(x = Sepal.Length, y = Sepal.Width, colour = cluster)) +
    geom_point() +
    labs(title = as.character(nlevels(plot.dat$cluster)))
  ## ggplot objects are not auto-printed inside a function, so print explicitly.
  print(scatter)
})
## For comparison: the same scatter plot coloured by the Species column,
## first with base graphics ...
plot(Sepal.Width ~ Sepal.Length, data = iris, col = Species, pch = 16)
library(ggplot2)
## ... and then with ggplot2.
ggplot(data = iris,
       mapping = aes(x = Sepal.Length, y = Sepal.Width, colour = Species)) +
  geom_point()