# partitioning clustering
# case 1
# Fit k-means with k = 3 on the four numeric iris measurements.
data(iris)
# Drop the species label so the clustering only sees the numeric columns.
newiris <- iris[, names(iris) != "Species"]
# kmeans() chooses its starting centres at random. Fix the seed so the run is
# reproducible, and try several random starts (nstart) so that a poor local
# optimum from a single unlucky start is not kept. Cluster numbers are
# arbitrary labels and may be permuted from one seed to another.
set.seed(42)
kc <- kmeans(newiris, centers = 3, nstart = 25)
kc
## K-means clustering with 3 clusters of sizes 38, 62, 50
##
## Cluster means:
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1 6.850 3.074 5.742 2.071
## 2 5.902 2.748 4.394 1.434
## 3 5.006 3.428 1.462 0.246
##
## Clustering vector:
## [1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [36] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [71] 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 1 1
## [106] 1 2 1 1 1 1 1 1 2 2 1 1 1 1 2 1 2 1 2 1 1 2 2 1 1 1 1 1 2 1 1 1 1 2 1
## [141] 1 1 2 1 1 1 2 1 1 2
##
## Within cluster sum of squares by cluster:
## [1] 23.88 39.82 15.15
## (between_SS / total_SS = 88.4 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss"
## [5] "tot.withinss" "betweenss" "size"
# Cross-tabulate the true species (rows) against the assigned cluster
# (columns) to see how well the clusters recover the species.
table(
  iris[["Species"]],
  kc[["cluster"]]
)
##
## 1 2 3
## setosa 0 0 50
## versicolor 2 48 0
## virginica 36 14 0
# Scatter plot of the two sepal measurements, one colour per cluster.
plot(
  newiris[, c("Sepal.Length", "Sepal.Width")],
  col = kc$cluster
)
# Overlay the three cluster centres as large asterisks (pch = 8).
points(
  kc$centers[, c("Sepal.Length", "Sepal.Width")],
  col = seq_len(3),
  pch = 8,
  cex = 2
)
# case 2
data(iris)
# Drop the species column (5th), remove incomplete rows, then standardise each
# measurement so that no variable dominates the Euclidean distances.
mydata <- scale(na.omit(iris[, -5]))

# kmeans() starts from random centres: fix the seed for reproducibility and
# use several restarts per k so the elbow curve is not distorted by a bad
# local optimum. iter.max is raised above the default of 10 to avoid
# non-convergence warnings at the larger values of k.
set.seed(42)
k_values <- 1:15
# Total within-cluster sum of squares for each candidate number of clusters.
# vapply() builds the whole vector at once instead of growing it in a loop.
wss <- vapply(
  k_values,
  function(k) {
    kmeans(mydata, centers = k, nstart = 25, iter.max = 50)$tot.withinss
  },
  numeric(1)
)
# Elbow plot: look for the k after which the curve flattens out.
plot(
  k_values, wss,
  type = "b",
  xlab = "Number of Clusters",
  ylab = "Within group sum of squares"
)

# Final fit with the chosen k = 3; cluster numbers are arbitrary labels.
fit <- kmeans(mydata, centers = 3, nstart = 25, iter.max = 50)
# Per-cluster means of the standardised variables.
aggregate(mydata, by = list(fit$cluster), FUN = mean)
## Group.1 Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1 1 1.13218 0.08813 0.9928 1.0141
## 2 2 -1.01119 0.85041 -1.3006 -1.2507
## 3 3 -0.05005 -0.88043 0.3466 0.2806
# Per-cluster means of the raw (unscaled) measurements, so the cluster
# centres can be read in the original units.
aggregate(
  iris[-5],
  by = list(fit[["cluster"]]),
  FUN = mean
)
## Group.1 Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1 1 6.781 3.096 5.511 1.972
## 2 2 5.006 3.428 1.462 0.246
## 3 3 5.802 2.674 4.370 1.413
# Combine the scaled measurements, the original iris data and the cluster
# assignment into one data frame. The cluster column is named explicitly;
# `fit.cluster` is the same name data.frame() derives from `fit$cluster`.
mydata.df <- data.frame(
  mydata,
  iris,
  fit.cluster = fit$cluster
)
# Cross-tabulate assigned cluster (rows) against true species (columns).
table(
  mydata.df[["fit.cluster"]],
  mydata.df[["Species"]]
)
##
## setosa versicolor virginica
## 1 0 11 36
## 2 50 0 0
## 3 0 39 14
# hierarchical clustering
# case 3
# Standardise the four numeric measurements (the species column is dropped).
mydata <- scale(iris[, -5])
# Pairwise Euclidean distances between observations.
d <- dist(mydata, method = "euclidean")
# Agglomerative clustering with Ward linkage. The old method name "ward" is
# deprecated: hclust() remaps it to "ward.D" and prints a message each time.
# Using "ward.D" directly gives the same tree without the message.
# NOTE(review): "ward.D2" is the variant that implements Ward's criterion on
# unsquared distances; switching to it would change the resulting groups.
fit <- hclust(d, method = "ward.D")
# Dendrogram of the merge tree.
plot(fit)
# Cut the tree into three groups and outline them on the dendrogram.
group <- cutree(fit, k = 3)
rect.hclust(fit, k = 3, border = "red")