# Various clustering techniques using the iris data

# partitioning clustering

# case 1
# Case 1: k-means with k = 3 on the four numeric iris measurements.
data(iris)
# Work on a copy with the Species label removed so that clustering only sees
# the measurements.
newiris <- iris
newiris$Species <- NULL
# kmeans() picks its initial centers at random. Fix the seed so the run is
# reproducible, and keep the best of several starts so that a poor initial
# draw cannot leave the fit in a worse local optimum. Cluster numbers are
# arbitrary labels, so they may be permuted relative to previously recorded
# output.
set.seed(42)
kc <- kmeans(newiris, centers = 3, nstart = 25)
kc

## K-means clustering with 3 clusters of sizes 38, 62, 50
## 
## Cluster means:
##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1        6.850       3.074        5.742       2.071
## 2        5.902       2.748        4.394       1.434
## 3        5.006       3.428        1.462       0.246
## 
## Clustering vector:
##   [1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [36] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
##  [71] 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 1 1
## [106] 1 2 1 1 1 1 1 1 2 2 1 1 1 1 2 1 2 1 2 1 1 2 2 1 1 1 1 1 2 1 1 1 1 2 1
## [141] 1 1 2 1 1 1 2 1 1 2
## 
## Within cluster sum of squares by cluster:
## [1] 23.88 39.82 15.15
##  (between_SS / total_SS =  88.4 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"    
## [5] "tot.withinss" "betweenss"    "size"

# Cross-tabulate the true species (rows) against the k-means cluster (columns).
table(iris[["Species"]], kc[["cluster"]])

##             
##               1  2  3
##   setosa      0  0 50
##   versicolor  2 48  0
##   virginica  36 14  0

# Scatter of the two sepal measurements, one color per k-means cluster.
plot(
  x = newiris[, c("Sepal.Length", "Sepal.Width")],
  col = kc[["cluster"]]
)
# Overlay the fitted cluster centers as large asterisks; center i is drawn in
# color i, the same color used for the points assigned to cluster i.
points(
  x = kc[["centers"]][, c("Sepal.Length", "Sepal.Width")],
  col = seq_len(3),
  pch = 8,
  cex = 2
)

# [figure: plot of chunk unnamed-chunk-1]



# case 2
# Case 2: elbow heuristic -- total within-cluster sum of squares for
# k = 1..15 on the standardized measurements.
data(iris)
# Drop the Species column (5th), discard incomplete rows, then center and
# scale each remaining column.
mydata <- scale(na.omit(iris[, -5]))
# One k-means fit per candidate k; tot.withinss is the sum of the per-cluster
# within sums of squares.
wss <- vapply(
  1:15,
  function(k) kmeans(mydata, centers = k)$tot.withinss,
  numeric(1)
)
plot(
  seq_along(wss), wss,
  type = "b",
  xlab = "Number of Clusters",
  ylab = "Within group sum of squares"
)

# [figure: plot of chunk unnamed-chunk-1]

# Fit k = 3 on the standardized data. kmeans() starts from randomly chosen
# centers, so fix the seed for reproducibility and keep the best of several
# starts to avoid reporting a poor local optimum. Cluster numbers are
# arbitrary labels and may be permuted relative to previously recorded output.
set.seed(42)
fit <- kmeans(mydata, centers = 3, nstart = 25)
# Per-cluster means of the standardized variables.
aggregate(mydata, by = list(fit$cluster), FUN = mean)

##   Group.1 Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1       1      1.13218     0.08813       0.9928      1.0141
## 2       2     -1.01119     0.85041      -1.3006     -1.2507
## 3       3     -0.05005    -0.88043       0.3466      0.2806

# Per-cluster means on the original measurement scale (Species column dropped).
aggregate(iris[, 1:4], by = list(Group.1 = fit[["cluster"]]), FUN = mean)

##   Group.1 Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1       1        6.781       3.096        5.511       1.972
## 2       2        5.006       3.428        1.462       0.246
## 3       3        5.802       2.674        4.370       1.413

# Combine the standardized values, the original iris columns, and the cluster
# assignment in one data frame, then cross-tabulate cluster (rows) against
# species (columns).
mydata.df <- data.frame(mydata, iris, fit.cluster = fit[["cluster"]])
table(mydata.df[["fit.cluster"]], mydata.df[["Species"]])

##    
##     setosa versicolor virginica
##   1      0         11        36
##   2     50          0         0
##   3      0         39        14


# hierarchical clustering

# case 3
# Case 3: agglomerative clustering of the standardized measurements.
mydata <- scale(iris[, -5])
d <- dist(mydata, method = "euclidean")
# "ward" is the legacy spelling: R >= 3.1.0 renamed it to "ward.D" and emits a
# message when the old name is used. "ward.D" selects the same algorithm, so
# the resulting tree is unchanged.
# NOTE(review): per ?hclust, "ward.D2" is the variant that implements Ward's
# criterion when given unsquared distances like these; switching would change
# the tree, so confirm intent before doing so.
fit <- hclust(d, method = "ward.D")
plot(fit)
# Cut the dendrogram into three groups and outline them on the plot.
group <- cutree(fit, k = 3)
rect.hclust(fit, k = 3, border = "red")

# [figure: plot of chunk unnamed-chunk-1]