x = c(rnorm(20), rnorm(20, 3), seq(from = 1, to = 4, length.out = 20))
y = c(rnorm(20), rnorm(20, 3), seq(from = 0, to = 0.5, length.out = 20))
col = c(rep("red", 20), rep("blue", 20), rep("green", 20))
colLab = function(n, cols) {
leafs = order.dendrogram(n)
leaf.col = unique(cols[leafs])
if (length(leaf.col) == 1) {
attr(n, "edgePar") = list(col = leaf.col)
}
n
}
par(mfrow = c(2, 3), tck = -0.02, mgp = c(1.1, 0.2, 0), mar = c(2, 3, 1, 0),
oma = c(0, 0, 0, 1))
plot(x, y, col = col, pch = 19)
d = dist(cbind(x, y), method = "euclidean")
h = hclust(d)
plot(h)
dg = as.dendrogram(h)
dg = dendrapply(dg, colLab, col)
plot(dg, leaflab = "none", main = "Complete")
dg = dendrapply(as.dendrogram(hclust(d, method = "average")), colLab, col)
plot(dg, leaflab = "none", main = "average")
dg = dendrapply(as.dendrogram(hclust(d^2, method = "ward")), colLab, col)
plot(dg, leaflab = "none", main = "ward")
dg = dendrapply(as.dendrogram(hclust(d, method = "single")), colLab, col)
plot(dg, leaflab = "none", main = "Single")
par(mfrow = c(1, 3), tck = -0.02, mgp = c(1.1, 0.2, 0), mar = c(2, 3, 1, 0),
oma = c(0, 0, 0, 1))
h = hclust(d)
dg = dendrapply(as.dendrogram(h), colLab, col)
plot(dg, leaflab = "none", main = "Complete")
t = 4
abline(h = t, col = "red")
cl = cutree(h, h = t)
plot(x, y, col = cl, pch = 19, main = "by level")
cl = cutree(h, k = 6)
plot(x, y, col = cl, pch = 19, main = "by cluster count")
renameCl = function(x) {
ux = unique(x)
cl = ux
for (i in ux) cl[i] = mean(which(x == i))
cl = rank(cl)
r = x
for (i in 1:length(ux)) r[x == i] = cl[i]
r
}
# k-means
par(mfrow = c(2, 3), tck = -0.02, mgp = c(1.1, 0.2, 0), mar = c(2, 3, 1, 0),
oma = c(0, 0, 0, 1))
for (i in 1:6) {
cl = kmeans(cbind(x, y), centers = 4, nstart = 20)
plot(x, y, col = renameCl(cl$cluster), pch = 19)
cl$size
}
# k-medoids
library(cluster)
for (i in 1:6) {
cl = pam(d, k = 4)
plot(x, y, col = renameCl(cl$cluster), pch = 19)
}
par(mfrow = c(1, 1), tck = -0.02, mgp = c(1.1, 0.2, 0), mar = c(2, 3, 1, 0),
oma = c(0, 0, 0, 1))
cl = pam(d, k = 4)
slh = silhouette(cl$clustering, d)
plot(slh)
slh.m = matrix(ncol = 4, nrow = 15)
colnames(slh.m) = c("h.compl", "h.singl", "kmean", "kmed")
for (i in 1:15) {
d = dist(cbind(x, y))
slh.m[i, 1] = mean(silhouette(cutree(hclust(d, meth = "compl"), k = i +
1), d)[, 3])
slh.m[i, 2] = mean(silhouette(cutree(hclust(d, meth = "singl"), k = i +
1), d)[, 3])
slh.m[i, 3] = mean(silhouette(kmeans(cbind(x, y), i + 1, nstart = 50)$cluster,
d)[, 3])
slh.m[i, 4] = mean(silhouette(pam(d, i + 1)$clustering, d)[, 3])
}
## Warning: did not converge in 10 iterations
plot(2:16, slh.m[, 1], t = "l", ylim = range(slh.m), xlab = "# of clusters",
ylab = "mean silhouette")
lines(2:16, slh.m[, 2], t = "l", col = 2)
lines(2:16, slh.m[, 3], t = "l", col = 3)
lines(2:16, slh.m[, 4], t = "l", col = 4)
legend(10, 0.38, col = 1:4, lwd = 1, legend = colnames(slh.m))