Lab 2: Clustering

K-Means Clustering

# Simulate 50 observations on two features; fixing the seed makes the draw
# reproducible.
set.seed(2)
x <- matrix(rnorm(50 * 2), nrow = 50, ncol = 2)

# Shift the first 25 observations by (+3, -4) so the data contain two
# well-separated groups.
x[seq_len(25), 1] <- x[seq_len(25), 1] + 3
x[seq_len(25), 2] <- x[seq_len(25), 2] - 4

# K-means with K = 2, run from 20 random starts; show each observation's
# assigned cluster.
km.out <- kmeans(x, centers = 2, nstart = 20)
print(km.out$cluster)
 [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
[24] 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[47] 1 1 1 1
# Scatter plot of the observations, coloured by assigned cluster. Adding 1 to
# the cluster label moves the colour index off 1, so clusters are drawn with
# palette entries 2 and 3.
plot(
  x,
  col = km.out$cluster + 1,
  main = "K-Means Clustering Results with K = 2",
  xlab = "",
  ylab = "",
  pch = 20,
  cex = 2
)

# Repeat the clustering with K = 3 (20 random starts) and print the full
# summary of the fit.
set.seed(4)
km.out <- kmeans(x, centers = 3, nstart = 20)
print(km.out)
K-means clustering with 3 clusters of sizes 10, 23, 17

Cluster means:
        [,1]        [,2]
1  2.3001545 -2.69622023
2 -0.3820397 -0.08740753
3  3.7789567 -4.56200798

Clustering vector:
 [1] 3 1 3 1 3 3 3 1 3 1 3 1 3 1 3 1 3 3 3 3 3 1 3
[24] 3 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1
[47] 2 2 2 2

Within cluster sum of squares by cluster:
[1] 19.56137 52.67700 25.74089
 (between_SS / total_SS =  79.3 %)

Available components:

[1] "cluster"      "centers"      "totss"       
[4] "withinss"     "tot.withinss" "betweenss"   
[7] "size"         "iter"         "ifault"      
# Scatter plot of the observations, coloured by their K = 3 cluster assignment
# (cluster label + 1 is used as the colour index).
plot(
  x,
  col = km.out$cluster + 1,
  main = "K-Means Clustering Results with K = 3",
  xlab = "",
  ylab = "",
  pch = 20,
  cex = 2
)

# K = 3 from a single random start; report the total within-cluster sum of
# squares for comparison with a multi-start fit.
set.seed(3)
km.out <- kmeans(x, centers = 3, nstart = 1)
print(km.out$tot.withinss)
[1] 104.3319
# Same K = 3 fit, but with 20 random starts; report the total within-cluster
# sum of squares again.
km.out <- kmeans(x, centers = 3, nstart = 20)
print(km.out$tot.withinss)
[1] 97.97927

Hierarchical Clustering

# Hierarchical clustering of the observations in `x` under three linkage
# rules. The pairwise distance matrix (dist() defaults to Euclidean distance)
# is the same for all three fits, so it is computed once and reused.
x.dist <- dist(x)
hc.complete <- hclust(x.dist, method = "complete")
hc.average <- hclust(x.dist, method = "average")
hc.single <- hclust(x.dist, method = "single")

# Draw the three dendrograms side by side. par() returns the settings it
# replaces; they are saved and restored afterwards so that later plots are not
# squeezed into one panel of the 1 x 3 layout.
old.par <- par(mfrow = c(1, 3))
plot(hc.complete, main = "Complete Linkage", xlab = "", sub = "", cex = 0.9)
plot(hc.average, main = "Average Linkage", xlab = "", sub = "", cex = 0.9)
plot(hc.single, main = "Single Linkage", xlab = "", sub = "", cex = 0.9)
par(old.par)

# Cut the complete-linkage dendrogram into 2 groups and show the labels.
print(cutree(hc.complete, k = 2))
 [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[24] 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
[47] 2 2 2 2
# Cut the average-linkage dendrogram into 2 groups and show the labels.
print(cutree(hc.average, k = 2))
 [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[24] 1 1 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 1 2 1
[47] 2 2 2 2
# Cut the single-linkage dendrogram into 2 groups and show the labels.
print(cutree(hc.single, k = 2))
 [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1
[24] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[47] 1 1 1 1
# Cut the single-linkage dendrogram into 4 groups and show the labels.
print(cutree(hc.single, k = 4))
 [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1
[24] 1 1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 3 3 3 3
[47] 3 3 3 3
# Standardise the columns of `x` with scale(), then run complete-linkage
# hierarchical clustering on the scaled features and plot the dendrogram.
xsc <- scale(x)
plot(
  hclust(dist(xsc), method = "complete"),
  main = "Hierarchical Clustering with Scaled Features"
)

# New simulated data set: 30 observations on 3 features (this overwrites the
# earlier `x`; no seed is set here, so the draw differs from run to run).
x <- matrix(rnorm(30 * 3), nrow = 30, ncol = 3)

# Correlation-based dissimilarity between observations: t(x) puts the
# observations in columns so cor() correlates them pairwise, and 1 - r is then
# converted into a "dist" object for hclust().
dd <- as.dist(1 - cor(t(x)))
plot(
  hclust(dd, method = "complete"),
  main = "Complete Linkage with Correlation-Based Distance",
  xlab = "",
  sub = ""
)

LS0tDQp0aXRsZTogIkNhcO10dWxvIDEwLCBMYWIgMiINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KYXV0aG9yOiAiTHVpcyBKaW1lbmV6Ig0KLS0tDQoNCiMjIyBMYWIgMjogQ2x1c3RlcmluZw0KDQojIyMjIEstTWVhbnMgQ2x1c3RlcmluZw0KDQpgYGB7cn0NCnNldC5zZWVkKDIpDQp4ID0gbWF0cml4KHJub3JtKDUwKjIpLCBuY29sID0gMikNCnhbMToyNSwgMV0gPSB4WzE6MjUsIDFdICsgMw0KeFsxOjI1LCAyXSA9IHhbMToyNSwgMl0gLSA0DQpgYGANCg0KYGBge3J9DQprbS5vdXQgPSBrbWVhbnMoeCwgMiwgbnN0YXJ0ID0gMjApDQpgYGANCg0KYGBge3J9DQprbS5vdXQkY2x1c3Rlcg0KYGBgDQoNCmBgYHtyfQ0KcGxvdCh4LCBjb2wgPSAoa20ub3V0JGNsdXN0ZXIgKyAxKSwgbWFpbiA9ICJLLU1lYW5zIENsdXN0ZXJpbmcgUmVzdWx0cyB3aXRoIEsgPSAyIiwgeGxhYiA9ICIiLCB5bGFiID0gIiIsIHBjaCA9IDIwLCBjZXggPSAyKQ0KYGBgDQoNCmBgYHtyfQ0Kc2V0LnNlZWQoNCkNCmttLm91dCA9IGttZWFucyh4LCAzLCBuc3RhcnQgPSAyMCkNCmttLm91dA0KcGxvdCh4LCBjb2wgPSAoa20ub3V0JGNsdXN0ZXIgKyAxKSwgbWFpbiA9ICJLLU1lYW5zIENsdXN0ZXJpbmcgUmVzdWx0cyB3aXRoIEsgPSAzIiwgeGxhYiA9ICIiLCB5bGFiID0gIiIsIHBjaCA9IDIwLCBjZXggPSAyKQ0KYGBgDQoNCmBgYHtyfQ0Kc2V0LnNlZWQoMykNCmttLm91dCA9IGttZWFucyh4LCAzLCBuc3RhcnQgPSAxKQ0Ka20ub3V0JHRvdC53aXRoaW5zcw0Ka20ub3V0ID0ga21lYW5zKHgsIDMsIG5zdGFydCA9IDIwKQ0Ka20ub3V0JHRvdC53aXRoaW5zcw0KYGBgDQoNCiMjIyMgSGllcmFyY2hpY2FsIENsdXN0ZXJpbmcNCg0KYGBge3J9DQpoYy5jb21wbGV0ZSA9IGhjbHVzdChkaXN0KHgpLCBtZXRob2QgPSAiY29tcGxldGUiKQ0KYGBgDQoNCmBgYHtyfQ0KaGMuYXZlcmFnZSA9IGhjbHVzdChkaXN0KHgpLCBtZXRob2QgPSAiYXZlcmFnZSIpDQpoYy5zaW5nbGUgPSBoY2x1c3QoZGlzdCh4KSwgbWV0aG9kID0gInNpbmdsZSIpDQpgYGANCg0KYGBge3J9DQpwYXIobWZyb3cgPSBjKDEsMykpDQpwbG90KGhjLmNvbXBsZXRlLCBtYWluID0gIkNvbXBsZXRlIExJbmthZ2UiLCB4bGFiID0gIiIsIHN1YiA9ICIiLCBjZXggPSAwLjkpDQpwbG90KGhjLmF2ZXJhZ2UsIG1haW4gPSAiQXZlcmFnZSBMSW5rYWdlIiwgeGxhYiA9ICIiLCBzdWIgPSAiIiwgY2V4ID0gMC45KQ0KcGxvdChoYy5zaW5nbGUsIG1haW4gPSAiU2luZ2xlIExJbmthZ2UiLCB4bGFiID0gIiIsIHN1YiA9ICIiLCBjZXggPSAwLjkpDQpgYGANCg0KYGBge3J9DQpjdXRyZWUoaGMuY29tcGxldGUsIDIpDQpjdXRyZWUoaGMuYXZlcmFnZSwgMikNCmN1dHJlZShoYy5zaW5nbGUsIDIpDQpgYGANCg0KYGBge3J9DQpjdXRyZWUoaGMuc2luZ2xlLCA0KQ0KYGBgDQoNCmBgYHtyfQ0KeHNjID0gc2NhbGUoeCkNCnBsb3QoaGNsdXN0KGRpc3QoeHNjKSwgbWV0aG9kID0gImNvbXBsZXRlIiksIG1haW4gPSAiSGll
cmFyY2hpY2FsIENsdXN0ZXJpbmcgd2l0aCBTY2FsZWQgRmVhdHVyZXMiKQ0KYGBgDQoNCmBgYHtyfQ0KeCA9IG1hdHJpeChybm9ybSgzMCAqIDMpLCBuY29sID0gMykNCmRkID0gYXMuZGlzdCgxIC0gY29yKHQoeCkpKQ0KcGxvdChoY2x1c3QoZGQsIG1ldGhvZCA9ICJjb21wbGV0ZSIpLCBtYWluID0gIkNvbXBsZXRlIExpbmthZ2Ugd2l0aCBDb3JyZWxhdGlvbi1CYXNlZCBEaXN0YW5jZSIsIHhsYWIgPSAiIiwgc3ViID0gIiIpDQpgYGA=