CLUSTER METHODS

4.1 Dendograms

# Read data, compute and add features
gemIdGene <- read.csv(file = "gemIdGene.csv", row.names = 1)
genes <- gemIdGene[, 1:9]
augMat <- t(apply(genes[, 1:9], 1, diff))
geneMat <- cbind(genes[, 1:9], augMat)

# Sample the data to show smaller dendrograms
set.seed(137)
subs <- sample(1:nrow(geneMat), 60, replace = FALSE)
geneMat60 <- geneMat[subs, ]

# Distance matrix and cluster trees
eucDist <- dist(geneMat60)
library(cluster)
clus1 <- hclust(eucDist, method = "complete")
clus2 <- hclust(eucDist, method = "single")
clus3 <- hclust(eucDist, method = "ward")
clus4 <- hclust(eucDist, method = "average")
clus5 <- as.hclust(diana(eucDist))

source("panelFunctions.r")

# Dendrograms
par(cex = 0.5, las = 1)
pan <- panelLayout(nrow = 3, ncol = 1, topMar = 0.6, rowSep = c(0, 0.4, 0.4, 
    0))

panelSelect(pan, mar = "top")
panelScale()

## $rx
## [1] 0 1
## 
## $ry
## [1] 0 1

text(0.5, 0.85, "Dendrograms for Different Clustering Choices Set2", adj = 0.5, 
    cex = 1.2)

panelSelect(pan, 1, 1)
panelScale(c(-2, 64), c(-3.5, 3.5))

## $rx
## [1] -2 64
## 
## $ry
## [1] -3.5  3.5

par(new = TRUE)
plot(clus3, ann = FALSE)
mtext(side = 3, "Ward", cex = 1, font = 2)

panelSelect(pan, 2, 1)
panelScale(c(-2, 64), c(-3.5, 3.5))

## $rx
## [1] -2 64
## 
## $ry
## [1] -3.5  3.5

par(new = TRUE)
plot(clus4, ann = FALSE)
mtext(side = 3, "Average", cex = 1, font = 2)

panelSelect(pan, 3, 1)
panelScale(c(-2, 64), c(-3.5, 3.5))

## $rx
## [1] -2 64
## 
## $ry
## [1] -3.5  3.5

par(new = TRUE)
plot(clus5, ann = FALSE)
mtext(side = 3, "Devisive", font = 2, cex = 1)

plot of chunk unnamed-chunk-1

4.2 Horizontal layout with parabolic lines

source("ParabolaTree.r")
parabolaTree(clus3, lab = clus3$labels, cex = 0.75, title = "Rat Gene Expression for 60 Genes", 
    labelF = 0.12, xGrid = seq(2, 12, by = 2))
parabolaTree(clus3, lab = clus3$labels, cex = 0.75, title = "Rat Gene Expression for 60 Genes", 
    labelF = 0.12, xGrid = seq(0, 12, by = 2), parabolaF = 0.06, nameGapF = 0.015)

plot of chunk unnamed-chunk-2

5. Look for clusters, and thinking about the number of clusters

# Look at the distance matrix.
library(RColorBrewer)
hmcol <- colorRampPalette(brewer.pal(6, "RdBu"))(236)
heatmap(as.matrix(eucDist), sym = TRUE, col = hmcol, distfun = function(x) as.dist(x))

plot of chunk unnamed-chunk-3

*5.2 Look for clusters in a 2-D scaled view *

eucDistAll <- dist(geneMat)
ans <- cmdscale(eucDistAll, 3)
plot(ans[, 1], ans[, 2], main = "Multidimensional Scaling Plot", xlab = "x", 
    ylab = "y", pty = "sq")

plot of chunk unnamed-chunk-4

# identify(ans[, 1], ans[, 2], row.names(ans))

5.3 Look for a clusters in a 3-D scaled view.

library(rgl)
open3d()
plot3d(ans, type = "s", radius = 0.05, col = "green", zlab = "z", xlab = "y", 
    ylab = "z")

6. K-means discussion

km2 <- kmeans(geneMat, centers = 2, nstart = 10)
km3 <- kmeans(geneMat, centers = 3, nstart = 10)
km4 <- kmeans(geneMat, centers = 4, nstart = 10)
km5 <- kmeans(geneMat, centers = 5, nstart = 10)
km6 <- kmeans(geneMat, centers = 6, nstart = 10)
km7 <- kmeans(geneMat, centers = 7, nstart = 10)
km8 <- kmeans(geneMat, centers = 8, nstart = 10)
km9 <- kmeans(geneMat, centers = 9, nstart = 10)

plot(2:9, c(km2[6], km3[6], km4[6], km5[6], km6[6], km7[6], km8[6], km9[6]), 
    main = "K Means:  How many clusters?", las = 1, xlab = "Number of Clusters", 
    ylab = "Between to Total Sums of Squares")

plot of chunk unnamed-chunk-6

# Compute PAM clusters, compare with k-mean clusters
pam2 <- pam(eucDistAll, k = 2, diss = TRUE)
pam3 <- pam(eucDistAll, k = 3, diss = TRUE)
pam6 <- pam(eucDistAll, k = 6, diss = TRUE)
# all(names(km2$cluster) == names(pam2$clustering))

# Agglomerative trees and cluster number selection

hclResult <- hclust(eucDistAll, method = "ward")
agglom2 <- cutree(hclResult, k = 2)  # creates two clusters

# all(names(agglom2) == names(pam2$clustering))

## Cluster separation and cluster number selection cbind(hclResult$merge,
## hclResult$height)

Cluster criticism using the silhouette plot

silpam6 <- silhouette(pam6)
plot(silpam6, main = "")

plot of chunk unnamed-chunk-8