seminar 9.R

yifanzhang — Apr 14, 2014, 7:02 AM

library(RColorBrewer)
library(cluster)
library(pvclust)
library(xtable)
library(limma)
library(plyr)
prDat <- read.table("GSE4051_data.tsv",
                    header = TRUE, row.names = 1) # the whole enchilada
prDes <- readRDS("GSE4051_design.rds")
sprDat <- t(scale(t(prDat)))
round(data.frame(avgBefore = rowMeans(head(prDat)),
                 avgAfter = rowMeans(head(sprDat)),
                 varBefore = apply(head(prDat), 1, var),
                 varAfter = apply(head(sprDat), 1, var)), 2)
             avgBefore avgAfter varBefore varAfter
1415670_at        7.22        0      0.02        1
1415671_at        9.37        0      0.35        1
1415672_at        9.70        0      0.15        1
1415673_at        8.42        0      0.03        1
1415674_a_at      8.47        0      0.02        1
1415675_at        9.67        0      0.03        1
pr.dis <- dist(t(sprDat), method = 'euclidean')
prDes$grp <- with(prDes, interaction(gType, devStage))
pr.hc.s <- hclust(pr.dis, method = 'single')
pr.hc.c <- hclust(pr.dis, method = 'complete')
pr.hc.a <- hclust(pr.dis, method = 'average')
pr.hc.w <- hclust(pr.dis, method = 'ward')
op <- par(mar = c(0,4,4,2), mfrow = c(2,2))

plot(pr.hc.s, labels = FALSE, main = "Single", xlab = "")
plot(pr.hc.c, labels = FALSE, main = "Complete", xlab = "")
plot(pr.hc.a, labels = FALSE, main = "Average", xlab = "")
plot(pr.hc.w, labels = FALSE, main = "Ward", xlab = "")

plot of chunk unnamed-chunk-1


par(op)
op <- par(mar = c(1,4,4,1))
plot(pr.hc.w, labels = prDes$grp, cex = 0.6, 
     main = "Ward showing 10 clusters")
rect.hclust(pr.hc.w, k = 10)

plot of chunk unnamed-chunk-1

par(op)
jGraysFun <- colorRampPalette(brewer.pal(n = 9, "Greys"))
gTypeCols <- brewer.pal(11, "RdGy")[c(4,7)]
heatmap(as.matrix(sprDat), Rowv = NA, col = jGraysFun(256),
        hclustfun = function(x) hclust(x, method = 'ward'),
        scale = "none", labCol = prDes$grp, labRow = NA, margins = c(8,1),
        ColSideColor = gTypeCols[unclass(prDes$gType)])
legend("topright", legend = levels(prDes$gType),
       col = gTypeCols, lty = 1, lwd = 5, cex = 0.5)

plot of chunk unnamed-chunk-1

devStageCols <- brewer.pal(11, "RdGy")[c(2,4,7,9,11)]
heatmap(as.matrix(sprDat), Rowv = NA, col = jGraysFun(256),
        hclustfun = function(x) hclust(x, method = 'ward'),
        scale = "none", labCol = prDes$grp, labRow = NA, margin = c(8,1),
        ColSideColor = devStageCols[unclass(prDes$devStage)])

plot of chunk unnamed-chunk-1

k <- 5
pr.pam <- pam(pr.dis, k = k)
op <- par(mar = c(5,1,4,4))
plot(pr.pam, main = "Silhouette Plot for 5 clusters")

plot of chunk unnamed-chunk-1

par(op)
avgwidth<-c()
number<-c()
for (i in 2:10){
  pr.pam<-pam(pr.dis, k=i)
  avgwidth[i]<-pr.pam$silinfo$avg.width
  number[i]<-i
}
plot(avgwidth~number)

plot of chunk unnamed-chunk-1

#2 is the best choice for the number of clusters