class 7

install stuff

library("gplots")
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
## 
## Attaching package: 'gplots'
## 
## The following object is masked from 'package:stats':
## 
##     lowess
library("MASS")
library(cluster)

# One-time installation of marray from Bioconductor (biocLite is retired):
# install.packages("BiocManager"); BiocManager::install("marray")
library("marray")
## Loading required package: limma

Let's start

load crabs


# Load the crabs data set (shipped with the MASS package attached above).
data(crabs)

# One label per observation, built by joining columns 1 and 2 with a dot
# (species and sex according to the MASS documentation of `crabs`).
fac <- as.factor(paste(crabs[, 1], crabs[, 2], sep = "."))

# Colour ramp used by the heatmaps: red (low) -> black (mid) -> green (high),
# 100 steps.
hmcol <- maPalette(low = "red", high = "green", mid = "black", k = 100)

# PCA on the standardised columns 4 to 8 of the data set.
pcaCrabs <- prcomp(scale(crabs[, 4:8]))

# Matrix of principal component scores, one row per observation; each row is
# labelled with the group label of that observation.
pcaAlldata <- pcaCrabs$x
rownames(pcaAlldata) <- as.character(fac)

# Hierarchical clustering (single linkage) on the Euclidean distances between
# the PC2/PC3 scores, then draw the resulting tree.
single <- hclust(
  dist(pcaAlldata[, 2:3], method = "euclidean"),
  method = "single"
)
plot(single)

plot of chunk unnamed-chunk-2

# Dendrogram form of the clustering tree; handed to heatmap.2 through `Colv`.
singledend <- as.dendrogram(single)

# Copy the group factor and replace its level names by colour names, so that
# as.character() yields one colour per observation.
mybarcolor <- fac
levels(mybarcolor) <- c("lightblue", "darkblue", "darkorange", "darkorange4")

# Heatmap of the PC2/PC3 scores (transposed: observations become columns).
# Columns are ordered by the precomputed dendrogram and annotated with the
# group colour bar; only the column dendrogram is requested.
heatmap.2(
  as.matrix(t(pcaAlldata[, 2:3])),
  Colv = singledend,
  dendrogram = "col",
  ColSideColors = as.character(mybarcolor),
  col = hmcol,
  key = TRUE,
  trace = "none",
  density.info = "none",
  labRow = NA
)

plot of chunk unnamed-chunk-3

ein anderes Clustering


#### 
# Alternative clustering: average linkage on Manhattan distances between the
# PC2/PC3 scores. The variable names `single`/`singledend` are reused here
# although the linkage method is no longer "single".
single <- hclust(
  dist(pcaAlldata[, 2:3], method = "manhattan"),
  method = "average"
)
singledend <- as.dendrogram(single)

# Same heatmap layout as before, now with columns ordered by the new tree.
heatmap.2(
  as.matrix(t(pcaAlldata[, 2:3])),
  Colv = singledend,
  dendrogram = "col",
  ColSideColors = as.character(mybarcolor),
  col = hmcol,
  key = TRUE,
  trace = "none",
  density.info = "none",
  labRow = NA
)

plot of chunk unnamed-chunk-4


ein paar falsche Clusteringansätze


# k-means with 4 centres computed on ALL principal component columns.
# The scatter plot shows PC2 against PC3: colour = assigned cluster,
# plotting symbol = integer code of the group factor.
kmeanresultPCA <- kmeans(pcaAlldata, centers = 4)
plot(
  pcaAlldata[, 2:3],
  pch = as.numeric(fac),
  col = kmeanresultPCA$cluster
)

plot of chunk unnamed-chunk-5

Das sieht schon besser aus -> für das Clustering werden jetzt nur noch die Hauptkomponenten 2 und 3 aus der PCA benutzt.

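Die Farben (Clusternummern) werden bei jedem Durchlauf anders verteilt und auch die Cluster selbst können sich ändern, weil k-means jedes Mal mit zufällig gewählten Startzentren beginnt (random).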


# k-means with 4 centres, this time restricted to the PC2/PC3 scores that are
# also the axes of the scatter plot. Colour = assigned cluster, plotting
# symbol = integer code of the group factor.
kmeanresultPCA <- kmeans(pcaAlldata[, 2:3], centers = 4)
plot(
  pcaAlldata[, 2:3],
  pch = as.numeric(fac),
  col = kmeanresultPCA$cluster
)

plot of chunk unnamed-chunk-6

k-medoids

doch nicht

pheatmap


library("pheatmap")
# Heatmap of the PC2/PC3 scores. `kmeans_k = 4` asks pheatmap to aggregate the
# rows into 4 k-means clusters before drawing; the cell values are printed.
#
# Row distance: Euclidean instead of "correlation". With only two columns the
# Pearson correlation between any two rows is always +1, -1 or NA, so a
# correlation-based distance carries no usable information for the row tree.
#
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
pheatmap(
  pcaAlldata[, 2:3],
  kmeans_k = 4,
  clustering_distance_rows = "euclidean",
  display_numbers = TRUE
)

plot of chunk unnamed-chunk-7

andere tolle Bibliothek: apcluster (Affinity Propagation; wird über CRAN installiert, nicht über Bioconductor)


library("apcluster")
## 
## Attaching package: 'apcluster'
## 
## The following object is masked from 'package:stats':
## 
##     heatmap
# Affinity propagation clustering of the PC2/PC3 scores. The similarity matrix
# comes from negDistMat() with r = 2 (negative distances raised to the power
# 2, per the apcluster documentation).
apres <- apcluster(
  negDistMat(pcaAlldata[, 2:3], r = 2)
)

# Show the clustering result on top of the same two-dimensional data.
plot(apres, pcaAlldata[, 2:3])

plot of chunk unnamed-chunk-8

# Same clustering result, drawn against all principal component columns
# (presumably rendered as pairwise scatter plots - see the apcluster plot
# documentation).
plot(x = apres, y = pcaAlldata)

plot of chunk unnamed-chunk-8