Exercise 7 - Chen

# Toy example: 5 observations (rows) of 10 random variables (columns).
m <- matrix(rnorm(50), nrow = 5, ncol = 10)
# Rank-based (Spearman) correlation between every pair of columns.
cor(x = m, y = m, method = "spearman")
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
##  [1,]  1.0 -0.9  0.1 -0.4  0.5  0.1 -0.7  0.2 -0.4   0.3
##  [2,] -0.9  1.0 -0.2  0.3 -0.6  0.2  0.5 -0.1  0.7  -0.5
##  [3,]  0.1 -0.2  1.0 -0.8 -0.6  0.2 -0.6 -0.1 -0.2   0.9
##  [4,] -0.4  0.3 -0.8  1.0  0.4 -0.2  0.9 -0.4  0.2  -0.6
##  [5,]  0.5 -0.6 -0.6  0.4  1.0 -0.6  0.2  0.3 -0.6  -0.3
##  [6,]  0.1  0.2  0.2 -0.2 -0.6  1.0 -0.4 -0.5  0.8   0.1
##  [7,] -0.7  0.5 -0.6  0.9  0.2 -0.4  1.0 -0.3  0.1  -0.5
##  [8,]  0.2 -0.1 -0.1 -0.4  0.3 -0.5 -0.3  1.0 -0.4  -0.3
##  [9,] -0.4  0.7 -0.2  0.2 -0.6  0.8  0.1 -0.4  1.0  -0.4
## [10,]  0.3 -0.5  0.9 -0.6 -0.3  0.1 -0.5 -0.3 -0.4   1.0

Spearman correlation is rank-based and therefore more robust to outliers than Pearson correlation.

euclidean distance

Do it

# Load the MAINZ breast cancer data. The objects `breast_expr` and `erStatus`
# used further down are presumably provided by this file — TODO confirm.
# NOTE(review): the path is specific to one machine; adjust before running.
load("~/Dropbox/Uni/Master/HT_Course/Module_3_HypothesisTesting/breastCancerMAINZ_module2.RData")

# dist() measures distances between rows, so the matrix is transposed first
# (assumes breast_expr has genes in rows and samples in columns).
eucDist <- dist(t(breast_expr), method = "euclidean")
# Correlation between the columns of breast_expr. cor() defaults to Pearson,
# so Spearman has to be requested explicitly.
spearCor <- cor(breast_expr, method = "spearman")

Do it with Chen's code

# Agglomeration (linkage) methods to compare.
# NOTE(review): recent R versions treat "ward" as an alias of "ward.D" and
# print a message; the name is kept here so the panel titles stay unchanged.
aggMeth <- c(
  "ward", "single", "complete", "average", "mcquitty", "median", "centroid"
)

# plothclust() is presumably defined by this remote helper script.
# NOTE(review): sourcing over plain FTP executes unverified remote code and
# needs network access; prefer a reviewed local copy of the script.
source("ftp://129.187.44.58/share/chen/R_Functions/plothclust.R")
fac <- as.factor(erStatus)

# 2 x 4 panel grid, filled row by row: one panel per linkage method.
layout(matrix(1:8, nrow = 2, ncol = 4, byrow = TRUE))

# Cluster the Euclidean distances with each method; colour by fac.
for (i in aggMeth) {
  hc <- hclust(eucDist, method = i)
  plothclust(hc, col = fac, main = i)
}

plot of chunk unnamed-chunk-3

Spearman correlation

# 2 x 4 panel grid, filled row by row: one panel per linkage method.
layout(matrix(1:8, nrow = 2, ncol = 4, byrow = TRUE))

# hclust() expects a dissimilarity, whereas a correlation is a similarity.
# Converting with 1 - r gives 0 for perfectly correlated samples and 2 for
# perfectly anti-correlated ones.
corDist <- as.dist(1 - spearCor)

for (i in aggMeth) {
  hc <- hclust(corDist, method = i)
  plothclust(hc, col = fac, main = i)
}

plot of chunk unnamed-chunk-4

library(genefilter)
# Row-wise t-tests between the groups in fac, followed by FDR adjustment of
# the p-values.
pval <- rowttests(breast_expr, fac)
fdr <- p.adjust(pval$p.value, method = "fdr")
# which() skips rows whose adjusted p-value is NA (plain logical indexing
# would return all-NA rows for them); drop = FALSE keeps a matrix even when
# only a single row passes the threshold.
exprde <- breast_expr[which(fdr < 0.01), , drop = FALSE]

# Euclidean distance between samples, using only the selected rows.
de_euch_dist <- dist(t(exprde))
aggMeth <- c(
  "ward", "single", "complete", "average", "mcquitty", "median", "centroid"
)
fac <- as.factor(erStatus)
# 2 x 4 panel grid, filled row by row: one panel per linkage method.
layout(matrix(1:8, nrow = 2, ncol = 4, byrow = TRUE))

for (i in aggMeth) {
  hc <- hclust(de_euch_dist, method = i)
  plothclust(hc, col = fac, main = i)
}

plot of chunk unnamed-chunk-6

unspecific filtering

# Row-wise standard deviation of the expression matrix; used below as a
# label-independent (unspecific) variability filter.
# NOTE(review): rowSds() presumably comes from genefilter — confirm it is loaded.
sds <- rowSds(breast_expr)

find a filter value

# Inspect the distribution of the row standard deviations to choose a cutoff.
hist(sds, breaks = 100)

plot of chunk unnamed-chunk-8

# Keep only the rows with a standard deviation above 1.5; drop = FALSE keeps
# a matrix even if a single row survives.
exprhsd <- breast_expr[sds > 1.5, , drop = FALSE]

# Cluster the samples on the high-variance subset. The distance must be
# computed from exprhsd, otherwise the filter above has no effect.
hsd_euc_dist <- dist(t(exprhsd))
hc <- hclust(hsd_euc_dist)
plothclust(hc, col = fac)

plot of chunk unnamed-chunk-8

now with PCA stuff

We want to look at the patients, not cluster the genes –> center and scale each gene –> transpose the matrix (so that patients are rows and genes are columns).

# t() puts the samples in rows; scale() then centers every column and scales
# it to unit variance.
exprde_scale <- scale(t(exprde), center = TRUE, scale = TRUE)

# The input is already standardized, so prcomp() must not center or scale again.
pca <- prcomp(exprde_scale, center = FALSE, scale. = FALSE)

dim(pca[["rotation"]])  # loadings (genes)
## [1] 668  76
dim(pca[["x"]])  # principal component scores of the patients
## [1] 76 76
plot(pca[["x"]][, 1:2], col = fac)

plot of chunk unnamed-chunk-9

plot(pca$x[, c(1, 3)], col = fac)  # no clear separation, but a subcluster within the ER-negative group

plot of chunk unnamed-chunk-9

# Second against third principal component, coloured by fac.
plot(pca$x[, c(2, 3)], col = fac)

plot of chunk unnamed-chunk-9

Can we see the result of the pca in the clustering?


# Side by side: PCA scores (PC1 vs PC3) and the dendrogram built from the
# same filtered data, both coloured by fac.
layout(matrix(1:2, nrow = 1, ncol = 2))
plot(pca$x[, c(1, 3)], col = fac)
hc <- hclust(de_euch_dist)
plothclust(hc, col = fac)

plot of chunk unnamed-chunk-10

validation step

Now we look at the plots again, this time labelled by the cluster assignment.

# Cut the dendrogram into three groups; the group ids double as plot colours.
cut <- cutree(hc, k = 3)
# Are the labels still the same as the sample names? Yes.
identical(names(cut), colnames(breast_expr))
## [1] TRUE
layout(matrix(1:2, nrow = 1, ncol = 2))
plot(pca$x[, c(1, 3)], pch = 20, col = cut)
plothclust(hc, col = cut)

plot of chunk unnamed-chunk-11

There is a subgroup among the ER-active patients.

–> To find the difference, one would run the black and the green samples through a gene enrichment analysis.