# Toy example: a 5 x 10 matrix of standard-normal draws, then the rank-based
# (Spearman) correlation between all pairs of its columns.
m <- matrix(rnorm(50), nrow = 5, ncol = 10)
cor(x = m, y = m, method = "spearman")
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,] 1.0 -0.9 0.1 -0.4 0.5 0.1 -0.7 0.2 -0.4 0.3
## [2,] -0.9 1.0 -0.2 0.3 -0.6 0.2 0.5 -0.1 0.7 -0.5
## [3,] 0.1 -0.2 1.0 -0.8 -0.6 0.2 -0.6 -0.1 -0.2 0.9
## [4,] -0.4 0.3 -0.8 1.0 0.4 -0.2 0.9 -0.4 0.2 -0.6
## [5,] 0.5 -0.6 -0.6 0.4 1.0 -0.6 0.2 0.3 -0.6 -0.3
## [6,] 0.1 0.2 0.2 -0.2 -0.6 1.0 -0.4 -0.5 0.8 0.1
## [7,] -0.7 0.5 -0.6 0.9 0.2 -0.4 1.0 -0.3 0.1 -0.5
## [8,] 0.2 -0.1 -0.1 -0.4 0.3 -0.5 -0.3 1.0 -0.4 -0.3
## [9,] -0.4 0.7 -0.2 0.2 -0.6 0.8 0.1 -0.4 1.0 -0.4
## [10,] 0.3 -0.5 0.9 -0.6 -0.3 0.1 -0.5 -0.3 -0.4 1.0
Spearman correlation is rank-based and therefore more robust to outliers than Pearson correlation.
# Load the breast cancer data set. The code below relies on the objects
# `breast_expr` and `erStatus`, which are expected to come from this file.
# NOTE(review): absolute, user-specific path -- adjust when running elsewhere.
load("~/Dropbox/Uni/Master/HT_Course/Module_3_HypothesisTesting/breastCancerMAINZ_module2.RData")

# Euclidean distance between the columns of `breast_expr`
# (dist() compares rows, hence the transpose).
eucDist <- dist(t(breast_expr), method = "euclidean")

# Spearman correlation between the columns of `breast_expr`. The method has to
# be requested explicitly: cor() defaults to Pearson.
spearCor <- cor(breast_expr, method = "spearman")
# A correlation is a similarity, but hclust() expects a dissimilarity.
# 1 - r maps r = 1 to distance 0 and r = -1 to distance 2.
spearDist <- as.dist(1 - spearCor)

# Agglomeration methods to compare.
# NOTE(review): current R versions report that "ward" has been renamed to
# "ward.D"; the old name is kept here so the plot titles stay unchanged.
aggMeth <- c(
  "ward", "single", "complete", "average", "mcquitty", "median", "centroid"
)

# NOTE(review): this runs R code fetched from a remote FTP server at run time;
# plothclust() used below is expected to be defined by it. Consider keeping a
# reviewed local copy instead.
source("ftp://129.187.44.58/share/chen/R_Functions/plothclust.R")

# Colour the leaves by ER status.
fac <- as.factor(erStatus)

# One dendrogram per agglomeration method on a 2 x 4 grid: Euclidean distance.
layout(matrix(1:8, 2, 4, byrow = TRUE))
for (i in aggMeth) {
  hc <- hclust(eucDist, method = i)
  plothclust(hc, col = fac, main = i)
}

# Same comparison using the Spearman-based dissimilarity.
layout(matrix(1:8, 2, 4, byrow = TRUE))
for (i in aggMeth) {
  hc <- hclust(spearDist, method = i)
  plothclust(hc, col = fac, main = i)
}
library(genefilter)

# Row-wise t-tests of `breast_expr` between the levels of `fac`, followed by
# FDR adjustment of the resulting p-values.
pval <- rowttests(breast_expr, fac)
fdr <- p.adjust(pval$p.value, method = "fdr")

# Keep the rows with adjusted p-value below 0.01.
# which() drops NA p-values instead of turning them into all-NA rows, and
# drop = FALSE keeps a matrix even if only a single row passes the filter.
exprde <- breast_expr[which(fdr < 0.01), , drop = FALSE]

# Euclidean distance between the columns, restricted to the selected rows.
de_euch_dist <- dist(t(exprde))

# Repeated from the earlier chunk; the values are unchanged.
aggMeth <- c(
  "ward", "single", "complete", "average", "mcquitty", "median", "centroid"
)
fac <- as.factor(erStatus)

# One dendrogram per agglomeration method on a 2 x 4 grid.
layout(matrix(1:8, 2, 4, byrow = TRUE))
for (i in aggMeth) {
  hc <- hclust(de_euch_dist, method = i)
  plothclust(hc, col = fac, main = i)
}
# Per-row standard deviation of the expression values.
# NOTE(review): rowSds() is not base R -- presumably provided by the
# genefilter package loaded earlier in this file; confirm.
sds <- rowSds(breast_expr)
hist(sds, breaks = 100)

# Keep only the highly variable rows (SD > 1.5). which() guards against NA
# standard deviations and drop = FALSE keeps a matrix if one row remains.
exprhsd <- breast_expr[which(sds > 1.5), , drop = FALSE]

# Distance between the columns based on the high-SD rows. This must use
# `exprhsd`; using `exprde` here would silently ignore the SD filter above.
hsd_euc_dist <- dist(t(exprhsd))
hc <- hclust(hsd_euc_dist)
plothclust(hc, col = fac)
We want to compare the patients, not cluster the genes. Each gene is therefore centered and scaled across patients, which requires transposing the matrix first so that patients are rows and genes are columns.
# Transpose `exprde`, then standardise every column of the transposed matrix
# (scale() centers and scales columns by default).
exprde_scale <- scale(t(exprde))
# The input is already standardised, so prcomp() must not center or scale again.
pca <- prcomp(exprde_scale, center = FALSE, scale. = FALSE)
dim(pca$rotation) # genes
## [1] 668 76
dim(pca$x) # principal components of the patients
## [1] 76 76

# Pairwise scatter plots of the first three components, coloured by `fac`.
plot(pca$x[, c("PC1", "PC2")], col = fac)
# PC1 vs PC3: no clear separation, but a subcluster among the ER-negative samples
plot(pca$x[, c("PC1", "PC3")], col = fac)
plot(pca$x[, c("PC2", "PC3")], col = fac)

# Side by side: PC1 vs PC3 next to the dendrogram built from `de_euch_dist`.
layout(matrix(1:2, nrow = 1, ncol = 2))
plot(pca$x[, c("PC1", "PC3")], col = fac)
hc <- hclust(de_euch_dist)
plothclust(hc, col = fac)
Validation step: cut the dendrogram into three clusters and compare them with the PCA.
# Split the dendrogram `hc` into three groups.
cut <- cutree(hc, k = 3)
# Are the cluster labels still named like the expression columns? Yes.
identical(colnames(breast_expr), names(cut))
## [1] TRUE

# Side by side: PC1 vs PC3 and the dendrogram, both coloured by cluster.
layout(matrix(1:2, nrow = 1, ncol = 2))
plot(pca$x[, c("PC1", "PC3")], pch = 20, col = cut)
plothclust(hc, col = cut)
There is a subgroup within the ER-active patients.
–> To find out what distinguishes them, one would run a gene enrichment analysis comparing the black and the green clusters.