This script contains clustering with AU&BP p-values procedure for: (A)pre-normalized (B)post-normalized ©post-normalized and filtered by pairwise ctrl group comparison
setwd("C://STAT540/project/")
library(RColorBrewer)
library(pvclust)
## Warning: package 'pvclust' was built under R version 2.15.3
Clustering result for pre-normalized raw data:
### load result from clustering with bootstrapping
load("Data/result_raw.Rdata")
# plot clustering using ward method
par(mfrow = c(2, 1))
# label the leaf nodes by coloring them according to their groups
result_dendrogram <- as.dendrogram(result.raw$hclust)
# color code each node of the dendrogram according to their group
colors <- brewer.pal(5, "Set1")
names(colors) <- c("ALL", "HBC", "HBM", "PLP", "PLR")
colLab <- function(n) {
if (is.leaf(n)) {
a <- attributes(n)
# find group name
a.group <- substr(a$label, 1, 3)
# retrieve the corresponding color
attr(n, "nodePar") <- c(a$nodePar, list(lab.col = colors[a.group], lab.bg = "grey50",
pch = sample(19:25, 1)))
attr(n, "frame.plot") <- TRUE
}
n
}
# show leaf nodes colors
clusDendro <- dendrapply(result_dendrogram, colLab)
plot(clusDendro, main = "Ward Clustering showing 5 clusters")
legend("topright", legend = c("ALL", "HBC", "HBM", "PLP", "PLR"), fill = colors,
title = "Sample Groups", box.col = "transparent")
rect.hclust(result.raw$hclust, k = 5, border = c("cyan"))
# show AU & BP values
plot(result.raw, main = "Ward Clustering for raw data with AU/Bp p-values")
rect.hclust(result.raw$hclust, k = 5, border = c("cyan"))
Figure for standard error vs. p-value
# plot for p-value vs. standard error
seplot(result.raw)
Clustering result for post-normalized norm data:
### load result from clustering with bootstrapping
load("Data/result_norm.Rdata")
# plot clustering using ward method
par(mfrow = c(2, 1))
# label the leaf nodes by coloring them according to their groups
result_dendrogram <- as.dendrogram(result.norm$hclust)
# color code each node of the dendrogram according to their group
colors <- brewer.pal(5, "Set1")
names(colors) <- c("ALL", "HBC", "HBM", "PLP", "PLR")
colLab <- function(n) {
if (is.leaf(n)) {
a <- attributes(n)
# find group name
a.group <- substr(a$label, 1, 3)
# retrieve the corresponding color
attr(n, "nodePar") <- c(a$nodePar, list(lab.col = colors[a.group], lab.bg = "grey50",
pch = sample(19:25, 1)))
attr(n, "frame.plot") <- TRUE
}
n
}
# show leaf nodes colors
clusDendro <- dendrapply(result_dendrogram, colLab)
plot(clusDendro, main = "Ward Clustering showing 5 clusters")
legend("topright", legend = c("ALL", "HBC", "HBM", "PLP", "PLR"), fill = colors,
title = "Sample Groups", box.col = "transparent")
rect.hclust(result.norm$hclust, k = 5, border = c("cyan"))
# show AU & BP values
plot(result.norm, main = "Ward Clustering for normalized data with AU/Bp p-values")
rect.hclust(result.norm$hclust, k = 5, border = c("cyan"))
Figure for standard error vs. p-value
seplot(result.norm)
Clustering result for pre-normalized and Filtered data:
### load result from clustering with bootstrapping
load("Data/result_normFilt.Rdata")
# plot clustering using ward method
par(mfrow = c(2, 1))
# label the leaf nodes by coloring them according to their groups
result_dendrogram <- as.dendrogram(result.normFilt$hclust)
# color code each node of the dendrogram according to their group
colors <- brewer.pal(5, "Set1")
names(colors) <- c("ALL", "HBC", "HBM", "PLP", "PLR")
colLab <- function(n) {
if (is.leaf(n)) {
a <- attributes(n)
# find group name
a.group <- substr(a$label, 1, 3)
# retrieve the corresponding color
attr(n, "nodePar") <- c(a$nodePar, list(lab.col = colors[a.group], lab.bg = "grey50",
pch = sample(19:25, 1)))
attr(n, "frame.plot") <- TRUE
}
n
}
# show leaf nodes colors
clusDendro <- dendrapply(result_dendrogram, colLab)
plot(clusDendro, main = "Ward Clustering showing 5 clusters")
legend("topright", legend = c("ALL", "HBC", "HBM", "PLP", "PLR"), fill = colors,
title = "Sample Groups", box.col = "transparent")
rect.hclust(result.normFilt$hclust, k = 5, border = c("cyan"))
# show AU & BP values
plot(result.normFilt, main = "Ward Clustering for normalized and filtered with AU/Bp p-values")
rect.hclust(result.normFilt$hclust, k = 5, border = c("cyan"))
Figure for standard error vs. p-value
seplot(result.normFilt)
As shown above, the clusters at the bottom of the tree all have high bootstrapping p-values, suggesting their statistical significance.