Clustering with bootstrapping

This script contains clustering with AU&BP p-values procedure for: (A)pre-normalized (B)post-normalized ©post-normalized and filtered by pairwise ctrl group comparison

setwd("C://STAT540/project/")
library(RColorBrewer)
library(pvclust)
## Warning: package 'pvclust' was built under R version 2.15.3

Clustering result for pre-normalized raw data:

Probe-based (485577 probes for the CPG sites)

### load result from clustering with bootstrapping
load("Data/result_raw.Rdata")

# plot clustering using ward method

par(mfrow = c(2, 1))

# label the leaf nodes by coloring them according to their groups
result_dendrogram <- as.dendrogram(result.raw$hclust)
# color code each node of the dendrogram according to their group
colors <- brewer.pal(5, "Set1")
names(colors) <- c("ALL", "HBC", "HBM", "PLP", "PLR")
colLab <- function(n) {
    if (is.leaf(n)) {
        a <- attributes(n)
        # find group name
        a.group <- substr(a$label, 1, 3)
        # retrieve the corresponding color
        attr(n, "nodePar") <- c(a$nodePar, list(lab.col = colors[a.group], lab.bg = "grey50", 
            pch = sample(19:25, 1)))
        attr(n, "frame.plot") <- TRUE
    }
    n
}

# show leaf nodes colors
clusDendro <- dendrapply(result_dendrogram, colLab)
plot(clusDendro, main = "Ward Clustering showing 5 clusters")
legend("topright", legend = c("ALL", "HBC", "HBM", "PLP", "PLR"), fill = colors, 
    title = "Sample Groups", box.col = "transparent")
rect.hclust(result.raw$hclust, k = 5, border = c("cyan"))



# show AU & BP values
plot(result.raw, main = "Ward Clustering for raw data with AU/Bp p-values")
rect.hclust(result.raw$hclust, k = 5, border = c("cyan"))

plot of chunk pvclusterByProbe_raw

Figure for standard error vs. p-value

# plot for p-value vs. standard error
seplot(result.raw)

plot of chunk unnamed-chunk-2

Clustering result for post-normalized norm data:

Probe-based (485577 probes for the CPG sites)

### load result from clustering with bootstrapping
load("Data/result_norm.Rdata")

# plot clustering using ward method
par(mfrow = c(2, 1))

# label the leaf nodes by coloring them according to their groups
result_dendrogram <- as.dendrogram(result.norm$hclust)
# color code each node of the dendrogram according to their group
colors <- brewer.pal(5, "Set1")
names(colors) <- c("ALL", "HBC", "HBM", "PLP", "PLR")
colLab <- function(n) {
    if (is.leaf(n)) {
        a <- attributes(n)
        # find group name
        a.group <- substr(a$label, 1, 3)
        # retrieve the corresponding color
        attr(n, "nodePar") <- c(a$nodePar, list(lab.col = colors[a.group], lab.bg = "grey50", 
            pch = sample(19:25, 1)))
        attr(n, "frame.plot") <- TRUE
    }
    n
}

# show leaf nodes colors
clusDendro <- dendrapply(result_dendrogram, colLab)
plot(clusDendro, main = "Ward Clustering showing 5 clusters")
legend("topright", legend = c("ALL", "HBC", "HBM", "PLP", "PLR"), fill = colors, 
    title = "Sample Groups", box.col = "transparent")
rect.hclust(result.norm$hclust, k = 5, border = c("cyan"))



# show AU & BP values
plot(result.norm, main = "Ward Clustering for normalized data with AU/Bp p-values")
rect.hclust(result.norm$hclust, k = 5, border = c("cyan"))

plot of chunk pvclusterByProbe_norm

Figure for standard error vs. p-value

seplot(result.norm)

plot of chunk unnamed-chunk-3

Clustering result for pre-normalized and Filtered data:

Probe-based (435739 probes for the CPG sites)

### load result from clustering with bootstrapping
load("Data/result_normFilt.Rdata")

# plot clustering using ward method
par(mfrow = c(2, 1))

# label the leaf nodes by coloring them according to their groups
result_dendrogram <- as.dendrogram(result.normFilt$hclust)
# color code each node of the dendrogram according to their group
colors <- brewer.pal(5, "Set1")
names(colors) <- c("ALL", "HBC", "HBM", "PLP", "PLR")
colLab <- function(n) {
    if (is.leaf(n)) {
        a <- attributes(n)
        # find group name
        a.group <- substr(a$label, 1, 3)
        # retrieve the corresponding color
        attr(n, "nodePar") <- c(a$nodePar, list(lab.col = colors[a.group], lab.bg = "grey50", 
            pch = sample(19:25, 1)))
        attr(n, "frame.plot") <- TRUE
    }
    n
}

# show leaf nodes colors
clusDendro <- dendrapply(result_dendrogram, colLab)
plot(clusDendro, main = "Ward Clustering showing 5 clusters")
legend("topright", legend = c("ALL", "HBC", "HBM", "PLP", "PLR"), fill = colors, 
    title = "Sample Groups", box.col = "transparent")
rect.hclust(result.normFilt$hclust, k = 5, border = c("cyan"))



# show AU & BP values
plot(result.normFilt, main = "Ward Clustering for normalized and filtered with AU/Bp p-values")
rect.hclust(result.normFilt$hclust, k = 5, border = c("cyan"))

plot of chunk pvclusterByProbe_normFilt

Figure for standard error vs. p-value

seplot(result.normFilt)

plot of chunk unnamed-chunk-4

As shown above, the clusters at the bottom of the tree all have high bootstrapping p-values, suggesting their statistical significance.