suppressPackageStartupMessages({
library(dplyr)
library(factoextra)
library(ComplexHeatmap)
})
bs_dir <- "~/data2/Genomic_Super_Signature/refinebio/bootstrap/data"
pos <- readRDS(file.path(bs_dir, "bootstrap_PCs_rowNorm_Pos.rds"))
neg <- readRDS(file.path(bs_dir, "bootstrap_PCs_rowNorm_Neg.rds"))
data <- lapply(c(pos, neg), function(x) x$rotation) %>% Reduce(cbind,.) %>% t
topPCs = 20
numOfDataset = 20
PCsToInclude = 5
ind <- c()
for (i in 1:numOfDataset) {new_ind = c(1:PCsToInclude)+topPCs*(i-1); ind = c(ind, new_ind)}
dat <- data[ind,]
source('~/data2/Genomic_Super_Signature/refinebio/methods/4_Hierarchical_Clustering_func.R')
res.dist <- factoextra::get_dist(dat, method = "euclidean")
res.hclust <- stats::hclust(res.dist, method = "ward.D2")
for (i in 81:90) {
res.hcut <- factoextra::hcut(res.dist, k = i, hc_funct = "hclust", hc_method = "ward.D2", hc_metric = "euclidean")
eval <- evaluateCluster(res.hcut, PCsToInclude, controlType = "Neg")
print(eval)
}
##
k = 84
res.hcut <- factoextra::hcut(res.dist, k = k, hc_funct = "hclust", hc_method = "ward.D2", hc_metric = "euclidean")
col = col_vector[1:res.hcut$nbclust]
factoextra::fviz_dend(res.hclust, k = k, cex = 0.5,
rect = TRUE, palette = col,
rect_fill = TRUE, rect_border = col)
## Warning in if (color == "cluster") color <- "default": the condition has length
## > 1 and only the first element will be used
evaluateCluster(res.hcut, PCsToInclude, controlType = "Pos")
res.dist <- factoextra::get_dist(dat, method = "manhattan")
res.hclust <- stats::hclust(res.dist, method = "ward.D2")
for (i in 81:90) {
res.hcut <- factoextra::hcut(res.dist, k = i, hc_funct = "hclust", hc_method = "ward.D2", hc_metric = "manhattan")
eval <- evaluateCluster(res.hcut, PCsToInclude, controlType = "Neg")
print(eval)
}
k = 87
res.hcut <- factoextra::hcut(res.dist, k = k, hc_funct = "hclust", hc_method = "ward.D2", hc_metric = "manhattan")
col = col_vector[1:res.hcut$nbclust]
factoextra::fviz_dend(res.hclust, k = k, cex = 0.5,
rect = TRUE, palette = col,
rect_fill = TRUE, rect_border = col)
## Warning in if (color == "cluster") color <- "default": the condition has length
## > 1 and only the first element will be used
evaluateCluster(res.hcut, PCsToInclude, controlType = "Pos")
res.dist <- factoextra::get_dist(dat, method = "pearson")
res.hclust <- stats::hclust(res.dist, method = "ward.D2")
for (i in 71:80) {
res.hcut <- factoextra::hcut(res.dist, k = i, hc_funct = "hclust", hc_method = "ward.D2", hc_metric = "pearson")
eval <- evaluateCluster(res.hcut, PCsToInclude, controlType = "Neg")
print(eval)
}
k = 77
res.hcut <- factoextra::hcut(res.dist, k = k, hc_funct = "hclust", hc_method = "ward.D2", hc_metric = "pearson")
col = col_vector[1:res.hcut$nbclust]
factoextra::fviz_dend(res.hclust, k = k, cex = 0.5,
rect = TRUE, palette = col,
rect_fill = TRUE, rect_border = col)
## Warning in if (color == "cluster") color <- "default": the condition has length
## > 1 and only the first element will be used
evaluateCluster(res.hcut, PCsToInclude, controlType = "Pos")
res.dist <- factoextra::get_dist(dat, method = "spearman")
res.hclust <- stats::hclust(res.dist, method = "ward.D2")
for (i in 35:45) {
res.hcut <- factoextra::hcut(res.dist, k = i, hc_funct = "hclust", hc_method = "ward.D2", hc_metric = "spearman")
eval <- evaluateCluster(res.hcut, PCsToInclude, controlType = "Neg")
print(eval)
}
k = 42
res.hcut <- factoextra::hcut(res.dist, k = k, hc_funct = "hclust", hc_method = "ward.D2", hc_metric = "spearman")
col = col_vector[1:res.hcut$nbclust]
factoextra::fviz_dend(res.hclust, k = k, cex = 0.5,
rect = TRUE, palette = col,
rect_fill = TRUE, rect_border = col)
## Warning in if (color == "cluster") color <- "default": the condition has length
## > 1 and only the first element will be used
evaluateCluster(res.hcut, PCsToInclude, controlType = "Pos")
For hierarchical clustering based on spearman correlation, all 8 agglomeration methods separate negative controls’ PC1s with 42 clusters.
res.dist <- factoextra::get_dist(dat, method = "spearman")
ag_methods <- c("ward.D", "ward.D2", "single", "complete", "average", "mcquitty", "median", "centroid")
for (ag in ag_methods) {
res.hclust <- stats::hclust(res.dist, method = ag)
for (i in 41:43) {
res.hcut <- factoextra::hcut(res.dist, k = i, hc_funct = "hclust", hc_method = ag, hc_metric = "spearman")
eval <- evaluateCluster(res.hcut, PCsToInclude, controlType = "Neg",
agglomeration.method = ag)
print(eval)
}
}