Loading libraries and data
library(tidyverse)
library(magrittr)
library(fpc)
library(e1071)
library(gplots)
load("DataOnco/SJ_BC_Data.RData")
Getting dataset ready
feats <-
SJ_BC_Data %>%
select(-(ImageID:HER2P)) %>%
select_if(funs(!anyNA(.))) %>%
mutate(Oncotype = SJ_BC_Data$Oncotype) %>%
filter(!is.na(Oncotype)) %>%
mutate_if(is.numeric, scale)
Getting optimal k
res <- clustest::partition(dataset = feats, train_size = 0.25)
train <- res$train
test <- res$test
res <- clustest::find_k(dataset = train, iters = 30, max_k = 6)
k_stats <- res$k_stats
best_k <- res$best_k
all_best_ks <- res$all_best_ks
c_samples <- res$c_samples
remove(res)
Plotting optimal k s
k_stats %>%
ggplot(aes(x = method, y = mean_best_k)) +
geom_col(aes(fill = method)) +
guides(fill = F) +
geom_errorbar(aes(ymin = ci_min, ymax = ci_max))

best_k
Fuzzy c-Means Clustering Bootstrap Interface
cmeansCBI <- function(data, k) {
result <- cmeans(x = data, center = k)
list(result = result,
partition = result$cluster,
clusterlist = map(1:k, ~. == result$cluster),
clustermethod = "fcmeans",
nc = k)
}
Jaccard evaluation of all algorithms
jac <- list(
KMeans = clusterboot(train, B = 30, clustermethod = kmeansCBI, k = best_k$KMeans)$bootresult,
FCMeans = clusterboot(train, B = 30, clustermethod = cmeansCBI, k = best_k$FCMeans)$bootresult,
HCWard = clusterboot(train, B = 30, clustermethod = hclustCBI, k = best_k$HCWard,
method = "ward.D")$bootresult,
HCSing = clusterboot(train, B = 30, clustermethod = hclustCBI, k = best_k$HCSing,
method = "single")$bootresult,
HCComp = clusterboot(train, B = 30, clustermethod = hclustCBI, k = best_k$HCComp,
method = "complete")$bootresult)
Plotting stability tests summary
get_stats <- function(jac_mat){
jac_mat %>%
t() %>%
as_tibble() %>%
summarise_all(funs(ci_min = t.test(.)$conf.int[1],
ci_max = t.test(.)$conf.int[2],
mean_jaccard = mean(.))) %>%
gather() %>%
separate(key, into = c("cluster", "stat"), extra = "merge") %>%
spread(stat, value)
}
mean_jac <-
jac %>%
map_df(get_stats, .id = "method") %>%
mutate_at(vars(ci_max, ci_min), funs(if_else(is.na(.), 0, .)))
mean_jac %>%
ggplot(aes(x = cluster, y = mean_jaccard)) +
facet_wrap(~method) +
geom_col(aes(fill = cluster)) +
guides(fill = F) +
geom_errorbar(aes(ymin = ci_min, ymax = ci_max))

mean_jac %>%
group_by(method) %>%
summarise(ci_min = t.test(mean_jaccard)$conf.int[1],
ci_max = t.test(mean_jaccard)$conf.int[2],
mean_jaccard = mean(mean_jaccard)) %>%
ggplot(aes(x = method, y = mean_jaccard)) +
geom_col(aes(fill = method)) +
guides(fill = F) +
geom_errorbar(aes(ymin = ci_min, ymax = ci_max))

ANOVA
Plot training sets ANOVA
a_o_v <-
c_samples %>%
group_by(sample) %>%
do(clustest::var_clust_aov(., methods = c("KMeans", "FCMeans", "HCWard", "HCSing", "HCComp"),
dep_vars = c("Oncotype", "lesion")))
a_o_v %>%
mutate(p.value = log10(p.value)) %>%
group_by(dep_var, term) %>%
summarise(signif_percent = sum(p.value <= log10(0.05))/n(),
ci_min = -t.test(p.value)$conf.int[1],
ci_max = -t.test(p.value)$conf.int[2],
mean_p_value = -mean(p.value)) %T>%
print() %>%
ggplot(aes(x = term, y = mean_p_value, fill = term)) +
facet_wrap(~dep_var, ncol = 2) +
geom_col() +
guides(fill = F) +
geom_errorbar(aes(ymin = ci_min, ymax = ci_max)) +
ylab("-mean log10(p-value)") +
xlab("method")

Print testing set ANOVA
d_test <- dist(test)
cls <- map(c("ward.D", "single", "complete"), ~hclust(d_test, .))
names(cls) <- c("HCWard", "HCSing", "HCComp")
clustered_test <-
test %>%
bind_cols(
KMeans = kmeans(., best_k$KMeans)$cluster,
FCMeans = cmeans(., best_k$FCMeans)$cluster,
HCWard =
cls$HCWard %>%
cutree(best_k$HCWard),
HCSing =
cls$HCSing %>%
cutree(best_k$HCSing),
HCComp =
cls$HCComp %>%
cutree(best_k$HCComp)
)
clustered_test %>%
clustest::var_clust_aov(c("KMeans", "FCMeans", "HCWard", "HCSing", "HCComp"),
c("Oncotype", "lesion")) %>%
filter(p.value <= 0.05)
Hierarchical Clustering Heatmap
plotfun <- function(method){
colorvec <- rainbow(best_k[[method]])[clustered_test[[method]]]
cindx <- get('colInd', parent.frame(2))
marg <- get('margins', parent.frame(2))
par(mar = c(0.25, 0, 0, marg[2]), cex = 0.66)
image(cbind(seq_along(colorvec)), col = colorvec[cindx], axes = F)
mtext(method, side = 4, line = 1, cex=0.4,las=1, col="black")
}
hmfun <- function(data, method){
data %>%
t() %>%
heatmap.2(dendrogram = "col", Colv = as.dendrogram(cls[[method]]), trace = "none",
scale = "none", breaks = seq(-2, 2, length.out = 101),
col = colorRampPalette(colors = c("cyan","blue","black","red","yellow")),
density.info = "density", cexRow = 0.4, extrafun = function(x) plotfun(method = method),
lmat = rbind(c(4, 3),
c(0, 5),
c(2, 1)),
lwid = c(1.2, 4),
lhei = c(1.5, 0.15, 4))
}
test %T>%
hmfun(method = "HCWard") %>%
hmfun(method = "HCComp")


LS0tCnRpdGxlOiAiQkNfQ2x1c3QiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCgpMb2FkaW5nIGxpYnJhcmllcyBhbmQgZGF0YQoKYGBge3J9CmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KG1hZ3JpdHRyKQpsaWJyYXJ5KGZwYykKbGlicmFyeShlMTA3MSkKbGlicmFyeShncGxvdHMpCgpsb2FkKCJEYXRhT25jby9TSl9CQ19EYXRhLlJEYXRhIikKCmBgYAoKR2V0dGluZyBkYXRhc2V0IHJlYWR5CgpgYGB7cn0KCmZlYXRzIDwtIAogIFNKX0JDX0RhdGEgJT4lIAogIHNlbGVjdCgtKEltYWdlSUQ6SEVSMlApKSAlPiUgCiAgc2VsZWN0X2lmKGZ1bnMoIWFueU5BKC4pKSkgJT4lCiAgbXV0YXRlKE9uY290eXBlID0gU0pfQkNfRGF0YSRPbmNvdHlwZSkgJT4lIAogIGZpbHRlcighaXMubmEoT25jb3R5cGUpKSAlPiUgCiAgbXV0YXRlX2lmKGlzLm51bWVyaWMsIHNjYWxlKQoKYGBgCgpHZXR0aW5nIG9wdGltYWwgawoKYGBge3J9CgpyZXMgICA8LSBjbHVzdGVzdDo6cGFydGl0aW9uKGRhdGFzZXQgPSBmZWF0cywgdHJhaW5fc2l6ZSA9IDAuMjUpCnRyYWluIDwtIHJlcyR0cmFpbgp0ZXN0ICA8LSByZXMkdGVzdAoKcmVzICAgICAgICAgPC0gY2x1c3Rlc3Q6OmZpbmRfayhkYXRhc2V0ID0gdHJhaW4sIGl0ZXJzID0gMzAsIG1heF9rID0gNikKa19zdGF0cyAgICAgPC0gcmVzJGtfc3RhdHMKYmVzdF9rICAgICAgPC0gcmVzJGJlc3RfayAKYWxsX2Jlc3Rfa3MgPC0gcmVzJGFsbF9iZXN0X2tzCmNfc2FtcGxlcyAgIDwtIHJlcyRjX3NhbXBsZXMKCnJlbW92ZShyZXMpCgpgYGAKCgpQbG90dGluZyBvcHRpbWFsIF9rXyBzCgpgYGB7cn0KCmtfc3RhdHMgJT4lIAogIGdncGxvdChhZXMoeCA9IG1ldGhvZCwgeSA9IG1lYW5fYmVzdF9rKSkgKwogICAgZ2VvbV9jb2woYWVzKGZpbGwgPSBtZXRob2QpKSArCiAgICBndWlkZXMoZmlsbCA9IEYpICsKICAgIGdlb21fZXJyb3JiYXIoYWVzKHltaW4gPSBjaV9taW4sIHltYXggPSBjaV9tYXgpKQoKYmVzdF9rCmBgYAoKCkZ1enp5IF9jXy1NZWFucyBDbHVzdGVyaW5nIEJvb3RzdHJhcCBJbnRlcmZhY2UKCmBgYHtyfQpjbWVhbnNDQkkgPC0gZnVuY3Rpb24oZGF0YSwgaykgewogIHJlc3VsdCA8LSBjbWVhbnMoeCA9IGRhdGEsIGNlbnRlciA9IGspCiAgCiAgbGlzdChyZXN1bHQgPSByZXN1bHQsIAogICAgICAgcGFydGl0aW9uID0gcmVzdWx0JGNsdXN0ZXIsCiAgICAgICBjbHVzdGVybGlzdCA9IG1hcCgxOmssIH4uID09IHJlc3VsdCRjbHVzdGVyKSwKICAgICAgIGNsdXN0ZXJtZXRob2QgPSAiZmNtZWFucyIsCiAgICAgICBuYyA9IGspCn0KYGBgCgpKYWNjYXJkIGV2YWx1YXRpb24gb2YgYWxsIGFsZ29yaXRobXMKCmBgYHtyfQpqYWMgPC0gbGlzdCgKICBLTWVhbnMgID0gY2x1c3RlcmJvb3QodHJhaW4sIEIgPSAzMCwgY2x1c3Rlcm1ldGhvZCA9IGttZWFuc0NCSSwgayA9IGJlc3RfayRLTWVhbnMpJGJvb3RyZXN1bHQsCiAgRkNNZWFucyA9IGNsdXN0ZXJib290KHRyYWluLCBCID0gMzAsIGNsdXN0ZXJtZXRob2QgPSBjbWVhbnNDQkksIGsgPSBiZXN0X2skRkNNZWFucykkYm9vdHJlc3VsdCwKICBIQ1dhcmQgID0gY2x1c3RlcmJvb3QodHJhaW4sIEIgPSAzMCwgY2x1c3Rlcm1ldGhvZCA9IGhjbHVzdENCSSwgayA9IGJlc3RfayRIQ1dhcmQsIAogICAgICAgICAgICAgICAgICAgICAgICBtZXRob2QgPSAid2FyZC5EIikkYm9vdHJlc3VsdCwKICBIQ1NpbmcgID0gY2x1c3RlcmJvb3QodHJhaW4sIEIgPSAzMCwgY2x1c3Rlcm1ldGhvZCA9IGhjbHVzdENCSSwgayA9IGJlc3RfayRIQ1NpbmcsIAogICAgICAgICAgICAgICAgICAgICAgICBtZXRob2QgPSAic2luZ2xlIikkYm9vdHJlc3VsdCwKICBIQ0NvbXAgID0gY2x1c3RlcmJvb3QodHJhaW4sIEIgPSAzMCwgY2x1c3Rlcm1ldGhvZCA9IGhjbHVzdENCSSwgayA9IGJlc3RfayRIQ0NvbXAsIAogICAgICAgICAgICAgICAgICAgICAgICBtZXRob2QgPSAiY29tcGxldGUiKSRib290cmVzdWx0KQpgYGAKClBsb3R0aW5nIHN0YWJpbGl0eSB0ZXN0cyBzdW1tYXJ5CgpgYGB7cn0KZ2V0X3N0YXRzIDwtIGZ1bmN0aW9uKGphY19tYXQpewogIGphY19tYXQgJT4lIAogICAgdCgpICU+JSAKICAgIGFzX3RpYmJsZSgpICU+JSAKICAgIHN1bW1hcmlzZV9hbGwoZnVucyhjaV9taW4gPSB0LnRlc3QoLikkY29uZi5pbnRbMV0sCiAgICAgICAgICAgICAgICAgICAgICAgY2lfbWF4ID0gdC50ZXN0KC4pJGNvbmYuaW50WzJdLAogICAgICAgICAgICAgICAgICAgICAgIG1lYW5famFjY2FyZCA9IG1lYW4oLikpKSAlPiUgCiAgICBnYXRoZXIoKSAlPiUgCiAgICBzZXBhcmF0ZShrZXksIGludG8gPSBjKCJjbHVzdGVyIiwgInN0YXQiKSwgZXh0cmEgPSAibWVyZ2UiKSAlPiUgCiAgICBzcHJlYWQoc3RhdCwgdmFsdWUpCn0KCm1lYW5famFjIDwtIAogIGphYyAlPiUgCiAgbWFwX2RmKGdldF9zdGF0cywgLmlkID0gIm1ldGhvZCIpICU+JSAKICBtdXRhdGVfYXQodmFycyhjaV9tYXgsIGNpX21pbiksIGZ1bnMoaWZfZWxzZShpcy5uYSguKSwgMCwgLikpKSAKCm1lYW5famFjICU+JSAKICBnZ3Bsb3QoYWVzKHggPSBjbHVzdGVyLCB5ID0gbWVhbl9qYWNjYXJkKSkgKwogICAgZmFjZXRfd3JhcCh+bWV0aG9kKSArCiAgICBnZW9tX2NvbChhZXMoZmlsbCA9IGNsdXN0ZXIpKSArCiAgICBndWlkZXMoZmlsbCA9IEYpICsKICAgIGdlb21fZXJyb3JiYXIoYWVzKHltaW4gPSBjaV9taW4sIHltYXggPSBjaV9tYXgpKQoKbWVhbl9qYWMgJT4lIAogIGdyb3VwX2J5KG1ldGhvZCkgJT4lIAogIHN1bW1hcmlzZShjaV9taW4gPSB0LnRlc3QobWVhbl9qYWNjYXJkKSRjb25mLmludFsxXSwgCiAgICAgICAgICAgIGNpX21heCA9IHQudGVzdChtZWFuX2phY2NhcmQpJGNvbmYuaW50WzJdLAogICAgICAgICAgICBtZWFuX2phY2NhcmQgPSBtZWFuKG1lYW5famFjY2FyZCkpICU+JSAKICBnZ3Bsb3QoYWVzKHggPSBtZXRob2QsIHkgPSBtZWFuX2phY2NhcmQpKSArCiAgICBnZW9tX2NvbChhZXMoZmlsbCA9IG1ldGhvZCkpICsKICAgIGd1aWRlcyhmaWxsID0gRikgKwogICAgZ2VvbV9lcnJvcmJhcihhZXMoeW1pbiA9IGNpX21pbiwgeW1heCA9IGNpX21heCkpCmBgYAoKIyMgQU5PVkEKCjwvYnI+CgpQbG90IHRyYWluaW5nIHNldHMgQU5PVkEKCmBgYHtyfQphX29fdiA8LSAKICBjX3NhbXBsZXMgJT4lIAogIGdyb3VwX2J5KHNhbXBsZSkgJT4lIAogIGRvKGNsdXN0ZXN0Ojp2YXJfY2x1c3RfYW92KC4sIG1ldGhvZHMgPSAgYygiS01lYW5zIiwgIkZDTWVhbnMiLCAiSENXYXJkIiwgIkhDU2luZyIsICJIQ0NvbXAiKSwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZGVwX3ZhcnMgPSBjKCJPbmNvdHlwZSIsICJsZXNpb24iKSkpCgphX29fdiAlPiUgCiAgbXV0YXRlKHAudmFsdWUgPSBsb2cxMChwLnZhbHVlKSkgJT4lIAogIGdyb3VwX2J5KGRlcF92YXIsIHRlcm0pICU+JSAKICBzdW1tYXJpc2Uoc2lnbmlmX3BlcmNlbnQgPSBzdW0ocC52YWx1ZSA8PSBsb2cxMCgwLjA1KSkvbigpLAogICAgICAgICAgICBjaV9taW4gPSAtdC50ZXN0KHAudmFsdWUpJGNvbmYuaW50WzFdLAogICAgICAgICAgICBjaV9tYXggPSAtdC50ZXN0KHAudmFsdWUpJGNvbmYuaW50WzJdLAogICAgICAgICAgICBtZWFuX3BfdmFsdWUgPSAtbWVhbihwLnZhbHVlKSkgJVQ+JQogIHByaW50KCkgJT4lIAogIGdncGxvdChhZXMoeCA9IHRlcm0sIHkgPSBtZWFuX3BfdmFsdWUsIGZpbGwgPSB0ZXJtKSkgKwogICAgZmFjZXRfd3JhcCh+ZGVwX3ZhciwgbmNvbCA9IDIpICsKICAgIGdlb21fY29sKCkgKwogICAgZ3VpZGVzKGZpbGwgPSBGKSArCiAgICBnZW9tX2Vycm9yYmFyKGFlcyh5bWluID0gY2lfbWluLCB5bWF4ID0gY2lfbWF4KSkgKwogICAgeWxhYigiLW1lYW4gbG9nMTAocC12YWx1ZSkiKSArCiAgICB4bGFiKCJtZXRob2QiKQpgYGAKClByaW50IHRlc3Rpbmcgc2V0IEFOT1ZBCgpgYGB7cn0KCmRfdGVzdCA8LSBkaXN0KHRlc3QpCgpjbHMgPC0gbWFwKGMoIndhcmQuRCIsICJzaW5nbGUiLCAiY29tcGxldGUiKSwgfmhjbHVzdChkX3Rlc3QsIC4pKQpuYW1lcyhjbHMpIDwtIGMoIkhDV2FyZCIsICJIQ1NpbmciLCAiSENDb21wIikKCmNsdXN0ZXJlZF90ZXN0IDwtIAogIHRlc3QgJT4lIAogIGJpbmRfY29scygKICAgIEtNZWFucyAgPSBrbWVhbnMoLiwgYmVzdF9rJEtNZWFucykkY2x1c3RlciwKICAgIEZDTWVhbnMgPSBjbWVhbnMoLiwgYmVzdF9rJEZDTWVhbnMpJGNsdXN0ZXIsCiAgICBIQ1dhcmQgID0gCiAgICAgIGNscyRIQ1dhcmQgJT4lIAogICAgICBjdXRyZWUoYmVzdF9rJEhDV2FyZCksCiAgICBIQ1NpbmcgID0gCiAgICAgIGNscyRIQ1NpbmcgJT4lIAogICAgICBjdXRyZWUoYmVzdF9rJEhDU2luZyksCiAgICBIQ0NvbXAgID0gCiAgICAgIGNscyRIQ0NvbXAgJT4lIAogICAgICBjdXRyZWUoYmVzdF9rJEhDQ29tcCkKICApCgpjbHVzdGVyZWRfdGVzdCAlPiUgIAogIGNsdXN0ZXN0Ojp2YXJfY2x1c3RfYW92KGMoIktNZWFucyIsICJGQ01lYW5zIiwgIkhDV2FyZCIsICJIQ1NpbmciLCAiSENDb21wIiksCiAgICAgICAgICAgICAgICAgICAgICAgICAgYygiT25jb3R5cGUiLCAibGVzaW9uIikpICU+JSAKICBmaWx0ZXIocC52YWx1ZSA8PSAwLjA1KQpgYGAKCjwvYnI+CgojIyBIaWVyYXJjaGljYWwgQ2x1c3RlcmluZyBIZWF0bWFwCgpgYGB7cn0KCnBsb3RmdW4gPC0gZnVuY3Rpb24obWV0aG9kKXsKICAKICBjb2xvcnZlYyA8LSByYWluYm93KGJlc3Rfa1tbbWV0aG9kXV0pW2NsdXN0ZXJlZF90ZXN0W1ttZXRob2RdXV0KICAKICBjaW5keCA8LSBnZXQoJ2NvbEluZCcsIHBhcmVudC5mcmFtZSgyKSkKICBtYXJnICA8LSBnZXQoJ21hcmdpbnMnLCBwYXJlbnQuZnJhbWUoMikpCiAgCiAgcGFyKG1hciA9IGMoMC4yNSwgMCwgMCwgbWFyZ1syXSksIGNleCA9IDAuNjYpCiAgaW1hZ2UoY2JpbmQoc2VxX2Fsb25nKGNvbG9ydmVjKSksIGNvbCA9IGNvbG9ydmVjW2NpbmR4XSwgYXhlcyA9IEYpCiAgbXRleHQobWV0aG9kLCBzaWRlID0gNCwgbGluZSA9IDEsIGNleD0wLjQsbGFzPTEsIGNvbD0iYmxhY2siKQp9CgpobWZ1biA8LSBmdW5jdGlvbihkYXRhLCBtZXRob2QpewogIGRhdGEgJT4lIAogICAgdCgpICU+JSAKICAgIGhlYXRtYXAuMihkZW5kcm9ncmFtID0gImNvbCIsIENvbHYgPSBhcy5kZW5kcm9ncmFtKGNsc1tbbWV0aG9kXV0pLCB0cmFjZSA9ICJub25lIiwgCiAgICAgICAgICAgIHNjYWxlID0gIm5vbmUiLCBicmVha3MgPSBzZXEoLTIsIDIsIGxlbmd0aC5vdXQgPSAxMDEpLCAKICAgICAgICAgICAgY29sID0gY29sb3JSYW1wUGFsZXR0ZShjb2xvcnMgPSBjKCJjeWFuIiwiYmx1ZSIsImJsYWNrIiwicmVkIiwieWVsbG93IikpLAogICAgICAgICAgICBkZW5zaXR5LmluZm8gPSAiZGVuc2l0eSIsIGNleFJvdyA9IDAuNCwgZXh0cmFmdW4gPSBmdW5jdGlvbih4KSBwbG90ZnVuKG1ldGhvZCA9IG1ldGhvZCksIAogICAgICAgICAgICBsbWF0ID0gcmJpbmQoYyg0LCAzKSwgCiAgICAgICAgICAgICAgICAgICAgICAgICBjKDAsIDUpLCAKICAgICAgICAgICAgICAgICAgICAgICAgIGMoMiwgMSkpLCAKICAgICAgICAgICAgbHdpZCA9IGMoMS4yLCA0KSwgCiAgICAgICAgICAgIGxoZWkgPSBjKDEuNSwgMC4xNSwgNCkpCn0KCnRlc3QgJVQ+JSAKICBobWZ1bihtZXRob2QgPSAiSENXYXJkIikgJT4lIAogIGhtZnVuKG1ldGhvZCA9ICJIQ0NvbXAiKQoKYGBgCgo=