Loading libraries and data

library(tidyverse)
library(magrittr)
library(fpc)
library(e1071)
library(gplots)
load("DataOnco/SJ_BC_Data.RData")

Getting dataset ready

feats <- 
  SJ_BC_Data %>% 
  select(-(ImageID:HER2P)) %>% 
  select_if(funs(!anyNA(.))) %>%
  mutate(Oncotype = SJ_BC_Data$Oncotype) %>% 
  filter(!is.na(Oncotype)) %>% 
  mutate_if(is.numeric, scale)

Getting optimal k


res   <- clustest::partition(dataset = feats, train_size = 0.25)
train <- res$train
test  <- res$test

res         <- clustest::find_k(dataset = train, iters = 30, max_k = 6)
k_stats     <- res$k_stats
best_k      <- res$best_k 
all_best_ks <- res$all_best_ks
c_samples   <- res$c_samples

remove(res)

Plotting optimal k s

k_stats %>% 
  ggplot(aes(x = method, y = mean_best_k)) +
    geom_col(aes(fill = method)) +
    guides(fill = F) +
    geom_errorbar(aes(ymin = ci_min, ymax = ci_max))

best_k

Fuzzy c-Means Clustering Bootstrap Interface

cmeansCBI <- function(data, k) {
  result <- cmeans(x = data, center = k)
  
  list(result = result, 
       partition = result$cluster,
       clusterlist = map(1:k, ~. == result$cluster),
       clustermethod = "fcmeans",
       nc = k)
}

Jaccard evaluation of all algorithms

jac <- list(
  KMeans  = clusterboot(train, B = 30, clustermethod = kmeansCBI, k = best_k$KMeans)$bootresult,
  FCMeans = clusterboot(train, B = 30, clustermethod = cmeansCBI, k = best_k$FCMeans)$bootresult,
  HCWard  = clusterboot(train, B = 30, clustermethod = hclustCBI, k = best_k$HCWard, 
                        method = "ward.D")$bootresult,
  HCSing  = clusterboot(train, B = 30, clustermethod = hclustCBI, k = best_k$HCSing, 
                        method = "single")$bootresult,
  HCComp  = clusterboot(train, B = 30, clustermethod = hclustCBI, k = best_k$HCComp, 
                        method = "complete")$bootresult)

Plotting stability tests summary

get_stats <- function(jac_mat){
  jac_mat %>% 
    t() %>% 
    as_tibble() %>% 
    summarise_all(funs(ci_min = t.test(.)$conf.int[1],
                       ci_max = t.test(.)$conf.int[2],
                       mean_jaccard = mean(.))) %>% 
    gather() %>% 
    separate(key, into = c("cluster", "stat"), extra = "merge") %>% 
    spread(stat, value)
}
mean_jac <- 
  jac %>% 
  map_df(get_stats, .id = "method") %>% 
  mutate_at(vars(ci_max, ci_min), funs(if_else(is.na(.), 0, .))) 
mean_jac %>% 
  ggplot(aes(x = cluster, y = mean_jaccard)) +
    facet_wrap(~method) +
    geom_col(aes(fill = cluster)) +
    guides(fill = F) +
    geom_errorbar(aes(ymin = ci_min, ymax = ci_max))

mean_jac %>% 
  group_by(method) %>% 
  summarise(ci_min = t.test(mean_jaccard)$conf.int[1], 
            ci_max = t.test(mean_jaccard)$conf.int[2],
            mean_jaccard = mean(mean_jaccard)) %>% 
  ggplot(aes(x = method, y = mean_jaccard)) +
    geom_col(aes(fill = method)) +
    guides(fill = F) +
    geom_errorbar(aes(ymin = ci_min, ymax = ci_max))

ANOVA


Plot training sets ANOVA

a_o_v <- 
  c_samples %>% 
  group_by(sample) %>% 
  do(clustest::var_clust_aov(., methods =  c("KMeans", "FCMeans", "HCWard", "HCSing", "HCComp"), 
                                dep_vars = c("Oncotype", "lesion")))
a_o_v %>% 
  mutate(p.value = log10(p.value)) %>% 
  group_by(dep_var, term) %>% 
  summarise(signif_percent = sum(p.value <= log10(0.05))/n(),
            ci_min = -t.test(p.value)$conf.int[1],
            ci_max = -t.test(p.value)$conf.int[2],
            mean_p_value = -mean(p.value)) %T>%
  print() %>% 
  ggplot(aes(x = term, y = mean_p_value, fill = term)) +
    facet_wrap(~dep_var, ncol = 2) +
    geom_col() +
    guides(fill = F) +
    geom_errorbar(aes(ymin = ci_min, ymax = ci_max)) +
    ylab("-mean log10(p-value)") +
    xlab("method")

Print testing set ANOVA

d_test <- dist(test)
cls <- map(c("ward.D", "single", "complete"), ~hclust(d_test, .))
names(cls) <- c("HCWard", "HCSing", "HCComp")
clustered_test <- 
  test %>% 
  bind_cols(
    KMeans  = kmeans(., best_k$KMeans)$cluster,
    FCMeans = cmeans(., best_k$FCMeans)$cluster,
    HCWard  = 
      cls$HCWard %>% 
      cutree(best_k$HCWard),
    HCSing  = 
      cls$HCSing %>% 
      cutree(best_k$HCSing),
    HCComp  = 
      cls$HCComp %>% 
      cutree(best_k$HCComp)
  )
clustered_test %>%  
  clustest::var_clust_aov(c("KMeans", "FCMeans", "HCWard", "HCSing", "HCComp"),
                          c("Oncotype", "lesion")) %>% 
  filter(p.value <= 0.05)


Hierarchical Clustering Heatmap

plotfun <- function(method){
  
  colorvec <- rainbow(best_k[[method]])[clustered_test[[method]]]
  
  cindx <- get('colInd', parent.frame(2))
  marg  <- get('margins', parent.frame(2))
  
  par(mar = c(0.25, 0, 0, marg[2]), cex = 0.66)
  image(cbind(seq_along(colorvec)), col = colorvec[cindx], axes = F)
  mtext(method, side = 4, line = 1, cex=0.4,las=1, col="black")
}
hmfun <- function(data, method){
  data %>% 
    t() %>% 
    heatmap.2(dendrogram = "col", Colv = as.dendrogram(cls[[method]]), trace = "none", 
            scale = "none", breaks = seq(-2, 2, length.out = 101), 
            col = colorRampPalette(colors = c("cyan","blue","black","red","yellow")),
            density.info = "density", cexRow = 0.4, extrafun = function(x) plotfun(method = method), 
            lmat = rbind(c(4, 3), 
                         c(0, 5), 
                         c(2, 1)), 
            lwid = c(1.2, 4), 
            lhei = c(1.5, 0.15, 4))
}
test %T>% 
  hmfun(method = "HCWard") %>% 
  hmfun(method = "HCComp")

LS0tCnRpdGxlOiAiQkNfQ2x1c3QiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCgpMb2FkaW5nIGxpYnJhcmllcyBhbmQgZGF0YQoKYGBge3J9CmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KG1hZ3JpdHRyKQpsaWJyYXJ5KGZwYykKbGlicmFyeShlMTA3MSkKbGlicmFyeShncGxvdHMpCgpsb2FkKCJEYXRhT25jby9TSl9CQ19EYXRhLlJEYXRhIikKCmBgYAoKR2V0dGluZyBkYXRhc2V0IHJlYWR5CgpgYGB7cn0KCmZlYXRzIDwtIAogIFNKX0JDX0RhdGEgJT4lIAogIHNlbGVjdCgtKEltYWdlSUQ6SEVSMlApKSAlPiUgCiAgc2VsZWN0X2lmKGZ1bnMoIWFueU5BKC4pKSkgJT4lCiAgbXV0YXRlKE9uY290eXBlID0gU0pfQkNfRGF0YSRPbmNvdHlwZSkgJT4lIAogIGZpbHRlcighaXMubmEoT25jb3R5cGUpKSAlPiUgCiAgbXV0YXRlX2lmKGlzLm51bWVyaWMsIHNjYWxlKQoKYGBgCgpHZXR0aW5nIG9wdGltYWwgawoKYGBge3J9CgpyZXMgICA8LSBjbHVzdGVzdDo6cGFydGl0aW9uKGRhdGFzZXQgPSBmZWF0cywgdHJhaW5fc2l6ZSA9IDAuMjUpCnRyYWluIDwtIHJlcyR0cmFpbgp0ZXN0ICA8LSByZXMkdGVzdAoKcmVzICAgICAgICAgPC0gY2x1c3Rlc3Q6OmZpbmRfayhkYXRhc2V0ID0gdHJhaW4sIGl0ZXJzID0gMzAsIG1heF9rID0gNikKa19zdGF0cyAgICAgPC0gcmVzJGtfc3RhdHMKYmVzdF9rICAgICAgPC0gcmVzJGJlc3RfayAKYWxsX2Jlc3Rfa3MgPC0gcmVzJGFsbF9iZXN0X2tzCmNfc2FtcGxlcyAgIDwtIHJlcyRjX3NhbXBsZXMKCnJlbW92ZShyZXMpCgpgYGAKCgpQbG90dGluZyBvcHRpbWFsIF9rXyBzCgpgYGB7cn0KCmtfc3RhdHMgJT4lIAogIGdncGxvdChhZXMoeCA9IG1ldGhvZCwgeSA9IG1lYW5fYmVzdF9rKSkgKwogICAgZ2VvbV9jb2woYWVzKGZpbGwgPSBtZXRob2QpKSArCiAgICBndWlkZXMoZmlsbCA9IEYpICsKICAgIGdlb21fZXJyb3JiYXIoYWVzKHltaW4gPSBjaV9taW4sIHltYXggPSBjaV9tYXgpKQoKYmVzdF9rCmBgYAoKCkZ1enp5IF9jXy1NZWFucyBDbHVzdGVyaW5nIEJvb3RzdHJhcCBJbnRlcmZhY2UKCmBgYHtyfQpjbWVhbnNDQkkgPC0gZnVuY3Rpb24oZGF0YSwgaykgewogIHJlc3VsdCA8LSBjbWVhbnMoeCA9IGRhdGEsIGNlbnRlciA9IGspCiAgCiAgbGlzdChyZXN1bHQgPSByZXN1bHQsIAogICAgICAgcGFydGl0aW9uID0gcmVzdWx0JGNsdXN0ZXIsCiAgICAgICBjbHVzdGVybGlzdCA9IG1hcCgxOmssIH4uID09IHJlc3VsdCRjbHVzdGVyKSwKICAgICAgIGNsdXN0ZXJtZXRob2QgPSAiZmNtZWFucyIsCiAgICAgICBuYyA9IGspCn0KYGBgCgpKYWNjYXJkIGV2YWx1YXRpb24gb2YgYWxsIGFsZ29yaXRobXMKCmBgYHtyfQpqYWMgPC0gbGlzdCgKICBLTWVhbnMgID0gY2x1c3RlcmJvb3QodHJhaW4sIEIgPSAzMCwgY2x1c3Rlcm1ldGhvZCA9IGttZWFuc0NCSSwgayA9IGJlc3RfayRLTWVhbnMpJGJvb3RyZXN1bHQsCiAgRkNNZWFucyA9IGNsdXN0ZXJib290KHRyYWluLCBCID0gMzAsIGNsdXN0ZXJtZXRob2QgPSBjbWVhbnNDQkksIGsgPSBiZXN0X2skRkNNZWFucykkYm9vdHJlc3VsdCwKICBIQ1dhcmQgID0gY2x1c3RlcmJvb3QodHJhaW4sIEIgPSAzMCwgY2x1c3Rlcm1ldGhvZCA9IGhjbHVzdENCSSwgayA9IGJlc3RfayRIQ1dhcmQsIAogICAgICAgICAgICAgICAgICAgICAgICBtZXRob2QgPSAid2FyZC5EIikkYm9vdHJlc3VsdCwKICBIQ1NpbmcgID0gY2x1c3RlcmJvb3QodHJhaW4sIEIgPSAzMCwgY2x1c3Rlcm1ldGhvZCA9IGhjbHVzdENCSSwgayA9IGJlc3RfayRIQ1NpbmcsIAogICAgICAgICAgICAgICAgICAgICAgICBtZXRob2QgPSAic2luZ2xlIikkYm9vdHJlc3VsdCwKICBIQ0NvbXAgID0gY2x1c3RlcmJvb3QodHJhaW4sIEIgPSAzMCwgY2x1c3Rlcm1ldGhvZCA9IGhjbHVzdENCSSwgayA9IGJlc3RfayRIQ0NvbXAsIAogICAgICAgICAgICAgICAgICAgICAgICBtZXRob2QgPSAiY29tcGxldGUiKSRib290cmVzdWx0KQpgYGAKClBsb3R0aW5nIHN0YWJpbGl0eSB0ZXN0cyBzdW1tYXJ5CgpgYGB7cn0KZ2V0X3N0YXRzIDwtIGZ1bmN0aW9uKGphY19tYXQpewogIGphY19tYXQgJT4lIAogICAgdCgpICU+JSAKICAgIGFzX3RpYmJsZSgpICU+JSAKICAgIHN1bW1hcmlzZV9hbGwoZnVucyhjaV9taW4gPSB0LnRlc3QoLikkY29uZi5pbnRbMV0sCiAgICAgICAgICAgICAgICAgICAgICAgY2lfbWF4ID0gdC50ZXN0KC4pJGNvbmYuaW50WzJdLAogICAgICAgICAgICAgICAgICAgICAgIG1lYW5famFjY2FyZCA9IG1lYW4oLikpKSAlPiUgCiAgICBnYXRoZXIoKSAlPiUgCiAgICBzZXBhcmF0ZShrZXksIGludG8gPSBjKCJjbHVzdGVyIiwgInN0YXQiKSwgZXh0cmEgPSAibWVyZ2UiKSAlPiUgCiAgICBzcHJlYWQoc3RhdCwgdmFsdWUpCn0KCm1lYW5famFjIDwtIAogIGphYyAlPiUgCiAgbWFwX2RmKGdldF9zdGF0cywgLmlkID0gIm1ldGhvZCIpICU+JSAKICBtdXRhdGVfYXQodmFycyhjaV9tYXgsIGNpX21pbiksIGZ1bnMoaWZfZWxzZShpcy5uYSguKSwgMCwgLikpKSAKCm1lYW5famFjICU+JSAKICBnZ3Bsb3QoYWVzKHggPSBjbHVzdGVyLCB5ID0gbWVhbl9qYWNjYXJkKSkgKwogICAgZmFjZXRfd3JhcCh+bWV0aG9kKSArCiAgICBnZW9tX2NvbChhZXMoZmlsbCA9IGNsdXN0ZXIpKSArCiAgICBndWlkZXMoZmlsbCA9IEYpICsKICAgIGdlb21fZXJyb3JiYXIoYWVzKHltaW4gPSBjaV9taW4sIHltYXggPSBjaV9tYXgpKQoKbWVhbl9qYWMgJT4lIAogIGdyb3VwX2J5KG1ldGhvZCkgJT4lIAogIHN1bW1hcmlzZShjaV9taW4gPSB0LnRlc3QobWVhbl9qYWNjYXJkKSRjb25mLmludFsxXSwgCiAgICAgICAgICAgIGNpX21heCA9IHQudGVzdChtZWFuX2phY2NhcmQpJGNvbmYuaW50WzJdLAogICAgICAgICAgICBtZWFuX2phY2NhcmQgPSBtZWFuKG1lYW5famFjY2FyZCkpICU+JSAKICBnZ3Bsb3QoYWVzKHggPSBtZXRob2QsIHkgPSBtZWFuX2phY2NhcmQpKSArCiAgICBnZW9tX2NvbChhZXMoZmlsbCA9IG1ldGhvZCkpICsKICAgIGd1aWRlcyhmaWxsID0gRikgKwogICAgZ2VvbV9lcnJvcmJhcihhZXMoeW1pbiA9IGNpX21pbiwgeW1heCA9IGNpX21heCkpCmBgYAoKIyMgQU5PVkEKCjwvYnI+CgpQbG90IHRyYWluaW5nIHNldHMgQU5PVkEKCmBgYHtyfQphX29fdiA8LSAKICBjX3NhbXBsZXMgJT4lIAogIGdyb3VwX2J5KHNhbXBsZSkgJT4lIAogIGRvKGNsdXN0ZXN0Ojp2YXJfY2x1c3RfYW92KC4sIG1ldGhvZHMgPSAgYygiS01lYW5zIiwgIkZDTWVhbnMiLCAiSENXYXJkIiwgIkhDU2luZyIsICJIQ0NvbXAiKSwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZGVwX3ZhcnMgPSBjKCJPbmNvdHlwZSIsICJsZXNpb24iKSkpCgphX29fdiAlPiUgCiAgbXV0YXRlKHAudmFsdWUgPSBsb2cxMChwLnZhbHVlKSkgJT4lIAogIGdyb3VwX2J5KGRlcF92YXIsIHRlcm0pICU+JSAKICBzdW1tYXJpc2Uoc2lnbmlmX3BlcmNlbnQgPSBzdW0ocC52YWx1ZSA8PSBsb2cxMCgwLjA1KSkvbigpLAogICAgICAgICAgICBjaV9taW4gPSAtdC50ZXN0KHAudmFsdWUpJGNvbmYuaW50WzFdLAogICAgICAgICAgICBjaV9tYXggPSAtdC50ZXN0KHAudmFsdWUpJGNvbmYuaW50WzJdLAogICAgICAgICAgICBtZWFuX3BfdmFsdWUgPSAtbWVhbihwLnZhbHVlKSkgJVQ+JQogIHByaW50KCkgJT4lIAogIGdncGxvdChhZXMoeCA9IHRlcm0sIHkgPSBtZWFuX3BfdmFsdWUsIGZpbGwgPSB0ZXJtKSkgKwogICAgZmFjZXRfd3JhcCh+ZGVwX3ZhciwgbmNvbCA9IDIpICsKICAgIGdlb21fY29sKCkgKwogICAgZ3VpZGVzKGZpbGwgPSBGKSArCiAgICBnZW9tX2Vycm9yYmFyKGFlcyh5bWluID0gY2lfbWluLCB5bWF4ID0gY2lfbWF4KSkgKwogICAgeWxhYigiLW1lYW4gbG9nMTAocC12YWx1ZSkiKSArCiAgICB4bGFiKCJtZXRob2QiKQpgYGAKClByaW50IHRlc3Rpbmcgc2V0IEFOT1ZBCgpgYGB7cn0KCmRfdGVzdCA8LSBkaXN0KHRlc3QpCgpjbHMgPC0gbWFwKGMoIndhcmQuRCIsICJzaW5nbGUiLCAiY29tcGxldGUiKSwgfmhjbHVzdChkX3Rlc3QsIC4pKQpuYW1lcyhjbHMpIDwtIGMoIkhDV2FyZCIsICJIQ1NpbmciLCAiSENDb21wIikKCmNsdXN0ZXJlZF90ZXN0IDwtIAogIHRlc3QgJT4lIAogIGJpbmRfY29scygKICAgIEtNZWFucyAgPSBrbWVhbnMoLiwgYmVzdF9rJEtNZWFucykkY2x1c3RlciwKICAgIEZDTWVhbnMgPSBjbWVhbnMoLiwgYmVzdF9rJEZDTWVhbnMpJGNsdXN0ZXIsCiAgICBIQ1dhcmQgID0gCiAgICAgIGNscyRIQ1dhcmQgJT4lIAogICAgICBjdXRyZWUoYmVzdF9rJEhDV2FyZCksCiAgICBIQ1NpbmcgID0gCiAgICAgIGNscyRIQ1NpbmcgJT4lIAogICAgICBjdXRyZWUoYmVzdF9rJEhDU2luZyksCiAgICBIQ0NvbXAgID0gCiAgICAgIGNscyRIQ0NvbXAgJT4lIAogICAgICBjdXRyZWUoYmVzdF9rJEhDQ29tcCkKICApCgpjbHVzdGVyZWRfdGVzdCAlPiUgIAogIGNsdXN0ZXN0Ojp2YXJfY2x1c3RfYW92KGMoIktNZWFucyIsICJGQ01lYW5zIiwgIkhDV2FyZCIsICJIQ1NpbmciLCAiSENDb21wIiksCiAgICAgICAgICAgICAgICAgICAgICAgICAgYygiT25jb3R5cGUiLCAibGVzaW9uIikpICU+JSAKICBmaWx0ZXIocC52YWx1ZSA8PSAwLjA1KQpgYGAKCjwvYnI+CgojIyBIaWVyYXJjaGljYWwgQ2x1c3RlcmluZyBIZWF0bWFwCgpgYGB7cn0KCnBsb3RmdW4gPC0gZnVuY3Rpb24obWV0aG9kKXsKICAKICBjb2xvcnZlYyA8LSByYWluYm93KGJlc3Rfa1tbbWV0aG9kXV0pW2NsdXN0ZXJlZF90ZXN0W1ttZXRob2RdXV0KICAKICBjaW5keCA8LSBnZXQoJ2NvbEluZCcsIHBhcmVudC5mcmFtZSgyKSkKICBtYXJnICA8LSBnZXQoJ21hcmdpbnMnLCBwYXJlbnQuZnJhbWUoMikpCiAgCiAgcGFyKG1hciA9IGMoMC4yNSwgMCwgMCwgbWFyZ1syXSksIGNleCA9IDAuNjYpCiAgaW1hZ2UoY2JpbmQoc2VxX2Fsb25nKGNvbG9ydmVjKSksIGNvbCA9IGNvbG9ydmVjW2NpbmR4XSwgYXhlcyA9IEYpCiAgbXRleHQobWV0aG9kLCBzaWRlID0gNCwgbGluZSA9IDEsIGNleD0wLjQsbGFzPTEsIGNvbD0iYmxhY2siKQp9CgpobWZ1biA8LSBmdW5jdGlvbihkYXRhLCBtZXRob2QpewogIGRhdGEgJT4lIAogICAgdCgpICU+JSAKICAgIGhlYXRtYXAuMihkZW5kcm9ncmFtID0gImNvbCIsIENvbHYgPSBhcy5kZW5kcm9ncmFtKGNsc1tbbWV0aG9kXV0pLCB0cmFjZSA9ICJub25lIiwgCiAgICAgICAgICAgIHNjYWxlID0gIm5vbmUiLCBicmVha3MgPSBzZXEoLTIsIDIsIGxlbmd0aC5vdXQgPSAxMDEpLCAKICAgICAgICAgICAgY29sID0gY29sb3JSYW1wUGFsZXR0ZShjb2xvcnMgPSBjKCJjeWFuIiwiYmx1ZSIsImJsYWNrIiwicmVkIiwieWVsbG93IikpLAogICAgICAgICAgICBkZW5zaXR5LmluZm8gPSAiZGVuc2l0eSIsIGNleFJvdyA9IDAuNCwgZXh0cmFmdW4gPSBmdW5jdGlvbih4KSBwbG90ZnVuKG1ldGhvZCA9IG1ldGhvZCksIAogICAgICAgICAgICBsbWF0ID0gcmJpbmQoYyg0LCAzKSwgCiAgICAgICAgICAgICAgICAgICAgICAgICBjKDAsIDUpLCAKICAgICAgICAgICAgICAgICAgICAgICAgIGMoMiwgMSkpLCAKICAgICAgICAgICAgbHdpZCA9IGMoMS4yLCA0KSwgCiAgICAgICAgICAgIGxoZWkgPSBjKDEuNSwgMC4xNSwgNCkpCn0KCnRlc3QgJVQ+JSAKICBobWZ1bihtZXRob2QgPSAiSENXYXJkIikgJT4lIAogIGhtZnVuKG1ldGhvZCA9ICJIQ0NvbXAiKQoKYGBgCgo=