Main

In this code, we mainly build the mutation tree.

Prepare data

suppressPackageStartupMessages(library(tidyverse))
## Warning in system("timedatectl", intern = TRUE): running command 'timedatectl'
## had status 1
sampABlist = readRDS("data/sampABlist.rds")


GC01 = readRDS("data/GC01.rds")

GC01 = GC01 %>% mutate(Chromosome =  str_c("chr", Chromosome) ) %>%
  mutate(mutation_id = str_c(Chromosome, Start_Position, Reference_Allele, sep = ":")) 
#get all adjust maf.
ccf.data.dscat = sampABlist$entire %>%
  mutate(mutation_id = str_c(chr , from , ref , sep = ":")) %>%
  dplyr::select(mutation_id, ends_with("ccf") & !starts_with("merge") ) %>%
  column_to_rownames(var = "mutation_id")

colnames(ccf.data.dscat) = str_remove( colnames(ccf.data.dscat), "ccf" )

#Filtering mutations which max CCF across samples <= 0.2
ccf.data.dscat.row = apply(ccf.data.dscat, 1, max)
ccf.data.dscat = ccf.data.dscat[ ccf.data.dscat.row>=0.2, ]

ccf.data.dscat[ccf.data.dscat >1] = 1

The heatmap fo all mutations

library(RColorBrewer)
library(pheatmap)

setcolor = colorRampPalette( brewer.pal(n = 7, name = "PuRd") )(7)
pheatmap::pheatmap(ccf.data.dscat, show_rownames = F, color = setcolor )

build sample trees from mutations.

Building phylogenetic tree based on mutations dist between samples. The main function is from MesKit (https://github.com/Niinleslie/MesKit)

library(ape)
library(phangorn)
source("data/phyloTreeObject.R") #get trees from binary data
suppressPackageStartupMessages(library(ggtree))


#CCF >=0.2 and Binary
ccf.data.binary = ccf.data.dscat

ccf.data.binary[ccf.data.binary >= 0.05] = 1
ccf.data.binary[ccf.data.binary < 0.05] = 0

treesinfo = getTrees( ccf.data.binary, method = "MP", bootstrap.rep.num = 2000)

#combined bootstrap
tr = treesinfo$tree
bp2 <- data.frame(node=1:Nnode(tr) + Ntip(tr), 
                  bootstrap = treesinfo$bootstrap.value)
tree <- dplyr::full_join(tr, bp2, by="node")

ggtree(tree) + geom_tiplab() +
  geom_nodelab((aes(label = bootstrap)))

ggtree(tree, size=1) +
  geom_tiplab(size=4) +
  geom_treescale(fontsize=6, linesize=1, offset=1) +
  geom_nodepoint(aes(fill=cut(bootstrap, c(0, 70, 90, 100))),
                 shape=21, size=2.5) +
  theme_tree(legend.position=c(0.8, 0.2))+
  scale_fill_manual(values=c("white", "grey", "black"),guide="legend",
                    name="Bootstrap Percentage(BP)",
                    breaks=c("(90,100]", "(70,90]", "(0,70]"),
                    labels=expression(BP>=90,70 <= BP * " < 90", BP < 70))  

#add groups

grp <- list(NORMAL = "NORMAL",
            Breast =  paste0("Breast_", 1:5),
            Coad = paste0("Coad_", 1:5),
            Lung  = paste0("Lung_", 1:5),
            OveryLM = paste0("OveryLM_", 1:5),
            OveryRM = paste0("OveryRM_", 1:6),
            UterusM = paste0("UterusM_", c(1:7))
)


p_iris <- ggtree(tree, size=1) +
  geom_tiplab(size=4) +
  geom_treescale(fontsize=4, linesize=1) 

p_iris = groupOTU(p_iris, grp, 'Sites') + aes(color=Sites)+
  geom_nodepoint(aes(fill=cut(bootstrap, c(0, 70, 90, 100))),
                 shape=21, size=2.5) +
  scale_colour_manual(
    values  = colorScale[29:36]
  )+
  scale_fill_manual(values=c("black", "grey", "white"),guide="legend",
                    name="Bootstrap Percentage(BP)",
                    breaks=c("(90,100]", "(70,90]", "(0,70]"),
                    labels=expression(BP>=90,70 <= BP * " < 90", BP < 70)) +
  theme_tree(legend.position=c(0.8, 0.25))

p_iris

pdf("plot/Phylotree_all_ccf_0.02.pdf", width = 6, height = 8)
p_iris
dev.off()
## svg 
##   2