Main
We built the sample trees from the allele-specific copy number alterations (CNAs) by the MEDICC. The bootstrap values are estimated by the re-sampling CNAs distances. The built trees are visualized by ggtree.
We built the sample trees from the allele-specific copy number alterations (CNAs) by the MEDICC. The bootstrap values are estimated by the re-sampling CNAs distances. The built trees are visualized by ggtree.
library(ggtree)
## Registered S3 method overwritten by 'treeio':
## method from
## root.phylo ape
## ggtree v3.0.1 For help: https://yulab-smu.top/treedata-book/
##
## If you use ggtree in published research, please cite the most appropriate paper(s):
##
## 1. Guangchuang Yu. Using ggtree to visualize data on tree-like structures. Current Protocols in Bioinformatics, 2020, 69:e96. doi:10.1002/cpbi.96
## 2. Guangchuang Yu, Tommy Tsan-Yuk Lam, Huachen Zhu, Yi Guan. Two methods for mapping and visualizing associated data on phylogeny using ggtree. Molecular Biology and Evolution 2018, 35(12):3041-3043. doi:10.1093/molbev/msy194
## 3. Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam. ggtree: an R package for visualization and annotation of phylogenetic trees with their covariates and other associated data. Methods in Ecology and Evolution 2017, 8(1):28-36. doi:10.1111/2041-210X.12628
library(treeio)
## treeio v1.12.0 For help: https://yulab-smu.github.io/treedata-book/
##
## If you use treeio in published research, please cite:
##
## LG Wang, TTY Lam, S Xu, Z Dai, L Zhou, T Feng, P Guo, CW Dunn, BR Jones, T Bradley, H Zhu, Y Guan, Y Jiang, G Yu. treeio: an R package for phylogenetic tree input and output with richly annotated and associated data. Molecular Biology and Evolution 2020, 37(2):599-603. doi: 10.1093/molbev/msz240
library(phangorn)
## Loading required package: ape
##
## Attaching package: 'ape'
## The following object is masked from 'package:treeio':
##
## drop.tip
## The following object is masked from 'package:ggtree':
##
## rotate
library(MEDICCquant)
# set certain colors
colorScale <- c("#3C5488FF", "#00A087FF", "#F39B7fFF",
"#8491B4FF","#E64B35FF","#4DBBD5FF",
"#E41A1C", "#377EB8", "#7F0000",
"#35978f", "#FC8D62", "#2166ac",
"#E78AC3", "#A6D854", "#FFD92F",
"#E5C494", "#8DD3C7", "#6E016B" ,
"#BEBADA", "#e08214", "#80B1D3",
"#d6604d", "#ffff99", "#FCCDE5",
"#FF6A5A", "#BC80BD", "#CCEBC5" ,
"#fb9a99", "#B6646A", "#9F994E",
"#7570B3" , "#c51b7d" ,"#66A61E" ,
"#E6AB02" , "#003c30", "#666666")
The output of the medicc is the trees with phyloXML formate. In this code, we read phyloXML trees by the ggtree and visualization the trees based on CNV distances.
ggtree: read tree formate: see reference: https://yulab-smu.top/treedata-book/chapter1.html
mtree = read.newick("results/Pan/tree_final.new")
ggtree(mtree, size=1) +
geom_tiplab(size=4) +
geom_treescale(fontsize=6, linesize=1, offset=1)
#add groups
grp <- list(NORMAL = "diploid",
Breast = paste0("Breast_", 1:5),
Coad = paste0("Coad_", 1:5),
Lung = paste0("Lung_", 1:5),
OveryLM = paste0("OveryLM_", 1:5),
OveryRM = paste0("OveryRM_", 1:6),
UterusM = paste0("UterusM_", c(1:7))
)
p_trees <- ggtree(mtree, size=1) +
geom_tiplab(size=4) +
geom_treescale(fontsize=4, linesize=1)
p1 = groupOTU(p_trees, grp, 'Sites') + aes(color=Sites)+
#geom_nodepoint(aes(fill=cut(bootstrap, c(0, 70, 90, 100))),
# shape=21, size=2.5) +
scale_colour_manual(
values = colorScale[29:36]
)+
#geom_nodepoint(aes(fill=cut(bootstrap, c(0, 70, 90, 100))),
# shape=21, size=2.5) +
scale_fill_manual(values=c("black", "grey", "white"),guide="legend",
name="Bootstrap Percentage(BP)",
breaks=c("(90,100]", "(70,90]", "(0,70]"),
labels=expression(BP>=90,70 <= BP * " < 90", BP < 70)) +
theme_tree(legend.position=c(0.8, 0.25))
p1
pdf("pan1.medicc.pdf", width = 8, height = 8)
p1
dev.off()
## svg
## 2
Functions to get the bootstrap values
medicc.resample.distance.matrices = function(D, niter=100) {
result = list()
#pb=txtProgressBar(min=0,max=niter,style=3)
for (s in 1:niter) {
#setTxtProgressBar(pb,s)
Dnew = as.matrix(D)
for (i in 1:nrow(Dnew)) {
for (j in 1:i) {
Dnew[i,j] = rnorm(1, mean=Dnew[i,j], sd=sqrt(Dnew[i,j]))
Dnew[j,i] = Dnew[i,j]
}
}
Dnew[Dnew<0]=0
Dnew=round(Dnew)
Dnew = as.dist(Dnew)
result[[s]] = Dnew
}
return(result)
}
bootstrap.trees = function(dist, bootstrap.rep.num = 1000, title = "Cancer"){
#using NJ to create a new tree with bootstrap values.
bootstrap.rep.num = 1000
getTrees = function(D){
matTree <- nj(D)
root_num <- which(matTree$tip.label == "diploid")
matTree <- root(matTree, root_num)
matTree
}
D = medicc.read.distance.matrix(dist)
colnames(D) =rownames(D)
matTree = getTrees(D)
plot(matTree)
#bootstrap
#resampled trees.
resampled = medicc.resample.distance.matrices(D, bootstrap.rep.num)
bootstrap.value = prop.clades(matTree, lapply(resampled, getTrees) , rooted = is.rooted(matTree))/bootstrap.rep.num*100
#bootstrap.value <- ape::boot.phylo(matTree, mut_dat, function(e){nj(dist.gene(e))},B = bootstrap.rep.num,quiet = TRUE,rooted = TRUE)/(bootstrap.rep.num)*100
plot( matTree, main = title)
nodelabels(bootstrap.value)
#for MesKit to plot trees.
matTree$tip.label[which( matTree$tip.label == "diploid") ] = "NORMAL"
phyloTree = list(
tree = matTree,
bootstrap.value = bootstrap.value[1:(length(bootstrap.value)-1)],
patientID = title
)
phyloTree
}
Get trees and using NJ to get the bootstraps.
phyloTree = bootstrap.trees(dist = "results/Pan/tree_final.dist",
title = "Three"
)
#add groups
mtree = phyloTree$tree
bootstrap.value = phyloTree$bootstrap.value
plot( mtree)
nodelabels(bootstrap.value)
## Warning in XX - width * adj[1]: longer object length is not a multiple of
## shorter object length
## Warning in xl + width: longer object length is not a multiple of shorter object
## length
## Warning in YY - height * adj[2]: longer object length is not a multiple of
## shorter object length
## Warning in yb + height: longer object length is not a multiple of shorter object
## length
#combined bootstrap
bp2 <- data.frame(node=1:(Nnode(mtree)-1) + Ntip(mtree),
bootstrap = bootstrap.value )
mtree <- dplyr::full_join(mtree, bp2, by="node")
grp <- list(NORMAL = "NORMAL",
Breast = paste0("Breast_", 1:5),
Coad = paste0("Coad_", 1:5),
Lung = paste0("Lung_", 1:5),
OveryLM = paste0("OveryLM_", 1:5),
OveryRM = paste0("OveryRM_", 1:6),
UterusM = paste0("UterusM_", c(1:7))
)
p_trees <- ggtree(mtree, size=1) +
geom_tiplab(size=4) +
geom_treescale(fontsize=4, linesize=1)
p1 = groupOTU(p_trees, grp, 'Sites') + aes(color=Sites)+
geom_nodepoint(aes(fill=cut(bootstrap, c(0, 70, 90, 100))),
shape=21, size=2.5) +
#geom_nodelab( aes(label = round(bootstrap)), hjust = -0.2, size = 3.5) +
scale_colour_manual(
values = colorScale[29:36]
)+
geom_nodepoint(aes(fill=cut(bootstrap, c(0, 70, 90, 100))),
shape=21, size=2.5) +
scale_fill_manual(values=c("black", "grey", "white"),guide="legend",
name="Bootstrap Percentage(BP)",
breaks=c("(90,100]", "(70,90]", "(0,70]"),
labels=expression(BP>=90,70 <= BP * " < 90", BP < 70)) +
theme_tree(legend.position=c(0.8, 0.25))
p1
pdf("pan1.medicc-add-bootstrap.pdf", width = 8, height = 8)
p1
dev.off()
## svg
## 2