Main
In this code, we want to show the main difference between samples.
Prepare data
library(MesKit)
library(tidyverse)
## Warning in system("timedatectl", intern = TRUE): running command 'timedatectl'
## had status 1
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.5 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.0.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.4.3
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(patchwork)
library(ggpubr)
library(ggrepel)
sampABlist= readRDS("data/sampABlist.rds")
data.type = "split"
#between regions.
colorScale <- c("#3C5488FF", "#00A087FF", "#F39B7fFF",
"#8491B4FF","#E64B35FF","#4DBBD5FF",
"#E41A1C", "#377EB8", "#7F0000",
"#35978f", "#FC8D62", "#2166ac",
"#E78AC3", "#A6D854", "#FFD92F",
"#E5C494", "#8DD3C7", "#6E016B" ,
"#BEBADA", "#e08214", "#80B1D3",
"#d6604d", "#ffff99", "#FCCDE5",
"#FF6A5A", "#BC80BD", "#CCEBC5" ,
"#fb9a99", "#B6646A", "#9F994E",
"#7570B3" , "#c51b7d" ,"#66A61E" ,
"#E6AB02" , "#003c30", "#666666")
cols_samples = setNames(
colorScale[31:36],
nm = c("Breast","Coad","Lung","OveryLM","OveryRM","UterusM")
)
1 Plot putative driver mutations
Driver genes of CRC collected from [IntOGen] (https://www.intogen.org/search) (v.2020.2)
# Maf object with CCF information
data.type = "split"
maf <- readMaf(mafFile = sprintf( "MesKit/meskit.%s.mutation.txt", data.type),
ccfFile = sprintf("MesKit/meskit.%s.CCF.txt", data.type),
clinicalFile = sprintf("MesKit/meskit.%s.clinical.txt", data.type),
refBuild = "hg19",
ccf.conf.level = 0.95
)
driverGene = read.delim("data/IntOGen-Drivers-Cancer_Genes.tsv", header = T) %>%
filter(CANCER_TYPE %in% c("BRCA","COREAD","LUAD", "LUSC") ) %>%
pull(SYMBOL) %>% unique()
driverGene = driverGene[!driverGene %in% c("BRCA1")]
mut.class <- classifyMut(maf, class = "SP", patient.id = 'Breast')
head(mut.class)
plotMutProfile(maf, class = "SPCS", geneList = driverGene, use.tumorSampleLabel = TRUE, removeEmptyCols = FALSE)
pdf("MesKit/MutProfile.pdf", width = 10, height = 6)
plotMutProfile(maf, class = "SPCS", geneList = driverGene, use.tumorSampleLabel = TRUE, removeEmptyCols = FALSE)
dev.off()
## svg
## 2
2 Plot the total mutation numbers
#Get the summary of each mutations.
#get data
maf_input = subMaf(maf, mafObj = FALSE, use.tumorSampleLabel = TRUE)
#get mutation classifications.
maf_class = classifyMut(maf, patient.id = NULL, class = "SPCS", classByTumor = FALSE)
maf_merge = list()
for(i in names(maf_input) ){
message(i)
maf_merge[[i]] = maf_input[[i]] %>%
mutate(Mut_ID = str_c(Hugo_Symbol, Chromosome, Start_Position, Reference_Allele, Tumor_Seq_Allele2, sep = ":" ) ) %>%
left_join(
maf_class[[i]]
) %>%
dplyr::select(
Hugo_Symbol, Chromosome, Start_Position, End_Position, Reference_Allele, Tumor_Seq_Allele2, Tumor_Sample_Barcode, Mutation_Type, Patient_ID, Tumor_ID, Variant_Classification, VAF
)
}
## Breast
## Joining, by = c("Tumor_Sample_Barcode", "Patient_ID", "Mut_ID")
## Coad
## Joining, by = c("Tumor_Sample_Barcode", "Patient_ID", "Mut_ID")
## Lung
## Joining, by = c("Tumor_Sample_Barcode", "Patient_ID", "Mut_ID")
## OveryLM
## Joining, by = c("Tumor_Sample_Barcode", "Patient_ID", "Mut_ID")
## OveryRM
## Joining, by = c("Tumor_Sample_Barcode", "Patient_ID", "Mut_ID")
## UterusM
## Joining, by = c("Tumor_Sample_Barcode", "Patient_ID", "Mut_ID")
maf_merge <- purrr::reduce(maf_merge, rbind)
cols_types = setNames(c("#00A087FF", "#3C5488FF", "#8491B4FF", "#F39B7FFF", "#E64B35FF", "#4DBBD5FF")
, nm = c("Public_Clonal","Public_Subclonal","Shared_Clonal","Shared_Subclonal","Private_Clonal","Private_Subclonal") )
p1 = maf_merge %>%
mutate(Mutation_Type = factor(Mutation_Type, levels = names(cols_types) ) ) %>%
ggplot() + theme_classic() +
geom_bar(aes(x = Tumor_Sample_Barcode , fill = Mutation_Type) ) +
facet_grid( ~ Patient_ID, scales = "free_x", space = "free_x" ) +
scale_fill_manual(
values = cols_types
) +
labs(y = "Mutation Counts") +
theme(
axis.title.x = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, size = 14, hjust = 0.5)
)
p1
pdf("MesKit/MutNumber.pdf", width = 10, height = 4)
p1
dev.off()
## svg
## 2
#Proportion of driver mutations across different groups.
mutation_group = data.frame(
Mutation_Type = c("Public_Clonal","Public_Subclonal","Shared_Clonal","Shared_Subclonal","Private_Clonal","Private_Subclonal"),
Type = c("Shared_Clonal",NA,"Shared_Clonal",NA,"Private_Clonal","Private_Subclonal")
)
maf_merge1 = maf_merge %>%
mutate(
is.driver = ifelse( (Hugo_Symbol %in% driverGene) & ( !Variant_Classification %in% c("Silent", "3'Flank", "IGR", "Intron", "RNA") ) , TRUE, FALSE)
) %>%
left_join(mutation_group) %>%
filter(!is.na(Type))
## Joining, by = "Mutation_Type"
3 Measurement of the neutral evolution test
library(neutralitytestr)
#tt = neutralitytest(VAFselection)
#plot_all(tt)
neutral = maf_merge %>%
#filter(!Mutation_Type %in% c("Public_Clonal") ) %>%
group_by(Tumor_Sample_Barcode) %>%
summarise(
num = sum(VAF >= 0.1),
R2 = as.numeric(neutralitytestr::neutralitytest(VAF, fmin = 0.1, fmax = 0.22 )$rsq[1]),
R2_pval = as.numeric( neutralitytestr::neutralitytest(VAF, fmin = 0.1, fmax = 0.22)$rsq[2])
) %>%
mutate(
Patient_ID = mapply(function(x) x[1], str_split(Tumor_Sample_Barcode, "_") )
)
p1 = neutral %>%
ggplot(aes(x = Patient_ID, y = R2, col = Patient_ID, shape = Patient_ID) ) +
ggpubr::theme_classic2() +
geom_violin() +
geom_jitter(width = 0.2, size = 2) +
labs(x = NULL, y = latex2exp::TeX("$R^{2}$") ) +
scale_color_manual( values = cols_samples ) +
geom_hline( yintercept = 0.98, linetype = 2, size = 1) +
theme(
legend.position = "none",
axis.text = element_text(size = 15),
axis.text.x = element_text(hjust = 1, angle = 45)
)
p1
pdf("MesKit/NeutralTest.pdf", width = 5, height = 3.5)
p1
dev.off()
## svg
## 2
4 Measurement of intra-tumor heterogeneity
#within tumors.
scores = mathScore(maf, min.vaf = 0.05) %>%
purrr::reduce(rbind)
p1 = scores %>%
ggplot(aes(x = Patient_ID, y = MATH_Score, col = Patient_ID, shape = Patient_ID) ) + theme_classic2() +
geom_violin() +
geom_jitter(width = 0.2, size = 2) +
labs(x = NULL, y = "MATH scores" ) +
scale_color_manual( values = cols_samples ) +
theme(
legend.position = "none",
axis.text = element_text(size = 15),
axis.text.x = element_text(hjust = 1, angle = 45)
)
pdf("MesKit/ITH.MATHscores.pdf", width = 5, height = 3.5)
p1
dev.off()
## svg
## 2
scores.ccfs = ccfAUC(maf, use.tumorSampleLabel = TRUE, min.ccf = 0.1)
scores.ccfs = lapply(scores.ccfs, function(x) x$AUC.value ) %>%
purrr::reduce(rbind)
p1 = scores.ccfs %>%
ggplot(aes(x = Patient_ID, y = AUC, col = Patient_ID, shape = Patient_ID) ) + theme_classic2() +
geom_violin() +
geom_jitter(width = 0.2, size = 2) +
labs(x = NULL, y = "AUC (ccf)" ) +
scale_color_manual( values = cols_samples ) +
theme(
legend.position = "none",
axis.text = element_text(size = 15),
axis.text.x = element_text(hjust = 1, angle = 45)
)
p1
pdf("MesKit/ITH.AUCccf.pdf", width = 5, height = 3.5)
p1
dev.off()
## svg
## 2
5 Between Regions different, Fst and Nei’s distance
library(ggpubr)
library(rstatix)
##
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
##
## filter
scores.Fst = calFst(maf, plot = TRUE, use.tumorSampleLabel = TRUE,
withinTumor = FALSE, number.cex = 10)
Fst.data = list()
for(i in names(scores.Fst) ){
Fst = scores.Fst[[i]]$Fst.pair
Fst = Fst[lower.tri(Fst)]
Fst.data[[i]] = data.frame(
Fst = Fst,
tumor = i
)
}
Fst.data = purrr::reduce(Fst.data, rbind)
#statistical test
stat.test = Fst.data %>%
wilcox_test(Fst ~ tumor) %>%
add_significance("p") %>%
add_xy_position(fun ="mean") %>%
dplyr::filter(group1 == "Coad" & group2 != "Lung")
p1 = Fst.data %>%
ggplot(aes(x = tumor, y = Fst) ) + theme_classic2() +
geom_boxplot(aes(col = tumor, shape = tumor), width = 0.6 ) +
geom_jitter(aes(col = tumor, shape = tumor),
width = 0.1, size = 2) +
labs(x = NULL, y = "Fst" ) +
scale_color_manual( values = cols_samples ) +
stat_pvalue_manual(stat.test, label = "p.signif") +
theme(
legend.position = "none",
axis.text = element_text(size = 15),
axis.text.x = element_text(hjust = 1, angle = 45)
)
p1
pdf("MesKit/ITH.Fst-1.pdf", width = 7, height = 5.2)
p1
dev.off()
## svg
## 2
##Nei's distance.
scores.Nei = calNeiDist(maf, plot = TRUE, use.tumorSampleLabel = TRUE,
withinTumor = FALSE, number.cex = 10)
Nei.data = list()
for(i in names(scores.Nei) ){
Nei = scores.Nei[[i]]$Nei.dist
Nei = Nei[lower.tri(Nei)]
Nei.data[[i]] = data.frame(
Nei = Nei,
tumor = i
)
}
Nei.data = purrr::reduce(Nei.data, rbind)
p1 = Nei.data %>%
ggplot(aes(x = tumor, y = Nei, col = tumor, shape = tumor) ) + theme_classic2() +
geom_violin() +
geom_jitter(width = 0.2, size = 2) +
labs(x = NULL, y = "Nei's distance" ) +
scale_color_manual( values = cols_samples ) +
theme(
legend.position = "none",
axis.text = element_text(size = 15),
axis.text.x = element_text(hjust = 1, angle = 45)
)
p1
pdf("MesKit/ITH.Nei.distance.pdf", width = 5, height = 3.5)
p1
dev.off()
## svg
## 2
6 Metastatic routes inference
# retains samples from COAD
# Maf object with CCF information
data.type = "split1"
maf <- readMaf(mafFile = sprintf( "MesKit/meskit.%s.mutation.txt", data.type),
ccfFile = sprintf("MesKit/meskit.%s.CCF.txt", data.type),
clinicalFile = sprintf("MesKit/meskit.%s.clinical.txt", data.type),
refBuild = "hg19",
ccf.conf.level = 0.95
)
JSI.res <- calJSI(maf, patient.id = 'Met1', pairByTumor = TRUE, min.ccf = 0.02,
use.adjVAF = TRUE, use.indel = FALSE, use.tumorSampleLabel = TRUE)
names(JSI.res)
## [1] "JSI.multi" "JSI.pair"
ccf.list <- compareCCF(maf, patient.id = 'Met1', pairByTumor = TRUE, min.ccf = 0.02,
use.adjVAF = TRUE, use.indel = FALSE)
References: Quantitative evidence for early metastatic seeding in colorectal cancer
Calculate the H(Lm and Lp) index for the early or layer divergence.
# calHindex
#' @param PrimaryId Primary tumor IDs to indicate the primary-metastases relationships.
#' @param CCF_cutoff The minimal cutoffs for the present status.
calHindex = function(ccf.list , JSI.res, maf_merge1, PrimaryId = "Coad", CCF_cutoff = 0.25 ){
plist = list()
sample.pairs = names(ccf.list)[grepl(PrimaryId , names(ccf.list))]
stats = list()
for(i in sample.pairs ){
nums = c()
message(i)
pairs = as.character( str_split(i, "-", simplify = T))
JSI = JSI.res$JSI.pair[pairs[1],pairs[2]]
#merge driver info
Met1_C_OL = ccf.list[[i]] %>%
left_join( maf_merge1 %>%
mutate(Mut_ID = str_c(Chromosome, Start_Position, Reference_Allele , Tumor_Seq_Allele2, sep = ":") ) %>%
dplyr::select(Mut_ID, is.driver) %>%
unique.data.frame()
)
Met1_group = Met1_C_OL %>%
dplyr::select( all_of(c(PrimaryId, pairs[!pairs %in% PrimaryId ]))) %>%
setNames(nm = c("Primary","Metastases")) %>%
#Set Lp and Lm
mutate(
mut_group = ifelse(Primary>=CCF_cutoff & Metastases<CCF_cutoff, "Lp",
ifelse(Metastases >= CCF_cutoff & Primary<CCF_cutoff, "Lm",
ifelse(Primary>=CCF_cutoff & Metastases >= CCF_cutoff, "La", "Ls")
) )) %>%
mutate(
S1 = ifelse(mut_group == "Lp" & Primary >=0.1, 1, 0 ),
S2 = ifelse(mut_group == "Lp" & Primary >=0.2, 1, 0 ),
S3 = ifelse(mut_group == "Lp" & Primary >=0.4, 1, 0 ),
S4 = ifelse(mut_group == "Lp" & Primary >=0.6, 1, 0 ),
S5 = ifelse(mut_group == "Lm" & Metastases >=0.1, 1, 0 ),
S6 = ifelse(mut_group == "Lm" & Metastases >=0.2, 1, 0 ),
S7 = ifelse(mut_group == "Lm" & Metastases >=0.4, 1, 0 ),
S8 = ifelse(mut_group == "Lm" & Metastases >=0.6, 1, 0 ),
S9 = ifelse( Metastases >0.6 & Primary <0.6 & Primary> CCF_cutoff , 1, 0 )
)
nums[1] = sum(Met1_group$mut_group == "Lp")
nums[2] = sum(Met1_group$mut_group == "Lm")
nums[3] = sum(Met1_group$mut_group == "La")
nums[4] = sum(Met1_group$mut_group == "Ls")
nums = setNames(nums, c("Lp","Lm","La","Ls") )
nums = c(nums,
Met1_group %>%
dplyr::select(all_of( paste0("S",1:9) ) ) %>%
colSums())
stats[[i]] = nums
plist[[i]] = Met1_C_OL %>%
ggplot(aes_string(x = pairs[1] , y = pairs[2]) ) + theme_classic2() +
stat_density2d(aes(fill = ..density..^2), geom = "tile", contour = FALSE, n = 100) +
scale_fill_continuous(low = "white", high = "red") +
geom_hline(yintercept = CCF_cutoff, linetype = 2, size = 0.5) +
geom_vline(xintercept = CCF_cutoff, linetype = 2, size = 0.5) +
geom_point(alpha = 0.5) +
#geom_point(col = "blue", data = Met1_C_OL %>% filter(is.driver), size = 3, shape = 5 ) +
#geom_label_repel(aes(label = Hugo_Symbol), data = Met1_C_OL %>% filter(is.driver),
# nudge_x = 0.1, nudge_y = 0.1
#) +
labs(title = sprintf("JSI = %.3f\nH = %.3f", JSI, nums[2]/nums[1]) ) +
theme(
legend.position = "none",
plot.title = element_text(hjust = 0.5)
)
}
return( list(plist = plist,
stats = stats
)
)
}
Hindex = calHindex(ccf.list = ccf.list$Met1, JSI.res, maf_merge1,
PrimaryId = "Coad",
CCF_cutoff = 0.2 )
## Coad-OveryLM
## Joining, by = "Mut_ID"
## Coad-OveryRM
## Joining, by = "Mut_ID"
## Coad-UterusM
## Joining, by = "Mut_ID"
wrap_plots(Hindex$plist) + plot_layout(nrow = 1)
pdf("MesKit/JSI.Met1.Paired-2.pdf", width = 8*1.1, height = 3.5*1.1)
wrap_plots(Hindex$plist) + plot_layout(nrow = 1)
dev.off()
## svg
## 2
stats = Hindex$stats %>%
purrr::reduce(rbind) %>%
as.data.frame() %>%
mutate(Tumor = names(Hindex$stats))
# inference the parameters of u and Nd
# see https://github.com/cancersysbio/SCIMET for parameter estimation.
write.table(stats,
file = "MesKit/CRC.summary_statistics.txt",
row.names = F, quote = F, sep = "\t"
)