Load libraries
library(dplyr)
library(tidyr)
library(readr)
library(ggplot2)
library(viridis)
# Optional: theme for plots
theme_set(theme_bw())
Load AD files
# Define files
files <- list(
N1 = "N1.AD.txt",
T1 = "T1.AD.txt",
X1 = "X1.AD.txt",
XX1 = "XX1.AD.txt"
)
# Function to read and process AD file
read_ad <- function(file){
read_tsv(file, col_names = c("CHROM","POS","REF","ALT","AD")) %>%
separate(AD, into=c("ref_count","alt_count"), sep=",", convert=TRUE) %>%
mutate(VAF = alt_count / (ref_count + alt_count))
}
# Load all samples into a list
ad_list <- lapply(files, read_ad)
# Name list elements
names(ad_list) <- names(files)
Compute basic
statistics
# Compute basic statistics
vaf_summary <- lapply(ad_list, function(df){
summary(df$VAF)
})
vaf_summary
$N1
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.0000 0.3596 0.4902 0.5537 0.9951 1.0000 1
$T1
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.0000 0.3529 0.5000 0.5663 0.9942 1.0000
$X1
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.0000 0.3626 0.5000 0.5692 1.0000 1.0000 1
$XX1
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.0000 0.3448 0.5000 0.5650 1.0000 1.0000 3
Plot VAF
distributions
# Combine all VAFs for plotting
vaf_df <- bind_rows(
lapply(names(ad_list), function(n) {
ad_list[[n]] %>% select(VAF) %>% mutate(Sample = n)
})
)
ggplot(vaf_df, aes(x=VAF, fill=Sample)) +
geom_histogram(bins=50, alpha=0.6, position="identity") +
scale_fill_viridis(discrete = TRUE) +
labs(title="VAF Distribution Across Samples",
x="Variant Allele Frequency (VAF)",
y="Number of Variants") +
theme_minimal()

NA
NA
NA
Identify shared
variants
library(dplyr)
# Remove duplicate variants by CHROM and POS
T1_unique <- ad_list$T1 %>% distinct(CHROM, POS, .keep_all = TRUE)
XX1_unique <- ad_list$XX1 %>% distinct(CHROM, POS, .keep_all = TRUE)
# Find shared variants between tumor (T1) and cell line (XX1)
shared_T1_XX1 <- inner_join(
T1_unique %>% select(CHROM, POS),
XX1_unique %>% select(CHROM, POS),
by = c("CHROM", "POS"),
relationship = "many-to-many" # explicitly allow many-to-many
)
cat("Number of shared variants between T1 and XX1:", nrow(shared_T1_XX1), "\n")
Number of shared variants between T1 and XX1: 44650
Compute shared vs
unique variants
get_variant_id <- function(df){
df %>% mutate(VariantID = paste(CHROM, POS, sep="_")) %>% select(VariantID)
}
variant_ids <- lapply(ad_list, get_variant_id)
variant_ids <- lapply(variant_ids, pull)
shared_T1_X1 <- intersect(variant_ids$T1, variant_ids$X1)
shared_T1_XX1 <- intersect(variant_ids$T1, variant_ids$XX1)
unique_T1 <- setdiff(variant_ids$T1, union(variant_ids$X1, variant_ids$XX1))
unique_X1 <- setdiff(variant_ids$X1, variant_ids$T1)
unique_XX1 <- setdiff(variant_ids$XX1, variant_ids$T1)
variant_summary <- data.frame(
Comparison = c("Shared_T1_X1","Shared_T1_XX1","Unique_T1","Unique_X1","Unique_XX1"),
Count = c(length(shared_T1_X1), length(shared_T1_XX1),
length(unique_T1), length(unique_X1), length(unique_XX1))
)
variant_summary
library(VennDiagram)
library(grid)
variants_sets <- list(
T1 = variant_ids$T1,
X1 = variant_ids$X1,
XX1 = variant_ids$XX1
)
venn.diagram(
x = variants_sets,
filename = "Venn_Tumor_CellLines.png",
fill = c("red", "green", "blue"),
alpha = 0.5,
cex = 1.5,
cat.cex = 1.2,
main = "Overlap of Variants: Tumor and Cell Lines"
)
[1] 1
# Use grid.draw to render
grid.newpage()
grid.draw(venn.plot)

VAF Heatmap for Shared
Variants
library(tidyverse)
library(pheatmap)
# Combine all AD data
ad_df <- bind_rows(
lapply(names(ad_list), function(s) {
ad_list[[s]] %>% mutate(Sample = s)
})
) %>%
mutate(VariantID = paste(CHROM, POS, sep="_"))
# Shared variants between tumor and at least one cell line
shared_variants <- union(shared_T1_X1, shared_T1_XX1)
# FIX: Remove duplicates before pivoting
vaf_df <- ad_df %>%
filter(VariantID %in% shared_variants) %>%
group_by(VariantID, Sample) %>%
summarise(VAF = mean(VAF, na.rm = TRUE), .groups = "drop") %>% # FIX
pivot_wider(names_from = Sample, values_from = VAF, values_fill = 0)
# Convert to matrix
vaf_matrix <- as.matrix(vaf_df[,-1])
rownames(vaf_matrix) <- vaf_df$VariantID
# Heatmap
pheatmap(
vaf_matrix,
cluster_rows = TRUE,
cluster_cols = TRUE,
color = colorRampPalette(c("white","red"))(50),
main = "VAF Heatmap: Shared Variants Tumor vs Cell Lines"
)

NA
NA
NA
LS0tCnRpdGxlOiAiV0VTIFZBRiBBbmFseXNpczogUGF0aWVudDEgYW5kIENlbGwgTGluZXMgTDEtTDIiCmF1dGhvcjogIk5hc2lyIE1haG1vb2QgQWJiYXNpIgpkYXRlOiAiYHIgZm9ybWF0KFN5cy50aW1lKCksICclQiAlZCwgJVknKWAiCm91dHB1dDoKICBodG1sX25vdGVib29rOgogICAgbnVtYmVyX3NlY3Rpb25zOiB0cnVlCiAgICB0b2M6IHRydWUKICAgIHRvY19mbG9hdDoKICAgICAgY29sbGFwc2VkOiB0cnVlCiAgICB0aGVtZTogam91cm5hbAotLS0KCiMgTG9hZCBsaWJyYXJpZXMKYGBge3IgLCBtZXNzYWdlPUZBTFNFfQoKbGlicmFyeShkcGx5cikKbGlicmFyeSh0aWR5cikKbGlicmFyeShyZWFkcikKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KHZpcmlkaXMpCgojIE9wdGlvbmFsOiB0aGVtZSBmb3IgcGxvdHMKdGhlbWVfc2V0KHRoZW1lX2J3KCkpCgpgYGAKCgoKIyBMb2FkIEFEIGZpbGVzCgpgYGB7ciAsIG1lc3NhZ2U9RkFMU0V9CgojIERlZmluZSBmaWxlcwoKZmlsZXMgPC0gbGlzdCgKTjEgPSAiTjEuQUQudHh0IiwKVDEgPSAiVDEuQUQudHh0IiwKWDEgPSAiWDEuQUQudHh0IiwKWFgxID0gIlhYMS5BRC50eHQiCikKCiMgRnVuY3Rpb24gdG8gcmVhZCBhbmQgcHJvY2VzcyBBRCBmaWxlCgpyZWFkX2FkIDwtIGZ1bmN0aW9uKGZpbGUpewpyZWFkX3RzdihmaWxlLCBjb2xfbmFtZXMgPSBjKCJDSFJPTSIsIlBPUyIsIlJFRiIsIkFMVCIsIkFEIikpICU+JQpzZXBhcmF0ZShBRCwgaW50bz1jKCJyZWZfY291bnQiLCJhbHRfY291bnQiKSwgc2VwPSIsIiwgY29udmVydD1UUlVFKSAlPiUKbXV0YXRlKFZBRiA9IGFsdF9jb3VudCAvIChyZWZfY291bnQgKyBhbHRfY291bnQpKQp9CgojIExvYWQgYWxsIHNhbXBsZXMgaW50byBhIGxpc3QKCmFkX2xpc3QgPC0gbGFwcGx5KGZpbGVzLCByZWFkX2FkKQoKIyBOYW1lIGxpc3QgZWxlbWVudHMKCm5hbWVzKGFkX2xpc3QpIDwtIG5hbWVzKGZpbGVzKQoKCmBgYAoKIyBDb21wdXRlIGJhc2ljIHN0YXRpc3RpY3MKCmBgYHtyICwgbWVzc2FnZT1GQUxTRX0KCiMgQ29tcHV0ZSBiYXNpYyBzdGF0aXN0aWNzCgp2YWZfc3VtbWFyeSA8LSBsYXBwbHkoYWRfbGlzdCwgZnVuY3Rpb24oZGYpewpzdW1tYXJ5KGRmJFZBRikKfSkKCnZhZl9zdW1tYXJ5CgoKYGBgCgoKCiMgUGxvdCBWQUYgZGlzdHJpYnV0aW9ucwoKYGBge3IgLCBtZXNzYWdlPUZBTFNFfQoKIyBDb21iaW5lIGFsbCBWQUZzIGZvciBwbG90dGluZwoKdmFmX2RmIDwtIGJpbmRfcm93cygKbGFwcGx5KG5hbWVzKGFkX2xpc3QpLCBmdW5jdGlvbihuKSB7CmFkX2xpc3RbW25dXSAlPiUgc2VsZWN0KFZBRikgJT4lIG11dGF0ZShTYW1wbGUgPSBuKQp9KQopCgpnZ3Bsb3QodmFmX2RmLCBhZXMoeD1WQUYsIGZpbGw9U2FtcGxlKSkgKwpnZW9tX2hpc3RvZ3JhbShiaW5zPTUwLCBhbHBoYT0wLjYsIHBvc2l0aW9uPSJpZGVudGl0eSIpICsKc2NhbGVfZmlsbF92aXJpZGlzKGRpc2NyZXRlID0gVFJVRSkgKwpsYWJzKHRpdGxlPSJWQUYgRGlzdHJpYnV0aW9uIEFjcm9zcyBTYW1wbGVzIiwKeD0iVmFyaWFudCBBbGxlbGUgRnJlcXVlbmN5IChWQUYpIiwKeT0iTnVtYmVyIG9mIFZhcmlhbnRzIikgKwp0aGVtZV9taW5pbWFsKCkKCgoKYGBgCgojIElkZW50aWZ5IHNoYXJlZCB2YXJpYW50cwoKYGBge3IgLCBtZXNzYWdlPUZBTFNFfQoKbGlicmFyeShkcGx5cikKCiMgUmVtb3ZlIGR1cGxpY2F0ZSB2YXJpYW50cyBieSBDSFJPTSBhbmQgUE9TClQxX3VuaXF1ZSA8LSBhZF9saXN0JFQxICU+JSBkaXN0aW5jdChDSFJPTSwgUE9TLCAua2VlcF9hbGwgPSBUUlVFKQpYWDFfdW5pcXVlIDwtIGFkX2xpc3QkWFgxICU+JSBkaXN0aW5jdChDSFJPTSwgUE9TLCAua2VlcF9hbGwgPSBUUlVFKQoKIyBGaW5kIHNoYXJlZCB2YXJpYW50cyBiZXR3ZWVuIHR1bW9yIChUMSkgYW5kIGNlbGwgbGluZSAoWFgxKQpzaGFyZWRfVDFfWFgxIDwtIGlubmVyX2pvaW4oCiAgVDFfdW5pcXVlICU+JSBzZWxlY3QoQ0hST00sIFBPUyksCiAgWFgxX3VuaXF1ZSAlPiUgc2VsZWN0KENIUk9NLCBQT1MpLAogIGJ5ID0gYygiQ0hST00iLCAiUE9TIiksCiAgcmVsYXRpb25zaGlwID0gIm1hbnktdG8tbWFueSIgICMgZXhwbGljaXRseSBhbGxvdyBtYW55LXRvLW1hbnkKKQoKY2F0KCJOdW1iZXIgb2Ygc2hhcmVkIHZhcmlhbnRzIGJldHdlZW4gVDEgYW5kIFhYMToiLCBucm93KHNoYXJlZF9UMV9YWDEpLCAiXG4iKQoKCgpgYGAKCgoKIyBDb21wdXRlIHNoYXJlZCB2cyB1bmlxdWUgdmFyaWFudHMKCmBgYHtyICwgbWVzc2FnZT1GQUxTRX0KCiMgRnVuY3Rpb24gdG8gZ2V0IENIUk9NK1BPUyBhcyB1bmlxdWUgaWRlbnRpZmllcgpnZXRfdmFyaWFudF9pZCA8LSBmdW5jdGlvbihkZil7CiAgZGYgJT4lIG11dGF0ZShWYXJpYW50SUQgPSBwYXN0ZShDSFJPTSwgUE9TLCBzZXA9Il8iKSkgJT4lIHNlbGVjdChWYXJpYW50SUQpCn0KCiMgR2V0IHZhcmlhbnQgSURzIGZvciBlYWNoIHNhbXBsZQp2YXJpYW50X2lkcyA8LSBsYXBwbHkoYWRfbGlzdCwgZ2V0X3ZhcmlhbnRfaWQpCgojIENvbnZlcnQgbGlzdCB0byBuYW1lZCB2ZWN0b3JzCnZhcmlhbnRfaWRzIDwtIGxhcHBseSh2YXJpYW50X2lkcywgcHVsbCkKCiMgRmluZCBzaGFyZWQgYmV0d2VlbiB0dW1vciBhbmQgZWFjaCBjZWxsIGxpbmUKc2hhcmVkX1QxX1gxIDwtIGludGVyc2VjdCh2YXJpYW50X2lkcyRUMSwgdmFyaWFudF9pZHMkWDEpCnNoYXJlZF9UMV9YWDEgPC0gaW50ZXJzZWN0KHZhcmlhbnRfaWRzJFQxLCB2YXJpYW50X2lkcyRYWDEpCgojIEZpbmQgdmFyaWFudHMgdW5pcXVlIHRvIFQxICh0dW1vcikKdW5pcXVlX1QxIDwtIHNldGRpZmYodmFyaWFudF9pZHMkVDEsIHVuaW9uKHZhcmlhbnRfaWRzJFgxLCB2YXJpYW50X2lkcyRYWDEpKQoKIyBGaW5kIHZhcmlhbnRzIHVuaXF1ZSB0byBlYWNoIGNlbGwgbGluZQp1bmlxdWVfWDEgPC0gc2V0ZGlmZih2YXJpYW50X2lkcyRYMSwgdmFyaWFudF9pZHMkVDEpCnVuaXF1ZV9YWDEgPC0gc2V0ZGlmZih2YXJpYW50X2lkcyRYWDEsIHZhcmlhbnRfaWRzJFQxKQoKIyBTdW1tYXJ5CnZhcmlhbnRfc3VtbWFyeSA8LSBkYXRhLmZyYW1lKAogIENvbXBhcmlzb24gPSBjKCJTaGFyZWRfVDFfWDEiLCJTaGFyZWRfVDFfWFgxIiwiVW5pcXVlX1QxIiwiVW5pcXVlX1gxIiwiVW5pcXVlX1hYMSIpLAogIENvdW50ID0gYyhsZW5ndGgoc2hhcmVkX1QxX1gxKSwgbGVuZ3RoKHNoYXJlZF9UMV9YWDEpLAogICAgICAgICAgICBsZW5ndGgodW5pcXVlX1QxKSwgbGVuZ3RoKHVuaXF1ZV9YMSksIGxlbmd0aCh1bmlxdWVfWFgxKSkKKQpgYGAKCgoKCmBgYHtyICwgZmlnLmhlaWdodD00LCBmaWcud2lkdGg9NH0KCmxpYnJhcnkoVmVubkRpYWdyYW0pCmxpYnJhcnkoZ3JpZCkKCnZhcmlhbnRzX3NldHMgPC0gbGlzdCgKICBUMSA9IHZhcmlhbnRfaWRzJFQxLAogIFgxID0gdmFyaWFudF9pZHMkWDEsCiAgWFgxID0gdmFyaWFudF9pZHMkWFgxCikKCnZlbm4uZGlhZ3JhbSgKICB4ID0gdmFyaWFudHNfc2V0cywKICBmaWxlbmFtZSA9ICJWZW5uX1R1bW9yX0NlbGxMaW5lcy5wbmciLAogIGZpbGwgPSBjKCJyZWQiLCAiZ3JlZW4iLCAiYmx1ZSIpLAogIGFscGhhID0gMC41LAogIGNleCA9IDEuNSwKICBjYXQuY2V4ID0gMS4yLAogIG1haW4gPSAiT3ZlcmxhcCBvZiBWYXJpYW50czogVHVtb3IgYW5kIENlbGwgTGluZXMiCikKCiMgVXNlIGdyaWQuZHJhdyB0byByZW5kZXIKZ3JpZC5uZXdwYWdlKCkKZ3JpZC5kcmF3KHZlbm4ucGxvdCkKCmBgYAojIFZBRiBIZWF0bWFwIGZvciBTaGFyZWQgVmFyaWFudHMKCmBgYHtyICwgbWVzc2FnZT1GQUxTRX0KCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KHBoZWF0bWFwKQoKIyBDb21iaW5lIGFsbCBBRCBkYXRhCmFkX2RmIDwtIGJpbmRfcm93cygKICBsYXBwbHkobmFtZXMoYWRfbGlzdCksIGZ1bmN0aW9uKHMpIHsKICAgIGFkX2xpc3RbW3NdXSAlPiUgbXV0YXRlKFNhbXBsZSA9IHMpCiAgfSkKKSAlPiUKICBtdXRhdGUoVmFyaWFudElEID0gcGFzdGUoQ0hST00sIFBPUywgc2VwPSJfIikpCgojIFNoYXJlZCB2YXJpYW50cyBiZXR3ZWVuIHR1bW9yIGFuZCBhdCBsZWFzdCBvbmUgY2VsbCBsaW5lCnNoYXJlZF92YXJpYW50cyA8LSB1bmlvbihzaGFyZWRfVDFfWDEsIHNoYXJlZF9UMV9YWDEpCgojIEZJWDogUmVtb3ZlIGR1cGxpY2F0ZXMgYmVmb3JlIHBpdm90aW5nCnZhZl9kZiA8LSBhZF9kZiAlPiUKICBmaWx0ZXIoVmFyaWFudElEICVpbiUgc2hhcmVkX3ZhcmlhbnRzKSAlPiUKICBncm91cF9ieShWYXJpYW50SUQsIFNhbXBsZSkgJT4lICAgICAgCiAgc3VtbWFyaXNlKFZBRiA9IG1lYW4oVkFGLCBuYS5ybSA9IFRSVUUpLCAuZ3JvdXBzID0gImRyb3AiKSAlPiUgICMgRklYCiAgcGl2b3Rfd2lkZXIobmFtZXNfZnJvbSA9IFNhbXBsZSwgdmFsdWVzX2Zyb20gPSBWQUYsIHZhbHVlc19maWxsID0gMCkKCiMgQ29udmVydCB0byBtYXRyaXgKdmFmX21hdHJpeCA8LSBhcy5tYXRyaXgodmFmX2RmWywtMV0pCnJvd25hbWVzKHZhZl9tYXRyaXgpIDwtIHZhZl9kZiRWYXJpYW50SUQKCiMgSGVhdG1hcApwaGVhdG1hcCgKICB2YWZfbWF0cml4LAogIGNsdXN0ZXJfcm93cyA9IFRSVUUsCiAgY2x1c3Rlcl9jb2xzID0gVFJVRSwKICBjb2xvciA9IGNvbG9yUmFtcFBhbGV0dGUoYygid2hpdGUiLCJyZWQiKSkoNTApLAogIG1haW4gPSAiVkFGIEhlYXRtYXA6IFNoYXJlZCBWYXJpYW50cyBUdW1vciB2cyBDZWxsIExpbmVzIgopCgoKCmBgYA==