Introduction
Sample Information: * N1: Normal
(Constitutional DNA) * T1: Primary Tumor (Fresh Sézary
Cells) * X1: In-vivo derived Cell Line (from Mouse) *
XX1: Secondary Cell Line (cultured from X1)
Variant Allele
Frequency (VAF) Distribution Plot
library(vcfR)
library(ggplot2)
library(dplyr)
library(tidyr)
# Define paths
VCF_DIR <- "/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/15-WES_Patient1_L1_L2_Analysis-19-11-2025/Exome_Patient_1/hg19_analysis/somatic_variants"
VIS_DIR <- "/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/15-WES_Patient1_L1_L2_Analysis-19-11-2025/Exome_Patient_1/hg19_analysis/visualizations"
extract_vaf <- function(vcf_file, sample_name) {
vcf <- read.vcfR(vcf_file, verbose = FALSE)
pass_vcf <- vcf[getFILTER(vcf) == "PASS", ]
gt_info <- extract.gt(pass_vcf)
ad <- extract.gt(pass_vcf, element = "AD")
vaf_values <- sapply(ad[, sample_name], function(x) {
if (is.na(x)) return(NA)
depths <- as.numeric(strsplit(x, ",")[[1]])
if (length(depths) < 2 || sum(depths) == 0) return(NA)
return(depths[2] / sum(depths))
})
return(data.frame(sample = sample_name, VAF = vaf_values))
}
# Extract VAF for all samples
vaf_data <- rbind(
extract_vaf(file.path(VCF_DIR, "T1.hg19.filtered.vcf.gz"), "T1"),
extract_vaf(file.path(VCF_DIR, "X1.hg19.filtered.vcf.gz"), "X1"),
extract_vaf(file.path(VCF_DIR, "XX1.hg19.filtered.vcf.gz"), "XX1")
)
vaf_data <- na.omit(vaf_data)
# Create violin plot with jitter
p <- ggplot(vaf_data, aes(x = sample, y = VAF, fill = sample)) +
geom_violin(alpha = 0.6) +
geom_jitter(width = 0.2, alpha = 0.4, size = 1) +
geom_hline(yintercept = 0.5, linetype = "dashed", color = "red") +
labs(
title = "Variant Allele Frequency Distribution",
subtitle = "Red line indicates clonal threshold (VAF = 0.5)",
x = "Sample",
y = "Variant Allele Frequency (VAF)"
) +
theme_bw() +
theme(legend.position = "none")
ggsave(file.path(VIS_DIR, "VAF_distribution.pdf"), p, width = 10, height = 6)
ggsave(file.path(VIS_DIR, "VAF_distribution.png"), p, width = 10, height = 6)
print(p)

Circos Plot for
CNV
#!/bin/bash
source /home/bioinfo/miniconda3/etc/profile.d/conda.sh
conda activate wes_env
BASE_DIR="/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/15-WES_Patient1_L1_L2_Analysis-19-11-2025/Exome_Patient_1"
CNV_DIR="$BASE_DIR/hg19_analysis/cnv_results"
VIS_DIR="$BASE_DIR/hg19_analysis/visualizations"
echo "--- Generating Circos plots for each sample ---"
for SAMPLE in T1 X1 XX1; do
cnvkit.py diagram \
"$CNV_DIR/$SAMPLE/${SAMPLE}.with_rg.cnr" \
-s "$CNV_DIR/$SAMPLE/${SAMPLE}.with_rg.cns" \
-o "$VIS_DIR/${SAMPLE}_circos.pdf"
done
echo "Circos plots saved to $VIS_DIR"
Mutational Signature
Analysis
library(deconstructSigs)
library(vcfR)
library(BSgenome.Hsapiens.UCSC.hg19)
library(ggplot2)
VCF_DIR <- "/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/15-WES_Patient1_L1_L2_Analysis-19-11-2025/Exome_Patient_1/hg19_analysis/somatic_variants"
VIS_DIR <- "/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/15-WES_Patient1_L1_L2_Analysis-19-11-2025/Exome_Patient_1/hg19_analysis/visualizations"
# Convert VCF to deconstructSigs format
vcf_to_sigs_input <- function(vcf_file, sample_name) {
message(paste("Processing", sample_name, "..."))
vcf <- read.vcfR(vcf_file, verbose = FALSE)
# CORRECTED: Use getFILTER() instead of getFIXED()
pass_vcf <- vcf[getFILTER(vcf) == "PASS", ]
message(paste(" Found", nrow(pass_vcf@fix), "PASS variants"))
df <- data.frame(
Sample = sample_name,
chr = getCHROM(pass_vcf),
pos = getPOS(pass_vcf),
ref = getREF(pass_vcf),
alt = getALT(pass_vcf),
stringsAsFactors = FALSE
)
# Filter to only SNVs (single nucleotide variants)
df <- df[nchar(df$ref) == 1 & nchar(df$alt) == 1, ]
message(paste(" Kept", nrow(df), "SNVs for signature analysis"))
return(df)
}
# Load all mutations
message("Loading mutations from VCF files...")
all_muts <- rbind(
vcf_to_sigs_input(file.path(VCF_DIR, "T1.hg19.filtered.vcf.gz"), "T1"),
vcf_to_sigs_input(file.path(VCF_DIR, "X1.hg19.filtered.vcf.gz"), "X1"),
vcf_to_sigs_input(file.path(VCF_DIR, "XX1.hg19.filtered.vcf.gz"), "XX1")
)
message(paste("Total mutations:", nrow(all_muts)))
# Convert to trinucleotide context
message("Computing trinucleotide contexts from hg19 reference...")
sigs_input <- mut.to.sigs.input(
mut.ref = all_muts,
sample.id = "Sample",
chr = "chr",
pos = "pos",
ref = "ref",
alt = "alt",
bsg = BSgenome.Hsapiens.UCSC.hg19
)
message("Trinucleotide matrix dimensions:")
print(dim(sigs_input))
# Determine signatures for each sample
message("Performing signature decomposition...")
pdf(file.path(VIS_DIR, "Mutational_Signatures.pdf"), width = 12, height = 8)
for (sample in c("T1", "X1", "XX1")) {
message(paste(" Analyzing", sample))
output <- whichSignatures(
tumor.ref = sigs_input,
signatures.ref = signatures.cosmic,
sample.id = sample,
contexts.needed = TRUE,
tri.counts.method = "default"
)
plotSignatures(output, sub = paste("Sample:", sample))
makePie(output, sub = paste("Sample:", sample))
}
dev.off()
message("✓ Mutational signature analysis complete!")
message(paste("Results saved to:", file.path(VIS_DIR, "Mutational_Signatures.pdf")))
Ti/Tv Ratio
Analysis
library(vcfR)
library(ggplot2)
VCF_DIR <- "/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/15-WES_Patient1_L1_L2_Analysis-19-11-2025/Exome_Patient_1/hg19_analysis/somatic_variants"
VIS_DIR <- "/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/15-WES_Patient1_L1_L2_Analysis-19-11-2025/Exome_Patient_1/hg19_analysis/visualizations"
calculate_titv <- function(vcf_file, sample_name) {
vcf <- read.vcfR(vcf_file, verbose = FALSE)
pass_vcf <- vcf[getFILTER(vcf) == "PASS", ]
ref <- getREF(pass_vcf)
alt <- getALT(pass_vcf)
# Only keep SNVs
snv_idx <- nchar(ref) == 1 & nchar(alt) == 1
ref <- ref[snv_idx]
alt <- alt[snv_idx]
# Classify transitions and transversions
transitions <- c("AG", "GA", "CT", "TC")
transversions <- c("AC", "CA", "AT", "TA", "GC", "CG", "GT", "TG")
change <- paste0(ref, alt)
ti_count <- sum(change %in% transitions)
tv_count <- sum(change %in% transversions)
return(data.frame(
sample = sample_name,
Transitions = ti_count,
Transversions = tv_count,
TiTv_ratio = ti_count / tv_count
))
}
titv_data <- rbind(
calculate_titv(file.path(VCF_DIR, "T1.hg19.filtered.vcf.gz"), "T1"),
calculate_titv(file.path(VCF_DIR, "X1.hg19.filtered.vcf.gz"), "X1"),
calculate_titv(file.path(VCF_DIR, "XX1.hg19.filtered.vcf.gz"), "XX1")
)
# Plot
p <- ggplot(titv_data, aes(x = sample, y = TiTv_ratio, fill = sample)) +
geom_bar(stat = "identity") +
geom_hline(yintercept = 2, linetype = "dashed", color = "red") +
geom_text(aes(label = round(TiTv_ratio, 2)), vjust = -0.5) +
labs(
title = "Transition/Transversion Ratio",
subtitle = "Red line indicates expected WES ratio (~2.0)",
x = "Sample",
y = "Ti/Tv Ratio"
) +
theme_bw() +
theme(legend.position = "none")
print(p)
ggsave(file.path(VIS_DIR, "TiTv_ratio.pdf"), p, width = 8, height = 6)

Clonal Evolution Tree
(Phylogenetic)
library(vcfR)
library(ape)
library(phangorn)
VCF_DIR <- "/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/15-WES_Patient1_L1_L2_Analysis-19-11-2025/Exome_Patient_1/hg19_analysis/somatic_variants"
VIS_DIR <- "/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/15-WES_Patient1_L1_L2_Analysis-19-11-2025/Exome_Patient_1/hg19_analysis/visualizations"
# Create binary mutation matrix (present=1, absent=0)
create_mutation_matrix <- function(vcf_files, sample_names) {
all_variants <- list()
for (i in seq_along(vcf_files)) {
vcf <- read.vcfR(vcf_files[i], verbose = FALSE)
pass_vcf <- vcf[getFILTER(vcf) == "PASS", ]
variants <- paste(getCHROM(pass_vcf), getPOS(pass_vcf), sep = ":")
all_variants[[sample_names[i]]] <- variants
}
# Get all unique variants
unique_variants <- unique(unlist(all_variants))
# Create binary matrix
mat <- matrix(0, nrow = length(sample_names), ncol = length(unique_variants))
rownames(mat) <- sample_names
colnames(mat) <- unique_variants
for (i in seq_along(sample_names)) {
mat[i, colnames(mat) %in% all_variants[[sample_names[i]]]] <- 1
}
return(mat)
}
# Create matrix
vcf_files <- c(
file.path(VCF_DIR, "T1.hg19.filtered.vcf.gz"),
file.path(VCF_DIR, "X1.hg19.filtered.vcf.gz"),
file.path(VCF_DIR, "XX1.hg19.filtered.vcf.gz")
)
mutation_matrix <- create_mutation_matrix(vcf_files, c("T1", "X1", "XX1"))
# Calculate distance and build tree
dist_matrix <- dist(mutation_matrix, method = "binary")
tree <- nj(dist_matrix)
# Root the tree with Normal (N1 as outgroup conceptually)
# Plot
pdf(file.path(VIS_DIR, "Clonal_Evolution_Tree.pdf"), width = 10, height = 8)
plot(tree, type = "phylogram", edge.width = 2,
main = "Clonal Evolution: T1 → X1 → XX1",
cex = 1.5)
add.scale.bar()
dev.off()
tree
library(vcfR)
library(ape)
library(phangorn)
VCF_DIR <- "/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/15-WES_Patient1_L1_L2_Analysis-19-11-2025/Exome_Patient_1/hg19_analysis/somatic_variants"
VIS_DIR <- "/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/15-WES_Patient1_L1_L2_Analysis-19-11-2025/Exome_Patient_1/hg19_analysis/visualizations"
# Create binary mutation matrix (present=1, absent=0)
create_mutation_matrix <- function(vcf_files, sample_names) {
all_variants <- list()
message("Loading variants from VCF files...")
for (i in seq_along(vcf_files)) {
vcf <- read.vcfR(vcf_files[i], verbose = FALSE)
pass_vcf <- vcf[getFILTER(vcf) == "PASS", ]
# Use chr:pos:ref:alt as unique identifier
variants <- paste(getCHROM(pass_vcf), getPOS(pass_vcf),
getREF(pass_vcf), getALT(pass_vcf), sep = ":")
all_variants[[sample_names[i]]] <- variants
message(paste(" ", sample_names[i], ":", length(variants), "somatic mutations"))
}
# Get all unique variants across all samples
unique_variants <- unique(unlist(all_variants))
message(paste("Total unique somatic variants:", length(unique_variants)))
# Create binary matrix
mat <- matrix(0, nrow = length(sample_names), ncol = length(unique_variants))
rownames(mat) <- sample_names
colnames(mat) <- unique_variants
for (i in seq_along(sample_names)) {
mat[i, colnames(mat) %in% all_variants[[sample_names[i]]]] <- 1
}
return(list(matrix = mat, variants = all_variants))
}
# Load mutation data
vcf_files <- c(
file.path(VCF_DIR, "T1.hg19.filtered.vcf.gz"),
file.path(VCF_DIR, "X1.hg19.filtered.vcf.gz"),
file.path(VCF_DIR, "XX1.hg19.filtered.vcf.gz")
)
sample_names <- c("T1", "X1", "XX1")
result <- create_mutation_matrix(vcf_files, sample_names)
mutation_matrix <- result$matrix
all_variants <- result$variants
# Add N1 (Normal) as all zeros - representing germline state (no somatic mutations)
mutation_matrix_with_normal <- rbind(
N1 = rep(0, ncol(mutation_matrix)), # N1 has ZERO somatic mutations by definition
mutation_matrix
)
message("\nMutation matrix summary:")
print(rowSums(mutation_matrix_with_normal))
# Calculate shared mutations
shared_T1_X1 <- sum(mutation_matrix["T1", ] == 1 & mutation_matrix["X1", ] == 1)
shared_X1_XX1 <- sum(mutation_matrix["X1", ] == 1 & mutation_matrix["XX1", ] == 1)
shared_T1_XX1 <- sum(mutation_matrix["T1", ] == 1 & mutation_matrix["XX1", ] == 1)
shared_all <- sum(mutation_matrix["T1", ] == 1 &
mutation_matrix["X1", ] == 1 &
mutation_matrix["XX1", ] == 1)
message("\nShared mutations analysis:")
message(paste(" T1 ∩ X1:", shared_T1_X1))
message(paste(" X1 ∩ XX1:", shared_X1_XX1))
message(paste(" T1 ∩ XX1:", shared_T1_XX1))
message(paste(" All three (T1 ∩ X1 ∩ XX1):", shared_all, "- TRUNK mutations"))
# Private mutations
private_T1 <- sum(mutation_matrix["T1", ] == 1 &
mutation_matrix["X1", ] == 0 &
mutation_matrix["XX1", ] == 0)
private_X1 <- sum(mutation_matrix["X1", ] == 1 &
mutation_matrix["T1", ] == 0 &
mutation_matrix["XX1", ] == 0)
private_XX1 <- sum(mutation_matrix["XX1", ] == 1 &
mutation_matrix["T1", ] == 0 &
mutation_matrix["X1", ] == 0)
message("\nPrivate mutations:")
message(paste(" T1 only:", private_T1))
message(paste(" X1 only:", private_X1))
message(paste(" XX1 only:", private_XX1))
# Calculate distance matrix
dist_matrix <- dist(mutation_matrix_with_normal, method = "binary")
# Build neighbor-joining tree
tree <- nj(dist_matrix)
# Root at N1
rooted_tree <- root(tree, outgroup = "N1", resolve.root = TRUE)
# Create better plot
pdf(file.path(VIS_DIR, "Clonal_Evolution_Tree_Rooted.pdf"), width = 12, height = 10)
par(mar = c(5, 4, 4, 2) + 0.1)
plot(rooted_tree,
type = "phylogram",
direction = "rightwards",
edge.width = 4,
cex = 1.8,
font = 2,
tip.color = c("black", "red", "blue", "darkgreen"),
edge.color = "grey30",
main = "Clonal Evolution: N1 (Germline) → T1 → X1 → XX1",
cex.main = 1.5)
# Add node point at root
nodelabels(text = "Root",
node = length(rooted_tree$tip.label) + 1,
frame = "circle",
bg = "lightblue",
cex = 1.2,
font = 2)
# Add scale bar
add.scale.bar(x = 0, y = 1, cex = 1.2, lwd = 2)
mtext("Genetic Distance (Binary)", side = 1, line = 3, cex = 0.9)
# Add legend with mutation counts
# Change legend position to bottom right
legend("bottomright",
legend = c(
paste0("N1 (Normal): 0 somatic"),
paste0("T1 (Primary): ", sum(mutation_matrix["T1", ]), " somatic"),
paste0("X1 (In-vivo): ", sum(mutation_matrix["X1", ]), " somatic"),
paste0("XX1 (Secondary): ", sum(mutation_matrix["XX1", ]), " somatic")
),
col = c("black", "red", "blue", "darkgreen"),
pch = 19,
cex = 1.1,
bty = "n",
pt.cex = 2)
# Add text box with shared mutation info
text(x = max(rooted_tree$edge[,2]) * 0.6,
y = 0.5,
labels = paste0(
"Trunk mutations (all 3): ", shared_all, "\n",
"T1-X1 shared: ", shared_T1_X1, "\n",
"X1-XX1 shared: ", shared_X1_XX1
),
cex = 1,
adj = 0,
font = 2,
col = "darkred")
dev.off()
message("\n✓ Rooted phylogenetic tree created successfully!")
message(paste("Output:", file.path(VIS_DIR, "Clonal_Evolution_Tree_Rooted.pdf")))
# Print tree structure
print(rooted_tree)
Pathway Enrichment
Bubble Plot

CNV-Mutation
Integration Plot

Python Alternative for
Custom Multi-Panel Plot
library(ggplot2)
library(dplyr)
library(patchwork)
library(vcfR)
BASE_DIR <- "/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/15-WES_Patient1_L1_L2_Analysis-19-11-2025/Exome_Patient_1"
CNV_DIR <- paste0(BASE_DIR, "/hg19_analysis/cnv_results")
VCF_DIR <- paste0(BASE_DIR, "/hg19_analysis/somatic_variants")
VIS_DIR <- paste0(BASE_DIR, "/hg19_analysis/visualizations")
create_comprehensive_plot <- function(sample_id) {
# Load CNV data
cnr_file <- file.path(CNV_DIR, sample_id, paste0(sample_id, ".with_rg.cnr"))
cns_file <- file.path(CNV_DIR, sample_id, paste0(sample_id, ".with_rg.cns"))
cnr_data <- read.delim(cnr_file, header = TRUE)
cns_data <- read.delim(cns_file, header = TRUE)
# Prepare chromosome order
cnr_data$chromosome <- factor(cnr_data$chromosome,
levels = paste0("chr", c(1:22, "X", "Y")))
cns_data$chromosome <- factor(cns_data$chromosome,
levels = paste0("chr", c(1:22, "X", "Y")))
# Panel 1: Coverage depth
p1 <- ggplot(cnr_data, aes(x = start, y = depth, color = chromosome)) +
geom_point(size = 0.3, alpha = 0.5) +
facet_grid(. ~ chromosome, scales = "free_x", space = "free_x") +
labs(title = paste(sample_id, "- Coverage Depth"), y = "Coverage") +
theme_minimal() +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.position = "none",
panel.spacing = unit(0.1, "lines")
)
# Panel 2: VAF (from VCF)
vcf_file <- file.path(VCF_DIR, paste0(sample_id, ".hg19.filtered.vcf.gz"))
vcf <- read.vcfR(vcf_file, verbose = FALSE)
# Extract VAF
ad <- extract.gt(vcf, element = "AD")
vaf <- sapply(ad[, sample_id], function(x) {
if (is.na(x)) return(NA)
depths <- as.numeric(strsplit(x, ",")[[1]])
if (length(depths) < 2) return(NA)
return(depths[2] / sum(depths))
})
vaf_data <- data.frame(
chromosome = factor(getCHROM(vcf), levels = paste0("chr", c(1:22, "X", "Y"))),
position = getPOS(vcf),
VAF = vaf
)
p2 <- ggplot(vaf_data, aes(x = position, y = VAF, color = chromosome)) +
geom_point(size = 0.5, alpha = 0.6) +
facet_grid(. ~ chromosome, scales = "free_x", space = "free_x") +
labs(y = "VAF") +
ylim(0, 1) +
theme_minimal() +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.position = "none",
panel.spacing = unit(0.1, "lines")
)
# Panel 3: Copy ratio (log2)
p3 <- ggplot(cnr_data, aes(x = start, y = log2, color = chromosome)) +
geom_point(size = 0.3, alpha = 0.3) +
geom_segment(data = cns_data,
aes(x = start, xend = end, y = log2, yend = log2),
color = "red", linewidth = 1) +
facet_grid(. ~ chromosome, scales = "free_x", space = "free_x") +
geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
labs(y = "Copy ratio (log2)") +
theme_minimal() +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.position = "none",
panel.spacing = unit(0.1, "lines")
)
# Panel 4: Tumor fraction (from segments)
# Calculate estimated tumor fraction per segment
cns_data$tumor_fraction <- pmin(abs(cns_data$log2) * 0.5, 1)
p4 <- ggplot(cns_data, aes(x = start, xend = end, y = tumor_fraction, yend = tumor_fraction)) +
geom_segment(aes(color = chromosome), linewidth = 2) +
facet_grid(. ~ chromosome, scales = "free_x", space = "free_x") +
labs(y = "Tumor fraction", x = "Chromosome") +
ylim(0, 1) +
theme_minimal() +
theme(
legend.position = "none",
panel.spacing = unit(0.1, "lines")
)
# Combine all panels
combined_plot <- p1 / p2 / p3 / p4 +
plot_annotation(
title = paste("Comprehensive Genomic Profile -", sample_id),
theme = theme(plot.title = element_text(size = 16, face = "bold"))
)
# Save
ggsave(
file.path(VIS_DIR, paste0(sample_id, "_comprehensive_profile.pdf")),
combined_plot,
width = 16,
height = 12
)
return(combined_plot)
}
# Generate for all samples
for (sample in c("T1", "X1", "XX1")) {
print(create_comprehensive_plot(sample))
}



NA
NA
---
title: "WES Analysis for Patient 1 CNV Visualizations and Somatic Mutations"
author: "Nasir Mahmood Abbasi"
date: "`r format(Sys.time(), '%B %d, %Y')`"
output:
  html_notebook:
    number_sections: true
    toc: true
    toc_float:
      collapsed: true
    theme: journal
---

# Introduction

**Sample Information:**
*   **N1:** Normal (Constitutional DNA)
*   **T1:** Primary Tumor (Fresh Sézary Cells)
*   **X1:** In-vivo derived Cell Line (from Mouse)
*   **XX1:** Secondary Cell Line (cultured from X1)


```{r setup, include=FALSE}
# Set default options for code chunks
knitr::opts_chunk$set(
  echo = TRUE,
  eval = FALSE, # IMPORTANT: Set to FALSE to prevent accidental execution.
  engine = "bash"
)
```


# Variant Allele Frequency (VAF) Distribution Plot

```{r VAF, echo=TRUE, message=FALSE, warning=FALSE, eval=TRUE}
library(vcfR)
library(ggplot2)
library(dplyr)
library(tidyr)

# Define paths
VCF_DIR <- "/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/15-WES_Patient1_L1_L2_Analysis-19-11-2025/Exome_Patient_1/hg19_analysis/somatic_variants"
VIS_DIR <- "/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/15-WES_Patient1_L1_L2_Analysis-19-11-2025/Exome_Patient_1/hg19_analysis/visualizations"

extract_vaf <- function(vcf_file, sample_name) {
  vcf <- read.vcfR(vcf_file, verbose = FALSE)
  pass_vcf <- vcf[getFILTER(vcf) == "PASS", ]
  
  gt_info <- extract.gt(pass_vcf)
  ad <- extract.gt(pass_vcf, element = "AD")
  
  vaf_values <- sapply(ad[, sample_name], function(x) {
    if (is.na(x)) return(NA)
    depths <- as.numeric(strsplit(x, ",")[[1]])
    if (length(depths) < 2 || sum(depths) == 0) return(NA)
    return(depths[2] / sum(depths))
  })
  
  return(data.frame(sample = sample_name, VAF = vaf_values))
}

# Extract VAF for all samples
vaf_data <- rbind(
  extract_vaf(file.path(VCF_DIR, "T1.hg19.filtered.vcf.gz"), "T1"),
  extract_vaf(file.path(VCF_DIR, "X1.hg19.filtered.vcf.gz"), "X1"),
  extract_vaf(file.path(VCF_DIR, "XX1.hg19.filtered.vcf.gz"), "XX1")
)

vaf_data <- na.omit(vaf_data)

# Create violin plot with jitter
p <- ggplot(vaf_data, aes(x = sample, y = VAF, fill = sample)) +
  geom_violin(alpha = 0.6) +
  geom_jitter(width = 0.2, alpha = 0.4, size = 1) +
  geom_hline(yintercept = 0.5, linetype = "dashed", color = "red") +
  labs(
    title = "Variant Allele Frequency Distribution",
    subtitle = "Red line indicates clonal threshold (VAF = 0.5)",
    x = "Sample",
    y = "Variant Allele Frequency (VAF)"
  ) +
  theme_bw() +
  theme(legend.position = "none")

ggsave(file.path(VIS_DIR, "VAF_distribution.pdf"), p, width = 10, height = 6)
ggsave(file.path(VIS_DIR, "VAF_distribution.png"), p, width = 10, height = 6)


print(p)
```

# Circos Plot for CNV

```{bash, chunk_name="Circos_plot_cnv"}
#!/bin/bash
source /home/bioinfo/miniconda3/etc/profile.d/conda.sh
conda activate wes_env

BASE_DIR="/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/15-WES_Patient1_L1_L2_Analysis-19-11-2025/Exome_Patient_1"
CNV_DIR="$BASE_DIR/hg19_analysis/cnv_results"
VIS_DIR="$BASE_DIR/hg19_analysis/visualizations"

echo "--- Generating Circos plots for each sample ---"

for SAMPLE in T1 X1 XX1; do
  cnvkit.py diagram \
    "$CNV_DIR/$SAMPLE/${SAMPLE}.with_rg.cnr" \
    -s "$CNV_DIR/$SAMPLE/${SAMPLE}.with_rg.cns" \
    -o "$VIS_DIR/${SAMPLE}_circos.pdf"
done

echo "Circos plots saved to $VIS_DIR"

```

# Mutational Signature Analysis

```{r, chunk_name="Mutational_Signature"}
library(deconstructSigs)
library(vcfR)
library(BSgenome.Hsapiens.UCSC.hg19)
library(ggplot2)

VCF_DIR <- "/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/15-WES_Patient1_L1_L2_Analysis-19-11-2025/Exome_Patient_1/hg19_analysis/somatic_variants"
VIS_DIR <- "/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/15-WES_Patient1_L1_L2_Analysis-19-11-2025/Exome_Patient_1/hg19_analysis/visualizations"

# Convert VCF to deconstructSigs format
vcf_to_sigs_input <- function(vcf_file, sample_name) {
  message(paste("Processing", sample_name, "..."))
  vcf <- read.vcfR(vcf_file, verbose = FALSE)
  
  # CORRECTED: Use getFILTER() instead of getFIXED()
  pass_vcf <- vcf[getFILTER(vcf) == "PASS", ]
  
  message(paste("  Found", nrow(pass_vcf@fix), "PASS variants"))
  
  df <- data.frame(
    Sample = sample_name,
    chr = getCHROM(pass_vcf),
    pos = getPOS(pass_vcf),
    ref = getREF(pass_vcf),
    alt = getALT(pass_vcf),
    stringsAsFactors = FALSE
  )
  
  # Filter to only SNVs (single nucleotide variants)
  df <- df[nchar(df$ref) == 1 & nchar(df$alt) == 1, ]
  message(paste("  Kept", nrow(df), "SNVs for signature analysis"))
  
  return(df)
}

# Load all mutations
message("Loading mutations from VCF files...")
all_muts <- rbind(
  vcf_to_sigs_input(file.path(VCF_DIR, "T1.hg19.filtered.vcf.gz"), "T1"),
  vcf_to_sigs_input(file.path(VCF_DIR, "X1.hg19.filtered.vcf.gz"), "X1"),
  vcf_to_sigs_input(file.path(VCF_DIR, "XX1.hg19.filtered.vcf.gz"), "XX1")
)

message(paste("Total mutations:", nrow(all_muts)))

# Convert to trinucleotide context
message("Computing trinucleotide contexts from hg19 reference...")
sigs_input <- mut.to.sigs.input(
  mut.ref = all_muts,
  sample.id = "Sample",
  chr = "chr",
  pos = "pos",
  ref = "ref",
  alt = "alt",
  bsg = BSgenome.Hsapiens.UCSC.hg19
)

message("Trinucleotide matrix dimensions:")
print(dim(sigs_input))

# Determine signatures for each sample
message("Performing signature decomposition...")
pdf(file.path(VIS_DIR, "Mutational_Signatures.pdf"), width = 12, height = 8)

for (sample in c("T1", "X1", "XX1")) {
  message(paste("  Analyzing", sample))
  
  output <- whichSignatures(
    tumor.ref = sigs_input,
    signatures.ref = signatures.cosmic,
    sample.id = sample,
    contexts.needed = TRUE,
    tri.counts.method = "default"
  )
  
  plotSignatures(output, sub = paste("Sample:", sample))
  makePie(output, sub = paste("Sample:", sample))
}

dev.off()

message("✓ Mutational signature analysis complete!")
message(paste("Results saved to:", file.path(VIS_DIR, "Mutational_Signatures.pdf")))

```



# Ti/Tv Ratio Analysis

```{r, chunk_name="Ti/Tv", eval=TRUE}

library(vcfR)
library(ggplot2)

VCF_DIR <- "/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/15-WES_Patient1_L1_L2_Analysis-19-11-2025/Exome_Patient_1/hg19_analysis/somatic_variants"
VIS_DIR <- "/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/15-WES_Patient1_L1_L2_Analysis-19-11-2025/Exome_Patient_1/hg19_analysis/visualizations"

calculate_titv <- function(vcf_file, sample_name) {
  vcf <- read.vcfR(vcf_file, verbose = FALSE)
  pass_vcf <- vcf[getFILTER(vcf) == "PASS", ]
  
  ref <- getREF(pass_vcf)
  alt <- getALT(pass_vcf)
  
  # Only keep SNVs
  snv_idx <- nchar(ref) == 1 & nchar(alt) == 1
  ref <- ref[snv_idx]
  alt <- alt[snv_idx]
  
  # Classify transitions and transversions
  transitions <- c("AG", "GA", "CT", "TC")
  transversions <- c("AC", "CA", "AT", "TA", "GC", "CG", "GT", "TG")
  
  change <- paste0(ref, alt)
  ti_count <- sum(change %in% transitions)
  tv_count <- sum(change %in% transversions)
  
  return(data.frame(
    sample = sample_name,
    Transitions = ti_count,
    Transversions = tv_count,
    TiTv_ratio = ti_count / tv_count
  ))
}

titv_data <- rbind(
  calculate_titv(file.path(VCF_DIR, "T1.hg19.filtered.vcf.gz"), "T1"),
  calculate_titv(file.path(VCF_DIR, "X1.hg19.filtered.vcf.gz"), "X1"),
  calculate_titv(file.path(VCF_DIR, "XX1.hg19.filtered.vcf.gz"), "XX1")
)

# Plot
p <- ggplot(titv_data, aes(x = sample, y = TiTv_ratio, fill = sample)) +
  geom_bar(stat = "identity") +
  geom_hline(yintercept = 2, linetype = "dashed", color = "red") +
  geom_text(aes(label = round(TiTv_ratio, 2)), vjust = -0.5) +
  labs(
    title = "Transition/Transversion Ratio",
    subtitle = "Red line indicates expected WES ratio (~2.0)",
    x = "Sample",
    y = "Ti/Tv Ratio"
  ) +
  theme_bw() +
  theme(legend.position = "none")

print(p)
ggsave(file.path(VIS_DIR, "TiTv_ratio.pdf"), p, width = 8, height = 6)
```




# Clonal Evolution Tree (Phylogenetic)

```{r, chunk_name="Tree"}

library(vcfR)
library(ape)
library(phangorn)

VCF_DIR <- "/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/15-WES_Patient1_L1_L2_Analysis-19-11-2025/Exome_Patient_1/hg19_analysis/somatic_variants"
VIS_DIR <- "/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/15-WES_Patient1_L1_L2_Analysis-19-11-2025/Exome_Patient_1/hg19_analysis/visualizations"

# Create binary mutation matrix (present=1, absent=0)
create_mutation_matrix <- function(vcf_files, sample_names) {
  all_variants <- list()
  
  for (i in seq_along(vcf_files)) {
    vcf <- read.vcfR(vcf_files[i], verbose = FALSE)
    pass_vcf <- vcf[getFILTER(vcf) == "PASS", ]
    
    variants <- paste(getCHROM(pass_vcf), getPOS(pass_vcf), sep = ":")
    all_variants[[sample_names[i]]] <- variants
  }
  
  # Get all unique variants
  unique_variants <- unique(unlist(all_variants))
  
  # Create binary matrix
  mat <- matrix(0, nrow = length(sample_names), ncol = length(unique_variants))
  rownames(mat) <- sample_names
  colnames(mat) <- unique_variants
  
  for (i in seq_along(sample_names)) {
    mat[i, colnames(mat) %in% all_variants[[sample_names[i]]]] <- 1
  }
  
  return(mat)
}

# Create matrix
vcf_files <- c(
  file.path(VCF_DIR, "T1.hg19.filtered.vcf.gz"),
  file.path(VCF_DIR, "X1.hg19.filtered.vcf.gz"),
  file.path(VCF_DIR, "XX1.hg19.filtered.vcf.gz")
)

mutation_matrix <- create_mutation_matrix(vcf_files, c("T1", "X1", "XX1"))

# Calculate distance and build tree
dist_matrix <- dist(mutation_matrix, method = "binary")
tree <- nj(dist_matrix)

# Root the tree with Normal (N1 as outgroup conceptually)
# Plot
pdf(file.path(VIS_DIR, "Clonal_Evolution_Tree.pdf"), width = 10, height = 8)
plot(tree, type = "phylogram", edge.width = 2, 
     main = "Clonal Evolution: T1 → X1 → XX1",
     cex = 1.5)
add.scale.bar()
dev.off()

tree
```

```{r, chunk_name="Tree"}
library(vcfR)
library(ape)
library(phangorn)

VCF_DIR <- "/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/15-WES_Patient1_L1_L2_Analysis-19-11-2025/Exome_Patient_1/hg19_analysis/somatic_variants"
VIS_DIR <- "/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/15-WES_Patient1_L1_L2_Analysis-19-11-2025/Exome_Patient_1/hg19_analysis/visualizations"

# Create binary mutation matrix (present=1, absent=0)
create_mutation_matrix <- function(vcf_files, sample_names) {
  all_variants <- list()
  
  message("Loading variants from VCF files...")
  for (i in seq_along(vcf_files)) {
    vcf <- read.vcfR(vcf_files[i], verbose = FALSE)
    pass_vcf <- vcf[getFILTER(vcf) == "PASS", ]
    
    # Use chr:pos:ref:alt as unique identifier
    variants <- paste(getCHROM(pass_vcf), getPOS(pass_vcf), 
                     getREF(pass_vcf), getALT(pass_vcf), sep = ":")
    all_variants[[sample_names[i]]] <- variants
    message(paste("  ", sample_names[i], ":", length(variants), "somatic mutations"))
  }
  
  # Get all unique variants across all samples
  unique_variants <- unique(unlist(all_variants))
  message(paste("Total unique somatic variants:", length(unique_variants)))
  
  # Create binary matrix
  mat <- matrix(0, nrow = length(sample_names), ncol = length(unique_variants))
  rownames(mat) <- sample_names
  colnames(mat) <- unique_variants
  
  for (i in seq_along(sample_names)) {
    mat[i, colnames(mat) %in% all_variants[[sample_names[i]]]] <- 1
  }
  
  return(list(matrix = mat, variants = all_variants))
}

# Load mutation data
vcf_files <- c(
  file.path(VCF_DIR, "T1.hg19.filtered.vcf.gz"),
  file.path(VCF_DIR, "X1.hg19.filtered.vcf.gz"),
  file.path(VCF_DIR, "XX1.hg19.filtered.vcf.gz")
)

sample_names <- c("T1", "X1", "XX1")
result <- create_mutation_matrix(vcf_files, sample_names)
mutation_matrix <- result$matrix
all_variants <- result$variants

# Add N1 (Normal) as all zeros - representing germline state (no somatic mutations)
mutation_matrix_with_normal <- rbind(
  N1 = rep(0, ncol(mutation_matrix)),  # N1 has ZERO somatic mutations by definition
  mutation_matrix
)

message("\nMutation matrix summary:")
print(rowSums(mutation_matrix_with_normal))

# Calculate shared mutations
shared_T1_X1 <- sum(mutation_matrix["T1", ] == 1 & mutation_matrix["X1", ] == 1)
shared_X1_XX1 <- sum(mutation_matrix["X1", ] == 1 & mutation_matrix["XX1", ] == 1)
shared_T1_XX1 <- sum(mutation_matrix["T1", ] == 1 & mutation_matrix["XX1", ] == 1)
shared_all <- sum(mutation_matrix["T1", ] == 1 & 
                  mutation_matrix["X1", ] == 1 & 
                  mutation_matrix["XX1", ] == 1)

message("\nShared mutations analysis:")
message(paste("  T1 ∩ X1:", shared_T1_X1))
message(paste("  X1 ∩ XX1:", shared_X1_XX1))
message(paste("  T1 ∩ XX1:", shared_T1_XX1))
message(paste("  All three (T1 ∩ X1 ∩ XX1):", shared_all, "- TRUNK mutations"))

# Private mutations
private_T1 <- sum(mutation_matrix["T1", ] == 1 & 
                  mutation_matrix["X1", ] == 0 & 
                  mutation_matrix["XX1", ] == 0)
private_X1 <- sum(mutation_matrix["X1", ] == 1 & 
                  mutation_matrix["T1", ] == 0 & 
                  mutation_matrix["XX1", ] == 0)
private_XX1 <- sum(mutation_matrix["XX1", ] == 1 & 
                   mutation_matrix["T1", ] == 0 & 
                   mutation_matrix["X1", ] == 0)

message("\nPrivate mutations:")
message(paste("  T1 only:", private_T1))
message(paste("  X1 only:", private_X1))
message(paste("  XX1 only:", private_XX1))

# Calculate distance matrix
dist_matrix <- dist(mutation_matrix_with_normal, method = "binary")

# Build neighbor-joining tree
tree <- nj(dist_matrix)

# Root at N1
rooted_tree <- root(tree, outgroup = "N1", resolve.root = TRUE)

# Create better plot
pdf(file.path(VIS_DIR, "Clonal_Evolution_Tree_Rooted.pdf"), width = 12, height = 10)

par(mar = c(5, 4, 4, 2) + 0.1)

plot(rooted_tree, 
     type = "phylogram",
     direction = "rightwards",
     edge.width = 4,
     cex = 1.8,
     font = 2,
     tip.color = c("black", "red", "blue", "darkgreen"),
     edge.color = "grey30",
     main = "Clonal Evolution: N1 (Germline) → T1 → X1 → XX1",
     cex.main = 1.5)

# Add node point at root
nodelabels(text = "Root", 
          node = length(rooted_tree$tip.label) + 1,
          frame = "circle",
          bg = "lightblue",
          cex = 1.2,
          font = 2)

# Add scale bar
add.scale.bar(x = 0, y = 1, cex = 1.2, lwd = 2)
mtext("Genetic Distance (Binary)", side = 1, line = 3, cex = 0.9)

# Add legend with mutation counts
# Change legend position to bottom right
legend("bottomright",
       legend = c(
         paste0("N1 (Normal): 0 somatic"),
         paste0("T1 (Primary): ", sum(mutation_matrix["T1", ]), " somatic"),
         paste0("X1 (In-vivo): ", sum(mutation_matrix["X1", ]), " somatic"),
         paste0("XX1 (Secondary): ", sum(mutation_matrix["XX1", ]), " somatic")
       ),
       col = c("black", "red", "blue", "darkgreen"),
       pch = 19,
       cex = 1.1,
       bty = "n",
       pt.cex = 2)

# Add text box with shared mutation info
text(x = max(rooted_tree$edge[,2]) * 0.6, 
     y = 0.5,
     labels = paste0(
       "Trunk mutations (all 3): ", shared_all, "\n",
       "T1-X1 shared: ", shared_T1_X1, "\n",
       "X1-XX1 shared: ", shared_X1_XX1
     ),
     cex = 1,
     adj = 0,
     font = 2,
     col = "darkred")

dev.off()

message("\n✓ Rooted phylogenetic tree created successfully!")
message(paste("Output:", file.path(VIS_DIR, "Clonal_Evolution_Tree_Rooted.pdf")))

# Print tree structure
print(rooted_tree)


```


# Pathway Enrichment Bubble Plot

```{r, chunk_name="Pathway_Enrichment", fig.width=10, fig.height=8, eval=TRUE}
library(clusterProfiler)
library(org.Hs.eg.db)
library(enrichplot)
library(ggplot2)

VCF_DIR <- "/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/15-WES_Patient1_L1_L2_Analysis-19-11-2025/Exome_Patient_1/hg19_analysis/somatic_variants"
VIS_DIR <- "/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/15-WES_Patient1_L1_L2_Analysis-19-11-2025/Exome_Patient_1/hg19_analysis/visualizations"

# Use the mutated genes from your OncoPrint analysis
# Extract all mutated genes across samples
library(vcfR)

extract_genes <- function(vcf_file) {
  vcf <- read.vcfR(vcf_file, verbose = FALSE)
  pass_vcf <- vcf[getFILTER(vcf) == "PASS", ]
  
  csq <- extract.info(pass_vcf, "CSQ")
  genes <- unique(sapply(csq, function(x) {
    if (is.na(x)) return(NA)
    strsplit(strsplit(x, ",")[[1]][1], "\\|")[[1]][4]
  }))
  
  return(genes[!is.na(genes) & genes != ""])
}

all_genes <- unique(c(
  extract_genes(file.path(VCF_DIR, "T1.hg19.annotated.vcf")),
  extract_genes(file.path(VCF_DIR, "X1.hg19.annotated.vcf")),
  extract_genes(file.path(VCF_DIR, "XX1.hg19.annotated.vcf"))
))

# Convert gene symbols to Entrez IDs
gene_entrez <- bitr(all_genes, fromType = "SYMBOL", 
                    toType = "ENTREZID", 
                    OrgDb = org.Hs.eg.db)

# KEGG pathway enrichment
kegg_enrich <- enrichKEGG(gene = gene_entrez$ENTREZID,
                          organism = 'hsa',
                          pvalueCutoff = 0.05)

# Plot
p <- dotplot(kegg_enrich, showCategory = 20) +
  ggtitle("KEGG Pathway Enrichment of Mutated Genes")

ggsave(file.path(VIS_DIR, "Pathway_Enrichment.pdf"), p, width = 12, height = 8)

p
```



# CNV-Mutation Integration Plot

```{r, chunk_name="CNV-Mutation", fig.width=10, fig.height=8, eval=TRUE}
library(ggplot2)
library(dplyr)
library(patchwork)

LIFTOVER_DIR <- "/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/15-WES_Patient1_L1_L2_Analysis-19-11-2025/Exome_Patient_1/hg19_analysis/liftover_results"
VCF_DIR <- "/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/15-WES_Patient1_L1_L2_Analysis-19-11-2025/Exome_Patient_1/hg19_analysis/somatic_variants"
VIS_DIR <- "/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/15-WES_Patient1_L1_L2_Analysis-19-11-2025/Exome_Patient_1/hg19_analysis/visualizations"

# This script overlays high-impact mutations on top of CNV tracks
# showing which genes have BOTH CNV alterations AND somatic mutations

# Load CNV data (already created in your script)
samples <- c("T1", "X1", "XX1")
all_cns_data <- list()

for (sample_id in samples) {
  file_path <- file.path(LIFTOVER_DIR, paste0(sample_id, ".hg38.cns"))
  cns_data <- read.delim(file_path, header = TRUE, sep = "\t") %>%
    mutate(sample = sample_id)
  all_cns_data[[sample_id]] <- cns_data
}

combined_cnv <- bind_rows(all_cns_data)
combined_cnv$chromosome <- factor(combined_cnv$chromosome, 
                                  levels = paste0("chr", c(1:22, "X", "Y")))

# Extract mutation positions
# (Reuse your parse function from OncoPrint)
# For simplicity, marking only high-impact genes

high_impact_genes <- c("TP53", "NOTCH1", "CDKN2A", "PTEN", "MYC")  # Add your top genes

p_cnv_mut <- ggplot(combined_cnv, aes(x = start / 1e6, y = log2)) +
  geom_point(aes(color = log2), size = 0.5, alpha = 0.7) +
  facet_grid(sample ~ chromosome, scales = "free_x", space = "free_x") +
  scale_color_gradient2(low = "blue", mid = "grey", high = "red", midpoint = 0) +
  labs(
    title = "Integrated CNV and High-Impact Mutations",
    x = "Genomic Position (Mb)",
    y = "Copy Number (log2)"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    strip.text.x = element_text(angle = 90, size = 7),
    legend.position = "bottom"
  )

ggsave(file.path(VIS_DIR, "CNV_Mutation_Integration.pdf"), 
       p_cnv_mut, width = 14, height = 10)
p_cnv_mut
```



# Python Alternative for Custom Multi-Panel Plot

```{r, chunk_name="Multi-Panel-Plot", fig.width=16, fig.height=12, eval=TRUE}
library(ggplot2)
library(dplyr)
library(patchwork)
library(vcfR)

BASE_DIR <- "/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/15-WES_Patient1_L1_L2_Analysis-19-11-2025/Exome_Patient_1"
CNV_DIR <- paste0(BASE_DIR, "/hg19_analysis/cnv_results")
VCF_DIR <- paste0(BASE_DIR, "/hg19_analysis/somatic_variants")
VIS_DIR <- paste0(BASE_DIR, "/hg19_analysis/visualizations")

create_comprehensive_plot <- function(sample_id) {
  # Load CNV data
  cnr_file <- file.path(CNV_DIR, sample_id, paste0(sample_id, ".with_rg.cnr"))
  cns_file <- file.path(CNV_DIR, sample_id, paste0(sample_id, ".with_rg.cns"))
  
  cnr_data <- read.delim(cnr_file, header = TRUE)
  cns_data <- read.delim(cns_file, header = TRUE)
  
  # Prepare chromosome order
  cnr_data$chromosome <- factor(cnr_data$chromosome, 
                                levels = paste0("chr", c(1:22, "X", "Y")))
  cns_data$chromosome <- factor(cns_data$chromosome, 
                                levels = paste0("chr", c(1:22, "X", "Y")))
  
  # Panel 1: Coverage depth
  p1 <- ggplot(cnr_data, aes(x = start, y = depth, color = chromosome)) +
    geom_point(size = 0.3, alpha = 0.5) +
    facet_grid(. ~ chromosome, scales = "free_x", space = "free_x") +
    labs(title = paste(sample_id, "- Coverage Depth"), y = "Coverage") +
    theme_minimal() +
    theme(
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank(),
      legend.position = "none",
      panel.spacing = unit(0.1, "lines")
    )
  
  # Panel 2: VAF (from VCF)
  vcf_file <- file.path(VCF_DIR, paste0(sample_id, ".hg19.filtered.vcf.gz"))
  vcf <- read.vcfR(vcf_file, verbose = FALSE)
  
  # Extract VAF
  ad <- extract.gt(vcf, element = "AD")
  vaf <- sapply(ad[, sample_id], function(x) {
    if (is.na(x)) return(NA)
    depths <- as.numeric(strsplit(x, ",")[[1]])
    if (length(depths) < 2) return(NA)
    return(depths[2] / sum(depths))
  })
  
  vaf_data <- data.frame(
    chromosome = factor(getCHROM(vcf), levels = paste0("chr", c(1:22, "X", "Y"))),
    position = getPOS(vcf),
    VAF = vaf
  )
  
  p2 <- ggplot(vaf_data, aes(x = position, y = VAF, color = chromosome)) +
    geom_point(size = 0.5, alpha = 0.6) +
    facet_grid(. ~ chromosome, scales = "free_x", space = "free_x") +
    labs(y = "VAF") +
    ylim(0, 1) +
    theme_minimal() +
    theme(
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank(),
      legend.position = "none",
      panel.spacing = unit(0.1, "lines")
    )
  
  # Panel 3: Copy ratio (log2)
  p3 <- ggplot(cnr_data, aes(x = start, y = log2, color = chromosome)) +
    geom_point(size = 0.3, alpha = 0.3) +
    geom_segment(data = cns_data, 
                 aes(x = start, xend = end, y = log2, yend = log2),
                 color = "red", linewidth = 1) +
    facet_grid(. ~ chromosome, scales = "free_x", space = "free_x") +
    geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
    labs(y = "Copy ratio (log2)") +
    theme_minimal() +
    theme(
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank(),
      legend.position = "none",
      panel.spacing = unit(0.1, "lines")
    )
  
  # Panel 4: Tumor fraction (from segments)
  # Calculate estimated tumor fraction per segment
  cns_data$tumor_fraction <- pmin(abs(cns_data$log2) * 0.5, 1)
  
  p4 <- ggplot(cns_data, aes(x = start, xend = end, y = tumor_fraction, yend = tumor_fraction)) +
    geom_segment(aes(color = chromosome), linewidth = 2) +
    facet_grid(. ~ chromosome, scales = "free_x", space = "free_x") +
    labs(y = "Tumor fraction", x = "Chromosome") +
    ylim(0, 1) +
    theme_minimal() +
    theme(
      legend.position = "none",
      panel.spacing = unit(0.1, "lines")
    )
  
  # Combine all panels
  combined_plot <- p1 / p2 / p3 / p4 +
    plot_annotation(
      title = paste("Comprehensive Genomic Profile -", sample_id),
      theme = theme(plot.title = element_text(size = 16, face = "bold"))
    )
  
  # Save
  ggsave(
    file.path(VIS_DIR, paste0(sample_id, "_comprehensive_profile.pdf")),
    combined_plot,
    width = 16,
    height = 12
  )
  
  return(combined_plot)
}

# Generate for all samples
for (sample in c("T1", "X1", "XX1")) {
  print(create_comprehensive_plot(sample))
}
```








