1. Load libraries
2. Data PREPARATION
# Load the tab-separated inferCNV HMM gene-level predictions
infercnv_data <- read.delim(
  "../L7_PBMC_uphyloplot/HMM_CNV_predictions.HMMi6.leiden.hmm_mode-subclusters.Pnorm_0.5.pred_cnv_genes.dat",
  header = TRUE,
  sep = "\t"
)

# Keep only rows with no missing value and with finite start/end coordinates
infercnv_data <- infercnv_data[
  complete.cases(infercnv_data) &
    is.finite(infercnv_data$start) &
    is.finite(infercnv_data$end),
]

# Strip a leading "chr" from the chromosome names (the anchored pattern can
# match at most once per value)
infercnv_data$chr <- sub("^chr", "", infercnv_data$chr)

# Quick look at the cleaned table
print(head(infercnv_data))
str(infercnv_data)

# Discard every row whose cell_group_name contains "PBMC"
infercnv_data <- infercnv_data[!grepl("PBMC", infercnv_data$cell_group_name), ]

# Rename the groups: the prefix "all_observations.all_observations_" becomes
# "L7_", whatever follows the prefix is kept
infercnv_data$cell_group_name <- gsub(
  "all_observations\\.all_observations_",
  "L7_",
  infercnv_data$cell_group_name
)

# Confirm the filtering and renaming
print(head(infercnv_data))
3. Query Cytogenetic Bands-Script2
# Load required libraries
library(dplyr)
library(readr)
# Download the UCSC hg38 cytoBand table.
#
# Returns:
#   The table read by read_tsv() with the columns named chr, start, end, band
#   and stain (the names are assigned here because the file is read with
#   explicit col_names). A leading "chr" is removed from the chromosome names.
get_cyto_bands <- function() {
  url <- "http://hgdownload.cse.ucsc.edu/goldenpath/hg38/database/cytoBand.txt.gz"
  cyto_bands <- read_tsv(url, col_names = c("chr", "start", "end", "band", "stain"))
  # Anchor the pattern so that only a leading "chr" is stripped, the same way
  # infercnv_data$chr is normalised
  cyto_bands$chr <- sub("^chr", "", cyto_bands$chr)
  cyto_bands
}
# Return the cytogenetic band name(s) that overlap a genomic interval.
#
# Args:
#   chr:        Chromosome name, written the same way as cyto_bands$chr.
#   pos_start:  Start coordinate of the interval.
#   pos_end:    End coordinate of the interval.
#   cyto_bands: Table with the columns chr, start, end and band.
#
# Returns:
#   One string holding the unique overlapping band names joined by ",", or
#   NA_character_ when no band overlaps. Returning a character NA keeps the
#   result type the same on every call.
find_band <- function(chr, pos_start, pos_end, cyto_bands) {
  # Two closed intervals overlap exactly when each one starts no later than
  # the other one ends. For intervals with start <= end this single test
  # covers the three cases "band start inside", "band end inside" and
  # "band spans the interval". which() drops comparisons that evaluate to NA.
  # NOTE(review): a band that only shares a boundary coordinate with the
  # interval is reported too — confirm against the coordinate conventions of
  # the two inputs.
  overlapping <- which(
    cyto_bands$chr == chr &
      cyto_bands$start <= pos_end &
      cyto_bands$end >= pos_start
  )
  if (length(overlapping) == 0) {
    return(NA_character_)
  }
  paste(unique(cyto_bands$band[overlapping]), collapse = ",")
}
# Download cytogenetic band data
cyto_bands <- get_cyto_bands()

# Annotate every inferCNV row with its band(s); rowwise() makes find_band()
# receive the chr/start/end values of one row at a time
result <- infercnv_data %>%
  rowwise() %>%
  mutate(band = find_band(chr, start, end, cyto_bands)) %>%
  ungroup()

# View the first few rows of the result
print(head(result))

# write.csv() does not create missing directories, so make sure the output
# directory exists before writing (no effect when it is already there)
dir.create("L7_cytogenetics", showWarnings = FALSE, recursive = TRUE)
write.csv(result, "L7_cytogenetics/L7_infercnv_with_bands.csv", row.names = FALSE)

# Display summary of the final result
summary(result)
4. Checking Cytogenetic Data
# Load necessary libraries
library(dplyr)
library(readr)
# Reload the band-annotated inferCNV table (L7_infercnv_with_bands.csv)
infercnv_data <- read_csv("L7_cytogenetics/L7_infercnv_with_bands.csv")

# Glue the chromosome name in front of the band string, e.g. "1" and "p36.33"
# give "1p36.33"
infercnv_data <- mutate(infercnv_data, chr_band = paste0(chr, band))
# Define cytogenetic bands for patient3
# Character vector of band labels written as chromosome + arm + band
# (e.g. "9p21.3"); "4", "5" and "8" name a chromosome without arm or band.
# The vector is later passed to compare_bands(), which looks each label up in
# infercnv_data$chr_band as a whole string.
# NOTE(review): presumably transcribed from the patient's karyotype report —
# confirm the source and the spelling of each label (e.g. "2p13.6").
patient3_bands <- c(
"1p36.1",
"2p21", "2p13.6",
"3q24", "3q25.1", "3q25.31", "3q25.33",
"3q26.1", "3q26.31", "3q26.33", "3q27.2", "3q29",
"4", "5",
"6p21.31", "6p21.2",
"8",
"9p21.3",
"10p14",
"12p13.2", "12p13.1", "12q21.33", "12q22",
"14q12", "14q21.1", "14q22.1",
"15q24.2",
"16q24.2", "16q24.3",
"17p13.3", "17p11.2", "17p12",
"17q22", "17q24.3", "17q25.3"
)
# Define cytogenetic bands for L7
# Character vector of band labels in the same chromosome + arm + band form as
# patient3_bands. Compared with that vector it additionally lists "2q33.1",
# "2q33.3" and "15q26.3", and it lists "4q25" and "4qter" where patient3_bands
# has the bare chromosome "4".
# The vector is later passed to compare_bands(), which looks each label up in
# infercnv_data$chr_band as a whole string.
# NOTE(review): "4qter" and bare chromosome labels ("5", "8") are presumably
# not band names that occur in chr_band — confirm how they should be matched.
L7_bands <- c(
"1p36.1",
"2p21", "2p13.6",
"2q33.1", "2q33.3",
"3q24", "3q25.1", "3q25.31", "3q25.33",
"3q26.1", "3q26.31", "3q26.33", "3q27.2", "3q29",
"4q25", "4qter",
"5",
"6p21.31", "6p21.2",
"8",
"9p21.3",
"10p14",
"12p13.2", "12p13.1", "12q21.33", "12q22",
"14q12", "14q21.1", "14q22.1",
"15q24.2", "15q26.3",
"16q24.2", "16q24.3",
"17p13.3", "17p11.2", "17p12",
"17q22", "17q24.3", "17q25.3"
)
# Report which expected cytogenetic bands occur among the inferCNV band strings.
#
# Args:
#   bands_list:     Character vector of band labels to look for, e.g. "1p36.33".
#   infercnv_bands: Character vector of chr_band strings. One entry may hold
#                   several comma-separated bands that share a single
#                   chromosome prefix, e.g. "1p36.33,p36.32" (this is the form
#                   produced when find_band() returns more than one band).
#
# Returns:
#   A data frame with one row per element of bands_list and the columns Band
#   and Present_in_infercnv ("Present" or "Not Present").
compare_bands <- function(bands_list, infercnv_bands) {
  # Turn "1p36.33,p36.32" into c("1p36.33", "1p36.32"): the chromosome is the
  # text in front of the first arm letter (p/q followed by a digit) and is
  # copied onto every later part that starts directly with an arm letter.
  expand_entry <- function(entry) {
    parts <- strsplit(entry, ",", fixed = TRUE)[[1]]
    if (length(parts) < 2) {
      return(entry)
    }
    arm_pos <- regexpr("[pq][0-9]", parts[1])
    if (arm_pos < 1) {
      # No arm letter in the first part, so there is no prefix to copy
      return(parts)
    }
    chrom <- substr(parts[1], 1, arm_pos - 1)
    rest <- parts[-1]
    bare <- grepl("^[pq][0-9]", rest)
    rest[bare] <- paste0(chrom, rest[bare])
    c(parts[1], rest)
  }

  entries <- unique(as.character(infercnv_bands))
  expanded <- unlist(lapply(entries, expand_entry), use.names = FALSE)
  # The unsplit entries stay in the lookup set so that every label reported as
  # "Present" by a plain whole-string match is still reported as "Present"
  known_bands <- c(entries, expanded)

  data.frame(
    Band = bands_list,
    Present_in_infercnv = ifelse(bands_list %in% known_bands, "Present", "Not Present")
  )
}
# Check both expected-band lists against the chr_band strings from inferCNV
L7_comparison <- compare_bands(L7_bands, infercnv_data$chr_band)
patient3_comparison <- compare_bands(patient3_bands, infercnv_data$chr_band)

# Show the two comparison tables, patient3 first
print("patient3 Comparison:")
print(patient3_comparison)
print("L7 Comparison:")
print(L7_comparison)

# Store each comparison table as its own CSV file
write.csv(
  patient3_comparison,
  file = "L7_cytogenetics/patient3_vs_L7_infercnv_comparison.csv",
  row.names = FALSE
)
write.csv(
  L7_comparison,
  file = "L7_cytogenetics/L7_vs_L7_infercnv_comparison.csv",
  row.names = FALSE
)
5. Cytogenetic bands present and the percentage of cells carrying those
CNVs
# Load required libraries
library(dplyr)
library(ggplot2)
library(tidyr)
library(readr)
# Load the band-annotated inferCNV table
infercnv_file <- "L7_cytogenetics/L7_infercnv_with_bands.csv"
infercnv_data <- read.csv(file = infercnv_file, stringsAsFactors = FALSE)

# Add chr_band: the chromosome name immediately followed by the band string
infercnv_data[["chr_band"]] <- paste0(infercnv_data[["chr"]], infercnv_data[["band"]])

# Write the extended table under a new file name
write.csv(
  infercnv_data,
  file = "L7_cytogenetics/L7_infercnv_with_chr_bands.csv",
  row.names = FALSE
)

# Show the first rows so the new column can be checked, then confirm the save
print(head(infercnv_data))
cat("Updated data frame with chr_band column has been saved as 'L7_infercnv_with_chr_bands.csv'\n")
Updated data frame with chr_band column has been saved as 'L7_infercnv_with_chr_bands.csv'
6. Cytogenetic bands present and the percentage of cells carrying those
CNVs
# Load required libraries
library(dplyr)
library(ggplot2)
library(readr)
# Load the per-gene inferCNV calls for L1 (already labelled with chr_band)
infercnv_file <- "L1_cytogenetics/L1_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(file = infercnv_file, stringsAsFactors = FALSE)

# Denominator for the percentages: the number of distinct cell_group_name
# values (these are cell groups, so "cells" below counts groups)
total_cells <- n_distinct(infercnv_data$cell_group_name)

# For every chr_band/state pair, count the distinct cell groups carrying it
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarise(unique_cells = n_distinct(cell_group_name), .groups = "drop")

# Add the share of groups and a readable label for the states 1-6; any other
# state value is labelled "Unknown"
cnv_summary <- mutate(
  cnv_summary,
  percentage = (unique_cells / total_cells) * 100,
  cnv_type = coalesce(
    c(
      "Complete Loss (0x)",
      "Loss of One Copy (0.5x)",
      "Neutral (1x)",
      "Addition of One Copy (1.5x)",
      "Addition of Two Copies (2x)",
      "Placeholder for >2 Copies (3x)"
    )[match(state, 1:6)],
    "Unknown"
  )
)

# Keep the chr_band/state pairs seen in more than 90% of the cell groups
significant_cnv_summary <- cnv_summary %>%
  select(chr_band, state, unique_cells, percentage, cnv_type) %>%
  filter(percentage > 90)

# Show the filtered table
print(significant_cnv_summary)

# Write the filtered table to disk and confirm where it went
output_file <- "L1_cytogenetics/significant_cnv_summary.csv"
write.csv(significant_cnv_summary, file = output_file, row.names = FALSE)
cat("Significant CNV summary saved to", output_file, "\n")
Significant CNV summary saved to L1_cytogenetics/significant_cnv_summary.csv
# Bar chart of the percentage per chromosome band, bands ordered by decreasing
# percentage and bars coloured by CNV type
ggplot(
  significant_cnv_summary,
  aes(x = reorder(chr_band, -percentage), y = percentage, fill = cnv_type)
) +
  geom_col(position = "dodge") +
  labs(
    x = "Chromosome Band",
    y = "Percentage of Cells Affected (%)",
    title = "Percentage of Cells Affected by CNVs in Chromosome Bands"
  ) +
  theme_minimal() +
  theme(
    legend.title = element_blank(),
    # Slanted band labels on the x axis
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# No plot object is passed, so ggsave() writes the most recent plot
ggsave(
  filename = "L1_cytogenetics/cnv_percentage_by_chrbands.png",
  width = 10,
  height = 6
)
# Load required libraries
library(dplyr)
library(readr)
# Load the per-gene inferCNV calls for L1 (already labelled with chr_band)
infercnv_file <- "L1_cytogenetics/L1_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(file = infercnv_file, stringsAsFactors = FALSE)

# Denominator for the percentages: the number of distinct cell_group_name
# values (these are cell groups, so "cells" below counts groups)
total_cells <- n_distinct(infercnv_data$cell_group_name)

# For every chr_band/state pair, count the distinct cell groups carrying it
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarise(unique_cells = n_distinct(cell_group_name), .groups = "drop")

# Add the share of groups and a readable label for the states 1-6; any other
# state value is labelled "Unknown"
cnv_summary <- mutate(
  cnv_summary,
  percentage = (unique_cells / total_cells) * 100,
  cnv_type = coalesce(
    c(
      "Complete Loss (0x)",
      "Loss of One Copy (0.5x)",
      "Neutral (1x)",
      "Addition of One Copy (1.5x)",
      "Addition of Two Copies (2x)",
      "Placeholder for >2x Copies (3x)"
    )[match(state, 1:6)],
    "Unknown"
  )
)

# Keep the chr_band/state pairs seen in more than 90% of the cell groups
significant_cnv_summary <- cnv_summary %>%
  select(chr_band, state, unique_cells, percentage, cnv_type) %>%
  filter(percentage > 90)

# Write the filtered table to disk and confirm where it went
output_file <- "L1_cytogenetics/significant_gain_loss_summary_90.csv"
write.csv(significant_cnv_summary, file = output_file, row.names = FALSE)
cat("Significant Gain/Loss summary with >90% affected cells saved to", output_file, "\n")
Significant Gain/Loss summary with >90% affected cells saved to L1_cytogenetics/significant_gain_loss_summary_90.csv
# Horizontal bar chart: percentage on the x axis, one chromosomal band per row
# on the y axis, bars coloured by CNV type
ggplot(
  significant_cnv_summary,
  aes(x = percentage, y = chr_band, fill = cnv_type)
) +
  geom_col(position = "dodge") +
  labs(
    x = "Percentage of Cells",
    y = "Chromosomal Band",
    fill = "CNV Type",
    title = "Significant CNVs Affecting >90% of Cells"
  ) +
  theme_minimal() +
  theme(axis.text.y = element_text(angle = 0, hjust = 1))

NA
NA
L1.
# Load required libraries
library(dplyr)
library(ggplot2)
library(readr)
# Load the per-gene inferCNV calls for L1 (already labelled with chr_band)
infercnv_file <- "L1_cytogenetics/L1_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(file = infercnv_file, stringsAsFactors = FALSE)

# Denominator for the percentages: the number of distinct cell_group_name
# values (these are cell groups, so "cells" below counts groups)
total_cells <- n_distinct(infercnv_data$cell_group_name)

# For every chr_band/state pair, count the distinct cell groups carrying it
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarise(unique_cells = n_distinct(cell_group_name), .groups = "drop")

# Add the share of groups and a readable label for the states 1-6; any other
# state value is labelled "Unknown"
cnv_summary <- mutate(
  cnv_summary,
  percentage = (unique_cells / total_cells) * 100,
  cnv_type = coalesce(
    c(
      "Complete Loss (0x)",
      "Loss of One Copy (0.5x)",
      "Neutral (1x)",
      "Addition of One Copy (1.5x)",
      "Addition of Two Copies (2x)",
      "Placeholder for >2x Copies (3x)"
    )[match(state, 1:6)],
    "Unknown"
  )
)

# Keep the chr_band/state pairs seen in more than 90% of the cell groups
significant_cnv_summary <- cnv_summary %>%
  select(chr_band, state, unique_cells, percentage, cnv_type) %>%
  filter(percentage > 90)

# Write the filtered table to disk and confirm where it went
output_file <- "L1_cytogenetics/significant_gain_loss_summary_90.csv"
write.csv(significant_cnv_summary, file = output_file, row.names = FALSE)
cat("Significant Gain/Loss summary with >90% affected cells saved to", output_file, "\n")
Significant Gain/Loss summary with >90% affected cells saved to L1_cytogenetics/significant_gain_loss_summary_90.csv
# Horizontal bar chart: percentage on the x axis, chromosomal bands on the
# y axis ordered by percentage, bars coloured by CNV type
# NOTE(review): the palette lists no colour for "Neutral (1x)", the state-6
# label or "Unknown" — confirm how such rows should be drawn
ggplot(
  significant_cnv_summary,
  aes(x = percentage, y = reorder(chr_band, percentage), fill = cnv_type)
) +
  geom_col(position = "dodge", width = 0.8) +
  scale_fill_manual(values = c(
    "Complete Loss (0x)" = "#fdae61",        # light orange
    "Loss of One Copy (0.5x)" = "#313695",   # dark blue
    "Addition of One Copy (1.5x)" = "red",
    "Addition of Two Copies (2x)" = "darkgreen"
  )) +
  labs(
    x = "Percentage of Cells (%)",
    y = "Chromosomal Band",
    fill = "CNV Type",
    title = "Significant CNVs Affecting >90% of Cells",
    subtitle = "Chromosomal Bands with CNVs in >90% of Cells"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # Centred title and subtitle
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, size = 14),
    # Axis titles and tick labels
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    # Legend placed above the panel
    legend.position = "top",
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 11),
    legend.key.size = unit(0.8, "cm"),
    # Light major x gridlines, no minor x gridlines
    panel.grid.major.x = element_line(color = "gray90"),
    panel.grid.minor.x = element_blank()
  )

L2.
# Load required libraries
library(dplyr)
library(ggplot2)
library(readr)
# Read the per-gene inferCNV calls for L2 (already labelled with chr_band)
infercnv_file <- "L2_cytogenetics/L2_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(infercnv_file, stringsAsFactors = FALSE)

# Denominator for the percentages: number of distinct cell_group_name values
total_cells <- length(unique(infercnv_data$cell_group_name))

# For each chr_band/state pair, count the distinct cell groups carrying it,
# express that as a percentage and attach a readable label for the state
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarize(unique_cells = n_distinct(cell_group_name), .groups = "drop") %>%
  mutate(
    percentage = (unique_cells / total_cells) * 100,
    cnv_type = case_when(
      state == 1 ~ "Complete Loss (0x)",
      state == 2 ~ "Loss of One Copy (0.5x)",
      state == 3 ~ "Neutral (1x)",
      state == 4 ~ "Addition of One Copy (1.5x)",
      state == 5 ~ "Addition of Two Copies (2x)",
      state == 6 ~ "Placeholder for >2x Copies (3x)",
      TRUE ~ "Unknown"  # any state value outside 1-6
    )
  )

# Keep the chr_band/state pairs seen in more than 97% of the cell groups
significant_cnv_summary <- cnv_summary %>%
  filter(percentage > 97) %>%
  select(chr_band, state, unique_cells, percentage, cnv_type)

# NOTE(review): the file name still ends in "_90" although the cut-off applied
# above is 97; the path is left unchanged in case other code reads it —
# confirm and rename if nothing depends on it
output_file <- "L2_cytogenetics/significant_gain_loss_summary_90.csv"
write.csv(significant_cnv_summary, output_file, row.names = FALSE)

# The message reports the cut-off that was actually applied
cat("Significant Gain/Loss summary with >97% affected cells saved to", output_file, "\n")
Significant Gain/Loss summary with >90% affected cells saved to L2_cytogenetics/significant_gain_loss_summary_90.csv
# Horizontal bar chart: percentage on the x axis, chromosomal bands on the
# y axis ordered by percentage, bars coloured by CNV type.
# The title and subtitle state 97%, the cut-off used when the L2 table was
# filtered (filter(percentage > 97)).
# NOTE(review): the palette lists no colour for "Neutral (1x)", the state-6
# label or "Unknown" — confirm how such rows should be drawn
ggplot(
  significant_cnv_summary,
  aes(x = percentage, y = reorder(chr_band, percentage), fill = cnv_type)
) +
  geom_bar(stat = "identity", position = "dodge", width = 0.8) +
  scale_fill_manual(values = c(
    "Complete Loss (0x)" = "#fdae61",        # light orange
    "Loss of One Copy (0.5x)" = "#313695",   # dark blue
    "Addition of One Copy (1.5x)" = "red",
    "Addition of Two Copies (2x)" = "darkgreen"
  )) +
  labs(
    title = "Significant CNVs Affecting >97% of Cells",
    subtitle = "Chromosomal Bands with CNVs in >97% of Cells",
    x = "Percentage of Cells (%)",
    y = "Chromosomal Band",
    fill = "CNV Type"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    # Centred title and subtitle
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 11),
    # Light major x gridlines, no minor x gridlines
    panel.grid.major.x = element_line(color = "gray90"),
    panel.grid.minor.x = element_blank(),
    # Legend placed above the panel
    legend.position = "top",
    legend.key.size = unit(0.8, "cm")
  )

L3.
# Load required libraries
library(dplyr)
library(ggplot2)
library(readr)
# Read the per-gene inferCNV calls for L3 (already labelled with chr_band)
infercnv_file <- "L3_cytogenetics/L3_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(infercnv_file, stringsAsFactors = FALSE)

# Denominator for the percentages: number of distinct cell_group_name values
total_cells <- length(unique(infercnv_data$cell_group_name))

# For each chr_band/state pair, count the distinct cell groups carrying it,
# express that as a percentage and attach a readable label for the state
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarize(unique_cells = n_distinct(cell_group_name), .groups = "drop") %>%
  mutate(
    percentage = (unique_cells / total_cells) * 100,
    cnv_type = case_when(
      state == 1 ~ "Complete Loss (0x)",
      state == 2 ~ "Loss of One Copy (0.5x)",
      state == 3 ~ "Neutral (1x)",
      state == 4 ~ "Addition of One Copy (1.5x)",
      state == 5 ~ "Addition of Two Copies (2x)",
      state == 6 ~ "Placeholder for >2x Copies (3x)",
      TRUE ~ "Unknown"  # any state value outside 1-6
    )
  )

# Keep the chr_band/state pairs seen in more than 98% of the cell groups
significant_cnv_summary <- cnv_summary %>%
  filter(percentage > 98) %>%
  select(chr_band, state, unique_cells, percentage, cnv_type)

# NOTE(review): the file name still ends in "_90" although the cut-off applied
# above is 98; the path is left unchanged in case other code reads it —
# confirm and rename if nothing depends on it
output_file <- "L3_cytogenetics/significant_gain_loss_summary_90.csv"
write.csv(significant_cnv_summary, output_file, row.names = FALSE)

# The message reports the cut-off that was actually applied
cat("Significant Gain/Loss summary with >98% affected cells saved to", output_file, "\n")
Significant Gain/Loss summary with >90% affected cells saved to L3_cytogenetics/significant_gain_loss_summary_90.csv
# Horizontal bar chart: percentage on the x axis, chromosomal bands on the
# y axis ordered by percentage, bars coloured by CNV type.
# The title and subtitle state 98%, the cut-off used when the L3 table was
# filtered (filter(percentage > 98)).
# NOTE(review): the palette lists no colour for "Neutral (1x)", the state-6
# label or "Unknown" — confirm how such rows should be drawn
ggplot(
  significant_cnv_summary,
  aes(x = percentage, y = reorder(chr_band, percentage), fill = cnv_type)
) +
  geom_bar(stat = "identity", position = "dodge", width = 0.8) +
  scale_fill_manual(values = c(
    "Complete Loss (0x)" = "#fdae61",        # light orange
    "Loss of One Copy (0.5x)" = "#313695",   # dark blue
    "Addition of One Copy (1.5x)" = "red",
    "Addition of Two Copies (2x)" = "darkgreen"
  )) +
  labs(
    title = "Significant CNVs Affecting >98% of Cells",
    subtitle = "Chromosomal Bands with CNVs in >98% of Cells",
    x = "Percentage of Cells (%)",
    y = "Chromosomal Band",
    fill = "CNV Type"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    # Centred title and subtitle
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 11),
    # Light major x gridlines, no minor x gridlines
    panel.grid.major.x = element_line(color = "gray90"),
    panel.grid.minor.x = element_blank(),
    # Legend placed above the panel
    legend.position = "top",
    legend.key.size = unit(0.8, "cm")
  )

L4.
# Load required libraries
library(dplyr)
library(ggplot2)
library(readr)
# Read the per-gene inferCNV calls for L4 (already labelled with chr_band)
infercnv_file <- "L4_cytogenetics/L4_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(infercnv_file, stringsAsFactors = FALSE)

# Denominator for the percentages: number of distinct cell_group_name values
total_cells <- length(unique(infercnv_data$cell_group_name))

# For each chr_band/state pair, count the distinct cell groups carrying it,
# express that as a percentage and attach a readable label for the state
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarize(unique_cells = n_distinct(cell_group_name), .groups = "drop") %>%
  mutate(
    percentage = (unique_cells / total_cells) * 100,
    cnv_type = case_when(
      state == 1 ~ "Complete Loss (0x)",
      state == 2 ~ "Loss of One Copy (0.5x)",
      state == 3 ~ "Neutral (1x)",
      state == 4 ~ "Addition of One Copy (1.5x)",
      state == 5 ~ "Addition of Two Copies (2x)",
      state == 6 ~ "Placeholder for >2x Copies (3x)",
      TRUE ~ "Unknown"  # any state value outside 1-6
    )
  )

# Keep the chr_band/state pairs seen in more than 93% of the cell groups
significant_cnv_summary <- cnv_summary %>%
  filter(percentage > 93) %>%
  select(chr_band, state, unique_cells, percentage, cnv_type)

# NOTE(review): the file name still ends in "_90" although the cut-off applied
# above is 93; the path is left unchanged in case other code reads it —
# confirm and rename if nothing depends on it
output_file <- "L4_cytogenetics/significant_gain_loss_summary_90.csv"
write.csv(significant_cnv_summary, output_file, row.names = FALSE)

# The message reports the cut-off that was actually applied
cat("Significant Gain/Loss summary with >93% affected cells saved to", output_file, "\n")
Significant Gain/Loss summary with >90% affected cells saved to L4_cytogenetics/significant_gain_loss_summary_90.csv
# Horizontal bar chart: percentage on the x axis, chromosomal bands on the
# y axis ordered by percentage, bars coloured by CNV type.
# The title and subtitle state 93%, the cut-off used when the L4 table was
# filtered (filter(percentage > 93)).
# NOTE(review): the palette lists no colour for "Neutral (1x)", the state-6
# label or "Unknown" — confirm how such rows should be drawn
ggplot(
  significant_cnv_summary,
  aes(x = percentage, y = reorder(chr_band, percentage), fill = cnv_type)
) +
  geom_bar(stat = "identity", position = "dodge", width = 0.8) +
  scale_fill_manual(values = c(
    "Complete Loss (0x)" = "#fdae61",        # light orange
    "Loss of One Copy (0.5x)" = "#313695",   # dark blue
    "Addition of One Copy (1.5x)" = "red",
    "Addition of Two Copies (2x)" = "darkgreen"
  )) +
  labs(
    title = "Significant CNVs Affecting >93% of Cells",
    subtitle = "Chromosomal Bands with CNVs in >93% of Cells",
    x = "Percentage of Cells (%)",
    y = "Chromosomal Band",
    fill = "CNV Type"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    # Centred title and subtitle
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 11),
    # Light major x gridlines, no minor x gridlines
    panel.grid.major.x = element_line(color = "gray90"),
    panel.grid.minor.x = element_blank(),
    # Legend placed above the panel
    legend.position = "top",
    legend.key.size = unit(0.8, "cm")
  )

L5.
# Load required libraries
library(dplyr)
library(ggplot2)
library(readr)
# Load the per-gene inferCNV calls for L5 (already labelled with chr_band)
infercnv_file <- "L5_cytogenetics/L5_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(file = infercnv_file, stringsAsFactors = FALSE)

# Denominator for the percentages: the number of distinct cell_group_name
# values (these are cell groups, so "cells" below counts groups)
total_cells <- n_distinct(infercnv_data$cell_group_name)

# For every chr_band/state pair, count the distinct cell groups carrying it
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarise(unique_cells = n_distinct(cell_group_name), .groups = "drop")

# Add the share of groups and a readable label for the states 1-6; any other
# state value is labelled "Unknown"
cnv_summary <- mutate(
  cnv_summary,
  percentage = (unique_cells / total_cells) * 100,
  cnv_type = coalesce(
    c(
      "Complete Loss (0x)",
      "Loss of One Copy (0.5x)",
      "Neutral (1x)",
      "Addition of One Copy (1.5x)",
      "Addition of Two Copies (2x)",
      "Placeholder for >2x Copies (3x)"
    )[match(state, 1:6)],
    "Unknown"
  )
)

# Keep the chr_band/state pairs seen in more than 90% of the cell groups
significant_cnv_summary <- cnv_summary %>%
  select(chr_band, state, unique_cells, percentage, cnv_type) %>%
  filter(percentage > 90)

# Write the filtered table to disk and confirm where it went
output_file <- "L5_cytogenetics/significant_gain_loss_summary_90.csv"
write.csv(significant_cnv_summary, file = output_file, row.names = FALSE)
cat("Significant Gain/Loss summary with >90% affected cells saved to", output_file, "\n")

# Horizontal bar chart: percentage on the x axis, chromosomal bands on the
# y axis ordered by percentage, bars coloured by CNV type
# NOTE(review): the palette lists no colour for "Neutral (1x)", the state-6
# label or "Unknown" — confirm how such rows should be drawn
ggplot(
  significant_cnv_summary,
  aes(x = percentage, y = reorder(chr_band, percentage), fill = cnv_type)
) +
  geom_col(position = "dodge", width = 0.8) +
  scale_fill_manual(values = c(
    "Complete Loss (0x)" = "#fdae61",        # light orange
    "Loss of One Copy (0.5x)" = "#313695",   # dark blue
    "Addition of One Copy (1.5x)" = "red",
    "Addition of Two Copies (2x)" = "darkgreen"
  )) +
  labs(
    x = "Percentage of Cells (%)",
    y = "Chromosomal Band",
    fill = "CNV Type",
    title = "Significant CNVs Affecting >90% of Cells",
    subtitle = "Chromosomal Bands with CNVs in >90% of Cells"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # Centred title and subtitle
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, size = 14),
    # Axis titles and tick labels
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    # Legend placed above the panel
    legend.position = "top",
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 11),
    legend.key.size = unit(0.8, "cm"),
    # Light major x gridlines, no minor x gridlines
    panel.grid.major.x = element_line(color = "gray90"),
    panel.grid.minor.x = element_blank()
  )
L6.
# Load required libraries
library(dplyr)
library(ggplot2)
library(readr)
# Load the per-gene inferCNV calls for L6 (already labelled with chr_band)
infercnv_file <- "L6_cytogenetics/L6_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(file = infercnv_file, stringsAsFactors = FALSE)

# Denominator for the percentages: the number of distinct cell_group_name
# values (these are cell groups, so "cells" below counts groups)
total_cells <- n_distinct(infercnv_data$cell_group_name)

# For every chr_band/state pair, count the distinct cell groups carrying it
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarise(unique_cells = n_distinct(cell_group_name), .groups = "drop")

# Add the share of groups and a readable label for the states 1-6; any other
# state value is labelled "Unknown"
cnv_summary <- mutate(
  cnv_summary,
  percentage = (unique_cells / total_cells) * 100,
  cnv_type = coalesce(
    c(
      "Complete Loss (0x)",
      "Loss of One Copy (0.5x)",
      "Neutral (1x)",
      "Addition of One Copy (1.5x)",
      "Addition of Two Copies (2x)",
      "Placeholder for >2x Copies (3x)"
    )[match(state, 1:6)],
    "Unknown"
  )
)

# Keep the chr_band/state pairs seen in more than 90% of the cell groups
significant_cnv_summary <- cnv_summary %>%
  select(chr_band, state, unique_cells, percentage, cnv_type) %>%
  filter(percentage > 90)

# Write the filtered table to disk and confirm where it went
output_file <- "L6_cytogenetics/significant_gain_loss_summary_90.csv"
write.csv(significant_cnv_summary, file = output_file, row.names = FALSE)
cat("Significant Gain/Loss summary with >90% affected cells saved to", output_file, "\n")

# Horizontal bar chart: percentage on the x axis, chromosomal bands on the
# y axis ordered by percentage, bars coloured by CNV type
# NOTE(review): the palette lists no colour for "Neutral (1x)", the state-6
# label or "Unknown" — confirm how such rows should be drawn
ggplot(
  significant_cnv_summary,
  aes(x = percentage, y = reorder(chr_band, percentage), fill = cnv_type)
) +
  geom_col(position = "dodge", width = 0.8) +
  scale_fill_manual(values = c(
    "Complete Loss (0x)" = "#fdae61",        # light orange
    "Loss of One Copy (0.5x)" = "#313695",   # dark blue
    "Addition of One Copy (1.5x)" = "red",
    "Addition of Two Copies (2x)" = "darkgreen"
  )) +
  labs(
    x = "Percentage of Cells (%)",
    y = "Chromosomal Band",
    fill = "CNV Type",
    title = "Significant CNVs Affecting >90% of Cells",
    subtitle = "Chromosomal Bands with CNVs in >90% of Cells"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # Centred title and subtitle
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, size = 14),
    # Axis titles and tick labels
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    # Legend placed above the panel
    legend.position = "top",
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 11),
    legend.key.size = unit(0.8, "cm"),
    # Light major x gridlines, no minor x gridlines
    panel.grid.major.x = element_line(color = "gray90"),
    panel.grid.minor.x = element_blank()
  )
L7.
# Load required libraries
library(dplyr)
library(ggplot2)
library(readr)
# Load the per-gene inferCNV calls for L7 (already labelled with chr_band)
infercnv_file <- "L7_cytogenetics/L7_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(file = infercnv_file, stringsAsFactors = FALSE)

# Denominator for the percentages: the number of distinct cell_group_name
# values (these are cell groups, so "cells" below counts groups)
total_cells <- n_distinct(infercnv_data$cell_group_name)

# For every chr_band/state pair, count the distinct cell groups carrying it
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarise(unique_cells = n_distinct(cell_group_name), .groups = "drop")

# Add the share of groups and a readable label for the states 1-6; any other
# state value is labelled "Unknown"
cnv_summary <- mutate(
  cnv_summary,
  percentage = (unique_cells / total_cells) * 100,
  cnv_type = coalesce(
    c(
      "Complete Loss (0x)",
      "Loss of One Copy (0.5x)",
      "Neutral (1x)",
      "Addition of One Copy (1.5x)",
      "Addition of Two Copies (2x)",
      "Placeholder for >2x Copies (3x)"
    )[match(state, 1:6)],
    "Unknown"
  )
)

# Keep the chr_band/state pairs seen in more than 90% of the cell groups
significant_cnv_summary <- cnv_summary %>%
  select(chr_band, state, unique_cells, percentage, cnv_type) %>%
  filter(percentage > 90)

# Write the filtered table to disk and confirm where it went
output_file <- "L7_cytogenetics/significant_gain_loss_summary_90.csv"
write.csv(significant_cnv_summary, file = output_file, row.names = FALSE)
cat("Significant Gain/Loss summary with >90% affected cells saved to", output_file, "\n")
Significant Gain/Loss summary with >90% affected cells saved to L7_cytogenetics/significant_gain_loss_summary_90.csv
# Horizontal bar chart: percentage on the x axis, chromosomal bands on the
# y axis ordered by percentage, bars coloured by CNV type
# NOTE(review): the palette lists no colour for "Neutral (1x)", the state-6
# label or "Unknown" — confirm how such rows should be drawn
ggplot(
  significant_cnv_summary,
  aes(x = percentage, y = reorder(chr_band, percentage), fill = cnv_type)
) +
  geom_col(position = "dodge", width = 0.8) +
  scale_fill_manual(values = c(
    "Complete Loss (0x)" = "#fdae61",        # light orange
    "Loss of One Copy (0.5x)" = "#313695",   # dark blue
    "Addition of One Copy (1.5x)" = "red",
    "Addition of Two Copies (2x)" = "darkgreen"
  )) +
  labs(
    x = "Percentage of Cells (%)",
    y = "Chromosomal Band",
    fill = "CNV Type",
    title = "Significant CNVs Affecting >90% of Cells",
    subtitle = "Chromosomal Bands with CNVs in >90% of Cells"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    # Centred title and subtitle
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, size = 14),
    # Axis titles and tick labels
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    # Legend placed above the panel
    legend.position = "top",
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 11),
    legend.key.size = unit(0.8, "cm"),
    # Light major x gridlines, no minor x gridlines
    panel.grid.major.x = element_line(color = "gray90"),
    panel.grid.minor.x = element_blank()
  )

L1: CNVs in >8% and <10% of cells.
# Load required libraries
library(dplyr)
library(ggplot2)
library(readr)
# Read the per-gene inferCNV calls for L1 (already labelled with chr_band)
infercnv_file <- "L1_cytogenetics/L1_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(infercnv_file, stringsAsFactors = FALSE)

# Denominator for the percentages: number of distinct cell_group_name values
total_cells <- length(unique(infercnv_data$cell_group_name))

# For each chr_band/state pair, count the distinct cell groups carrying it,
# express that as a percentage and attach a readable label for the state
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarize(unique_cells = n_distinct(cell_group_name), .groups = "drop") %>%
  mutate(
    percentage = (unique_cells / total_cells) * 100,
    cnv_type = case_when(
      state == 1 ~ "Complete Loss (0x)",
      state == 2 ~ "Loss of One Copy (0.5x)",
      state == 3 ~ "Neutral (1x)",
      state == 4 ~ "Addition of One Copy (1.5x)",
      state == 5 ~ "Addition of Two Copies (2x)",
      state == 6 ~ "Placeholder for >2x Copies (3x)",
      TRUE ~ "Unknown"  # any state value outside 1-6
    )
  )

# Keep the chr_band/state pairs seen in more than 8% and fewer than 10% of the
# cell groups (both bounds are exclusive)
significant_cnv_summary <- cnv_summary %>%
  filter(percentage > 8 & percentage < 10) %>%
  select(chr_band, state, unique_cells, percentage, cnv_type)

# NOTE(review): the file name still says "less_than_5%" although the band
# applied above is 8-10%; the path is left unchanged in case other code reads
# it — confirm which range is intended and rename accordingly
output_file <- "L1_cytogenetics/significant_gain_loss_summary_less_than_5%.csv"
write.csv(significant_cnv_summary, output_file, row.names = FALSE)

# The message reports the range that was actually applied
cat("Significant Gain/Loss summary with >8% and <10% affected cells saved to", output_file, "\n")
Significant Gain/Loss summary with <5% affected cells saved to L1_cytogenetics/significant_gain_loss_summary_less_than_5%.csv
# Horizontal bar chart: percentage on the x axis, chromosomal bands on the
# y axis ordered by percentage, bars coloured by CNV type.
# The title and subtitle state the range used when this table was filtered
# (filter(percentage > 8 & percentage < 10)).
# NOTE(review): the palette lists no colour for "Neutral (1x)", the state-6
# label or "Unknown" — confirm how such rows should be drawn
ggplot(
  significant_cnv_summary,
  aes(x = percentage, y = reorder(chr_band, percentage), fill = cnv_type)
) +
  geom_bar(stat = "identity", position = "dodge", width = 0.8) +
  scale_fill_manual(values = c(
    "Complete Loss (0x)" = "#fdae61",        # light orange
    "Loss of One Copy (0.5x)" = "#313695",   # dark blue
    "Addition of One Copy (1.5x)" = "red",
    "Addition of Two Copies (2x)" = "darkgreen"
  )) +
  labs(
    title = "Significant CNVs Affecting >8% and <10% of Cells",
    subtitle = "Chromosomal Bands with CNVs in >8% and <10% of Cells",
    x = "Percentage of Cells (%)",
    y = "Chromosomal Band",
    fill = "CNV Type"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    # Centred title and subtitle
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 11),
    # Light major x gridlines, no minor x gridlines
    panel.grid.major.x = element_line(color = "gray90"),
    panel.grid.minor.x = element_blank(),
    # Legend placed above the panel
    legend.position = "top",
    legend.key.size = unit(0.8, "cm")
  )

---
title: "Cytogenetic Analysis"
author: Nasir Mahmood Abbasi
date: "`r Sys.Date()`"
output:
  #rmdformats::readthedown
  html_notebook:
    toc: true
    toc_float: true
    toc_collapsed: true
---

# 1. Load libraries
```{r setup, include=FALSE}
# Install (only when missing) and load the required packages.
# biomaRt is distributed through Bioconductor rather than CRAN, so
# install.packages("biomaRt") cannot find it; it has to be installed with
# BiocManager, which itself is a regular CRAN package.
if (!requireNamespace("biomaRt", quietly = TRUE)) {
  if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
  }
  BiocManager::install("biomaRt")
}
if (!requireNamespace("dplyr", quietly = TRUE)) install.packages("dplyr")
library(biomaRt)
library(dplyr)


```

# 2. Data Preparation
```{r data1, fig.height=6, fig.width=12}


# Load the inferCNV HMM per-gene predictions (tab-separated, first row = header)
infercnv_data <- read.delim(
  "../L7_PBMC_uphyloplot/HMM_CNV_predictions.HMMi6.leiden.hmm_mode-subclusters.Pnorm_0.5.pred_cnv_genes.dat",
  sep = "\t",
  header = TRUE
)

# Keep only rows without any missing field and with finite start/end values
infercnv_data <- infercnv_data[
  complete.cases(infercnv_data) &
    is.finite(infercnv_data$start) &
    is.finite(infercnv_data$end),
]

# Strip a leading "chr" from the chromosome names (the pattern is anchored, so
# at most one occurrence per value can match)
infercnv_data[["chr"]] <- sub("^chr", "", infercnv_data[["chr"]])

# Show the head and the structure of the cleaned table
print(head(infercnv_data))
str(infercnv_data)

# Drop every row whose cell_group_name mentions PBMC
infercnv_data <- infercnv_data[
  !grepl("PBMC", infercnv_data[["cell_group_name"]]),
]

# Shorten the "all_observations.all_observations_" prefix to "L7_", leaving the
# remainder of the group name untouched
infercnv_data[["cell_group_name"]] <- gsub(
  "all_observations\\.all_observations_",
  "L7_",
  infercnv_data[["cell_group_name"]]
)

# Show the head again after filtering and renaming
print(head(infercnv_data))



```


# 3. Query Cytogenetic Bands-Script2
```{r data3, fig.height=6, fig.width=10}

# Load required libraries
library(dplyr)
library(readr)

# Download the UCSC cytogenetic band table and return it as a tibble with the
# columns chr, start, end, band and stain.
#
# url: location of a headerless, tab-separated cytoBand file; defaults to the
#      hg38 table on the UCSC download server, so existing calls without
#      arguments behave as before.
#
# Only a leading "chr" is stripped from the chromosome names (anchored
# pattern), matching how the inferCNV chromosome names are cleaned elsewhere
# in this notebook.
get_cyto_bands <- function(url = "http://hgdownload.cse.ucsc.edu/goldenpath/hg38/database/cytoBand.txt.gz") {
  cyto_bands <- read_tsv(url, col_names = c("chr", "start", "end", "band", "stain"))
  cyto_bands$chr <- sub("^chr", "", cyto_bands$chr)
  cyto_bands
}

# Find the cytogenetic band(s) overlapping a genomic interval.
#
# chr:        chromosome name, compared for equality with cyto_bands$chr
# pos_start:  first position of the interval
# pos_end:    last position of the interval
# cyto_bands: table with the columns chr, start, end and band
#
# Returns a single string with the unique overlapping band names joined by
# ",", or NA_character_ when no band overlaps. Returning a character NA (not
# a logical one) keeps the result type the same for hits and misses.
#
# NOTE(review): both ends of both intervals are treated as inclusive. UCSC
# cytoBand coordinates are presumably 0-based half-open, in which case a
# region ending exactly on a band boundary also picks up the neighbouring
# band -- confirm whether that is intended.
find_band <- function(chr, pos_start, pos_end, cyto_bands) {
  band_start <- cyto_bands$start
  band_end <- cyto_bands$end

  overlaps <- (band_start >= pos_start & band_start <= pos_end) |  # band starts inside the interval
    (band_end >= pos_start & band_end <= pos_end) |                # band ends inside the interval
    (band_start <= pos_start & band_end >= pos_end)                # band covers the whole interval

  # which() drops comparisons that evaluate to NA, so rows with missing
  # values are never selected
  hits <- which(cyto_bands$chr == chr & overlaps)

  if (length(hits) == 0) {
    return(NA_character_)
  }
  paste(unique(cyto_bands$band[hits]), collapse = ",")
}

# Download cytogenetic band data
cyto_bands <- get_cyto_bands()

# Look up the overlapping band(s) for every row of infercnv_data, one row at a
# time, and store them in a new `band` column
result <- infercnv_data %>%
  rowwise() %>%
  mutate(band = find_band(chr, start, end, cyto_bands)) %>%
  ungroup()

# View the first few rows of the result
print(head(result))

# Save the result to a new CSV file. write.csv() does not create missing
# directories, so make sure the output folder exists first (this is a no-op
# when it is already there).
dir.create("L7_cytogenetics", showWarnings = FALSE, recursive = TRUE)
write.csv(result, "L7_cytogenetics/L7_infercnv_with_bands.csv", row.names = FALSE)

# Display summary of the final result
summary(result)

```


# 4. Checking Cytogenetic Data
```{r data4, fig.height=6, fig.width=10}
# Load necessary libraries
library(dplyr)
library(readr)

# Read the band-annotated inferCNV table (L7_infercnv_with_bands.csv); the
# chromosome is taken from `chr` and the band label from `band`
infercnv_data <- read_csv("L7_cytogenetics/L7_infercnv_with_bands.csv")

# Add a combined chromosome + band label per row (e.g. "1p36.33")
infercnv_data <- mutate(infercnv_data, chr_band = paste0(chr, band))

# Reference cytogenetic bands for patient3, one chromosome (arm) per line.
# Entries without an arm ("4", "5", "8") are bare chromosome names.
patient3_bands <- c(
  "1p36.1",
  "2p21", "2p13.6",
  "3q24", "3q25.1", "3q25.31", "3q25.33", "3q26.1", "3q26.31", "3q26.33", "3q27.2", "3q29",
  "4",
  "5",
  "6p21.31", "6p21.2",
  "8",
  "9p21.3",
  "10p14",
  "12p13.2", "12p13.1", "12q21.33", "12q22",
  "14q12", "14q21.1", "14q22.1",
  "15q24.2",
  "16q24.2", "16q24.3",
  "17p13.3", "17p11.2", "17p12",
  "17q22", "17q24.3", "17q25.3"
)

# Reference cytogenetic bands for L7, one chromosome (arm) per line.
# Compared with patient3 this list adds 2q33.1, 2q33.3, 4q25, 4qter and
# 15q26.3, and it has no bare "4" entry.
L7_bands <- c(
  "1p36.1",
  "2p21", "2p13.6",
  "2q33.1", "2q33.3",
  "3q24", "3q25.1", "3q25.31", "3q25.33", "3q26.1", "3q26.31", "3q26.33", "3q27.2", "3q29",
  "4q25", "4qter",
  "5",
  "6p21.31", "6p21.2",
  "8",
  "9p21.3",
  "10p14",
  "12p13.2", "12p13.1", "12q21.33", "12q22",
  "14q12", "14q21.1", "14q22.1",
  "15q24.2", "15q26.3",
  "16q24.2", "16q24.3",
  "17p13.3", "17p11.2", "17p12",
  "17q22", "17q24.3", "17q25.3"
)

# Report, for every reference band, whether it occurs among the inferCNV bands.
#
# bands_list:     vector of reference band labels to look up
# infercnv_bands: vector of band labels observed in the inferCNV data
#
# Returns a data frame with one row per reference band: `Band` holds the label
# and `Present_in_infercnv` is "Present" on an exact match, otherwise
# "Not Present".
compare_bands <- function(bands_list, infercnv_bands) {
  found <- bands_list %in% infercnv_bands
  status <- ifelse(found, "Present", "Not Present")
  data.frame(Band = bands_list, Present_in_infercnv = status)
}

# Build the set of band labels observed in the inferCNV data.
# A region that spans several bands is stored as one comma-joined label
# (e.g. "1p36.33,p36.32"), which can never equal a single reference band.
# Split those labels so every individual band takes part in the lookup. The
# original combined labels are kept as well, so nothing that matched before
# is lost.
# NOTE(review): matching is still exact, so bare chromosome entries ("4", "5",
# "8") and reference bands at a different resolution than the inferCNV labels
# only match an identical label -- confirm whether prefix matching is wanted.
band_parts <- strsplit(as.character(infercnv_data$band), ",", fixed = TRUE)
single_bands <- paste0(
  rep(infercnv_data$chr, lengths(band_parts)),
  unlist(band_parts)
)
observed_bands <- union(infercnv_data$chr_band, single_bands)

# Compare patient3 bands to L7_infercnv bands
patient3_comparison <- compare_bands(patient3_bands, observed_bands)

# Compare L7 bands to L7_infercnv bands
L7_comparison <- compare_bands(L7_bands, observed_bands)

# View the comparison results
print("patient3 Comparison:")
print(patient3_comparison)

print("L7 Comparison:")
print(L7_comparison)

# Save the comparison results to CSV files
write.csv(patient3_comparison, "L7_cytogenetics/patient3_vs_L7_infercnv_comparison.csv", row.names = FALSE)
write.csv(L7_comparison, "L7_cytogenetics/L7_vs_L7_infercnv_comparison.csv", row.names = FALSE)


```

# 5. Add the combined chromosome-band (chr_band) column
```{r data5, fig.height=8, fig.width=12}

# Load required libraries
library(dplyr)
library(ggplot2)
library(tidyr)
library(readr)

# Load the band-annotated inferCNV table
infercnv_file <- "L7_cytogenetics/L7_infercnv_with_bands.csv"
infercnv_data <- read.csv(infercnv_file, stringsAsFactors = FALSE)

# Concatenate chromosome and band (no separator) into a single label per row
infercnv_data[["chr_band"]] <- with(infercnv_data, paste0(chr, band))

# Write the table, now including chr_band, to its own CSV file
write.csv(infercnv_data, "L7_cytogenetics/L7_infercnv_with_chr_bands.csv", row.names = FALSE)

# Show the head of the table so the new column can be checked
print(head(infercnv_data))

cat("Updated data frame with chr_band column has been saved as 'L7_infercnv_with_chr_bands.csv'\n")


```


# 6. Cytogenetic bands present and percentage of cells carrying each CNV
```{r data6, fig.height=8, fig.width=12}

# Load required libraries
library(dplyr)
library(ggplot2)
library(readr)

# Read the updated inferCNV results with chr_band column
infercnv_file <- "L1_cytogenetics/L1_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(infercnv_file, stringsAsFactors = FALSE)

# Denominator for the percentages: number of distinct cell_group_name values.
# NOTE(review): this counts cell groups rather than individual cells; confirm
# that the "Percentage of Cells" wording in the labels is what is intended.
total_cells <- length(unique(infercnv_data$cell_group_name))

# A band/state pair is reported only when it is seen in more than this
# percentage of cell groups
pct_threshold <- 90

# Per band and HMM state: count the distinct cell groups, convert the count to
# a percentage, and translate the numeric state into a readable label
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarize(unique_cells = n_distinct(cell_group_name), .groups = "drop") %>%
  mutate(
    percentage = (unique_cells / total_cells) * 100,
    cnv_type = case_when(
      state == 1 ~ "Complete Loss (0x)",
      state == 2 ~ "Loss of One Copy (0.5x)",
      state == 3 ~ "Neutral (1x)",
      state == 4 ~ "Addition of One Copy (1.5x)",
      state == 5 ~ "Addition of Two Copies (2x)",
      state == 6 ~ "Placeholder for >2 Copies (3x)",
      TRUE ~ "Unknown"  # Fallback for any state value outside 1-6
    )
  )

# Keep the band/state pairs above the threshold (all states)
significant_cnv_summary <- cnv_summary %>%
  filter(percentage > pct_threshold) %>%
  select(chr_band, state, unique_cells, percentage, cnv_type)

# Print the summary for significant CNV analysis
print(significant_cnv_summary)

# Save the significant CNV summary to a CSV file
output_file <- "L1_cytogenetics/significant_cnv_summary.csv"
write.csv(significant_cnv_summary, output_file, row.names = FALSE)

# Print confirmation message
cat("Significant CNV summary saved to", output_file, "\n")

# Bar chart of the percentage per chromosome band, bands ordered from the
# highest to the lowest percentage
cnv_plot <- ggplot(
  significant_cnv_summary,
  aes(x = reorder(chr_band, -percentage), y = percentage, fill = cnv_type)
) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Percentage of Cells Affected by CNVs in Chromosome Bands",
       x = "Chromosome Band",
       y = "Percentage of Cells Affected (%)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.title = element_blank())
print(cnv_plot)

# Save exactly this plot (passed explicitly instead of relying on whatever the
# most recent plot happens to be)
ggsave("L1_cytogenetics/cnv_percentage_by_chrbands.png", plot = cnv_plot, width = 10, height = 6)

```

```{r data7, fig.height=8, fig.width=12}
# Load required libraries (ggplot2 is needed for the bar plot at the end of
# this chunk)
library(dplyr)
library(ggplot2)
library(readr)

# Read the updated inferCNV results with chr_band column
infercnv_file <- "L1_cytogenetics/L1_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(infercnv_file, stringsAsFactors = FALSE)

# Denominator for the percentages: number of distinct cell_group_name values.
# NOTE(review): this counts cell groups rather than individual cells; confirm
# that the "Percentage of Cells" wording in the labels is what is intended.
total_cells <- length(unique(infercnv_data$cell_group_name))

# A band/state pair is reported only when it is seen in more than this
# percentage of cell groups; the message and plot title are built from it
pct_threshold <- 90

# Per band and HMM state: count the distinct cell groups, convert the count to
# a percentage, and translate the numeric state into a readable label
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarize(unique_cells = n_distinct(cell_group_name), .groups = "drop") %>%
  mutate(
    percentage = (unique_cells / total_cells) * 100,
    cnv_type = case_when(
      state == 1 ~ "Complete Loss (0x)",
      state == 2 ~ "Loss of One Copy (0.5x)",
      state == 3 ~ "Neutral (1x)",
      state == 4 ~ "Addition of One Copy (1.5x)",
      state == 5 ~ "Addition of Two Copies (2x)",
      state == 6 ~ "Placeholder for >2x Copies (3x)",
      TRUE ~ "Unknown"  # Fallback for any state value outside 1-6
    )
  )

# Keep the band/state pairs above the threshold (all states)
significant_cnv_summary <- cnv_summary %>%
  filter(percentage > pct_threshold) %>%
  select(chr_band, state, unique_cells, percentage, cnv_type)


# Save the filtered gain/loss summary to a CSV file
output_file <- "L1_cytogenetics/significant_gain_loss_summary_90.csv"
write.csv(significant_cnv_summary, output_file, row.names = FALSE)

# Print confirmation message
cat(
  paste0("Significant Gain/Loss summary with >", pct_threshold, "% affected cells saved to"),
  output_file, "\n"
)

# Create a bar plot with percentage on X-axis and chromosomal bands on Y-axis
ggplot(significant_cnv_summary, aes(x = percentage, y = chr_band, fill = cnv_type)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = paste0("Significant CNVs Affecting >", pct_threshold, "% of Cells"),
       x = "Percentage of Cells",
       y = "Chromosomal Band",
       fill = "CNV Type") +
  theme_minimal() +
  theme(axis.text.y = element_text(angle = 0, hjust = 1))


```

# L1.
```{r data8, fig.height=8, fig.width=12}
# Load required libraries
library(dplyr)
library(ggplot2)
library(readr)

# Read the updated inferCNV results with chr_band column
infercnv_file <- "L1_cytogenetics/L1_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(infercnv_file, stringsAsFactors = FALSE)

# Denominator for the percentages: number of distinct cell_group_name values.
# NOTE(review): this counts cell groups rather than individual cells; confirm
# that the "Percentage of Cells" wording in the labels is what is intended.
total_cells <- length(unique(infercnv_data$cell_group_name))

# A band/state pair is reported only when it is seen in more than this
# percentage of cell groups; the message and plot labels are built from it
pct_threshold <- 90

# Per band and HMM state: count the distinct cell groups, convert the count to
# a percentage, and translate the numeric state into a readable label
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarize(unique_cells = n_distinct(cell_group_name), .groups = "drop") %>%
  mutate(
    percentage = (unique_cells / total_cells) * 100,
    cnv_type = case_when(
      state == 1 ~ "Complete Loss (0x)",
      state == 2 ~ "Loss of One Copy (0.5x)",
      state == 3 ~ "Neutral (1x)",
      state == 4 ~ "Addition of One Copy (1.5x)",
      state == 5 ~ "Addition of Two Copies (2x)",
      state == 6 ~ "Placeholder for >2x Copies (3x)",
      TRUE ~ "Unknown"  # any state value outside 1-6
    )
  )

# Keep the band/state pairs above the threshold (all states)
significant_cnv_summary <- cnv_summary %>%
  filter(percentage > pct_threshold) %>%
  select(chr_band, state, unique_cells, percentage, cnv_type)

# Save the filtered gain/loss summary to a CSV file
output_file <- "L1_cytogenetics/significant_gain_loss_summary_90.csv"
write.csv(significant_cnv_summary, output_file, row.names = FALSE)

# Print confirmation message
cat(
  paste0("Significant Gain/Loss summary with >", pct_threshold, "% affected cells saved to"),
  output_file, "\n"
)

# Horizontal bar chart: percentage of cells on the X-axis, chromosomal bands
# (ordered by percentage) on the Y-axis
ggplot(significant_cnv_summary, aes(x = percentage, y = reorder(chr_band, percentage), fill = cnv_type)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.8) +  # Adjusted bar width
  # Every label that case_when() above can produce gets an explicit colour, so
  # no category falls back to the scale's NA colour
  scale_fill_manual(values = c("Complete Loss (0x)" = "#fdae61",  # light orange
                               "Loss of One Copy (0.5x)" = "#313695",  # dark blue
                               "Neutral (1x)" = "gray70",
                               "Addition of One Copy (1.5x)" = "red",
                               "Addition of Two Copies (2x)" = "darkgreen",
                               "Placeholder for >2x Copies (3x)" = "purple",
                               "Unknown" = "black")) +
  labs(title = paste0("Significant CNVs Affecting >", pct_threshold, "% of Cells"),
       subtitle = paste0("Chromosomal Bands with CNVs in >", pct_threshold, "% of Cells"),
       x = "Percentage of Cells (%)",
       y = "Chromosomal Band",
       fill = "CNV Type") +
  theme_minimal(base_size = 14) +  # Adjust text size for readability
  theme(
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),  # Center the title
    plot.subtitle = element_text(hjust = 0.5, size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 11),
    panel.grid.major.x = element_line(color = "gray90"),  # Add gridlines
    panel.grid.minor.x = element_blank(),  # Remove minor gridlines
    legend.position = "top",  # Position legend at the top
    legend.key.size = unit(0.8, "cm")
  )

```
# L2.
```{r data9, fig.height=8, fig.width=12}
# Load required libraries
library(dplyr)
library(ggplot2)
library(readr)

# Read the updated inferCNV results with chr_band column
infercnv_file <- "L2_cytogenetics/L2_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(infercnv_file, stringsAsFactors = FALSE)

# Denominator for the percentages: number of distinct cell_group_name values.
# NOTE(review): this counts cell groups rather than individual cells; confirm
# that the "Percentage of Cells" wording in the labels is what is intended.
total_cells <- length(unique(infercnv_data$cell_group_name))

# A band/state pair is reported only when it is seen in more than this
# percentage of cell groups. The message and plot labels are built from this
# value so they always state the threshold that was actually applied.
pct_threshold <- 97

# Per band and HMM state: count the distinct cell groups, convert the count to
# a percentage, and translate the numeric state into a readable label
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarize(unique_cells = n_distinct(cell_group_name), .groups = "drop") %>%
  mutate(
    percentage = (unique_cells / total_cells) * 100,
    cnv_type = case_when(
      state == 1 ~ "Complete Loss (0x)",
      state == 2 ~ "Loss of One Copy (0.5x)",
      state == 3 ~ "Neutral (1x)",
      state == 4 ~ "Addition of One Copy (1.5x)",
      state == 5 ~ "Addition of Two Copies (2x)",
      state == 6 ~ "Placeholder for >2x Copies (3x)",
      TRUE ~ "Unknown"  # any state value outside 1-6
    )
  )

# Keep the band/state pairs above the threshold (all states)
significant_cnv_summary <- cnv_summary %>%
  filter(percentage > pct_threshold) %>%
  select(chr_band, state, unique_cells, percentage, cnv_type)

# Save the filtered gain/loss summary to a CSV file.
# NOTE(review): the file name still ends in "_90" although the threshold is
# 97; it is kept unchanged so anything reading this path keeps working --
# confirm which of the two is intended.
output_file <- "L2_cytogenetics/significant_gain_loss_summary_90.csv"
write.csv(significant_cnv_summary, output_file, row.names = FALSE)

# Print confirmation message
cat(
  paste0("Significant Gain/Loss summary with >", pct_threshold, "% affected cells saved to"),
  output_file, "\n"
)

# Horizontal bar chart: percentage of cells on the X-axis, chromosomal bands
# (ordered by percentage) on the Y-axis
ggplot(significant_cnv_summary, aes(x = percentage, y = reorder(chr_band, percentage), fill = cnv_type)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.8) +  # Adjusted bar width
  # Every label that case_when() above can produce gets an explicit colour, so
  # no category falls back to the scale's NA colour
  scale_fill_manual(values = c("Complete Loss (0x)" = "#fdae61",  # light orange
                               "Loss of One Copy (0.5x)" = "#313695",  # dark blue
                               "Neutral (1x)" = "gray70",
                               "Addition of One Copy (1.5x)" = "red",
                               "Addition of Two Copies (2x)" = "darkgreen",
                               "Placeholder for >2x Copies (3x)" = "purple",
                               "Unknown" = "black")) +
  labs(title = paste0("Significant CNVs Affecting >", pct_threshold, "% of Cells"),
       subtitle = paste0("Chromosomal Bands with CNVs in >", pct_threshold, "% of Cells"),
       x = "Percentage of Cells (%)",
       y = "Chromosomal Band",
       fill = "CNV Type") +
  theme_minimal(base_size = 14) +  # Adjust text size for readability
  theme(
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),  # Center the title
    plot.subtitle = element_text(hjust = 0.5, size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 11),
    panel.grid.major.x = element_line(color = "gray90"),  # Add gridlines
    panel.grid.minor.x = element_blank(),  # Remove minor gridlines
    legend.position = "top",  # Position legend at the top
    legend.key.size = unit(0.8, "cm")
  )

```
# L3.
```{r data10, fig.height=8, fig.width=12}
# Load required libraries
library(dplyr)
library(ggplot2)
library(readr)

# Read the updated inferCNV results with chr_band column
infercnv_file <- "L3_cytogenetics/L3_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(infercnv_file, stringsAsFactors = FALSE)

# Denominator for the percentages: number of distinct cell_group_name values.
# NOTE(review): this counts cell groups rather than individual cells; confirm
# that the "Percentage of Cells" wording in the labels is what is intended.
total_cells <- length(unique(infercnv_data$cell_group_name))

# A band/state pair is reported only when it is seen in more than this
# percentage of cell groups. The message and plot labels are built from this
# value so they always state the threshold that was actually applied.
pct_threshold <- 98

# Per band and HMM state: count the distinct cell groups, convert the count to
# a percentage, and translate the numeric state into a readable label
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarize(unique_cells = n_distinct(cell_group_name), .groups = "drop") %>%
  mutate(
    percentage = (unique_cells / total_cells) * 100,
    cnv_type = case_when(
      state == 1 ~ "Complete Loss (0x)",
      state == 2 ~ "Loss of One Copy (0.5x)",
      state == 3 ~ "Neutral (1x)",
      state == 4 ~ "Addition of One Copy (1.5x)",
      state == 5 ~ "Addition of Two Copies (2x)",
      state == 6 ~ "Placeholder for >2x Copies (3x)",
      TRUE ~ "Unknown"  # any state value outside 1-6
    )
  )

# Keep the band/state pairs above the threshold (all states)
significant_cnv_summary <- cnv_summary %>%
  filter(percentage > pct_threshold) %>%
  select(chr_band, state, unique_cells, percentage, cnv_type)

# Save the filtered gain/loss summary to a CSV file.
# NOTE(review): the file name still ends in "_90" although the threshold is
# 98; it is kept unchanged so anything reading this path keeps working --
# confirm which of the two is intended.
output_file <- "L3_cytogenetics/significant_gain_loss_summary_90.csv"
write.csv(significant_cnv_summary, output_file, row.names = FALSE)

# Print confirmation message
cat(
  paste0("Significant Gain/Loss summary with >", pct_threshold, "% affected cells saved to"),
  output_file, "\n"
)

# Horizontal bar chart: percentage of cells on the X-axis, chromosomal bands
# (ordered by percentage) on the Y-axis
ggplot(significant_cnv_summary, aes(x = percentage, y = reorder(chr_band, percentage), fill = cnv_type)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.8) +  # Adjusted bar width
  # Every label that case_when() above can produce gets an explicit colour, so
  # no category falls back to the scale's NA colour
  scale_fill_manual(values = c("Complete Loss (0x)" = "#fdae61",  # light orange
                               "Loss of One Copy (0.5x)" = "#313695",  # dark blue
                               "Neutral (1x)" = "gray70",
                               "Addition of One Copy (1.5x)" = "red",
                               "Addition of Two Copies (2x)" = "darkgreen",
                               "Placeholder for >2x Copies (3x)" = "purple",
                               "Unknown" = "black")) +
  labs(title = paste0("Significant CNVs Affecting >", pct_threshold, "% of Cells"),
       subtitle = paste0("Chromosomal Bands with CNVs in >", pct_threshold, "% of Cells"),
       x = "Percentage of Cells (%)",
       y = "Chromosomal Band",
       fill = "CNV Type") +
  theme_minimal(base_size = 14) +  # Adjust text size for readability
  theme(
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),  # Center the title
    plot.subtitle = element_text(hjust = 0.5, size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 11),
    panel.grid.major.x = element_line(color = "gray90"),  # Add gridlines
    panel.grid.minor.x = element_blank(),  # Remove minor gridlines
    legend.position = "top",  # Position legend at the top
    legend.key.size = unit(0.8, "cm")
  )

```
# L4.
```{r data11, fig.height=8, fig.width=12}
# Load required libraries
library(dplyr)
library(ggplot2)
library(readr)

# Read the updated inferCNV results with chr_band column
infercnv_file <- "L4_cytogenetics/L4_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(infercnv_file, stringsAsFactors = FALSE)

# Denominator for the percentages: number of distinct cell_group_name values.
# NOTE(review): this counts cell groups rather than individual cells; confirm
# that the "Percentage of Cells" wording in the labels is what is intended.
total_cells <- length(unique(infercnv_data$cell_group_name))

# A band/state pair is reported only when it is seen in more than this
# percentage of cell groups. The message and plot labels are built from this
# value so they always state the threshold that was actually applied.
pct_threshold <- 93

# Per band and HMM state: count the distinct cell groups, convert the count to
# a percentage, and translate the numeric state into a readable label
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarize(unique_cells = n_distinct(cell_group_name), .groups = "drop") %>%
  mutate(
    percentage = (unique_cells / total_cells) * 100,
    cnv_type = case_when(
      state == 1 ~ "Complete Loss (0x)",
      state == 2 ~ "Loss of One Copy (0.5x)",
      state == 3 ~ "Neutral (1x)",
      state == 4 ~ "Addition of One Copy (1.5x)",
      state == 5 ~ "Addition of Two Copies (2x)",
      state == 6 ~ "Placeholder for >2x Copies (3x)",
      TRUE ~ "Unknown"  # any state value outside 1-6
    )
  )

# Keep the band/state pairs above the threshold (all states)
significant_cnv_summary <- cnv_summary %>%
  filter(percentage > pct_threshold) %>%
  select(chr_band, state, unique_cells, percentage, cnv_type)

# Save the filtered gain/loss summary to a CSV file.
# NOTE(review): the file name still ends in "_90" although the threshold is
# 93; it is kept unchanged so anything reading this path keeps working --
# confirm which of the two is intended.
output_file <- "L4_cytogenetics/significant_gain_loss_summary_90.csv"
write.csv(significant_cnv_summary, output_file, row.names = FALSE)

# Print confirmation message
cat(
  paste0("Significant Gain/Loss summary with >", pct_threshold, "% affected cells saved to"),
  output_file, "\n"
)

# Horizontal bar chart: percentage of cells on the X-axis, chromosomal bands
# (ordered by percentage) on the Y-axis
ggplot(significant_cnv_summary, aes(x = percentage, y = reorder(chr_band, percentage), fill = cnv_type)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.8) +  # Adjusted bar width
  # Every label that case_when() above can produce gets an explicit colour, so
  # no category falls back to the scale's NA colour
  scale_fill_manual(values = c("Complete Loss (0x)" = "#fdae61",  # light orange
                               "Loss of One Copy (0.5x)" = "#313695",  # dark blue
                               "Neutral (1x)" = "gray70",
                               "Addition of One Copy (1.5x)" = "red",
                               "Addition of Two Copies (2x)" = "darkgreen",
                               "Placeholder for >2x Copies (3x)" = "purple",
                               "Unknown" = "black")) +
  labs(title = paste0("Significant CNVs Affecting >", pct_threshold, "% of Cells"),
       subtitle = paste0("Chromosomal Bands with CNVs in >", pct_threshold, "% of Cells"),
       x = "Percentage of Cells (%)",
       y = "Chromosomal Band",
       fill = "CNV Type") +
  theme_minimal(base_size = 14) +  # Adjust text size for readability
  theme(
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),  # Center the title
    plot.subtitle = element_text(hjust = 0.5, size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 11),
    panel.grid.major.x = element_line(color = "gray90"),  # Add gridlines
    panel.grid.minor.x = element_blank(),  # Remove minor gridlines
    legend.position = "top",  # Position legend at the top
    legend.key.size = unit(0.8, "cm")
  )

```
# L5.
```{r data12, fig.height=8, fig.width=12}
# Load required libraries
library(dplyr)
library(ggplot2)
library(readr)

# Read the updated inferCNV results with chr_band column
infercnv_file <- "L5_cytogenetics/L5_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(infercnv_file, stringsAsFactors = FALSE)

# Denominator for the percentages: number of distinct cell_group_name values.
# NOTE(review): this counts cell groups rather than individual cells; confirm
# that the "Percentage of Cells" wording in the labels is what is intended.
total_cells <- length(unique(infercnv_data$cell_group_name))

# A band/state pair is reported only when it is seen in more than this
# percentage of cell groups; the message and plot labels are built from it
pct_threshold <- 90

# Per band and HMM state: count the distinct cell groups, convert the count to
# a percentage, and translate the numeric state into a readable label
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarize(unique_cells = n_distinct(cell_group_name), .groups = "drop") %>%
  mutate(
    percentage = (unique_cells / total_cells) * 100,
    cnv_type = case_when(
      state == 1 ~ "Complete Loss (0x)",
      state == 2 ~ "Loss of One Copy (0.5x)",
      state == 3 ~ "Neutral (1x)",
      state == 4 ~ "Addition of One Copy (1.5x)",
      state == 5 ~ "Addition of Two Copies (2x)",
      state == 6 ~ "Placeholder for >2x Copies (3x)",
      TRUE ~ "Unknown"  # any state value outside 1-6
    )
  )

# Keep the band/state pairs above the threshold (all states)
significant_cnv_summary <- cnv_summary %>%
  filter(percentage > pct_threshold) %>%
  select(chr_band, state, unique_cells, percentage, cnv_type)

# Save the filtered gain/loss summary to a CSV file
output_file <- "L5_cytogenetics/significant_gain_loss_summary_90.csv"
write.csv(significant_cnv_summary, output_file, row.names = FALSE)

# Print confirmation message
cat(
  paste0("Significant Gain/Loss summary with >", pct_threshold, "% affected cells saved to"),
  output_file, "\n"
)

# Horizontal bar chart: percentage of cells on the X-axis, chromosomal bands
# (ordered by percentage) on the Y-axis
ggplot(significant_cnv_summary, aes(x = percentage, y = reorder(chr_band, percentage), fill = cnv_type)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.8) +  # Adjusted bar width
  # Every label that case_when() above can produce gets an explicit colour, so
  # no category falls back to the scale's NA colour
  scale_fill_manual(values = c("Complete Loss (0x)" = "#fdae61",  # light orange
                               "Loss of One Copy (0.5x)" = "#313695",  # dark blue
                               "Neutral (1x)" = "gray70",
                               "Addition of One Copy (1.5x)" = "red",
                               "Addition of Two Copies (2x)" = "darkgreen",
                               "Placeholder for >2x Copies (3x)" = "purple",
                               "Unknown" = "black")) +
  labs(title = paste0("Significant CNVs Affecting >", pct_threshold, "% of Cells"),
       subtitle = paste0("Chromosomal Bands with CNVs in >", pct_threshold, "% of Cells"),
       x = "Percentage of Cells (%)",
       y = "Chromosomal Band",
       fill = "CNV Type") +
  theme_minimal(base_size = 14) +  # Adjust text size for readability
  theme(
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),  # Center the title
    plot.subtitle = element_text(hjust = 0.5, size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 11),
    panel.grid.major.x = element_line(color = "gray90"),  # Add gridlines
    panel.grid.minor.x = element_blank(),  # Remove minor gridlines
    legend.position = "top",  # Position legend at the top
    legend.key.size = unit(0.8, "cm")
  )

```


# L6.
```{r data13, fig.height=8, fig.width=12}
# Load required libraries
library(dplyr)
library(ggplot2)
library(readr)

# Read the updated inferCNV results with chr_band column
infercnv_file <- "L6_cytogenetics/L6_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(infercnv_file, stringsAsFactors = FALSE)

# Denominator for the percentages: number of distinct cell_group_name values.
# NOTE(review): this counts cell groups rather than individual cells; confirm
# that the "Percentage of Cells" wording in the labels is what is intended.
total_cells <- length(unique(infercnv_data$cell_group_name))

# A band/state pair is reported only when it is seen in more than this
# percentage of cell groups; the message and plot labels are built from it
pct_threshold <- 90

# Per band and HMM state: count the distinct cell groups, convert the count to
# a percentage, and translate the numeric state into a readable label
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarize(unique_cells = n_distinct(cell_group_name), .groups = "drop") %>%
  mutate(
    percentage = (unique_cells / total_cells) * 100,
    cnv_type = case_when(
      state == 1 ~ "Complete Loss (0x)",
      state == 2 ~ "Loss of One Copy (0.5x)",
      state == 3 ~ "Neutral (1x)",
      state == 4 ~ "Addition of One Copy (1.5x)",
      state == 5 ~ "Addition of Two Copies (2x)",
      state == 6 ~ "Placeholder for >2x Copies (3x)",
      TRUE ~ "Unknown"  # any state value outside 1-6
    )
  )

# Keep the band/state pairs above the threshold (all states)
significant_cnv_summary <- cnv_summary %>%
  filter(percentage > pct_threshold) %>%
  select(chr_band, state, unique_cells, percentage, cnv_type)

# Save the filtered gain/loss summary to a CSV file
output_file <- "L6_cytogenetics/significant_gain_loss_summary_90.csv"
write.csv(significant_cnv_summary, output_file, row.names = FALSE)

# Print confirmation message
cat(
  paste0("Significant Gain/Loss summary with >", pct_threshold, "% affected cells saved to"),
  output_file, "\n"
)

# Horizontal bar chart: percentage of cells on the X-axis, chromosomal bands
# (ordered by percentage) on the Y-axis
ggplot(significant_cnv_summary, aes(x = percentage, y = reorder(chr_band, percentage), fill = cnv_type)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.8) +  # Adjusted bar width
  # Every label that case_when() above can produce gets an explicit colour, so
  # no category falls back to the scale's NA colour
  scale_fill_manual(values = c("Complete Loss (0x)" = "#fdae61",  # light orange
                               "Loss of One Copy (0.5x)" = "#313695",  # dark blue
                               "Neutral (1x)" = "gray70",
                               "Addition of One Copy (1.5x)" = "red",
                               "Addition of Two Copies (2x)" = "darkgreen",
                               "Placeholder for >2x Copies (3x)" = "purple",
                               "Unknown" = "black")) +
  labs(title = paste0("Significant CNVs Affecting >", pct_threshold, "% of Cells"),
       subtitle = paste0("Chromosomal Bands with CNVs in >", pct_threshold, "% of Cells"),
       x = "Percentage of Cells (%)",
       y = "Chromosomal Band",
       fill = "CNV Type") +
  theme_minimal(base_size = 14) +  # Adjust text size for readability
  theme(
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),  # Center the title
    plot.subtitle = element_text(hjust = 0.5, size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 11),
    panel.grid.major.x = element_line(color = "gray90"),  # Add gridlines
    panel.grid.minor.x = element_blank(),  # Remove minor gridlines
    legend.position = "top",  # Position legend at the top
    legend.key.size = unit(0.8, "cm")
  )

```

# L7.
```{r data14, fig.height=8, fig.width=12}
# Load required libraries
library(dplyr)
library(ggplot2)
library(readr)

# Read the updated inferCNV results with chr_band column
infercnv_file <- "L7_cytogenetics/L7_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(infercnv_file, stringsAsFactors = FALSE)

# Denominator for the percentages: number of distinct cell_group_name values.
# NOTE(review): this counts cell groups rather than individual cells; confirm
# that the "Percentage of Cells" wording in the labels is what is intended.
total_cells <- length(unique(infercnv_data$cell_group_name))

# A band/state pair is reported only when it is seen in more than this
# percentage of cell groups; the message and plot labels are built from it
pct_threshold <- 90

# Per band and HMM state: count the distinct cell groups, convert the count to
# a percentage, and translate the numeric state into a readable label
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarize(unique_cells = n_distinct(cell_group_name), .groups = "drop") %>%
  mutate(
    percentage = (unique_cells / total_cells) * 100,
    cnv_type = case_when(
      state == 1 ~ "Complete Loss (0x)",
      state == 2 ~ "Loss of One Copy (0.5x)",
      state == 3 ~ "Neutral (1x)",
      state == 4 ~ "Addition of One Copy (1.5x)",
      state == 5 ~ "Addition of Two Copies (2x)",
      state == 6 ~ "Placeholder for >2x Copies (3x)",
      TRUE ~ "Unknown"  # any state value outside 1-6
    )
  )

# Keep the band/state pairs above the threshold (all states)
significant_cnv_summary <- cnv_summary %>%
  filter(percentage > pct_threshold) %>%
  select(chr_band, state, unique_cells, percentage, cnv_type)

# Save the filtered gain/loss summary to a CSV file
output_file <- "L7_cytogenetics/significant_gain_loss_summary_90.csv"
write.csv(significant_cnv_summary, output_file, row.names = FALSE)

# Print confirmation message
cat(
  paste0("Significant Gain/Loss summary with >", pct_threshold, "% affected cells saved to"),
  output_file, "\n"
)

# Horizontal bar chart: percentage of cells on the X-axis, chromosomal bands
# (ordered by percentage) on the Y-axis
ggplot(significant_cnv_summary, aes(x = percentage, y = reorder(chr_band, percentage), fill = cnv_type)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.8) +  # Adjusted bar width
  # Every label that case_when() above can produce gets an explicit colour, so
  # no category falls back to the scale's NA colour
  scale_fill_manual(values = c("Complete Loss (0x)" = "#fdae61",  # light orange
                               "Loss of One Copy (0.5x)" = "#313695",  # dark blue
                               "Neutral (1x)" = "gray70",
                               "Addition of One Copy (1.5x)" = "red",
                               "Addition of Two Copies (2x)" = "darkgreen",
                               "Placeholder for >2x Copies (3x)" = "purple",
                               "Unknown" = "black")) +
  labs(title = paste0("Significant CNVs Affecting >", pct_threshold, "% of Cells"),
       subtitle = paste0("Chromosomal Bands with CNVs in >", pct_threshold, "% of Cells"),
       x = "Percentage of Cells (%)",
       y = "Chromosomal Band",
       fill = "CNV Type") +
  theme_minimal(base_size = 14) +  # Adjust text size for readability
  theme(
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),  # Center the title
    plot.subtitle = element_text(hjust = 0.5, size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 11),
    panel.grid.major.x = element_line(color = "gray90"),  # Add gridlines
    panel.grid.minor.x = element_blank(),  # Remove minor gridlines
    legend.position = "top",  # Position legend at the top
    legend.key.size = unit(0.8, "cm")
  )

```

# L1: CNVs in >8% and <10% of cells
```{r data15, fig.height=8, fig.width=12}
# Load required libraries
library(dplyr)
library(ggplot2)
library(readr)

# Input: inferCNV results that already carry a chr_band column
infercnv_file <- "L1_cytogenetics/L1_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(file = infercnv_file, stringsAsFactors = FALSE)

# Denominator for the percentages below: the number of distinct
# cell_group_name values found in the input file
total_cells <- length(unique(infercnv_data$cell_group_name))

# For each (chr_band, state) pair, count the distinct cell groups carrying it,
# express that count as a share of all groups, and attach a readable label.
# States outside 1-6 (including NA) are labelled "Unknown".
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarise(unique_cells = n_distinct(cell_group_name), .groups = "drop") %>%
  mutate(percentage = (unique_cells / total_cells) * 100) %>%
  mutate(
    cnv_type = coalesce(
      c(
        "Complete Loss (0x)",
        "Loss of One Copy (0.5x)",
        "Neutral (1x)",
        "Addition of One Copy (1.5x)",
        "Addition of Two Copies (2x)",
        "Placeholder for >2x Copies (3x)"
      )[match(state, 1:6)],
      "Unknown"
    )
  )

# Keep band/state combinations seen in more than 8% but fewer than 10% of the
# cell groups (no state is excluded), restricted to the exported columns
significant_cnv_summary <- cnv_summary %>%
  filter(percentage > 8 & percentage < 10) %>%
  select(chr_band, state, unique_cells, percentage, cnv_type)

# Save the filtered summary to a CSV file. The file name and the message
# below now describe the 8-10% window that the filter actually applies; the
# previous name advertised "<5%", which mislabelled the exported table.
output_file <- "L1_cytogenetics/significant_gain_loss_summary_8_to_10.csv"
write.csv(significant_cnv_summary, output_file, row.names = FALSE)

# Print confirmation message
cat(
  "Significant Gain/Loss summary with >8% and <10% affected cells saved to",
  output_file, "\n"
)

# Publication-quality horizontal bar chart of the filtered summary:
#   x    = percentage column of significant_cnv_summary
#   y    = chromosomal band, ordered by that percentage
#   fill = CNV type label
# Title and subtitle state the 8-10% window this chunk filters on (they used
# to say "<5%"). Every label cnv_type can take is given a colour: with a named
# `values` vector, ggplot2 leaves unlisted levels out of the legend and draws
# them in the NA colour, so an incomplete map would silently hide them.
ggplot(
  significant_cnv_summary,
  aes(x = percentage, y = reorder(chr_band, percentage), fill = cnv_type)
) +
  geom_col(position = "dodge", width = 0.8) +  # bar length = percentage
  scale_fill_manual(values = c(
    "Complete Loss (0x)" = "#fdae61",               # light orange
    "Loss of One Copy (0.5x)" = "#313695",          # dark blue
    "Neutral (1x)" = "gray80",
    "Addition of One Copy (1.5x)" = "red",
    "Addition of Two Copies (2x)" = "darkgreen",
    "Placeholder for >2x Copies (3x)" = "purple",
    "Unknown" = "gray40"
  )) +
  labs(title = "Significant CNVs Affecting 8-10% of Cells",
       subtitle = "Chromosomal Bands with CNVs in >8% and <10% of Cells",
       x = "Percentage of Cells (%)",
       y = "Chromosomal Band",
       fill = "CNV Type") +
  theme_minimal(base_size = 14) +  # Adjust text size for readability
  theme(
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),  # Center the title
    plot.subtitle = element_text(hjust = 0.5, size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 11),
    panel.grid.major.x = element_line(color = "gray90"),  # Light vertical gridlines
    panel.grid.minor.x = element_blank(),  # Remove minor gridlines
    legend.position = "top",  # Position legend at the top
    legend.key.size = unit(0.8, "cm")
  )

```