1. load libraries
2. Data preparation
# Load the inferCNV HMM region predictions (tab-separated, header in row one)
infercnv_data <- read.delim(
  "../L7_PBMC_uphyloplot/HMM_CNV_predictions.HMMi6.leiden.hmm_mode-subclusters.Pnorm_0.5.pred_cnv_genes.dat",
  header = TRUE,
  sep = "\t"
)
# Keep only rows that have no missing field and finite start/end coordinates
infercnv_data <- infercnv_data[
  complete.cases(infercnv_data) &
    is.finite(infercnv_data$start) &
    is.finite(infercnv_data$end),
]
# Chromosome names are stored without a leading "chr"
infercnv_data$chr <- sub("^chr", "", infercnv_data$chr)
# Quick look at the cleaned table
print(head(infercnv_data))
str(infercnv_data)
# Discard every cell group whose name mentions PBMC
infercnv_data <- infercnv_data[
  !grepl(pattern = "PBMC", x = infercnv_data$cell_group_name),
]
# Shorten the "all_observations.all_observations_" prefix of the remaining
# group names to "L7_"
infercnv_data$cell_group_name <- gsub(
  pattern = "all_observations\\.all_observations_",
  replacement = "L7_",
  x = infercnv_data$cell_group_name
)
# Confirm the filtering and renaming
print(head(infercnv_data))
3. Query Cytogenetic Bands-Script2
# Load required libraries
library(dplyr)
library(readr)
#' Download and tidy a UCSC cytogenetic band table
#'
#' @param url Location of a gzipped, header-less, tab-separated cytoBand table.
#'   Defaults to the hg38 table this script has always used.
#' @return A tibble with columns `chr`, `start`, `end`, `band` and `stain`.
#'   A leading "chr" is stripped from `chr`.
get_cyto_bands <- function(url = "http://hgdownload.cse.ucsc.edu/goldenpath/hg38/database/cytoBand.txt.gz") {
  cyto_bands <- read_tsv(
    url,
    col_names = c("chr", "start", "end", "band", "stain"),
    # Explicit types: no per-download guessing, and `chr` is always character
    col_types = "cddcc"
  )
  # Anchored, so only a leading "chr" is removed (same rule as the inferCNV
  # table clean-up)
  cyto_bands$chr <- sub("^chr", "", cyto_bands$chr)
  cyto_bands
}
#' Find the cytogenetic band(s) overlapping a genomic interval
#'
#' @param chr Chromosome name, in the same style as `cyto_bands$chr`.
#' @param pos_start,pos_end Interval bounds; both are treated as inclusive.
#' @param cyto_bands Data frame with columns `chr`, `start`, `end` and `band`.
#' @return A single string with the unique overlapping band names joined by
#'   ",", or `NA_character_` when no band overlaps (always character, so the
#'   column type does not depend on whether any row matched).
find_band <- function(chr, pos_start, pos_end, cyto_bands) {
  # Two closed intervals overlap exactly when each starts no later than the
  # other ends; this is equivalent to the former three-clause test.
  # NOTE(review): band bounds are compared as inclusive on both sides, so a
  # band that merely touches the interval is reported -- confirm this matches
  # the coordinate convention of the band table.
  overlaps <- cyto_bands$chr == chr &
    cyto_bands$start <= pos_end &
    cyto_bands$end >= pos_start
  # which() drops comparisons that evaluated to NA
  hits <- unique(cyto_bands$band[which(overlaps)])
  if (length(hits) == 0L) {
    return(NA_character_)
  }
  paste(hits, collapse = ",")
}
# Download cytogenetic band data
cyto_bands <- get_cyto_bands()
# Annotate every CNV region with the band(s) it overlaps. find_band() handles
# one interval at a time, hence the row-wise evaluation.
result <- infercnv_data %>%
  rowwise() %>%
  mutate(band = find_band(chr, start, end, cyto_bands)) %>%
  ungroup()
# View the first few rows of the result
print(head(result))
# write.csv() does not create missing directories, so make sure the target
# folder exists before saving
dir.create("L7_cytogenetics", showWarnings = FALSE, recursive = TRUE)
write.csv(result, "L7_cytogenetics/L7_infercnv_with_bands.csv", row.names = FALSE)
# Printed explicitly so the summary also shows when the script is source()d
print(summary(result))
4. Checking Cytogenetic Data
# Load necessary libraries
library(dplyr)
library(readr)
# Load the band-annotated inferCNV regions (L7_infercnv_with_bands.csv)
infercnv_data <- read_csv("L7_cytogenetics/L7_infercnv_with_bands.csv")
# Combine chromosome and band into one label (e.g. "1p36.33"). Regions without
# a band stay NA instead of becoming a misleading label such as "1NA".
infercnv_data <- infercnv_data %>%
  mutate(chr_band = if_else(is.na(band), NA_character_, paste0(chr, band)))
# Reference karyotype for patient3: one line per chromosome, entries are
# either a chromosome + band label or a bare chromosome name
patient3_bands <- c(
  "1p36.1",                                                  # chr 1
  "2p21", "2p13.6",                                          # chr 2
  "3q24", "3q25.1", "3q25.31", "3q25.33", "3q26.1",
  "3q26.31", "3q26.33", "3q27.2", "3q29",                    # chr 3
  "4",                                                       # chr 4
  "5",                                                       # chr 5
  "6p21.31", "6p21.2",                                       # chr 6
  "8",                                                       # chr 8
  "9p21.3",                                                  # chr 9
  "10p14",                                                   # chr 10
  "12p13.2", "12p13.1", "12q21.33", "12q22",                 # chr 12
  "14q12", "14q21.1", "14q22.1",                             # chr 14
  "15q24.2",                                                 # chr 15
  "16q24.2", "16q24.3",                                      # chr 16
  "17p13.3", "17p11.2", "17p12", "17q22", "17q24.3", "17q25.3" # chr 17
)
# Reference karyotype for L7: one line per chromosome, entries are either a
# chromosome + band label or a bare chromosome name
L7_bands <- c(
  "1p36.1",                                                  # chr 1
  "2p21", "2p13.6", "2q33.1", "2q33.3",                      # chr 2
  "3q24", "3q25.1", "3q25.31", "3q25.33", "3q26.1",
  "3q26.31", "3q26.33", "3q27.2", "3q29",                    # chr 3
  "4q25", "4qter",                                           # chr 4
  "5",                                                       # chr 5
  "6p21.31", "6p21.2",                                       # chr 6
  "8",                                                       # chr 8
  "9p21.3",                                                  # chr 9
  "10p14",                                                   # chr 10
  "12p13.2", "12p13.1", "12q21.33", "12q22",                 # chr 12
  "14q12", "14q21.1", "14q22.1",                             # chr 14
  "15q24.2", "15q26.3",                                      # chr 15
  "16q24.2", "16q24.3",                                      # chr 16
  "17p13.3", "17p11.2", "17p12", "17q22", "17q24.3", "17q25.3" # chr 17
)
#' Check which reference bands are covered by inferCNV band labels
#'
#' A reference entry counts as present when an observed label lies on the same
#' chromosome and its band name starts with the reference band name. This lets
#' a bare chromosome ("4") or a coarser band ("1p36.1") match finer observed
#' bands ("4q25", "1p36.13"). Observed labels may hold several comma-separated
#' bands that share the chromosome of the first one ("1p36.33,p36.32"); each
#' band is checked on its own. Exact string matches are still honoured.
#'
#' @param bands_list Character vector of reference labels (chromosome
#'   optionally followed by a band starting with "p" or "q").
#' @param infercnv_bands Character vector of observed chromosome + band labels.
#' @return A data frame with one row per element of `bands_list`: `Band` and
#'   `Present_in_infercnv` ("Present" / "Not Present").
compare_bands <- function(bands_list, infercnv_bands) {
  # The chromosome is everything before the first arm letter (p or q)
  chromosome_of <- function(label) sub("[pq].*$", "", label)

  observed <- unique(as.character(infercnv_bands))
  observed <- observed[!is.na(observed) & nzchar(observed)]

  # Expand "1p36.33,p36.32" into c("1p36.33", "1p36.32")
  expanded <- unlist(
    lapply(strsplit(observed, ",", fixed = TRUE), function(parts) {
      parts <- trimws(parts)
      chrom <- chromosome_of(parts[1])
      parts[1] <- substring(parts[1], nchar(chrom) + 1L)
      paste0(chrom, parts)
    }),
    use.names = FALSE
  )
  if (is.null(expanded)) {
    expanded <- character(0)
  }
  observed_chr <- chromosome_of(expanded)
  observed_band <- substring(expanded, nchar(observed_chr) + 1L)

  query <- as.character(bands_list)
  query_chr <- chromosome_of(query)
  query_band <- substring(query, nchar(query_chr) + 1L)

  covered <- vapply(seq_along(query), function(i) {
    if (is.na(query[i])) {
      return(FALSE)
    }
    # The chromosome must match exactly so that "1" never matches "10p14";
    # an empty query band (whole chromosome) is a prefix of every band.
    any(observed_chr == query_chr[i] & startsWith(observed_band, query_band[i]))
  }, logical(1))
  # Keep every match the former exact comparison produced
  covered <- covered | (bands_list %in% infercnv_bands)

  comparison <- data.frame(
    Band = bands_list,
    Present_in_infercnv = ifelse(covered, "Present", "Not Present")
  )
  comparison
}
# Check both reference band lists against the band labels derived from inferCNV
patient3_comparison <- compare_bands(
  bands_list = patient3_bands,
  infercnv_bands = infercnv_data$chr_band
)
L7_comparison <- compare_bands(
  bands_list = L7_bands,
  infercnv_bands = infercnv_data$chr_band
)
# Show the two comparison tables
print("patient3 Comparison:")
print(patient3_comparison)
print("L7 Comparison:")
print(L7_comparison)
# Persist each table as CSV
write.csv(
  x = patient3_comparison,
  file = "L7_cytogenetics/patient3_vs_L7_infercnv_comparison.csv",
  row.names = FALSE
)
write.csv(
  x = L7_comparison,
  file = "L7_cytogenetics/L7_vs_L7_infercnv_comparison.csv",
  row.names = FALSE
)
5. Cytogenetic data present and percentage of cells having those CNVs
# Load required libraries
library(dplyr)
library(ggplot2)
library(tidyr)
library(readr)
# Load the band-annotated inferCNV regions
infercnv_file <- "L7_cytogenetics/L7_infercnv_with_bands.csv"
infercnv_data <- read.csv(infercnv_file, stringsAsFactors = FALSE)
# Add chr_band: the chromosome name immediately followed by the band string
infercnv_data$chr_band <- with(infercnv_data, paste0(chr, band))
# Store the extended table next to the input file
write.csv(
  infercnv_data,
  file = "L7_cytogenetics/L7_infercnv_with_chr_bands.csv",
  row.names = FALSE
)
# Show the first rows so the new column can be checked, then confirm the save
print(head(infercnv_data))
cat("Updated data frame with chr_band column has been saved as 'L7_infercnv_with_chr_bands.csv'\n")
Updated data frame with chr_band column has been saved as 'L7_infercnv_with_chr_bands.csv'
6. Cytogenetic data present and percentage of cells having those CNVs
# Load required libraries
library(dplyr)
library(ggplot2)
library(readr)
# Read the chr_band-annotated inferCNV regions for sample L1
infercnv_file <- "L1_cytogenetics/L1_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(infercnv_file, stringsAsFactors = FALSE)
# Reporting cut-off in percent: only band/state pairs above it are kept
min_percentage <- 90
# Denominator for the percentages: the number of distinct cell_group_name
# values, i.e. cell groups rather than individual cells
total_cells <- length(unique(infercnv_data$cell_group_name))
# Count the distinct cell groups behind every chr_band/state pair and give
# each numeric state a readable label
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarize(unique_cells = n_distinct(cell_group_name), .groups = "drop") %>%
  mutate(
    percentage = (unique_cells / total_cells) * 100,
    cnv_type = case_when(
      state == 1 ~ "Complete Loss (0x)",
      state == 2 ~ "Loss of One Copy (0.5x)",
      state == 3 ~ "Neutral (1x)",
      state == 4 ~ "Addition of One Copy (1.5x)",
      state == 5 ~ "Addition of Two Copies (2x)",
      state == 6 ~ "Placeholder for >2 Copies (3x)",
      TRUE ~ "Unknown" # Fallback for unexpected states
    )
  )
# Keep band/state pairs seen in more than min_percentage percent of the groups
significant_cnv_summary <- cnv_summary %>%
  filter(percentage > min_percentage) %>%
  select(chr_band, state, unique_cells, percentage, cnv_type)
# Print the summary for significant CNV analysis
print(significant_cnv_summary)
# Save the significant CNV summary to a CSV file
output_file <- "L1_cytogenetics/significant_cnv_summary.csv"
write.csv(significant_cnv_summary, output_file, row.names = FALSE)
# Print confirmation message
cat("Significant CNV summary saved to", output_file, "\n")
# Bar chart of the retained bands, highest percentage first
cnv_plot <- ggplot(
  significant_cnv_summary,
  aes(x = reorder(chr_band, -percentage), y = percentage, fill = cnv_type)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Percentage of Cells Affected by CNVs in Chromosome Bands",
    x = "Chromosome Band",
    y = "Percentage of Cells Affected (%)"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.title = element_blank()
  )
print(cnv_plot)
# Save exactly this plot rather than whichever plot was built last
ggsave(
  "L1_cytogenetics/cnv_percentage_by_chrbands.png",
  plot = cnv_plot, width = 10, height = 6
)
# Load required libraries (ggplot2 is needed for the plot at the end)
library(dplyr)
library(ggplot2)
library(readr)
# Read the chr_band-annotated inferCNV regions for sample L1
infercnv_file <- "L1_cytogenetics/L1_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(infercnv_file, stringsAsFactors = FALSE)
# Reporting cut-off in percent; shared by the filter, the message and the title
min_percentage <- 90
# Denominator for the percentages: the number of distinct cell_group_name
# values, i.e. cell groups rather than individual cells
total_cells <- length(unique(infercnv_data$cell_group_name))
# Count the distinct cell groups behind every chr_band/state pair and give
# each numeric state a readable label
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarize(unique_cells = n_distinct(cell_group_name), .groups = "drop") %>%
  mutate(
    percentage = (unique_cells / total_cells) * 100,
    cnv_type = case_when(
      state == 1 ~ "Complete Loss (0x)",
      state == 2 ~ "Loss of One Copy (0.5x)",
      state == 3 ~ "Neutral (1x)",
      state == 4 ~ "Addition of One Copy (1.5x)",
      state == 5 ~ "Addition of Two Copies (2x)",
      state == 6 ~ "Placeholder for >2x Copies (3x)",
      TRUE ~ "Unknown" # Fallback for unexpected state values
    )
  )
# Keep band/state pairs seen in more than min_percentage percent of the groups
significant_cnv_summary <- cnv_summary %>%
  filter(percentage > min_percentage) %>%
  select(chr_band, state, unique_cells, percentage, cnv_type)
# Save the filtered gain/loss summary to a CSV file
output_file <- "L1_cytogenetics/significant_gain_loss_summary_90.csv"
write.csv(significant_cnv_summary, output_file, row.names = FALSE)
# Print confirmation message
cat(
  paste0("Significant Gain/Loss summary with >", min_percentage, "% affected cells saved to"),
  output_file, "\n"
)
# Horizontal bars: percentage on the X-axis, chromosomal band on the Y-axis
ggplot(significant_cnv_summary, aes(x = percentage, y = chr_band, fill = cnv_type)) +
  geom_col(position = "dodge") +
  labs(
    title = paste0("Significant CNVs Affecting >", min_percentage, "% of Cells"),
    x = "Percentage of Cells",
    y = "Chromosomal Band",
    fill = "CNV Type"
  ) +
  theme_minimal() +
  theme(axis.text.y = element_text(angle = 0, hjust = 1))

NA
NA
L1.
# Load required libraries
library(dplyr)
library(ggplot2)
library(readr)
# Read the chr_band-annotated inferCNV regions for sample L1
infercnv_file <- "L1_cytogenetics/L1_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(infercnv_file, stringsAsFactors = FALSE)
# Reporting cut-off in percent. The filter, the console message and the plot
# labels all read this value, so they cannot disagree.
min_percentage <- 90
# Denominator for the percentages: the number of distinct cell_group_name
# values, i.e. cell groups rather than individual cells
total_cells <- length(unique(infercnv_data$cell_group_name))
# Count the distinct cell groups behind every chr_band/state pair and give
# each numeric state a readable label
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarize(unique_cells = n_distinct(cell_group_name), .groups = "drop") %>%
  mutate(
    percentage = (unique_cells / total_cells) * 100,
    cnv_type = case_when(
      state == 1 ~ "Complete Loss (0x)",
      state == 2 ~ "Loss of One Copy (0.5x)",
      state == 3 ~ "Neutral (1x)",
      state == 4 ~ "Addition of One Copy (1.5x)",
      state == 5 ~ "Addition of Two Copies (2x)",
      state == 6 ~ "Placeholder for >2x Copies (3x)",
      TRUE ~ "Unknown"
    )
  )
# Keep band/state pairs seen in more than min_percentage percent of the groups
significant_cnv_summary <- cnv_summary %>%
  filter(percentage > min_percentage) %>%
  select(chr_band, state, unique_cells, percentage, cnv_type)
# Save the filtered gain/loss summary to a CSV file
output_file <- "L1_cytogenetics/significant_gain_loss_summary_90.csv"
write.csv(significant_cnv_summary, output_file, row.names = FALSE)
# Print confirmation message
cat(
  paste0("Significant Gain/Loss summary with >", min_percentage, "% affected cells saved to"),
  output_file, "\n"
)
# Publication-quality plot: percentage on the X-axis, bands on the Y-axis
ggplot(
  significant_cnv_summary,
  aes(x = percentage, y = reorder(chr_band, percentage), fill = cnv_type)
) +
  geom_col(position = "dodge", width = 0.8) +
  # One colour per label case_when() can emit, so no bar falls back to the
  # NA colour
  scale_fill_manual(values = c(
    "Complete Loss (0x)" = "#fdae61",              # light orange
    "Loss of One Copy (0.5x)" = "#313695",         # dark blue
    "Neutral (1x)" = "grey80",
    "Addition of One Copy (1.5x)" = "red",
    "Addition of Two Copies (2x)" = "darkgreen",
    "Placeholder for >2x Copies (3x)" = "purple4",
    "Unknown" = "grey40"
  )) +
  labs(
    title = paste0("Significant CNVs Affecting >", min_percentage, "% of Cells"),
    subtitle = paste0("Chromosomal Bands with CNVs in >", min_percentage, "% of Cells"),
    x = "Percentage of Cells (%)",
    y = "Chromosomal Band",
    fill = "CNV Type"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"), # centred
    plot.subtitle = element_text(hjust = 0.5, size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 11),
    panel.grid.major.x = element_line(color = "gray90"),
    panel.grid.minor.x = element_blank(),
    legend.position = "top",
    legend.key.size = unit(0.8, "cm")
  )

L2.
# Load required libraries
library(dplyr)
library(ggplot2)
library(readr)
# Read the chr_band-annotated inferCNV regions for sample L2
infercnv_file <- "L2_cytogenetics/L2_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(infercnv_file, stringsAsFactors = FALSE)
# Reporting cut-off in percent. The filter, the console message and the plot
# labels all read this value, so they cannot disagree.
min_percentage <- 97
# Denominator for the percentages: the number of distinct cell_group_name
# values, i.e. cell groups rather than individual cells
total_cells <- length(unique(infercnv_data$cell_group_name))
# Count the distinct cell groups behind every chr_band/state pair and give
# each numeric state a readable label
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarize(unique_cells = n_distinct(cell_group_name), .groups = "drop") %>%
  mutate(
    percentage = (unique_cells / total_cells) * 100,
    cnv_type = case_when(
      state == 1 ~ "Complete Loss (0x)",
      state == 2 ~ "Loss of One Copy (0.5x)",
      state == 3 ~ "Neutral (1x)",
      state == 4 ~ "Addition of One Copy (1.5x)",
      state == 5 ~ "Addition of Two Copies (2x)",
      state == 6 ~ "Placeholder for >2x Copies (3x)",
      TRUE ~ "Unknown"
    )
  )
# Keep band/state pairs seen in more than min_percentage percent of the groups
significant_cnv_summary <- cnv_summary %>%
  filter(percentage > min_percentage) %>%
  select(chr_band, state, unique_cells, percentage, cnv_type)
# Save the filtered gain/loss summary to a CSV file.
# NOTE: the "_90" suffix does not reflect the cut-off used here; the file
# name is left unchanged so existing paths keep working.
output_file <- "L2_cytogenetics/significant_gain_loss_summary_90.csv"
write.csv(significant_cnv_summary, output_file, row.names = FALSE)
# Print confirmation message
cat(
  paste0("Significant Gain/Loss summary with >", min_percentage, "% affected cells saved to"),
  output_file, "\n"
)
# Publication-quality plot: percentage on the X-axis, bands on the Y-axis
ggplot(
  significant_cnv_summary,
  aes(x = percentage, y = reorder(chr_band, percentage), fill = cnv_type)
) +
  geom_col(position = "dodge", width = 0.8) +
  # One colour per label case_when() can emit, so no bar falls back to the
  # NA colour
  scale_fill_manual(values = c(
    "Complete Loss (0x)" = "#fdae61",              # light orange
    "Loss of One Copy (0.5x)" = "#313695",         # dark blue
    "Neutral (1x)" = "grey80",
    "Addition of One Copy (1.5x)" = "red",
    "Addition of Two Copies (2x)" = "darkgreen",
    "Placeholder for >2x Copies (3x)" = "purple4",
    "Unknown" = "grey40"
  )) +
  labs(
    title = paste0("Significant CNVs Affecting >", min_percentage, "% of Cells"),
    subtitle = paste0("Chromosomal Bands with CNVs in >", min_percentage, "% of Cells"),
    x = "Percentage of Cells (%)",
    y = "Chromosomal Band",
    fill = "CNV Type"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"), # centred
    plot.subtitle = element_text(hjust = 0.5, size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 11),
    panel.grid.major.x = element_line(color = "gray90"),
    panel.grid.minor.x = element_blank(),
    legend.position = "top",
    legend.key.size = unit(0.8, "cm")
  )

L3.
# Load required libraries
library(dplyr)
library(ggplot2)
library(readr)
# Read the chr_band-annotated inferCNV regions for sample L3
infercnv_file <- "L3_cytogenetics/L3_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(infercnv_file, stringsAsFactors = FALSE)
# Reporting cut-off in percent. The filter, the console message and the plot
# labels all read this value, so they cannot disagree.
min_percentage <- 98
# Denominator for the percentages: the number of distinct cell_group_name
# values, i.e. cell groups rather than individual cells
total_cells <- length(unique(infercnv_data$cell_group_name))
# Count the distinct cell groups behind every chr_band/state pair and give
# each numeric state a readable label
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarize(unique_cells = n_distinct(cell_group_name), .groups = "drop") %>%
  mutate(
    percentage = (unique_cells / total_cells) * 100,
    cnv_type = case_when(
      state == 1 ~ "Complete Loss (0x)",
      state == 2 ~ "Loss of One Copy (0.5x)",
      state == 3 ~ "Neutral (1x)",
      state == 4 ~ "Addition of One Copy (1.5x)",
      state == 5 ~ "Addition of Two Copies (2x)",
      state == 6 ~ "Placeholder for >2x Copies (3x)",
      TRUE ~ "Unknown"
    )
  )
# Keep band/state pairs seen in more than min_percentage percent of the groups
significant_cnv_summary <- cnv_summary %>%
  filter(percentage > min_percentage) %>%
  select(chr_band, state, unique_cells, percentage, cnv_type)
# Save the filtered gain/loss summary to a CSV file.
# NOTE: the "_90" suffix does not reflect the cut-off used here; the file
# name is left unchanged so existing paths keep working.
output_file <- "L3_cytogenetics/significant_gain_loss_summary_90.csv"
write.csv(significant_cnv_summary, output_file, row.names = FALSE)
# Print confirmation message
cat(
  paste0("Significant Gain/Loss summary with >", min_percentage, "% affected cells saved to"),
  output_file, "\n"
)
# Publication-quality plot: percentage on the X-axis, bands on the Y-axis
ggplot(
  significant_cnv_summary,
  aes(x = percentage, y = reorder(chr_band, percentage), fill = cnv_type)
) +
  geom_col(position = "dodge", width = 0.8) +
  # One colour per label case_when() can emit, so no bar falls back to the
  # NA colour
  scale_fill_manual(values = c(
    "Complete Loss (0x)" = "#fdae61",              # light orange
    "Loss of One Copy (0.5x)" = "#313695",         # dark blue
    "Neutral (1x)" = "grey80",
    "Addition of One Copy (1.5x)" = "red",
    "Addition of Two Copies (2x)" = "darkgreen",
    "Placeholder for >2x Copies (3x)" = "purple4",
    "Unknown" = "grey40"
  )) +
  labs(
    title = paste0("Significant CNVs Affecting >", min_percentage, "% of Cells"),
    subtitle = paste0("Chromosomal Bands with CNVs in >", min_percentage, "% of Cells"),
    x = "Percentage of Cells (%)",
    y = "Chromosomal Band",
    fill = "CNV Type"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"), # centred
    plot.subtitle = element_text(hjust = 0.5, size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 11),
    panel.grid.major.x = element_line(color = "gray90"),
    panel.grid.minor.x = element_blank(),
    legend.position = "top",
    legend.key.size = unit(0.8, "cm")
  )

L4.
# Load required libraries
library(dplyr)
library(ggplot2)
library(readr)
# Read the chr_band-annotated inferCNV regions for sample L4
infercnv_file <- "L4_cytogenetics/L4_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(infercnv_file, stringsAsFactors = FALSE)
# Reporting cut-off in percent. The filter, the console message and the plot
# labels all read this value, so they cannot disagree.
min_percentage <- 93
# Denominator for the percentages: the number of distinct cell_group_name
# values, i.e. cell groups rather than individual cells
total_cells <- length(unique(infercnv_data$cell_group_name))
# Count the distinct cell groups behind every chr_band/state pair and give
# each numeric state a readable label
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarize(unique_cells = n_distinct(cell_group_name), .groups = "drop") %>%
  mutate(
    percentage = (unique_cells / total_cells) * 100,
    cnv_type = case_when(
      state == 1 ~ "Complete Loss (0x)",
      state == 2 ~ "Loss of One Copy (0.5x)",
      state == 3 ~ "Neutral (1x)",
      state == 4 ~ "Addition of One Copy (1.5x)",
      state == 5 ~ "Addition of Two Copies (2x)",
      state == 6 ~ "Placeholder for >2x Copies (3x)",
      TRUE ~ "Unknown"
    )
  )
# Keep band/state pairs seen in more than min_percentage percent of the groups
significant_cnv_summary <- cnv_summary %>%
  filter(percentage > min_percentage) %>%
  select(chr_band, state, unique_cells, percentage, cnv_type)
# Save the filtered gain/loss summary to a CSV file.
# NOTE: the "_90" suffix does not reflect the cut-off used here; the file
# name is left unchanged so existing paths keep working.
output_file <- "L4_cytogenetics/significant_gain_loss_summary_90.csv"
write.csv(significant_cnv_summary, output_file, row.names = FALSE)
# Print confirmation message
cat(
  paste0("Significant Gain/Loss summary with >", min_percentage, "% affected cells saved to"),
  output_file, "\n"
)
# Publication-quality plot: percentage on the X-axis, bands on the Y-axis
ggplot(
  significant_cnv_summary,
  aes(x = percentage, y = reorder(chr_band, percentage), fill = cnv_type)
) +
  geom_col(position = "dodge", width = 0.8) +
  # One colour per label case_when() can emit, so no bar falls back to the
  # NA colour
  scale_fill_manual(values = c(
    "Complete Loss (0x)" = "#fdae61",              # light orange
    "Loss of One Copy (0.5x)" = "#313695",         # dark blue
    "Neutral (1x)" = "grey80",
    "Addition of One Copy (1.5x)" = "red",
    "Addition of Two Copies (2x)" = "darkgreen",
    "Placeholder for >2x Copies (3x)" = "purple4",
    "Unknown" = "grey40"
  )) +
  labs(
    title = paste0("Significant CNVs Affecting >", min_percentage, "% of Cells"),
    subtitle = paste0("Chromosomal Bands with CNVs in >", min_percentage, "% of Cells"),
    x = "Percentage of Cells (%)",
    y = "Chromosomal Band",
    fill = "CNV Type"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"), # centred
    plot.subtitle = element_text(hjust = 0.5, size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 11),
    panel.grid.major.x = element_line(color = "gray90"),
    panel.grid.minor.x = element_blank(),
    legend.position = "top",
    legend.key.size = unit(0.8, "cm")
  )

L5.
# Load required libraries
library(dplyr)
library(ggplot2)
library(readr)
# Read the chr_band-annotated inferCNV regions for sample L5
infercnv_file <- "L5_cytogenetics/L5_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(infercnv_file, stringsAsFactors = FALSE)
# Reporting cut-off in percent. The filter, the console message and the plot
# labels all read this value, so they cannot disagree.
min_percentage <- 90
# Denominator for the percentages: the number of distinct cell_group_name
# values, i.e. cell groups rather than individual cells
total_cells <- length(unique(infercnv_data$cell_group_name))
# Count the distinct cell groups behind every chr_band/state pair and give
# each numeric state a readable label
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarize(unique_cells = n_distinct(cell_group_name), .groups = "drop") %>%
  mutate(
    percentage = (unique_cells / total_cells) * 100,
    cnv_type = case_when(
      state == 1 ~ "Complete Loss (0x)",
      state == 2 ~ "Loss of One Copy (0.5x)",
      state == 3 ~ "Neutral (1x)",
      state == 4 ~ "Addition of One Copy (1.5x)",
      state == 5 ~ "Addition of Two Copies (2x)",
      state == 6 ~ "Placeholder for >2x Copies (3x)",
      TRUE ~ "Unknown"
    )
  )
# Keep band/state pairs seen in more than min_percentage percent of the groups
significant_cnv_summary <- cnv_summary %>%
  filter(percentage > min_percentage) %>%
  select(chr_band, state, unique_cells, percentage, cnv_type)
# Save the filtered gain/loss summary to a CSV file
output_file <- "L5_cytogenetics/significant_gain_loss_summary_90.csv"
write.csv(significant_cnv_summary, output_file, row.names = FALSE)
# Print confirmation message
cat(
  paste0("Significant Gain/Loss summary with >", min_percentage, "% affected cells saved to"),
  output_file, "\n"
)
# Publication-quality plot: percentage on the X-axis, bands on the Y-axis
ggplot(
  significant_cnv_summary,
  aes(x = percentage, y = reorder(chr_band, percentage), fill = cnv_type)
) +
  geom_col(position = "dodge", width = 0.8) +
  # One colour per label case_when() can emit, so no bar falls back to the
  # NA colour
  scale_fill_manual(values = c(
    "Complete Loss (0x)" = "#fdae61",              # light orange
    "Loss of One Copy (0.5x)" = "#313695",         # dark blue
    "Neutral (1x)" = "grey80",
    "Addition of One Copy (1.5x)" = "red",
    "Addition of Two Copies (2x)" = "darkgreen",
    "Placeholder for >2x Copies (3x)" = "purple4",
    "Unknown" = "grey40"
  )) +
  labs(
    title = paste0("Significant CNVs Affecting >", min_percentage, "% of Cells"),
    subtitle = paste0("Chromosomal Bands with CNVs in >", min_percentage, "% of Cells"),
    x = "Percentage of Cells (%)",
    y = "Chromosomal Band",
    fill = "CNV Type"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"), # centred
    plot.subtitle = element_text(hjust = 0.5, size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 11),
    panel.grid.major.x = element_line(color = "gray90"),
    panel.grid.minor.x = element_blank(),
    legend.position = "top",
    legend.key.size = unit(0.8, "cm")
  )
L6.
# Load required libraries
library(dplyr)
library(ggplot2)
library(readr)
# Read the chr_band-annotated inferCNV regions for sample L6
infercnv_file <- "L6_cytogenetics/L6_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(infercnv_file, stringsAsFactors = FALSE)
# Reporting cut-off in percent. The filter, the console message and the plot
# labels all read this value, so they cannot disagree.
min_percentage <- 90
# Denominator for the percentages: the number of distinct cell_group_name
# values, i.e. cell groups rather than individual cells
total_cells <- length(unique(infercnv_data$cell_group_name))
# Count the distinct cell groups behind every chr_band/state pair and give
# each numeric state a readable label
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarize(unique_cells = n_distinct(cell_group_name), .groups = "drop") %>%
  mutate(
    percentage = (unique_cells / total_cells) * 100,
    cnv_type = case_when(
      state == 1 ~ "Complete Loss (0x)",
      state == 2 ~ "Loss of One Copy (0.5x)",
      state == 3 ~ "Neutral (1x)",
      state == 4 ~ "Addition of One Copy (1.5x)",
      state == 5 ~ "Addition of Two Copies (2x)",
      state == 6 ~ "Placeholder for >2x Copies (3x)",
      TRUE ~ "Unknown"
    )
  )
# Keep band/state pairs seen in more than min_percentage percent of the groups
significant_cnv_summary <- cnv_summary %>%
  filter(percentage > min_percentage) %>%
  select(chr_band, state, unique_cells, percentage, cnv_type)
# Save the filtered gain/loss summary to a CSV file
output_file <- "L6_cytogenetics/significant_gain_loss_summary_90.csv"
write.csv(significant_cnv_summary, output_file, row.names = FALSE)
# Print confirmation message
cat(
  paste0("Significant Gain/Loss summary with >", min_percentage, "% affected cells saved to"),
  output_file, "\n"
)
# Publication-quality plot: percentage on the X-axis, bands on the Y-axis
ggplot(
  significant_cnv_summary,
  aes(x = percentage, y = reorder(chr_band, percentage), fill = cnv_type)
) +
  geom_col(position = "dodge", width = 0.8) +
  # One colour per label case_when() can emit, so no bar falls back to the
  # NA colour
  scale_fill_manual(values = c(
    "Complete Loss (0x)" = "#fdae61",              # light orange
    "Loss of One Copy (0.5x)" = "#313695",         # dark blue
    "Neutral (1x)" = "grey80",
    "Addition of One Copy (1.5x)" = "red",
    "Addition of Two Copies (2x)" = "darkgreen",
    "Placeholder for >2x Copies (3x)" = "purple4",
    "Unknown" = "grey40"
  )) +
  labs(
    title = paste0("Significant CNVs Affecting >", min_percentage, "% of Cells"),
    subtitle = paste0("Chromosomal Bands with CNVs in >", min_percentage, "% of Cells"),
    x = "Percentage of Cells (%)",
    y = "Chromosomal Band",
    fill = "CNV Type"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"), # centred
    plot.subtitle = element_text(hjust = 0.5, size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 11),
    panel.grid.major.x = element_line(color = "gray90"),
    panel.grid.minor.x = element_blank(),
    legend.position = "top",
    legend.key.size = unit(0.8, "cm")
  )
L7.
# Load required libraries
library(dplyr)
library(ggplot2)
library(readr)
# Read the chr_band-annotated inferCNV regions for sample L7
infercnv_file <- "L7_cytogenetics/L7_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(infercnv_file, stringsAsFactors = FALSE)
# Reporting cut-off in percent. The filter, the console message and the plot
# labels all read this value, so they cannot disagree.
min_percentage <- 90
# Denominator for the percentages: the number of distinct cell_group_name
# values, i.e. cell groups rather than individual cells
total_cells <- length(unique(infercnv_data$cell_group_name))
# Count the distinct cell groups behind every chr_band/state pair and give
# each numeric state a readable label
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarize(unique_cells = n_distinct(cell_group_name), .groups = "drop") %>%
  mutate(
    percentage = (unique_cells / total_cells) * 100,
    cnv_type = case_when(
      state == 1 ~ "Complete Loss (0x)",
      state == 2 ~ "Loss of One Copy (0.5x)",
      state == 3 ~ "Neutral (1x)",
      state == 4 ~ "Addition of One Copy (1.5x)",
      state == 5 ~ "Addition of Two Copies (2x)",
      state == 6 ~ "Placeholder for >2x Copies (3x)",
      TRUE ~ "Unknown"
    )
  )
# Keep band/state pairs seen in more than min_percentage percent of the groups
significant_cnv_summary <- cnv_summary %>%
  filter(percentage > min_percentage) %>%
  select(chr_band, state, unique_cells, percentage, cnv_type)
# Save the filtered gain/loss summary to a CSV file
output_file <- "L7_cytogenetics/significant_gain_loss_summary_90.csv"
write.csv(significant_cnv_summary, output_file, row.names = FALSE)
# Print confirmation message
cat(
  paste0("Significant Gain/Loss summary with >", min_percentage, "% affected cells saved to"),
  output_file, "\n"
)
# Publication-quality plot: percentage on the X-axis, bands on the Y-axis
ggplot(
  significant_cnv_summary,
  aes(x = percentage, y = reorder(chr_band, percentage), fill = cnv_type)
) +
  geom_col(position = "dodge", width = 0.8) +
  # One colour per label case_when() can emit, so no bar falls back to the
  # NA colour
  scale_fill_manual(values = c(
    "Complete Loss (0x)" = "#fdae61",              # light orange
    "Loss of One Copy (0.5x)" = "#313695",         # dark blue
    "Neutral (1x)" = "grey80",
    "Addition of One Copy (1.5x)" = "red",
    "Addition of Two Copies (2x)" = "darkgreen",
    "Placeholder for >2x Copies (3x)" = "purple4",
    "Unknown" = "grey40"
  )) +
  labs(
    title = paste0("Significant CNVs Affecting >", min_percentage, "% of Cells"),
    subtitle = paste0("Chromosomal Bands with CNVs in >", min_percentage, "% of Cells"),
    x = "Percentage of Cells (%)",
    y = "Chromosomal Band",
    fill = "CNV Type"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"), # centred
    plot.subtitle = element_text(hjust = 0.5, size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 11),
    panel.grid.major.x = element_line(color = "gray90"),
    panel.grid.minor.x = element_blank(),
    legend.position = "top",
    legend.key.size = unit(0.8, "cm")
  )

L1 (>8% and <10%).
# Load required libraries
library(dplyr)
library(ggplot2)
library(readr)
# Read the chr_band-annotated inferCNV regions for sample L1
infercnv_file <- "L1_cytogenetics/L1_infercnv_with_chr_bands.csv"
infercnv_data <- read.csv(infercnv_file, stringsAsFactors = FALSE)
# Reporting window in percent (both bounds exclusive). The filter, the console
# message and the plot labels all read these values, so they cannot disagree.
min_percentage <- 8
max_percentage <- 10
window_label <- paste0(">", min_percentage, "% and <", max_percentage, "%")
# Denominator for the percentages: the number of distinct cell_group_name
# values, i.e. cell groups rather than individual cells
total_cells <- length(unique(infercnv_data$cell_group_name))
# Count the distinct cell groups behind every chr_band/state pair and give
# each numeric state a readable label
cnv_summary <- infercnv_data %>%
  group_by(chr_band, state) %>%
  summarize(unique_cells = n_distinct(cell_group_name), .groups = "drop") %>%
  mutate(
    percentage = (unique_cells / total_cells) * 100,
    cnv_type = case_when(
      state == 1 ~ "Complete Loss (0x)",
      state == 2 ~ "Loss of One Copy (0.5x)",
      state == 3 ~ "Neutral (1x)",
      state == 4 ~ "Addition of One Copy (1.5x)",
      state == 5 ~ "Addition of Two Copies (2x)",
      state == 6 ~ "Placeholder for >2x Copies (3x)",
      TRUE ~ "Unknown"
    )
  )
# Keep band/state pairs whose percentage lies strictly inside the window
significant_cnv_summary <- cnv_summary %>%
  filter(percentage > min_percentage & percentage < max_percentage) %>%
  select(chr_band, state, unique_cells, percentage, cnv_type)
# Save the filtered gain/loss summary to a CSV file.
# NOTE: the "less_than_5%" part of the name does not describe the window used
# here; the file name is left unchanged so existing paths keep working.
output_file <- "L1_cytogenetics/significant_gain_loss_summary_less_than_5%.csv"
write.csv(significant_cnv_summary, output_file, row.names = FALSE)
# Print confirmation message
cat(
  paste("Significant Gain/Loss summary with", window_label, "affected cells saved to"),
  output_file, "\n"
)
# Publication-quality plot: percentage on the X-axis, bands on the Y-axis
ggplot(
  significant_cnv_summary,
  aes(x = percentage, y = reorder(chr_band, percentage), fill = cnv_type)
) +
  geom_col(position = "dodge", width = 0.8) +
  # One colour per label case_when() can emit, so no bar falls back to the
  # NA colour
  scale_fill_manual(values = c(
    "Complete Loss (0x)" = "#fdae61",              # light orange
    "Loss of One Copy (0.5x)" = "#313695",         # dark blue
    "Neutral (1x)" = "grey80",
    "Addition of One Copy (1.5x)" = "red",
    "Addition of Two Copies (2x)" = "darkgreen",
    "Placeholder for >2x Copies (3x)" = "purple4",
    "Unknown" = "grey40"
  )) +
  labs(
    title = paste("Significant CNVs Affecting", window_label, "of Cells"),
    subtitle = paste("Chromosomal Bands with CNVs in", window_label, "of Cells"),
    x = "Percentage of Cells (%)",
    y = "Chromosomal Band",
    fill = "CNV Type"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"), # centred
    plot.subtitle = element_text(hjust = 0.5, size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 11),
    panel.grid.major.x = element_line(color = "gray90"),
    panel.grid.minor.x = element_blank(),
    legend.position = "top",
    legend.key.size = unit(0.8, "cm")
  )

