First, I’m loading the required libraries & functions
suppressPackageStartupMessages(library(data.table)) # For writing DE gene file
suppressPackageStartupMessages(library(ggplot2)) # For plotting
suppressPackageStartupMessages(library(ggpubr)) # For grid plotting
suppressPackageStartupMessages(library(plotly)) # For interactive plots
suppressPackageStartupMessages(library(crayon)) # Just for bolding the console output :D
cat(bold("data.table"), "version", as.character(packageVersion("data.table")), "\n")
## data.table version 1.17.0
cat(bold("ggplot2"), "version", as.character(packageVersion("ggplot2")), "\n")
## ggplot2 version 3.5.1
cat(bold("ggpubr"), "version", as.character(packageVersion("ggpubr")), "\n")
## ggpubr version 0.6.0
cat(bold("plotly"), "version", as.character(packageVersion("plotly")), "\n")
## plotly version 4.10.4
# Random seed
set.seed(42)
Paper: https://www.nature.com/articles/s41586-024-07606-7
Data repo: Synapse
## Parameters
# Here I cannot load the h5ad file, because it's too big, but I can work with only the metadata
input_metadata <- "./data/Kellis_2024/Gene Expression (snRNAseq - 10x) processed, multi-region/all_brain_regions_filt_preprocessed_scanpy_norm.final_noMB.cell_labels.tsv"
input_Oxphos_119_AUCell_values_path <- "./data/Kellis_119_Oxphos_AUCell_auc.tsv"
input_UPR_92_AUCell_values_path <- "./data/Kellis_92_UPR_AUCell_auc.tsv"
output_figure_prefix <- "./figures/Figure_5A/Kellis_"
plot.boxplot.aucell <- function(data.metadata, auc.name, metadata.name, output_figure_prefix = "", pwidth = 5, pheight = 7) {
# Remove rows with NA in the metadata column
boxplot_data <- data.metadata[!is.na(data.metadata[[metadata.name]]), ]
# Calculate median values of AUC per group
median_values <- tapply(boxplot_data[[auc.name]], boxplot_data[[metadata.name]], median)
# Reorder factor levels of metadata based on median AUC
boxplot_data[[metadata.name]] <- factor(boxplot_data[[metadata.name]], levels = names(sort(median_values, decreasing = FALSE)))
# Plot
p <- ggplot(boxplot_data, aes(x = .data[[metadata.name]], y = .data[[auc.name]])) +
geom_boxplot(fill = "lightgrey") +
labs(x = metadata.name, y = auc.name, title = metadata.name) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1), legend.position = "none")
# Save plots
ggsave(paste0(output_figure_prefix, auc.name, "_AUCell_ordered_boxplot_", metadata.name, ".pdf"), plot = p, width = pwidth, height = pheight, bg = "white")
ggsave(paste0(output_figure_prefix, auc.name, "_AUCell_ordered_boxplot_", metadata.name, ".png"), plot = p, width = pwidth, height = pheight, dpi = 1000, bg = "white")
return(p)
}
First step is to read the metadata
data.metadata <- fread(file = input_metadata, sep = "\t", quote = F, stringsAsFactors = F, header = T, data.table = F)
data.metadata
p <- ggplot(data.metadata, aes(x = U1, y = U2, color = region)) + geom_point()
p
data.auc.oxphos <- fread(input_Oxphos_119_AUCell_values_path, data.table = F)
data.auc.oxphos
QC
all(data.metadata$barcode %in% data.auc.oxphos$Cell) # Number differs for unknown reason... It matches the Seurat objects though...
## [1] TRUE
data.metadata$AUC_OXPHOS_119_KEGG <- data.auc.oxphos[match(data.metadata$barcode, data.auc.oxphos$Cell, nomatch = NA), "AUC"]
hist(data.metadata$AUC_OXPHOS_119_KEGG, breaks = 100)
data.auc.upr <- fread(input_UPR_92_AUCell_values_path, data.table = F)
data.auc.upr
QC
all(data.metadata$barcode %in% data.auc.upr$Cell) # Number differs for unknown reason... It matches the Seurat objects though...
## [1] TRUE
data.metadata$AUC_UPR_92 <- data.auc.upr[match(data.metadata$barcode, data.auc.upr$Cell, nomatch = NA), "AUC"]
hist(data.metadata$AUC_UPR_92, breaks = 100)
plot.boxplot.aucell(data.metadata, "AUC_OXPHOS_119_KEGG", "region")
plot.boxplot.aucell(data.metadata, "AUC_OXPHOS_119_KEGG", "major.celltype")
plot.boxplot.aucell(data.metadata, "AUC_OXPHOS_119_KEGG", "minor.celltype")
plot.boxplot.aucell(data.metadata, "AUC_OXPHOS_119_KEGG", "full.exttype")
plot.boxplot.aucell(data.metadata, "AUC_OXPHOS_119_KEGG", "neuronal.exttype")
plot.boxplot.aucell(data.metadata, "AUC_OXPHOS_119_KEGG", "cell_type_high_resolution", 10, 7)
plot.boxplot.aucell(data.metadata, "AUC_OXPHOS_119_KEGG", "hsubclass", 5, 7)
plot.boxplot.aucell(data.metadata, "AUC_UPR_92", "region")
plot.boxplot.aucell(data.metadata, "AUC_UPR_92", "major.celltype")
plot.boxplot.aucell(data.metadata, "AUC_UPR_92", "minor.celltype")
plot.boxplot.aucell(data.metadata, "AUC_UPR_92", "full.exttype")
plot.boxplot.aucell(data.metadata, "AUC_UPR_92", "neuronal.exttype")
plot.boxplot.aucell(data.metadata, "AUC_UPR_92", "cell_type_high_resolution", 10, 7)
plot.boxplot.aucell(data.metadata, "AUC_UPR_92", "hsubclass", 5, 7)