library(org.Hs.eg.db)
library(clusterProfiler)
library(pathview)
library(data.table)
library(readr)
library(enrichplot)
library(ggplot2)
We downloaded data directly from the GTEx Portal for local analysis. The data were then loaded into the R environment. To ensure high transcriptomic quality and more precise results, we selected donors with short clinical agony (Hardy Scale 1 or 2). We also filtered the data to include only genes with a minimum expression level above 1 TPM in at least 50% of the samples.
# Prepare the list of donors with a short clinical agony
pheno <- fread("GTEx_Analysis_v11_Annotations_SubjectPhenotypesDS.txt")
fast_death_subjects <- pheno[DTHHRDY %in% c(1, 2), SUBJID]
# For putamen
gene_tpm_v11_brain_putamen_basal_ganglia.gct.gz <- read_delim("gene_tpm_v11_brain_putamen_basal_ganglia.gct.gz",
delim = "\t", escape_double = FALSE,
trim_ws = TRUE, skip = 2)
rows <- gene_tpm_v11_brain_putamen_basal_ganglia.gct.gz[,1]
Putamen <- gene_tpm_v11_brain_putamen_basal_ganglia.gct.gz[!grepl("^ENSG", gene_tpm_v11_brain_putamen_basal_ganglia.gct.gz$Description), ]
Putamen <- Putamen[,-1]
Putamen <- aggregate(. ~ Description, data = Putamen, FUN = sum)
rownames(Putamen) <- Putamen[,1]
Putamen_fd <- names(Putamen)[grepl(paste(fast_death_subjects, collapse="|"), names(Putamen))]
Putamen <- Putamen[, Putamen_fd]
test_Putamen <- Putamen[rowSums(Putamen >= 1) >= (ncol(Putamen) / 2), ]
# For nucleus caudatus
gene_tpm_v11_brain_caudate_basal_ganglia.gct.gz <- read_delim("gene_tpm_v11_brain_caudate_basal_ganglia.gct.gz",
delim = "\t", escape_double = FALSE,
trim_ws = TRUE, skip = 2)
Caudate <- gene_tpm_v11_brain_caudate_basal_ganglia.gct.gz[!grepl("^ENSG", gene_tpm_v11_brain_caudate_basal_ganglia.gct.gz$Description), ]
Caudate <- Caudate[,-1]
Caudate <- aggregate(. ~ Description, data = Caudate, FUN = sum)
rownames(Caudate) <- Caudate[,1]
Caudate_fd <- names(Caudate)[grepl(paste(fast_death_subjects, collapse="|"), names(Caudate))]
Caudate <- Caudate[, Caudate_fd]
test_Caudate <- Caudate[rowSums(Caudate >= 1) >= (ncol(Caudate) / 2), ]
# For nucleus accumbens
gene_tpm_v11_brain_nucleus_accumbens_basal_ganglia.gct.gz <- read_delim("gene_tpm_v11_brain_nucleus_accumbens_basal_ganglia.gct.gz",
delim = "\t", escape_double = FALSE,
trim_ws = TRUE, skip = 2)
Nacc <- gene_tpm_v11_brain_nucleus_accumbens_basal_ganglia.gct.gz[!grepl("^ENSG", gene_tpm_v11_brain_nucleus_accumbens_basal_ganglia.gct.gz$Description), ]
Nacc <- Nacc[,-1]
Nacc <- aggregate(. ~ Description, data = Nacc, FUN = sum)
rownames(Nacc) <- Nacc[,1]
Nacc_fd <- names(Nacc)[grepl(paste(fast_death_subjects, collapse="|"), names(Nacc))]
Nacc <- Nacc[, Nacc_fd]
test_Nacc <- Nacc[rowSums(Nacc >= 1) >= (ncol(Nacc) / 2), ]
# For BA9 in PFC
gene_tpm_v10_brain_frontal_cortex_ba9.gct.gz <- read_delim("gene_tpm_v10_brain_frontal_cortex_ba9.gct.gz",
delim = "\t", escape_double = FALSE,
trim_ws = TRUE, skip = 2)
BA9 <- gene_tpm_v10_brain_frontal_cortex_ba9.gct.gz[!grepl("^ENSG", gene_tpm_v10_brain_frontal_cortex_ba9.gct.gz$Description), ]
BA9 <- BA9[,-1]
BA9 <- aggregate(. ~ Description, data = BA9, FUN = sum)
rownames(BA9) <- BA9[,1]
BA9_fd <- names(BA9)[grepl(paste(fast_death_subjects, collapse="|"), names(BA9))]
BA9 <- BA9[, BA9_fd]
test_BA9 <- BA9[rowSums(BA9 >= 1) >= (ncol(BA9) / 2), ]
We confirmed OCT3 mRNA expression in all structures included in the analysis.
Expression <- data.frame(Structure = c(rep("Nacc", length(colnames(Nacc))), rep("Caudate", length(colnames(Caudate))), rep("Putamen", length(colnames(Putamen))), rep("BA9", length(colnames(BA9)))), TPM = c(as.numeric(Nacc["SLC22A3", ]), as.numeric(Caudate["SLC22A3", ]), as.numeric(Putamen["SLC22A3", ]), as.numeric(BA9["SLC22A3", ])))
ggplot(Expression, aes(y=TPM, x=Structure))+
geom_boxplot(aes(fill=Structure))+
scale_fill_manual(values = c("forestgreen", "purple", "orchid", "coral1"))+
theme(axis.title = element_text(size = 16), axis.text = element_text(size = 14))+
theme(legend.position = "none")
After data preparing and filtration we ranged the remaining genes by the level of their coespression with OCT3 in different structures. We apply Spearman correlation test to order genes for further GSEA test.
prepare_ranked_genes <- function(expression_matrix, target_gene = "SLC22A3") {
# 1. Calculate Spearman correlation
# t() is expensive, so we do it once
transposed_data <- t(expression_matrix)
rho_vector <- cor(transposed_data,
transposed_data[, target_gene, drop = FALSE],
method = "spearman")
# 2. Map SYMBOL to ENTREZID
entrez_ids <- mapIds(org.Hs.eg.db,
keys = rownames(rho_vector),
column = "ENTREZID",
keytype = "SYMBOL",
multiVals = "first")
# 3. Create the named vector for clusterProfiler
ranked_vector <- as.numeric(rho_vector)
names(ranked_vector) <- entrez_ids
ranked_vector <- ranked_vector[!is.na(names(ranked_vector))]
ranked_vector <- ranked_vector[!is.na(ranked_vector)]
# 4. Clean: remove NAs (from mapping or cor) and sort
ranked_vector <- sort(na.omit(ranked_vector), decreasing = TRUE)
return(ranked_vector)
}
genes_Putamen <- prepare_ranked_genes(test_Putamen)
genes_Nacc <- prepare_ranked_genes(test_Nacc)
genes_Caudate <- prepare_ranked_genes(test_Caudate)
genes_BA9 <- prepare_ranked_genes(test_BA9)
We perform Gene Set Enrichment Analysis (GSEA) to identify biological pathways associated with the ranked gene list. We then verify whether any significant results were achieved by the analysis (p-value < 0.05).
GSEA_Putamen <- gseKEGG(geneList = genes_Putamen,
organism = "hsa",
nPerm = 10000,
minGSSize = 3,
maxGSSize = 800,
pvalueCutoff = 0.05,
pAdjustMethod = "BH")
GSEA_Caudate <- gseKEGG(geneList = genes_Caudate,
organism = "hsa",
nPerm = 10000,
minGSSize = 3,
maxGSSize = 800,
pvalueCutoff = 0.05,
pAdjustMethod = "BH")
GSEA_Nacc <- gseKEGG(geneList = na.omit(genes_Nacc),
organism = "hsa",
nPerm = 10000,
minGSSize = 3,
maxGSSize = 800,
pvalueCutoff = 0.05,
pAdjustMethod = "BH")
GSEA_BA9 <- gseKEGG(geneList = na.omit(genes_BA9),
organism = "hsa",
nPerm = 10000,
minGSSize = 3,
maxGSSize = 800,
pvalueCutoff = 0.1,
pAdjustMethod = "BH")
GSEA_Putamen@result <- GSEA_Putamen@result[GSEA_Putamen@result$qvalue <= 0.05, ]
GSEA_Caudate@result <- GSEA_Caudate@result[GSEA_Caudate@result$qvalue <= 0.05, ]
GSEA_Nacc@result <- GSEA_Nacc@result[GSEA_Nacc@result$qvalue <= 0.05, ]
GSEA_BA9@result <- GSEA_BA9@result[GSEA_BA9@result$qvalue <= 0.05, ]
From the GSEA results, we specifically extracted pathways associated with the Nervous System, categorized according to the KEGG BRITE functional hierarchy. This targeted approach allows for a more focused interpretation of the neurobiological mechanisms involved.
# --- Comprehensive Brain & Mental Health Pathway List (KEGG BRITE) ---
# This list includes: 5.6 Nervous System, 6.1 Neurodegenerative, and 6.5 Mental Disorders.
neuro_mental_pathways <- c(
# 5.6 Nervous System (Physiology & Signaling)
"hsa04724", # Glutamatergic synapse
"hsa04727", # GABAergic synapse
"hsa04725", # Cholinergic synapse
"hsa04728", # Dopaminergic synapse
"hsa04726", # Serotonergic synapse
"hsa04720", # Long-term potentiation
"hsa04730", # Long-term depression
"hsa04723", # Retrograde endocannabinoid signaling
"hsa04721", # Synaptic vesicle cycle
"hsa04722", # Neurotrophin signaling pathway
# 6.1 Neurodegenerative Diseases
"hsa05010", # Alzheimer disease
"hsa05012", # Parkinson disease (Highly relevant for SLC22A3)
"hsa05014", # Amyotrophic lateral sclerosis
"hsa05017", # Spinocerebellar ataxia
"hsa05020", # Prion disease
"hsa05022", # Pathways in neurodegeneration
# 6.5 Mental and Behavioural Disorders & Addictions
"hsa05030", # Cocaine addiction
"hsa05031", # Amphetamine addiction
"hsa05032", # Morphine addiction
"hsa05033", # Nicotine addiction
"hsa05034", # Alcoholism
"hsa05035" # Depression (Newest KEGG addition)
)
# --- Filtering GSEA Results ---
# 1. Convert to data frame and ensure strings (avoiding factor issues)
Putamen_results <- as.data.frame(GSEA_Putamen)
Putamen_results$ID <- as.character(trimws(Putamen_results$ID))
clean_targets <- as.character(trimws(neuro_mental_pathways))
Putamen_results <- Putamen_results[Putamen_results$ID %in% clean_targets, ]
Putamen_results <- Putamen_results[Putamen_results$ID %in% neuro_mental_pathways, ]
# Print the top significant brain-related pathways
if (nrow(Putamen_results) > 0) {
print(knitr::kable(Putamen_results[, c("ID", "Description", "NES", "qvalue")],
caption = "Significant Brain-Related Pathways"))
} else {
cat("\n\n**Warning:** No significant pathways from the 'Neuro & Mental' list were found.\n\n")
}
##
##
## Table: Significant Brain-Related Pathways
##
## | |ID |Description | NES| qvalue|
## |:--------|:--------|:----------------------|---------:|---------:|
## |hsa05033 |hsa05033 |Nicotine addiction | -2.050838| 0.0230287|
## |hsa04721 |hsa04721 |Synaptic vesicle cycle | -1.949285| 0.0234751|
Caudate_results <- as.data.frame(GSEA_Caudate)
Caudate_results <- Caudate_results[Caudate_results$ID %in% neuro_mental_pathways, ]
# Print the top significant brain-related pathways
if (nrow(Caudate_results) > 0) {
print(knitr::kable(Caudate_results[, c("ID", "Description", "NES", "qvalue")],
caption = "Significant Brain-Related Pathways"))
} else {
cat("\n\n**Warning:** No significant pathways from the 'Neuro & Mental' list were found.\n\n")
}
##
##
## **Warning:** No significant pathways from the 'Neuro & Mental' list were found.
Nacc_results <- as.data.frame(GSEA_Nacc)
Nacc_results <- Nacc_results[Nacc_results$ID %in% neuro_mental_pathways & Nacc_results$qvalue < 0.05, ]
# Print the top significant brain-related pathways
if (nrow(Nacc_results) > 0) {
print(knitr::kable(Nacc_results[, c("ID", "Description", "NES", "qvalue")],
caption = "Significant Brain-Related Pathways"))
} else {
cat("\n\n**Warning:** No significant pathways from the 'Neuro & Mental' list were found.\n\n")
}
##
##
## Table: Significant Brain-Related Pathways
##
## | |ID |Description | NES| qvalue|
## |:--------|:--------|:---------------------|---------:|---------:|
## |hsa05031 |hsa05031 |Amphetamine addiction | -1.966318| 0.0372445|
BA9_results <- as.data.frame(GSEA_BA9)
BA9_results <- BA9_results[BA9_results$ID %in% clean_targets, ]
BA9_results <- BA9_results[BA9_results$ID %in% neuro_mental_pathways, ]
# Print the top significant brain-related pathways
message("Rows after filtering: ", nrow(BA9_results))
## Rows after filtering: 4
if (nrow(BA9_results) > 0) {
print(knitr::kable(BA9_results[, c("ID", "Description", "NES", "qvalue")],
caption = "Significant Brain-Related Pathways"))
} else {
cat("\n\n**Warning:** No significant pathways from the 'Neuro & Mental' list were found.\n\n")
}
##
##
## Table: Significant Brain-Related Pathways
##
## | |ID |Description | NES| qvalue|
## |:--------|:--------|:------------------------------|--------:|---------:|
## |hsa05032 |hsa05032 |Morphine addiction | 1.627799| 0.0030349|
## |hsa04727 |hsa04727 |GABAergic synapse | 1.614863| 0.0030349|
## |hsa04722 |hsa04722 |Neurotrophin signaling pathway | 1.442827| 0.0126495|
## |hsa04724 |hsa04724 |Glutamatergic synapse | 1.383566| 0.0273422|
Below, we visualize our results:
top_pathways <- Putamen_results$ID
for (p_id in top_pathways) {
p_name <- Putamen_results[Putamen_results$ID == p_id, "Description"]
stats <- GSEA_Putamen@result[p_id, ]
p_val <- formatC(stats$pvalue, format = "e", digits = 2)
q_val <- formatC(stats$p.adjust, format = "e", digits = 2)
label_text <- paste0("p-val: ", p_val, "\nq-val: ", q_val)
p <- gseaplot2(GSEA_Putamen, geneSetID = p_id, title = p_name)
p[[1]] <- p[[1]] +
annotate("text", x = Inf, y = -Inf, label = label_text,
hjust = 1.1, vjust = -0.5,
size = 3.5, fontface = "bold", color = "darkred")
pathview(gene.data = genes_Putamen,
pathway.id = p_id,
species = "hsa",
low = list(gene = "skyblue3"),
mid = list(gene = "white"),
high = list(gene = "magenta3"),
limit = list(gene = max(abs(genes_Putamen)), cpd = 1))
img_name <- paste0(p_id, ".pathview.png")
cat("\n\n#### GSEA Plot\n\n")
print(p)
cat("\n\n")
cat("\n#### KEGG Map\n\n")
# Выводим картинку только если файл свежий
img_name <- paste0(p_id, ".pathview.png")
if(file.exists(img_name)) {
cat(paste0(", ")\n\n"))
}
}
top_pathways <- Nacc_results$ID
for (p_id in top_pathways) {
p_name <- Nacc_results[Nacc_results$ID == p_id, "Description"]
stats <- GSEA_Nacc@result[p_id, ]
p_val <- formatC(stats$pvalue, format = "e", digits = 2)
q_val <- formatC(stats$p.adjust, format = "e", digits = 2)
label_text <- paste0("p-val: ", p_val, "\nq-val: ", q_val)
p <- gseaplot2(GSEA_Nacc, geneSetID = p_id, title = p_name)
p[[1]] <- p[[1]] +
annotate("text", x = Inf, y = -Inf, label = label_text,
hjust = 1.1, vjust = -0.5,
size = 3.5, fontface = "bold", color = "darkred")
pathview(gene.data = genes_Nacc,
pathway.id = p_id,
species = "hsa",
same.layer = FALSE,
low = list(gene = "skyblue3"),
mid = list(gene = "white"),
high = list(gene = "magenta3"),
limit = list(gene = max(abs(genes_Nacc)), cpd = 1))
img_name <- paste0(p_id, ".pathview.png")
img_name <- paste0(p_id, ".pathview.png")
cat("\n#### GSEA Plot\n\n")
print(p)
cat("\n\n")
cat("\n#### KEGG Map\n")
cat(paste0("\n"))
cat("\n")
}
top_pathways <- BA9_results$ID
for (p_id in top_pathways) {
p_name <- BA9_results[BA9_results$ID == p_id, "Description"]
stats <- GSEA_BA9@result[p_id, ]
p_val <- formatC(stats$pvalue, format = "e", digits = 2)
q_val <- formatC(stats$p.adjust, format = "e", digits = 2)
label_text <- paste0("p-val: ", p_val, "\nq-val: ", q_val)
p <- gseaplot2(GSEA_BA9, geneSetID = p_id, title = p_name)
p[[1]] <- p[[1]] +
annotate("text", x = Inf, y = -Inf, label = label_text,
hjust = 1.1, vjust = -0.5,
size = 3.5, fontface = "bold", color = "darkred")
pathview(gene.data = genes_BA9,
pathway.id = p_id,
species = "hsa",
low = list(gene = "skyblue3"),
mid = list(gene = "white"),
high = list(gene = "magenta3"),
limit = list(gene = max(abs(genes_Nacc)), cpd = 1))
img_name <- paste0(p_id, ".pathview.png")
cat("\n#### GSEA Plot\n\n")
print(p)
cat("\n\n")
cat("\n#### KEGG Map\n")
cat(paste0("\n"))
cat("\n")
}