knitr::opts_chunk$set(echo = TRUE)
library(org.Hs.eg.db)
library(clusterProfiler)
library(data.table)
library(readr)
library(corrplot)
library(BioNERO)
library(SummarizedExperiment)
library(clusterProfiler)
library(ggplot2)
DA_list <- c("TH", "DDC", "DRD1", "DRD2", "DRD3", "DRD4","DRD5", "SLC6A3", "SLC18A2", "COMT", "MAOA", "MAOB")
We downloaded data from the GTEx Portal for local analysis. The data were loaded into the R environment. To ensure high transcriptomic quality and more precise results, we selected donors with short clinical agony (Hardy Scale 1 or 2). We also filtered the data to include only genes with a minimum expression level above 0.1 TPM in at least 50% of the samples.
# Prepare the list of donors with a short clinical agony
pheno <- fread("GTEx_Analysis_v11_Annotations_SubjectPhenotypesDS.txt")
fast_death_subjects <- pheno[DTHHRDY %in% c(1, 2), SUBJID]
# The 'Brain - Cerebellum' dataset consists of RNA-seq data specifically from the vermis; therefore, the analysis results in the code below are attributed to the vermis.
gene_tpm_v11_brain_cerebellum.gct.gz <- read_delim("gene_tpm_v11_brain_cerebellum.gct.gz",
delim = "\t", escape_double = FALSE,
trim_ws = TRUE, skip = 2)
Vermis <- gene_tpm_v11_brain_cerebellum.gct.gz[!grepl("^ENSG", gene_tpm_v11_brain_cerebellum.gct.gz$Description), ]
Vermis <- Vermis[,-1]
Vermis <- aggregate(. ~ Description, data = Vermis, FUN = sum)
rownames(Vermis) <- Vermis[,1]
Vermis <- Vermis[,-1]
# Filtration by Hardy Scale
Vermis_fd <- names(Vermis)[grepl(paste(fast_death_subjects, collapse="|"), names(Vermis))]
Vermis <- Vermis[, Vermis_fd]
# Filtration by expression
test_Vermis <- Vermis[rowSums(Vermis >= 0.1) >= (ncol(Vermis) / 2), ]
# Results from the 'Brain - Cerebellar Hemisphere' dataset are labeled as 'Hemispheres' below.
gene_tpm_v11_brain_cerebellar_hemisphere.gct.gz <- read_delim("gene_tpm_v11_brain_cerebellar_hemisphere.gct.gz",
delim = "\t", escape_double = FALSE,
trim_ws = TRUE, skip = 2)
Hemispheres <- gene_tpm_v11_brain_cerebellar_hemisphere.gct.gz[!grepl("^ENSG", gene_tpm_v11_brain_cerebellar_hemisphere.gct.gz$Description), ]
Hemispheres <- Hemispheres[,-1]
Hemispheres <- aggregate(. ~ Description, data = Hemispheres, FUN = sum)
rownames(Hemispheres) <- Hemispheres[,1]
Hemispheres <- Hemispheres[,-1]
# Filtration by Hardy Scale
Hemispheres_fd <- names(Hemispheres)[grepl(paste(fast_death_subjects, collapse="|"), names(Hemispheres))]
Hemispheres <- Hemispheres[, Hemispheres_fd]
# Filtration by expression
test_Hemispheres <- Hemispheres[rowSums(Hemispheres >= 0.1) >= (ncol(Hemispheres) / 2), ]
After data preparing and filtration we performed correlation analysis for dopaminergic genes in both datasets separately.
Fig_1_data <- rbind(data.frame(Structure = rep("Vermis", ncol(test_Vermis)*length(DA_list)), Gene = rep(DA_list, each =ncol(test_Vermis)), log2_TPM = log2(c(t(test_Vermis[DA_list, ]))+1)), data.frame(Structure = rep("Hemispheres", ncol(test_Hemispheres)*length(DA_list)), Gene = rep(DA_list, each =ncol(test_Hemispheres)), log2_TPM = log2(c(t(test_Hemispheres[DA_list, ]))+1)))
Fig_1 <- ggplot(Fig_1_data, aes(y=log2_TPM, x=Gene))
Fig_1+
geom_boxplot(aes(fill=Structure))+
scale_fill_manual(values = c("bisque2", "khaki2"))+
ylab("log2(TPM+1)")+
theme(axis.title = element_text(size = 16), axis.text = element_text(size = 14), axis.text.x = element_text(angle = 15))+
theme(strip.text = element_text(size = 14))+
theme(legend.position = "bottom", legend.title = element_blank(), legend.text = element_text(size = 16))
Then, we assessed the normality of gene expression distribution using the Shapiro-Wilk test. Since the data followed a non-normal distribution, Spearman’s correlation was applied for further analysis.
# The normality of data distribution was assessed using the Shapiro-Wilk test. Due to the non-normal distribution of gene expression levels, Spearman's correlation was used for further analysis.
apply(na.omit(test_Hemispheres[DA_list, ]), 1, function(x) shapiro.test(x)$p.value)
## DDC DRD1 DRD2 DRD4 SLC18A2 MAOA
## 3.367290e-05 2.894105e-21 1.130659e-23 9.624758e-04 1.500365e-13 5.960831e-13
## MAOB
## 7.689434e-01
apply(na.omit(test_Vermis[DA_list, ]), 1, function(x) shapiro.test(x)$p.value)
## DDC DRD1 DRD2 DRD4 SLC18A2 MAOA
## 4.709595e-03 2.518305e-03 4.870648e-05 9.737165e-02 7.961220e-11 1.937126e-10
## MAOB
## 3.307319e-01
M <- cor(t(test_Vermis[rownames(test_Vermis) %in% DA_list, ]), method = "spearman")
M[abs(M) < 0.5] <- 0
valid_genes <- intersect(DA_list, rownames(M))
M <- M[valid_genes, valid_genes]
res_p <- cor.mtest(t(test_Vermis[rownames(test_Vermis) %in% DA_list, ]), conf.level = 0.95)
res_p$p <- res_p$p[valid_genes, valid_genes]
res_p$p[abs(M) < 0.5] <- 1
corrplot(M,
method = "color",
p.mat = res_p$p,
sig.level = 0.05,
insig = "blank",
addCoef.col = "black",
number.digits = 1,
number.cex = 0.7,
tl.col = "black",
cl.lim = c(-1, 1),
col = colorRampPalette(c("magenta4", "white", "green4"))(200),
title = "Coexpression pattern in healthy human vermis",
mar = c(0, 0, 3, 0))
M <- cor(t(test_Hemispheres[rownames(test_Hemispheres) %in% DA_list, ]), method = "spearman")
valid_genes <- intersect(DA_list, rownames(M))
M[abs(M) < 0.5] <- 0
M <- M[valid_genes, valid_genes]
res_p <- cor.mtest(t(test_Hemispheres[rownames(test_Hemispheres) %in% DA_list, ]), conf.level = 0.95)
res_p$p <- res_p$p[valid_genes, valid_genes]
res_p$p[abs(M) < 0.5] <- 1
corrplot(M,
method = "color",
p.mat = res_p$p,
sig.level = 0.05,
insig = "blank",
addCoef.col = "black",
number.digits = 1,
number.cex = 0.7,
tl.col = "black",
cl.lim = c(-1, 1),
col = colorRampPalette(c("magenta4", "white", "green4"))(200),
title = "Coexpression pattern in healthy human cerebellar hemispheres",
mar = c(0, 0, 3, 0))
To explore the involvement of dopamine system genes in gene co-expression networks, we analyzed the datasets using the WGCNA method.
Vermis_se <- SummarizedExperiment::SummarizedExperiment(assays=list(counts=test_Vermis),colData=data.frame(row.names = colnames(test_Vermis)))
Vermis_exp_filt <- replace_na(Vermis_se)
Vermis_exp_filt <- remove_nonexp(Vermis_exp_filt, method = "median", min_exp = 0.5)
Vermis_exp_filt <- filter_by_variance(Vermis_exp_filt, n = 5000)
Vermis_exp_filt@NAMES[Vermis_exp_filt@NAMES %in% DA_list] # No daergic genes overcome variability treshold in vermis.
## character(0)
Hemispheres_se <- SummarizedExperiment::SummarizedExperiment(assays=list(counts=test_Hemispheres),colData=data.frame(row.names = colnames(test_Hemispheres)))
Hemispheres_exp_filt <- BioNERO::replace_na(Hemispheres_se)
Hemispheres_exp_filt <- BioNERO::remove_nonexp(Hemispheres_exp_filt, method = "median", min_exp = 0.5)
Hemispheres_exp_filt <- BioNERO::filter_by_variance(Hemispheres_exp_filt, n = 5000)
Hemispheres_exp_filt@NAMES[Hemispheres_exp_filt@NAMES %in% DA_list] # Only MAOB expression overcame variability threshold.
## [1] "MAOB"
Hemispheres_exp_filt <- BioNERO::ZKfiltering(Hemispheres_exp_filt, cor_method = "pearson")
## Number of removed samples: 10
Hemispheres_sft <- BioNERO::SFT_fit(Hemispheres_exp_filt, net_type = "signed hybrid", cor_method = "pearson")
## Warning: executing %dopar% sequentially: no parallel backend registered
## Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1 3 0.0606 0.128 -0.2070 1040.0 1140.0 1910
## 2 4 0.0398 -0.127 -0.0619 734.0 789.0 1530
## 3 5 0.1760 -0.344 0.2790 539.0 561.0 1260
## 4 6 0.2720 -0.532 0.4920 406.0 407.0 1050
## 5 7 0.3480 -0.705 0.6130 312.0 302.0 886
## 6 8 0.4180 -0.827 0.7150 244.0 226.0 755
## 7 9 0.4710 -0.935 0.7750 194.0 172.0 648
## 8 10 0.5140 -1.040 0.8130 156.0 132.0 559
## 9 11 0.5460 -1.140 0.8410 127.0 103.0 486
## 10 12 0.5870 -1.200 0.8690 104.0 80.8 424
## 11 13 0.6280 -1.230 0.8970 85.8 64.2 372
## 12 14 0.6500 -1.280 0.9110 71.4 51.3 327
## 13 15 0.6780 -1.320 0.9240 59.9 41.3 289
## 14 16 0.6970 -1.380 0.9300 50.5 33.5 257
## 15 17 0.7110 -1.440 0.9370 42.8 27.3 232
## 16 18 0.7180 -1.490 0.9420 36.5 22.4 209
## 17 19 0.7370 -1.520 0.9510 31.2 18.5 189
## 18 20 0.7400 -1.570 0.9520 26.9 15.2 171
## No power reached R-squared cut-off, now choosing max R-squared based power
Hemispheres_power <- Hemispheres_sft$power
Hemispheres_net <- BioNERO::exp2gcn(
Hemispheres_exp_filt, net_type = "signed hybrid", SFTpower = Hemispheres_power,
cor_method = "pearson"
)
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
Hemispheres_hubs <- BioNERO::get_hubs_gcn(Hemispheres_exp_filt, Hemispheres_net)
Hemispheres_hubs[Hemispheres_hubs$Gene %in% DA_list,]
## [1] Gene Module kWithin
## <0 rows> (or 0-length row.names)
Hemispheres_net$genes_and_modules[Hemispheres_net$genes_and_modules$Genes %in% DA_list, ]
## Genes Modules
## 2264 MAOB lightcyan1
Hemispheres_edges_filtered_to <- BioNERO::get_edge_list(
Hemispheres_net, module = c("lightcyan1"),
filter = TRUE, method = "pvalue",
nSamples = ncol(Hemispheres_se)
)
custom_annotation_df <- data.frame(
Genes = unique(c(Hemispheres_edges_filtered_to$Gene1, Hemispheres_edges_filtered_to$Gene2)),
Status = "khaki2"
)
# General topology of the network which involves MAOB
plot_gcn(
edgelist_gcn =Hemispheres_edges_filtered_to[Hemispheres_edges_filtered_to$Weight > 0.7, ],
net = Hemispheres_net,
color_by = custom_annotation_df,
hubs = Hemispheres_hubs
)
# topology of MAOB-centered network
plot_gcn(
edgelist_gcn =Hemispheres_edges_filtered_to[Hemispheres_edges_filtered_to$Weight > 0.5 & Hemispheres_edges_filtered_to$Gene1 %in% c(DA_list) | Hemispheres_edges_filtered_to$Gene2 %in% c(DA_list) , ],
net = Hemispheres_net,
color_by = custom_annotation_df,
hubs = Hemispheres_hubs
)
Additionally, we performed functional analysis of genes, linked to MAOB in this network.
Hemispheres_coex <- unique(c(Hemispheres_edges_filtered_to[Hemispheres_edges_filtered_to$Weight > 0.5 & Hemispheres_edges_filtered_to$Gene1 %in% c(DA_list) | Hemispheres_edges_filtered_to$Gene2 %in% c(DA_list) , 1], (Hemispheres_edges_filtered_to[Hemispheres_edges_filtered_to$Weight > 0.5 & Hemispheres_edges_filtered_to$Gene1 %in% c(DA_list) | Hemispheres_edges_filtered_to$Gene2 %in% c(DA_list) , 2])))
ego_Hemispheres <- enrichGO(Hemispheres_coex,
OrgDb = "org.Hs.eg.db",
keyType = 'SYMBOL',
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
dotplot(simplify(ego_Hemispheres))