library(corrplot)
library(edgeR)
library(BioNERO)
library(SummarizedExperiment)
library(org.Mm.eg.db)
library(clusterProfiler)
# Lists for genes of interest with Ensembl codes and gene names.
DA_list <- mapIds(org.Mm.eg.db, c("Th", "Ddc", "Drd1", "Drd2", "Drd3", "Drd4","Drd5", "Slc6a3", "Slc18a2", "Comt", "Maoa", "Maob"), "ENSEMBL", "SYMBOL")
DA_list_s <- c("Th", "Ddc", "Drd1", "Drd2", "Drd3", "Drd4","Drd5", "Slc6a3", "Slc18a2", "Comt", "Maoa", "Maob")
Firstly, we downloaded the table of raw counts from GEO and identified DEGs between the two groups. Since the original dataset included multiple brain structures, only the cerebellum data was extracted for further analysis. DEG analysis was performed as described in the Materials and Methods section.
Subsequently, the data were CPM-normalized for basic correlation analysis and WGCNA.
GSE144046_Cul3_KD_autism_mice_study_counts.txt <- read.delim("GSE144046_Cul3_KD_autism_mice_study_counts.txt.gz", row.names=1)
GSE144046_groups <- c(rep("WT", 6), rep("Model", 6))
GSE144046_cer <- GSE144046_Cul3_KD_autism_mice_study_counts.txt[, c(85:96)]
#DEG analysis with edgeR
y <- DGEList(counts=GSE144046_cer, group=GSE144046_groups)
design <- model.matrix(~0+GSE144046_groups)
keep <- filterByExpr(y)
y <- y[keep, , keep.lib.sizes=FALSE]
y <- calcNormFactors(y)
y <- estimateDisp(y, design, robust=TRUE)
plotBCV(y)
con <- makeContrasts(GSE144046_groupsModel - GSE144046_groupsWT, levels=design)
fit <- glmQLFit(y, design)
qlf <- glmQLFTest(fit, contrast = con)
# Here we could demonstratem that expression levels of dopaminergic genes is not significantly cahanged by the ASD condition modeling
topTags(qlf, n = Inf)$table[rownames(topTags(qlf, n = Inf)$table) %in% DA_list, ]
## logFC logCPM F PValue FDR
## ENSMUSG00000000214 0.63625533 2.9261921 17.22789048 0.001003654 0.1467335
## ENSMUSG00000040147 0.21614354 3.6480697 3.57640423 0.079726086 0.3123122
## ENSMUSG00000020182 0.20701191 2.2092432 1.80354326 0.200898805 0.4584456
## ENSMUSG00000000326 0.05511340 6.0000469 1.13413997 0.305123831 0.5573569
## ENSMUSG00000039358 -0.20060757 -0.5600456 0.93072686 0.351256379 0.5965636
## ENSMUSG00000025037 0.03953072 5.7614802 0.49305577 0.494209622 0.7077515
## ENSMUSG00000025094 -0.52630457 -0.5354357 0.46574740 0.506232536 0.7156888
## ENSMUSG00000032259 0.23452351 -1.3586941 0.46535470 0.506483776 0.7159002
## ENSMUSG00000021478 -0.10004583 -1.0928348 0.18872732 0.670704137 0.8309347
## ENSMUSG00000025496 0.04711207 -0.8577919 0.03108805 0.862605070 0.9354004
plotMDS(y)
# CPM-normalization and filtration
GSE144046_cer <- cpm(GSE144046_cer)
is_expressed <- rowMeans(GSE144046_cer > 0.1) > 0.5
GSE144046_cer <- GSE144046_cer[is_expressed, ]
Correlation analysis was performed using Spearman’s rank correlation test for the WT and model groups separately.
M <- cor(t(GSE144046_cer[rownames(GSE144046_cer) %in% DA_list, c(1:6)]), method = "spearman")
# We filtered our results for correlation level
valid_genes <- intersect(DA_list, rownames(M))
M[abs(M) < 0.6] <- 0
M <- M[valid_genes, valid_genes]
rownames(M) <- mapIds(org.Mm.eg.db, rownames(M), "SYMBOL", "ENSEMBL")
colnames(M) <- mapIds(org.Mm.eg.db, colnames(M), "SYMBOL", "ENSEMBL")
res_p <- cor.mtest(t(GSE144046_cer[rownames(GSE144046_cer) %in% DA_list, c(1:6)]), conf.level = 0.95)
res_p$p <- res_p$p[valid_genes, valid_genes]
res_p$p[abs(M) < 0.6] <- 1
rownames(res_p$p) <- mapIds(org.Mm.eg.db, rownames(res_p$p), "SYMBOL", "ENSEMBL")
colnames(res_p$p) <- mapIds(org.Mm.eg.db, colnames(res_p$p), "SYMBOL", "ENSEMBL")
corrplot::corrplot(M,
method = "color",
p.mat = res_p$p,
sig.level = 0.05,
insig = "blank",
addCoef.col = "black",
number.digits = 1,
number.cex = 0.7,
tl.col = "black",
cl.lim = c(-1, 1),
col = colorRampPalette(c("magenta4", "white", "green4"))(200),
title = "Coexpression pattern in WT mice",
mar = c(0, 0, 3, 0))
M <- cor(t(GSE144046_cer[rownames(GSE144046_cer) %in% DA_list, c(7:12)]), method = "spearman")
M[abs(M) < 0.6] <- 0
valid_genes <- intersect(DA_list, rownames(M))
M <- M[valid_genes, valid_genes]
rownames(M) <- mapIds(org.Mm.eg.db, rownames(M), "SYMBOL", "ENSEMBL")
colnames(M) <- mapIds(org.Mm.eg.db, colnames(M), "SYMBOL", "ENSEMBL")
res_p <- cor.mtest(t(GSE144046_cer[rownames(GSE144046_cer) %in% DA_list, c(7:12)]), conf.level = 0.95)
res_p$p <- res_p$p[valid_genes, valid_genes]
res_p$p[abs(M) < 0.6] <- 1
rownames(res_p$p) <- mapIds(org.Mm.eg.db, rownames(res_p$p), "SYMBOL", "ENSEMBL")
colnames(res_p$p) <- mapIds(org.Mm.eg.db, colnames(res_p$p), "SYMBOL", "ENSEMBL")
corrplot::corrplot(M,
method = "color",
p.mat = res_p$p,
sig.level = 0.05,
insig = "blank",
addCoef.col = "black",
number.digits = 1,
number.cex = 0.7,
tl.col = "black",
cl.lim = c(-1, 1),
col = colorRampPalette(c("magenta4", "white", "green4"))(200),
title = "Coexpression pattern in model mice",
mar = c(0, 0, 3, 0))
WGCNA was performed to identify dopaminergic genes involved in co-expression modules.
GSE144046_se <- SummarizedExperiment(assays=list(counts=cpm(GSE144046_cer)),colData=data.frame(Group = GSE144046_groups, row.names = colnames(GSE144046_cer)))
GSE144046_exp_filt <- replace_na(GSE144046_se)
GSE144046_exp_filt <- remove_nonexp(GSE144046_exp_filt, method = "median", min_exp = 0.5)
GSE144046_exp_filt <- filter_by_variance(GSE144046_exp_filt, n = 5000)
GSE144046_exp_filt@NAMES[GSE144046_exp_filt@NAMES %in% DA_list]
## [1] "ENSMUSG00000000326" "ENSMUSG00000025037"
GSE144046_exp_filt <- ZKfiltering(GSE144046_exp_filt, cor_method = "pearson")
GSE144046_sft <- SFT_fit(GSE144046_exp_filt, net_type = "signed hybrid", cor_method = "pearson")
## Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1 3 0.0583 -0.182 0.507 460.0 388.00 1080
## 2 4 0.4720 -0.585 0.587 330.0 262.00 919
## 3 5 0.7350 -0.825 0.730 250.0 185.00 802
## 4 6 0.8360 -0.961 0.804 196.0 135.00 712
## 5 7 0.8930 -1.040 0.864 158.0 100.00 639
## 6 8 0.9130 -1.080 0.888 131.0 76.00 579
## 7 9 0.9330 -1.110 0.915 110.0 58.20 528
## 8 10 0.9430 -1.120 0.931 93.2 46.00 485
## 9 11 0.9510 -1.130 0.944 80.3 36.70 447
## 10 12 0.9550 -1.140 0.950 69.9 29.60 416
## 11 13 0.9580 -1.150 0.954 61.4 24.20 389
## 12 14 0.9630 -1.150 0.960 54.3 19.70 366
## 13 15 0.9590 -1.160 0.951 48.4 16.50 345
## 14 16 0.9650 -1.170 0.959 43.4 13.80 326
## 15 17 0.9670 -1.170 0.961 39.1 11.70 309
## 16 18 0.9620 -1.170 0.955 35.4 9.85 294
## 17 19 0.9600 -1.180 0.951 32.2 8.35 280
## 18 20 0.9600 -1.190 0.950 29.4 7.18 267
GSE144046_power <- GSE144046_sft$power
GSE144046_net <- exp2gcn(
GSE144046_exp_filt, net_type = "signed hybrid", SFTpower = GSE144046_power,
cor_method = "pearson"
)
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
GSE144046_MEtrait <- module_trait_cor(exp = GSE144046_exp_filt, MEs = GSE144046_net$MEs)
GSE144046_hubs <- get_hubs_gcn(GSE144046_exp_filt, GSE144046_net)
GSE144046_hubs[GSE144046_hubs$Gene %in% DA_list,]
## [1] Gene Module kWithin
## <0 rows> (or 0-length row.names)
GSE144046_net$genes_and_modules[GSE144046_net$genes_and_modules$Genes %in% DA_list, ]
## Genes Modules
## 14 ENSMUSG00000000326 darkmagenta
## 1454 ENSMUSG00000025037 darkmagenta
plot_module_trait_cor(GSE144046_MEtrait)
We demonstrated that Comt and Maoa were involved in a co-expression module that was not associated with either genotype. Subsequently, we analyzed the two genotypes separately to more precisely identify genotype-specific features.
GSE144046_exp_ctrl <- replace_na(GSE144046_se[,GSE144046_se$Group == "WT"])
#We renamed genes and applied symbols for more understandable results.
new_symbols <- mapIds(org.Mm.eg.db, rownames(GSE144046_exp_ctrl), "SYMBOL", "ENSEMBL")
rownames(GSE144046_exp_ctrl) <- new_symbols
GSE144046_exp_ctrl <- GSE144046_exp_ctrl[!is.na(rownames(GSE144046_exp_ctrl)), ]
GSE144046_exp_ctrl <- GSE144046_exp_ctrl[!duplicated(rownames(GSE144046_exp_ctrl)), ]
# Here we applied low threshold for expression variability.
GSE144046_exp_ctrl <- filter_by_variance(GSE144046_exp_ctrl, n = 5000)
print(nrow(GSE144046_exp_ctrl))
## [1] 5000
GSE144046_exp_ctrl <- ZKfiltering(GSE144046_exp_ctrl, cor_method = "pearson")
GSE144046_sft <- SFT_fit(GSE144046_exp_ctrl, net_type = "signed hybrid", cor_method = "pearson")
## Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1 3 0.48700 0.5410 0.825 1100 1280 1640
## 2 4 0.27300 0.3280 0.846 945 1100 1510
## 3 5 0.13900 0.1980 0.867 832 969 1400
## 4 6 0.05070 0.1090 0.891 744 856 1310
## 5 7 0.00900 0.0415 0.913 672 761 1230
## 6 8 0.00108 -0.0138 0.914 613 683 1160
## 7 9 0.02540 -0.0642 0.916 563 618 1100
## 8 10 0.06310 -0.0996 0.916 521 561 1040
## 9 11 0.11700 -0.1330 0.936 484 511 995
## 10 12 0.18700 -0.1640 0.931 451 469 954
## 11 13 0.27600 -0.2000 0.895 422 430 920
## 12 14 0.36300 -0.2300 0.880 397 397 889
## 13 15 0.45100 -0.2590 0.857 374 366 861
## 14 16 0.51900 -0.2850 0.841 353 340 834
## 15 17 0.57600 -0.3090 0.814 334 316 809
## 16 18 0.63400 -0.3360 0.805 317 294 786
## 17 19 0.69100 -0.3580 0.810 302 274 764
## 18 20 0.73300 -0.3810 0.814 287 256 744
power <- GSE144046_sft$power
GSE144046_net_ctrl <- exp2gcn(
GSE144046_exp_ctrl, net_type = "signed hybrid", SFTpower = power,
cor_method = "pearson"
)
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
hubs <- get_hubs_gcn(GSE144046_exp_ctrl, GSE144046_net_ctrl)
# For checking the module in which dopaminergic genes are involved. It is "antiquewhite4"
GSE144046_net_ctrl$genes_and_modules[GSE144046_net_ctrl$genes_and_modules$Genes %in% DA_list_s, ]
## Genes Modules
## 15 Comt antiquewhite4
## 1462 Maoa antiquewhite4
GSE144046_ctrl_edges_filtered_to <- get_edge_list(
GSE144046_net_ctrl, module = c("antiquewhite4"),
filter = TRUE, method = "pvalue",
nSamples = ncol(GSE144046_exp_ctrl)
)
# We also modified hubs object to add genes of interest in the list for the visualization
my_targets <- c("Comt", "Maoa")
manual_hubs <- rbind(hubs, data.frame(Gene = my_targets, Module = c("antiquewhite4"), kWithin = rep(900,2)))
custom_annotation_df <- data.frame(
Genes = unique(c(GSE144046_ctrl_edges_filtered_to$Gene1, GSE144046_ctrl_edges_filtered_to$Gene2)),
Status = "forestgreen" #this color will be for "WT" here and in the another dataset.
)
plot_gcn(
edgelist_gcn = GSE144046_ctrl_edges_filtered_to[GSE144046_ctrl_edges_filtered_to$Weight > 0.99, ],
net = GSE144046_net_ctrl,
color_by = custom_annotation_df,
hubs = hubs
)
#General network topology in WT mice
plot_gcn(
edgelist_gcn =GSE144046_ctrl_edges_filtered_to[GSE144046_ctrl_edges_filtered_to$Weight > 0.5 & GSE144046_ctrl_edges_filtered_to$Gene1 %in% c(DA_list_s) | GSE144046_ctrl_edges_filtered_to$Gene2 %in% c(DA_list_s) , ],
net = GSE144046_net_ctrl,
color_by = custom_annotation_df,
hubs = manual_hubs[c(1:50, 456, 457),], top_n_hubs = 22)
#Topology for network components interacting with Comt and Maoa genes in the model
Then we repeated the analysis for model animals.
GSE144046_exp_Model <- replace_na(GSE144046_se[,GSE144046_se$Group == "Model"])
#We renamed genes and applied symbols for more understandable results.
new_symbols <- mapIds(org.Mm.eg.db, rownames(GSE144046_exp_Model), "SYMBOL", "ENSEMBL")
rownames(GSE144046_exp_Model) <- new_symbols
GSE144046_exp_Model <- GSE144046_exp_Model[!is.na(rownames(GSE144046_exp_Model)), ]
GSE144046_exp_Model <- GSE144046_exp_Model[!duplicated(rownames(GSE144046_exp_Model)), ]
# Here we applied low threshold for expression variability.
GSE144046_exp_Model <- filter_by_variance(GSE144046_exp_Model, n = 5000)
print(nrow(GSE144046_exp_Model))
## [1] 5000
GSE144046_exp_Model <- ZKfiltering(GSE144046_exp_Model, cor_method = "pearson")
GSE144046_sft <- SFT_fit(GSE144046_exp_Model, net_type = "signed hybrid", cor_method = "pearson")
## Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1 3 0.18800 0.3010 0.473 629.0 660.0 1130
## 2 4 0.00125 0.0223 0.540 479.0 490.0 944
## 3 5 0.06690 -0.1770 0.631 380.0 381.0 809
## 4 6 0.20000 -0.3240 0.728 311.0 303.0 705
## 5 7 0.31200 -0.4220 0.795 260.0 247.0 622
## 6 8 0.40900 -0.5090 0.832 221.0 206.0 555
## 7 9 0.47900 -0.5730 0.853 191.0 174.0 499
## 8 10 0.53300 -0.6290 0.868 167.0 148.0 452
## 9 11 0.56900 -0.6820 0.865 147.0 128.0 412
## 10 12 0.61200 -0.7160 0.884 131.0 112.0 378
## 11 13 0.64400 -0.7450 0.897 117.0 98.7 347
## 12 14 0.69100 -0.7650 0.923 106.0 87.7 321
## 13 15 0.72100 -0.7880 0.933 96.1 78.4 298
## 14 16 0.74600 -0.8060 0.941 87.6 70.6 277
## 15 17 0.77700 -0.8140 0.958 80.2 63.6 259
## 16 18 0.79500 -0.8290 0.964 73.8 57.7 242
## 17 19 0.82300 -0.8410 0.978 68.1 52.6 227
## 18 20 0.82000 -0.8750 0.973 63.0 48.1 216
power <- GSE144046_sft$power
GSE144046_net_Model <- exp2gcn(
GSE144046_exp_Model, net_type = "signed hybrid", SFTpower = power,
cor_method = "pearson"
)
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
hubs <- get_hubs_gcn(GSE144046_exp_Model, GSE144046_net_Model)
# For checking the module in which dopaminergic genes are involved. It is "brown4"
GSE144046_net_Model$genes_and_modules[GSE144046_net_Model$genes_and_modules$Genes %in% DA_list_s, ]
## Genes Modules
## 13 Comt brown4
## 1514 Maoa brown4
GSE144046_Model_edges_filtered_to <- get_edge_list(
GSE144046_net_Model, module = c("brown4"),
filter = TRUE, method = "pvalue",
nSamples = ncol(GSE144046_exp_Model)
)
# We also modified hubs object to add genes of interest in the list for the visualization
my_targets <- c("Comt", "Maoa")
manual_hubs <- rbind(hubs, data.frame(Gene = my_targets, Module = c("brown4"), kWithin = rep(900,2)))
custom_annotation_df <- data.frame(
Genes = unique(c(GSE144046_Model_edges_filtered_to$Gene1, GSE144046_Model_edges_filtered_to$Gene2)),
Status = "purple" #this color will be for "Model" here and in the another dataset.
)
plot_gcn(
edgelist_gcn = GSE144046_Model_edges_filtered_to[GSE144046_Model_edges_filtered_to$Weight > 0.98, ],
net = GSE144046_net_Model,
color_by = custom_annotation_df,
hubs = hubs
)
#General network topology in WT mice
plot_gcn(
edgelist_gcn =GSE144046_Model_edges_filtered_to[GSE144046_Model_edges_filtered_to$Weight > 0.5 & GSE144046_Model_edges_filtered_to$Gene1 %in% c(DA_list_s) | GSE144046_Model_edges_filtered_to$Gene2 %in% c(DA_list_s) , ],
net = GSE144046_net_Model,
color_by = custom_annotation_df,
hubs = manual_hubs[c(1:50, 439, 440),], top_n_hubs = 22)
#Topology for network components interacting with Comt and Maoa genes in the model
Finally, we extracted genes that interact with Comt or Maoa in the identified networks and performed a functional analysis.
Model_coex <- unique(c(GSE144046_Model_edges_filtered_to[GSE144046_Model_edges_filtered_to$Weight > 0.5 & GSE144046_Model_edges_filtered_to$Gene1 %in% c(DA_list_s) | GSE144046_Model_edges_filtered_to$Gene2 %in% c(DA_list_s) , 1], (GSE144046_Model_edges_filtered_to[GSE144046_Model_edges_filtered_to$Weight > 0.5 & GSE144046_Model_edges_filtered_to$Gene1 %in% c(DA_list_s) | GSE144046_Model_edges_filtered_to$Gene2 %in% c(DA_list_s) , 2])))
WT_coex <- unique(c(GSE144046_ctrl_edges_filtered_to[GSE144046_ctrl_edges_filtered_to$Weight > 0.5 & GSE144046_ctrl_edges_filtered_to$Gene1 %in% c(DA_list_s) | GSE144046_ctrl_edges_filtered_to$Gene2 %in% c(DA_list_s) , 1], (GSE144046_ctrl_edges_filtered_to[GSE144046_ctrl_edges_filtered_to$Weight > 0.5 & GSE144046_ctrl_edges_filtered_to$Gene1 %in% c(DA_list_s) | GSE144046_ctrl_edges_filtered_to$Gene2 %in% c(DA_list_s) , 2])))
cc <- clusterProfiler::compareCluster(list(WT = WT_coex, Model = Model_coex,
Common = intersect(Model_coex, WT_coex)),
fun=enrichGO, ont="BP", OrgDb="org.Mm.eg.db", keyType = "SYMBOL")
dotplot(cc)