library(corrplot)
library(edgeR)
library(BioNERO)
library(SummarizedExperiment)
library(org.Mm.eg.db)
library(clusterProfiler)

# Lists for genes of interest with Ensembl codes and gene names.
DA_list <- mapIds(org.Mm.eg.db, c("Th", "Ddc", "Drd1", "Drd2", "Drd3", "Drd4","Drd5", "Slc6a3", "Slc18a2", "Comt", "Maoa", "Maob"), "ENSEMBL", "SYMBOL")
DA_list_s <- c("Th", "Ddc", "Drd1", "Drd2", "Drd3", "Drd4","Drd5", "Slc6a3", "Slc18a2", "Comt", "Maoa", "Maob")

Firstly, we downloaded the table of raw counts from GEO and identified DEGs between the two groups. Since the original dataset included multiple brain structures, only the cerebellum data was extracted for further analysis. DEG analysis was performed as described in the Materials and Methods section.

Subsequently, the data were CPM-normalized for basic correlation analysis and WGCNA.

GSE144046_Cul3_KD_autism_mice_study_counts.txt <- read.delim("GSE144046_Cul3_KD_autism_mice_study_counts.txt.gz", row.names=1)
GSE144046_groups <- c(rep("WT", 6), rep("Model", 6))

GSE144046_cer <- GSE144046_Cul3_KD_autism_mice_study_counts.txt[, c(85:96)]
#DEG analysis with edgeR
y <- DGEList(counts=GSE144046_cer, group=GSE144046_groups)
design <- model.matrix(~0+GSE144046_groups)
keep <- filterByExpr(y)
y <- y[keep, , keep.lib.sizes=FALSE]
y <- calcNormFactors(y)
y <- estimateDisp(y, design, robust=TRUE)
plotBCV(y)

con <- makeContrasts(GSE144046_groupsModel - GSE144046_groupsWT, levels=design)
fit <- glmQLFit(y, design)
qlf <- glmQLFTest(fit, contrast = con)

# Here we could demonstratem that expression levels of dopaminergic genes is not significantly cahanged by the ASD condition modeling
topTags(qlf, n = Inf)$table[rownames(topTags(qlf, n = Inf)$table) %in% DA_list, ]
##                          logFC     logCPM           F      PValue       FDR
## ENSMUSG00000000214  0.63625533  2.9261921 17.22789048 0.001003654 0.1467335
## ENSMUSG00000040147  0.21614354  3.6480697  3.57640423 0.079726086 0.3123122
## ENSMUSG00000020182  0.20701191  2.2092432  1.80354326 0.200898805 0.4584456
## ENSMUSG00000000326  0.05511340  6.0000469  1.13413997 0.305123831 0.5573569
## ENSMUSG00000039358 -0.20060757 -0.5600456  0.93072686 0.351256379 0.5965636
## ENSMUSG00000025037  0.03953072  5.7614802  0.49305577 0.494209622 0.7077515
## ENSMUSG00000025094 -0.52630457 -0.5354357  0.46574740 0.506232536 0.7156888
## ENSMUSG00000032259  0.23452351 -1.3586941  0.46535470 0.506483776 0.7159002
## ENSMUSG00000021478 -0.10004583 -1.0928348  0.18872732 0.670704137 0.8309347
## ENSMUSG00000025496  0.04711207 -0.8577919  0.03108805 0.862605070 0.9354004
plotMDS(y)

# CPM-normalization and filtration
GSE144046_cer <- cpm(GSE144046_cer)
is_expressed <- rowMeans(GSE144046_cer > 0.1) > 0.5
GSE144046_cer <- GSE144046_cer[is_expressed, ]

Correlation analysis was performed using Spearman’s rank correlation test for the WT and model groups separately.

M <- cor(t(GSE144046_cer[rownames(GSE144046_cer) %in% DA_list, c(1:6)]), method = "spearman")
# We filtered our results for correlation level
valid_genes <- intersect(DA_list, rownames(M))
M[abs(M) < 0.6] <- 0
M <- M[valid_genes, valid_genes]
rownames(M) <- mapIds(org.Mm.eg.db, rownames(M), "SYMBOL", "ENSEMBL")
colnames(M) <- mapIds(org.Mm.eg.db, colnames(M), "SYMBOL", "ENSEMBL")

res_p <- cor.mtest(t(GSE144046_cer[rownames(GSE144046_cer) %in% DA_list, c(1:6)]), conf.level = 0.95)
res_p$p <- res_p$p[valid_genes, valid_genes]
res_p$p[abs(M) < 0.6] <- 1
rownames(res_p$p) <- mapIds(org.Mm.eg.db, rownames(res_p$p), "SYMBOL", "ENSEMBL")
colnames(res_p$p) <- mapIds(org.Mm.eg.db, colnames(res_p$p), "SYMBOL", "ENSEMBL")

corrplot::corrplot(M,
method = "color",
p.mat = res_p$p,      
sig.level = 0.05,     
insig = "blank",      
addCoef.col = "black",
number.digits = 1,
number.cex = 0.7,
tl.col = "black",
cl.lim = c(-1, 1),
col = colorRampPalette(c("magenta4", "white", "green4"))(200),
title = "Coexpression pattern in WT mice",
                   mar = c(0, 0, 3, 0))

M <- cor(t(GSE144046_cer[rownames(GSE144046_cer) %in% DA_list, c(7:12)]), method = "spearman")

M[abs(M) < 0.6] <- 0
valid_genes <- intersect(DA_list, rownames(M))
M <- M[valid_genes, valid_genes]
rownames(M) <- mapIds(org.Mm.eg.db, rownames(M), "SYMBOL", "ENSEMBL")
colnames(M) <- mapIds(org.Mm.eg.db, colnames(M), "SYMBOL", "ENSEMBL")

res_p <- cor.mtest(t(GSE144046_cer[rownames(GSE144046_cer) %in% DA_list, c(7:12)]), conf.level = 0.95)
res_p$p <- res_p$p[valid_genes, valid_genes]
res_p$p[abs(M) < 0.6] <- 1
rownames(res_p$p) <- mapIds(org.Mm.eg.db, rownames(res_p$p), "SYMBOL", "ENSEMBL")
colnames(res_p$p) <- mapIds(org.Mm.eg.db, colnames(res_p$p), "SYMBOL", "ENSEMBL")

corrplot::corrplot(M,
method = "color",
p.mat = res_p$p,      
sig.level = 0.05,     
insig = "blank",      
addCoef.col = "black",
number.digits = 1,
number.cex = 0.7,
tl.col = "black",
cl.lim = c(-1, 1),
col = colorRampPalette(c("magenta4", "white", "green4"))(200),
title = "Coexpression pattern in model mice",
                   mar = c(0, 0, 3, 0))

WGCNA was performed to identify dopaminergic genes involved in co-expression modules.

GSE144046_se <- SummarizedExperiment(assays=list(counts=cpm(GSE144046_cer)),colData=data.frame(Group = GSE144046_groups, row.names = colnames(GSE144046_cer)))
GSE144046_exp_filt <- replace_na(GSE144046_se)
GSE144046_exp_filt <- remove_nonexp(GSE144046_exp_filt, method = "median", min_exp = 0.5)
GSE144046_exp_filt <- filter_by_variance(GSE144046_exp_filt, n = 5000)
GSE144046_exp_filt@NAMES[GSE144046_exp_filt@NAMES %in% DA_list]
## [1] "ENSMUSG00000000326" "ENSMUSG00000025037"
GSE144046_exp_filt <- ZKfiltering(GSE144046_exp_filt, cor_method = "pearson")
GSE144046_sft <- SFT_fit(GSE144046_exp_filt, net_type = "signed hybrid", cor_method = "pearson")
##    Power SFT.R.sq  slope truncated.R.sq mean.k. median.k. max.k.
## 1      3   0.0583 -0.182          0.507   460.0    388.00   1080
## 2      4   0.4720 -0.585          0.587   330.0    262.00    919
## 3      5   0.7350 -0.825          0.730   250.0    185.00    802
## 4      6   0.8360 -0.961          0.804   196.0    135.00    712
## 5      7   0.8930 -1.040          0.864   158.0    100.00    639
## 6      8   0.9130 -1.080          0.888   131.0     76.00    579
## 7      9   0.9330 -1.110          0.915   110.0     58.20    528
## 8     10   0.9430 -1.120          0.931    93.2     46.00    485
## 9     11   0.9510 -1.130          0.944    80.3     36.70    447
## 10    12   0.9550 -1.140          0.950    69.9     29.60    416
## 11    13   0.9580 -1.150          0.954    61.4     24.20    389
## 12    14   0.9630 -1.150          0.960    54.3     19.70    366
## 13    15   0.9590 -1.160          0.951    48.4     16.50    345
## 14    16   0.9650 -1.170          0.959    43.4     13.80    326
## 15    17   0.9670 -1.170          0.961    39.1     11.70    309
## 16    18   0.9620 -1.170          0.955    35.4      9.85    294
## 17    19   0.9600 -1.180          0.951    32.2      8.35    280
## 18    20   0.9600 -1.190          0.950    29.4      7.18    267
GSE144046_power <- GSE144046_sft$power
GSE144046_net <- exp2gcn(
GSE144046_exp_filt, net_type = "signed hybrid", SFTpower = GSE144046_power,
cor_method = "pearson"
)
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
GSE144046_MEtrait <- module_trait_cor(exp = GSE144046_exp_filt, MEs = GSE144046_net$MEs)
GSE144046_hubs <- get_hubs_gcn(GSE144046_exp_filt, GSE144046_net)
GSE144046_hubs[GSE144046_hubs$Gene %in% DA_list,]
## [1] Gene    Module  kWithin
## <0 rows> (or 0-length row.names)
GSE144046_net$genes_and_modules[GSE144046_net$genes_and_modules$Genes %in% DA_list, ]
##                   Genes     Modules
## 14   ENSMUSG00000000326 darkmagenta
## 1454 ENSMUSG00000025037 darkmagenta
plot_module_trait_cor(GSE144046_MEtrait)

We demonstrated that Comt and Maoa were involved in a co-expression module that was not associated with either genotype. Subsequently, we analyzed the two genotypes separately to more precisely identify genotype-specific features.

GSE144046_exp_ctrl <- replace_na(GSE144046_se[,GSE144046_se$Group == "WT"])
#We renamed genes and applied symbols for more understandable results.
new_symbols <- mapIds(org.Mm.eg.db, rownames(GSE144046_exp_ctrl), "SYMBOL", "ENSEMBL")
rownames(GSE144046_exp_ctrl) <- new_symbols
GSE144046_exp_ctrl <- GSE144046_exp_ctrl[!is.na(rownames(GSE144046_exp_ctrl)), ]
GSE144046_exp_ctrl <- GSE144046_exp_ctrl[!duplicated(rownames(GSE144046_exp_ctrl)), ]

# Here we applied low threshold for expression variability.
GSE144046_exp_ctrl <- filter_by_variance(GSE144046_exp_ctrl, n = 5000)
print(nrow(GSE144046_exp_ctrl)) 
## [1] 5000
GSE144046_exp_ctrl <- ZKfiltering(GSE144046_exp_ctrl, cor_method = "pearson")
GSE144046_sft <- SFT_fit(GSE144046_exp_ctrl, net_type = "signed hybrid", cor_method = "pearson")
##    Power SFT.R.sq   slope truncated.R.sq mean.k. median.k. max.k.
## 1      3  0.48700  0.5410          0.825    1100      1280   1640
## 2      4  0.27300  0.3280          0.846     945      1100   1510
## 3      5  0.13900  0.1980          0.867     832       969   1400
## 4      6  0.05070  0.1090          0.891     744       856   1310
## 5      7  0.00900  0.0415          0.913     672       761   1230
## 6      8  0.00108 -0.0138          0.914     613       683   1160
## 7      9  0.02540 -0.0642          0.916     563       618   1100
## 8     10  0.06310 -0.0996          0.916     521       561   1040
## 9     11  0.11700 -0.1330          0.936     484       511    995
## 10    12  0.18700 -0.1640          0.931     451       469    954
## 11    13  0.27600 -0.2000          0.895     422       430    920
## 12    14  0.36300 -0.2300          0.880     397       397    889
## 13    15  0.45100 -0.2590          0.857     374       366    861
## 14    16  0.51900 -0.2850          0.841     353       340    834
## 15    17  0.57600 -0.3090          0.814     334       316    809
## 16    18  0.63400 -0.3360          0.805     317       294    786
## 17    19  0.69100 -0.3580          0.810     302       274    764
## 18    20  0.73300 -0.3810          0.814     287       256    744
power <- GSE144046_sft$power

GSE144046_net_ctrl <- exp2gcn(
GSE144046_exp_ctrl, net_type = "signed hybrid", SFTpower = power,
cor_method = "pearson"
)
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
hubs <- get_hubs_gcn(GSE144046_exp_ctrl, GSE144046_net_ctrl)

# For checking the module in which dopaminergic genes are involved. It is "antiquewhite4"
GSE144046_net_ctrl$genes_and_modules[GSE144046_net_ctrl$genes_and_modules$Genes %in% DA_list_s, ]
##      Genes       Modules
## 15    Comt antiquewhite4
## 1462  Maoa antiquewhite4
GSE144046_ctrl_edges_filtered_to <- get_edge_list(
GSE144046_net_ctrl, module = c("antiquewhite4"),
filter = TRUE, method = "pvalue",
nSamples = ncol(GSE144046_exp_ctrl)
)

# We also modified hubs object to add genes of interest in the list for the visualization
my_targets <- c("Comt", "Maoa")
manual_hubs <- rbind(hubs, data.frame(Gene = my_targets, Module = c("antiquewhite4"), kWithin = rep(900,2)))

custom_annotation_df <- data.frame(
Genes = unique(c(GSE144046_ctrl_edges_filtered_to$Gene1, GSE144046_ctrl_edges_filtered_to$Gene2)),
Status = "forestgreen" #this color will be for "WT" here and in the another dataset.
)

plot_gcn(
edgelist_gcn = GSE144046_ctrl_edges_filtered_to[GSE144046_ctrl_edges_filtered_to$Weight > 0.99, ],
net = GSE144046_net_ctrl,
color_by = custom_annotation_df,
hubs = hubs
)

#General network topology in WT mice

plot_gcn(
edgelist_gcn =GSE144046_ctrl_edges_filtered_to[GSE144046_ctrl_edges_filtered_to$Weight > 0.5 & GSE144046_ctrl_edges_filtered_to$Gene1 %in% c(DA_list_s) | GSE144046_ctrl_edges_filtered_to$Gene2 %in% c(DA_list_s) , ],
net = GSE144046_net_ctrl,
color_by = custom_annotation_df,
hubs = manual_hubs[c(1:50, 456, 457),], top_n_hubs = 22)

#Topology for network components interacting with Comt and Maoa genes in the model

Then we repeated the analysis for model animals.

GSE144046_exp_Model <- replace_na(GSE144046_se[,GSE144046_se$Group == "Model"])
#We renamed genes and applied symbols for more understandable results.
new_symbols <- mapIds(org.Mm.eg.db, rownames(GSE144046_exp_Model), "SYMBOL", "ENSEMBL")
rownames(GSE144046_exp_Model) <- new_symbols
GSE144046_exp_Model <- GSE144046_exp_Model[!is.na(rownames(GSE144046_exp_Model)), ]
GSE144046_exp_Model <- GSE144046_exp_Model[!duplicated(rownames(GSE144046_exp_Model)), ]

# Here we applied low threshold for expression variability.
GSE144046_exp_Model <- filter_by_variance(GSE144046_exp_Model, n = 5000)
print(nrow(GSE144046_exp_Model)) 
## [1] 5000
GSE144046_exp_Model <- ZKfiltering(GSE144046_exp_Model, cor_method = "pearson")
GSE144046_sft <- SFT_fit(GSE144046_exp_Model, net_type = "signed hybrid", cor_method = "pearson")
##    Power SFT.R.sq   slope truncated.R.sq mean.k. median.k. max.k.
## 1      3  0.18800  0.3010          0.473   629.0     660.0   1130
## 2      4  0.00125  0.0223          0.540   479.0     490.0    944
## 3      5  0.06690 -0.1770          0.631   380.0     381.0    809
## 4      6  0.20000 -0.3240          0.728   311.0     303.0    705
## 5      7  0.31200 -0.4220          0.795   260.0     247.0    622
## 6      8  0.40900 -0.5090          0.832   221.0     206.0    555
## 7      9  0.47900 -0.5730          0.853   191.0     174.0    499
## 8     10  0.53300 -0.6290          0.868   167.0     148.0    452
## 9     11  0.56900 -0.6820          0.865   147.0     128.0    412
## 10    12  0.61200 -0.7160          0.884   131.0     112.0    378
## 11    13  0.64400 -0.7450          0.897   117.0      98.7    347
## 12    14  0.69100 -0.7650          0.923   106.0      87.7    321
## 13    15  0.72100 -0.7880          0.933    96.1      78.4    298
## 14    16  0.74600 -0.8060          0.941    87.6      70.6    277
## 15    17  0.77700 -0.8140          0.958    80.2      63.6    259
## 16    18  0.79500 -0.8290          0.964    73.8      57.7    242
## 17    19  0.82300 -0.8410          0.978    68.1      52.6    227
## 18    20  0.82000 -0.8750          0.973    63.0      48.1    216
power <- GSE144046_sft$power

GSE144046_net_Model <- exp2gcn(
GSE144046_exp_Model, net_type = "signed hybrid", SFTpower = power,
cor_method = "pearson"
)
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
hubs <- get_hubs_gcn(GSE144046_exp_Model, GSE144046_net_Model)

# For checking the module in which dopaminergic genes are involved. It is "brown4"
GSE144046_net_Model$genes_and_modules[GSE144046_net_Model$genes_and_modules$Genes %in% DA_list_s, ]
##      Genes Modules
## 13    Comt  brown4
## 1514  Maoa  brown4
GSE144046_Model_edges_filtered_to <- get_edge_list(
GSE144046_net_Model, module = c("brown4"),
filter = TRUE, method = "pvalue",
nSamples = ncol(GSE144046_exp_Model)
)

# We also modified hubs object to add genes of interest in the list for the visualization
my_targets <- c("Comt", "Maoa")
manual_hubs <- rbind(hubs, data.frame(Gene = my_targets, Module = c("brown4"), kWithin = rep(900,2)))

custom_annotation_df <- data.frame(
Genes = unique(c(GSE144046_Model_edges_filtered_to$Gene1, GSE144046_Model_edges_filtered_to$Gene2)),
Status = "purple" #this color will be for "Model" here and in the another dataset.
)

plot_gcn(
edgelist_gcn = GSE144046_Model_edges_filtered_to[GSE144046_Model_edges_filtered_to$Weight > 0.98, ],
net = GSE144046_net_Model,
color_by = custom_annotation_df,
hubs = hubs
)

#General network topology in WT mice

plot_gcn(
edgelist_gcn =GSE144046_Model_edges_filtered_to[GSE144046_Model_edges_filtered_to$Weight > 0.5 & GSE144046_Model_edges_filtered_to$Gene1 %in% c(DA_list_s) | GSE144046_Model_edges_filtered_to$Gene2 %in% c(DA_list_s) , ],
net = GSE144046_net_Model,
color_by = custom_annotation_df,
hubs = manual_hubs[c(1:50, 439, 440),], top_n_hubs = 22)

#Topology for network components interacting with Comt and Maoa genes in the model

Finally, we extracted genes that interact with Comt or Maoa in the identified networks and performed a functional analysis.

Model_coex <- unique(c(GSE144046_Model_edges_filtered_to[GSE144046_Model_edges_filtered_to$Weight > 0.5 & GSE144046_Model_edges_filtered_to$Gene1 %in% c(DA_list_s) | GSE144046_Model_edges_filtered_to$Gene2 %in% c(DA_list_s) , 1], (GSE144046_Model_edges_filtered_to[GSE144046_Model_edges_filtered_to$Weight > 0.5 & GSE144046_Model_edges_filtered_to$Gene1 %in% c(DA_list_s) | GSE144046_Model_edges_filtered_to$Gene2 %in% c(DA_list_s) , 2])))
WT_coex <- unique(c(GSE144046_ctrl_edges_filtered_to[GSE144046_ctrl_edges_filtered_to$Weight > 0.5 & GSE144046_ctrl_edges_filtered_to$Gene1 %in% c(DA_list_s) | GSE144046_ctrl_edges_filtered_to$Gene2 %in% c(DA_list_s) , 1], (GSE144046_ctrl_edges_filtered_to[GSE144046_ctrl_edges_filtered_to$Weight > 0.5 & GSE144046_ctrl_edges_filtered_to$Gene1 %in% c(DA_list_s) | GSE144046_ctrl_edges_filtered_to$Gene2 %in% c(DA_list_s) , 2])))
cc <- clusterProfiler::compareCluster(list(WT = WT_coex, Model = Model_coex,
Common = intersect(Model_coex, WT_coex)),
fun=enrichGO, ont="BP", OrgDb="org.Mm.eg.db", keyType = "SYMBOL")
dotplot(cc)