Install GAMclust package:
devtools::install_github("alserglab/GAMclust")
library(GAMclust)
library(gatom)
library(mwcsr)
library(fgsea)
library(data.table)
library(futile.logger)
set.seed(42)
First, please load and initialize all objects required for GAM-clustering analysis:
(1.1.) load KEGG metabolic
network network.kegg.rds
and its metabolites annotation met.kegg.db.rds
# KEGG network:
network <- readRDS(url("http://artyomovlab.wustl.edu/publications/supp_materials/GATOM/network.kegg.rds"))
metabolites.annotation <- readRDS(url("http://artyomovlab.wustl.edu/publications/supp_materials/GATOM/met.kegg.db.rds"))
(1.2.) or load combined metabolic network network.combined.rds,
its metabolites annotation met.combined.db.rds
and species-specific list of genes that either come from proteome or are
not linked to a specific enzyme gene2reaction.combined.mmu.eg.tsv
for mouse and gene2reaction.combined.hsa.eg.tsv
for human data;
# combined network (KEGG+Rhea+transport reactions):
network <- readRDS(url("http://artyomovlab.wustl.edu/publications/supp_materials/GATOM/network.combined.rds"))
metabolites.annotation <- readRDS(url("http://artyomovlab.wustl.edu/publications/supp_materials/GATOM/met.combined.db.rds"))
gene2reaction.extra <- data.table::fread("http://artyomovlab.wustl.edu/publications/supp_materials/GATOM/gene2reaction.combined.mmu.eg.tsv", colClasses="character")
org.Hs.eg.gatom.anno
for human data or org.Mm.eg.gatom.anno
for mouse data;network.annotation <- readRDS(url("http://artyomovlab.wustl.edu/publications/supp_materials/GATOM/org.Mm.eg.gatom.anno.rds"))
met.to.filter <- data.table::fread(system.file("mets2mask.lst", package="GAMclust"))$ID
(4.1.) we recommend to use here either heuristic relax-and-cut solver
rnc_solver from mwcsr
package,
solver <- mwcsr::rnc_solver()
(4.2.) either proprietary CPLEX solver (free for academy);
cplex.dir <- "/opt/ibm/ILOG/CPLEX_Studio1271"
solver <- mwcsr::virgo_solver(cplex_dir = cplex.dir)
work.dir <- "results"
dir.create(work.dir, showWarnings = F, recursive = T)
stats.dir <- file.path(work.dir, "stats")
dir.create(stats.dir, showWarnings = F, recursive = T)
setup_logger <- function(log.file.path, logger.name = "stats.logger") {
file.appender <- appender.file(log.file.path)
console.appender <- appender.console()
combined.appender <- function(line) {
file.appender(line)
console.appender(line)
}
flog.appender(combined.appender, name = logger.name)
flog.threshold(TRACE, name = logger.name)
}
log.file <- file.path(stats.dir, "log.txt")
setup_logger(log.file.path = log.file, logger.name = "stats.logger")
GAMclust works with bulk, single cell and spatial RNA-seq data.
This vignette shows how to process bulk RNA-seq data on the example of ImmGen Open Source data reanalysis.
Let’s load the data.
For bulk RNA-seq cell data, take 12,000-15,000 genes for the GAM-clustering analysis.
expression_set_object <- readRDS(url("http://artyomovlab.wustl.edu/publications/supp_materials/GAMclust/243_es.top12k.rds"))
E <- Biobase::exprs(expression_set_object)
nrow(E) # ! make sure this value is in range from 10,000 to 15,000
# [1] 12000
E[1:3, 1:3]
# MF.64pLYVEpIIn.Ao.1 MF.64pLYVEpIIn.Ao.2 MF.64pLYVEpIIn.Ao.3
# Actb 14.77450 14.97466 14.80920
# Cst3 12.73106 12.80513 12.62843
# Eef1a1 12.14864 12.38202 12.33756
Genes in your dataset may be named as Symbol, Entrez, Ensembl or
RefSeq IDs. One of these names should be specified as value of
gene.id.type parameter in prepareData().
E.prep <- prepareData(E = E,
gene.id.type = "Symbol",
use.PCA = FALSE,
network.annotation = network.annotation)
E.prep[1:3, 1:3]
# MF.64pLYVEpIIn.Ao.1 MF.64pLYVEpIIn.Ao.2 MF.64pLYVEpIIn.Ao.3
# 11461 0.7947992 1.0375496 0.8368845
# 13010 0.3038730 0.3471841 0.2438681
# 13627 0.4187557 0.6674637 0.6200901
The prepareNetwork() function defines the structure of
the final metabolic modules.
network.prep <- prepareNetwork(E = E.prep,
network = network,
topology = "metabolites",
met.to.filter = met.to.filter,
network.annotation = network.annotation,
gene2reaction.extra = gene2reaction.extra) # for combined network
# INFO [2026-01-24 15:09:37] Global metabolite network contains 6583 edges.
# INFO [2026-01-24 15:09:37] Largest connected component of this global network contains 1430 nodes and 5121 edges.
The preClustering() function defines initial patterns
using k-medoids clustering on gene expression matrix. It is strongly
recommended to do initial clustering with no less than 32 clusters
(initial.number.of.clusters = 32).
You can visualize the initial heatmap as shown below.
cur.centers <- preClustering(E.prep = E.prep,
network.prep = network.prep,
initial.number.of.clusters = 32,
network.annotation = network.annotation)
# INFO [2026-01-24 15:09:37] 1160 metabolic genes from the analysed dataset mapped to this component.
cur.centers[1:3, 1:3]
# MF.64pLYVEpIIn.Ao.1 MF.64pLYVEpIIn.Ao.2 MF.64pLYVEpIIn.Ao.3
# 1 -0.09284514 -0.12763246 0.02188235
# 2 0.03432400 0.02773624 0.13477218
# 3 0.78988889 0.80944803 0.87514519
pheatmap::pheatmap(
GAMclust:::normalize.rows(cur.centers),
cluster_rows=F, cluster_cols=F,
show_rownames=F, show_colnames=T)
Now you have everything prepared for the GAM-clustering analysis.
Initial patterns will be now refined in an iterative process. The
output of gamClustering() function presents a set of
specific subnetworks (also called metabolic modules) that reflect
metabolic variability within a given transcriptional dataset.
Note, that it may take a long time to derive metabolic modules by
gamClustering() function (tens of minutes).
There is a set of parameters which determine the size and number of your final modules. We recommend you to start with the default settings, however you can adjust them based on your own preferences:
If you consider final modules to be too small or too big and it
complicates interpretation for you, you can either increase or reduce by
10 units the max.module.size parameter.
If among final modules you consider presence of any modules with
too similar patterns, you can reduce by 0.1 units the
cor.threshold parameter.
If among final modules you consider presence of any uninformative
modules, you can reduce by 10 times the p.adj.val.threshold
parameter.
results <- gamClustering(E.prep = E.prep,
network.prep = network.prep,
cur.centers = cur.centers,
start.base = 0.5,
base.dec = 0.05,
max.module.size = 50,
cor.threshold = 0.7,
p.adj.val.threshold = 1e-5,
batch.solver = seq_batch_solver(solver),
work.dir = work.dir,
show.intermediate.clustering = F,
verbose = T,
collect.stats = T)
# INFO [2026-01-24 15:09:38] GAM-CLUSTERING starts here.
# INFO [2026-01-24 15:09:38] [*] Iteration 1
# INFO [2026-01-24 15:11:11] >> base was equal to: 0.5
# INFO [2026-01-24 15:11:11] >> number of modules was equal to: 32
# INFO [2026-01-24 15:11:11] >> sizes of modules (unique genes) were in range: 1-47
# INFO [2026-01-24 15:11:11] >> max diff: 1.95
# INFO [2026-01-24 15:11:11] [*] Iteration 2
# INFO [2026-01-24 15:12:45] >> base was equal to: 0.5
# INFO [2026-01-24 15:12:45] >> number of modules was equal to: 32
# INFO [2026-01-24 15:12:45] >> sizes of modules (unique genes) were in range: 1-48
# INFO [2026-01-24 15:12:45] >> max diff: 0
# INFO [2026-01-24 15:12:45] >> max cor exceeded 0.7: 0.74
# INFO [2026-01-24 15:12:45] [*] Iteration 3
# INFO [2026-01-24 15:14:12] >> base was equal to: 0.5
# INFO [2026-01-24 15:14:12] >> number of modules was equal to: 31
# INFO [2026-01-24 15:14:12] >> sizes of modules (unique genes) were in range: 1-57
# INFO [2026-01-24 15:14:13] >> max diff: 0.45
# INFO [2026-01-24 15:14:13] [*] Iteration 4
# INFO [2026-01-24 15:15:43] >> base was equal to: 0.5
# INFO [2026-01-24 15:15:43] >> number of modules was equal to: 31
# INFO [2026-01-24 15:15:43] >> sizes of modules (unique genes) were in range: 1-54
# INFO [2026-01-24 15:15:43] >> max diff: 0
# INFO [2026-01-24 15:15:43] >> max cor exceeded 0.7: 0.75
# INFO [2026-01-24 15:15:43] [*] Iteration 5
# INFO [2026-01-24 15:17:07] >> base was equal to: 0.45
# INFO [2026-01-24 15:17:07] >> number of modules was equal to: 30
# INFO [2026-01-24 15:17:07] >> sizes of modules (unique genes) were in range: 1-60
# INFO [2026-01-24 15:17:07] >> max diff: 0.39
# INFO [2026-01-24 15:17:07] [*] Iteration 6
# INFO [2026-01-24 15:18:29] >> base was equal to: 0.45
# INFO [2026-01-24 15:18:29] >> number of modules was equal to: 30
# INFO [2026-01-24 15:18:29] >> sizes of modules (unique genes) were in range: 1-56
# INFO [2026-01-24 15:18:30] >> max diff: 0
# INFO [2026-01-24 15:18:30] >> max cor exceeded 0.7: 0.74
# INFO [2026-01-24 15:18:30] [*] Iteration 7
# INFO [2026-01-24 15:19:46] >> base was equal to: 0.4
# INFO [2026-01-24 15:19:46] >> number of modules was equal to: 29
# INFO [2026-01-24 15:19:46] >> sizes of modules (unique genes) were in range: 1-79
# INFO [2026-01-24 15:19:46] >> max diff: 1.58
# INFO [2026-01-24 15:19:46] [*] Iteration 8
# INFO [2026-01-24 15:21:03] >> base was equal to: 0.4
# INFO [2026-01-24 15:21:03] >> number of modules was equal to: 29
# INFO [2026-01-24 15:21:03] >> sizes of modules (unique genes) were in range: 1-75
# INFO [2026-01-24 15:21:03] >> max diff: 0
# INFO [2026-01-24 15:21:03] >> max cor exceeded 0.7: 0.76
# INFO [2026-01-24 15:21:03] [*] Iteration 9
# INFO [2026-01-24 15:22:15] >> base was equal to: 0.35
# INFO [2026-01-24 15:22:15] >> number of modules was equal to: 28
# INFO [2026-01-24 15:22:15] >> sizes of modules (unique genes) were in range: 1-83
# INFO [2026-01-24 15:22:15] >> max diff: 1.15
# INFO [2026-01-24 15:22:15] [*] Iteration 10
# INFO [2026-01-24 15:23:27] >> base was equal to: 0.35
# INFO [2026-01-24 15:23:27] >> number of modules was equal to: 28
# INFO [2026-01-24 15:23:27] >> sizes of modules (unique genes) were in range: 1-77
# INFO [2026-01-24 15:23:27] >> max diff: 0
# INFO [2026-01-24 15:23:27] >> max cor exceeded 0.7: 0.75
# INFO [2026-01-24 15:23:27] [*] Iteration 11
# INFO [2026-01-24 15:24:36] >> base was equal to: 0.3
# INFO [2026-01-24 15:24:36] >> number of modules was equal to: 27
# INFO [2026-01-24 15:24:36] >> sizes of modules (unique genes) were in range: 1-58
# INFO [2026-01-24 15:24:37] >> max diff: 1.04
# INFO [2026-01-24 15:24:37] [*] Iteration 12
# INFO [2026-01-24 15:25:45] >> base was equal to: 0.3
# INFO [2026-01-24 15:25:45] >> number of modules was equal to: 27
# INFO [2026-01-24 15:25:45] >> sizes of modules (unique genes) were in range: 1-60
# INFO [2026-01-24 15:25:45] >> max diff: 0
# INFO [2026-01-24 15:25:45] >> max cor exceeded 0.7: 0.74
# INFO [2026-01-24 15:25:45] [*] Iteration 13
# INFO [2026-01-24 15:26:51] >> base was equal to: 0.25
# INFO [2026-01-24 15:26:51] >> number of modules was equal to: 26
# INFO [2026-01-24 15:26:51] >> sizes of modules (unique genes) were in range: 0-31
# INFO [2026-01-24 15:26:51] >> max diff: 0.88
# INFO [2026-01-24 15:26:51] [*] Iteration 14
# INFO [2026-01-24 15:27:58] >> base was equal to: 0.25
# INFO [2026-01-24 15:27:58] >> number of modules was equal to: 26
# INFO [2026-01-24 15:27:58] >> sizes of modules (unique genes) were in range: 0-32
# INFO [2026-01-24 15:27:58] >> max diff: 0
# INFO [2026-01-24 15:28:03] >> geseca padjs were in range: 0-0.997
# INFO [2026-01-24 15:28:03] [*] Iteration 15
# INFO [2026-01-24 15:29:07] >> base was equal to: 0.25
# INFO [2026-01-24 15:29:07] >> number of modules was equal to: 25
# INFO [2026-01-24 15:29:07] >> sizes of modules (unique genes) were in range: 0-33
# INFO [2026-01-24 15:29:07] >> max diff: 0.21
# INFO [2026-01-24 15:29:07] [*] Iteration 16
# INFO [2026-01-24 15:30:10] >> base was equal to: 0.25
# INFO [2026-01-24 15:30:10] >> number of modules was equal to: 25
# INFO [2026-01-24 15:30:10] >> sizes of modules (unique genes) were in range: 0-33
# INFO [2026-01-24 15:30:10] >> max diff: 0
# INFO [2026-01-24 15:30:13] >> geseca padjs were in range: 0-0.995
# INFO [2026-01-24 15:30:13] [*] Iteration 17
# INFO [2026-01-24 15:31:15] >> base was equal to: 0.25
# INFO [2026-01-24 15:31:15] >> number of modules was equal to: 24
# INFO [2026-01-24 15:31:15] >> sizes of modules (unique genes) were in range: 0-33
# INFO [2026-01-24 15:31:15] >> max diff: 0.66
# INFO [2026-01-24 15:31:15] [*] Iteration 18
# INFO [2026-01-24 15:32:15] >> base was equal to: 0.25
# INFO [2026-01-24 15:32:15] >> number of modules was equal to: 24
# INFO [2026-01-24 15:32:15] >> sizes of modules (unique genes) were in range: 0-33
# INFO [2026-01-24 15:32:15] >> max diff: 0
# INFO [2026-01-24 15:32:19] >> geseca padjs were in range: 0-0.99301
# INFO [2026-01-24 15:32:19] [*] Iteration 19
# INFO [2026-01-24 15:33:16] >> base was equal to: 0.25
# INFO [2026-01-24 15:33:16] >> number of modules was equal to: 23
# INFO [2026-01-24 15:33:16] >> sizes of modules (unique genes) were in range: 0-33
# INFO [2026-01-24 15:33:17] >> max diff: 0.78
# INFO [2026-01-24 15:33:17] [*] Iteration 20
# INFO [2026-01-24 15:34:14] >> base was equal to: 0.25
# INFO [2026-01-24 15:34:14] >> number of modules was equal to: 23
# INFO [2026-01-24 15:34:14] >> sizes of modules (unique genes) were in range: 0-33
# INFO [2026-01-24 15:34:14] >> max diff: 0
# INFO [2026-01-24 15:34:17] >> geseca padjs were in range: 0-0.99101
# INFO [2026-01-24 15:34:17] [*] Iteration 21
# INFO [2026-01-24 15:35:13] >> base was equal to: 0.25
# INFO [2026-01-24 15:35:13] >> number of modules was equal to: 22
# INFO [2026-01-24 15:35:13] >> sizes of modules (unique genes) were in range: 0-33
# INFO [2026-01-24 15:35:13] >> max diff: 0.6
# INFO [2026-01-24 15:35:13] [*] Iteration 22
# INFO [2026-01-24 15:36:08] >> base was equal to: 0.25
# INFO [2026-01-24 15:36:08] >> number of modules was equal to: 22
# INFO [2026-01-24 15:36:08] >> sizes of modules (unique genes) were in range: 0-33
# INFO [2026-01-24 15:36:08] >> max diff: 0
# INFO [2026-01-24 15:36:12] >> geseca padjs were in range: 0-0.996
# INFO [2026-01-24 15:36:12] [*] Iteration 23
# INFO [2026-01-24 15:37:04] >> base was equal to: 0.25
# INFO [2026-01-24 15:37:04] >> number of modules was equal to: 21
# INFO [2026-01-24 15:37:04] >> sizes of modules (unique genes) were in range: 0-33
# INFO [2026-01-24 15:37:04] >> max diff: 0.43
# INFO [2026-01-24 15:37:04] [*] Iteration 24
# INFO [2026-01-24 15:37:57] >> base was equal to: 0.25
# INFO [2026-01-24 15:37:57] >> number of modules was equal to: 21
# INFO [2026-01-24 15:37:57] >> sizes of modules (unique genes) were in range: 0-33
# INFO [2026-01-24 15:37:57] >> max diff: 0
# INFO [2026-01-24 15:38:01] >> geseca padjs were in range: 0-0.996
# INFO [2026-01-24 15:38:01] [*] Iteration 25
# INFO [2026-01-24 15:38:52] >> base was equal to: 0.25
# INFO [2026-01-24 15:38:52] >> number of modules was equal to: 20
# INFO [2026-01-24 15:38:52] >> sizes of modules (unique genes) were in range: 0-33
# INFO [2026-01-24 15:38:52] >> max diff: 0.78
# INFO [2026-01-24 15:38:52] [*] Iteration 26
# INFO [2026-01-24 15:39:44] >> base was equal to: 0.25
# INFO [2026-01-24 15:39:44] >> number of modules was equal to: 20
# INFO [2026-01-24 15:39:44] >> sizes of modules (unique genes) were in range: 0-33
# INFO [2026-01-24 15:39:44] >> max diff: 0
# INFO [2026-01-24 15:39:47] >> geseca padjs were in range: 0-0.99401
# INFO [2026-01-24 15:39:47] [*] Iteration 27
# INFO [2026-01-24 15:40:36] >> base was equal to: 0.25
# INFO [2026-01-24 15:40:36] >> number of modules was equal to: 19
# INFO [2026-01-24 15:40:36] >> sizes of modules (unique genes) were in range: 0-32
# INFO [2026-01-24 15:40:37] >> max diff: 0.15
# INFO [2026-01-24 15:40:37] [*] Iteration 28
# INFO [2026-01-24 15:41:26] >> base was equal to: 0.25
# INFO [2026-01-24 15:41:26] >> number of modules was equal to: 19
# INFO [2026-01-24 15:41:26] >> sizes of modules (unique genes) were in range: 0-32
# INFO [2026-01-24 15:41:26] >> max diff: 0
# INFO [2026-01-24 15:41:29] >> geseca padjs were in range: 0-0.996
# INFO [2026-01-24 15:41:29] [*] Iteration 29
# INFO [2026-01-24 15:42:15] >> base was equal to: 0.25
# INFO [2026-01-24 15:42:15] >> number of modules was equal to: 18
# INFO [2026-01-24 15:42:15] >> sizes of modules (unique genes) were in range: 0-32
# INFO [2026-01-24 15:42:15] >> max diff: 0.6
# INFO [2026-01-24 15:42:15] [*] Iteration 30
# INFO [2026-01-24 15:43:02] >> base was equal to: 0.25
# INFO [2026-01-24 15:43:02] >> number of modules was equal to: 18
# INFO [2026-01-24 15:43:02] >> sizes of modules (unique genes) were in range: 0-32
# INFO [2026-01-24 15:43:02] >> max diff: 0
# INFO [2026-01-24 15:43:05] >> geseca padjs were in range: 0-0.996
# INFO [2026-01-24 15:43:05] [*] Iteration 31
# INFO [2026-01-24 15:43:49] >> base was equal to: 0.25
# INFO [2026-01-24 15:43:49] >> number of modules was equal to: 17
# INFO [2026-01-24 15:43:49] >> sizes of modules (unique genes) were in range: 0-32
# INFO [2026-01-24 15:43:49] >> max diff: 0.68
# INFO [2026-01-24 15:43:49] [*] Iteration 32
# INFO [2026-01-24 15:44:33] >> base was equal to: 0.25
# INFO [2026-01-24 15:44:33] >> number of modules was equal to: 17
# INFO [2026-01-24 15:44:33] >> sizes of modules (unique genes) were in range: 0-32
# INFO [2026-01-24 15:44:33] >> max diff: 0
# INFO [2026-01-24 15:44:36] >> geseca padjs were in range: 0-0.99201
# INFO [2026-01-24 15:44:36] [*] Iteration 33
# INFO [2026-01-24 15:45:19] >> base was equal to: 0.25
# INFO [2026-01-24 15:45:19] >> number of modules was equal to: 16
# INFO [2026-01-24 15:45:19] >> sizes of modules (unique genes) were in range: 0-32
# INFO [2026-01-24 15:45:19] >> max diff: 0.5
# INFO [2026-01-24 15:45:19] [*] Iteration 34
# INFO [2026-01-24 15:46:00] >> base was equal to: 0.25
# INFO [2026-01-24 15:46:00] >> number of modules was equal to: 16
# INFO [2026-01-24 15:46:00] >> sizes of modules (unique genes) were in range: 0-32
# INFO [2026-01-24 15:46:00] >> max diff: 0
# INFO [2026-01-24 15:46:04] >> geseca padjs were in range: 0-0.99301
# INFO [2026-01-24 15:46:04] [*] Iteration 35
# INFO [2026-01-24 15:46:42] >> base was equal to: 0.25
# INFO [2026-01-24 15:46:42] >> number of modules was equal to: 15
# INFO [2026-01-24 15:46:42] >> sizes of modules (unique genes) were in range: 1-32
# INFO [2026-01-24 15:46:42] >> max diff: 0
# INFO [2026-01-24 15:46:45] >> geseca padjs were in range: 0-0.99401
# INFO [2026-01-24 15:46:45] [*] Iteration 36
# INFO [2026-01-24 15:47:21] >> base was equal to: 0.25
# INFO [2026-01-24 15:47:21] >> number of modules was equal to: 14
# INFO [2026-01-24 15:47:21] >> sizes of modules (unique genes) were in range: 1-32
# INFO [2026-01-24 15:47:21] >> max diff: 0.67
# INFO [2026-01-24 15:47:21] [*] Iteration 37
# INFO [2026-01-24 15:47:57] >> base was equal to: 0.25
# INFO [2026-01-24 15:47:57] >> number of modules was equal to: 14
# INFO [2026-01-24 15:47:57] >> sizes of modules (unique genes) were in range: 1-32
# INFO [2026-01-24 15:47:57] >> max diff: 0
# INFO [2026-01-24 15:48:00] >> geseca padjs were in range: 0-0.99401
# INFO [2026-01-24 15:48:00] [*] Iteration 38
# INFO [2026-01-24 15:48:33] >> base was equal to: 0.25
# INFO [2026-01-24 15:48:33] >> number of modules was equal to: 13
# INFO [2026-01-24 15:48:33] >> sizes of modules (unique genes) were in range: 1-32
# INFO [2026-01-24 15:48:33] >> max diff: 0
# INFO [2026-01-24 15:48:37] >> geseca padjs were in range: 0-0.996
# INFO [2026-01-24 15:48:37] [*] Iteration 39
# INFO [2026-01-24 15:49:08] >> base was equal to: 0.25
# INFO [2026-01-24 15:49:08] >> number of modules was equal to: 12
# INFO [2026-01-24 15:49:08] >> sizes of modules (unique genes) were in range: 1-32
# INFO [2026-01-24 15:49:08] >> max diff: 0.75
# INFO [2026-01-24 15:49:08] [*] Iteration 40
# INFO [2026-01-24 15:49:40] >> base was equal to: 0.25
# INFO [2026-01-24 15:49:40] >> number of modules was equal to: 12
# INFO [2026-01-24 15:49:40] >> sizes of modules (unique genes) were in range: 1-32
# INFO [2026-01-24 15:49:40] >> max diff: 0
# INFO [2026-01-24 15:49:43] >> geseca padjs were in range: 0-0.99301
# INFO [2026-01-24 15:49:43] [*] Iteration 41
# INFO [2026-01-24 15:50:11] >> base was equal to: 0.25
# INFO [2026-01-24 15:50:11] >> number of modules was equal to: 11
# INFO [2026-01-24 15:50:11] >> sizes of modules (unique genes) were in range: 1-32
# INFO [2026-01-24 15:50:11] >> max diff: 0
# INFO [2026-01-24 15:50:15] >> geseca padjs were in range: 0-0.31469
# INFO [2026-01-24 15:50:15] [*] Iteration 42
# INFO [2026-01-24 15:50:41] >> base was equal to: 0.25
# INFO [2026-01-24 15:50:41] >> number of modules was equal to: 10
# INFO [2026-01-24 15:50:41] >> sizes of modules (unique genes) were in range: 1-32
# INFO [2026-01-24 15:50:41] >> max diff: 0.47
# INFO [2026-01-24 15:50:41] [*] Iteration 43
# INFO [2026-01-24 15:51:08] >> base was equal to: 0.25
# INFO [2026-01-24 15:51:08] >> number of modules was equal to: 10
# INFO [2026-01-24 15:51:08] >> sizes of modules (unique genes) were in range: 1-32
# INFO [2026-01-24 15:51:08] >> max diff: 0
# INFO [2026-01-24 15:51:11] >> geseca padjs were in range: 0-0.34466
# INFO [2026-01-24 15:51:11] [*] Iteration 44
# INFO [2026-01-24 15:51:36] >> base was equal to: 0.25
# INFO [2026-01-24 15:51:36] >> number of modules was equal to: 9
# INFO [2026-01-24 15:51:36] >> sizes of modules (unique genes) were in range: 2-32
# INFO [2026-01-24 15:51:36] >> max diff: 0.5
# INFO [2026-01-24 15:51:36] [*] Iteration 45
# INFO [2026-01-24 15:52:00] >> base was equal to: 0.25
# INFO [2026-01-24 15:52:00] >> number of modules was equal to: 9
# INFO [2026-01-24 15:52:00] >> sizes of modules (unique genes) were in range: 2-32
# INFO [2026-01-24 15:52:00] >> max diff: 0
# INFO [2026-01-24 15:52:03] >> geseca padjs were in range: 0-0.01547
# INFO [2026-01-24 15:52:03] [*] Iteration 46
# INFO [2026-01-24 15:52:24] >> base was equal to: 0.25
# INFO [2026-01-24 15:52:24] >> number of modules was equal to: 8
# INFO [2026-01-24 15:52:24] >> sizes of modules (unique genes) were in range: 4-32
# INFO [2026-01-24 15:52:24] >> max diff: 1.28
# INFO [2026-01-24 15:52:24] [*] Iteration 47
# INFO [2026-01-24 15:52:46] >> base was equal to: 0.25
# INFO [2026-01-24 15:52:46] >> number of modules was equal to: 8
# INFO [2026-01-24 15:52:46] >> sizes of modules (unique genes) were in range: 4-32
# INFO [2026-01-24 15:52:46] >> max diff: 0
# INFO [2026-01-24 15:52:49] >> geseca padjs were in range: 0-0
# INFO [2026-01-24 15:52:56] >> geseca padjs were in range: 0-0
# INFO [2026-01-24 15:52:56] GAM-CLUSTERING ends here.
Each metabolic module is a connected piece of metabolic network whose genes expression is correlated across all dataset.
The following functions will help you to visualize and explore the obtained results.
getGraphs(modules = results$modules,
network.annotation = network.annotation,
metabolites.annotation = metabolites.annotation,
seed.for.layout = 42,
work.dir = work.dir)
# Graphs for module 1 are built
# Graphs for module 2 are built
# Graphs for module 3 are built
# Graphs for module 4 are built
# Graphs for module 5 are built
# Graphs for module 6 are built
# Graphs for module 7 are built
# Graphs for module 8 are built
Example of the graph of the first module:
The table contains gene list. Each gene has two descriptive values: i) gene’s correlation value with the modules pattern and ii) gene’s score. High score means that this gene’s expression is similar to the module’s pattern and not similar to other modules’ patterns.
m.gene.list <- getGeneTables(modules = results$modules,
nets = results$nets,
patterns = results$patterns.pos,
gene.exprs = E.prep,
network.annotation = network.annotation,
work.dir = work.dir)
# Gene tables for module 1 are produced
# Gene tables for module 2 are produced
# Gene tables for module 3 are produced
# Gene tables for module 4 are produced
# Gene tables for module 5 are produced
# Gene tables for module 6 are produced
# Gene tables for module 7 are produced
# Gene tables for module 8 are produced
Example of the gene table of the first module:
head(GAMclust:::read.tsv(file.path(work.dir, "m.1.genes.tsv"))) |>
kableExtra::kable() |>
kableExtra::kable_styling()
| symbol | Entrez | score | cor |
|---|---|---|---|
| Txnrd2 | 26462 | 0.0529184 | 0.6956772 |
| Glb1 | 12091 | 0.3391486 | 0.6014851 |
| Hexa | 15211 | 0.7861294 | 0.5753885 |
| Slc2a8 | 56017 | 0.9524235 | 0.4546196 |
| Pla2g15 | 192654 | 1.5135420 | 0.4118784 |
| Slc3a2 | 17254 | 0.1831894 | 0.3795276 |
Heatmap for patterns of all modules:
patterns <- results$patterns.pos
pheatmap::pheatmap(
GAMclust:::normalize.rows(patterns),
cluster_rows=F, cluster_cols=F,
show_rownames=T, show_colnames=T)
Get plots of individual genes expression
for (i in seq_along(m.gene.list)) {
heatmap <- E[m.gene.list[[i]], , drop=F]
pheatmap::pheatmap(
GAMclust:::normalize.rows(heatmap),
cluster_rows=F, cluster_cols=F,
file=sprintf("%s/m.%s.genes.png", work.dir, i),
width=10, height=5,
show_rownames=T, show_colnames=T)
}
Example of the heatmap of the first module:
pheatmap::pheatmap(
GAMclust:::normalize.rows(E[m.gene.list[[1]], , drop=F]),
show_rownames=T, show_colnames=T)
Functional annotation of obtained modules is done based on KEGG and Reactome canonical metabolic pathways.
getAnnotationTables(network.annotation = network.annotation,
nets = results$nets,
work.dir = work.dir)
# Pathway annotation for module 1 is produced
# Pathway annotation for module 2 is produced
# Pathway annotation for module 3 is produced
# Pathway annotation for module 4 is produced
# Pathway annotation for module 5 is produced
# Pathway annotation for module 6 is produced
# Pathway annotation for module 7 is produced
# Pathway annotation for module 8 is produced
Example of the annotation table of the first module:
head(GAMclust:::read.tsv(file.path(work.dir, "m.1.pathways.tsv"))) |>
kableExtra::kable() |>
kableExtra::kable_styling(font_size = 8) |>
kableExtra::column_spec(1, width = "1.6in") |>
kableExtra::column_spec(2:6, width = "0.5in") |>
kableExtra::column_spec(7, width = "1.2in")
| pathway | pval | padj | foldEnrichment | overlap | size | overlapGenes |
|---|---|---|---|---|---|---|
| mmu00590: Arachidonic acid metabolism | 0.0024637 | 0.2931799 | 9.857143 | 3 | 9 | Alox5 Ltc4s Ggt5 |
| mmu_M00079: Keratan sulfate degradation | 0.0010909 | 0.2596229 | 29.571429 | 2 | 2 | Glb1 Hexa |
Annotation heatmap for all modules:
getAnnotationHeatmap(work.dir = work.dir)
# Processing module 1
# Module size: 32
# Processing module 2
# Module size: 12
# Processing module 3
# Module size: 10
# Processing module 4
# Module size: 9
# Processing module 5
# Module size: 5
# Processing module 6
# Module size: 5
# Processing module 7
# Module size: 5
# Processing module 8
# Module size: 4
You may also compare two results of running GAM-clustering on the
same dataset (e.g. runs with different parameters) or compare two
results of running GAM-clustering on different datasets (then set
same.data=FALSE).
modulesSimilarity(dir1 = work.dir,
dir2 = "old_dir",
name1 = "new",
name2 = "old",
same.data = TRUE,
use.genes.with.pos.score = TRUE,
work.dir = work.dir,
file.name = "comparison.png")