Install GAMclust package:

devtools::install_github("alserglab/GAMclust")
library(GAMclust)
library(gatom)
library(mwcsr)
library(fgsea)
library(data.table)
library(futile.logger)

set.seed(42)

Preparing working environment

First, please load and initialize all objects required for GAM-clustering analysis:

  1. load metabolic network and its metabolites annotation. We provide two networks: KEGG and combined network that includes KEGG, Rhea and transport reactions:

(1.1.) load KEGG metabolic network network.kegg.rds and its metabolites annotation met.kegg.db.rds

# KEGG network:
network <- readRDS(url("http://artyomovlab.wustl.edu/publications/supp_materials/GATOM/network.kegg.rds"))
metabolites.annotation <- readRDS(url("http://artyomovlab.wustl.edu/publications/supp_materials/GATOM/met.kegg.db.rds"))

(1.2.) or load combined metabolic network network.combined.rds, its metabolites annotation met.combined.db.rds and species-specific list of genes that either come from proteome or are not linked to a specific enzyme gene2reaction.combined.mmu.eg.tsv for mouse and gene2reaction.combined.hsa.eg.tsv for human data;

# combined network (KEGG+Rhea+transport reactions):
network <- readRDS(url("http://artyomovlab.wustl.edu/publications/supp_materials/GATOM/network.combined.rds"))
metabolites.annotation <- readRDS(url("http://artyomovlab.wustl.edu/publications/supp_materials/GATOM/met.combined.db.rds"))
gene2reaction.extra <- data.table::fread("http://artyomovlab.wustl.edu/publications/supp_materials/GATOM/gene2reaction.combined.mmu.eg.tsv", colClasses="character")
  1. load species-specific network annotation: org.Hs.eg.gatom.anno for human data or org.Mm.eg.gatom.anno for mouse data;
network.annotation <- readRDS(url("http://artyomovlab.wustl.edu/publications/supp_materials/GATOM/org.Mm.eg.gatom.anno.rds"))
  1. load provided list of metabolites that should not be considered during the analysis as connections between reactions (e.g., CO2, HCO3-, etc);
met.to.filter <- data.table::fread(system.file("mets2mask.lst", package="GAMclust"))$ID
  1. initialize SMGWCS solver:

(4.1.) we recommend to use here either heuristic relax-and-cut solver rnc_solver from mwcsr package,

solver <- mwcsr::rnc_solver()

(4.2.) either proprietary CPLEX solver (free for academy);

cplex.dir <-  "/opt/ibm/ILOG/CPLEX_Studio1271"
solver <- mwcsr::virgo_solver(cplex_dir = cplex.dir)
  1. set working directory where the results will be saved to.
work.dir <- "results"
dir.create(work.dir, showWarnings = F, recursive = T)
  1. TEMPORARY: collecting logs while developing the tool.
stats.dir <- file.path(work.dir, "stats")
dir.create(stats.dir, showWarnings = F, recursive = T)

setup_logger <- function(log.file.path, logger.name = "stats.logger") {
  file.appender <- appender.file(log.file.path)
  console.appender <- appender.console()
  combined.appender <- function(line) {
    file.appender(line)
    console.appender(line)
  }
  flog.appender(combined.appender, name = logger.name)
  flog.threshold(TRACE, name = logger.name)
}

log.file <- file.path(stats.dir, "log.txt")
setup_logger(log.file.path = log.file, logger.name = "stats.logger")

Preparing objects for the analysis

Preparing data

GAMclust works with bulk, single cell and spatial RNA-seq data.

This vignette shows how to process bulk RNA-seq data on the example of ImmGen Open Source data reanalysis.

Let’s load the data.

For bulk RNA-seq cell data, take 12,000-15,000 genes for the GAM-clustering analysis.

expression_set_object <- readRDS(url("http://artyomovlab.wustl.edu/publications/supp_materials/GAMclust/243_es.top12k.rds"))

E <- Biobase::exprs(expression_set_object)

nrow(E) # ! make sure this value is in range from 10,000 to 15,000
# [1] 12000
E[1:3, 1:3]
#        MF.64pLYVEpIIn.Ao.1 MF.64pLYVEpIIn.Ao.2 MF.64pLYVEpIIn.Ao.3
# Actb              14.77450            14.97466            14.80920
# Cst3              12.73106            12.80513            12.62843
# Eef1a1            12.14864            12.38202            12.33756

Genes in your dataset may be named as Symbol, Entrez, Ensembl or RefSeq IDs. One of these names should be specified as value of gene.id.type parameter in prepareData().

E.prep <- prepareData(E = E,
                      gene.id.type = "Symbol",
                      use.PCA = FALSE,
                      network.annotation = network.annotation)

E.prep[1:3, 1:3]
#       MF.64pLYVEpIIn.Ao.1 MF.64pLYVEpIIn.Ao.2 MF.64pLYVEpIIn.Ao.3
# 11461           0.7947992           1.0375496           0.8368845
# 13010           0.3038730           0.3471841           0.2438681
# 13627           0.4187557           0.6674637           0.6200901

Preparing network

The prepareNetwork() function defines the structure of the final metabolic modules.

network.prep <- prepareNetwork(E = E.prep,
                               network = network,
                               topology = "metabolites",
                               met.to.filter = met.to.filter,
                               network.annotation = network.annotation,
                               gene2reaction.extra = gene2reaction.extra) # for combined network
# INFO [2026-01-24 15:09:37] Global metabolite network contains 6583 edges.
# INFO [2026-01-24 15:09:37] Largest connected component of this global network contains 1430 nodes and 5121 edges.

Preclustering

The preClustering() function defines initial patterns using k-medoids clustering on gene expression matrix. It is strongly recommended to do initial clustering with no less than 32 clusters (initial.number.of.clusters = 32).

You can visualize the initial heatmap as shown below.

cur.centers <- preClustering(E.prep = E.prep,
                             network.prep = network.prep,
                             initial.number.of.clusters = 32,
                             network.annotation = network.annotation)
# INFO [2026-01-24 15:09:37] 1160 metabolic genes from the analysed dataset mapped to this component.
cur.centers[1:3, 1:3]
#   MF.64pLYVEpIIn.Ao.1 MF.64pLYVEpIIn.Ao.2 MF.64pLYVEpIIn.Ao.3
# 1         -0.09284514         -0.12763246          0.02188235
# 2          0.03432400          0.02773624          0.13477218
# 3          0.78988889          0.80944803          0.87514519
pheatmap::pheatmap(
      GAMclust:::normalize.rows(cur.centers),
      cluster_rows=F, cluster_cols=F,
      show_rownames=F, show_colnames=T)

GAM-clustering analysis

Now you have everything prepared for the GAM-clustering analysis.

Initial patterns will be now refined in an iterative process. The output of gamClustering() function presents a set of specific subnetworks (also called metabolic modules) that reflect metabolic variability within a given transcriptional dataset.

Note, that it may take a long time to derive metabolic modules by gamClustering() function (tens of minutes).

There is a set of parameters which determine the size and number of your final modules. We recommend you to start with the default settings, however you can adjust them based on your own preferences:

  1. If you consider final modules to be too small or too big and it complicates interpretation for you, you can either increase or reduce by 10 units the max.module.size parameter.

  2. If among final modules you consider presence of any modules with too similar patterns, you can reduce by 0.1 units the cor.threshold parameter.

  3. If among final modules you consider presence of any uninformative modules, you can reduce by 10 times the p.adj.val.threshold parameter.

results <- gamClustering(E.prep = E.prep,
                         network.prep = network.prep,
                         cur.centers = cur.centers,
                         
                         start.base = 0.5,
                         base.dec = 0.05,
                         max.module.size = 50,

                         cor.threshold = 0.7,
                         p.adj.val.threshold = 1e-5,

                         batch.solver = seq_batch_solver(solver),
                         work.dir = work.dir,
                         
                         show.intermediate.clustering = F,
                         verbose = T,
                         collect.stats = T)
# INFO [2026-01-24 15:09:38] GAM-CLUSTERING starts here.
# INFO [2026-01-24 15:09:38] [*] Iteration 1
# INFO [2026-01-24 15:11:11] >> base was equal to: 0.5
# INFO [2026-01-24 15:11:11] >> number of modules was equal to: 32
# INFO [2026-01-24 15:11:11] >> sizes of modules (unique genes) were in range: 1-47
# INFO [2026-01-24 15:11:11] >> max diff: 1.95
# INFO [2026-01-24 15:11:11] [*] Iteration 2
# INFO [2026-01-24 15:12:45] >> base was equal to: 0.5
# INFO [2026-01-24 15:12:45] >> number of modules was equal to: 32
# INFO [2026-01-24 15:12:45] >> sizes of modules (unique genes) were in range: 1-48
# INFO [2026-01-24 15:12:45] >> max diff: 0
# INFO [2026-01-24 15:12:45] >> max cor exceeded 0.7: 0.74
# INFO [2026-01-24 15:12:45] [*] Iteration 3
# INFO [2026-01-24 15:14:12] >> base was equal to: 0.5
# INFO [2026-01-24 15:14:12] >> number of modules was equal to: 31
# INFO [2026-01-24 15:14:12] >> sizes of modules (unique genes) were in range: 1-57
# INFO [2026-01-24 15:14:13] >> max diff: 0.45
# INFO [2026-01-24 15:14:13] [*] Iteration 4
# INFO [2026-01-24 15:15:43] >> base was equal to: 0.5
# INFO [2026-01-24 15:15:43] >> number of modules was equal to: 31
# INFO [2026-01-24 15:15:43] >> sizes of modules (unique genes) were in range: 1-54
# INFO [2026-01-24 15:15:43] >> max diff: 0
# INFO [2026-01-24 15:15:43] >> max cor exceeded 0.7: 0.75
# INFO [2026-01-24 15:15:43] [*] Iteration 5
# INFO [2026-01-24 15:17:07] >> base was equal to: 0.45
# INFO [2026-01-24 15:17:07] >> number of modules was equal to: 30
# INFO [2026-01-24 15:17:07] >> sizes of modules (unique genes) were in range: 1-60
# INFO [2026-01-24 15:17:07] >> max diff: 0.39
# INFO [2026-01-24 15:17:07] [*] Iteration 6
# INFO [2026-01-24 15:18:29] >> base was equal to: 0.45
# INFO [2026-01-24 15:18:29] >> number of modules was equal to: 30
# INFO [2026-01-24 15:18:29] >> sizes of modules (unique genes) were in range: 1-56
# INFO [2026-01-24 15:18:30] >> max diff: 0
# INFO [2026-01-24 15:18:30] >> max cor exceeded 0.7: 0.74
# INFO [2026-01-24 15:18:30] [*] Iteration 7
# INFO [2026-01-24 15:19:46] >> base was equal to: 0.4
# INFO [2026-01-24 15:19:46] >> number of modules was equal to: 29
# INFO [2026-01-24 15:19:46] >> sizes of modules (unique genes) were in range: 1-79
# INFO [2026-01-24 15:19:46] >> max diff: 1.58
# INFO [2026-01-24 15:19:46] [*] Iteration 8
# INFO [2026-01-24 15:21:03] >> base was equal to: 0.4
# INFO [2026-01-24 15:21:03] >> number of modules was equal to: 29
# INFO [2026-01-24 15:21:03] >> sizes of modules (unique genes) were in range: 1-75
# INFO [2026-01-24 15:21:03] >> max diff: 0
# INFO [2026-01-24 15:21:03] >> max cor exceeded 0.7: 0.76
# INFO [2026-01-24 15:21:03] [*] Iteration 9
# INFO [2026-01-24 15:22:15] >> base was equal to: 0.35
# INFO [2026-01-24 15:22:15] >> number of modules was equal to: 28
# INFO [2026-01-24 15:22:15] >> sizes of modules (unique genes) were in range: 1-83
# INFO [2026-01-24 15:22:15] >> max diff: 1.15
# INFO [2026-01-24 15:22:15] [*] Iteration 10
# INFO [2026-01-24 15:23:27] >> base was equal to: 0.35
# INFO [2026-01-24 15:23:27] >> number of modules was equal to: 28
# INFO [2026-01-24 15:23:27] >> sizes of modules (unique genes) were in range: 1-77
# INFO [2026-01-24 15:23:27] >> max diff: 0
# INFO [2026-01-24 15:23:27] >> max cor exceeded 0.7: 0.75
# INFO [2026-01-24 15:23:27] [*] Iteration 11
# INFO [2026-01-24 15:24:36] >> base was equal to: 0.3
# INFO [2026-01-24 15:24:36] >> number of modules was equal to: 27
# INFO [2026-01-24 15:24:36] >> sizes of modules (unique genes) were in range: 1-58
# INFO [2026-01-24 15:24:37] >> max diff: 1.04
# INFO [2026-01-24 15:24:37] [*] Iteration 12
# INFO [2026-01-24 15:25:45] >> base was equal to: 0.3
# INFO [2026-01-24 15:25:45] >> number of modules was equal to: 27
# INFO [2026-01-24 15:25:45] >> sizes of modules (unique genes) were in range: 1-60
# INFO [2026-01-24 15:25:45] >> max diff: 0
# INFO [2026-01-24 15:25:45] >> max cor exceeded 0.7: 0.74
# INFO [2026-01-24 15:25:45] [*] Iteration 13
# INFO [2026-01-24 15:26:51] >> base was equal to: 0.25
# INFO [2026-01-24 15:26:51] >> number of modules was equal to: 26
# INFO [2026-01-24 15:26:51] >> sizes of modules (unique genes) were in range: 0-31
# INFO [2026-01-24 15:26:51] >> max diff: 0.88
# INFO [2026-01-24 15:26:51] [*] Iteration 14
# INFO [2026-01-24 15:27:58] >> base was equal to: 0.25
# INFO [2026-01-24 15:27:58] >> number of modules was equal to: 26
# INFO [2026-01-24 15:27:58] >> sizes of modules (unique genes) were in range: 0-32
# INFO [2026-01-24 15:27:58] >> max diff: 0
# INFO [2026-01-24 15:28:03] >> geseca padjs were in range: 0-0.997
# INFO [2026-01-24 15:28:03] [*] Iteration 15
# INFO [2026-01-24 15:29:07] >> base was equal to: 0.25
# INFO [2026-01-24 15:29:07] >> number of modules was equal to: 25
# INFO [2026-01-24 15:29:07] >> sizes of modules (unique genes) were in range: 0-33
# INFO [2026-01-24 15:29:07] >> max diff: 0.21
# INFO [2026-01-24 15:29:07] [*] Iteration 16
# INFO [2026-01-24 15:30:10] >> base was equal to: 0.25
# INFO [2026-01-24 15:30:10] >> number of modules was equal to: 25
# INFO [2026-01-24 15:30:10] >> sizes of modules (unique genes) were in range: 0-33
# INFO [2026-01-24 15:30:10] >> max diff: 0
# INFO [2026-01-24 15:30:13] >> geseca padjs were in range: 0-0.995
# INFO [2026-01-24 15:30:13] [*] Iteration 17
# INFO [2026-01-24 15:31:15] >> base was equal to: 0.25
# INFO [2026-01-24 15:31:15] >> number of modules was equal to: 24
# INFO [2026-01-24 15:31:15] >> sizes of modules (unique genes) were in range: 0-33
# INFO [2026-01-24 15:31:15] >> max diff: 0.66
# INFO [2026-01-24 15:31:15] [*] Iteration 18
# INFO [2026-01-24 15:32:15] >> base was equal to: 0.25
# INFO [2026-01-24 15:32:15] >> number of modules was equal to: 24
# INFO [2026-01-24 15:32:15] >> sizes of modules (unique genes) were in range: 0-33
# INFO [2026-01-24 15:32:15] >> max diff: 0
# INFO [2026-01-24 15:32:19] >> geseca padjs were in range: 0-0.99301
# INFO [2026-01-24 15:32:19] [*] Iteration 19
# INFO [2026-01-24 15:33:16] >> base was equal to: 0.25
# INFO [2026-01-24 15:33:16] >> number of modules was equal to: 23
# INFO [2026-01-24 15:33:16] >> sizes of modules (unique genes) were in range: 0-33
# INFO [2026-01-24 15:33:17] >> max diff: 0.78
# INFO [2026-01-24 15:33:17] [*] Iteration 20
# INFO [2026-01-24 15:34:14] >> base was equal to: 0.25
# INFO [2026-01-24 15:34:14] >> number of modules was equal to: 23
# INFO [2026-01-24 15:34:14] >> sizes of modules (unique genes) were in range: 0-33
# INFO [2026-01-24 15:34:14] >> max diff: 0
# INFO [2026-01-24 15:34:17] >> geseca padjs were in range: 0-0.99101
# INFO [2026-01-24 15:34:17] [*] Iteration 21
# INFO [2026-01-24 15:35:13] >> base was equal to: 0.25
# INFO [2026-01-24 15:35:13] >> number of modules was equal to: 22
# INFO [2026-01-24 15:35:13] >> sizes of modules (unique genes) were in range: 0-33
# INFO [2026-01-24 15:35:13] >> max diff: 0.6
# INFO [2026-01-24 15:35:13] [*] Iteration 22
# INFO [2026-01-24 15:36:08] >> base was equal to: 0.25
# INFO [2026-01-24 15:36:08] >> number of modules was equal to: 22
# INFO [2026-01-24 15:36:08] >> sizes of modules (unique genes) were in range: 0-33
# INFO [2026-01-24 15:36:08] >> max diff: 0
# INFO [2026-01-24 15:36:12] >> geseca padjs were in range: 0-0.996
# INFO [2026-01-24 15:36:12] [*] Iteration 23
# INFO [2026-01-24 15:37:04] >> base was equal to: 0.25
# INFO [2026-01-24 15:37:04] >> number of modules was equal to: 21
# INFO [2026-01-24 15:37:04] >> sizes of modules (unique genes) were in range: 0-33
# INFO [2026-01-24 15:37:04] >> max diff: 0.43
# INFO [2026-01-24 15:37:04] [*] Iteration 24
# INFO [2026-01-24 15:37:57] >> base was equal to: 0.25
# INFO [2026-01-24 15:37:57] >> number of modules was equal to: 21
# INFO [2026-01-24 15:37:57] >> sizes of modules (unique genes) were in range: 0-33
# INFO [2026-01-24 15:37:57] >> max diff: 0
# INFO [2026-01-24 15:38:01] >> geseca padjs were in range: 0-0.996
# INFO [2026-01-24 15:38:01] [*] Iteration 25
# INFO [2026-01-24 15:38:52] >> base was equal to: 0.25
# INFO [2026-01-24 15:38:52] >> number of modules was equal to: 20
# INFO [2026-01-24 15:38:52] >> sizes of modules (unique genes) were in range: 0-33
# INFO [2026-01-24 15:38:52] >> max diff: 0.78
# INFO [2026-01-24 15:38:52] [*] Iteration 26
# INFO [2026-01-24 15:39:44] >> base was equal to: 0.25
# INFO [2026-01-24 15:39:44] >> number of modules was equal to: 20
# INFO [2026-01-24 15:39:44] >> sizes of modules (unique genes) were in range: 0-33
# INFO [2026-01-24 15:39:44] >> max diff: 0
# INFO [2026-01-24 15:39:47] >> geseca padjs were in range: 0-0.99401
# INFO [2026-01-24 15:39:47] [*] Iteration 27
# INFO [2026-01-24 15:40:36] >> base was equal to: 0.25
# INFO [2026-01-24 15:40:36] >> number of modules was equal to: 19
# INFO [2026-01-24 15:40:36] >> sizes of modules (unique genes) were in range: 0-32
# INFO [2026-01-24 15:40:37] >> max diff: 0.15
# INFO [2026-01-24 15:40:37] [*] Iteration 28
# INFO [2026-01-24 15:41:26] >> base was equal to: 0.25
# INFO [2026-01-24 15:41:26] >> number of modules was equal to: 19
# INFO [2026-01-24 15:41:26] >> sizes of modules (unique genes) were in range: 0-32
# INFO [2026-01-24 15:41:26] >> max diff: 0
# INFO [2026-01-24 15:41:29] >> geseca padjs were in range: 0-0.996
# INFO [2026-01-24 15:41:29] [*] Iteration 29
# INFO [2026-01-24 15:42:15] >> base was equal to: 0.25
# INFO [2026-01-24 15:42:15] >> number of modules was equal to: 18
# INFO [2026-01-24 15:42:15] >> sizes of modules (unique genes) were in range: 0-32
# INFO [2026-01-24 15:42:15] >> max diff: 0.6
# INFO [2026-01-24 15:42:15] [*] Iteration 30
# INFO [2026-01-24 15:43:02] >> base was equal to: 0.25
# INFO [2026-01-24 15:43:02] >> number of modules was equal to: 18
# INFO [2026-01-24 15:43:02] >> sizes of modules (unique genes) were in range: 0-32
# INFO [2026-01-24 15:43:02] >> max diff: 0
# INFO [2026-01-24 15:43:05] >> geseca padjs were in range: 0-0.996
# INFO [2026-01-24 15:43:05] [*] Iteration 31
# INFO [2026-01-24 15:43:49] >> base was equal to: 0.25
# INFO [2026-01-24 15:43:49] >> number of modules was equal to: 17
# INFO [2026-01-24 15:43:49] >> sizes of modules (unique genes) were in range: 0-32
# INFO [2026-01-24 15:43:49] >> max diff: 0.68
# INFO [2026-01-24 15:43:49] [*] Iteration 32
# INFO [2026-01-24 15:44:33] >> base was equal to: 0.25
# INFO [2026-01-24 15:44:33] >> number of modules was equal to: 17
# INFO [2026-01-24 15:44:33] >> sizes of modules (unique genes) were in range: 0-32
# INFO [2026-01-24 15:44:33] >> max diff: 0
# INFO [2026-01-24 15:44:36] >> geseca padjs were in range: 0-0.99201
# INFO [2026-01-24 15:44:36] [*] Iteration 33
# INFO [2026-01-24 15:45:19] >> base was equal to: 0.25
# INFO [2026-01-24 15:45:19] >> number of modules was equal to: 16
# INFO [2026-01-24 15:45:19] >> sizes of modules (unique genes) were in range: 0-32
# INFO [2026-01-24 15:45:19] >> max diff: 0.5
# INFO [2026-01-24 15:45:19] [*] Iteration 34
# INFO [2026-01-24 15:46:00] >> base was equal to: 0.25
# INFO [2026-01-24 15:46:00] >> number of modules was equal to: 16
# INFO [2026-01-24 15:46:00] >> sizes of modules (unique genes) were in range: 0-32
# INFO [2026-01-24 15:46:00] >> max diff: 0
# INFO [2026-01-24 15:46:04] >> geseca padjs were in range: 0-0.99301
# INFO [2026-01-24 15:46:04] [*] Iteration 35
# INFO [2026-01-24 15:46:42] >> base was equal to: 0.25
# INFO [2026-01-24 15:46:42] >> number of modules was equal to: 15
# INFO [2026-01-24 15:46:42] >> sizes of modules (unique genes) were in range: 1-32
# INFO [2026-01-24 15:46:42] >> max diff: 0
# INFO [2026-01-24 15:46:45] >> geseca padjs were in range: 0-0.99401
# INFO [2026-01-24 15:46:45] [*] Iteration 36
# INFO [2026-01-24 15:47:21] >> base was equal to: 0.25
# INFO [2026-01-24 15:47:21] >> number of modules was equal to: 14
# INFO [2026-01-24 15:47:21] >> sizes of modules (unique genes) were in range: 1-32
# INFO [2026-01-24 15:47:21] >> max diff: 0.67
# INFO [2026-01-24 15:47:21] [*] Iteration 37
# INFO [2026-01-24 15:47:57] >> base was equal to: 0.25
# INFO [2026-01-24 15:47:57] >> number of modules was equal to: 14
# INFO [2026-01-24 15:47:57] >> sizes of modules (unique genes) were in range: 1-32
# INFO [2026-01-24 15:47:57] >> max diff: 0
# INFO [2026-01-24 15:48:00] >> geseca padjs were in range: 0-0.99401
# INFO [2026-01-24 15:48:00] [*] Iteration 38
# INFO [2026-01-24 15:48:33] >> base was equal to: 0.25
# INFO [2026-01-24 15:48:33] >> number of modules was equal to: 13
# INFO [2026-01-24 15:48:33] >> sizes of modules (unique genes) were in range: 1-32
# INFO [2026-01-24 15:48:33] >> max diff: 0
# INFO [2026-01-24 15:48:37] >> geseca padjs were in range: 0-0.996
# INFO [2026-01-24 15:48:37] [*] Iteration 39
# INFO [2026-01-24 15:49:08] >> base was equal to: 0.25
# INFO [2026-01-24 15:49:08] >> number of modules was equal to: 12
# INFO [2026-01-24 15:49:08] >> sizes of modules (unique genes) were in range: 1-32
# INFO [2026-01-24 15:49:08] >> max diff: 0.75
# INFO [2026-01-24 15:49:08] [*] Iteration 40
# INFO [2026-01-24 15:49:40] >> base was equal to: 0.25
# INFO [2026-01-24 15:49:40] >> number of modules was equal to: 12
# INFO [2026-01-24 15:49:40] >> sizes of modules (unique genes) were in range: 1-32
# INFO [2026-01-24 15:49:40] >> max diff: 0
# INFO [2026-01-24 15:49:43] >> geseca padjs were in range: 0-0.99301
# INFO [2026-01-24 15:49:43] [*] Iteration 41
# INFO [2026-01-24 15:50:11] >> base was equal to: 0.25
# INFO [2026-01-24 15:50:11] >> number of modules was equal to: 11
# INFO [2026-01-24 15:50:11] >> sizes of modules (unique genes) were in range: 1-32
# INFO [2026-01-24 15:50:11] >> max diff: 0
# INFO [2026-01-24 15:50:15] >> geseca padjs were in range: 0-0.31469
# INFO [2026-01-24 15:50:15] [*] Iteration 42
# INFO [2026-01-24 15:50:41] >> base was equal to: 0.25
# INFO [2026-01-24 15:50:41] >> number of modules was equal to: 10
# INFO [2026-01-24 15:50:41] >> sizes of modules (unique genes) were in range: 1-32
# INFO [2026-01-24 15:50:41] >> max diff: 0.47
# INFO [2026-01-24 15:50:41] [*] Iteration 43
# INFO [2026-01-24 15:51:08] >> base was equal to: 0.25
# INFO [2026-01-24 15:51:08] >> number of modules was equal to: 10
# INFO [2026-01-24 15:51:08] >> sizes of modules (unique genes) were in range: 1-32
# INFO [2026-01-24 15:51:08] >> max diff: 0
# INFO [2026-01-24 15:51:11] >> geseca padjs were in range: 0-0.34466
# INFO [2026-01-24 15:51:11] [*] Iteration 44
# INFO [2026-01-24 15:51:36] >> base was equal to: 0.25
# INFO [2026-01-24 15:51:36] >> number of modules was equal to: 9
# INFO [2026-01-24 15:51:36] >> sizes of modules (unique genes) were in range: 2-32
# INFO [2026-01-24 15:51:36] >> max diff: 0.5
# INFO [2026-01-24 15:51:36] [*] Iteration 45
# INFO [2026-01-24 15:52:00] >> base was equal to: 0.25
# INFO [2026-01-24 15:52:00] >> number of modules was equal to: 9
# INFO [2026-01-24 15:52:00] >> sizes of modules (unique genes) were in range: 2-32
# INFO [2026-01-24 15:52:00] >> max diff: 0
# INFO [2026-01-24 15:52:03] >> geseca padjs were in range: 0-0.01547
# INFO [2026-01-24 15:52:03] [*] Iteration 46
# INFO [2026-01-24 15:52:24] >> base was equal to: 0.25
# INFO [2026-01-24 15:52:24] >> number of modules was equal to: 8
# INFO [2026-01-24 15:52:24] >> sizes of modules (unique genes) were in range: 4-32
# INFO [2026-01-24 15:52:24] >> max diff: 1.28
# INFO [2026-01-24 15:52:24] [*] Iteration 47
# INFO [2026-01-24 15:52:46] >> base was equal to: 0.25
# INFO [2026-01-24 15:52:46] >> number of modules was equal to: 8
# INFO [2026-01-24 15:52:46] >> sizes of modules (unique genes) were in range: 4-32
# INFO [2026-01-24 15:52:46] >> max diff: 0
# INFO [2026-01-24 15:52:49] >> geseca padjs were in range: 0-0
# INFO [2026-01-24 15:52:56] >> geseca padjs were in range: 0-0
# INFO [2026-01-24 15:52:56] GAM-CLUSTERING ends here.

Visualizing and exploring the GAM-clustering results

Each metabolic module is a connected piece of metabolic network whose genes expression is correlated across all dataset.

The following functions will help you to visualize and explore the obtained results.

Get graphs of modules:
getGraphs(modules = results$modules,
          network.annotation = network.annotation,
          metabolites.annotation = metabolites.annotation,
          seed.for.layout = 42,
          work.dir = work.dir)
# Graphs for module 1 are built
# Graphs for module 2 are built
# Graphs for module 3 are built
# Graphs for module 4 are built
# Graphs for module 5 are built
# Graphs for module 6 are built
# Graphs for module 7 are built
# Graphs for module 8 are built

Example of the graph of the first module:

m.1 n1--n28 Plod1 n1--n5 Gpt2 n1--n38 Adhfe1 n1--n24 Gpt2 n2--n35 Gbgt1 n2--n10 Hexa n3--n4 Pla2g15 n3--n29 Pld3 n3--n8 Pla2g4a n4--n37 Abhd12 n4--n33 Abhd12 n4--n31 Lpl n5--n5 Slc3a2 n5--n34 Oplah n5--n25 Glul n5--n30 Ggt5 n7--n29 Pld3 n6--n7 Pld3 n6--n19 Gdpd1 n8--n36 Ptgs1 n8--n26 Alox5 n9--n9 Slc2a8 n9--n17 Gaa n9--n21 Glb1 n25--n25 Slc3a2 n10--n13 Hexa n13--n14 Glb1 n12--n13 Neu1 n12--n23 Npl n12--n24 Npl n11--n23 Renbp n15--n15 Slc7a8 n15--n22 Pcyox1 n15--n39 Txnrd2 n16--n32 Plcd1 n16--n33 Daglb n17--n17 Slc45a4 n18--n37 Acp2 n14--n20 Glb1 n20--n21 Glb1 n24--n24 Slc16a7 n26--n30 Ltc4s n27--n30 Ltc4s n27--n39 Txnrd2 n1 2-Oxoglutarate n28 Succinate n2 Globoside n35 N-Acetyl-D-galactosaminyl-N-Acetyl-D-galactosaminyl-1,3-D-galactosyl-1,4-D-galactosyl-1,4-D-glucosylceramide n3 Phosphatidylcholine n4 Fatty acid n5 L-Glutamate n34 5-Oxoproline n29 Phosphatidate n7 Phosphatidylethanolamine n6 Ethanolamine n8 Arachidonate n36 Prostaglandin G2 n9 D-Glucose n25 L-Glutamine n10 N-Acetyl-D-galactosamine n13 N-Acetyl-D-galactosaminyl-(N-acetylneuraminyl)-D-galactosyl-D-glucosylceramide n12 N-Acetylneuraminate n11 N-Acetyl-D-glucosamine n23 N-Acetyl-D-mannosamine n15 L-Cysteine n16 1,2-Diacyl-sn-glycerol n32 1-Phosphatidyl-D-myo-inositol 4,5-bisphosphate n37 1-Acylglycerol n33 2-Acylglycerol n17 Sucrose n18 1-Acyl-sn-glycerol 3-phosphate n38 (R)-2-Hydroxyglutarate n19 1-(1-Alkenyl)-sn-glycero-3-phosphoethanolamine n14 D-Galactosyl-N-acetyl-D-galactosaminyl-(N-acetylneuraminyl)-D-galactosyl-D-glucosylceramide n21 Lactose n20 D-Galactose n22 Prenyl-L-cysteine n24 Pyruvate n26 Leukotriene A4 n30 Leukotriene C4 n27 Glutathione n31 Triacylglycerol n39 S-Glutathionyl-L-cysteine

Get gene tables:

The table contains gene list. Each gene has two descriptive values: i) gene’s correlation value with the modules pattern and ii) gene’s score. High score means that this gene’s expression is similar to the module’s pattern and not similar to other modules’ patterns.

m.gene.list <- getGeneTables(modules = results$modules,
                             nets = results$nets,
                             patterns = results$patterns.pos,
                             gene.exprs = E.prep,
                             network.annotation = network.annotation,
                             work.dir = work.dir)
# Gene tables for module 1 are produced
# Gene tables for module 2 are produced
# Gene tables for module 3 are produced
# Gene tables for module 4 are produced
# Gene tables for module 5 are produced
# Gene tables for module 6 are produced
# Gene tables for module 7 are produced
# Gene tables for module 8 are produced

Example of the gene table of the first module:

head(GAMclust:::read.tsv(file.path(work.dir, "m.1.genes.tsv"))) |>
  kableExtra::kable() |>
  kableExtra::kable_styling()
symbol Entrez score cor
Txnrd2 26462 0.0529184 0.6956772
Glb1 12091 0.3391486 0.6014851
Hexa 15211 0.7861294 0.5753885
Slc2a8 56017 0.9524235 0.4546196
Pla2g15 192654 1.5135420 0.4118784
Slc3a2 17254 0.1831894 0.3795276
Get plots of patterns and individual genes expression:

Heatmap for patterns of all modules:

patterns <- results$patterns.pos

pheatmap::pheatmap(
  GAMclust:::normalize.rows(patterns),
  cluster_rows=F, cluster_cols=F,
  show_rownames=T, show_colnames=T)

Get plots of individual genes expression

for (i in seq_along(m.gene.list)) {

  heatmap <- E[m.gene.list[[i]], , drop=F]

  pheatmap::pheatmap(
    GAMclust:::normalize.rows(heatmap),
    cluster_rows=F, cluster_cols=F,
    file=sprintf("%s/m.%s.genes.png", work.dir, i),
    width=10, height=5,
    show_rownames=T, show_colnames=T)
}

Example of the heatmap of the first module:

pheatmap::pheatmap(
  GAMclust:::normalize.rows(E[m.gene.list[[1]], , drop=F]),
  show_rownames=T, show_colnames=T)

Get annotation tables and annotation heatmap for all modules:

Functional annotation of obtained modules is done based on KEGG and Reactome canonical metabolic pathways.

getAnnotationTables(network.annotation = network.annotation,
                    nets = results$nets,
                    work.dir = work.dir)
# Pathway annotation for module 1 is produced
# Pathway annotation for module 2 is produced
# Pathway annotation for module 3 is produced
# Pathway annotation for module 4 is produced
# Pathway annotation for module 5 is produced
# Pathway annotation for module 6 is produced
# Pathway annotation for module 7 is produced
# Pathway annotation for module 8 is produced

Example of the annotation table of the first module:

head(GAMclust:::read.tsv(file.path(work.dir, "m.1.pathways.tsv"))) |>
  kableExtra::kable() |>
  kableExtra::kable_styling(font_size = 8) |>
  kableExtra::column_spec(1, width = "1.6in") |>
  kableExtra::column_spec(2:6, width = "0.5in") |>
  kableExtra::column_spec(7, width = "1.2in")
pathway pval padj foldEnrichment overlap size overlapGenes
mmu00590: Arachidonic acid metabolism 0.0024637 0.2931799 9.857143 3 9 Alox5 Ltc4s Ggt5
mmu_M00079: Keratan sulfate degradation 0.0010909 0.2596229 29.571429 2 2 Glb1 Hexa

Annotation heatmap for all modules:

getAnnotationHeatmap(work.dir = work.dir)
# Processing module 1
# Module size: 32
# Processing module 2
# Module size: 12
# Processing module 3
# Module size: 10
# Processing module 4
# Module size: 9
# Processing module 5
# Module size: 5
# Processing module 6
# Module size: 5
# Processing module 7
# Module size: 5
# Processing module 8
# Module size: 4

Compare modules obtained in different runs:

You may also compare two results of running GAM-clustering on the same dataset (e.g. runs with different parameters) or compare two results of running GAM-clustering on different datasets (then set same.data=FALSE).

modulesSimilarity(dir1 = work.dir,
                  dir2 = "old_dir",
                  name1 = "new",
                  name2 = "old",
                  same.data = TRUE,
                  use.genes.with.pos.score = TRUE,
                  work.dir = work.dir,
                  file.name = "comparison.png")