Install GAMclust package:

devtools::install_github("alserglab/GAMclust")
library(GAMclust)
library(gatom)
library(mwcsr)
library(fgsea)
library(data.table)
library(Seurat)
library(futile.logger)

set.seed(42)

Preparing working environment

First, please load and initialize all objects required for GAM-clustering analysis:

  1. load metabolic network and its metabolites annotation. We provide two networks: KEGG and combined network that includes KEGG, Rhea and transport reactions:

(1.1.) load KEGG metabolic network network.kegg.rds and its metabolites annotation met.kegg.db.rds

# KEGG network:
network <- readRDS(url("http://artyomovlab.wustl.edu/publications/supp_materials/GATOM/network.kegg.rds"))
metabolites.annotation <- readRDS(url("http://artyomovlab.wustl.edu/publications/supp_materials/GATOM/met.kegg.db.rds"))

(1.2.) or load combined metabolic network network.combined.rds, its metabolites annotation met.combined.db.rds and species-specific list of genes that either come from proteome or are not linked to a specific enzyme gene2reaction.combined.mmu.eg.tsv for mouse and gene2reaction.combined.hsa.eg.tsv for human data;

# combined network (KEGG+Rhea+transport reactions):
network <- readRDS(url("http://artyomovlab.wustl.edu/publications/supp_materials/GATOM/network.combined.rds"))
metabolites.annotation <- readRDS(url("http://artyomovlab.wustl.edu/publications/supp_materials/GATOM/met.combined.db.rds"))
gene2reaction.extra <- data.table::fread("http://artyomovlab.wustl.edu/publications/supp_materials/GATOM/gene2reaction.combined.hsa.eg.tsv", colClasses="character")
  1. load species-specific network annotation: org.Hs.eg.gatom.anno for human data or org.Mm.eg.gatom.anno for mouse data;
network.annotation <- readRDS(url("http://artyomovlab.wustl.edu/publications/supp_materials/GATOM/org.Hs.eg.gatom.anno.rds"))
  1. load provided list of metabolites that should not be considered during the analysis as connections between reactions (e.g., CO2, HCO3-, etc);
met.to.filter <- data.table::fread(system.file("mets2mask.lst", package="GAMclust"))$ID
  1. initialize SMGWCS solver:

(4.1.) we recommend to use here either heuristic relax-and-cut solver rnc_solver from mwcsr package,

solver <- mwcsr::rnc_solver()

(4.2.) either proprietary CPLEX solver (free for academy);

cplex.dir <-  "/opt/ibm/ILOG/CPLEX_Studio1271"
solver <- mwcsr::virgo_solver(cplex_dir = cplex.dir)
  1. set working directory where the results will be saved to.
work.dir <- "results"
dir.create(work.dir, showWarnings = F, recursive = T)
  1. TEMPORARY: collecting logs while developing the tool.
stats.dir <- file.path(work.dir, "stats")
dir.create(stats.dir, showWarnings = F, recursive = T)

setup_logger <- function(log.file.path, logger.name = "stats.logger") {
  file.appender <- appender.file(log.file.path)
  console.appender <- appender.console()
  combined.appender <- function(line) {
    file.appender(line)
    console.appender(line)
  }
  flog.appender(combined.appender, name = logger.name)
  flog.threshold(TRACE, name = logger.name)
}

log.file <- file.path(stats.dir, "log.txt")
setup_logger(log.file.path = log.file, logger.name = "stats.logger")

Preparing objects for the analysis

Preparing data

GAMclust works with bulk, single cell and spatial RNA-seq data.

This vignette shows how to process spatial RNA-seq data.

Let’s load the data and take 10,000 genes for the GAM-clustering analysis.

seurat_object <- readRDS(url("http://artyomovlab.wustl.edu/publications/supp_materials/GAMclust/275_T_seurat.rds"))

seurat_object <- Seurat::SCTransform(seurat_object, 
                                     assay = "Spatial", 
                                     variable.features.n = 10000,
                                     verbose = FALSE)

E <- as.matrix(Seurat::GetAssayData(object = seurat_object,
                                    assay = "SCT",
                                    slot = "scale.data"))

nrow(E) # ! make sure this value is in range from 6,000 to 12,000
# [1] 10000
E[1:3, 1:3]
#       AAACAAGTATCTCCCA-1 AAACACCAATAACTGC-1 AAACAGAGCGACTCCT-1
# NOC2L           1.249130          0.8296517          3.7173470
# ISG15           2.642288         -0.7594430          0.5659099
# AGRN           -0.620876         -0.7318498          0.5300554

Genes in your dataset may be named as Symbol, Entrez, Ensembl or RefSeq IDs. One of these names should be specified as value of gene.id.type parameter in prepareData().

If you analyse singe cell or spatial RNA-seq data, please set use.PCA=TRUE in prepareData().

E.prep <- prepareData(E = E,
                      gene.id.type = "Symbol",
                      use.PCA = TRUE,
                      use.PCA.n = 50,
                      network.annotation = network.annotation)

E.prep[1:3, 1:3]
#                 PC1       PC2        PC3
# 4504       6.326338  2.513886  0.5230565
# 22865     -1.635009 -2.005070  0.9369077
# 100129792 -1.437753 -1.059253 -1.0455837

Preparing network

The prepareNetwork() function defines the structure of the final metabolic modules.

network.prep <- prepareNetwork(E = E.prep,
                               network = network,
                               topology = "metabolites",
                               met.to.filter = met.to.filter,
                               network.annotation = network.annotation,
                               gene2reaction.extra = gene2reaction.extra) # for combined network
# INFO [2026-01-24 15:14:06] Global metabolite network contains 5746 edges.
# INFO [2026-01-24 15:14:06] Largest connected component of this global network contains 1276 nodes and 4403 edges.

Preclustering

The preClustering() function defines initial patterns using k-medoids clustering on gene expression matrix. It is strongly recommended to do initial clustering with no less than 32 clusters (initial.number.of.clusters = 32).

You can visualize the initial heatmap as shown below.

cur.centers <- preClustering(E.prep = E.prep,
                             network.prep = network.prep,
                             initial.number.of.clusters = 32,
                             network.annotation = network.annotation)
# INFO [2026-01-24 15:14:06] 1002 metabolic genes from the analysed dataset mapped to this component.
cur.centers[1:3, 1:3]
#           PC1        PC2        PC3
# 1  0.08031134 -0.3373824  2.2391351
# 2  3.50078547 -0.7895749  3.3735482
# 3 -2.35233343 -1.4231047 -0.8383281
pheatmap::pheatmap(
      GAMclust:::normalize.rows(cur.centers),
      cluster_rows=F, cluster_cols=F,
      show_rownames=F, show_colnames=T)

GAM-clustering analysis

Now you have everything prepared for the GAM-clustering analysis.

Initial patterns will be now refined in an iterative process. The output of gamClustering() function presents a set of specific subnetworks (also called metabolic modules) that reflect metabolic variability within a given transcriptional dataset.

Note, that it may take a long time to derive metabolic modules by gamClustering() function (tens of minutes).

There is a set of parameters which determine the size and number of your final modules. We recommend you to start with the default settings, however you can adjust them based on your own preferences:

  1. If you consider final modules to bee too small or too big and it complicates interpretation for you, you can either increase or reduce by 10 units the max.module.size parameter.

  2. If among final modules you consider presence of any modules with too similar patterns, you can reduce by 0.1 units the cor.threshold parameter.

  3. If among final modules you consider presence of any uninformative modules, you can reduce by 10 times the p.adj.val.threshold parameter.

results <- gamClustering(E.prep = E.prep,
                         network.prep = network.prep,
                         cur.centers = cur.centers,
                         
                         start.base = 0.5,
                         base.dec = 0.05,
                         max.module.size = 50,

                         cor.threshold = 0.5,
                         p.adj.val.threshold = 1e-5,

                         batch.solver = seq_batch_solver(solver),
                         work.dir = work.dir,
                         
                         show.intermediate.clustering = FALSE,
                         verbose = FALSE,
                         collect.stats = TRUE)
# INFO [2026-01-24 15:14:07] GAM-CLUSTERING starts here.
# INFO [2026-01-24 15:14:07] [*] Iteration 1
# INFO [2026-01-24 15:15:19] [*] Iteration 2
# INFO [2026-01-24 15:16:34] >> max cor exceeded 0.5: 0.76
# INFO [2026-01-24 15:16:34] [*] Iteration 3
# INFO [2026-01-24 15:17:45] [*] Iteration 4
# INFO [2026-01-24 15:18:55] >> max cor exceeded 0.5: 0.73
# INFO [2026-01-24 15:18:55] [*] Iteration 5
# INFO [2026-01-24 15:20:00] [*] Iteration 6
# INFO [2026-01-24 15:21:05] >> max cor exceeded 0.5: 0.7
# INFO [2026-01-24 15:21:05] [*] Iteration 7
# INFO [2026-01-24 15:22:11] [*] Iteration 8
# INFO [2026-01-24 15:23:17] >> max cor exceeded 0.5: 0.66
# INFO [2026-01-24 15:23:17] [*] Iteration 9
# INFO [2026-01-24 15:24:23] [*] Iteration 10
# INFO [2026-01-24 15:25:28] >> max cor exceeded 0.5: 0.65
# INFO [2026-01-24 15:25:28] [*] Iteration 11
# INFO [2026-01-24 15:26:30] >> max cor exceeded 0.5: 0.64
# INFO [2026-01-24 15:26:30] [*] Iteration 12
# INFO [2026-01-24 15:27:28] [*] Iteration 13
# INFO [2026-01-24 15:28:28] >> max cor exceeded 0.5: 0.6
# INFO [2026-01-24 15:28:28] [*] Iteration 14
# INFO [2026-01-24 15:29:26] [*] Iteration 15
# INFO [2026-01-24 15:30:26] >> max cor exceeded 0.5: 0.59
# INFO [2026-01-24 15:30:26] [*] Iteration 16
# INFO [2026-01-24 15:31:22] [*] Iteration 17
# INFO [2026-01-24 15:32:18] >> max cor exceeded 0.5: 0.57
# INFO [2026-01-24 15:32:18] [*] Iteration 18
# INFO [2026-01-24 15:33:11] [*] Iteration 19
# INFO [2026-01-24 15:34:05] >> max cor exceeded 0.5: 0.54
# INFO [2026-01-24 15:34:05] [*] Iteration 20
# INFO [2026-01-24 15:34:56] [*] Iteration 21
# INFO [2026-01-24 15:35:48] >> max cor exceeded 0.5: 0.54
# INFO [2026-01-24 15:35:48] [*] Iteration 22
# INFO [2026-01-24 15:36:36] [*] Iteration 23
# INFO [2026-01-24 15:37:25] >> max cor exceeded 0.5: 0.53
# INFO [2026-01-24 15:37:25] [*] Iteration 24
# INFO [2026-01-24 15:38:12] [*] Iteration 25
# INFO [2026-01-24 15:38:58] >> max cor exceeded 0.5: 0.51
# INFO [2026-01-24 15:38:58] [*] Iteration 26
# INFO [2026-01-24 15:39:42] [*] Iteration 27
# INFO [2026-01-24 15:40:26] >> max cor exceeded 0.5: 0.5
# INFO [2026-01-24 15:40:26] [*] Iteration 28
# INFO [2026-01-24 15:41:06] [*] Iteration 29
# INFO [2026-01-24 15:41:47] >> max cor exceeded 0.5: 0.5
# INFO [2026-01-24 15:41:47] [*] Iteration 30
# INFO [2026-01-24 15:42:28] [*] Iteration 31
# INFO [2026-01-24 15:43:13] [*] Iteration 32
# INFO [2026-01-24 15:43:50] [*] Iteration 33
# INFO [2026-01-24 15:44:28] [*] Iteration 34
# INFO [2026-01-24 15:45:06] [*] Iteration 35
# INFO [2026-01-24 15:45:40] [*] Iteration 36
# INFO [2026-01-24 15:46:16] [*] Iteration 37
# INFO [2026-01-24 15:46:49] [*] Iteration 38
# INFO [2026-01-24 15:47:22] [*] Iteration 39
# INFO [2026-01-24 15:47:52] [*] Iteration 40
# INFO [2026-01-24 15:48:23] [*] Iteration 41
# INFO [2026-01-24 15:48:50] [*] Iteration 42
# INFO [2026-01-24 15:49:18] [*] Iteration 43
# INFO [2026-01-24 15:49:44] [*] Iteration 44
# INFO [2026-01-24 15:50:07] [*] Iteration 45
# INFO [2026-01-24 15:50:31] [*] Iteration 46
# INFO [2026-01-24 15:50:51] [*] Iteration 47
# INFO [2026-01-24 15:51:12] [*] Iteration 48
# INFO [2026-01-24 15:51:30] [*] Iteration 49
# INFO [2026-01-24 15:51:50] [*] Iteration 50
# INFO [2026-01-24 15:52:05] [*] Iteration 51
# INFO [2026-01-24 15:52:23] [*] Iteration 52
# INFO [2026-01-24 15:52:37] [*] Iteration 53
# INFO [2026-01-24 15:52:53] [*] Iteration 54
# INFO [2026-01-24 15:53:05] [*] Iteration 55
# INFO [2026-01-24 15:53:22] GAM-CLUSTERING ends here.

Visualizing and exploring the GAM-clustering results

Each metabolic module is a connected piece of metabolic network whose genes expression is correlated across all dataset.

The following functions will help you to visualize and explore the obtained results.

Get graphs of modules:
getGraphs(modules = results$modules,
          network.annotation = network.annotation,
          metabolites.annotation = metabolites.annotation,
          seed.for.layout = 42,
          work.dir = work.dir)
# Graphs for module 1 are built
# Graphs for module 2 are built
# Graphs for module 3 are built
# Graphs for module 4 are built

Example of the graph of the fourth module:

m.4 n1--n1 SLC2A3 n1--n23 HK2 n2--n2 SLC2A1 n2--n24 HK2 n3--n21 EGLN3 n3--n5 BCAT2 n3--n8 GPT2 n3--n25 PLOD2 n3--n16 KYAT3 n4--n27 GAPDH n4--n9 TPI1 n4--n30 ALDOA n5--n5 SLC3A2 n5--n18 GLUL n18--n18 SLC3A2 n6--n27 PGK1 n6--n26 BPGM n7--n7 SLC3A2 n7--n28 PSPH n7--n8 SDSL n8--n22 LDHA n8--n10 PKM n9--n30 ALDOA n10--n26 ENO2 n11--n12 PYGL n11--n23 PGM1 n12--n13 PYGL n13--n13 PYGL n13--n19 GYS1 n15--n29 CHSY1 n15--n19 UGDH n16--n16 SLCO4A1 n16--n20 BEND5 n14--n23 GPI n14--n24 GNPDA1 n14--n17 PFKFB4 n14--n30 PFKP n1 D-Glucose n2 D-Glucosamine n3 2-Oxoglutarate n21 succinate n4 D-Glyceraldehyde 3-phosphate n27 3-Phospho-D-glyceroyl phosphate n5 L-Glutamate n18 L-Glutamine n6 3-Phospho-D-glycerate n7 L-Serine n28 O-Phospho-L-serine n8 Pyruvate n22 (S)-Lactate n9 Glycerone phosphate n25 Succinate n10 Phosphoenolpyruvate n26 2-Phospho-D-glycerate n11 D-Glucose 1-phosphate n12 Starch n13 Amylose n23 D-Glucose 6-phosphate n24 D-Glucosamine 6-phosphate n15 UDP-glucuronate n29 beta-D-Glucuronosyl-(1->3)-N-acetyl-beta-D-galactosaminyl-(1->4)-beta-D-glucuronosylproteoglycan n16 L-glutamate n14 D-Fructose 6-phosphate n17 beta-D-Fructose 2,6-bisphosphate n30 D-Fructose 1,6-bisphosphate n19 UDP-glucose n20 C-terminal alpha-amino-acyl-L-glutamate residue

Get gene tables:

The table contains gene list. Each gene has two descriptive values: i) gene’s correlation value with the modules pattern and ii) gene’s score. High score means that this gene’s expression is similar to the module’s pattern and not similar to other modules’ patterns.

m.gene.list <- getGeneTables(modules = results$modules,
                             nets = results$nets,
                             patterns = results$patterns.pos,
                             gene.exprs = E.prep,
                             network.annotation = network.annotation,
                             work.dir = work.dir)
# Gene tables for module 1 are produced
# Gene tables for module 2 are produced
# Gene tables for module 3 are produced
# Gene tables for module 4 are produced

Example of the gene table of the fourth module:

head(GAMclust:::read.tsv(file.path(work.dir, "m.4.genes.tsv"))) |>
  kableExtra::kable() |>
  kableExtra::kable_styling()
symbol Entrez score cor
SLC2A3 6515 3.313484 0.9539268
SLC2A1 6513 2.705281 0.9306558
EGLN3 112399 2.619330 0.9266640
PGK1 5230 2.445532 0.9200964
GLUL 2752 2.456103 0.9196962
GAPDH 2597 2.481569 0.9178584
Get plots of patterns:
for(i in 1:length(m.gene.list)){
 
  print(fgsea::plotCoregulationProfileSpatial(m.gene.list[[i]], 
                                             seurat_object,
                                             title = paste0("module ", i)))
}

Get plots of individual genes expression (example for the fourth module):
Seurat::DefaultAssay(seurat_object) <- "SCT"

i <- 4

Seurat::SpatialFeaturePlot(seurat_object, 
                           features = m.gene.list[[i]], 
                           pt.size.factor = 2, stroke = 0.01, alpha = 0.7,
                           ncol = 8)

Get tables and plots with annotation of modules:

Functional annotation of obtained modules is done based on KEGG and Reactome canonical metabolic pathways.

getAnnotationTables(network.annotation = network.annotation,
                    nets = results$nets,
                    work.dir = work.dir)
# Pathway annotation for module 1 is produced
# Pathway annotation for module 2 is produced
# Pathway annotation for module 3 is produced
# Pathway annotation for module 4 is produced

Example of the annotation table of the fourth module:

head(GAMclust:::read.tsv(file.path(work.dir, "m.4.pathways.tsv"))) |>
  kableExtra::kable() |>
  kableExtra::kable_styling(font_size = 8) |>
  kableExtra::column_spec(1, width = "1.6in") |>
  kableExtra::column_spec(2:6, width = "0.5in") |>
  kableExtra::column_spec(7, width = "1.2in")
pathway pval padj foldEnrichment overlap size overlapGenes
R-HSA-70171: Glycolysis 0.0000000 0.0000000 17.032258 12 13 GNPDA1 ENO2 ALDOA GAPDH GPI HK2 PFKFB4 PFKP PGK1 PKM BPGM TPI1
hsa00500: Starch and sucrose metabolism 0.0000703 0.0041371 9.225806 5 10 GPI GYS1 HK2 PGM1 PYGL
hsa_M00570: Isoleucine biosynthesis, threonine => 2-oxobutanoate => isoleucine 0.0028474 0.0894088 18.451613 2 2 SDSL BCAT2
R-HSA-196836: Vitamin C (ascorbate) metabolism 0.0082525 0.1850920 12.301075 2 3 SLC2A1 SLC2A3
R-HSA-71240: Tryptophan catabolism 0.0159469 0.3129585 9.225806 2 4 KYAT3 SLC3A2
hsa00220: Arginine biosynthesis 0.0256824 0.4222898 7.380645 2 5 GLUL GPT2

Annotation heatmap for all modules:

getAnnotationHeatmap(work.dir = work.dir)
# Processing module 1
# Module size: 50
# Processing module 2
# Module size: 50
# Processing module 3
# Module size: 36
# Processing module 4
# Module size: 31

Compare modules obtained in different runs:

You may also compare two results of running GAM-clustering on the same dataset (e.g. runs with different parameters) or compare two results of running GAM-clustering on different datasets (then set same.data=FALSE).

modulesSimilarity(dir1 = work.dir,
                  dir2 = "old_dir",
                  name1 = "new",
                  name2 = "old",
                  same.data = TRUE,
                  use.genes.with.pos.score = TRUE,
                  work.dir = work.dir,
                  file.name = "comparison.png")