Overview

This script fits generalised additive models (GAMs) to the BM → CNS macrophage pseudotime trajectory using tradeSeq, providing statistically rigorous, publication-quality results for genes whose expression changes along the transition.

Pipeline: 1. Load pseudotime trajectory object and counts 2. Fit GAMs via fitGAM() 3. associationTest() — genes that change significantly along pseudotime 4. startVsEndTest() — genes that differ between trajectory start and end 5. Cluster genes by expression dynamics → figure-ready heatmap 6. GO enrichment on gene clusters 7. Publication figures: smooth curves, heatmap, dot plots


1. Setup

# Run once:
if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")
BiocManager::install("tradeSeq")
BiocManager::install("clusterExperiment")
install.packages("mgcv")
suppressPackageStartupMessages({
  library(Seurat)
  library(qs2)
  library(tradeSeq)
  library(SingleCellExperiment)
  library(dplyr)
  library(tidyr)
  library(ggplot2)
  library(patchwork)
  library(ComplexHeatmap)
  library(circlize)
  library(RColorBrewer)
  library(ggrepel)
  library(gprofiler2)
  library(knitr)
  library(viridis)
  library(scales)
})

base_path <- "/Users/alasdairduguid/Documents/Projects/BSH start up grant 2024 - scRNAseq miR128a/scRNAseq_proj/analysis_feb26"
cache_dir  <- file.path(base_path, "data")

# ── Plot theme ──────────────────────────────────────────────────────────────
theme_pub <- theme_classic(base_size = 12) +
  theme(
    strip.background = element_blank(),
    strip.text       = element_text(face = "bold", size = 11),
    plot.title       = element_text(face = "bold", size = 13),
    plot.subtitle    = element_text(size = 10, colour = "grey40"),
    legend.title     = element_text(face = "bold"),
    axis.line        = element_line(colour = "grey30"),
    panel.grid.major = element_line(colour = "grey92", linewidth = 0.3)
  )

# Niche state palette (matching 13c plots)
niche_cols <- c(
  "BAM_1 (CNS-adapted)" = "#B2182B",
  "BM Mac (in BM)"      = "#4393C3",
  "BM Mac (in CNS)"     = "#9970AB"
)

2. Load Data & Build tradeSeq Input

# ── Load the myeloid Seurat object ──────────────────────────────────────────
# Adjust filename if saved under a different name from 13c
myeloid <- qs_read(file.path(cache_dir, "13d_transition_with_pseudotime.qs"))

# Ensure the pseudotime and niche state columns are present.
# Adjust column names below to match what was used in 13c:
pseudotime_col  <- "pseudotime"     # numeric pseudotime column in meta.data
niche_state_col <- "niche_state"    # categorical: BAM_1, BM Mac in BM/CNS

cat("Cells in object:", ncol(myeloid), "\n")
## Cells in object: 1147
cat("Pseudotime range:", range(myeloid[[pseudotime_col]][[1]], na.rm = TRUE), "\n")
## Pseudotime range: 0 6.202476
cat("\nNiche state breakdown:\n")
## 
## Niche state breakdown:
print(table(myeloid[[niche_state_col]]))
## niche_state
## BAM_1 (CNS-adapted)      BM Mac (in BM)     BM Mac (in CNS) 
##                 565                 499                  83
# ── Keep only cells that are part of the trajectory ─────────────────────────
# (removes any cells with NA pseudotime)
cells_keep <- !is.na(myeloid[[pseudotime_col]][[1]])
myeloid_traj <- myeloid[, cells_keep]

cat("Cells retained for trajectory analysis:", ncol(myeloid_traj), "\n")
## Cells retained for trajectory analysis: 1147
# ── Extract inputs for tradeSeq ──────────────────────────────────────────────
counts_mat <- GetAssayData(myeloid_traj, assay = "originalexp", layer = "counts")

pseudotime_vec <- myeloid_traj[[pseudotime_col]][[1]]

# For a single lineage: pseudotime matrix = n_cells × 1, weights = all 1s
pt_matrix <- matrix(pseudotime_vec, ncol = 1,
                    dimnames = list(colnames(myeloid_traj), "Lineage1"))
wt_matrix <- matrix(1, nrow = ncol(myeloid_traj), ncol = 1,
                    dimnames = list(colnames(myeloid_traj), "Lineage1"))

cat("\nPseudotime matrix dim:", dim(pt_matrix), "\n")
## 
## Pseudotime matrix dim: 1147 1
cat("Counts matrix dim:", dim(counts_mat), "\n")
## Counts matrix dim: 26694 1147
# ── Gene filtering: expressed in ≥ 5% of trajectory cells ───────────────────
pct_expressed <- rowSums(counts_mat > 0) / ncol(counts_mat)
genes_keep     <- names(pct_expressed[pct_expressed >= 0.05])

cat("Genes passing filter:", length(genes_keep), "of", nrow(counts_mat), "\n")
## Genes passing filter: 6267 of 26694
counts_filt <- counts_mat[genes_keep, ]

3. Fit GAMs with tradeSeq

# ── Select number of knots via AIC ───────────────────────────────────────────
# Sample 200 genes for speed; real run uses full set
set.seed(42)
sample_genes <- sample(genes_keep, min(200, length(genes_keep)))

aic_res <- evaluateK(
  counts    = counts_filt[sample_genes, ],
  pseudotime = pt_matrix,
  cellWeights = wt_matrix,
  k = 3:8,
  nGenes = 200,
  verbose = FALSE
)

# Plot AIC vs k (tradeSeq built-in)
# Recommended k is typically where AIC stabilises
# ── Fit GAMs ─────────────────────────────────────────────────────────────────
# Set k based on evaluateK output above (typically 5-6 for a single trajectory)
# Adjust k if the AIC plot suggests otherwise
k_use <- 6

cat("Fitting GAMs with k =", k_use, "knots on", length(genes_keep), "genes...\n")
## Fitting GAMs with k = 6 knots on 6267 genes...
cat("(This takes ~20–40 min on a standard server. Run on Eddie.)\n")
## (This takes ~20–40 min on a standard server. Run on Eddie.)
sce <- fitGAM(
  counts      = counts_filt,
  pseudotime  = pt_matrix,
  cellWeights = wt_matrix,
  nknots      = k_use,
  verbose     = TRUE,
  parallel    = TRUE   # uses BiocParallel; set BPPARAM if needed
)
##   |                                                                              |                                                                      |   0%  |                                                                              |==                                                                    |   3%  |                                                                              |====                                                                  |   5%  |                                                                              |=====                                                                 |   8%  |                                                                              |=======                                                               |  10%  |                                                                              |=========                                                             |  13%  |                                                                              |===========                                                           |  15%  |                                                                              |============                                                          |  18%  |                                                                              |==============                                                        |  20%  |                                                                              |================                                                      |  23%  |                                                                              |==================                                                    |  25%  |                                                                              |===================                                                   |  28%  |                                                                              |=====================                                                 |  30%  |                                                                              |=======================                                               |  33%  |                                                                              |=========================                                             |  35%  |                                                                              |==========================                                            |  38%  |                                                                              |============================                                          |  40%  |                                                                              |==============================                                        |  43%  |                                                                              |================================                                      |  45%  |                                                                              |=================================                                     |  48%  |                                                                              |===================================                                   |  50%  |                                                                              |=====================================                                 |  53%  |                                                                              |=======================================                               |  55%  |                                                                              |========================================                              |  58%  |                                                                              |==========================================                            |  60%  |                                                                              |============================================                          |  63%  |                                                                              |==============================================                        |  65%  |                                                                              |===============================================                       |  68%  |                                                                              |=================================================                     |  70%  |                                                                              |===================================================                   |  73%  |                                                                              |=====================================================                 |  75%  |                                                                              |======================================================                |  78%  |                                                                              |========================================================              |  80%  |                                                                              |==========================================================            |  82%  |                                                                              |===========================================================           |  85%  |                                                                              |=============================================================         |  87%  |                                                                              |===============================================================       |  90%  |                                                                              |=================================================================     |  92%  |                                                                              |==================================================================    |  95%  |                                                                              |====================================================================  |  97%  |                                                                              |======================================================================| 100%
# Save immediately
qs_save(sce, file.path(cache_dir, "13d_tradeseq_sce.qs"))
cat("GAM fit saved.\n")
## GAM fit saved.
# ── If re-running after GAM fit: load cached SCE ────────────────────────────
sce <- qs_read(file.path(cache_dir, "13d_tradeseq_sce.qs"))

4. Statistical Tests

4a — Association Test (changes along pseudotime)

# Tests whether smoothed expression changes significantly along pseudotime
# (Wald test on GAM coefficients; FDR-corrected)
assoc_res <- associationTest(sce, lineages = TRUE, l2fc = log2(1.5))

assoc_res <- assoc_res %>%
  tibble::rownames_to_column("gene") %>%
  arrange(pvalue_1) %>%
  mutate(
    padj     = p.adjust(pvalue_1, method = "BH"),
    sig      = padj < 0.05 & !is.na(padj)
  )

cat("Genes significantly associated with pseudotime (FDR < 0.05):",
    sum(assoc_res$sig, na.rm = TRUE), "\n")
## Genes significantly associated with pseudotime (FDR < 0.05): 810
assoc_res %>%
  filter(sig) %>%
  slice_head(n = 30) %>%
  select(gene, waldStat_1, df_1, pvalue_1, padj) %>%
  mutate(across(where(is.numeric), ~round(.x, 4))) %>%
  kable(caption = "Top 30 pseudotime-associated genes (association test)")
Top 30 pseudotime-associated genes (association test)
gene waldStat_1 df_1 pvalue_1 padj
Slc40a1 118.6676 4 0 0
Camk1d 86.9442 4 0 0
Mrc1 85.9262 4 0 0
Fcna 214.6035 4 0 0
Nr1h3 126.0166 4 0 0
Cd82 112.8044 4 0 0
B2m 124.5736 4 0 0
Sirpa 215.1976 4 0 0
Srxn1 187.1373 4 0 0
Ctsa 90.0110 4 0 0
Fabp5 107.2492 4 0 0
Cd5l 385.2321 4 0 0
S100a6 93.6406 4 0 0
S100a11 95.1629 4 0 0
S100a10 246.7843 4 0 0
Ctss 121.5157 4 0 0
Vcam1 766.2468 4 0 0
Gclm 119.5957 4 0 0
Atp6v0d2 161.0261 4 0 0
Txn1 157.3168 4 0 0
Ptgr1 95.5842 4 0 0
Plin2 93.5091 4 0 0
Osbpl9 166.1383 4 0 0
Eps15 104.6871 4 0 0
Akr1a1 262.3329 4 0 0
Prdx1 775.9843 4 0 0
Sdc3 262.7220 4 0 0
Laptm5 90.4100 4 0 0
C1qc 102.4812 4 0 0
C1qa 113.6652 4 0 0

4b — Start vs End Test

# Tests whether expression differs between trajectory start (BM) and end (CNS-adapted)
se_res <- startVsEndTest(sce, pseudotimeValues = c(0, 1))  # 0=start, 1=end (scaled)

se_res <- se_res %>%
  tibble::rownames_to_column("gene") %>%
  arrange(pvalue) %>%
  mutate(
    padj      = p.adjust(pvalue, method = "BH"),
    sig       = padj < 0.05 & !is.na(padj),
    direction = case_when(
      sig & logFClineage1 > 0  ~ "Up at CNS end",
      sig & logFClineage1 < 0  ~ "Up at BM start",
      TRUE                     ~ "NS"
    )
  )

cat("Start vs End — genes UP at CNS end (FDR < 0.05):",
    sum(se_res$direction == "Up at CNS end"), "\n")
## Start vs End — genes UP at CNS end (FDR < 0.05): 1127
cat("Start vs End — genes UP at BM start (FDR < 0.05):",
    sum(se_res$direction == "Up at BM start"), "\n")
## Start vs End — genes UP at BM start (FDR < 0.05): 443
se_res %>%
  filter(sig) %>%
  arrange(desc(abs(logFClineage1))) %>%
  slice_head(n = 40) %>%
  select(gene, logFClineage1, waldStat, padj, direction) %>%
  mutate(across(where(is.numeric), ~round(.x, 4))) %>%
  kable(caption = "Top 40 start-vs-end genes (ranked by |logFC|)")
Top 40 start-vs-end genes (ranked by |logFC|)
gene logFClineage1 waldStat padj direction
Itgad -2.9457 11.9779 0.0055 Up at BM start
Adamtsl5 2.8682 20.4097 0.0002 Up at CNS end
Lyve1 2.6760 8.6924 0.0194 Up at CNS end
Gstm2 2.6489 23.0931 0.0000 Up at CNS end
Ccnd2 -2.6180 14.5589 0.0019 Up at BM start
Zswim8 -2.5428 8.8083 0.0186 Up at BM start
Akr1b8 2.5335 84.1452 0.0000 Up at CNS end
Smim1 2.4026 8.6082 0.0201 Up at CNS end
Cd209f -2.3774 10.6871 0.0091 Up at BM start
Prnp -2.2791 11.9348 0.0055 Up at BM start
Gbe1 2.1072 33.3001 0.0000 Up at CNS end
Selenom 1.9875 31.1851 0.0000 Up at CNS end
C3 -1.9644 19.1925 0.0003 Up at BM start
F13a1 1.9278 77.8909 0.0000 Up at CNS end
S100a6 1.9234 23.3230 0.0000 Up at CNS end
Srxn1 1.8002 39.4293 0.0000 Up at CNS end
Mrap -1.7799 24.1869 0.0000 Up at BM start
Ski -1.7529 9.0315 0.0171 Up at BM start
S100a4 1.7422 16.7860 0.0008 Up at CNS end
Clec4d 1.7384 62.0723 0.0000 Up at CNS end
Prdx1 1.7028 283.6154 0.0000 Up at CNS end
Maoa 1.6820 22.8418 0.0001 Up at CNS end
S100a10 1.6377 59.3892 0.0000 Up at CNS end
Cbr3 1.5778 32.0446 0.0000 Up at CNS end
Mt2 1.5185 62.9749 0.0000 Up at CNS end
Rnf19b -1.4949 10.2058 0.0109 Up at BM start
Esd 1.4648 117.7728 0.0000 Up at CNS end
Abcg3 -1.4398 18.3847 0.0004 Up at BM start
Pira2 -1.4266 12.2960 0.0047 Up at BM start
Arhgap15 -1.4191 8.7897 0.0187 Up at BM start
S100a11 1.4074 40.9119 0.0000 Up at CNS end
Chchd10 1.3841 29.8393 0.0000 Up at CNS end
Ppp3cb -1.3802 7.7640 0.0280 Up at BM start
Camk1d -1.3619 61.7830 0.0000 Up at BM start
Mmp12 1.3570 9.8485 0.0125 Up at CNS end
Stab2 -1.3540 10.0549 0.0115 Up at BM start
Axl -1.3417 51.1273 0.0000 Up at BM start
Anxa1 1.3284 27.7306 0.0000 Up at CNS end
Thoc2 -1.3110 6.8503 0.0394 Up at BM start
Tspan10 -1.2973 30.8553 0.0000 Up at BM start

5. Gene Clustering by Dynamics

# ── Pull smoothed fitted values for sig. genes ───────────────────────────────
sig_genes <- assoc_res %>%
  filter(sig) %>%
  pull(gene)

cat("Clustering", length(sig_genes), "significantly dynamic genes...\n")
## Clustering 810 significantly dynamic genes...
# Get smoothed expression values at 100 pseudotime points
yhat <- predictSmooth(sce, gene = sig_genes, nPoints = 100, tidy = FALSE)

# Scale each gene to [0,1] for clustering
yhat_scaled <- t(apply(yhat, 1, function(x) {
  rng <- range(x, na.rm = TRUE)
  if (diff(rng) == 0) return(rep(0, length(x)))
  (x - rng[1]) / diff(rng)
}))

# k-means clustering into 6 expression dynamics groups
set.seed(42)
n_clusters  <- 6
km_fit      <- kmeans(yhat_scaled, centers = n_clusters, nstart = 25, iter.max = 100)
gene_cluster <- data.frame(
  gene    = sig_genes,
  cluster = km_fit$cluster
)

# Assign interpretable labels based on trajectory shape
# (will auto-label after inspection — update if needed)
cluster_labels <- c(
  "1" = "Early-high / lost",
  "2" = "Late-gained",
  "3" = "Transient mid",
  "4" = "Monotone increase",
  "5" = "Monotone decrease",
  "6" = "Late surge"
)

gene_cluster <- gene_cluster %>%
  mutate(cluster_label = cluster_labels[as.character(cluster)])

cat("\nGenes per cluster:\n")
## 
## Genes per cluster:
print(table(gene_cluster$cluster_label))
## 
## Early-high / lost        Late surge       Late-gained Monotone decrease 
##               130               179                67               141 
## Monotone increase     Transient mid 
##               127               166

6. Publication Figures

Fig 1 — Smooth Expression Curves: Top Dynamic Genes

# ── Select top genes per cluster for display ─────────────────────────────────
top_per_cluster <- gene_cluster %>%
  left_join(assoc_res %>% select(gene, waldStat_1, padj), by = "gene") %>%
  group_by(cluster_label) %>%
  slice_max(waldStat_1, n = 4) %>%
  ungroup()

display_genes <- top_per_cluster$gene

# Get predicted smooth values in tidy format
smooth_tidy <- predictSmooth(sce, gene = display_genes, nPoints = 100, tidy = TRUE)

# Add cell-level observed expression + pseudotime for jitter layer
obs_df <- data.frame(
  gene        = rep(display_genes, each = ncol(myeloid_traj)),
  pseudotime  = rep(pseudotime_vec, times = length(display_genes)),
  expression  = as.numeric(
    t(GetAssayData(myeloid_traj, assay = "originalexp", layer = "data")[display_genes, ])
  ),
  niche_state = rep(myeloid_traj[[niche_state_col]][[1]], times = length(display_genes))
) %>%
  filter(!is.na(pseudotime))

# Merge cluster label for facet ordering
smooth_tidy <- smooth_tidy %>%
  left_join(gene_cluster %>% select(gene, cluster_label), by = "gene")

obs_df <- obs_df %>%
  left_join(gene_cluster %>% select(gene, cluster_label), by = "gene")

ggplot() +
  # Jittered observed expression coloured by niche state
  geom_point(data = obs_df,
             aes(x = pseudotime, y = expression, colour = niche_state),
             alpha = 0.25, size = 0.7, stroke = 0) +
  # Smooth GAM fit
  geom_line(data = smooth_tidy,
            aes(x = time, y = yhat),
            colour = "black", linewidth = 1.1) +
  scale_colour_manual(values = niche_cols, name = "Niche state") +
  facet_wrap(~gene, scales = "free_y", ncol = 4) +
  labs(
    x        = "Pseudotime (BM → CNS)",
    y        = "Normalised expression",
    title    = "Gene Expression Dynamics Along BM → CNS Macrophage Transition",
    subtitle = "GAM smooth fit ± SE; points coloured by niche state"
  ) +
  theme_pub +
  theme(
    strip.text      = element_text(face = "italic", size = 10),
    legend.position = "bottom",
    legend.key.size = unit(0.4, "cm")
  )

ggsave(file.path(cache_dir, "Fig_smooth_curves_top_genes.pdf"),
       width = 16, height = 14)

Fig 2 — Heatmap of All Significant Genes by Cluster

# ── Build matrix: genes × pseudotime points, scaled per gene ─────────────────
hm_genes   <- gene_cluster$gene
yhat_hm    <- predictSmooth(sce, gene = hm_genes, nPoints = 100, tidy = FALSE)

yhat_hm_sc <- t(apply(yhat_hm, 1, function(x) {
  rng <- range(x, na.rm = TRUE)
  if (diff(rng) == 0) return(rep(0, length(x)))
  (x - rng[1]) / diff(rng)
}))

# Order genes within each cluster by peak pseudotime
gene_order <- gene_cluster %>%
  mutate(
    peak_pt = apply(yhat_hm_sc[gene, ], 1, which.max)
  ) %>%
  arrange(cluster, peak_pt) %>%
  pull(gene)

# Annotation colour for clusters
cluster_cols <- setNames(
  brewer.pal(n_clusters, "Set2"),
  unique(gene_cluster$cluster_label[match(1:n_clusters,
                                          gene_cluster$cluster)])
)

row_ann <- rowAnnotation(
  Cluster = gene_cluster$cluster_label[match(gene_order, gene_cluster$gene)],
  col = list(Cluster = cluster_cols),
  annotation_name_gp = gpar(fontsize = 10, fontface = "bold"),
  simple_anno_size = unit(0.5, "cm")
)

# Column (pseudotime) annotation
col_ann <- HeatmapAnnotation(
  Pseudotime = anno_lines(
    seq(0, 1, length.out = 100),
    gp = gpar(col = "grey30", lwd = 1.5),
    axis = FALSE
  ),
  annotation_name_gp = gpar(fontsize = 9)
)

col_fun <- colorRamp2(
  c(0, 0.5, 1),
  c("#4575B4", "#FFFFBF", "#D73027")
)

Heatmap(
  yhat_hm_sc[gene_order, ],
  name            = "Scaled\nExpression",
  col             = col_fun,
  cluster_rows    = FALSE,
  cluster_columns = FALSE,
  show_row_names  = length(gene_order) <= 150,   # hide names if too many genes
  row_names_gp    = gpar(fontsize = 6, fontface = "italic"),
  show_column_names = FALSE,
  column_title    = "Pseudotime  (BM start → CNS end)",
  column_title_gp = gpar(fontsize = 11, fontface = "bold"),
  left_annotation = row_ann,
  top_annotation  = col_ann,
  heatmap_legend_param = list(
    title_gp     = gpar(fontsize = 9, fontface = "bold"),
    labels_gp    = gpar(fontsize = 8),
    legend_height = unit(3, "cm")
  ),
  row_split       = gene_cluster$cluster_label[match(gene_order, gene_cluster$gene)],
  row_title_gp    = gpar(fontsize = 9, fontface = "bold"),
  border          = FALSE
)

Fig 3 — Volcano Plot: Start vs End

# Label top genes in each direction
top_up   <- se_res %>% filter(direction == "Up at CNS end") %>%
              slice_max(logFClineage1, n = 15) %>% pull(gene)
top_down <- se_res %>% filter(direction == "Up at BM start") %>%
              slice_min(logFClineage1, n = 15) %>% pull(gene)
label_genes <- c(top_up, top_down)

volcano_df <- se_res %>%
  mutate(
    log10p = -log10(pmax(padj, 1e-100)),
    label  = ifelse(gene %in% label_genes, gene, NA)
  )

ggplot(volcano_df, aes(x = logFClineage1, y = log10p)) +
  geom_point(aes(colour = direction), alpha = 0.5, size = 1.2) +
  geom_text_repel(
    data = volcano_df %>% filter(!is.na(label)),
    aes(label = label),
    fontface  = "italic",
    size      = 3,
    max.overlaps = 30,
    segment.colour = "grey60",
    segment.size   = 0.3,
    box.padding    = 0.35
  ) +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed",
             colour = "grey40", linewidth = 0.5) +
  geom_vline(xintercept = c(-log2(1.5), log2(1.5)), linetype = "dashed",
             colour = "grey40", linewidth = 0.5) +
  scale_colour_manual(
    values = c(
      "Up at CNS end"   = "#B2182B",
      "Up at BM start"  = "#4393C3",
      "NS"              = "grey75"
    ),
    name = NULL
  ) +
  scale_x_continuous(
    labels = function(x) ifelse(x > 0, paste0("+", round(x, 1)), round(x, 1))
  ) +
  labs(
    x        = expression(log[2]*" fold change (CNS end / BM start)"),
    y        = expression(-log[10]*" adjusted p-value"),
    title    = "Macrophage Reprogramming: BM Start vs CNS End",
    subtitle = "Trajectory start vs end test (tradeSeq); FDR < 0.05 threshold shown"
  ) +
  theme_pub +
  theme(legend.position = "top")

ggsave(file.path(cache_dir, "Fig_volcano_start_vs_end.pdf"),
       width = 9, height = 7)

7. GO Enrichment by Trajectory Cluster

# Run gProfiler2 on each cluster separately
go_results <- lapply(unique(gene_cluster$cluster_label), function(cl) {
  genes <- gene_cluster %>% filter(cluster_label == cl) %>% pull(gene)
  if (length(genes) < 5) return(NULL)

  res <- gost(
    query           = genes,
    organism        = "mmusculus",
    sources         = c("GO:BP", "GO:MF", "KEGG", "REACTOME"),
    evcodes         = FALSE,
    correction_method = "fdr",
    user_threshold  = 0.05
  )

  if (is.null(res)) return(NULL)

  res$result %>%
    select(term_name, term_id, source, p_value, term_size, intersection_size) %>%
    mutate(cluster = cl) %>%
    arrange(p_value)
})

go_all <- bind_rows(go_results)

# Show top 5 terms per cluster
go_all %>%
  group_by(cluster) %>%
  slice_head(n = 5) %>%
  ungroup() %>%
  select(cluster, term_name, source, p_value, intersection_size) %>%
  mutate(p_value = signif(p_value, 3)) %>%
  kable(caption = "Top GO/pathway terms per gene expression cluster")
Top GO/pathway terms per gene expression cluster
cluster term_name source p_value intersection_size
Early-high / lost catabolic process GO:BP 0.00e+00 49
Early-high / lost small molecule metabolic process GO:BP 0.00e+00 38
Early-high / lost catalytic activity GO:MF 0.00e+00 75
Early-high / lost oxidoreductase activity GO:MF 0.00e+00 26
Early-high / lost monosaccharide metabolic process GO:BP 0.00e+00 17
Late surge immune system process GO:BP 0.00e+00 72
Late surge immune response GO:BP 0.00e+00 55
Late surge regulation of immune system process GO:BP 0.00e+00 46
Late surge defense response GO:BP 0.00e+00 47
Late surge adaptive immune response GO:BP 0.00e+00 27
Late-gained defense response GO:BP 0.00e+00 28
Late-gained response to other organism GO:BP 0.00e+00 25
Late-gained biological process involved in interspecies interaction between organisms GO:BP 0.00e+00 26
Late-gained response to external biotic stimulus GO:BP 0.00e+00 25
Late-gained immune response GO:BP 0.00e+00 26
Monotone decrease Lysosome KEGG 0.00e+00 24
Monotone decrease establishment of localization GO:BP 0.00e+00 76
Monotone decrease transport GO:BP 0.00e+00 74
Monotone decrease localization GO:BP 0.00e+00 80
Monotone decrease regulation of immune system process GO:BP 0.00e+00 43
Monotone increase endosomal transport GO:BP 0.00e+00 16
Monotone increase vesicle-mediated transport GO:BP 0.00e+00 31
Monotone increase Endocytosis KEGG 2.00e-07 14
Monotone increase transport GO:BP 4.00e-07 50
Monotone increase intracellular transport GO:BP 4.00e-07 26
Transient mid Proteasome KEGG 2.00e-07 9
Transient mid cellular localization GO:BP 1.20e-06 50
Transient mid protein binding GO:MF 1.30e-06 101
Transient mid cytoskeletal protein binding GO:MF 1.60e-05 23
Transient mid establishment of localization in cell GO:BP 2.19e-05 34
# ── Dot plot: top terms per cluster ──────────────────────────────────────────
go_plot_df <- go_all %>%
  group_by(cluster) %>%
  slice_head(n = 6) %>%
  ungroup() %>%
  mutate(
    log10p     = -log10(p_value),
    GeneRatio  = intersection_size / term_size,
    term_name  = stringr::str_wrap(term_name, 40)
  )

ggplot(go_plot_df,
       aes(x = cluster, y = reorder(term_name, log10p),
           size = GeneRatio, colour = log10p)) +
  geom_point() +
  scale_colour_gradientn(
    colours = c("#FDAE61", "#D73027", "#A50026"),
    name    = expression(-log[10]*p)
  ) +
  scale_size_continuous(name = "Gene ratio", range = c(2, 8)) +
  labs(
    x     = "Expression cluster",
    y     = NULL,
    title = "GO / Pathway Enrichment by Pseudotime Gene Cluster"
  ) +
  theme_pub +
  theme(
    axis.text.x  = element_text(angle = 35, hjust = 1, size = 9),
    axis.text.y  = element_text(size = 9),
    panel.grid.major.x = element_line(colour = "grey88")
  )

ggsave(file.path(cache_dir, "Fig_GO_dotplot_by_cluster.pdf"),
       width = 13, height = 10)

8. Key Genes Summary Table

# Merge association test + start-vs-end test + cluster assignment
summary_tbl <- assoc_res %>%
  filter(sig) %>%
  select(gene, waldStat_assoc = waldStat_1, padj_assoc = padj) %>%
  left_join(
    se_res %>% select(gene, logFC_CNS_vs_BM = logFClineage1,
                      padj_sve = padj, direction),
    by = "gene"
  ) %>%
  left_join(gene_cluster %>% select(gene, cluster_label), by = "gene") %>%
  arrange(desc(abs(logFC_CNS_vs_BM))) %>%
  mutate(across(where(is.numeric), ~round(.x, 4)))

write.csv(summary_tbl,
          file.path(cache_dir, "13d_pseudotime_DE_results.csv"),
          row.names = FALSE)

summary_tbl %>%
  slice_head(n = 50) %>%
  kable(caption = "Top 50 dynamic genes: merged association + start-vs-end results")
Top 50 dynamic genes: merged association + start-vs-end results
gene waldStat_assoc padj_assoc logFC_CNS_vs_BM padj_sve direction cluster_label
Adamtsl5 38.1662 0.0000 2.8682 0.0002 Up at CNS end Transient mid
Lyve1 46.6962 0.0000 2.6760 0.0194 Up at CNS end Late-gained
Gstm2 57.1275 0.0000 2.6489 0.0000 Up at CNS end Early-high / lost
Ccnd2 20.2038 0.0036 -2.6180 0.0019 Up at BM start Monotone decrease
Akr1b8 224.4304 0.0000 2.5335 0.0000 Up at CNS end Early-high / lost
Smim1 21.0625 0.0009 2.4026 0.0201 Up at CNS end Monotone increase
Cd209f 123.3976 0.0000 -2.3774 0.0091 Up at BM start Late-gained
Prnp 15.5021 0.0239 -2.2791 0.0055 Up at BM start Late surge
Gbe1 91.0071 0.0000 2.1072 0.0000 Up at CNS end Early-high / lost
Selenom 74.8904 0.0000 1.9875 0.0000 Up at CNS end Transient mid
C3 53.9416 0.0000 -1.9644 0.0003 Up at BM start Late surge
F13a1 789.2216 0.0000 1.9278 0.0000 Up at CNS end Monotone increase
S100a6 93.6406 0.0000 1.9234 0.0000 Up at CNS end Transient mid
Srxn1 187.1373 0.0000 1.8002 0.0000 Up at CNS end Early-high / lost
Mrap 67.9125 0.0000 -1.7799 0.0000 Up at BM start Late surge
S100a4 64.7953 0.0000 1.7422 0.0008 Up at CNS end Transient mid
Clec4d 200.2591 0.0000 1.7384 0.0000 Up at CNS end Early-high / lost
Prdx1 775.9843 0.0000 1.7028 0.0000 Up at CNS end Early-high / lost
Maoa 65.1603 0.0000 1.6820 0.0001 Up at CNS end Early-high / lost
S100a10 246.7843 0.0000 1.6377 0.0000 Up at CNS end Transient mid
Cbr3 51.0573 0.0000 1.5778 0.0000 Up at CNS end Early-high / lost
Mt2 293.3153 0.0000 1.5185 0.0000 Up at CNS end Early-high / lost
Rgs18 28.2941 0.0001 1.4839 0.0516 NS Monotone increase
Esd 340.2193 0.0000 1.4648 0.0000 Up at CNS end Early-high / lost
Abcg3 56.3380 0.0000 -1.4398 0.0004 Up at BM start Late surge
Pira2 33.2112 0.0000 -1.4266 0.0047 Up at BM start Late surge
S100a11 95.1629 0.0000 1.4074 0.0000 Up at CNS end Early-high / lost
Chchd10 76.4537 0.0000 1.3841 0.0000 Up at CNS end Transient mid
Camk1d 86.9442 0.0000 -1.3619 0.0000 Up at BM start Monotone decrease
Mmp12 40.1056 0.0000 1.3570 0.0125 Up at CNS end Late surge
Stab2 25.7148 0.0004 -1.3540 0.0115 Up at BM start Late surge
Ifit3 25.7624 0.0003 1.3430 0.1081 NS Late-gained
Axl 288.9259 0.0000 -1.3417 0.0000 Up at BM start Late surge
Anxa1 27.6325 0.0002 1.3284 0.0000 Up at CNS end Early-high / lost
Susd1 16.4412 0.0164 -1.2726 0.0021 Up at BM start Late-gained
Vcam1 766.2468 0.0000 -1.2347 0.0000 Up at BM start Late surge
Tuba4a 37.7111 0.0000 1.2225 0.0039 Up at CNS end Early-high / lost
Crip2 55.1452 0.0000 -1.2212 0.0002 Up at BM start Late surge
Bnip3 62.5282 0.0000 1.2205 0.0000 Up at CNS end Early-high / lost
Cd5l 385.2321 0.0000 -1.2181 0.0000 Up at BM start Late surge
Slc43a2 64.9738 0.0000 -1.2180 0.0000 Up at BM start Monotone decrease
Fxyd2 72.7558 0.0000 1.1991 0.0002 Up at CNS end Transient mid
Slc8b1 14.4239 0.0358 -1.1881 0.0075 Up at BM start Late surge
Pstpip1 41.3718 0.0000 1.1877 0.0010 Up at CNS end Transient mid
Slc12a7 22.3011 0.0015 -1.1846 0.0101 Up at BM start Monotone decrease
Myo10 115.4465 0.0000 -1.1794 0.0000 Up at BM start Late surge
Prdx6 72.2733 0.0000 1.1670 0.0000 Up at CNS end Early-high / lost
Bin1 214.8838 0.0000 1.1454 0.0000 Up at CNS end Monotone increase
Sel1l 28.1588 0.0001 -1.1450 0.0012 Up at BM start Monotone decrease
Mt1 405.8848 0.0000 1.1402 0.0000 Up at CNS end Early-high / lost

Session Information

sessionInfo()
## R version 4.4.2 (2024-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS 26.3.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/London
## tzcode source: internal
## 
## attached base packages:
## [1] grid      stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] scales_1.4.0                viridis_0.6.5              
##  [3] viridisLite_0.4.3           knitr_1.51                 
##  [5] gprofiler2_0.2.4            ggrepel_0.9.8              
##  [7] RColorBrewer_1.1-3          circlize_0.4.17            
##  [9] ComplexHeatmap_2.25.3       patchwork_1.3.2            
## [11] ggplot2_4.0.2               tidyr_1.3.2                
## [13] dplyr_1.2.0                 SingleCellExperiment_1.28.1
## [15] SummarizedExperiment_1.36.0 Biobase_2.66.0             
## [17] GenomicRanges_1.58.0        GenomeInfoDb_1.42.3        
## [19] IRanges_2.40.1              S4Vectors_0.44.0           
## [21] BiocGenerics_0.52.0         MatrixGenerics_1.18.1      
## [23] matrixStats_1.5.0           tradeSeq_1.20.0            
## [25] qs2_0.1.7                   Seurat_5.4.0               
## [27] SeuratObject_5.3.0          sp_2.2-1                   
## 
## loaded via a namespace (and not attached):
##   [1] RcppAnnoy_0.0.23        TrajectoryUtils_1.14.0  slingshot_2.14.0       
##   [4] splines_4.4.2           later_1.4.8             bitops_1.0-9           
##   [7] tibble_3.3.1            polyclip_1.10-7         fastDummies_1.7.5      
##  [10] lifecycle_1.0.5         edgeR_4.4.2             doParallel_1.0.17      
##  [13] globals_0.19.1          lattice_0.22-9          MASS_7.3-65            
##  [16] magrittr_2.0.4          limma_3.62.2            plotly_4.12.0          
##  [19] sass_0.4.10             rmarkdown_2.30          jquerylib_0.1.4        
##  [22] yaml_2.3.12             httpuv_1.6.16           otel_0.2.0             
##  [25] sctransform_0.4.3       spam_2.11-3             spatstat.sparse_3.1-0  
##  [28] reticulate_1.45.0       cowplot_1.2.0           pbapply_1.7-4          
##  [31] abind_1.4-8             zlibbioc_1.52.0         Rtsne_0.17             
##  [34] purrr_1.2.1             RCurl_1.98-1.17         GenomeInfoDbData_1.2.13
##  [37] irlba_2.3.7             listenv_0.10.1          spatstat.utils_3.2-2   
##  [40] goftest_1.2-3           RSpectra_0.16-2         spatstat.random_3.4-4  
##  [43] fitdistrplus_1.2-6      parallelly_1.46.1       codetools_0.2-20       
##  [46] DelayedArray_0.32.0     shape_1.4.6.1           tidyselect_1.2.1       
##  [49] UCSC.utils_1.2.0        farver_2.1.2            spatstat.explore_3.7-0 
##  [52] jsonlite_2.0.0          GetoptLong_1.1.0        progressr_0.18.0       
##  [55] ggridges_0.5.7          survival_3.8-6          iterators_1.0.14       
##  [58] systemfonts_1.3.2       foreach_1.5.2           tools_4.4.2            
##  [61] ragg_1.5.1              ica_1.0-3               Rcpp_1.1.1             
##  [64] glue_1.8.0              gridExtra_2.3           SparseArray_1.6.2      
##  [67] xfun_0.56               mgcv_1.9-4              withr_3.0.2            
##  [70] fastmap_1.2.0           digest_0.6.39           R6_2.6.1               
##  [73] mime_0.13               textshaping_1.0.5       colorspace_2.1-2       
##  [76] Cairo_1.7-0             scattermore_1.2         tensor_1.5.1           
##  [79] dichromat_2.0-0.1       spatstat.data_3.1-9     generics_0.1.4         
##  [82] data.table_1.18.2.1     httr_1.4.8              htmlwidgets_1.6.4      
##  [85] S4Arrays_1.6.0          uwot_0.2.4              pkgconfig_2.0.3        
##  [88] gtable_0.3.6            lmtest_0.9-40           S7_0.2.1               
##  [91] XVector_0.46.0          htmltools_0.5.9         dotCall64_1.2          
##  [94] clue_0.3-67             png_0.1-9               spatstat.univar_3.1-6  
##  [97] rstudioapi_0.18.0       reshape2_1.4.5          rjson_0.2.23           
## [100] curl_7.0.0              nlme_3.1-168            cachem_1.1.0           
## [103] zoo_1.8-15              GlobalOptions_0.1.3     stringr_1.6.0          
## [106] KernSmooth_2.23-26      parallel_4.4.2          miniUI_0.1.2           
## [109] pillar_1.11.1           vctrs_0.7.1             RANN_2.6.2             
## [112] promises_1.5.0          stringfish_0.18.0       xtable_1.8-8           
## [115] cluster_2.1.8.2         princurve_2.1.6         evaluate_1.0.5         
## [118] cli_3.6.5               locfit_1.5-9.12         compiler_4.4.2         
## [121] rlang_1.1.7             crayon_1.5.3            future.apply_1.20.2    
## [124] labeling_0.4.3          plyr_1.8.9              stringi_1.8.7          
## [127] deldir_2.0-4            BiocParallel_1.40.2     lazyeval_0.2.2         
## [130] spatstat.geom_3.7-0     Matrix_1.7-4            RcppHNSW_0.6.0         
## [133] future_1.70.0           statmod_1.5.1           shiny_1.13.0           
## [136] ROCR_1.0-12             igraph_2.2.2            RcppParallel_5.1.11-2  
## [139] bslib_0.10.0