Overview

This script fits generalised additive models (GAMs) to the BM → CNS macrophage pseudotime trajectory using tradeSeq, providing statistically rigorous, publication-quality results for genes whose expression changes along the transition.

Pipeline: 1. Load pseudotime trajectory object and counts 2. Fit GAMs via fitGAM() 3. associationTest() — genes that change significantly along pseudotime 4. startVsEndTest() — genes that differ between trajectory start and end 5. Cluster genes by expression dynamics → figure-ready heatmap 6. GO enrichment on gene clusters 7. Publication figures: smooth curves, heatmap, dot plots

1. Setup

# Run once:
if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")
BiocManager::install("tradeSeq")
BiocManager::install("clusterExperiment")
install.packages("mgcv")

suppressPackageStartupMessages({
  library(Seurat)
  library(qs2)
  library(tradeSeq)
  library(SingleCellExperiment)
  library(dplyr)
  library(tidyr)
  library(ggplot2)
  library(patchwork)
  library(ComplexHeatmap)
  library(circlize)
  library(RColorBrewer)
  library(ggrepel)
  library(gprofiler2)
  library(knitr)
  library(viridis)
  library(scales)
})

base_path <- "/Users/alasdairduguid/Documents/Projects/BSH start up grant 2024 - scRNAseq miR128a/scRNAseq_proj/analysis_feb26"
cache_dir  <- file.path(base_path, "data")

# ── Plot theme ──────────────────────────────────────────────────────────────
theme_pub <- theme_classic(base_size = 12) +
  theme(
    strip.background = element_blank(),
    strip.text       = element_text(face = "bold", size = 11),
    plot.title       = element_text(face = "bold", size = 13),
    plot.subtitle    = element_text(size = 10, colour = "grey40"),
    legend.title     = element_text(face = "bold"),
    axis.line        = element_line(colour = "grey30"),
    panel.grid.major = element_line(colour = "grey92", linewidth = 0.3)
  )

# Niche state palette (matching 13c plots)
niche_cols <- c(
  "BAM_1 (CNS-adapted)" = "#B2182B",
  "BM Mac (in BM)"      = "#4393C3",
  "BM Mac (in CNS)"     = "#9970AB"
)

2. Load Data & Build tradeSeq Input

# ── Load the myeloid Seurat object ──────────────────────────────────────────
# Adjust filename if saved under a different name from 13c
myeloid <- qs_read(file.path(cache_dir, "13d_transition_with_pseudotime.qs"))

# Ensure the pseudotime and niche state columns are present.
# Adjust column names below to match what was used in 13c:
pseudotime_col  <- "pseudotime"     # numeric pseudotime column in meta.data
niche_state_col <- "niche_state"    # categorical: BAM_1, BM Mac in BM/CNS

cat("Cells in object:", ncol(myeloid), "\n")

## Cells in object: 1147

cat("Pseudotime range:", range(myeloid[[pseudotime_col]][[1]], na.rm = TRUE), "\n")

## Pseudotime range: 0 6.202476

cat("\nNiche state breakdown:\n")

## 
## Niche state breakdown:

print(table(myeloid[[niche_state_col]]))

## niche_state
## BAM_1 (CNS-adapted)      BM Mac (in BM)     BM Mac (in CNS) 
##                 565                 499                  83

# ── Keep only cells that are part of the trajectory ─────────────────────────
# (removes any cells with NA pseudotime)
cells_keep <- !is.na(myeloid[[pseudotime_col]][[1]])
myeloid_traj <- myeloid[, cells_keep]

cat("Cells retained for trajectory analysis:", ncol(myeloid_traj), "\n")

## Cells retained for trajectory analysis: 1147

# ── Extract inputs for tradeSeq ──────────────────────────────────────────────
counts_mat <- GetAssayData(myeloid_traj, assay = "originalexp", layer = "counts")

pseudotime_vec <- myeloid_traj[[pseudotime_col]][[1]]

# For a single lineage: pseudotime matrix = n_cells × 1, weights = all 1s
pt_matrix <- matrix(pseudotime_vec, ncol = 1,
                    dimnames = list(colnames(myeloid_traj), "Lineage1"))
wt_matrix <- matrix(1, nrow = ncol(myeloid_traj), ncol = 1,
                    dimnames = list(colnames(myeloid_traj), "Lineage1"))

cat("\nPseudotime matrix dim:", dim(pt_matrix), "\n")

## 
## Pseudotime matrix dim: 1147 1

cat("Counts matrix dim:", dim(counts_mat), "\n")

## Counts matrix dim: 26694 1147

# ── Gene filtering: expressed in ≥ 5% of trajectory cells ───────────────────
pct_expressed <- rowSums(counts_mat > 0) / ncol(counts_mat)
genes_keep     <- names(pct_expressed[pct_expressed >= 0.05])

cat("Genes passing filter:", length(genes_keep), "of", nrow(counts_mat), "\n")

## Genes passing filter: 6267 of 26694

counts_filt <- counts_mat[genes_keep, ]

3. Fit GAMs with tradeSeq

# ── Select number of knots via AIC ───────────────────────────────────────────
# Sample 200 genes for speed; real run uses full set
set.seed(42)
sample_genes <- sample(genes_keep, min(200, length(genes_keep)))

aic_res <- evaluateK(
  counts    = counts_filt[sample_genes, ],
  pseudotime = pt_matrix,
  cellWeights = wt_matrix,
  k = 3:8,
  nGenes = 200,
  verbose = FALSE
)

# Plot AIC vs k (tradeSeq built-in)
# Recommended k is typically where AIC stabilises

# ── Fit GAMs ─────────────────────────────────────────────────────────────────
# Set k based on evaluateK output above (typically 5-6 for a single trajectory)
# Adjust k if the AIC plot suggests otherwise
k_use <- 6

cat("Fitting GAMs with k =", k_use, "knots on", length(genes_keep), "genes...\n")

## Fitting GAMs with k = 6 knots on 6267 genes...

cat("(This takes ~20–40 min on a standard server. Run on Eddie.)\n")

## (This takes ~20–40 min on a standard server. Run on Eddie.)

sce <- fitGAM(
  counts      = counts_filt,
  pseudotime  = pt_matrix,
  cellWeights = wt_matrix,
  nknots      = k_use,
  verbose     = TRUE,
  parallel    = TRUE   # uses BiocParallel; set BPPARAM if needed
)

##   |                                                                              |                                                                      |   0%  |                                                                              |==                                                                    |   3%  |                                                                              |====                                                                  |   5%  |                                                                              |=====                                                                 |   8%  |                                                                              |=======                                                               |  10%  |                                                                              |=========                                                             |  13%  |                                                                              |===========                                                           |  15%  |                                                                              |============                                                          |  18%  |                                                                              |==============                                                        |  20%  |                                                                              |================                                                      |  23%  |                                                                              |==================                                                    |  25%  |                                                                              |===================                                                   |  28%  |                                                                              |=====================                                                 |  30%  |                                                                              |=======================                                               |  33%  |                                                                              |=========================                                             |  35%  |                                                                              |==========================                                            |  38%  |                                                                              |============================                                          |  40%  |                                                                              |==============================                                        |  43%  |                                                                              |================================                                      |  45%  |                                                                              |=================================                                     |  48%  |                                                                              |===================================                                   |  50%  |                                                                              |=====================================                                 |  53%  |                                                                              |=======================================                               |  55%  |                                                                              |========================================                              |  58%  |                                                                              |==========================================                            |  60%  |                                                                              |============================================                          |  63%  |                                                                              |==============================================                        |  65%  |                                                                              |===============================================                       |  68%  |                                                                              |=================================================                     |  70%  |                                                                              |===================================================                   |  73%  |                                                                              |=====================================================                 |  75%  |                                                                              |======================================================                |  78%  |                                                                              |========================================================              |  80%  |                                                                              |==========================================================            |  82%  |                                                                              |===========================================================           |  85%  |                                                                              |=============================================================         |  87%  |                                                                              |===============================================================       |  90%  |                                                                              |=================================================================     |  92%  |                                                                              |==================================================================    |  95%  |                                                                              |====================================================================  |  97%  |                                                                              |======================================================================| 100%

# Save immediately
qs_save(sce, file.path(cache_dir, "13d_tradeseq_sce.qs"))
cat("GAM fit saved.\n")

## GAM fit saved.

# ── If re-running after GAM fit: load cached SCE ────────────────────────────
sce <- qs_read(file.path(cache_dir, "13d_tradeseq_sce.qs"))

4. Statistical Tests

4a — Association Test (changes along pseudotime)

# Tests whether smoothed expression changes significantly along pseudotime
# (Wald test on GAM coefficients; FDR-corrected)
assoc_res <- associationTest(sce, lineages = TRUE, l2fc = log2(1.5))

assoc_res <- assoc_res %>%
  tibble::rownames_to_column("gene") %>%
  arrange(pvalue_1) %>%
  mutate(
    padj     = p.adjust(pvalue_1, method = "BH"),
    sig      = padj < 0.05 & !is.na(padj)
  )

cat("Genes significantly associated with pseudotime (FDR < 0.05):",
    sum(assoc_res$sig, na.rm = TRUE), "\n")

## Genes significantly associated with pseudotime (FDR < 0.05): 810

assoc_res %>%
  filter(sig) %>%
  slice_head(n = 30) %>%
  select(gene, waldStat_1, df_1, pvalue_1, padj) %>%
  mutate(across(where(is.numeric), ~round(.x, 4))) %>%
  kable(caption = "Top 30 pseudotime-associated genes (association test)")

Top 30 pseudotime-associated genes (association test)
gene	waldStat_1	df_1
Slc40a1	118.6676	4
Camk1d	86.9442	4
Mrc1	85.9262	4
Fcna	214.6035	4
Nr1h3	126.0166	4
Cd82	112.8044	4
B2m	124.5736	4
Sirpa	215.1976	4
Srxn1	187.1373	4
Ctsa	90.0110	4
Fabp5	107.2492	4
Cd5l	385.2321	4
S100a6	93.6406	4
S100a11	95.1629	4
S100a10	246.7843	4
Ctss	121.5157	4
Vcam1	766.2468	4
Gclm	119.5957	4
Atp6v0d2	161.0261	4
Txn1	157.3168	4
Ptgr1	95.5842	4
Plin2	93.5091	4
Osbpl9	166.1383	4
Eps15	104.6871	4
Akr1a1	262.3329	4
Prdx1	775.9843	4
Sdc3	262.7220	4
Laptm5	90.4100	4
C1qc	102.4812	4
C1qa	113.6652	4

4b — Start vs End Test

# Tests whether expression differs between trajectory start (BM) and end (CNS-adapted)
se_res <- startVsEndTest(sce, pseudotimeValues = c(0, 1))  # 0=start, 1=end (scaled)

se_res <- se_res %>%
  tibble::rownames_to_column("gene") %>%
  arrange(pvalue) %>%
  mutate(
    padj      = p.adjust(pvalue, method = "BH"),
    sig       = padj < 0.05 & !is.na(padj),
    direction = case_when(
      sig & logFClineage1 > 0  ~ "Up at CNS end",
      sig & logFClineage1 < 0  ~ "Up at BM start",
      TRUE                     ~ "NS"
    )
  )

cat("Start vs End — genes UP at CNS end (FDR < 0.05):",
    sum(se_res$direction == "Up at CNS end"), "\n")

## Start vs End — genes UP at CNS end (FDR < 0.05): 1127

cat("Start vs End — genes UP at BM start (FDR < 0.05):",
    sum(se_res$direction == "Up at BM start"), "\n")

## Start vs End — genes UP at BM start (FDR < 0.05): 443

se_res %>%
  filter(sig) %>%
  arrange(desc(abs(logFClineage1))) %>%
  slice_head(n = 40) %>%
  select(gene, logFClineage1, waldStat, padj, direction) %>%
  mutate(across(where(is.numeric), ~round(.x, 4))) %>%
  kable(caption = "Top 40 start-vs-end genes (ranked by |logFC|)")

Top 40 start-vs-end genes (ranked by |logFC|)
gene	logFClineage1	waldStat	padj	direction
Itgad	-2.9457	11.9779	0.0055	Up at BM start
Adamtsl5	2.8682	20.4097	0.0002	Up at CNS end
Lyve1	2.6760	8.6924	0.0194	Up at CNS end
Gstm2	2.6489	23.0931	0.0000	Up at CNS end
Ccnd2	-2.6180	14.5589	0.0019	Up at BM start
Zswim8	-2.5428	8.8083	0.0186	Up at BM start
Akr1b8	2.5335	84.1452	0.0000	Up at CNS end
Smim1	2.4026	8.6082	0.0201	Up at CNS end
Cd209f	-2.3774	10.6871	0.0091	Up at BM start
Prnp	-2.2791	11.9348	0.0055	Up at BM start
Gbe1	2.1072	33.3001	0.0000	Up at CNS end
Selenom	1.9875	31.1851	0.0000	Up at CNS end
C3	-1.9644	19.1925	0.0003	Up at BM start
F13a1	1.9278	77.8909	0.0000	Up at CNS end
S100a6	1.9234	23.3230	0.0000	Up at CNS end
Srxn1	1.8002	39.4293	0.0000	Up at CNS end
Mrap	-1.7799	24.1869	0.0000	Up at BM start
Ski	-1.7529	9.0315	0.0171	Up at BM start
S100a4	1.7422	16.7860	0.0008	Up at CNS end
Clec4d	1.7384	62.0723	0.0000	Up at CNS end
Prdx1	1.7028	283.6154	0.0000	Up at CNS end
Maoa	1.6820	22.8418	0.0001	Up at CNS end
S100a10	1.6377	59.3892	0.0000	Up at CNS end
Cbr3	1.5778	32.0446	0.0000	Up at CNS end
Mt2	1.5185	62.9749	0.0000	Up at CNS end
Rnf19b	-1.4949	10.2058	0.0109	Up at BM start
Esd	1.4648	117.7728	0.0000	Up at CNS end
Abcg3	-1.4398	18.3847	0.0004	Up at BM start
Pira2	-1.4266	12.2960	0.0047	Up at BM start
Arhgap15	-1.4191	8.7897	0.0187	Up at BM start
S100a11	1.4074	40.9119	0.0000	Up at CNS end
Chchd10	1.3841	29.8393	0.0000	Up at CNS end
Ppp3cb	-1.3802	7.7640	0.0280	Up at BM start
Camk1d	-1.3619	61.7830	0.0000	Up at BM start
Mmp12	1.3570	9.8485	0.0125	Up at CNS end
Stab2	-1.3540	10.0549	0.0115	Up at BM start
Axl	-1.3417	51.1273	0.0000	Up at BM start
Anxa1	1.3284	27.7306	0.0000	Up at CNS end
Thoc2	-1.3110	6.8503	0.0394	Up at BM start
Tspan10	-1.2973	30.8553	0.0000	Up at BM start

5. Gene Clustering by Dynamics

# ── Pull smoothed fitted values for sig. genes ───────────────────────────────
sig_genes <- assoc_res %>%
  filter(sig) %>%
  pull(gene)

cat("Clustering", length(sig_genes), "significantly dynamic genes...\n")

## Clustering 810 significantly dynamic genes...

# Get smoothed expression values at 100 pseudotime points
yhat <- predictSmooth(sce, gene = sig_genes, nPoints = 100, tidy = FALSE)

# Scale each gene to [0,1] for clustering
yhat_scaled <- t(apply(yhat, 1, function(x) {
  rng <- range(x, na.rm = TRUE)
  if (diff(rng) == 0) return(rep(0, length(x)))
  (x - rng[1]) / diff(rng)
}))

# k-means clustering into 6 expression dynamics groups
set.seed(42)
n_clusters  <- 6
km_fit      <- kmeans(yhat_scaled, centers = n_clusters, nstart = 25, iter.max = 100)
gene_cluster <- data.frame(
  gene    = sig_genes,
  cluster = km_fit$cluster
)

# Assign interpretable labels based on trajectory shape
# (will auto-label after inspection — update if needed)
cluster_labels <- c(
  "1" = "Early-high / lost",
  "2" = "Late-gained",
  "3" = "Transient mid",
  "4" = "Monotone increase",
  "5" = "Monotone decrease",
  "6" = "Late surge"
)

gene_cluster <- gene_cluster %>%
  mutate(cluster_label = cluster_labels[as.character(cluster)])

cat("\nGenes per cluster:\n")

## 
## Genes per cluster:

print(table(gene_cluster$cluster_label))

## 
## Early-high / lost        Late surge       Late-gained Monotone decrease 
##               130               179                67               141 
## Monotone increase     Transient mid 
##               127               166

6. Publication Figures

Fig 1 — Smooth Expression Curves: Top Dynamic Genes

# ── Select top genes per cluster for display ─────────────────────────────────
top_per_cluster <- gene_cluster %>%
  left_join(assoc_res %>% select(gene, waldStat_1, padj), by = "gene") %>%
  group_by(cluster_label) %>%
  slice_max(waldStat_1, n = 4) %>%
  ungroup()

display_genes <- top_per_cluster$gene

# Get predicted smooth values in tidy format
smooth_tidy <- predictSmooth(sce, gene = display_genes, nPoints = 100, tidy = TRUE)

# Add cell-level observed expression + pseudotime for jitter layer
obs_df <- data.frame(
  gene        = rep(display_genes, each = ncol(myeloid_traj)),
  pseudotime  = rep(pseudotime_vec, times = length(display_genes)),
  expression  = as.numeric(
    t(GetAssayData(myeloid_traj, assay = "originalexp", layer = "data")[display_genes, ])
  ),
  niche_state = rep(myeloid_traj[[niche_state_col]][[1]], times = length(display_genes))
) %>%
  filter(!is.na(pseudotime))

# Merge cluster label for facet ordering
smooth_tidy <- smooth_tidy %>%
  left_join(gene_cluster %>% select(gene, cluster_label), by = "gene")

obs_df <- obs_df %>%
  left_join(gene_cluster %>% select(gene, cluster_label), by = "gene")

ggplot() +
  # Jittered observed expression coloured by niche state
  geom_point(data = obs_df,
             aes(x = pseudotime, y = expression, colour = niche_state),
             alpha = 0.25, size = 0.7, stroke = 0) +
  # Smooth GAM fit
  geom_line(data = smooth_tidy,
            aes(x = time, y = yhat),
            colour = "black", linewidth = 1.1) +
  scale_colour_manual(values = niche_cols, name = "Niche state") +
  facet_wrap(~gene, scales = "free_y", ncol = 4) +
  labs(
    x        = "Pseudotime (BM → CNS)",
    y        = "Normalised expression",
    title    = "Gene Expression Dynamics Along BM → CNS Macrophage Transition",
    subtitle = "GAM smooth fit ± SE; points coloured by niche state"
  ) +
  theme_pub +
  theme(
    strip.text      = element_text(face = "italic", size = 10),
    legend.position = "bottom",
    legend.key.size = unit(0.4, "cm")
  )

ggsave(file.path(cache_dir, "Fig_smooth_curves_top_genes.pdf"),
       width = 16, height = 14)

Fig 2 — Heatmap of All Significant Genes by Cluster

# ── Build matrix: genes × pseudotime points, scaled per gene ─────────────────
hm_genes   <- gene_cluster$gene
yhat_hm    <- predictSmooth(sce, gene = hm_genes, nPoints = 100, tidy = FALSE)

yhat_hm_sc <- t(apply(yhat_hm, 1, function(x) {
  rng <- range(x, na.rm = TRUE)
  if (diff(rng) == 0) return(rep(0, length(x)))
  (x - rng[1]) / diff(rng)
}))

# Order genes within each cluster by peak pseudotime
gene_order <- gene_cluster %>%
  mutate(
    peak_pt = apply(yhat_hm_sc[gene, ], 1, which.max)
  ) %>%
  arrange(cluster, peak_pt) %>%
  pull(gene)

# Annotation colour for clusters
cluster_cols <- setNames(
  brewer.pal(n_clusters, "Set2"),
  unique(gene_cluster$cluster_label[match(1:n_clusters,
                                          gene_cluster$cluster)])
)

row_ann <- rowAnnotation(
  Cluster = gene_cluster$cluster_label[match(gene_order, gene_cluster$gene)],
  col = list(Cluster = cluster_cols),
  annotation_name_gp = gpar(fontsize = 10, fontface = "bold"),
  simple_anno_size = unit(0.5, "cm")
)

# Column (pseudotime) annotation
col_ann <- HeatmapAnnotation(
  Pseudotime = anno_lines(
    seq(0, 1, length.out = 100),
    gp = gpar(col = "grey30", lwd = 1.5),
    axis = FALSE
  ),
  annotation_name_gp = gpar(fontsize = 9)
)

col_fun <- colorRamp2(
  c(0, 0.5, 1),
  c("#4575B4", "#FFFFBF", "#D73027")
)

Heatmap(
  yhat_hm_sc[gene_order, ],
  name            = "Scaled\nExpression",
  col             = col_fun,
  cluster_rows    = FALSE,
  cluster_columns = FALSE,
  show_row_names  = length(gene_order) <= 150,   # hide names if too many genes
  row_names_gp    = gpar(fontsize = 6, fontface = "italic"),
  show_column_names = FALSE,
  column_title    = "Pseudotime  (BM start → CNS end)",
  column_title_gp = gpar(fontsize = 11, fontface = "bold"),
  left_annotation = row_ann,
  top_annotation  = col_ann,
  heatmap_legend_param = list(
    title_gp     = gpar(fontsize = 9, fontface = "bold"),
    labels_gp    = gpar(fontsize = 8),
    legend_height = unit(3, "cm")
  ),
  row_split       = gene_cluster$cluster_label[match(gene_order, gene_cluster$gene)],
  row_title_gp    = gpar(fontsize = 9, fontface = "bold"),
  border          = FALSE
)

Fig 3 — Volcano Plot: Start vs End

# Label top genes in each direction
top_up   <- se_res %>% filter(direction == "Up at CNS end") %>%
              slice_max(logFClineage1, n = 15) %>% pull(gene)
top_down <- se_res %>% filter(direction == "Up at BM start") %>%
              slice_min(logFClineage1, n = 15) %>% pull(gene)
label_genes <- c(top_up, top_down)

volcano_df <- se_res %>%
  mutate(
    log10p = -log10(pmax(padj, 1e-100)),
    label  = ifelse(gene %in% label_genes, gene, NA)
  )

ggplot(volcano_df, aes(x = logFClineage1, y = log10p)) +
  geom_point(aes(colour = direction), alpha = 0.5, size = 1.2) +
  geom_text_repel(
    data = volcano_df %>% filter(!is.na(label)),
    aes(label = label),
    fontface  = "italic",
    size      = 3,
    max.overlaps = 30,
    segment.colour = "grey60",
    segment.size   = 0.3,
    box.padding    = 0.35
  ) +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed",
             colour = "grey40", linewidth = 0.5) +
  geom_vline(xintercept = c(-log2(1.5), log2(1.5)), linetype = "dashed",
             colour = "grey40", linewidth = 0.5) +
  scale_colour_manual(
    values = c(
      "Up at CNS end"   = "#B2182B",
      "Up at BM start"  = "#4393C3",
      "NS"              = "grey75"
    ),
    name = NULL
  ) +
  scale_x_continuous(
    labels = function(x) ifelse(x > 0, paste0("+", round(x, 1)), round(x, 1))
  ) +
  labs(
    x        = expression(log[2]*" fold change (CNS end / BM start)"),
    y        = expression(-log[10]*" adjusted p-value"),
    title    = "Macrophage Reprogramming: BM Start vs CNS End",
    subtitle = "Trajectory start vs end test (tradeSeq); FDR < 0.05 threshold shown"
  ) +
  theme_pub +
  theme(legend.position = "top")

ggsave(file.path(cache_dir, "Fig_volcano_start_vs_end.pdf"),
       width = 9, height = 7)

7. GO Enrichment by Trajectory Cluster

# Run gProfiler2 on each cluster separately
go_results <- lapply(unique(gene_cluster$cluster_label), function(cl) {
  genes <- gene_cluster %>% filter(cluster_label == cl) %>% pull(gene)
  if (length(genes) < 5) return(NULL)

  res <- gost(
    query           = genes,
    organism        = "mmusculus",
    sources         = c("GO:BP", "GO:MF", "KEGG", "REACTOME"),
    evcodes         = FALSE,
    correction_method = "fdr",
    user_threshold  = 0.05
  )

  if (is.null(res)) return(NULL)

  res$result %>%
    select(term_name, term_id, source, p_value, term_size, intersection_size) %>%
    mutate(cluster = cl) %>%
    arrange(p_value)
})

go_all <- bind_rows(go_results)

# Show top 5 terms per cluster
go_all %>%
  group_by(cluster) %>%
  slice_head(n = 5) %>%
  ungroup() %>%
  select(cluster, term_name, source, p_value, intersection_size) %>%
  mutate(p_value = signif(p_value, 3)) %>%
  kable(caption = "Top GO/pathway terms per gene expression cluster")

Top GO/pathway terms per gene expression cluster
cluster	term_name	source	p_value	intersection_size
Early-high / lost	catabolic process	GO:BP	0.00e+00	49
Early-high / lost	small molecule metabolic process	GO:BP	0.00e+00	38
Early-high / lost	catalytic activity	GO:MF	0.00e+00	75
Early-high / lost	oxidoreductase activity	GO:MF	0.00e+00	26
Early-high / lost	monosaccharide metabolic process	GO:BP	0.00e+00	17
Late surge	immune system process	GO:BP	0.00e+00	72
Late surge	immune response	GO:BP	0.00e+00	55
Late surge	regulation of immune system process	GO:BP	0.00e+00	46
Late surge	defense response	GO:BP	0.00e+00	47
Late surge	adaptive immune response	GO:BP	0.00e+00	27
Late-gained	defense response	GO:BP	0.00e+00	28
Late-gained	response to other organism	GO:BP	0.00e+00	25
Late-gained	biological process involved in interspecies interaction between organisms	GO:BP	0.00e+00	26
Late-gained	response to external biotic stimulus	GO:BP	0.00e+00	25
Late-gained	immune response	GO:BP	0.00e+00	26
Monotone decrease	Lysosome	KEGG	0.00e+00	24
Monotone decrease	establishment of localization	GO:BP	0.00e+00	76
Monotone decrease	transport	GO:BP	0.00e+00	74
Monotone decrease	localization	GO:BP	0.00e+00	80
Monotone decrease	regulation of immune system process	GO:BP	0.00e+00	43
Monotone increase	endosomal transport	GO:BP	0.00e+00	16
Monotone increase	vesicle-mediated transport	GO:BP	0.00e+00	31
Monotone increase	Endocytosis	KEGG	2.00e-07	14
Monotone increase	transport	GO:BP	4.00e-07	50
Monotone increase	intracellular transport	GO:BP	4.00e-07	26
Transient mid	Proteasome	KEGG	2.00e-07	9
Transient mid	cellular localization	GO:BP	1.20e-06	50
Transient mid	protein binding	GO:MF	1.30e-06	101
Transient mid	cytoskeletal protein binding	GO:MF	1.60e-05	23
Transient mid	establishment of localization in cell	GO:BP	2.19e-05	34

# ── Dot plot: top terms per cluster ──────────────────────────────────────────
go_plot_df <- go_all %>%
  group_by(cluster) %>%
  slice_head(n = 6) %>%
  ungroup() %>%
  mutate(
    log10p     = -log10(p_value),
    GeneRatio  = intersection_size / term_size,
    term_name  = stringr::str_wrap(term_name, 40)
  )

ggplot(go_plot_df,
       aes(x = cluster, y = reorder(term_name, log10p),
           size = GeneRatio, colour = log10p)) +
  geom_point() +
  scale_colour_gradientn(
    colours = c("#FDAE61", "#D73027", "#A50026"),
    name    = expression(-log[10]*p)
  ) +
  scale_size_continuous(name = "Gene ratio", range = c(2, 8)) +
  labs(
    x     = "Expression cluster",
    y     = NULL,
    title = "GO / Pathway Enrichment by Pseudotime Gene Cluster"
  ) +
  theme_pub +
  theme(
    axis.text.x  = element_text(angle = 35, hjust = 1, size = 9),
    axis.text.y  = element_text(size = 9),
    panel.grid.major.x = element_line(colour = "grey88")
  )

ggsave(file.path(cache_dir, "Fig_GO_dotplot_by_cluster.pdf"),
       width = 13, height = 10)

8. Key Genes Summary Table

# Merge association test + start-vs-end test + cluster assignment
summary_tbl <- assoc_res %>%
  filter(sig) %>%
  select(gene, waldStat_assoc = waldStat_1, padj_assoc = padj) %>%
  left_join(
    se_res %>% select(gene, logFC_CNS_vs_BM = logFClineage1,
                      padj_sve = padj, direction),
    by = "gene"
  ) %>%
  left_join(gene_cluster %>% select(gene, cluster_label), by = "gene") %>%
  arrange(desc(abs(logFC_CNS_vs_BM))) %>%
  mutate(across(where(is.numeric), ~round(.x, 4)))

write.csv(summary_tbl,
          file.path(cache_dir, "13d_pseudotime_DE_results.csv"),
          row.names = FALSE)

summary_tbl %>%
  slice_head(n = 50) %>%
  kable(caption = "Top 50 dynamic genes: merged association + start-vs-end results")

Top 50 dynamic genes: merged association + start-vs-end results
gene	waldStat_assoc	padj_assoc	logFC_CNS_vs_BM	padj_sve	direction	cluster_label
Adamtsl5	38.1662	0.0000	2.8682	0.0002	Up at CNS end	Transient mid
Lyve1	46.6962	0.0000	2.6760	0.0194	Up at CNS end	Late-gained
Gstm2	57.1275	0.0000	2.6489	0.0000	Up at CNS end	Early-high / lost
Ccnd2	20.2038	0.0036	-2.6180	0.0019	Up at BM start	Monotone decrease
Akr1b8	224.4304	0.0000	2.5335	0.0000	Up at CNS end	Early-high / lost
Smim1	21.0625	0.0009	2.4026	0.0201	Up at CNS end	Monotone increase
Cd209f	123.3976	0.0000	-2.3774	0.0091	Up at BM start	Late-gained
Prnp	15.5021	0.0239	-2.2791	0.0055	Up at BM start	Late surge
Gbe1	91.0071	0.0000	2.1072	0.0000	Up at CNS end	Early-high / lost
Selenom	74.8904	0.0000	1.9875	0.0000	Up at CNS end	Transient mid
C3	53.9416	0.0000	-1.9644	0.0003	Up at BM start	Late surge
F13a1	789.2216	0.0000	1.9278	0.0000	Up at CNS end	Monotone increase
S100a6	93.6406	0.0000	1.9234	0.0000	Up at CNS end	Transient mid
Srxn1	187.1373	0.0000	1.8002	0.0000	Up at CNS end	Early-high / lost
Mrap	67.9125	0.0000	-1.7799	0.0000	Up at BM start	Late surge
S100a4	64.7953	0.0000	1.7422	0.0008	Up at CNS end	Transient mid
Clec4d	200.2591	0.0000	1.7384	0.0000	Up at CNS end	Early-high / lost
Prdx1	775.9843	0.0000	1.7028	0.0000	Up at CNS end	Early-high / lost
Maoa	65.1603	0.0000	1.6820	0.0001	Up at CNS end	Early-high / lost
S100a10	246.7843	0.0000	1.6377	0.0000	Up at CNS end	Transient mid
Cbr3	51.0573	0.0000	1.5778	0.0000	Up at CNS end	Early-high / lost
Mt2	293.3153	0.0000	1.5185	0.0000	Up at CNS end	Early-high / lost
Rgs18	28.2941	0.0001	1.4839	0.0516	NS	Monotone increase
Esd	340.2193	0.0000	1.4648	0.0000	Up at CNS end	Early-high / lost
Abcg3	56.3380	0.0000	-1.4398	0.0004	Up at BM start	Late surge
Pira2	33.2112	0.0000	-1.4266	0.0047	Up at BM start	Late surge
S100a11	95.1629	0.0000	1.4074	0.0000	Up at CNS end	Early-high / lost
Chchd10	76.4537	0.0000	1.3841	0.0000	Up at CNS end	Transient mid
Camk1d	86.9442	0.0000	-1.3619	0.0000	Up at BM start	Monotone decrease
Mmp12	40.1056	0.0000	1.3570	0.0125	Up at CNS end	Late surge
Stab2	25.7148	0.0004	-1.3540	0.0115	Up at BM start	Late surge
Ifit3	25.7624	0.0003	1.3430	0.1081	NS	Late-gained
Axl	288.9259	0.0000	-1.3417	0.0000	Up at BM start	Late surge
Anxa1	27.6325	0.0002	1.3284	0.0000	Up at CNS end	Early-high / lost
Susd1	16.4412	0.0164	-1.2726	0.0021	Up at BM start	Late-gained
Vcam1	766.2468	0.0000	-1.2347	0.0000	Up at BM start	Late surge
Tuba4a	37.7111	0.0000	1.2225	0.0039	Up at CNS end	Early-high / lost
Crip2	55.1452	0.0000	-1.2212	0.0002	Up at BM start	Late surge
Bnip3	62.5282	0.0000	1.2205	0.0000	Up at CNS end	Early-high / lost
Cd5l	385.2321	0.0000	-1.2181	0.0000	Up at BM start	Late surge
Slc43a2	64.9738	0.0000	-1.2180	0.0000	Up at BM start	Monotone decrease
Fxyd2	72.7558	0.0000	1.1991	0.0002	Up at CNS end	Transient mid
Slc8b1	14.4239	0.0358	-1.1881	0.0075	Up at BM start	Late surge
Pstpip1	41.3718	0.0000	1.1877	0.0010	Up at CNS end	Transient mid
Slc12a7	22.3011	0.0015	-1.1846	0.0101	Up at BM start	Monotone decrease
Myo10	115.4465	0.0000	-1.1794	0.0000	Up at BM start	Late surge
Prdx6	72.2733	0.0000	1.1670	0.0000	Up at CNS end	Early-high / lost
Bin1	214.8838	0.0000	1.1454	0.0000	Up at CNS end	Monotone increase
Sel1l	28.1588	0.0001	-1.1450	0.0012	Up at BM start	Monotone decrease
Mt1	405.8848	0.0000	1.1402	0.0000	Up at CNS end	Early-high / lost

Session Information

sessionInfo()

## R version 4.4.2 (2024-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS 26.3.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/London
## tzcode source: internal
## 
## attached base packages:
## [1] grid      stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] scales_1.4.0                viridis_0.6.5              
##  [3] viridisLite_0.4.3           knitr_1.51                 
##  [5] gprofiler2_0.2.4            ggrepel_0.9.8              
##  [7] RColorBrewer_1.1-3          circlize_0.4.17            
##  [9] ComplexHeatmap_2.25.3       patchwork_1.3.2            
## [11] ggplot2_4.0.2               tidyr_1.3.2                
## [13] dplyr_1.2.0                 SingleCellExperiment_1.28.1
## [15] SummarizedExperiment_1.36.0 Biobase_2.66.0             
## [17] GenomicRanges_1.58.0        GenomeInfoDb_1.42.3        
## [19] IRanges_2.40.1              S4Vectors_0.44.0           
## [21] BiocGenerics_0.52.0         MatrixGenerics_1.18.1      
## [23] matrixStats_1.5.0           tradeSeq_1.20.0            
## [25] qs2_0.1.7                   Seurat_5.4.0               
## [27] SeuratObject_5.3.0          sp_2.2-1                   
## 
## loaded via a namespace (and not attached):
##   [1] RcppAnnoy_0.0.23        TrajectoryUtils_1.14.0  slingshot_2.14.0       
##   [4] splines_4.4.2           later_1.4.8             bitops_1.0-9           
##   [7] tibble_3.3.1            polyclip_1.10-7         fastDummies_1.7.5      
##  [10] lifecycle_1.0.5         edgeR_4.4.2             doParallel_1.0.17      
##  [13] globals_0.19.1          lattice_0.22-9          MASS_7.3-65            
##  [16] magrittr_2.0.4          limma_3.62.2            plotly_4.12.0          
##  [19] sass_0.4.10             rmarkdown_2.30          jquerylib_0.1.4        
##  [22] yaml_2.3.12             httpuv_1.6.16           otel_0.2.0             
##  [25] sctransform_0.4.3       spam_2.11-3             spatstat.sparse_3.1-0  
##  [28] reticulate_1.45.0       cowplot_1.2.0           pbapply_1.7-4          
##  [31] abind_1.4-8             zlibbioc_1.52.0         Rtsne_0.17             
##  [34] purrr_1.2.1             RCurl_1.98-1.17         GenomeInfoDbData_1.2.13
##  [37] irlba_2.3.7             listenv_0.10.1          spatstat.utils_3.2-2   
##  [40] goftest_1.2-3           RSpectra_0.16-2         spatstat.random_3.4-4  
##  [43] fitdistrplus_1.2-6      parallelly_1.46.1       codetools_0.2-20       
##  [46] DelayedArray_0.32.0     shape_1.4.6.1           tidyselect_1.2.1       
##  [49] UCSC.utils_1.2.0        farver_2.1.2            spatstat.explore_3.7-0 
##  [52] jsonlite_2.0.0          GetoptLong_1.1.0        progressr_0.18.0       
##  [55] ggridges_0.5.7          survival_3.8-6          iterators_1.0.14       
##  [58] systemfonts_1.3.2       foreach_1.5.2           tools_4.4.2            
##  [61] ragg_1.5.1              ica_1.0-3               Rcpp_1.1.1             
##  [64] glue_1.8.0              gridExtra_2.3           SparseArray_1.6.2      
##  [67] xfun_0.56               mgcv_1.9-4              withr_3.0.2            
##  [70] fastmap_1.2.0           digest_0.6.39           R6_2.6.1               
##  [73] mime_0.13               textshaping_1.0.5       colorspace_2.1-2       
##  [76] Cairo_1.7-0             scattermore_1.2         tensor_1.5.1           
##  [79] dichromat_2.0-0.1       spatstat.data_3.1-9     generics_0.1.4         
##  [82] data.table_1.18.2.1     httr_1.4.8              htmlwidgets_1.6.4      
##  [85] S4Arrays_1.6.0          uwot_0.2.4              pkgconfig_2.0.3        
##  [88] gtable_0.3.6            lmtest_0.9-40           S7_0.2.1               
##  [91] XVector_0.46.0          htmltools_0.5.9         dotCall64_1.2          
##  [94] clue_0.3-67             png_0.1-9               spatstat.univar_3.1-6  
##  [97] rstudioapi_0.18.0       reshape2_1.4.5          rjson_0.2.23           
## [100] curl_7.0.0              nlme_3.1-168            cachem_1.1.0           
## [103] zoo_1.8-15              GlobalOptions_0.1.3     stringr_1.6.0          
## [106] KernSmooth_2.23-26      parallel_4.4.2          miniUI_0.1.2           
## [109] pillar_1.11.1           vctrs_0.7.1             RANN_2.6.2             
## [112] promises_1.5.0          stringfish_0.18.0       xtable_1.8-8           
## [115] cluster_2.1.8.2         princurve_2.1.6         evaluate_1.0.5         
## [118] cli_3.6.5               locfit_1.5-9.12         compiler_4.4.2         
## [121] rlang_1.1.7             crayon_1.5.3            future.apply_1.20.2    
## [124] labeling_0.4.3          plyr_1.8.9              stringi_1.8.7          
## [127] deldir_2.0-4            BiocParallel_1.40.2     lazyeval_0.2.2         
## [130] spatstat.geom_3.7-0     Matrix_1.7-4            RcppHNSW_0.6.0         
## [133] future_1.70.0           statmod_1.5.1           shiny_1.13.0           
## [136] ROCR_1.0-12             igraph_2.2.2            RcppParallel_5.1.11-2  
## [139] bslib_0.10.0

13d: tradeSeq — Pseudotime-Correlated Gene Expression

BM → CNS Macrophage Transition: Formal Statistical Modelling

2026-03-19