Overview
This script fits generalised additive models (GAMs) to the BM → CNS
macrophage pseudotime trajectory using tradeSeq,
providing statistically rigorous, publication-quality results for genes
whose expression changes along the transition.
Pipeline: 1. Load pseudotime trajectory object and counts 2. Fit GAMs
via fitGAM() 3. associationTest() — genes that
change significantly along pseudotime 4. startVsEndTest() —
genes that differ between trajectory start and end 5. Cluster genes by
expression dynamics → figure-ready heatmap 6. GO enrichment on gene
clusters 7. Publication figures: smooth curves, heatmap, dot plots
1. Setup
# Run once:
if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")
BiocManager::install("tradeSeq")
BiocManager::install("clusterExperiment")
install.packages("mgcv")
suppressPackageStartupMessages({
library(Seurat)
library(qs2)
library(tradeSeq)
library(SingleCellExperiment)
library(dplyr)
library(tidyr)
library(ggplot2)
library(patchwork)
library(ComplexHeatmap)
library(circlize)
library(RColorBrewer)
library(ggrepel)
library(gprofiler2)
library(knitr)
library(viridis)
library(scales)
})
base_path <- "/Users/alasdairduguid/Documents/Projects/BSH start up grant 2024 - scRNAseq miR128a/scRNAseq_proj/analysis_feb26"
cache_dir <- file.path(base_path, "data")
# ── Plot theme ──────────────────────────────────────────────────────────────
theme_pub <- theme_classic(base_size = 12) +
theme(
strip.background = element_blank(),
strip.text = element_text(face = "bold", size = 11),
plot.title = element_text(face = "bold", size = 13),
plot.subtitle = element_text(size = 10, colour = "grey40"),
legend.title = element_text(face = "bold"),
axis.line = element_line(colour = "grey30"),
panel.grid.major = element_line(colour = "grey92", linewidth = 0.3)
)
# Niche state palette (matching 13c plots)
niche_cols <- c(
"BAM_1 (CNS-adapted)" = "#B2182B",
"BM Mac (in BM)" = "#4393C3",
"BM Mac (in CNS)" = "#9970AB"
)
2. Load Data & Build tradeSeq Input
# ── Load the myeloid Seurat object ──────────────────────────────────────────
# Adjust filename if saved under a different name from 13c
myeloid <- qs_read(file.path(cache_dir, "13d_transition_with_pseudotime.qs"))
# Ensure the pseudotime and niche state columns are present.
# Adjust column names below to match what was used in 13c:
pseudotime_col <- "pseudotime" # numeric pseudotime column in meta.data
niche_state_col <- "niche_state" # categorical: BAM_1, BM Mac in BM/CNS
cat("Cells in object:", ncol(myeloid), "\n")
## Cells in object: 1147
cat("Pseudotime range:", range(myeloid[[pseudotime_col]][[1]], na.rm = TRUE), "\n")
## Pseudotime range: 0 6.202476
cat("\nNiche state breakdown:\n")
##
## Niche state breakdown:
print(table(myeloid[[niche_state_col]]))
## niche_state
## BAM_1 (CNS-adapted) BM Mac (in BM) BM Mac (in CNS)
## 565 499 83
# ── Keep only cells that are part of the trajectory ─────────────────────────
# (removes any cells with NA pseudotime)
cells_keep <- !is.na(myeloid[[pseudotime_col]][[1]])
myeloid_traj <- myeloid[, cells_keep]
cat("Cells retained for trajectory analysis:", ncol(myeloid_traj), "\n")
## Cells retained for trajectory analysis: 1147
# ── Extract inputs for tradeSeq ──────────────────────────────────────────────
counts_mat <- GetAssayData(myeloid_traj, assay = "originalexp", layer = "counts")
pseudotime_vec <- myeloid_traj[[pseudotime_col]][[1]]
# For a single lineage: pseudotime matrix = n_cells × 1, weights = all 1s
pt_matrix <- matrix(pseudotime_vec, ncol = 1,
dimnames = list(colnames(myeloid_traj), "Lineage1"))
wt_matrix <- matrix(1, nrow = ncol(myeloid_traj), ncol = 1,
dimnames = list(colnames(myeloid_traj), "Lineage1"))
cat("\nPseudotime matrix dim:", dim(pt_matrix), "\n")
##
## Pseudotime matrix dim: 1147 1
cat("Counts matrix dim:", dim(counts_mat), "\n")
## Counts matrix dim: 26694 1147
# ── Gene filtering: expressed in ≥ 5% of trajectory cells ───────────────────
pct_expressed <- rowSums(counts_mat > 0) / ncol(counts_mat)
genes_keep <- names(pct_expressed[pct_expressed >= 0.05])
cat("Genes passing filter:", length(genes_keep), "of", nrow(counts_mat), "\n")
## Genes passing filter: 6267 of 26694
counts_filt <- counts_mat[genes_keep, ]
3. Fit GAMs with tradeSeq
# ── Select number of knots via AIC ───────────────────────────────────────────
# Sample 200 genes for speed; real run uses full set
set.seed(42)
sample_genes <- sample(genes_keep, min(200, length(genes_keep)))
aic_res <- evaluateK(
counts = counts_filt[sample_genes, ],
pseudotime = pt_matrix,
cellWeights = wt_matrix,
k = 3:8,
nGenes = 200,
verbose = FALSE
)

# Plot AIC vs k (tradeSeq built-in)
# Recommended k is typically where AIC stabilises
# ── Fit GAMs ─────────────────────────────────────────────────────────────────
# Set k based on evaluateK output above (typically 5-6 for a single trajectory)
# Adjust k if the AIC plot suggests otherwise
k_use <- 6
cat("Fitting GAMs with k =", k_use, "knots on", length(genes_keep), "genes...\n")
## Fitting GAMs with k = 6 knots on 6267 genes...
cat("(This takes ~20–40 min on a standard server. Run on Eddie.)\n")
## (This takes ~20–40 min on a standard server. Run on Eddie.)
sce <- fitGAM(
counts = counts_filt,
pseudotime = pt_matrix,
cellWeights = wt_matrix,
nknots = k_use,
verbose = TRUE,
parallel = TRUE # uses BiocParallel; set BPPARAM if needed
)
## | | | 0% | |== | 3% | |==== | 5% | |===== | 8% | |======= | 10% | |========= | 13% | |=========== | 15% | |============ | 18% | |============== | 20% | |================ | 23% | |================== | 25% | |=================== | 28% | |===================== | 30% | |======================= | 33% | |========================= | 35% | |========================== | 38% | |============================ | 40% | |============================== | 43% | |================================ | 45% | |================================= | 48% | |=================================== | 50% | |===================================== | 53% | |======================================= | 55% | |======================================== | 58% | |========================================== | 60% | |============================================ | 63% | |============================================== | 65% | |=============================================== | 68% | |================================================= | 70% | |=================================================== | 73% | |===================================================== | 75% | |====================================================== | 78% | |======================================================== | 80% | |========================================================== | 82% | |=========================================================== | 85% | |============================================================= | 87% | |=============================================================== | 90% | |================================================================= | 92% | |================================================================== | 95% | |==================================================================== | 97% | |======================================================================| 100%
# Save immediately
qs_save(sce, file.path(cache_dir, "13d_tradeseq_sce.qs"))
cat("GAM fit saved.\n")
## GAM fit saved.
# ── If re-running after GAM fit: load cached SCE ────────────────────────────
sce <- qs_read(file.path(cache_dir, "13d_tradeseq_sce.qs"))
4. Statistical Tests
4a — Association Test (changes along pseudotime)
# Tests whether smoothed expression changes significantly along pseudotime
# (Wald test on GAM coefficients; FDR-corrected)
assoc_res <- associationTest(sce, lineages = TRUE, l2fc = log2(1.5))
assoc_res <- assoc_res %>%
tibble::rownames_to_column("gene") %>%
arrange(pvalue_1) %>%
mutate(
padj = p.adjust(pvalue_1, method = "BH"),
sig = padj < 0.05 & !is.na(padj)
)
cat("Genes significantly associated with pseudotime (FDR < 0.05):",
sum(assoc_res$sig, na.rm = TRUE), "\n")
## Genes significantly associated with pseudotime (FDR < 0.05): 810
assoc_res %>%
filter(sig) %>%
slice_head(n = 30) %>%
select(gene, waldStat_1, df_1, pvalue_1, padj) %>%
mutate(across(where(is.numeric), ~round(.x, 4))) %>%
kable(caption = "Top 30 pseudotime-associated genes (association test)")
Top 30 pseudotime-associated genes (association test)
| Slc40a1 |
118.6676 |
4 |
0 |
0 |
| Camk1d |
86.9442 |
4 |
0 |
0 |
| Mrc1 |
85.9262 |
4 |
0 |
0 |
| Fcna |
214.6035 |
4 |
0 |
0 |
| Nr1h3 |
126.0166 |
4 |
0 |
0 |
| Cd82 |
112.8044 |
4 |
0 |
0 |
| B2m |
124.5736 |
4 |
0 |
0 |
| Sirpa |
215.1976 |
4 |
0 |
0 |
| Srxn1 |
187.1373 |
4 |
0 |
0 |
| Ctsa |
90.0110 |
4 |
0 |
0 |
| Fabp5 |
107.2492 |
4 |
0 |
0 |
| Cd5l |
385.2321 |
4 |
0 |
0 |
| S100a6 |
93.6406 |
4 |
0 |
0 |
| S100a11 |
95.1629 |
4 |
0 |
0 |
| S100a10 |
246.7843 |
4 |
0 |
0 |
| Ctss |
121.5157 |
4 |
0 |
0 |
| Vcam1 |
766.2468 |
4 |
0 |
0 |
| Gclm |
119.5957 |
4 |
0 |
0 |
| Atp6v0d2 |
161.0261 |
4 |
0 |
0 |
| Txn1 |
157.3168 |
4 |
0 |
0 |
| Ptgr1 |
95.5842 |
4 |
0 |
0 |
| Plin2 |
93.5091 |
4 |
0 |
0 |
| Osbpl9 |
166.1383 |
4 |
0 |
0 |
| Eps15 |
104.6871 |
4 |
0 |
0 |
| Akr1a1 |
262.3329 |
4 |
0 |
0 |
| Prdx1 |
775.9843 |
4 |
0 |
0 |
| Sdc3 |
262.7220 |
4 |
0 |
0 |
| Laptm5 |
90.4100 |
4 |
0 |
0 |
| C1qc |
102.4812 |
4 |
0 |
0 |
| C1qa |
113.6652 |
4 |
0 |
0 |
4b — Start vs End Test
# Tests whether expression differs between trajectory start (BM) and end (CNS-adapted)
se_res <- startVsEndTest(sce, pseudotimeValues = c(0, 1)) # 0=start, 1=end (scaled)
se_res <- se_res %>%
tibble::rownames_to_column("gene") %>%
arrange(pvalue) %>%
mutate(
padj = p.adjust(pvalue, method = "BH"),
sig = padj < 0.05 & !is.na(padj),
direction = case_when(
sig & logFClineage1 > 0 ~ "Up at CNS end",
sig & logFClineage1 < 0 ~ "Up at BM start",
TRUE ~ "NS"
)
)
cat("Start vs End — genes UP at CNS end (FDR < 0.05):",
sum(se_res$direction == "Up at CNS end"), "\n")
## Start vs End — genes UP at CNS end (FDR < 0.05): 1127
cat("Start vs End — genes UP at BM start (FDR < 0.05):",
sum(se_res$direction == "Up at BM start"), "\n")
## Start vs End — genes UP at BM start (FDR < 0.05): 443
se_res %>%
filter(sig) %>%
arrange(desc(abs(logFClineage1))) %>%
slice_head(n = 40) %>%
select(gene, logFClineage1, waldStat, padj, direction) %>%
mutate(across(where(is.numeric), ~round(.x, 4))) %>%
kable(caption = "Top 40 start-vs-end genes (ranked by |logFC|)")
Top 40 start-vs-end genes (ranked by |logFC|)
| Itgad |
-2.9457 |
11.9779 |
0.0055 |
Up at BM start |
| Adamtsl5 |
2.8682 |
20.4097 |
0.0002 |
Up at CNS end |
| Lyve1 |
2.6760 |
8.6924 |
0.0194 |
Up at CNS end |
| Gstm2 |
2.6489 |
23.0931 |
0.0000 |
Up at CNS end |
| Ccnd2 |
-2.6180 |
14.5589 |
0.0019 |
Up at BM start |
| Zswim8 |
-2.5428 |
8.8083 |
0.0186 |
Up at BM start |
| Akr1b8 |
2.5335 |
84.1452 |
0.0000 |
Up at CNS end |
| Smim1 |
2.4026 |
8.6082 |
0.0201 |
Up at CNS end |
| Cd209f |
-2.3774 |
10.6871 |
0.0091 |
Up at BM start |
| Prnp |
-2.2791 |
11.9348 |
0.0055 |
Up at BM start |
| Gbe1 |
2.1072 |
33.3001 |
0.0000 |
Up at CNS end |
| Selenom |
1.9875 |
31.1851 |
0.0000 |
Up at CNS end |
| C3 |
-1.9644 |
19.1925 |
0.0003 |
Up at BM start |
| F13a1 |
1.9278 |
77.8909 |
0.0000 |
Up at CNS end |
| S100a6 |
1.9234 |
23.3230 |
0.0000 |
Up at CNS end |
| Srxn1 |
1.8002 |
39.4293 |
0.0000 |
Up at CNS end |
| Mrap |
-1.7799 |
24.1869 |
0.0000 |
Up at BM start |
| Ski |
-1.7529 |
9.0315 |
0.0171 |
Up at BM start |
| S100a4 |
1.7422 |
16.7860 |
0.0008 |
Up at CNS end |
| Clec4d |
1.7384 |
62.0723 |
0.0000 |
Up at CNS end |
| Prdx1 |
1.7028 |
283.6154 |
0.0000 |
Up at CNS end |
| Maoa |
1.6820 |
22.8418 |
0.0001 |
Up at CNS end |
| S100a10 |
1.6377 |
59.3892 |
0.0000 |
Up at CNS end |
| Cbr3 |
1.5778 |
32.0446 |
0.0000 |
Up at CNS end |
| Mt2 |
1.5185 |
62.9749 |
0.0000 |
Up at CNS end |
| Rnf19b |
-1.4949 |
10.2058 |
0.0109 |
Up at BM start |
| Esd |
1.4648 |
117.7728 |
0.0000 |
Up at CNS end |
| Abcg3 |
-1.4398 |
18.3847 |
0.0004 |
Up at BM start |
| Pira2 |
-1.4266 |
12.2960 |
0.0047 |
Up at BM start |
| Arhgap15 |
-1.4191 |
8.7897 |
0.0187 |
Up at BM start |
| S100a11 |
1.4074 |
40.9119 |
0.0000 |
Up at CNS end |
| Chchd10 |
1.3841 |
29.8393 |
0.0000 |
Up at CNS end |
| Ppp3cb |
-1.3802 |
7.7640 |
0.0280 |
Up at BM start |
| Camk1d |
-1.3619 |
61.7830 |
0.0000 |
Up at BM start |
| Mmp12 |
1.3570 |
9.8485 |
0.0125 |
Up at CNS end |
| Stab2 |
-1.3540 |
10.0549 |
0.0115 |
Up at BM start |
| Axl |
-1.3417 |
51.1273 |
0.0000 |
Up at BM start |
| Anxa1 |
1.3284 |
27.7306 |
0.0000 |
Up at CNS end |
| Thoc2 |
-1.3110 |
6.8503 |
0.0394 |
Up at BM start |
| Tspan10 |
-1.2973 |
30.8553 |
0.0000 |
Up at BM start |
5. Gene Clustering by Dynamics
# ── Pull smoothed fitted values for sig. genes ───────────────────────────────
sig_genes <- assoc_res %>%
filter(sig) %>%
pull(gene)
cat("Clustering", length(sig_genes), "significantly dynamic genes...\n")
## Clustering 810 significantly dynamic genes...
# Get smoothed expression values at 100 pseudotime points
yhat <- predictSmooth(sce, gene = sig_genes, nPoints = 100, tidy = FALSE)
# Scale each gene to [0,1] for clustering
yhat_scaled <- t(apply(yhat, 1, function(x) {
rng <- range(x, na.rm = TRUE)
if (diff(rng) == 0) return(rep(0, length(x)))
(x - rng[1]) / diff(rng)
}))
# k-means clustering into 6 expression dynamics groups
set.seed(42)
n_clusters <- 6
km_fit <- kmeans(yhat_scaled, centers = n_clusters, nstart = 25, iter.max = 100)
gene_cluster <- data.frame(
gene = sig_genes,
cluster = km_fit$cluster
)
# Assign interpretable labels based on trajectory shape
# (will auto-label after inspection — update if needed)
cluster_labels <- c(
"1" = "Early-high / lost",
"2" = "Late-gained",
"3" = "Transient mid",
"4" = "Monotone increase",
"5" = "Monotone decrease",
"6" = "Late surge"
)
gene_cluster <- gene_cluster %>%
mutate(cluster_label = cluster_labels[as.character(cluster)])
cat("\nGenes per cluster:\n")
##
## Genes per cluster:
print(table(gene_cluster$cluster_label))
##
## Early-high / lost Late surge Late-gained Monotone decrease
## 130 179 67 141
## Monotone increase Transient mid
## 127 166
6. Publication Figures
Fig 1 — Smooth Expression Curves: Top Dynamic Genes
# ── Select top genes per cluster for display ─────────────────────────────────
top_per_cluster <- gene_cluster %>%
left_join(assoc_res %>% select(gene, waldStat_1, padj), by = "gene") %>%
group_by(cluster_label) %>%
slice_max(waldStat_1, n = 4) %>%
ungroup()
display_genes <- top_per_cluster$gene
# Get predicted smooth values in tidy format
smooth_tidy <- predictSmooth(sce, gene = display_genes, nPoints = 100, tidy = TRUE)
# Add cell-level observed expression + pseudotime for jitter layer
obs_df <- data.frame(
gene = rep(display_genes, each = ncol(myeloid_traj)),
pseudotime = rep(pseudotime_vec, times = length(display_genes)),
expression = as.numeric(
t(GetAssayData(myeloid_traj, assay = "originalexp", layer = "data")[display_genes, ])
),
niche_state = rep(myeloid_traj[[niche_state_col]][[1]], times = length(display_genes))
) %>%
filter(!is.na(pseudotime))
# Merge cluster label for facet ordering
smooth_tidy <- smooth_tidy %>%
left_join(gene_cluster %>% select(gene, cluster_label), by = "gene")
obs_df <- obs_df %>%
left_join(gene_cluster %>% select(gene, cluster_label), by = "gene")
ggplot() +
# Jittered observed expression coloured by niche state
geom_point(data = obs_df,
aes(x = pseudotime, y = expression, colour = niche_state),
alpha = 0.25, size = 0.7, stroke = 0) +
# Smooth GAM fit
geom_line(data = smooth_tidy,
aes(x = time, y = yhat),
colour = "black", linewidth = 1.1) +
scale_colour_manual(values = niche_cols, name = "Niche state") +
facet_wrap(~gene, scales = "free_y", ncol = 4) +
labs(
x = "Pseudotime (BM → CNS)",
y = "Normalised expression",
title = "Gene Expression Dynamics Along BM → CNS Macrophage Transition",
subtitle = "GAM smooth fit ± SE; points coloured by niche state"
) +
theme_pub +
theme(
strip.text = element_text(face = "italic", size = 10),
legend.position = "bottom",
legend.key.size = unit(0.4, "cm")
)

ggsave(file.path(cache_dir, "Fig_smooth_curves_top_genes.pdf"),
width = 16, height = 14)
Fig 2 — Heatmap of All Significant Genes by Cluster
# ── Build matrix: genes × pseudotime points, scaled per gene ─────────────────
hm_genes <- gene_cluster$gene
yhat_hm <- predictSmooth(sce, gene = hm_genes, nPoints = 100, tidy = FALSE)
yhat_hm_sc <- t(apply(yhat_hm, 1, function(x) {
rng <- range(x, na.rm = TRUE)
if (diff(rng) == 0) return(rep(0, length(x)))
(x - rng[1]) / diff(rng)
}))
# Order genes within each cluster by peak pseudotime
gene_order <- gene_cluster %>%
mutate(
peak_pt = apply(yhat_hm_sc[gene, ], 1, which.max)
) %>%
arrange(cluster, peak_pt) %>%
pull(gene)
# Annotation colour for clusters
cluster_cols <- setNames(
brewer.pal(n_clusters, "Set2"),
unique(gene_cluster$cluster_label[match(1:n_clusters,
gene_cluster$cluster)])
)
row_ann <- rowAnnotation(
Cluster = gene_cluster$cluster_label[match(gene_order, gene_cluster$gene)],
col = list(Cluster = cluster_cols),
annotation_name_gp = gpar(fontsize = 10, fontface = "bold"),
simple_anno_size = unit(0.5, "cm")
)
# Column (pseudotime) annotation
col_ann <- HeatmapAnnotation(
Pseudotime = anno_lines(
seq(0, 1, length.out = 100),
gp = gpar(col = "grey30", lwd = 1.5),
axis = FALSE
),
annotation_name_gp = gpar(fontsize = 9)
)
col_fun <- colorRamp2(
c(0, 0.5, 1),
c("#4575B4", "#FFFFBF", "#D73027")
)
Heatmap(
yhat_hm_sc[gene_order, ],
name = "Scaled\nExpression",
col = col_fun,
cluster_rows = FALSE,
cluster_columns = FALSE,
show_row_names = length(gene_order) <= 150, # hide names if too many genes
row_names_gp = gpar(fontsize = 6, fontface = "italic"),
show_column_names = FALSE,
column_title = "Pseudotime (BM start → CNS end)",
column_title_gp = gpar(fontsize = 11, fontface = "bold"),
left_annotation = row_ann,
top_annotation = col_ann,
heatmap_legend_param = list(
title_gp = gpar(fontsize = 9, fontface = "bold"),
labels_gp = gpar(fontsize = 8),
legend_height = unit(3, "cm")
),
row_split = gene_cluster$cluster_label[match(gene_order, gene_cluster$gene)],
row_title_gp = gpar(fontsize = 9, fontface = "bold"),
border = FALSE
)

Fig 3 — Volcano Plot: Start vs End
# Label top genes in each direction
top_up <- se_res %>% filter(direction == "Up at CNS end") %>%
slice_max(logFClineage1, n = 15) %>% pull(gene)
top_down <- se_res %>% filter(direction == "Up at BM start") %>%
slice_min(logFClineage1, n = 15) %>% pull(gene)
label_genes <- c(top_up, top_down)
volcano_df <- se_res %>%
mutate(
log10p = -log10(pmax(padj, 1e-100)),
label = ifelse(gene %in% label_genes, gene, NA)
)
ggplot(volcano_df, aes(x = logFClineage1, y = log10p)) +
geom_point(aes(colour = direction), alpha = 0.5, size = 1.2) +
geom_text_repel(
data = volcano_df %>% filter(!is.na(label)),
aes(label = label),
fontface = "italic",
size = 3,
max.overlaps = 30,
segment.colour = "grey60",
segment.size = 0.3,
box.padding = 0.35
) +
geom_hline(yintercept = -log10(0.05), linetype = "dashed",
colour = "grey40", linewidth = 0.5) +
geom_vline(xintercept = c(-log2(1.5), log2(1.5)), linetype = "dashed",
colour = "grey40", linewidth = 0.5) +
scale_colour_manual(
values = c(
"Up at CNS end" = "#B2182B",
"Up at BM start" = "#4393C3",
"NS" = "grey75"
),
name = NULL
) +
scale_x_continuous(
labels = function(x) ifelse(x > 0, paste0("+", round(x, 1)), round(x, 1))
) +
labs(
x = expression(log[2]*" fold change (CNS end / BM start)"),
y = expression(-log[10]*" adjusted p-value"),
title = "Macrophage Reprogramming: BM Start vs CNS End",
subtitle = "Trajectory start vs end test (tradeSeq); FDR < 0.05 threshold shown"
) +
theme_pub +
theme(legend.position = "top")

ggsave(file.path(cache_dir, "Fig_volcano_start_vs_end.pdf"),
width = 9, height = 7)
7. GO Enrichment by Trajectory Cluster
# Run gProfiler2 on each cluster separately
go_results <- lapply(unique(gene_cluster$cluster_label), function(cl) {
genes <- gene_cluster %>% filter(cluster_label == cl) %>% pull(gene)
if (length(genes) < 5) return(NULL)
res <- gost(
query = genes,
organism = "mmusculus",
sources = c("GO:BP", "GO:MF", "KEGG", "REACTOME"),
evcodes = FALSE,
correction_method = "fdr",
user_threshold = 0.05
)
if (is.null(res)) return(NULL)
res$result %>%
select(term_name, term_id, source, p_value, term_size, intersection_size) %>%
mutate(cluster = cl) %>%
arrange(p_value)
})
go_all <- bind_rows(go_results)
# Show top 5 terms per cluster
go_all %>%
group_by(cluster) %>%
slice_head(n = 5) %>%
ungroup() %>%
select(cluster, term_name, source, p_value, intersection_size) %>%
mutate(p_value = signif(p_value, 3)) %>%
kable(caption = "Top GO/pathway terms per gene expression cluster")
Top GO/pathway terms per gene expression cluster
| Early-high / lost |
catabolic process |
GO:BP |
0.00e+00 |
49 |
| Early-high / lost |
small molecule metabolic process |
GO:BP |
0.00e+00 |
38 |
| Early-high / lost |
catalytic activity |
GO:MF |
0.00e+00 |
75 |
| Early-high / lost |
oxidoreductase activity |
GO:MF |
0.00e+00 |
26 |
| Early-high / lost |
monosaccharide metabolic process |
GO:BP |
0.00e+00 |
17 |
| Late surge |
immune system process |
GO:BP |
0.00e+00 |
72 |
| Late surge |
immune response |
GO:BP |
0.00e+00 |
55 |
| Late surge |
regulation of immune system process |
GO:BP |
0.00e+00 |
46 |
| Late surge |
defense response |
GO:BP |
0.00e+00 |
47 |
| Late surge |
adaptive immune response |
GO:BP |
0.00e+00 |
27 |
| Late-gained |
defense response |
GO:BP |
0.00e+00 |
28 |
| Late-gained |
response to other organism |
GO:BP |
0.00e+00 |
25 |
| Late-gained |
biological process involved in interspecies interaction
between organisms |
GO:BP |
0.00e+00 |
26 |
| Late-gained |
response to external biotic stimulus |
GO:BP |
0.00e+00 |
25 |
| Late-gained |
immune response |
GO:BP |
0.00e+00 |
26 |
| Monotone decrease |
Lysosome |
KEGG |
0.00e+00 |
24 |
| Monotone decrease |
establishment of localization |
GO:BP |
0.00e+00 |
76 |
| Monotone decrease |
transport |
GO:BP |
0.00e+00 |
74 |
| Monotone decrease |
localization |
GO:BP |
0.00e+00 |
80 |
| Monotone decrease |
regulation of immune system process |
GO:BP |
0.00e+00 |
43 |
| Monotone increase |
endosomal transport |
GO:BP |
0.00e+00 |
16 |
| Monotone increase |
vesicle-mediated transport |
GO:BP |
0.00e+00 |
31 |
| Monotone increase |
Endocytosis |
KEGG |
2.00e-07 |
14 |
| Monotone increase |
transport |
GO:BP |
4.00e-07 |
50 |
| Monotone increase |
intracellular transport |
GO:BP |
4.00e-07 |
26 |
| Transient mid |
Proteasome |
KEGG |
2.00e-07 |
9 |
| Transient mid |
cellular localization |
GO:BP |
1.20e-06 |
50 |
| Transient mid |
protein binding |
GO:MF |
1.30e-06 |
101 |
| Transient mid |
cytoskeletal protein binding |
GO:MF |
1.60e-05 |
23 |
| Transient mid |
establishment of localization in cell |
GO:BP |
2.19e-05 |
34 |
# ── Dot plot: top terms per cluster ──────────────────────────────────────────
go_plot_df <- go_all %>%
group_by(cluster) %>%
slice_head(n = 6) %>%
ungroup() %>%
mutate(
log10p = -log10(p_value),
GeneRatio = intersection_size / term_size,
term_name = stringr::str_wrap(term_name, 40)
)
ggplot(go_plot_df,
aes(x = cluster, y = reorder(term_name, log10p),
size = GeneRatio, colour = log10p)) +
geom_point() +
scale_colour_gradientn(
colours = c("#FDAE61", "#D73027", "#A50026"),
name = expression(-log[10]*p)
) +
scale_size_continuous(name = "Gene ratio", range = c(2, 8)) +
labs(
x = "Expression cluster",
y = NULL,
title = "GO / Pathway Enrichment by Pseudotime Gene Cluster"
) +
theme_pub +
theme(
axis.text.x = element_text(angle = 35, hjust = 1, size = 9),
axis.text.y = element_text(size = 9),
panel.grid.major.x = element_line(colour = "grey88")
)

ggsave(file.path(cache_dir, "Fig_GO_dotplot_by_cluster.pdf"),
width = 13, height = 10)
8. Key Genes Summary Table
# Merge association test + start-vs-end test + cluster assignment
summary_tbl <- assoc_res %>%
filter(sig) %>%
select(gene, waldStat_assoc = waldStat_1, padj_assoc = padj) %>%
left_join(
se_res %>% select(gene, logFC_CNS_vs_BM = logFClineage1,
padj_sve = padj, direction),
by = "gene"
) %>%
left_join(gene_cluster %>% select(gene, cluster_label), by = "gene") %>%
arrange(desc(abs(logFC_CNS_vs_BM))) %>%
mutate(across(where(is.numeric), ~round(.x, 4)))
write.csv(summary_tbl,
file.path(cache_dir, "13d_pseudotime_DE_results.csv"),
row.names = FALSE)
summary_tbl %>%
slice_head(n = 50) %>%
kable(caption = "Top 50 dynamic genes: merged association + start-vs-end results")
Top 50 dynamic genes: merged association + start-vs-end
results
| Adamtsl5 |
38.1662 |
0.0000 |
2.8682 |
0.0002 |
Up at CNS end |
Transient mid |
| Lyve1 |
46.6962 |
0.0000 |
2.6760 |
0.0194 |
Up at CNS end |
Late-gained |
| Gstm2 |
57.1275 |
0.0000 |
2.6489 |
0.0000 |
Up at CNS end |
Early-high / lost |
| Ccnd2 |
20.2038 |
0.0036 |
-2.6180 |
0.0019 |
Up at BM start |
Monotone decrease |
| Akr1b8 |
224.4304 |
0.0000 |
2.5335 |
0.0000 |
Up at CNS end |
Early-high / lost |
| Smim1 |
21.0625 |
0.0009 |
2.4026 |
0.0201 |
Up at CNS end |
Monotone increase |
| Cd209f |
123.3976 |
0.0000 |
-2.3774 |
0.0091 |
Up at BM start |
Late-gained |
| Prnp |
15.5021 |
0.0239 |
-2.2791 |
0.0055 |
Up at BM start |
Late surge |
| Gbe1 |
91.0071 |
0.0000 |
2.1072 |
0.0000 |
Up at CNS end |
Early-high / lost |
| Selenom |
74.8904 |
0.0000 |
1.9875 |
0.0000 |
Up at CNS end |
Transient mid |
| C3 |
53.9416 |
0.0000 |
-1.9644 |
0.0003 |
Up at BM start |
Late surge |
| F13a1 |
789.2216 |
0.0000 |
1.9278 |
0.0000 |
Up at CNS end |
Monotone increase |
| S100a6 |
93.6406 |
0.0000 |
1.9234 |
0.0000 |
Up at CNS end |
Transient mid |
| Srxn1 |
187.1373 |
0.0000 |
1.8002 |
0.0000 |
Up at CNS end |
Early-high / lost |
| Mrap |
67.9125 |
0.0000 |
-1.7799 |
0.0000 |
Up at BM start |
Late surge |
| S100a4 |
64.7953 |
0.0000 |
1.7422 |
0.0008 |
Up at CNS end |
Transient mid |
| Clec4d |
200.2591 |
0.0000 |
1.7384 |
0.0000 |
Up at CNS end |
Early-high / lost |
| Prdx1 |
775.9843 |
0.0000 |
1.7028 |
0.0000 |
Up at CNS end |
Early-high / lost |
| Maoa |
65.1603 |
0.0000 |
1.6820 |
0.0001 |
Up at CNS end |
Early-high / lost |
| S100a10 |
246.7843 |
0.0000 |
1.6377 |
0.0000 |
Up at CNS end |
Transient mid |
| Cbr3 |
51.0573 |
0.0000 |
1.5778 |
0.0000 |
Up at CNS end |
Early-high / lost |
| Mt2 |
293.3153 |
0.0000 |
1.5185 |
0.0000 |
Up at CNS end |
Early-high / lost |
| Rgs18 |
28.2941 |
0.0001 |
1.4839 |
0.0516 |
NS |
Monotone increase |
| Esd |
340.2193 |
0.0000 |
1.4648 |
0.0000 |
Up at CNS end |
Early-high / lost |
| Abcg3 |
56.3380 |
0.0000 |
-1.4398 |
0.0004 |
Up at BM start |
Late surge |
| Pira2 |
33.2112 |
0.0000 |
-1.4266 |
0.0047 |
Up at BM start |
Late surge |
| S100a11 |
95.1629 |
0.0000 |
1.4074 |
0.0000 |
Up at CNS end |
Early-high / lost |
| Chchd10 |
76.4537 |
0.0000 |
1.3841 |
0.0000 |
Up at CNS end |
Transient mid |
| Camk1d |
86.9442 |
0.0000 |
-1.3619 |
0.0000 |
Up at BM start |
Monotone decrease |
| Mmp12 |
40.1056 |
0.0000 |
1.3570 |
0.0125 |
Up at CNS end |
Late surge |
| Stab2 |
25.7148 |
0.0004 |
-1.3540 |
0.0115 |
Up at BM start |
Late surge |
| Ifit3 |
25.7624 |
0.0003 |
1.3430 |
0.1081 |
NS |
Late-gained |
| Axl |
288.9259 |
0.0000 |
-1.3417 |
0.0000 |
Up at BM start |
Late surge |
| Anxa1 |
27.6325 |
0.0002 |
1.3284 |
0.0000 |
Up at CNS end |
Early-high / lost |
| Susd1 |
16.4412 |
0.0164 |
-1.2726 |
0.0021 |
Up at BM start |
Late-gained |
| Vcam1 |
766.2468 |
0.0000 |
-1.2347 |
0.0000 |
Up at BM start |
Late surge |
| Tuba4a |
37.7111 |
0.0000 |
1.2225 |
0.0039 |
Up at CNS end |
Early-high / lost |
| Crip2 |
55.1452 |
0.0000 |
-1.2212 |
0.0002 |
Up at BM start |
Late surge |
| Bnip3 |
62.5282 |
0.0000 |
1.2205 |
0.0000 |
Up at CNS end |
Early-high / lost |
| Cd5l |
385.2321 |
0.0000 |
-1.2181 |
0.0000 |
Up at BM start |
Late surge |
| Slc43a2 |
64.9738 |
0.0000 |
-1.2180 |
0.0000 |
Up at BM start |
Monotone decrease |
| Fxyd2 |
72.7558 |
0.0000 |
1.1991 |
0.0002 |
Up at CNS end |
Transient mid |
| Slc8b1 |
14.4239 |
0.0358 |
-1.1881 |
0.0075 |
Up at BM start |
Late surge |
| Pstpip1 |
41.3718 |
0.0000 |
1.1877 |
0.0010 |
Up at CNS end |
Transient mid |
| Slc12a7 |
22.3011 |
0.0015 |
-1.1846 |
0.0101 |
Up at BM start |
Monotone decrease |
| Myo10 |
115.4465 |
0.0000 |
-1.1794 |
0.0000 |
Up at BM start |
Late surge |
| Prdx6 |
72.2733 |
0.0000 |
1.1670 |
0.0000 |
Up at CNS end |
Early-high / lost |
| Bin1 |
214.8838 |
0.0000 |
1.1454 |
0.0000 |
Up at CNS end |
Monotone increase |
| Sel1l |
28.1588 |
0.0001 |
-1.1450 |
0.0012 |
Up at BM start |
Monotone decrease |
| Mt1 |
405.8848 |
0.0000 |
1.1402 |
0.0000 |
Up at CNS end |
Early-high / lost |