Overview

This script characterises CD8 T cell dysfunction in the B-ALL CNS niche. The central finding from Script 15b and unbiased DE is that the dominant biology is not deeper exhaustion of Tex cells, but rather a distinct CNS-specific programme of retention, intrinsic signalling suppression, and survival maintenance — with effector function lost.

Key findings to illustrate

Tissue retention — Itga1 (VLA-1; 5.4× CNS-enriched, p=2×10⁻⁵), Cxcr6 (CXCL16 receptor), loss of Klf2 (S1PR1 egress regulator)
Intrinsic signalling suppression — Socs1 (JAK-STAT suppressor) and Cish (TCR signalling suppressor) upregulated in CNS; cell-autonomous brake above and beyond checkpoint receptor expression
Survival without function — Bcl2 upregulated; effector molecules (Gzmb, Gzmk, Cx3cr1, Ccl3) lost in CNS
Absent Tpex — Tcf7+ progenitor pool absent; PD-1 reinvigoration has no substrate in the CNS niche

Annotation note

CD8_Tpex_unvalidated (from Script 15b) → renamed CD8_Tex_intermediate throughout this script. These cells are Tcf7-negative, checkpoint-expressing, and score highest on the Beltra Texint (intermediate exhausted) signature.

References

Beltra JC et al. Immunity 2020 — four-state exhaustion continuum
Miller BC et al. Nature Immunology 2019 — Tpex definition
Andreatta M et al. Nature Communications 2021 — ProjecTILs

1. Libraries

library(Seurat)
library(ggplot2)
library(dplyr)
library(patchwork)
library(qs2)
library(UCell)
library(tidyr)
library(tibble)
library(RColorBrewer)
library(ggrepel)
library(pheatmap)
library(ggpubr)

BASE_DIR <- "/Volumes/Al_seq/scratch_backup/harmony_clustering/"

tissue_cols <- c("BM" = "#b2182b", "CNS" = "#2166ac")

state_cols <- c(
  "CD8_Naive"            = "#74add1",
  "CD8_Tex"              = "#8c2d04",
  "CD8_Tex_intermediate" = "#f16913",
  "CD8_EffectorMemory"   = "#4dac26",
  "CD8_EarlyActiv"       = "#fdae6b",
  "other"                = "grey80"
)

module_cols <- c(
  "Tissue retention"       = "#1a9641",
  "Signalling suppression" = "#d73027",
  "Survival"               = "#984ea3",
  "Exhaustion TF"          = "#e31a1c",
  "Checkpoint receptors"   = "#f16913",
  "Effector (CNS-lost)"    = "#2166ac",
  "Stemness (absent)"      = "#74add1"
)

2. Load data and apply annotation update

seu <- qs_read(paste0(BASE_DIR, "15b_immune_cd8_revised.qs"))
cd8 <- qs_read(paste0(BASE_DIR, "15b_cd8_projected.qs"))
DefaultAssay(cd8) <- "originalexp"

# Rename CD8_Tpex_unvalidated → CD8_Tex_intermediate
cd8$cd8_class_final <- dplyr::recode(
  cd8$cd8_class_corrected,
  "CD8_Tpex_unvalidated" = "CD8_Tex_intermediate"
)

seu$cd8_class_final <- dplyr::recode(
  seu$cd8_class_corrected,
  "CD8_Tpex_unvalidated" = "CD8_Tex_intermediate"
)

# Non-naive working subset
cd8_active <- subset(cd8, cells = colnames(cd8)[
  cd8$cd8_class_final %in%
    c("CD8_Tex", "CD8_EffectorMemory",
      "CD8_EarlyActiv", "CD8_Tex_intermediate")
])

cat("=== Final CD8 classification ===\n")

## === Final CD8 classification ===

print(table(cd8$cd8_class_final, cd8$Tissue))

##                       
##                          BM  CNS
##   CD8_EarlyActiv         13    2
##   CD8_EffectorMemory    246  173
##   CD8_Naive            1610  127
##   CD8_Tex                99   15
##   CD8_Tex_intermediate    4    0
##   other                  44    8

cat("\n=== Non-naive cells for analysis ===\n")

## 
## === Non-naive cells for analysis ===

print(table(cd8_active$cd8_class_final, cd8_active$Tissue))

##                       
##                         BM CNS
##   CD8_EarlyActiv        13   2
##   CD8_EffectorMemory   246 173
##   CD8_Tex               99  15
##   CD8_Tex_intermediate   4   0

3. Load or rerun DE results

de_file <- paste0(BASE_DIR, "16_DE_CD8_CNS_vs_BM.csv")

if (file.exists(de_file)) {
  de_cd8 <- read.csv(de_file, row.names = 1)
  de_cd8$gene <- rownames(de_cd8)
  cat("DE results loaded from file:", nrow(de_cd8), "genes\n")
} else {
  cat("Running DE analysis...\n")
  DefaultAssay(cd8_active) <- "originalexp"
  Idents(cd8_active) <- "Tissue"

  de_cd8 <- FindMarkers(
    cd8_active,
    ident.1         = "CNS",
    ident.2         = "BM",
    min.pct         = 0.1,
    logfc.threshold = 0.2,
    test.use        = "wilcox"
  )
  de_cd8$gene <- rownames(de_cd8)
  write.csv(de_cd8, de_file)
  cat("DE complete:", nrow(de_cd8), "genes tested\n")
}

## DE results loaded from file: 3319 genes

# Remove known contaminants
de_cd8 <- de_cd8 %>%
  filter(!gene %in% c("Igkv8-27", "Hexb"))

cat("\n=== Significant DE genes ===\n")

## 
## === Significant DE genes ===

cat("CNS-enriched (p<0.05, FC>0.2):",
    sum(de_cd8$avg_log2FC > 0.2 & de_cd8$p_val_adj < 0.05), "\n")

## CNS-enriched (p<0.05, FC>0.2): 16

cat("BM-enriched  (p<0.05, FC>0.2):",
    sum(de_cd8$avg_log2FC < -0.2 & de_cd8$p_val_adj < 0.05), "\n")

## BM-enriched  (p<0.05, FC>0.2): 29

4. Checkpoint burden scoring

checkpoint_genes <- intersect(
  c("Pdcd1", "Havcr2", "Lag3", "Tigit",
    "Ctla4", "Cd244a", "Entpd1", "Cd160"),
  rownames(cd8)
)

expr_mat <- GetAssayData(cd8, layer = "data")[checkpoint_genes, ]
cd8$checkpoint_burden <- colSums((expr_mat > 0) * 1)

# Add to active subset
cd8_active$checkpoint_burden <- cd8$checkpoint_burden[
  colnames(cd8_active)
]

FIGURE 1: Volcano Plot — Primary Evidence Figure

Four programmes annotated: retention, suppression, survival, effector loss

# ── Define key genes per programme ──────────────────────────────────────────
retention   <- c("Itga1", "Cxcr6", "Klf2")
suppression <- c("Socs1", "Cish")
survival    <- c("Bcl2")
exhaustion  <- c("Nr4a1", "Nr4a2", "Id2", "Tox", "Prdm1")
effector    <- c("Cx3cr1", "Ccl3", "Gzmb", "Gzmk", "Prf1", "Klrg1", "Ifng")
receptor    <- c("Ifngr1", "Ltb")

de_volcano <- de_cd8 %>%
  mutate(
    sig = p_val_adj < 0.05 & abs(avg_log2FC) > 0.2,
    category = case_when(
      gene %in% retention   & avg_log2FC > 0  ~ "Tissue retention (CNS↑)",
      gene %in% suppression & avg_log2FC > 0  ~ "Signalling suppression (CNS↑)",
      gene %in% survival    & avg_log2FC > 0  ~ "Survival (CNS↑)",
      gene %in% exhaustion  & avg_log2FC > 0  ~ "Exhaustion TF (CNS↑)",
      gene %in% effector    & avg_log2FC < 0  ~ "Effector function (BM↑/CNS lost)",
      avg_log2FC > 0.2  & p_val_adj < 0.05   ~ "Other CNS-enriched",
      avg_log2FC < -0.2 & p_val_adj < 0.05   ~ "Other BM-enriched",
      TRUE                                    ~ "ns"
    ),
    label = case_when(
      gene %in% c(retention, suppression, survival,
                   exhaustion, effector, receptor) & sig ~ gene,
      TRUE ~ NA_character_
    )
  )

volcano_cols <- c(
  "Tissue retention (CNS↑)"          = "#1a9641",
  "Signalling suppression (CNS↑)"    = "#d73027",
  "Survival (CNS↑)"                  = "#984ea3",
  "Exhaustion TF (CNS↑)"             = "#e31a1c",
  "Effector function (BM↑/CNS lost)" = "#2166ac",
  "Other CNS-enriched"               = "#fc8d59",
  "Other BM-enriched"                = "#91bfdb",
  "ns"                               = "grey85"
)

fig1 <- ggplot(de_volcano,
               aes(x = avg_log2FC,
                   y = -log10(p_val_adj + 1e-300),
                   colour = category)) +
  geom_point(aes(size = ifelse(category == "ns", 0.3, 0.7)),
             alpha = 0.75) +
  geom_label_repel(
    aes(label = label),
    size          = 3.2,
    fontface      = "italic",
    box.padding   = 0.45,
    point.padding = 0.3,
    label.size    = 0.2,
    max.overlaps  = 25,
    na.rm         = TRUE,
    show.legend   = FALSE
  ) +
  geom_vline(xintercept = c(-0.2, 0.2),
             linetype = "dashed", colour = "grey50", linewidth = 0.4) +
  geom_hline(yintercept = -log10(0.05),
             linetype = "dashed", colour = "grey50", linewidth = 0.4) +
  scale_colour_manual(values = volcano_cols) +
  scale_size_identity() +
  theme_classic(base_size = 12) +
  theme(
    legend.position = "right",
    legend.text     = element_text(size = 9),
    legend.key.size = unit(0.45, "cm"),
    plot.subtitle   = element_text(size = 9, colour = "grey30")
  ) +
  labs(
    title    = "CNS vs BM — non-naive CD8 T cells",
    subtitle = paste0(
      "Retention: Itga1↑ Cxcr6↑ Klf2↓ | ",
      "Suppression: Socs1↑ Cish↑ | ",
      "Survival: Bcl2↑ | ",
      "Effector loss: Cx3cr1↓ Gzmb↓ Ccl3↓"
    ),
    x      = "Average log2FC (CNS vs BM)",
    y      = "-log10 adjusted p-value",
    colour = "Functional category"
  )

fig1

FIGURE 2: Key Gene Bar Chart with Statistics

Per-gene % expressing, CNS vs BM, grouped by module, with p-values

key_genes <- c(
  # Effector lost — BM > CNS
  "Ccl3", "Cx3cr1", "Gzmb",
  # Exhaustion TF — CNS > BM
  "Nr4a1", "Prdm1", "Tox",
  # Retention — CNS > BM
  "Cxcr6", "Itga1", "Klf2",
  # Suppression — CNS > BM
  "Cish", "Socs1",
  # Survival — CNS > BM
  "Bcl2"
)
key_genes <- intersect(key_genes, rownames(cd8_active))

module_map <- c(
  "Ccl3" = "Effector (lost)", "Cx3cr1" = "Effector (lost)",
  "Gzmb" = "Effector (lost)",
  "Nr4a1" = "Exhaustion TF", "Prdm1" = "Exhaustion TF",
  "Tox"  = "Exhaustion TF",
  "Cxcr6" = "Retention", "Itga1" = "Retention", "Klf2" = "Retention",
  "Cish"  = "Suppression", "Socs1" = "Suppression",
  "Bcl2"  = "Survival"
)

# Compute % expressing and run Wilcoxon per gene
gene_stats <- lapply(key_genes, function(g) {
  expr   <- GetAssayData(cd8_active, layer = "data")[g, ]
  meta   <- cd8_active@meta.data %>%
    mutate(expr = expr, pos = expr > 0)

  pct_df <- meta %>%
    group_by(Tissue) %>%
    summarise(pct  = round(mean(pos) * 100, 1),
              n    = n(),
              .groups = "drop") %>%
    mutate(gene   = g,
           module = module_map[g])

  # Wilcoxon test CNS vs BM on raw expression
  bm_e  <- expr[cd8_active$Tissue == "BM"]
  cns_e <- expr[cd8_active$Tissue == "CNS"]
  wt    <- wilcox.test(cns_e, bm_e)

  pct_df$p_val     <- wt$p.value
  pct_df$p_adj     <- p.adjust(wt$p.value, method = "BH")  # corrected below
  pct_df
}) %>% bind_rows()

# Apply BH correction across all genes
p_vals <- gene_stats %>%
  distinct(gene, p_val) %>%
  mutate(p_adj_global = p.adjust(p_val, method = "BH"))

gene_stats <- gene_stats %>%
  left_join(p_vals %>% select(gene, p_adj_global), by = "gene") %>%
  mutate(
    sig_label = case_when(
      p_adj_global < 0.001 ~ "***",
      p_adj_global < 0.01  ~ "**",
      p_adj_global < 0.05  ~ "*",
      TRUE                 ~ "ns"
    )
  )

cat("=== Gene-level statistics ===\n")

## === Gene-level statistics ===

gene_stats %>%
  distinct(gene, module, p_val, p_adj_global, sig_label) %>%
  arrange(module, p_adj_global) %>%
  print()

## # A tibble: 12 × 5
##    gene   module             p_val p_adj_global sig_label
##    <chr>  <chr>              <dbl>        <dbl> <chr>    
##  1 Ccl3   Effector (lost) 3.38e- 7     5.79e- 7 ***      
##  2 Cx3cr1 Effector (lost) 5.75e- 7     8.62e- 7 ***      
##  3 Gzmb   Effector (lost) 4.53e- 1     4.94e- 1 ns       
##  4 Tox    Exhaustion TF   5.17e- 3     6.89e- 3 **       
##  5 Prdm1  Exhaustion TF   1.92e- 1     2.31e- 1 ns       
##  6 Nr4a1  Exhaustion TF   7.38e- 1     7.38e- 1 ns       
##  7 Klf2   Retention       6.29e-11     4.54e-10 ***      
##  8 Itga1  Retention       7.81e-10     3.13e- 9 ***      
##  9 Cxcr6  Retention       7.49e- 9     1.80e- 8 ***      
## 10 Cish   Suppression     4.32e- 9     1.30e- 8 ***      
## 11 Socs1  Suppression     5.42e- 8     1.08e- 7 ***      
## 12 Bcl2   Survival        7.57e-11     4.54e-10 ***

# Module order for faceting
gene_stats$module <- factor(
  gene_stats$module,
  levels = c("Effector (lost)", "Exhaustion TF",
             "Retention", "Suppression", "Survival")
)
gene_stats$gene <- factor(gene_stats$gene, levels = key_genes)

# Significance annotation at CNS bar
sig_annot <- gene_stats %>%
  filter(Tissue == "CNS") %>%
  select(gene, module, pct, sig_label) %>%
  mutate(y_pos = pct + 5)

module_fill_cols <- c(
  "Effector (lost)" = "#2166ac",
  "Exhaustion TF"   = "#e31a1c",
  "Retention"       = "#1a9641",
  "Suppression"     = "#d73027",
  "Survival"        = "#984ea3"
)

fig2 <- ggplot(gene_stats,
               aes(x = gene, y = pct, fill = Tissue)) +
  geom_col(position = position_dodge(0.72), width = 0.65,
           colour = "white") +
  geom_text(data = sig_annot,
            aes(x = gene, y = y_pos, label = sig_label,
                fill = NULL),
            size = 3.5, colour = "black",
            position = position_nudge(x = 0.18)) +
  facet_grid(. ~ module, scales = "free_x", space = "free_x") +
  scale_fill_manual(values = tissue_cols) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 108)) +
  theme_classic(base_size = 11) +
  theme(
    axis.text.x      = element_text(angle = 40, hjust = 1, face = "italic"),
    strip.background = element_rect(fill = "grey92", colour = NA),
    strip.text       = element_text(size = 9, face = "bold"),
    panel.spacing    = unit(0.3, "cm"),
    legend.position  = "right"
  ) +
  labs(
    title    = "Key genes — non-naive CD8 T cells, CNS vs BM",
    subtitle = "Stars = BH-adjusted Wilcoxon: *** p<0.001, ** p<0.01, * p<0.05",
    y        = "% cells expressing",
    x        = NULL,
    fill     = NULL
  )

fig2

FIGURE 3: Heatmap — Functional Modules × State × Tissue

heatmap_genes <- c(
  "Itga1", "Cxcr6", "Klf2",          # Retention
  "Socs1", "Cish",                    # Suppression
  "Bcl2",                             # Survival
  "Tox", "Nr4a1", "Nr4a2", "Prdm1", "Id2",  # Exhaustion TF
  "Pdcd1", "Havcr2", "Lag3", "Tigit", "Ctla4", "Entpd1",  # Checkpoints
  "Gzmb", "Gzmk", "Prf1", "Cx3cr1", "Ccl3", "Ifng",  # Effector (lost)
  "Tcf7", "Slamf6", "Il7r"           # Stemness (absent)
)
heatmap_genes <- intersect(heatmap_genes, rownames(cd8))

cd8$state_tissue_clean <- paste0(cd8$cd8_class_final, "_", cd8$Tissue)
Idents(cd8) <- "state_tissue_clean"

avg_expr <- AverageExpression(
  cd8,
  features = heatmap_genes,
  group.by = "state_tissue_clean",
  layer     = "data"
)$originalexp

avg_expr <- avg_expr[, colSums(avg_expr) > 0, drop = FALSE]

# Fix Seurat hyphen conversion
colnames(avg_expr) <- gsub("-", "_", colnames(avg_expr))

col_annot <- data.frame(
  Tissue = ifelse(grepl("CNS", colnames(avg_expr)), "CNS", "BM"),
  State  = gsub("_BM$|_CNS$", "", colnames(avg_expr)),
  row.names = colnames(avg_expr)
)
col_annot$Tissue <- as.character(col_annot$Tissue)
col_annot$State  <- as.character(col_annot$State)

row_modules <- c(
  rep("Tissue retention",       3),
  rep("Signalling suppression", 2),
  rep("Survival",               1),
  rep("Exhaustion TF",          5),
  rep("Checkpoint receptors",   6),
  rep("Effector (CNS-lost)",    6),
  rep("Stemness (absent)",      3)
)
row_modules <- row_modules[seq_along(heatmap_genes)]
row_annot   <- data.frame(Module = as.character(row_modules),
                           row.names = heatmap_genes)

all_states <- sort(unique(col_annot$State))
state_palette <- c(
  "CD8_Tex"              = "#8c2d04",
  "CD8_Tex_intermediate" = "#f16913",
  "CD8_EffectorMemory"   = "#4dac26",
  "CD8_EarlyActiv"       = "#fdae6b",
  "CD8_Naive"            = "#74add1",
  "other"                = "grey80"
)
missing_states <- setdiff(all_states, names(state_palette))
if (length(missing_states) > 0) {
  state_palette <- c(state_palette,
                     setNames(rep("grey70", length(missing_states)),
                              missing_states))
}

annot_colours <- list(
  Tissue = c("BM" = "#b2182b", "CNS" = "#2166ac"),
  State  = state_palette[all_states],
  Module = module_cols[intersect(names(module_cols), unique(row_annot$Module))]
)

pheatmap(
  avg_expr,
  scale             = "row",
  color             = colorRampPalette(
                        rev(RColorBrewer::brewer.pal(9, "RdBu")))(100),
  cluster_rows      = FALSE,
  cluster_cols      = TRUE,
  annotation_col    = col_annot,
  annotation_row    = row_annot,
  annotation_colors = annot_colours,
  fontsize_row      = 9,
  fontsize_col      = 9,
  angle_col         = 45,
  gaps_row          = cumsum(c(3, 2, 1, 5, 6, 6)),
  main              = paste0(
    "CD8 T cell dysfunction — functional gene modules × state × tissue\n",
    "CD8_Tex_CNS: highest retention + suppression + exhaustion TF programme"
  )
)

FIGURE 4: Checkpoint DotPlot — State × Tissue

cd8$state_tissue_clean <- paste0(cd8$cd8_class_final, "_", cd8$Tissue)
Idents(cd8) <- "state_tissue_clean"

fig4 <- DotPlot(
  cd8,
  features = checkpoint_genes,
  idents   = c("CD8_Tex_BM",              "CD8_Tex_CNS",
               "CD8_Tex_intermediate_BM", "CD8_Tex_intermediate_CNS",
               "CD8_EffectorMemory_BM",   "CD8_EffectorMemory_CNS"),
  cols     = c("lightgrey", "#8c2d04"),
  dot.scale = 9
) +
  RotatedAxis() +
  labs(
    title    = "Checkpoint receptor co-expression — exhausted CD8 states",
    subtitle = "CD8_Tex_CNS shows broadest co-inhibitory receptor programme"
  ) +
  theme(axis.text.y = element_text(size = 10))

fig4

FIGURE 5: Checkpoint Burden with Statistics

# Wilcoxon test — checkpoint burden CNS vs BM (non-naive)
bm_burden  <- cd8_active$checkpoint_burden[cd8_active$Tissue == "BM"]
cns_burden <- cd8_active$checkpoint_burden[cd8_active$Tissue == "CNS"]
wt_burden  <- wilcox.test(cns_burden, bm_burden)
cat(sprintf("Checkpoint burden Wilcoxon p = %.4f\n", wt_burden$p.value))

## Checkpoint burden Wilcoxon p = 0.4457

# Proportion ≥3 checkpoints
burden_summary <- cd8_active@meta.data %>%
  group_by(Tissue) %>%
  summarise(
    n           = n(),
    n_high      = sum(checkpoint_burden >= 3),
    pct_high    = round(mean(checkpoint_burden >= 3) * 100, 1),
    .groups     = "drop"
  )
cat("\n=== ≥3 checkpoints co-expressed (flow-equivalent triple+) ===\n")

## 
## === ≥3 checkpoints co-expressed (flow-equivalent triple+) ===

print(burden_summary)

## # A tibble: 2 × 4
##   Tissue     n n_high pct_high
##   <fct>  <int>  <int>    <dbl>
## 1 BM       362    242     66.9
## 2 CNS      190    140     73.7

# Chi-squared test on ≥3 proportion
cont_table <- matrix(
  c(burden_summary$n_high[burden_summary$Tissue == "BM"],
    burden_summary$n[burden_summary$Tissue == "BM"] -
      burden_summary$n_high[burden_summary$Tissue == "BM"],
    burden_summary$n_high[burden_summary$Tissue == "CNS"],
    burden_summary$n[burden_summary$Tissue == "CNS"] -
      burden_summary$n_high[burden_summary$Tissue == "CNS"]),
  nrow = 2,
  dimnames = list(c("≥3", "<3"), c("BM", "CNS"))
)
chi_res <- chisq.test(cont_table)
cat(sprintf("\nChi-squared test (≥3 vs <3 checkpoints, CNS vs BM): p = %.4f\n",
            chi_res$p.value))

## 
## Chi-squared test (≥3 vs <3 checkpoints, CNS vs BM): p = 0.1199

# Distribution plot
burden_data <- cd8_active@meta.data %>%
  group_by(Tissue, checkpoint_burden) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(Tissue) %>%
  mutate(pct = round(n / sum(n) * 100, 1))

fig5 <- ggplot(burden_data,
               aes(x = factor(checkpoint_burden), y = pct, fill = Tissue)) +
  geom_col(position = "dodge", colour = "white", width = 0.7) +
  scale_fill_manual(values = tissue_cols) +
  geom_vline(xintercept = 3.5, linetype = "dashed",
             colour = "grey40", linewidth = 0.5) +
  annotate("text", x = 4.3, y = max(burden_data$pct) * 0.88,
           label = paste0("≥3 checkpoints\n",
                          "BM: ", burden_summary$pct_high[1], "%  ",
                          "CNS: ", burden_summary$pct_high[2], "%\n",
                          "χ² p = ", signif(chi_res$p.value, 2)),
           size = 3, colour = "grey20", hjust = 0) +
  theme_classic(base_size = 12) +
  labs(
    title    = "Co-inhibitory receptor co-expression burden",
    subtitle = "Transcriptional correlate of flow PD-1+TIM-3+LAG-3+ (Figure 3E)",
    x        = "Number of checkpoints co-expressed per cell",
    y        = "% of non-naive CD8 T cells",
    fill     = NULL
  )

fig5

FIGURE 6: Summary Panel for Manuscript

Panels A–C combining the three strongest signals

# Panel A — Key genes (simplified: retention + suppression + effector loss)
pA_genes <- intersect(c("Itga1", "Cxcr6", "Klf2",
                          "Socs1", "Cish", "Gzmb", "Cx3cr1"),
                       rownames(cd8_active))

pA_module_map <- c(
  "Itga1" = "Retention", "Cxcr6" = "Retention", "Klf2" = "Retention",
  "Socs1" = "Suppression", "Cish" = "Suppression",
  "Gzmb"  = "Effector\n(lost)", "Cx3cr1" = "Effector\n(lost)"
)

pA_data <- lapply(pA_genes, function(g) {
  expr <- GetAssayData(cd8_active, layer = "data")[g, ]
  cd8_active@meta.data %>%
    mutate(expr = expr, pos = expr > 0) %>%
    group_by(Tissue) %>%
    summarise(pct = round(mean(pos) * 100, 1), .groups = "drop") %>%
    mutate(gene = g, module = pA_module_map[g])
}) %>% bind_rows() %>%
  mutate(
    module = factor(module, levels = c("Effector\n(lost)",
                                        "Retention", "Suppression")),
    gene   = factor(gene, levels = pA_genes)
  )

pA <- ggplot(pA_data, aes(x = gene, y = pct, fill = Tissue)) +
  geom_col(position = position_dodge(0.72), width = 0.65, colour = "white") +
  facet_grid(. ~ module, scales = "free_x", space = "free_x") +
  scale_fill_manual(values = tissue_cols) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 105)) +
  theme_classic(base_size = 10) +
  theme(
    axis.text.x      = element_text(angle = 35, hjust = 1, face = "italic"),
    strip.background = element_rect(fill = "grey92", colour = NA),
    strip.text       = element_text(size = 8, face = "bold"),
    legend.position  = "none",
    panel.spacing    = unit(0.2, "cm")
  ) +
  labs(title = "A. Key gene expression",
       y = "% expressing", x = NULL)

# Panel B — Checkpoint burden violin
pB <- cd8_active@meta.data %>%
  ggplot(aes(x = Tissue, y = checkpoint_burden, fill = Tissue)) +
  geom_violin(trim = FALSE, alpha = 0.8) +
  geom_boxplot(width = 0.15, fill = "white", outlier.size = 0.3) +
  scale_fill_manual(values = tissue_cols) +
  theme_classic(base_size = 10) +
  theme(legend.position = "none") +
  annotate("text", x = 1.5,
           y = max(cd8_active$checkpoint_burden, na.rm = TRUE) * 0.95,
           label = paste0("p = ", signif(chi_res$p.value, 2)),
           size = 3) +
  labs(title = "B. Checkpoint burden",
       subtitle = "Non-naive CD8",
       y = "Co-inhibitory receptors\nco-expressed per cell",
       x = NULL)

# Panel C — Per-cell checkpoint burden violin (all non-naive, more robust than Tex-only)
# Note: Tex-specific UCell comparison removed — only 15 CNS Tex cells (unreliable)
pC <- cd8_active@meta.data %>%
  ggplot(aes(x = Tissue, y = checkpoint_burden, fill = Tissue)) +
  geom_violin(trim = FALSE, alpha = 0.8) +
  geom_boxplot(width = 0.15, fill = "white", outlier.size = 0.3) +
  scale_fill_manual(values = tissue_cols) +
  theme_classic(base_size = 10) +
  theme(legend.position = "none") +
  annotate("text", x = 1.5,
           y = max(cd8_active$checkpoint_burden, na.rm = TRUE) * 0.95,
           label = paste0("p = ", signif(chi_res$p.value, 2),
                          "\n(χ², ≥3 checkpoints)"),
           size = 3) +
  labs(title = "C. Checkpoint burden",
       subtitle = "Non-naive CD8 — trend corroborates flow\n(underpowered; n CNS=190)",
       y = "Co-inhibitory receptors\nco-expressed per cell",
       x = NULL)

pA | pB | pC

7. Statistical summary table

cat("=== STATISTICAL SUMMARY ===\n\n")

## === STATISTICAL SUMMARY ===

cat("--- DE statistics (non-naive CD8, CNS vs BM) ---\n")

## --- DE statistics (non-naive CD8, CNS vs BM) ---

de_cd8 %>%
  filter(gene %in% c(retention, suppression, survival,
                      exhaustion[1:3], effector[1:4]),
         p_val_adj < 0.05) %>%
  select(gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  mutate(
    direction = ifelse(avg_log2FC > 0, "CNS↑", "BM↑"),
    module    = case_when(
      gene %in% retention   ~ "Retention",
      gene %in% suppression ~ "Suppression",
      gene %in% survival    ~ "Survival",
      gene %in% exhaustion  ~ "Exhaustion TF",
      gene %in% effector    ~ "Effector (lost)",
      TRUE                  ~ "Other"
    )
  ) %>%
  arrange(module, desc(abs(avg_log2FC))) %>%
  print()

##          gene avg_log2FC pct.1 pct.2    p_val_adj direction          module
## Ccl3     Ccl3 -3.4097432 0.332 0.517 9.010495e-03       BM↑ Effector (lost)
## Cx3cr1 Cx3cr1 -2.2907627 0.079 0.249 1.534438e-02       BM↑ Effector (lost)
## Id2       Id2  0.5892804 0.953 0.823 6.725999e-06      CNS↑   Exhaustion TF
## Itga1   Itga1  5.4316979 0.111 0.003 2.085989e-05      CNS↑       Retention
## Klf2     Klf2 -1.4080463 0.189 0.470 1.678487e-06       BM↑       Retention
## Cxcr6   Cxcr6  0.6877332 0.858 0.671 1.998938e-04      CNS↑       Retention
## Socs1   Socs1  1.5244681 0.321 0.138 1.445672e-03      CNS↑     Suppression
## Cish     Cish  1.3457616 0.416 0.182 1.152505e-04      CNS↑     Suppression
## Bcl2     Bcl2  1.3115126 0.563 0.301 2.020573e-06      CNS↑        Survival

cat("\n--- Checkpoint burden (non-naive CD8) ---\n")

## 
## --- Checkpoint burden (non-naive CD8) ---

cat(sprintf("BM:  %.1f%% ≥3 checkpoints (n=%d)\n",
            burden_summary$pct_high[1], burden_summary$n[1]))

## BM:  66.9% ≥3 checkpoints (n=362)

cat(sprintf("CNS: %.1f%% ≥3 checkpoints (n=%d)\n",
            burden_summary$pct_high[2], burden_summary$n[2]))

## CNS: 73.7% ≥3 checkpoints (n=190)

cat(sprintf("Chi-squared p = %.4f\n", chi_res$p.value))

## Chi-squared p = 0.1199

cat("Note: trend consistent with flow data but does not reach significance\n")

## Note: trend consistent with flow data but does not reach significance

cat("      with available cell numbers (CNS n=190 non-naive)\n\n")

##       with available cell numbers (CNS n=190 non-naive)

cat("--- Tex-specific comparisons ---\n")

## --- Tex-specific comparisons ---

cat("CNS Tex n =", sum(cd8$cd8_class_final == "CD8_Tex" &
                         cd8$Tissue == "CNS"), "\n")

## CNS Tex n = 15

cat("BM Tex  n =", sum(cd8$cd8_class_final == "CD8_Tex" &
                         cd8$Tissue == "BM"),  "\n")

## BM Tex  n = 99

cat("WARNING: CNS Tex too few cells (n=15) for reliable per-state comparisons.\n")

## WARNING: CNS Tex too few cells (n=15) for reliable per-state comparisons.

cat("         Tex-specific statistical tests not reported.\n")

##          Tex-specific statistical tests not reported.

cat("         Primary evidence: DE on pooled non-naive CD8 (n=362 BM, 190 CNS)\n")

##          Primary evidence: DE on pooled non-naive CD8 (n=362 BM, 190 CNS)

SUPPLEMENTARY: Cross-Population DE — CNS Dysfunction Programme is Stage-Specific

This section runs DE analyses within CD8_Naive and CD8_EffectorMemory subsets separately, then builds a cross-population summary showing which genes are consistently CNS-dysregulated across differentiation states.

The key hypothesis being tested: is the CNS dysfunction programme imprinted pre-activation (present in naive cells) or acquired post-activation (effector/exhausted only)?

S1. DE within CD8_Naive

cd8_naive <- subset(cd8, cells = colnames(cd8)[
  cd8$cd8_class_final == "CD8_Naive"
])

cat("CD8_Naive — BM:", sum(cd8_naive$Tissue == "BM"),
    "CNS:", sum(cd8_naive$Tissue == "CNS"), "\n")

## CD8_Naive — BM: 1610 CNS: 127

DefaultAssay(cd8_naive) <- "originalexp"
Idents(cd8_naive) <- "Tissue"

de_naive <- FindMarkers(
  cd8_naive,
  ident.1         = "CNS",
  ident.2         = "BM",
  min.pct         = 0.1,
  logfc.threshold = 0.2,
  test.use        = "wilcox"
)
de_naive$gene <- rownames(de_naive)
de_naive <- de_naive %>% filter(!gene %in% c("Igkv8-27", "Hexb"))

# Save
write.csv(de_naive, paste0(BASE_DIR, "16_DE_CD8_Naive_CNS_vs_BM.csv"))

cat("\n=== Significant CNS-enriched (Naive) ===\n")

## 
## === Significant CNS-enriched (Naive) ===

de_naive %>%
  filter(avg_log2FC > 0, p_val_adj < 0.05) %>%
  arrange(desc(avg_log2FC)) %>%
  select(gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  head(20) %>% print()

##            gene avg_log2FC pct.1 pct.2    p_val_adj
## Tubb2a   Tubb2a  2.5322529 0.173 0.062 2.512123e-02
## Gm20400 Gm20400  2.2472479 0.205 0.055 1.567392e-06
## P2ry10   P2ry10  1.9727135 0.236 0.098 5.517051e-03
## Ifngr1   Ifngr1  1.6971483 0.748 0.404 1.013364e-18
## Cenpa     Cenpa  1.4593508 0.268 0.114 7.674045e-03
## Eno1       Eno1  1.2458947 0.488 0.257 7.006852e-05
## Gm20186 Gm20186  1.0865512 0.661 0.467 1.302930e-04
## Cblb       Cblb  1.0735975 0.583 0.381 4.378591e-03
## Emb         Emb  0.9861165 0.890 0.778 7.059731e-05
## Crlf3     Crlf3  0.9799556 0.843 0.642 1.163050e-07
## Ssh2       Ssh2  0.8054321 0.732 0.590 1.695829e-02
## Rsrp1     Rsrp1  0.7968474 0.937 0.792 3.504935e-08
## Gm2000   Gm2000  0.7033242 0.811 0.573 3.813668e-05
## Selenop Selenop  0.5950898 0.717 0.475 3.999458e-03
## Il7r       Il7r  0.5802482 0.874 0.773 1.873355e-02
## Rpl22     Rpl22  0.3740871 1.000 0.948 2.842694e-03
## Ubb         Ubb  0.3647927 0.976 0.967 1.918169e-03
## Rpl29     Rpl29  0.3397343 1.000 0.990 3.957375e-04
## Rpl24     Rpl24  0.3121637 0.992 0.986 1.964800e-02
## Eef1a1   Eef1a1  0.3118056 1.000 0.996 2.870242e-08

cat("\n=== Significant BM-enriched (Naive) ===\n")

## 
## === Significant BM-enriched (Naive) ===

de_naive %>%
  filter(avg_log2FC < 0, p_val_adj < 0.05) %>%
  arrange(avg_log2FC) %>%
  select(gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  head(20) %>% print()

##              gene avg_log2FC pct.1 pct.2    p_val_adj
## Lamp1       Lamp1  -3.052206 0.039 0.263 5.398942e-04
## Alyref     Alyref  -2.610384 0.047 0.245 6.459482e-03
## Ccr9         Ccr9  -2.411585 0.087 0.301 3.493188e-03
## Cbx4         Cbx4  -2.383938 0.063 0.293 6.207037e-04
## Gm44174   Gm44174  -2.099778 0.118 0.371 1.865380e-04
## Tle5         Tle5  -2.030861 0.354 0.844 2.080934e-28
## Ppp1r14b Ppp1r14b  -2.028207 0.094 0.320 2.636669e-03
## Dusp2       Dusp2  -1.915718 0.157 0.420 5.340605e-05
## Pcbp1       Pcbp1  -1.731345 0.213 0.511 4.062716e-07
## Smap1       Smap1  -1.691617 0.126 0.330 1.264928e-02
## Dhx36       Dhx36  -1.605892 0.181 0.368 4.337792e-02
## Cd226       Cd226  -1.550974 0.236 0.450 5.547021e-03
## Pten         Pten  -1.532090 0.189 0.418 1.999356e-03
## Gnb1         Gnb1  -1.503779 0.197 0.432 9.870828e-04
## Cd8a         Cd8a  -1.461509 0.197 0.411 1.910702e-02
## Ppp2ca     Ppp2ca  -1.412073 0.228 0.480 3.152742e-04
## Pabpn1     Pabpn1  -1.374874 0.228 0.537 4.359942e-06
## Psip1       Psip1  -1.353518 0.433 0.681 2.009874e-08
## Cd8b1       Cd8b1  -1.312228 0.307 0.509 4.026721e-03
## Rac1         Rac1  -1.308222 0.307 0.611 1.755666e-07

S2. DE within CD8_EffectorMemory

cd8_em <- subset(cd8, cells = colnames(cd8)[
  cd8$cd8_class_final == "CD8_EffectorMemory"
])

cat("CD8_EffectorMemory — BM:", sum(cd8_em$Tissue == "BM"),
    "CNS:", sum(cd8_em$Tissue == "CNS"), "\n")

## CD8_EffectorMemory — BM: 246 CNS: 173

DefaultAssay(cd8_em) <- "originalexp"
Idents(cd8_em) <- "Tissue"

de_em <- FindMarkers(
  cd8_em,
  ident.1         = "CNS",
  ident.2         = "BM",
  min.pct         = 0.1,
  logfc.threshold = 0.2,
  test.use        = "wilcox"
)
de_em$gene <- rownames(de_em)
de_em <- de_em %>% filter(!gene %in% c("Igkv8-27", "Hexb"))

# Save
write.csv(de_em, paste0(BASE_DIR, "16_DE_CD8_EffectorMemory_CNS_vs_BM.csv"))

cat("\n=== Significant CNS-enriched (EffectorMemory) ===\n")

## 
## === Significant CNS-enriched (EffectorMemory) ===

de_em %>%
  filter(avg_log2FC > 0, p_val_adj < 0.05) %>%
  arrange(desc(avg_log2FC)) %>%
  select(gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  head(20) %>% print()

##          gene avg_log2FC pct.1 pct.2    p_val_adj
## Itga1   Itga1  4.9549745 0.116 0.004 6.906356e-03
## Actn2   Actn2  4.2993382 0.116 0.008 3.037135e-02
## Socs1   Socs1  1.5638079 0.324 0.118 2.566568e-03
## Bcl2     Bcl2  1.3390401 0.561 0.293 9.916415e-05
## Cish     Cish  1.3102973 0.405 0.163 3.560541e-03
## Pde3b   Pde3b  1.2289887 0.497 0.236 9.466770e-04
## Ltb       Ltb  1.0452177 0.659 0.467 4.353920e-02
## Cxcr6   Cxcr6  0.8751228 0.861 0.638 2.840978e-06
## Fth1     Fth1  0.7523288 0.971 0.915 9.373437e-08
## Id2       Id2  0.7169672 0.948 0.813 3.854075e-07
## Pdcd1   Pdcd1  0.7121066 0.786 0.589 2.125140e-02
## Shisa5 Shisa5  0.5198572 1.000 0.955 2.857072e-04
## Cd3g     Cd3g  0.3976863 0.983 0.976 3.042966e-02

cat("\n=== Significant BM-enriched (EffectorMemory) ===\n")

## 
## === Significant BM-enriched (EffectorMemory) ===

de_em %>%
  filter(avg_log2FC < 0, p_val_adj < 0.05) %>%
  arrange(avg_log2FC) %>%
  select(gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
  head(20) %>% print()

##            gene avg_log2FC pct.1 pct.2    p_val_adj
## Sap30l   Sap30l  -3.472871 0.017 0.183 4.021400e-03
## Cx3cr1   Cx3cr1  -2.796472 0.069 0.313 2.286927e-05
## Srsf9     Srsf9  -2.293137 0.046 0.228 1.119509e-02
## Smap1     Smap1  -2.274688 0.185 0.610 3.659858e-15
## Pabpn1   Pabpn1  -2.088035 0.139 0.443 2.335918e-07
## Chic2     Chic2  -2.030218 0.121 0.374 4.705441e-05
## Tle5       Tle5  -1.923783 0.462 0.821 2.007052e-23
## Pcbp1     Pcbp1  -1.897547 0.133 0.423 3.471154e-06
## Klf2       Klf2  -1.797944 0.202 0.622 4.736790e-13
## Rac1       Rac1  -1.772328 0.289 0.679 4.388464e-13
## Ppp2ca   Ppp2ca  -1.730241 0.179 0.508 3.248475e-08
## Txndc5   Txndc5  -1.597127 0.110 0.305 4.089413e-02
## Rap1b     Rap1b  -1.522393 0.543 0.841 1.479788e-16
## Bag1       Bag1  -1.508549 0.156 0.366 2.289025e-02
## Zdhhc18 Zdhhc18  -1.470444 0.162 0.407 1.440292e-03
## Gzma       Gzma  -1.406087 0.133 0.354 9.509290e-03
## Psip1     Psip1  -1.404431 0.277 0.577 2.973652e-06
## Grb2       Grb2  -1.347852 0.306 0.541 3.607127e-04
## Gnb1       Gnb1  -1.331263 0.272 0.492 3.873065e-03
## Arglu1   Arglu1  -1.296344 0.266 0.533 2.631924e-04

S3. Cross-population consistency table

# Key genes to track across all three DE analyses
track_genes <- c(
  # Retention
  "Itga1", "Cxcr6", "Klf2",
  # Suppression
  "Socs1", "Cish",
  # Survival / IFN axis
  "Bcl2", "Ifngr1",
  # Pre-activation brake (naive-specific)
  "Cblb", "Lck", "Cd8a",
  # Effector lost
  "Cx3cr1", "Gzmb", "Gzma", "Ccl3", "Ccl5",
  # Signalling GTPases (BM-enriched)
  "Rac1", "Rap1b", "Gnb1", "Grb2",
  # Exhaustion TF
  "Tox", "Nr4a1", "Id2", "Pdcd1"
)

# Build summary table
get_fc <- function(de_df, gene) {
  if (!gene %in% de_df$gene) return(NA_real_)
  de_df$avg_log2FC[de_df$gene == gene]
}

get_sig <- function(de_df, gene) {
  if (!gene %in% de_df$gene) return("")
  p <- de_df$p_val_adj[de_df$gene == gene]
  case_when(p < 0.001 ~ "***", p < 0.01 ~ "**", p < 0.05 ~ "*", TRUE ~ "ns")
}

cross_table <- data.frame(
  gene = track_genes,
  Naive_FC      = sapply(track_genes, get_fc,  de_df = de_naive),
  Naive_sig     = sapply(track_genes, get_sig, de_df = de_naive),
  EffMem_FC     = sapply(track_genes, get_fc,  de_df = de_em),
  EffMem_sig    = sapply(track_genes, get_sig, de_df = de_em),
  NonNaive_FC   = sapply(track_genes, get_fc,  de_df = de_cd8),
  NonNaive_sig  = sapply(track_genes, get_sig, de_df = de_cd8),
  stringsAsFactors = FALSE
) %>%
  mutate(
    n_sig = (Naive_sig %in% c("*","**","***")) +
            (EffMem_sig %in% c("*","**","***")) +
            (NonNaive_sig %in% c("*","**","***")),
    consistent = n_sig >= 2
  ) %>%
  arrange(desc(n_sig), desc(abs(NonNaive_FC)))

cat("=== Cross-population CNS vs BM consistency ===\n")

## === Cross-population CNS vs BM consistency ===

print(cross_table)

##          gene   Naive_FC Naive_sig  EffMem_FC EffMem_sig NonNaive_FC
## Rac1     Rac1 -1.3082220       *** -1.7723281        ***  -1.7658225
## Gnb1     Gnb1 -1.5037789       *** -1.3312627         **  -1.4112814
## Rap1b   Rap1b -0.9092306       *** -1.5223928        ***  -1.3417480
## Itga1   Itga1         NA            4.9549745         **   5.4316979
## Cx3cr1 Cx3cr1         NA           -2.7964719        ***  -2.2907627
## Socs1   Socs1  0.2981150        ns  1.5638079         **   1.5244681
## Klf2     Klf2         NA           -1.7979436        ***  -1.4080463
## Grb2     Grb2 -1.0354094        ns -1.3478516        ***  -1.3526159
## Cish     Cish         NA            1.3102973         **   1.3457616
## Bcl2     Bcl2         NA            1.3390401        ***   1.3115126
## Ifngr1 Ifngr1  1.6971483       ***  0.9510170         ns   1.2444885
## Ccl5     Ccl5         NA           -0.7002649        ***  -0.8036593
## Cxcr6   Cxcr6         NA            0.8751228        ***   0.6877332
## Id2       Id2 -0.2266070        ns  0.7169672        ***   0.5892804
## Ccl3     Ccl3         NA           -1.3539602         ns  -3.4097432
## Gzma     Gzma         NA           -1.4060865         **  -1.1232990
## Cd8a     Cd8a -1.4615091         *  0.7593601         ns   0.5917631
## Cblb     Cblb  1.0735975        **  0.2315359         ns          NA
## Lck       Lck -0.5480987       ***         NA                     NA
## Pdcd1   Pdcd1         NA            0.7121066          *          NA
## Tox       Tox         NA           -0.2750036         ns  -0.4715752
## Gzmb     Gzmb         NA                   NA                     NA
## Nr4a1   Nr4a1         NA                   NA                     NA
##        NonNaive_sig n_sig consistent
## Rac1            ***     3       TRUE
## Gnb1            ***     3       TRUE
## Rap1b           ***     3       TRUE
## Itga1           ***     2       TRUE
## Cx3cr1            *     2       TRUE
## Socs1            **     2       TRUE
## Klf2            ***     2       TRUE
## Grb2            ***     2       TRUE
## Cish            ***     2       TRUE
## Bcl2            ***     2       TRUE
## Ifngr1          ***     2       TRUE
## Ccl5            ***     2       TRUE
## Cxcr6           ***     2       TRUE
## Id2             ***     2       TRUE
## Ccl3             **     1      FALSE
## Gzma             ns     1      FALSE
## Cd8a             ns     1      FALSE
## Cblb                    1      FALSE
## Lck                     1      FALSE
## Pdcd1                   1      FALSE
## Tox              ns     0      FALSE
## Gzmb                    0      FALSE
## Nr4a1                   0      FALSE

cat("\n=== Genes significant in ≥2 of 3 comparisons ===\n")

## 
## === Genes significant in ≥2 of 3 comparisons ===

cross_table %>% filter(consistent) %>% print()

##          gene   Naive_FC Naive_sig  EffMem_FC EffMem_sig NonNaive_FC
## Rac1     Rac1 -1.3082220       *** -1.7723281        ***  -1.7658225
## Gnb1     Gnb1 -1.5037789       *** -1.3312627         **  -1.4112814
## Rap1b   Rap1b -0.9092306       *** -1.5223928        ***  -1.3417480
## Itga1   Itga1         NA            4.9549745         **   5.4316979
## Cx3cr1 Cx3cr1         NA           -2.7964719        ***  -2.2907627
## Socs1   Socs1  0.2981150        ns  1.5638079         **   1.5244681
## Klf2     Klf2         NA           -1.7979436        ***  -1.4080463
## Grb2     Grb2 -1.0354094        ns -1.3478516        ***  -1.3526159
## Cish     Cish         NA            1.3102973         **   1.3457616
## Bcl2     Bcl2         NA            1.3390401        ***   1.3115126
## Ifngr1 Ifngr1  1.6971483       ***  0.9510170         ns   1.2444885
## Ccl5     Ccl5         NA           -0.7002649        ***  -0.8036593
## Cxcr6   Cxcr6         NA            0.8751228        ***   0.6877332
## Id2       Id2 -0.2266070        ns  0.7169672        ***   0.5892804
##        NonNaive_sig n_sig consistent
## Rac1            ***     3       TRUE
## Gnb1            ***     3       TRUE
## Rap1b           ***     3       TRUE
## Itga1           ***     2       TRUE
## Cx3cr1            *     2       TRUE
## Socs1            **     2       TRUE
## Klf2            ***     2       TRUE
## Grb2            ***     2       TRUE
## Cish            ***     2       TRUE
## Bcl2            ***     2       TRUE
## Ifngr1          ***     2       TRUE
## Ccl5            ***     2       TRUE
## Cxcr6           ***     2       TRUE
## Id2             ***     2       TRUE

S4. Cross-population combined figure

# Genes significant in ≥2 comparisons — the consistent CNS programme
consistent_genes <- cross_table %>%
  filter(consistent) %>%
  pull(gene)

cat("Consistent genes (≥2/3 DE analyses):", 
    paste(consistent_genes, collapse = ", "), "\n")

## Consistent genes (≥2/3 DE analyses): Rac1, Gnb1, Rap1b, Itga1, Cx3cr1, Socs1, Klf2, Grb2, Cish, Bcl2, Ifngr1, Ccl5, Cxcr6, Id2

# Build long format FC table for plotting
fc_long <- cross_table %>%
  filter(gene %in% consistent_genes) %>%
  select(gene, Naive_FC, EffMem_FC, NonNaive_FC) %>%
  pivot_longer(
    cols      = c(Naive_FC, EffMem_FC, NonNaive_FC),
    names_to  = "Population",
    values_to = "log2FC"
  ) %>%
  mutate(
    Population = recode(Population,
      "Naive_FC"    = "CD8_Naive\n(n BM=1610, CNS=127)",
      "EffMem_FC"   = "CD8_EffectorMemory\n(n BM=246, CNS=173)",
      "NonNaive_FC" = "Non-naive pooled\n(n BM=362, CNS=190)"
    ),
    module = case_when(
      gene %in% c("Itga1", "Cxcr6", "Klf2")        ~ "Retention",
      gene %in% c("Socs1", "Cish")                  ~ "Suppression",
      gene %in% c("Bcl2", "Ifngr1")                 ~ "Survival/IFN",
      gene %in% c("Cblb", "Lck", "Cd8a")            ~ "Pre-activation\nbrake",
      gene %in% c("Cx3cr1","Gzmb","Gzma",
                   "Ccl3","Ccl5")                    ~ "Effector\n(lost)",
      gene %in% c("Rac1","Rap1b","Gnb1","Grb2")     ~ "Signalling\nGTPases",
      gene %in% c("Tox","Nr4a1","Id2","Pdcd1")      ~ "Exhaustion TF",
      TRUE                                           ~ "Other"
    ),
    sig = case_when(
      Population == "CD8_Naive\n(n BM=1610, CNS=127)" ~
        cross_table$Naive_sig[match(gene, cross_table$gene)],
      Population == "CD8_EffectorMemory\n(n BM=246, CNS=173)" ~
        cross_table$EffMem_sig[match(gene, cross_table$gene)],
      TRUE ~
        cross_table$NonNaive_sig[match(gene, cross_table$gene)]
    )
  ) %>%
  filter(!is.na(log2FC)) %>%
  mutate(
    module = factor(module, levels = c(
      "Pre-activation\nbrake", "Retention", "Suppression",
      "Survival/IFN", "Effector\n(lost)",
      "Signalling\nGTPases", "Exhaustion TF"
    ))
  )

# Dot plot: x = gene, y = population, size = |FC|, colour = direction
supp_fig <- ggplot(fc_long,
                   aes(x    = gene,
                       y    = Population,
                       size = abs(log2FC),
                       fill = log2FC)) +
  geom_point(shape = 21, colour = "white", stroke = 0.3) +
  geom_text(aes(label = sig), size = 2.8, vjust = -1.2, colour = "grey20") +
  facet_grid(. ~ module, scales = "free_x", space = "free_x") +
  scale_fill_gradientn(
    colours  = c("#2166ac", "white", "#b2182b"),
    values   = scales::rescale(c(-4, 0, 4)),
    limits   = c(-4, 4),
    oob      = scales::squish,
    name     = "log2FC\n(CNS vs BM)"
  ) +
  scale_size_continuous(range = c(2, 9), name = "|log2FC|") +
  theme_classic(base_size = 11) +
  theme(
    axis.text.x      = element_text(angle = 40, hjust = 1, face = "italic"),
    axis.text.y      = element_text(size = 9),
    strip.background = element_rect(fill = "grey92", colour = NA),
    strip.text       = element_text(size = 8, face = "bold"),
    panel.spacing    = unit(0.25, "cm"),
    legend.position  = "right"
  ) +
  labs(
    title    = "CNS dysfunction programme — consistency across CD8 differentiation states",
    subtitle = paste0(
      "Red = CNS-enriched; Blue = BM-enriched (lost in CNS)\n",
      "Stars = BH-adjusted significance: *** p<0.001, ** p<0.01, * p<0.05"
    ),
    x = NULL, y = NULL
  )

supp_fig

S5. Stage-specificity summary

cat("=== Stage-specific vs shared components ===\n\n")

## === Stage-specific vs shared components ===

naive_only <- cross_table %>%
  filter(Naive_sig %in% c("*","**","***"),
         !EffMem_sig %in% c("*","**","***"),
         !NonNaive_sig %in% c("*","**","***")) %>%
  pull(gene)

em_only <- cross_table %>%
  filter(!Naive_sig %in% c("*","**","***"),
         EffMem_sig %in% c("*","**","***"),
         !NonNaive_sig %in% c("*","**","***")) %>%
  pull(gene)

shared_all <- cross_table %>%
  filter(Naive_sig %in% c("*","**","***"),
         EffMem_sig %in% c("*","**","***"),
         NonNaive_sig %in% c("*","**","***")) %>%
  pull(gene)

em_and_nonnative <- cross_table %>%
  filter(!Naive_sig %in% c("*","**","***"),
         EffMem_sig %in% c("*","**","***"),
         NonNaive_sig %in% c("*","**","***")) %>%
  pull(gene)

cat("Significant in ALL THREE comparisons (core programme):\n")

## Significant in ALL THREE comparisons (core programme):

print(shared_all)

## [1] "Rac1"  "Gnb1"  "Rap1b"

cat("\nSignificant in EffectorMemory + NonNaive but NOT Naive",
    "(acquired post-activation):\n")

## 
## Significant in EffectorMemory + NonNaive but NOT Naive (acquired post-activation):

print(em_and_nonnative)

##  [1] "Itga1"  "Cx3cr1" "Socs1"  "Klf2"   "Grb2"   "Cish"   "Bcl2"   "Ccl5"  
##  [9] "Cxcr6"  "Id2"

cat("\nSignificant in Naive ONLY (pre-activation brake):\n")

## 
## Significant in Naive ONLY (pre-activation brake):

print(naive_only)

## [1] "Cd8a" "Cblb" "Lck"

cat("\nInterpretation:\n")

## 
## Interpretation:

cat("  Core programme (all states): shared retention/suppression/effector loss\n")

##   Core programme (all states): shared retention/suppression/effector loss

cat("  Post-activation acquired: Itga1, Socs1, Cish — imprinted on activated cells\n")

##   Post-activation acquired: Itga1, Socs1, Cish — imprinted on activated cells

cat("  Pre-activation brake: Cblb — dampens TCR signalling before any antigen exposure\n")

##   Pre-activation brake: Cblb — dampens TCR signalling before any antigen exposure

# Add final labels and scores to full immune object
cd8_meta <- cd8@meta.data %>%
  select(cd8_class_final, checkpoint_burden)
seu <- AddMetaData(seu, metadata = cd8_meta)

qs_save(cd8, paste0(BASE_DIR, "16_cd8_final.qs"))
qs_save(seu, paste0(BASE_DIR, "16_immune_final.qs"))

cat("Saved: 16_cd8_final.qs\n")

## Saved: 16_cd8_final.qs

cat("Saved: 16_immune_final.qs\n\n")

## Saved: 16_immune_final.qs

cat("=== Final CD8 annotation: cd8_class_final ===\n")

## === Final CD8 annotation: cd8_class_final ===

cat("  CD8_Naive            — validated naive (ProjecTILs)\n")

##   CD8_Naive            — validated naive (ProjecTILs)

cat("  CD8_EarlyActiv       — early activated\n")

##   CD8_EarlyActiv       — early activated

cat("  CD8_EffectorMemory   — effector memory\n")

##   CD8_EffectorMemory   — effector memory

cat("  CD8_Tex_intermediate — intermediate exhausted (Texint; original Tpex relabelled)\n")

##   CD8_Tex_intermediate — intermediate exhausted (Texint; original Tpex relabelled)

cat("  CD8_Tex              — terminally exhausted\n")

##   CD8_Tex              — terminally exhausted

cat("  [No Tpex]            — Tpex absent; confirmed by ProjecTILs + Tcf7 + flow\n\n")

##   [No Tpex]            — Tpex absent; confirmed by ProjecTILs + Tcf7 + flow

cat("Use 16_immune_final.qs as input for Script 17 (NicheNet).\n")

## Use 16_immune_final.qs as input for Script 17 (NicheNet).

Session Info

sessionInfo()

## R version 4.4.2 (2024-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS 26.3.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/London
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] ggpubr_0.6.3       pheatmap_1.0.13    ggrepel_0.9.6      RColorBrewer_1.1-3
##  [5] tibble_3.3.1       tidyr_1.3.2        UCell_2.10.1       qs2_0.1.7         
##  [9] patchwork_1.3.2    dplyr_1.2.0        ggplot2_4.0.2      Seurat_5.4.0      
## [13] SeuratObject_5.3.0 sp_2.2-1          
## 
## loaded via a namespace (and not attached):
##   [1] rstudioapi_0.18.0           jsonlite_2.0.0             
##   [3] magrittr_2.0.4              spatstat.utils_3.2-2       
##   [5] farver_2.1.2                rmarkdown_2.30             
##   [7] zlibbioc_1.52.0             vctrs_0.7.1                
##   [9] ROCR_1.0-12                 spatstat.explore_3.7-0     
##  [11] rstatix_0.7.3               S4Arrays_1.6.0             
##  [13] htmltools_0.5.9             broom_1.0.12               
##  [15] BiocNeighbors_2.0.1         Formula_1.2-5              
##  [17] SparseArray_1.6.2           sass_0.4.10                
##  [19] sctransform_0.4.3           parallelly_1.46.1          
##  [21] KernSmooth_2.23-26          bslib_0.10.0               
##  [23] htmlwidgets_1.6.4           ica_1.0-3                  
##  [25] plyr_1.8.9                  plotly_4.12.0              
##  [27] zoo_1.8-15                  cachem_1.1.0               
##  [29] igraph_2.2.2                mime_0.13                  
##  [31] lifecycle_1.0.5             pkgconfig_2.0.3            
##  [33] Matrix_1.7-4                R6_2.6.1                   
##  [35] fastmap_1.2.0               GenomeInfoDbData_1.2.13    
##  [37] MatrixGenerics_1.18.1       fitdistrplus_1.2-6         
##  [39] future_1.69.0               shiny_1.13.0               
##  [41] digest_0.6.39               S4Vectors_0.44.0           
##  [43] tensor_1.5.1                RSpectra_0.16-2            
##  [45] irlba_2.3.7                 GenomicRanges_1.58.0       
##  [47] labeling_0.4.3              progressr_0.18.0           
##  [49] spatstat.sparse_3.1-0       httr_1.4.8                 
##  [51] polyclip_1.10-7             abind_1.4-8                
##  [53] compiler_4.4.2              withr_3.0.2                
##  [55] backports_1.5.0             S7_0.2.1                   
##  [57] BiocParallel_1.40.2         carData_3.0-6              
##  [59] fastDummies_1.7.5           ggsignif_0.6.4             
##  [61] MASS_7.3-65                 DelayedArray_0.32.0        
##  [63] tools_4.4.2                 lmtest_0.9-40              
##  [65] otel_0.2.0                  httpuv_1.6.16              
##  [67] future.apply_1.20.2         goftest_1.2-3              
##  [69] glue_1.8.0                  nlme_3.1-168               
##  [71] promises_1.5.0              grid_4.4.2                 
##  [73] Rtsne_0.17                  cluster_2.1.8.2            
##  [75] reshape2_1.4.5              generics_0.1.4             
##  [77] gtable_0.3.6                spatstat.data_3.1-9        
##  [79] data.table_1.18.2.1         utf8_1.2.6                 
##  [81] car_3.1-5                   XVector_0.46.0             
##  [83] stringfish_0.18.0           BiocGenerics_0.52.0        
##  [85] spatstat.geom_3.7-0         RcppAnnoy_0.0.23           
##  [87] RANN_2.6.2                  pillar_1.11.1              
##  [89] stringr_1.6.0               limma_3.62.2               
##  [91] spam_2.11-3                 RcppHNSW_0.6.0             
##  [93] later_1.4.8                 splines_4.4.2              
##  [95] lattice_0.22-9              survival_3.8-6             
##  [97] deldir_2.0-4                tidyselect_1.2.1           
##  [99] SingleCellExperiment_1.28.1 miniUI_0.1.2               
## [101] pbapply_1.7-4               knitr_1.51                 
## [103] gridExtra_2.3               IRanges_2.40.1             
## [105] SummarizedExperiment_1.36.0 scattermore_1.2            
## [107] stats4_4.4.2                xfun_0.56                  
## [109] Biobase_2.66.0              statmod_1.5.1              
## [111] matrixStats_1.5.0           UCSC.utils_1.2.0           
## [113] stringi_1.8.7               lazyeval_0.2.2             
## [115] yaml_2.3.12                 evaluate_1.0.5             
## [117] codetools_0.2-20            cli_3.6.5                  
## [119] uwot_0.2.4                  RcppParallel_5.1.11-2      
## [121] xtable_1.8-8                reticulate_1.45.0          
## [123] jquerylib_0.1.4             GenomeInfoDb_1.42.3        
## [125] dichromat_2.0-0.1           Rcpp_1.1.1                 
## [127] globals_0.19.1              spatstat.random_3.4-4      
## [129] png_0.1-8                   spatstat.univar_3.1-6      
## [131] parallel_4.4.2              presto_1.0.0               
## [133] dotCall64_1.2               listenv_0.10.1             
## [135] viridisLite_0.4.3           scales_1.4.0               
## [137] ggridges_0.5.7              crayon_1.5.3               
## [139] purrr_1.2.1                 rlang_1.1.7                
## [141] cowplot_1.2.0

Script 16: CD8 T Cell Dysfunction in the B-ALL CNS Niche

B-ALL CNS Niche Project

2026-03-15