This script characterises CD8 T cell dysfunction in the B-ALL CNS niche. The central finding from Script 15b and unbiased DE is that the dominant biology is not deeper exhaustion of Tex cells, but rather a distinct CNS-specific programme of retention, intrinsic signalling suppression, and survival maintenance — with effector function lost.
Tissue retention — Itga1 (VLA-1; 5.4× CNS-enriched, p=2×10⁻⁵), Cxcr6 (CXCL16 receptor), loss of Klf2 (S1PR1 egress regulator)
Intrinsic signalling suppression — Socs1 (JAK-STAT suppressor) and Cish (TCR signalling suppressor) upregulated in CNS; cell-autonomous brake above and beyond checkpoint receptor expression
Survival without function — Bcl2 upregulated; effector molecules (Gzmb, Gzmk, Cx3cr1, Ccl3) lost in CNS
Absent Tpex — Tcf7+ progenitor pool absent; PD-1 reinvigoration has no substrate in the CNS niche
CD8_Tpex_unvalidated (from Script 15b) → renamed
CD8_Tex_intermediate throughout this
script. These cells are Tcf7-negative, checkpoint-expressing, and score
highest on the Beltra Texint (intermediate exhausted) signature.
library(Seurat)
library(ggplot2)
library(dplyr)
library(patchwork)
library(qs2)
library(UCell)
library(tidyr)
library(tibble)
library(RColorBrewer)
library(ggrepel)
library(pheatmap)
library(ggpubr)
BASE_DIR <- "/Volumes/Al_seq/scratch_backup/harmony_clustering/"
tissue_cols <- c("BM" = "#b2182b", "CNS" = "#2166ac")
state_cols <- c(
"CD8_Naive" = "#74add1",
"CD8_Tex" = "#8c2d04",
"CD8_Tex_intermediate" = "#f16913",
"CD8_EffectorMemory" = "#4dac26",
"CD8_EarlyActiv" = "#fdae6b",
"other" = "grey80"
)
module_cols <- c(
"Tissue retention" = "#1a9641",
"Signalling suppression" = "#d73027",
"Survival" = "#984ea3",
"Exhaustion TF" = "#e31a1c",
"Checkpoint receptors" = "#f16913",
"Effector (CNS-lost)" = "#2166ac",
"Stemness (absent)" = "#74add1"
)
seu <- qs_read(paste0(BASE_DIR, "15b_immune_cd8_revised.qs"))
cd8 <- qs_read(paste0(BASE_DIR, "15b_cd8_projected.qs"))
DefaultAssay(cd8) <- "originalexp"
# Rename CD8_Tpex_unvalidated → CD8_Tex_intermediate
cd8$cd8_class_final <- dplyr::recode(
cd8$cd8_class_corrected,
"CD8_Tpex_unvalidated" = "CD8_Tex_intermediate"
)
seu$cd8_class_final <- dplyr::recode(
seu$cd8_class_corrected,
"CD8_Tpex_unvalidated" = "CD8_Tex_intermediate"
)
# Non-naive working subset
cd8_active <- subset(cd8, cells = colnames(cd8)[
cd8$cd8_class_final %in%
c("CD8_Tex", "CD8_EffectorMemory",
"CD8_EarlyActiv", "CD8_Tex_intermediate")
])
cat("=== Final CD8 classification ===\n")
## === Final CD8 classification ===
print(table(cd8$cd8_class_final, cd8$Tissue))
##
## BM CNS
## CD8_EarlyActiv 13 2
## CD8_EffectorMemory 246 173
## CD8_Naive 1610 127
## CD8_Tex 99 15
## CD8_Tex_intermediate 4 0
## other 44 8
cat("\n=== Non-naive cells for analysis ===\n")
##
## === Non-naive cells for analysis ===
print(table(cd8_active$cd8_class_final, cd8_active$Tissue))
##
## BM CNS
## CD8_EarlyActiv 13 2
## CD8_EffectorMemory 246 173
## CD8_Tex 99 15
## CD8_Tex_intermediate 4 0
de_file <- paste0(BASE_DIR, "16_DE_CD8_CNS_vs_BM.csv")
if (file.exists(de_file)) {
de_cd8 <- read.csv(de_file, row.names = 1)
de_cd8$gene <- rownames(de_cd8)
cat("DE results loaded from file:", nrow(de_cd8), "genes\n")
} else {
cat("Running DE analysis...\n")
DefaultAssay(cd8_active) <- "originalexp"
Idents(cd8_active) <- "Tissue"
de_cd8 <- FindMarkers(
cd8_active,
ident.1 = "CNS",
ident.2 = "BM",
min.pct = 0.1,
logfc.threshold = 0.2,
test.use = "wilcox"
)
de_cd8$gene <- rownames(de_cd8)
write.csv(de_cd8, de_file)
cat("DE complete:", nrow(de_cd8), "genes tested\n")
}
## DE results loaded from file: 3319 genes
# Remove known contaminants
de_cd8 <- de_cd8 %>%
filter(!gene %in% c("Igkv8-27", "Hexb"))
cat("\n=== Significant DE genes ===\n")
##
## === Significant DE genes ===
cat("CNS-enriched (p<0.05, FC>0.2):",
sum(de_cd8$avg_log2FC > 0.2 & de_cd8$p_val_adj < 0.05), "\n")
## CNS-enriched (p<0.05, FC>0.2): 16
cat("BM-enriched (p<0.05, FC>0.2):",
sum(de_cd8$avg_log2FC < -0.2 & de_cd8$p_val_adj < 0.05), "\n")
## BM-enriched (p<0.05, FC>0.2): 29
checkpoint_genes <- intersect(
c("Pdcd1", "Havcr2", "Lag3", "Tigit",
"Ctla4", "Cd244a", "Entpd1", "Cd160"),
rownames(cd8)
)
expr_mat <- GetAssayData(cd8, layer = "data")[checkpoint_genes, ]
cd8$checkpoint_burden <- colSums((expr_mat > 0) * 1)
# Add to active subset
cd8_active$checkpoint_burden <- cd8$checkpoint_burden[
colnames(cd8_active)
]
# ── Define key genes per programme ──────────────────────────────────────────
retention <- c("Itga1", "Cxcr6", "Klf2")
suppression <- c("Socs1", "Cish")
survival <- c("Bcl2")
exhaustion <- c("Nr4a1", "Nr4a2", "Id2", "Tox", "Prdm1")
effector <- c("Cx3cr1", "Ccl3", "Gzmb", "Gzmk", "Prf1", "Klrg1", "Ifng")
receptor <- c("Ifngr1", "Ltb")
de_volcano <- de_cd8 %>%
mutate(
sig = p_val_adj < 0.05 & abs(avg_log2FC) > 0.2,
category = case_when(
gene %in% retention & avg_log2FC > 0 ~ "Tissue retention (CNS↑)",
gene %in% suppression & avg_log2FC > 0 ~ "Signalling suppression (CNS↑)",
gene %in% survival & avg_log2FC > 0 ~ "Survival (CNS↑)",
gene %in% exhaustion & avg_log2FC > 0 ~ "Exhaustion TF (CNS↑)",
gene %in% effector & avg_log2FC < 0 ~ "Effector function (BM↑/CNS lost)",
avg_log2FC > 0.2 & p_val_adj < 0.05 ~ "Other CNS-enriched",
avg_log2FC < -0.2 & p_val_adj < 0.05 ~ "Other BM-enriched",
TRUE ~ "ns"
),
label = case_when(
gene %in% c(retention, suppression, survival,
exhaustion, effector, receptor) & sig ~ gene,
TRUE ~ NA_character_
)
)
volcano_cols <- c(
"Tissue retention (CNS↑)" = "#1a9641",
"Signalling suppression (CNS↑)" = "#d73027",
"Survival (CNS↑)" = "#984ea3",
"Exhaustion TF (CNS↑)" = "#e31a1c",
"Effector function (BM↑/CNS lost)" = "#2166ac",
"Other CNS-enriched" = "#fc8d59",
"Other BM-enriched" = "#91bfdb",
"ns" = "grey85"
)
fig1 <- ggplot(de_volcano,
aes(x = avg_log2FC,
y = -log10(p_val_adj + 1e-300),
colour = category)) +
geom_point(aes(size = ifelse(category == "ns", 0.3, 0.7)),
alpha = 0.75) +
geom_label_repel(
aes(label = label),
size = 3.2,
fontface = "italic",
box.padding = 0.45,
point.padding = 0.3,
label.size = 0.2,
max.overlaps = 25,
na.rm = TRUE,
show.legend = FALSE
) +
geom_vline(xintercept = c(-0.2, 0.2),
linetype = "dashed", colour = "grey50", linewidth = 0.4) +
geom_hline(yintercept = -log10(0.05),
linetype = "dashed", colour = "grey50", linewidth = 0.4) +
scale_colour_manual(values = volcano_cols) +
scale_size_identity() +
theme_classic(base_size = 12) +
theme(
legend.position = "right",
legend.text = element_text(size = 9),
legend.key.size = unit(0.45, "cm"),
plot.subtitle = element_text(size = 9, colour = "grey30")
) +
labs(
title = "CNS vs BM — non-naive CD8 T cells",
subtitle = paste0(
"Retention: Itga1↑ Cxcr6↑ Klf2↓ | ",
"Suppression: Socs1↑ Cish↑ | ",
"Survival: Bcl2↑ | ",
"Effector loss: Cx3cr1↓ Gzmb↓ Ccl3↓"
),
x = "Average log2FC (CNS vs BM)",
y = "-log10 adjusted p-value",
colour = "Functional category"
)
fig1
key_genes <- c(
# Effector lost — BM > CNS
"Ccl3", "Cx3cr1", "Gzmb",
# Exhaustion TF — CNS > BM
"Nr4a1", "Prdm1", "Tox",
# Retention — CNS > BM
"Cxcr6", "Itga1", "Klf2",
# Suppression — CNS > BM
"Cish", "Socs1",
# Survival — CNS > BM
"Bcl2"
)
key_genes <- intersect(key_genes, rownames(cd8_active))
module_map <- c(
"Ccl3" = "Effector (lost)", "Cx3cr1" = "Effector (lost)",
"Gzmb" = "Effector (lost)",
"Nr4a1" = "Exhaustion TF", "Prdm1" = "Exhaustion TF",
"Tox" = "Exhaustion TF",
"Cxcr6" = "Retention", "Itga1" = "Retention", "Klf2" = "Retention",
"Cish" = "Suppression", "Socs1" = "Suppression",
"Bcl2" = "Survival"
)
# Compute % expressing and run Wilcoxon per gene
gene_stats <- lapply(key_genes, function(g) {
expr <- GetAssayData(cd8_active, layer = "data")[g, ]
meta <- cd8_active@meta.data %>%
mutate(expr = expr, pos = expr > 0)
pct_df <- meta %>%
group_by(Tissue) %>%
summarise(pct = round(mean(pos) * 100, 1),
n = n(),
.groups = "drop") %>%
mutate(gene = g,
module = module_map[g])
# Wilcoxon test CNS vs BM on raw expression
bm_e <- expr[cd8_active$Tissue == "BM"]
cns_e <- expr[cd8_active$Tissue == "CNS"]
wt <- wilcox.test(cns_e, bm_e)
pct_df$p_val <- wt$p.value
pct_df$p_adj <- p.adjust(wt$p.value, method = "BH") # corrected below
pct_df
}) %>% bind_rows()
# Apply BH correction across all genes
p_vals <- gene_stats %>%
distinct(gene, p_val) %>%
mutate(p_adj_global = p.adjust(p_val, method = "BH"))
gene_stats <- gene_stats %>%
left_join(p_vals %>% select(gene, p_adj_global), by = "gene") %>%
mutate(
sig_label = case_when(
p_adj_global < 0.001 ~ "***",
p_adj_global < 0.01 ~ "**",
p_adj_global < 0.05 ~ "*",
TRUE ~ "ns"
)
)
cat("=== Gene-level statistics ===\n")
## === Gene-level statistics ===
gene_stats %>%
distinct(gene, module, p_val, p_adj_global, sig_label) %>%
arrange(module, p_adj_global) %>%
print()
## # A tibble: 12 × 5
## gene module p_val p_adj_global sig_label
## <chr> <chr> <dbl> <dbl> <chr>
## 1 Ccl3 Effector (lost) 3.38e- 7 5.79e- 7 ***
## 2 Cx3cr1 Effector (lost) 5.75e- 7 8.62e- 7 ***
## 3 Gzmb Effector (lost) 4.53e- 1 4.94e- 1 ns
## 4 Tox Exhaustion TF 5.17e- 3 6.89e- 3 **
## 5 Prdm1 Exhaustion TF 1.92e- 1 2.31e- 1 ns
## 6 Nr4a1 Exhaustion TF 7.38e- 1 7.38e- 1 ns
## 7 Klf2 Retention 6.29e-11 4.54e-10 ***
## 8 Itga1 Retention 7.81e-10 3.13e- 9 ***
## 9 Cxcr6 Retention 7.49e- 9 1.80e- 8 ***
## 10 Cish Suppression 4.32e- 9 1.30e- 8 ***
## 11 Socs1 Suppression 5.42e- 8 1.08e- 7 ***
## 12 Bcl2 Survival 7.57e-11 4.54e-10 ***
# Module order for faceting
gene_stats$module <- factor(
gene_stats$module,
levels = c("Effector (lost)", "Exhaustion TF",
"Retention", "Suppression", "Survival")
)
gene_stats$gene <- factor(gene_stats$gene, levels = key_genes)
# Significance annotation at CNS bar
sig_annot <- gene_stats %>%
filter(Tissue == "CNS") %>%
select(gene, module, pct, sig_label) %>%
mutate(y_pos = pct + 5)
module_fill_cols <- c(
"Effector (lost)" = "#2166ac",
"Exhaustion TF" = "#e31a1c",
"Retention" = "#1a9641",
"Suppression" = "#d73027",
"Survival" = "#984ea3"
)
fig2 <- ggplot(gene_stats,
aes(x = gene, y = pct, fill = Tissue)) +
geom_col(position = position_dodge(0.72), width = 0.65,
colour = "white") +
geom_text(data = sig_annot,
aes(x = gene, y = y_pos, label = sig_label,
fill = NULL),
size = 3.5, colour = "black",
position = position_nudge(x = 0.18)) +
facet_grid(. ~ module, scales = "free_x", space = "free_x") +
scale_fill_manual(values = tissue_cols) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 108)) +
theme_classic(base_size = 11) +
theme(
axis.text.x = element_text(angle = 40, hjust = 1, face = "italic"),
strip.background = element_rect(fill = "grey92", colour = NA),
strip.text = element_text(size = 9, face = "bold"),
panel.spacing = unit(0.3, "cm"),
legend.position = "right"
) +
labs(
title = "Key genes — non-naive CD8 T cells, CNS vs BM",
subtitle = "Stars = BH-adjusted Wilcoxon: *** p<0.001, ** p<0.01, * p<0.05",
y = "% cells expressing",
x = NULL,
fill = NULL
)
fig2
heatmap_genes <- c(
"Itga1", "Cxcr6", "Klf2", # Retention
"Socs1", "Cish", # Suppression
"Bcl2", # Survival
"Tox", "Nr4a1", "Nr4a2", "Prdm1", "Id2", # Exhaustion TF
"Pdcd1", "Havcr2", "Lag3", "Tigit", "Ctla4", "Entpd1", # Checkpoints
"Gzmb", "Gzmk", "Prf1", "Cx3cr1", "Ccl3", "Ifng", # Effector (lost)
"Tcf7", "Slamf6", "Il7r" # Stemness (absent)
)
heatmap_genes <- intersect(heatmap_genes, rownames(cd8))
cd8$state_tissue_clean <- paste0(cd8$cd8_class_final, "_", cd8$Tissue)
Idents(cd8) <- "state_tissue_clean"
avg_expr <- AverageExpression(
cd8,
features = heatmap_genes,
group.by = "state_tissue_clean",
layer = "data"
)$originalexp
avg_expr <- avg_expr[, colSums(avg_expr) > 0, drop = FALSE]
# Fix Seurat hyphen conversion
colnames(avg_expr) <- gsub("-", "_", colnames(avg_expr))
col_annot <- data.frame(
Tissue = ifelse(grepl("CNS", colnames(avg_expr)), "CNS", "BM"),
State = gsub("_BM$|_CNS$", "", colnames(avg_expr)),
row.names = colnames(avg_expr)
)
col_annot$Tissue <- as.character(col_annot$Tissue)
col_annot$State <- as.character(col_annot$State)
row_modules <- c(
rep("Tissue retention", 3),
rep("Signalling suppression", 2),
rep("Survival", 1),
rep("Exhaustion TF", 5),
rep("Checkpoint receptors", 6),
rep("Effector (CNS-lost)", 6),
rep("Stemness (absent)", 3)
)
row_modules <- row_modules[seq_along(heatmap_genes)]
row_annot <- data.frame(Module = as.character(row_modules),
row.names = heatmap_genes)
all_states <- sort(unique(col_annot$State))
state_palette <- c(
"CD8_Tex" = "#8c2d04",
"CD8_Tex_intermediate" = "#f16913",
"CD8_EffectorMemory" = "#4dac26",
"CD8_EarlyActiv" = "#fdae6b",
"CD8_Naive" = "#74add1",
"other" = "grey80"
)
missing_states <- setdiff(all_states, names(state_palette))
if (length(missing_states) > 0) {
state_palette <- c(state_palette,
setNames(rep("grey70", length(missing_states)),
missing_states))
}
annot_colours <- list(
Tissue = c("BM" = "#b2182b", "CNS" = "#2166ac"),
State = state_palette[all_states],
Module = module_cols[intersect(names(module_cols), unique(row_annot$Module))]
)
pheatmap(
avg_expr,
scale = "row",
color = colorRampPalette(
rev(RColorBrewer::brewer.pal(9, "RdBu")))(100),
cluster_rows = FALSE,
cluster_cols = TRUE,
annotation_col = col_annot,
annotation_row = row_annot,
annotation_colors = annot_colours,
fontsize_row = 9,
fontsize_col = 9,
angle_col = 45,
gaps_row = cumsum(c(3, 2, 1, 5, 6, 6)),
main = paste0(
"CD8 T cell dysfunction — functional gene modules × state × tissue\n",
"CD8_Tex_CNS: highest retention + suppression + exhaustion TF programme"
)
)
cd8$state_tissue_clean <- paste0(cd8$cd8_class_final, "_", cd8$Tissue)
Idents(cd8) <- "state_tissue_clean"
fig4 <- DotPlot(
cd8,
features = checkpoint_genes,
idents = c("CD8_Tex_BM", "CD8_Tex_CNS",
"CD8_Tex_intermediate_BM", "CD8_Tex_intermediate_CNS",
"CD8_EffectorMemory_BM", "CD8_EffectorMemory_CNS"),
cols = c("lightgrey", "#8c2d04"),
dot.scale = 9
) +
RotatedAxis() +
labs(
title = "Checkpoint receptor co-expression — exhausted CD8 states",
subtitle = "CD8_Tex_CNS shows broadest co-inhibitory receptor programme"
) +
theme(axis.text.y = element_text(size = 10))
fig4
# Wilcoxon test — checkpoint burden CNS vs BM (non-naive)
bm_burden <- cd8_active$checkpoint_burden[cd8_active$Tissue == "BM"]
cns_burden <- cd8_active$checkpoint_burden[cd8_active$Tissue == "CNS"]
wt_burden <- wilcox.test(cns_burden, bm_burden)
cat(sprintf("Checkpoint burden Wilcoxon p = %.4f\n", wt_burden$p.value))
## Checkpoint burden Wilcoxon p = 0.4457
# Proportion ≥3 checkpoints
burden_summary <- cd8_active@meta.data %>%
group_by(Tissue) %>%
summarise(
n = n(),
n_high = sum(checkpoint_burden >= 3),
pct_high = round(mean(checkpoint_burden >= 3) * 100, 1),
.groups = "drop"
)
cat("\n=== ≥3 checkpoints co-expressed (flow-equivalent triple+) ===\n")
##
## === ≥3 checkpoints co-expressed (flow-equivalent triple+) ===
print(burden_summary)
## # A tibble: 2 × 4
## Tissue n n_high pct_high
## <fct> <int> <int> <dbl>
## 1 BM 362 242 66.9
## 2 CNS 190 140 73.7
# Chi-squared test on ≥3 proportion
cont_table <- matrix(
c(burden_summary$n_high[burden_summary$Tissue == "BM"],
burden_summary$n[burden_summary$Tissue == "BM"] -
burden_summary$n_high[burden_summary$Tissue == "BM"],
burden_summary$n_high[burden_summary$Tissue == "CNS"],
burden_summary$n[burden_summary$Tissue == "CNS"] -
burden_summary$n_high[burden_summary$Tissue == "CNS"]),
nrow = 2,
dimnames = list(c("≥3", "<3"), c("BM", "CNS"))
)
chi_res <- chisq.test(cont_table)
cat(sprintf("\nChi-squared test (≥3 vs <3 checkpoints, CNS vs BM): p = %.4f\n",
chi_res$p.value))
##
## Chi-squared test (≥3 vs <3 checkpoints, CNS vs BM): p = 0.1199
# Distribution plot
burden_data <- cd8_active@meta.data %>%
group_by(Tissue, checkpoint_burden) %>%
summarise(n = n(), .groups = "drop") %>%
group_by(Tissue) %>%
mutate(pct = round(n / sum(n) * 100, 1))
fig5 <- ggplot(burden_data,
aes(x = factor(checkpoint_burden), y = pct, fill = Tissue)) +
geom_col(position = "dodge", colour = "white", width = 0.7) +
scale_fill_manual(values = tissue_cols) +
geom_vline(xintercept = 3.5, linetype = "dashed",
colour = "grey40", linewidth = 0.5) +
annotate("text", x = 4.3, y = max(burden_data$pct) * 0.88,
label = paste0("≥3 checkpoints\n",
"BM: ", burden_summary$pct_high[1], "% ",
"CNS: ", burden_summary$pct_high[2], "%\n",
"χ² p = ", signif(chi_res$p.value, 2)),
size = 3, colour = "grey20", hjust = 0) +
theme_classic(base_size = 12) +
labs(
title = "Co-inhibitory receptor co-expression burden",
subtitle = "Transcriptional correlate of flow PD-1+TIM-3+LAG-3+ (Figure 3E)",
x = "Number of checkpoints co-expressed per cell",
y = "% of non-naive CD8 T cells",
fill = NULL
)
fig5
# Panel A — Key genes (simplified: retention + suppression + effector loss)
pA_genes <- intersect(c("Itga1", "Cxcr6", "Klf2",
"Socs1", "Cish", "Gzmb", "Cx3cr1"),
rownames(cd8_active))
pA_module_map <- c(
"Itga1" = "Retention", "Cxcr6" = "Retention", "Klf2" = "Retention",
"Socs1" = "Suppression", "Cish" = "Suppression",
"Gzmb" = "Effector\n(lost)", "Cx3cr1" = "Effector\n(lost)"
)
pA_data <- lapply(pA_genes, function(g) {
expr <- GetAssayData(cd8_active, layer = "data")[g, ]
cd8_active@meta.data %>%
mutate(expr = expr, pos = expr > 0) %>%
group_by(Tissue) %>%
summarise(pct = round(mean(pos) * 100, 1), .groups = "drop") %>%
mutate(gene = g, module = pA_module_map[g])
}) %>% bind_rows() %>%
mutate(
module = factor(module, levels = c("Effector\n(lost)",
"Retention", "Suppression")),
gene = factor(gene, levels = pA_genes)
)
pA <- ggplot(pA_data, aes(x = gene, y = pct, fill = Tissue)) +
geom_col(position = position_dodge(0.72), width = 0.65, colour = "white") +
facet_grid(. ~ module, scales = "free_x", space = "free_x") +
scale_fill_manual(values = tissue_cols) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 105)) +
theme_classic(base_size = 10) +
theme(
axis.text.x = element_text(angle = 35, hjust = 1, face = "italic"),
strip.background = element_rect(fill = "grey92", colour = NA),
strip.text = element_text(size = 8, face = "bold"),
legend.position = "none",
panel.spacing = unit(0.2, "cm")
) +
labs(title = "A. Key gene expression",
y = "% expressing", x = NULL)
# Panel B — Checkpoint burden violin
pB <- cd8_active@meta.data %>%
ggplot(aes(x = Tissue, y = checkpoint_burden, fill = Tissue)) +
geom_violin(trim = FALSE, alpha = 0.8) +
geom_boxplot(width = 0.15, fill = "white", outlier.size = 0.3) +
scale_fill_manual(values = tissue_cols) +
theme_classic(base_size = 10) +
theme(legend.position = "none") +
annotate("text", x = 1.5,
y = max(cd8_active$checkpoint_burden, na.rm = TRUE) * 0.95,
label = paste0("p = ", signif(chi_res$p.value, 2)),
size = 3) +
labs(title = "B. Checkpoint burden",
subtitle = "Non-naive CD8",
y = "Co-inhibitory receptors\nco-expressed per cell",
x = NULL)
# Panel C — Per-cell checkpoint burden violin (all non-naive, more robust than Tex-only)
# Note: Tex-specific UCell comparison removed — only 15 CNS Tex cells (unreliable)
pC <- cd8_active@meta.data %>%
ggplot(aes(x = Tissue, y = checkpoint_burden, fill = Tissue)) +
geom_violin(trim = FALSE, alpha = 0.8) +
geom_boxplot(width = 0.15, fill = "white", outlier.size = 0.3) +
scale_fill_manual(values = tissue_cols) +
theme_classic(base_size = 10) +
theme(legend.position = "none") +
annotate("text", x = 1.5,
y = max(cd8_active$checkpoint_burden, na.rm = TRUE) * 0.95,
label = paste0("p = ", signif(chi_res$p.value, 2),
"\n(χ², ≥3 checkpoints)"),
size = 3) +
labs(title = "C. Checkpoint burden",
subtitle = "Non-naive CD8 — trend corroborates flow\n(underpowered; n CNS=190)",
y = "Co-inhibitory receptors\nco-expressed per cell",
x = NULL)
pA | pB | pC
cat("=== STATISTICAL SUMMARY ===\n\n")
## === STATISTICAL SUMMARY ===
cat("--- DE statistics (non-naive CD8, CNS vs BM) ---\n")
## --- DE statistics (non-naive CD8, CNS vs BM) ---
de_cd8 %>%
filter(gene %in% c(retention, suppression, survival,
exhaustion[1:3], effector[1:4]),
p_val_adj < 0.05) %>%
select(gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
mutate(
direction = ifelse(avg_log2FC > 0, "CNS↑", "BM↑"),
module = case_when(
gene %in% retention ~ "Retention",
gene %in% suppression ~ "Suppression",
gene %in% survival ~ "Survival",
gene %in% exhaustion ~ "Exhaustion TF",
gene %in% effector ~ "Effector (lost)",
TRUE ~ "Other"
)
) %>%
arrange(module, desc(abs(avg_log2FC))) %>%
print()
## gene avg_log2FC pct.1 pct.2 p_val_adj direction module
## Ccl3 Ccl3 -3.4097432 0.332 0.517 9.010495e-03 BM↑ Effector (lost)
## Cx3cr1 Cx3cr1 -2.2907627 0.079 0.249 1.534438e-02 BM↑ Effector (lost)
## Id2 Id2 0.5892804 0.953 0.823 6.725999e-06 CNS↑ Exhaustion TF
## Itga1 Itga1 5.4316979 0.111 0.003 2.085989e-05 CNS↑ Retention
## Klf2 Klf2 -1.4080463 0.189 0.470 1.678487e-06 BM↑ Retention
## Cxcr6 Cxcr6 0.6877332 0.858 0.671 1.998938e-04 CNS↑ Retention
## Socs1 Socs1 1.5244681 0.321 0.138 1.445672e-03 CNS↑ Suppression
## Cish Cish 1.3457616 0.416 0.182 1.152505e-04 CNS↑ Suppression
## Bcl2 Bcl2 1.3115126 0.563 0.301 2.020573e-06 CNS↑ Survival
cat("\n--- Checkpoint burden (non-naive CD8) ---\n")
##
## --- Checkpoint burden (non-naive CD8) ---
cat(sprintf("BM: %.1f%% ≥3 checkpoints (n=%d)\n",
burden_summary$pct_high[1], burden_summary$n[1]))
## BM: 66.9% ≥3 checkpoints (n=362)
cat(sprintf("CNS: %.1f%% ≥3 checkpoints (n=%d)\n",
burden_summary$pct_high[2], burden_summary$n[2]))
## CNS: 73.7% ≥3 checkpoints (n=190)
cat(sprintf("Chi-squared p = %.4f\n", chi_res$p.value))
## Chi-squared p = 0.1199
cat("Note: trend consistent with flow data but does not reach significance\n")
## Note: trend consistent with flow data but does not reach significance
cat(" with available cell numbers (CNS n=190 non-naive)\n\n")
## with available cell numbers (CNS n=190 non-naive)
cat("--- Tex-specific comparisons ---\n")
## --- Tex-specific comparisons ---
cat("CNS Tex n =", sum(cd8$cd8_class_final == "CD8_Tex" &
cd8$Tissue == "CNS"), "\n")
## CNS Tex n = 15
cat("BM Tex n =", sum(cd8$cd8_class_final == "CD8_Tex" &
cd8$Tissue == "BM"), "\n")
## BM Tex n = 99
cat("WARNING: CNS Tex too few cells (n=15) for reliable per-state comparisons.\n")
## WARNING: CNS Tex too few cells (n=15) for reliable per-state comparisons.
cat(" Tex-specific statistical tests not reported.\n")
## Tex-specific statistical tests not reported.
cat(" Primary evidence: DE on pooled non-naive CD8 (n=362 BM, 190 CNS)\n")
## Primary evidence: DE on pooled non-naive CD8 (n=362 BM, 190 CNS)
This section runs DE analyses within CD8_Naive and CD8_EffectorMemory subsets separately, then builds a cross-population summary showing which genes are consistently CNS-dysregulated across differentiation states.
The key hypothesis being tested: is the CNS dysfunction programme imprinted pre-activation (present in naive cells) or acquired post-activation (effector/exhausted only)?
cd8_naive <- subset(cd8, cells = colnames(cd8)[
cd8$cd8_class_final == "CD8_Naive"
])
cat("CD8_Naive — BM:", sum(cd8_naive$Tissue == "BM"),
"CNS:", sum(cd8_naive$Tissue == "CNS"), "\n")
## CD8_Naive — BM: 1610 CNS: 127
DefaultAssay(cd8_naive) <- "originalexp"
Idents(cd8_naive) <- "Tissue"
de_naive <- FindMarkers(
cd8_naive,
ident.1 = "CNS",
ident.2 = "BM",
min.pct = 0.1,
logfc.threshold = 0.2,
test.use = "wilcox"
)
de_naive$gene <- rownames(de_naive)
de_naive <- de_naive %>% filter(!gene %in% c("Igkv8-27", "Hexb"))
# Save
write.csv(de_naive, paste0(BASE_DIR, "16_DE_CD8_Naive_CNS_vs_BM.csv"))
cat("\n=== Significant CNS-enriched (Naive) ===\n")
##
## === Significant CNS-enriched (Naive) ===
de_naive %>%
filter(avg_log2FC > 0, p_val_adj < 0.05) %>%
arrange(desc(avg_log2FC)) %>%
select(gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
head(20) %>% print()
## gene avg_log2FC pct.1 pct.2 p_val_adj
## Tubb2a Tubb2a 2.5322529 0.173 0.062 2.512123e-02
## Gm20400 Gm20400 2.2472479 0.205 0.055 1.567392e-06
## P2ry10 P2ry10 1.9727135 0.236 0.098 5.517051e-03
## Ifngr1 Ifngr1 1.6971483 0.748 0.404 1.013364e-18
## Cenpa Cenpa 1.4593508 0.268 0.114 7.674045e-03
## Eno1 Eno1 1.2458947 0.488 0.257 7.006852e-05
## Gm20186 Gm20186 1.0865512 0.661 0.467 1.302930e-04
## Cblb Cblb 1.0735975 0.583 0.381 4.378591e-03
## Emb Emb 0.9861165 0.890 0.778 7.059731e-05
## Crlf3 Crlf3 0.9799556 0.843 0.642 1.163050e-07
## Ssh2 Ssh2 0.8054321 0.732 0.590 1.695829e-02
## Rsrp1 Rsrp1 0.7968474 0.937 0.792 3.504935e-08
## Gm2000 Gm2000 0.7033242 0.811 0.573 3.813668e-05
## Selenop Selenop 0.5950898 0.717 0.475 3.999458e-03
## Il7r Il7r 0.5802482 0.874 0.773 1.873355e-02
## Rpl22 Rpl22 0.3740871 1.000 0.948 2.842694e-03
## Ubb Ubb 0.3647927 0.976 0.967 1.918169e-03
## Rpl29 Rpl29 0.3397343 1.000 0.990 3.957375e-04
## Rpl24 Rpl24 0.3121637 0.992 0.986 1.964800e-02
## Eef1a1 Eef1a1 0.3118056 1.000 0.996 2.870242e-08
cat("\n=== Significant BM-enriched (Naive) ===\n")
##
## === Significant BM-enriched (Naive) ===
de_naive %>%
filter(avg_log2FC < 0, p_val_adj < 0.05) %>%
arrange(avg_log2FC) %>%
select(gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
head(20) %>% print()
## gene avg_log2FC pct.1 pct.2 p_val_adj
## Lamp1 Lamp1 -3.052206 0.039 0.263 5.398942e-04
## Alyref Alyref -2.610384 0.047 0.245 6.459482e-03
## Ccr9 Ccr9 -2.411585 0.087 0.301 3.493188e-03
## Cbx4 Cbx4 -2.383938 0.063 0.293 6.207037e-04
## Gm44174 Gm44174 -2.099778 0.118 0.371 1.865380e-04
## Tle5 Tle5 -2.030861 0.354 0.844 2.080934e-28
## Ppp1r14b Ppp1r14b -2.028207 0.094 0.320 2.636669e-03
## Dusp2 Dusp2 -1.915718 0.157 0.420 5.340605e-05
## Pcbp1 Pcbp1 -1.731345 0.213 0.511 4.062716e-07
## Smap1 Smap1 -1.691617 0.126 0.330 1.264928e-02
## Dhx36 Dhx36 -1.605892 0.181 0.368 4.337792e-02
## Cd226 Cd226 -1.550974 0.236 0.450 5.547021e-03
## Pten Pten -1.532090 0.189 0.418 1.999356e-03
## Gnb1 Gnb1 -1.503779 0.197 0.432 9.870828e-04
## Cd8a Cd8a -1.461509 0.197 0.411 1.910702e-02
## Ppp2ca Ppp2ca -1.412073 0.228 0.480 3.152742e-04
## Pabpn1 Pabpn1 -1.374874 0.228 0.537 4.359942e-06
## Psip1 Psip1 -1.353518 0.433 0.681 2.009874e-08
## Cd8b1 Cd8b1 -1.312228 0.307 0.509 4.026721e-03
## Rac1 Rac1 -1.308222 0.307 0.611 1.755666e-07
cd8_em <- subset(cd8, cells = colnames(cd8)[
cd8$cd8_class_final == "CD8_EffectorMemory"
])
cat("CD8_EffectorMemory — BM:", sum(cd8_em$Tissue == "BM"),
"CNS:", sum(cd8_em$Tissue == "CNS"), "\n")
## CD8_EffectorMemory — BM: 246 CNS: 173
DefaultAssay(cd8_em) <- "originalexp"
Idents(cd8_em) <- "Tissue"
de_em <- FindMarkers(
cd8_em,
ident.1 = "CNS",
ident.2 = "BM",
min.pct = 0.1,
logfc.threshold = 0.2,
test.use = "wilcox"
)
de_em$gene <- rownames(de_em)
de_em <- de_em %>% filter(!gene %in% c("Igkv8-27", "Hexb"))
# Save
write.csv(de_em, paste0(BASE_DIR, "16_DE_CD8_EffectorMemory_CNS_vs_BM.csv"))
cat("\n=== Significant CNS-enriched (EffectorMemory) ===\n")
##
## === Significant CNS-enriched (EffectorMemory) ===
de_em %>%
filter(avg_log2FC > 0, p_val_adj < 0.05) %>%
arrange(desc(avg_log2FC)) %>%
select(gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
head(20) %>% print()
## gene avg_log2FC pct.1 pct.2 p_val_adj
## Itga1 Itga1 4.9549745 0.116 0.004 6.906356e-03
## Actn2 Actn2 4.2993382 0.116 0.008 3.037135e-02
## Socs1 Socs1 1.5638079 0.324 0.118 2.566568e-03
## Bcl2 Bcl2 1.3390401 0.561 0.293 9.916415e-05
## Cish Cish 1.3102973 0.405 0.163 3.560541e-03
## Pde3b Pde3b 1.2289887 0.497 0.236 9.466770e-04
## Ltb Ltb 1.0452177 0.659 0.467 4.353920e-02
## Cxcr6 Cxcr6 0.8751228 0.861 0.638 2.840978e-06
## Fth1 Fth1 0.7523288 0.971 0.915 9.373437e-08
## Id2 Id2 0.7169672 0.948 0.813 3.854075e-07
## Pdcd1 Pdcd1 0.7121066 0.786 0.589 2.125140e-02
## Shisa5 Shisa5 0.5198572 1.000 0.955 2.857072e-04
## Cd3g Cd3g 0.3976863 0.983 0.976 3.042966e-02
cat("\n=== Significant BM-enriched (EffectorMemory) ===\n")
##
## === Significant BM-enriched (EffectorMemory) ===
de_em %>%
filter(avg_log2FC < 0, p_val_adj < 0.05) %>%
arrange(avg_log2FC) %>%
select(gene, avg_log2FC, pct.1, pct.2, p_val_adj) %>%
head(20) %>% print()
## gene avg_log2FC pct.1 pct.2 p_val_adj
## Sap30l Sap30l -3.472871 0.017 0.183 4.021400e-03
## Cx3cr1 Cx3cr1 -2.796472 0.069 0.313 2.286927e-05
## Srsf9 Srsf9 -2.293137 0.046 0.228 1.119509e-02
## Smap1 Smap1 -2.274688 0.185 0.610 3.659858e-15
## Pabpn1 Pabpn1 -2.088035 0.139 0.443 2.335918e-07
## Chic2 Chic2 -2.030218 0.121 0.374 4.705441e-05
## Tle5 Tle5 -1.923783 0.462 0.821 2.007052e-23
## Pcbp1 Pcbp1 -1.897547 0.133 0.423 3.471154e-06
## Klf2 Klf2 -1.797944 0.202 0.622 4.736790e-13
## Rac1 Rac1 -1.772328 0.289 0.679 4.388464e-13
## Ppp2ca Ppp2ca -1.730241 0.179 0.508 3.248475e-08
## Txndc5 Txndc5 -1.597127 0.110 0.305 4.089413e-02
## Rap1b Rap1b -1.522393 0.543 0.841 1.479788e-16
## Bag1 Bag1 -1.508549 0.156 0.366 2.289025e-02
## Zdhhc18 Zdhhc18 -1.470444 0.162 0.407 1.440292e-03
## Gzma Gzma -1.406087 0.133 0.354 9.509290e-03
## Psip1 Psip1 -1.404431 0.277 0.577 2.973652e-06
## Grb2 Grb2 -1.347852 0.306 0.541 3.607127e-04
## Gnb1 Gnb1 -1.331263 0.272 0.492 3.873065e-03
## Arglu1 Arglu1 -1.296344 0.266 0.533 2.631924e-04
# Key genes to track across all three DE analyses
track_genes <- c(
# Retention
"Itga1", "Cxcr6", "Klf2",
# Suppression
"Socs1", "Cish",
# Survival / IFN axis
"Bcl2", "Ifngr1",
# Pre-activation brake (naive-specific)
"Cblb", "Lck", "Cd8a",
# Effector lost
"Cx3cr1", "Gzmb", "Gzma", "Ccl3", "Ccl5",
# Signalling GTPases (BM-enriched)
"Rac1", "Rap1b", "Gnb1", "Grb2",
# Exhaustion TF
"Tox", "Nr4a1", "Id2", "Pdcd1"
)
# Build summary table
get_fc <- function(de_df, gene) {
if (!gene %in% de_df$gene) return(NA_real_)
de_df$avg_log2FC[de_df$gene == gene]
}
get_sig <- function(de_df, gene) {
if (!gene %in% de_df$gene) return("")
p <- de_df$p_val_adj[de_df$gene == gene]
case_when(p < 0.001 ~ "***", p < 0.01 ~ "**", p < 0.05 ~ "*", TRUE ~ "ns")
}
cross_table <- data.frame(
gene = track_genes,
Naive_FC = sapply(track_genes, get_fc, de_df = de_naive),
Naive_sig = sapply(track_genes, get_sig, de_df = de_naive),
EffMem_FC = sapply(track_genes, get_fc, de_df = de_em),
EffMem_sig = sapply(track_genes, get_sig, de_df = de_em),
NonNaive_FC = sapply(track_genes, get_fc, de_df = de_cd8),
NonNaive_sig = sapply(track_genes, get_sig, de_df = de_cd8),
stringsAsFactors = FALSE
) %>%
mutate(
n_sig = (Naive_sig %in% c("*","**","***")) +
(EffMem_sig %in% c("*","**","***")) +
(NonNaive_sig %in% c("*","**","***")),
consistent = n_sig >= 2
) %>%
arrange(desc(n_sig), desc(abs(NonNaive_FC)))
cat("=== Cross-population CNS vs BM consistency ===\n")
## === Cross-population CNS vs BM consistency ===
print(cross_table)
## gene Naive_FC Naive_sig EffMem_FC EffMem_sig NonNaive_FC
## Rac1 Rac1 -1.3082220 *** -1.7723281 *** -1.7658225
## Gnb1 Gnb1 -1.5037789 *** -1.3312627 ** -1.4112814
## Rap1b Rap1b -0.9092306 *** -1.5223928 *** -1.3417480
## Itga1 Itga1 NA 4.9549745 ** 5.4316979
## Cx3cr1 Cx3cr1 NA -2.7964719 *** -2.2907627
## Socs1 Socs1 0.2981150 ns 1.5638079 ** 1.5244681
## Klf2 Klf2 NA -1.7979436 *** -1.4080463
## Grb2 Grb2 -1.0354094 ns -1.3478516 *** -1.3526159
## Cish Cish NA 1.3102973 ** 1.3457616
## Bcl2 Bcl2 NA 1.3390401 *** 1.3115126
## Ifngr1 Ifngr1 1.6971483 *** 0.9510170 ns 1.2444885
## Ccl5 Ccl5 NA -0.7002649 *** -0.8036593
## Cxcr6 Cxcr6 NA 0.8751228 *** 0.6877332
## Id2 Id2 -0.2266070 ns 0.7169672 *** 0.5892804
## Ccl3 Ccl3 NA -1.3539602 ns -3.4097432
## Gzma Gzma NA -1.4060865 ** -1.1232990
## Cd8a Cd8a -1.4615091 * 0.7593601 ns 0.5917631
## Cblb Cblb 1.0735975 ** 0.2315359 ns NA
## Lck Lck -0.5480987 *** NA NA
## Pdcd1 Pdcd1 NA 0.7121066 * NA
## Tox Tox NA -0.2750036 ns -0.4715752
## Gzmb Gzmb NA NA NA
## Nr4a1 Nr4a1 NA NA NA
## NonNaive_sig n_sig consistent
## Rac1 *** 3 TRUE
## Gnb1 *** 3 TRUE
## Rap1b *** 3 TRUE
## Itga1 *** 2 TRUE
## Cx3cr1 * 2 TRUE
## Socs1 ** 2 TRUE
## Klf2 *** 2 TRUE
## Grb2 *** 2 TRUE
## Cish *** 2 TRUE
## Bcl2 *** 2 TRUE
## Ifngr1 *** 2 TRUE
## Ccl5 *** 2 TRUE
## Cxcr6 *** 2 TRUE
## Id2 *** 2 TRUE
## Ccl3 ** 1 FALSE
## Gzma ns 1 FALSE
## Cd8a ns 1 FALSE
## Cblb 1 FALSE
## Lck 1 FALSE
## Pdcd1 1 FALSE
## Tox ns 0 FALSE
## Gzmb 0 FALSE
## Nr4a1 0 FALSE
cat("\n=== Genes significant in ≥2 of 3 comparisons ===\n")
##
## === Genes significant in ≥2 of 3 comparisons ===
cross_table %>% filter(consistent) %>% print()
## gene Naive_FC Naive_sig EffMem_FC EffMem_sig NonNaive_FC
## Rac1 Rac1 -1.3082220 *** -1.7723281 *** -1.7658225
## Gnb1 Gnb1 -1.5037789 *** -1.3312627 ** -1.4112814
## Rap1b Rap1b -0.9092306 *** -1.5223928 *** -1.3417480
## Itga1 Itga1 NA 4.9549745 ** 5.4316979
## Cx3cr1 Cx3cr1 NA -2.7964719 *** -2.2907627
## Socs1 Socs1 0.2981150 ns 1.5638079 ** 1.5244681
## Klf2 Klf2 NA -1.7979436 *** -1.4080463
## Grb2 Grb2 -1.0354094 ns -1.3478516 *** -1.3526159
## Cish Cish NA 1.3102973 ** 1.3457616
## Bcl2 Bcl2 NA 1.3390401 *** 1.3115126
## Ifngr1 Ifngr1 1.6971483 *** 0.9510170 ns 1.2444885
## Ccl5 Ccl5 NA -0.7002649 *** -0.8036593
## Cxcr6 Cxcr6 NA 0.8751228 *** 0.6877332
## Id2 Id2 -0.2266070 ns 0.7169672 *** 0.5892804
## NonNaive_sig n_sig consistent
## Rac1 *** 3 TRUE
## Gnb1 *** 3 TRUE
## Rap1b *** 3 TRUE
## Itga1 *** 2 TRUE
## Cx3cr1 * 2 TRUE
## Socs1 ** 2 TRUE
## Klf2 *** 2 TRUE
## Grb2 *** 2 TRUE
## Cish *** 2 TRUE
## Bcl2 *** 2 TRUE
## Ifngr1 *** 2 TRUE
## Ccl5 *** 2 TRUE
## Cxcr6 *** 2 TRUE
## Id2 *** 2 TRUE
# Genes significant in ≥2 comparisons — the consistent CNS programme
consistent_genes <- cross_table %>%
filter(consistent) %>%
pull(gene)
cat("Consistent genes (≥2/3 DE analyses):",
paste(consistent_genes, collapse = ", "), "\n")
## Consistent genes (≥2/3 DE analyses): Rac1, Gnb1, Rap1b, Itga1, Cx3cr1, Socs1, Klf2, Grb2, Cish, Bcl2, Ifngr1, Ccl5, Cxcr6, Id2
# Build long format FC table for plotting
fc_long <- cross_table %>%
filter(gene %in% consistent_genes) %>%
select(gene, Naive_FC, EffMem_FC, NonNaive_FC) %>%
pivot_longer(
cols = c(Naive_FC, EffMem_FC, NonNaive_FC),
names_to = "Population",
values_to = "log2FC"
) %>%
mutate(
Population = recode(Population,
"Naive_FC" = "CD8_Naive\n(n BM=1610, CNS=127)",
"EffMem_FC" = "CD8_EffectorMemory\n(n BM=246, CNS=173)",
"NonNaive_FC" = "Non-naive pooled\n(n BM=362, CNS=190)"
),
module = case_when(
gene %in% c("Itga1", "Cxcr6", "Klf2") ~ "Retention",
gene %in% c("Socs1", "Cish") ~ "Suppression",
gene %in% c("Bcl2", "Ifngr1") ~ "Survival/IFN",
gene %in% c("Cblb", "Lck", "Cd8a") ~ "Pre-activation\nbrake",
gene %in% c("Cx3cr1","Gzmb","Gzma",
"Ccl3","Ccl5") ~ "Effector\n(lost)",
gene %in% c("Rac1","Rap1b","Gnb1","Grb2") ~ "Signalling\nGTPases",
gene %in% c("Tox","Nr4a1","Id2","Pdcd1") ~ "Exhaustion TF",
TRUE ~ "Other"
),
sig = case_when(
Population == "CD8_Naive\n(n BM=1610, CNS=127)" ~
cross_table$Naive_sig[match(gene, cross_table$gene)],
Population == "CD8_EffectorMemory\n(n BM=246, CNS=173)" ~
cross_table$EffMem_sig[match(gene, cross_table$gene)],
TRUE ~
cross_table$NonNaive_sig[match(gene, cross_table$gene)]
)
) %>%
filter(!is.na(log2FC)) %>%
mutate(
module = factor(module, levels = c(
"Pre-activation\nbrake", "Retention", "Suppression",
"Survival/IFN", "Effector\n(lost)",
"Signalling\nGTPases", "Exhaustion TF"
))
)
# Dot plot: x = gene, y = population, size = |FC|, colour = direction
supp_fig <- ggplot(fc_long,
aes(x = gene,
y = Population,
size = abs(log2FC),
fill = log2FC)) +
geom_point(shape = 21, colour = "white", stroke = 0.3) +
geom_text(aes(label = sig), size = 2.8, vjust = -1.2, colour = "grey20") +
facet_grid(. ~ module, scales = "free_x", space = "free_x") +
scale_fill_gradientn(
colours = c("#2166ac", "white", "#b2182b"),
values = scales::rescale(c(-4, 0, 4)),
limits = c(-4, 4),
oob = scales::squish,
name = "log2FC\n(CNS vs BM)"
) +
scale_size_continuous(range = c(2, 9), name = "|log2FC|") +
theme_classic(base_size = 11) +
theme(
axis.text.x = element_text(angle = 40, hjust = 1, face = "italic"),
axis.text.y = element_text(size = 9),
strip.background = element_rect(fill = "grey92", colour = NA),
strip.text = element_text(size = 8, face = "bold"),
panel.spacing = unit(0.25, "cm"),
legend.position = "right"
) +
labs(
title = "CNS dysfunction programme — consistency across CD8 differentiation states",
subtitle = paste0(
"Red = CNS-enriched; Blue = BM-enriched (lost in CNS)\n",
"Stars = BH-adjusted significance: *** p<0.001, ** p<0.01, * p<0.05"
),
x = NULL, y = NULL
)
supp_fig
cat("=== Stage-specific vs shared components ===\n\n")
## === Stage-specific vs shared components ===
naive_only <- cross_table %>%
filter(Naive_sig %in% c("*","**","***"),
!EffMem_sig %in% c("*","**","***"),
!NonNaive_sig %in% c("*","**","***")) %>%
pull(gene)
em_only <- cross_table %>%
filter(!Naive_sig %in% c("*","**","***"),
EffMem_sig %in% c("*","**","***"),
!NonNaive_sig %in% c("*","**","***")) %>%
pull(gene)
shared_all <- cross_table %>%
filter(Naive_sig %in% c("*","**","***"),
EffMem_sig %in% c("*","**","***"),
NonNaive_sig %in% c("*","**","***")) %>%
pull(gene)
em_and_nonnative <- cross_table %>%
filter(!Naive_sig %in% c("*","**","***"),
EffMem_sig %in% c("*","**","***"),
NonNaive_sig %in% c("*","**","***")) %>%
pull(gene)
cat("Significant in ALL THREE comparisons (core programme):\n")
## Significant in ALL THREE comparisons (core programme):
print(shared_all)
## [1] "Rac1" "Gnb1" "Rap1b"
cat("\nSignificant in EffectorMemory + NonNaive but NOT Naive",
"(acquired post-activation):\n")
##
## Significant in EffectorMemory + NonNaive but NOT Naive (acquired post-activation):
print(em_and_nonnative)
## [1] "Itga1" "Cx3cr1" "Socs1" "Klf2" "Grb2" "Cish" "Bcl2" "Ccl5"
## [9] "Cxcr6" "Id2"
cat("\nSignificant in Naive ONLY (pre-activation brake):\n")
##
## Significant in Naive ONLY (pre-activation brake):
print(naive_only)
## [1] "Cd8a" "Cblb" "Lck"
cat("\nInterpretation:\n")
##
## Interpretation:
cat(" Core programme (all states): shared retention/suppression/effector loss\n")
## Core programme (all states): shared retention/suppression/effector loss
cat(" Post-activation acquired: Itga1, Socs1, Cish — imprinted on activated cells\n")
## Post-activation acquired: Itga1, Socs1, Cish — imprinted on activated cells
cat(" Pre-activation brake: Cblb — dampens TCR signalling before any antigen exposure\n")
## Pre-activation brake: Cblb — dampens TCR signalling before any antigen exposure
# Add final labels and scores to full immune object
cd8_meta <- cd8@meta.data %>%
select(cd8_class_final, checkpoint_burden)
seu <- AddMetaData(seu, metadata = cd8_meta)
qs_save(cd8, paste0(BASE_DIR, "16_cd8_final.qs"))
qs_save(seu, paste0(BASE_DIR, "16_immune_final.qs"))
cat("Saved: 16_cd8_final.qs\n")
## Saved: 16_cd8_final.qs
cat("Saved: 16_immune_final.qs\n\n")
## Saved: 16_immune_final.qs
cat("=== Final CD8 annotation: cd8_class_final ===\n")
## === Final CD8 annotation: cd8_class_final ===
cat(" CD8_Naive — validated naive (ProjecTILs)\n")
## CD8_Naive — validated naive (ProjecTILs)
cat(" CD8_EarlyActiv — early activated\n")
## CD8_EarlyActiv — early activated
cat(" CD8_EffectorMemory — effector memory\n")
## CD8_EffectorMemory — effector memory
cat(" CD8_Tex_intermediate — intermediate exhausted (Texint; original Tpex relabelled)\n")
## CD8_Tex_intermediate — intermediate exhausted (Texint; original Tpex relabelled)
cat(" CD8_Tex — terminally exhausted\n")
## CD8_Tex — terminally exhausted
cat(" [No Tpex] — Tpex absent; confirmed by ProjecTILs + Tcf7 + flow\n\n")
## [No Tpex] — Tpex absent; confirmed by ProjecTILs + Tcf7 + flow
cat("Use 16_immune_final.qs as input for Script 17 (NicheNet).\n")
## Use 16_immune_final.qs as input for Script 17 (NicheNet).
sessionInfo()
## R version 4.4.2 (2024-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS 26.3.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Europe/London
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggpubr_0.6.3 pheatmap_1.0.13 ggrepel_0.9.6 RColorBrewer_1.1-3
## [5] tibble_3.3.1 tidyr_1.3.2 UCell_2.10.1 qs2_0.1.7
## [9] patchwork_1.3.2 dplyr_1.2.0 ggplot2_4.0.2 Seurat_5.4.0
## [13] SeuratObject_5.3.0 sp_2.2-1
##
## loaded via a namespace (and not attached):
## [1] rstudioapi_0.18.0 jsonlite_2.0.0
## [3] magrittr_2.0.4 spatstat.utils_3.2-2
## [5] farver_2.1.2 rmarkdown_2.30
## [7] zlibbioc_1.52.0 vctrs_0.7.1
## [9] ROCR_1.0-12 spatstat.explore_3.7-0
## [11] rstatix_0.7.3 S4Arrays_1.6.0
## [13] htmltools_0.5.9 broom_1.0.12
## [15] BiocNeighbors_2.0.1 Formula_1.2-5
## [17] SparseArray_1.6.2 sass_0.4.10
## [19] sctransform_0.4.3 parallelly_1.46.1
## [21] KernSmooth_2.23-26 bslib_0.10.0
## [23] htmlwidgets_1.6.4 ica_1.0-3
## [25] plyr_1.8.9 plotly_4.12.0
## [27] zoo_1.8-15 cachem_1.1.0
## [29] igraph_2.2.2 mime_0.13
## [31] lifecycle_1.0.5 pkgconfig_2.0.3
## [33] Matrix_1.7-4 R6_2.6.1
## [35] fastmap_1.2.0 GenomeInfoDbData_1.2.13
## [37] MatrixGenerics_1.18.1 fitdistrplus_1.2-6
## [39] future_1.69.0 shiny_1.13.0
## [41] digest_0.6.39 S4Vectors_0.44.0
## [43] tensor_1.5.1 RSpectra_0.16-2
## [45] irlba_2.3.7 GenomicRanges_1.58.0
## [47] labeling_0.4.3 progressr_0.18.0
## [49] spatstat.sparse_3.1-0 httr_1.4.8
## [51] polyclip_1.10-7 abind_1.4-8
## [53] compiler_4.4.2 withr_3.0.2
## [55] backports_1.5.0 S7_0.2.1
## [57] BiocParallel_1.40.2 carData_3.0-6
## [59] fastDummies_1.7.5 ggsignif_0.6.4
## [61] MASS_7.3-65 DelayedArray_0.32.0
## [63] tools_4.4.2 lmtest_0.9-40
## [65] otel_0.2.0 httpuv_1.6.16
## [67] future.apply_1.20.2 goftest_1.2-3
## [69] glue_1.8.0 nlme_3.1-168
## [71] promises_1.5.0 grid_4.4.2
## [73] Rtsne_0.17 cluster_2.1.8.2
## [75] reshape2_1.4.5 generics_0.1.4
## [77] gtable_0.3.6 spatstat.data_3.1-9
## [79] data.table_1.18.2.1 utf8_1.2.6
## [81] car_3.1-5 XVector_0.46.0
## [83] stringfish_0.18.0 BiocGenerics_0.52.0
## [85] spatstat.geom_3.7-0 RcppAnnoy_0.0.23
## [87] RANN_2.6.2 pillar_1.11.1
## [89] stringr_1.6.0 limma_3.62.2
## [91] spam_2.11-3 RcppHNSW_0.6.0
## [93] later_1.4.8 splines_4.4.2
## [95] lattice_0.22-9 survival_3.8-6
## [97] deldir_2.0-4 tidyselect_1.2.1
## [99] SingleCellExperiment_1.28.1 miniUI_0.1.2
## [101] pbapply_1.7-4 knitr_1.51
## [103] gridExtra_2.3 IRanges_2.40.1
## [105] SummarizedExperiment_1.36.0 scattermore_1.2
## [107] stats4_4.4.2 xfun_0.56
## [109] Biobase_2.66.0 statmod_1.5.1
## [111] matrixStats_1.5.0 UCSC.utils_1.2.0
## [113] stringi_1.8.7 lazyeval_0.2.2
## [115] yaml_2.3.12 evaluate_1.0.5
## [117] codetools_0.2-20 cli_3.6.5
## [119] uwot_0.2.4 RcppParallel_5.1.11-2
## [121] xtable_1.8-8 reticulate_1.45.0
## [123] jquerylib_0.1.4 GenomeInfoDb_1.42.3
## [125] dichromat_2.0-0.1 Rcpp_1.1.1
## [127] globals_0.19.1 spatstat.random_3.4-4
## [129] png_0.1-8 spatstat.univar_3.1-6
## [131] parallel_4.4.2 presto_1.0.0
## [133] dotCall64_1.2 listenv_0.10.1
## [135] viridisLite_0.4.3 scales_1.4.0
## [137] ggridges_0.5.7 crayon_1.5.3
## [139] purrr_1.2.1 rlang_1.1.7
## [141] cowplot_1.2.0