Overview
This script regenerates all manuscript figures from
the saved projection object. The reference object is loaded
solely to extract UMAP coordinates for the grey
background.
Objects required
reference_integrated |
../../1-Final_Custom_MST_Monocle3_Trajectory_and_mapping/1-Custom_MST/Objects/cd4_ref_dual_trajectory.rds |
UMAP background only — grey reference dots |
mapped_MalignantCD4T |
results/MalignantCD4T_Monocle3_projection/MalignantCD4T_mapped_monocle3_pseudotime_noProlif_ref.Rds |
All biology — pseudotime, labels, bins, colour
scales |
Key design
decisions
Pseudotime colour-scale limits (PT_LIMITS) |
mapped_MalignantCD4T$predicted.pseudotime |
| Bin boundaries |
Midpoints between actual reference state medians —
computed at runtime from ref_bg |
State medians (REF_MEDIANS) |
Computed at runtime from ref_bg — never
hard-coded |
| All bar charts, violins, density plots |
mapped_MalignantCD4T only |
Panel B fix
Previously called FeaturePlot(reference_integrated, ...)
without limits, producing a spurious 1.0–2.0 colour scale.
Fixed: Panel B plots Sézary cells coloured by
predicted.pseudotime with limits = PT_LIMITS
(correct 0–25 scale).
Bin design
Bins are anchored to actual reference state medians
computed from ref_bg. Boundaries = midpoint between
adjacent state medians (biology-principled, not arbitrary). Treg-like
bin is assigned by Azimuth label (Treg is a branch, not a linear
position).
1. Setup &
Libraries
suppressPackageStartupMessages({
library(Seurat)
library(ggplot2)
library(dplyr)
library(tidyr)
library(forcats)
library(patchwork)
library(scales)
library(RColorBrewer)
library(ggridges)
library(knitr)
library(kableExtra)
})
Registered S3 methods overwritten by 'htmltools':
method from
print.html tools:rstudio
print.shiny.tag tools:rstudio
print.shiny.tag.list tools:rstudio
Registered S3 method overwritten by 'rmarkdown':
method from
print.paged_df
dir.create("Figures/PNG", recursive = TRUE, showWarnings = FALSE)
dir.create("Figures/PDF", recursive = TRUE, showWarnings = FALSE)
save_fig <- function(p, name, w = 10, h = 8) {
ggsave(file.path("Figures/PNG", paste0(name, ".png")),
p, width = w, height = h, dpi = 300, bg = "white")
ggsave(file.path("Figures/PDF", paste0(name, ".pdf")),
p, width = w, height = h, useDingbats = FALSE)
invisible(cat(sprintf(" \u2713 %s [PNG + PDF]\n", name)))
}
STATE_COLORS <- c(
"CD4 Naive" = "#4472C4",
"CD4 TCM" = "#70AD47",
"CD4 TEM" = "#ED7D31",
"CD4 Temra/CTL" = "#C00000",
"Treg" = "#7030A0"
)
BIN_COLORS <- c(
"Naive-like" = "#4472C4",
"TCM-like" = "#70AD47",
"TEM-like" = "#ED7D31",
"Temra-like" = "#C00000",
"Treg-like" = "#7030A0"
)
STATE_ORDER <- c("CD4 Naive", "CD4 TCM", "CD4 TEM", "CD4 Temra/CTL", "Treg")
BIN_ORDER <- c("Naive-like", "TCM-like", "TEM-like", "Temra-like", "Treg-like")
LINE_ORDER <- paste0("L", 1:7)
cat("Setup complete.\n")
Setup complete.
2. Load & Validate
Objects
ref_path <- paste0(
"../../1-Final_Custom_MST_Monocle3_Trajectory_and_mapping/",
"1-Custom_MST/Objects/cd4_ref_dual_trajectory.rds"
)
reference_integrated <- readRDS(ref_path)
cat("Reference loaded:", ncol(reference_integrated), "cells\n")
Reference loaded: 11466 cells
stopifnot(
"umap reduction missing from reference" =
"umap" %in% names(reference_integrated@reductions)
)
obj_path <- paste0(
"../results/MalignantCD4T_Monocle3_projection/",
"MalignantCD4T_mapped_monocle3_pseudotime_noProlif_ref.Rds"
)
mapped_MalignantCD4T <- readRDS(obj_path)
cat("Query loaded: ", ncol(mapped_MalignantCD4T), "cells\n")
Query loaded: 40695 cells
stopifnot(
"ref.umap reduction missing — re-run MapQuery" =
"ref.umap" %in% names(mapped_MalignantCD4T@reductions),
"predicted.pseudotime column missing" =
"predicted.pseudotime" %in% colnames(mapped_MalignantCD4T@meta.data),
"predicted.predicted.celltype.l2 column missing" =
"predicted.predicted.celltype.l2" %in% colnames(mapped_MalignantCD4T@meta.data)
)
if (!"cell_line" %in% colnames(mapped_MalignantCD4T@meta.data)) {
if ("orig.ident" %in% colnames(mapped_MalignantCD4T@meta.data)) {
mapped_MalignantCD4T$cell_line <- mapped_MalignantCD4T$orig.ident
cat("NOTE: using orig.ident as cell_line\n")
} else {
stop("Cannot find cell_line or orig.ident in metadata")
}
}
cat("\npredicted.pseudotime summary (expect Min~0, Max~25):\n")
predicted.pseudotime summary (expect Min~0, Max~25):
print(summary(mapped_MalignantCD4T$predicted.pseudotime))
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.156 16.431 17.703 17.655 19.487 25.553
cat("\nTransferred label distribution:\n")
Transferred label distribution:
print(sort(table(mapped_MalignantCD4T$predicted.predicted.celltype.l2,
useNA = "ifany"), decreasing = TRUE))
CD4 TCM CD4 TEM Treg CD4 Naive
40291 304 79 21
cat("\nCell line sizes:\n")
Cell line sizes:
print(sort(table(mapped_MalignantCD4T$cell_line, useNA = "ifany")))
L6 L7 L1 L2 L4 L5 L3
5148 5331 5825 5935 6006 6022 6428
3. Build Reference
Background (ref_bg)
# ref_bg serves two purposes:
# 1. Grey background dots in all projection UMAPs
# 2. Source for computing actual reference state medians (REF_MEDIANS)
ref_bg <- data.frame(
Embeddings(reference_integrated, "umap"),
state = reference_integrated$predicted.celltype.l2,
pseudotime = reference_integrated$monocle3_pseudotime,
stringsAsFactors = FALSE
)
colnames(ref_bg)[1:2] <- c("UMAP_1", "UMAP_2")
ref_bg$state <- factor(ref_bg$state, levels = STATE_ORDER)
cat("Reference monocle3_pseudotime (should be 0-25):\n")
Reference monocle3_pseudotime (should be 0-25):
print(summary(ref_bg$pseudotime))
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.000 3.855 7.404 9.926 15.525 25.356
ref_bg_plot <- ref_bg[sample(nrow(ref_bg)), ]
cat(sprintf("\nref_bg ready: %d reference cells\n", nrow(ref_bg)))
ref_bg ready: 11466 reference cells
4. Compute Reference
State Medians & Bin Boundaries
# ══════════════════════════════════════════════════════════════════════════
# REF_MEDIANS computed at runtime from actual ref_bg — NOT hard-coded.
#
# Diagnostic confirmed these actual values:
# CD4 Naive median = 3.293
# CD4 TCM median = 9.377 (was hard-coded as 13.36 — WRONG)
# CD4 TEM median = 24.840
# CD4 Temra/CTL median = 24.840 (only 10 cells)
# Treg median = 11.150 (was hard-coded as 18.75 — above Treg max of 17.40, WRONG)
#
# Bin boundaries derived as midpoints between adjacent state medians.
# ══════════════════════════════════════════════════════════════════════════
ref_state_medians <- ref_bg %>%
filter(!is.na(pseudotime), !is.na(state)) %>%
group_by(state) %>%
summarise(
n = n(),
med_pt = round(median(pseudotime, na.rm = TRUE), 3),
.groups = "drop"
) %>%
arrange(med_pt)
cat("=== Reference state medians (data-derived) ===\n")
=== Reference state medians (data-derived) ===
print(ref_state_medians)
get_med <- function(s) {
v <- ref_state_medians$med_pt[ref_state_medians$state == s]
if (length(v) == 0) stop(paste("State not found in ref_bg:", s))
v
}
naive_med <- get_med("CD4 Naive")
tcm_med <- get_med("CD4 TCM")
tem_med <- get_med("CD4 TEM")
temra_med <- get_med("CD4 Temra/CTL")
treg_med <- get_med("Treg")
BIN_NAIVE_TCM <- round((naive_med + tcm_med) / 2, 3)
BIN_TCM_TEM <- round((tcm_med + tem_med) / 2, 3)
BIN_TEM_TEMRA <- round((tem_med + temra_med) / 2, 3)
cat("\n=== Bin boundaries (midpoints between adjacent state medians) ===\n")
=== Bin boundaries (midpoints between adjacent state medians) ===
cat(sprintf(" Naive | TCM : %.3f\n", BIN_NAIVE_TCM))
Naive | TCM : 6.335
cat(sprintf(" TCM | TEM : %.3f\n", BIN_TCM_TEM))
TCM | TEM : 17.108
cat(sprintf(" TEM | Temra : %.3f\n", BIN_TEM_TEMRA))
TEM | Temra : 24.840
cat(sprintf(" Treg : label-based (branch logic)\n"))
Treg : label-based (branch logic)
if (abs(tem_med - temra_med) < 0.5) {
cat("\nNOTE: TEM and Temra medians are very close (n=10 Temra cells in reference).\n")
cat(" Temra-like bin will be negligible — biologically expected.\n")
}
NOTE: TEM and Temra medians are very close (n=10 Temra cells in reference).
Temra-like bin will be negligible — biologically expected.
REF_MEDIANS <- data.frame(
state = factor(STATE_ORDER, levels = STATE_ORDER),
med_pt = c(naive_med, tcm_med, tem_med, temra_med, treg_med)
)
cat("\n=== REF_MEDIANS (used for reference lines in all figures) ===\n")
=== REF_MEDIANS (used for reference lines in all figures) ===
print(REF_MEDIANS)
# ── REF_MEDIANS_EFFECTOR: Treg excluded ───────────────────────────────────
# Treg (median PT=11.15) and TCM (median PT=9.38) overlap entirely in
# pseudotime space — both states share the Naive→TCM graph segment.
# Showing a Treg line at PT=11.15 implies a pseudotime boundary that does
# not exist and misleads interpretation. Treg-like cells are assigned by
# transferred Azimuth label (KNN), not by pseudotime position.
REF_MEDIANS_EFFECTOR <- REF_MEDIANS %>%
filter(state != "Treg")
LINETYPE_EFFECTOR <- c(
"CD4 Naive" = "dotted",
"CD4 TCM" = "dashed",
"CD4 TEM" = "longdash",
"CD4 Temra/CTL" = "solid"
)
cat("=== REF_MEDIANS_EFFECTOR (Treg excluded) ===\n")
=== REF_MEDIANS_EFFECTOR (Treg excluded) ===
print(REF_MEDIANS_EFFECTOR)
5. Assign Pseudotime
Bins
assign_bin <- function(pt, lbl) {
if (isTRUE(grepl("Treg", lbl, ignore.case = TRUE))) return("Treg-like")
if (is.na(pt)) return(NA_character_)
if (pt <= BIN_NAIVE_TCM) return("Naive-like")
if (pt <= BIN_TCM_TEM) return("TCM-like")
if (pt <= BIN_TEM_TEMRA) return("TEM-like")
return("Temra-like")
}
mapped_MalignantCD4T$pseudotime_bin <- factor(
mapply(assign_bin,
pt = mapped_MalignantCD4T$predicted.pseudotime,
lbl = mapped_MalignantCD4T$predicted.predicted.celltype.l2),
levels = BIN_ORDER
)
cat("Pseudotime bin distribution (all cells):\n")
Pseudotime bin distribution (all cells):
print(table(mapped_MalignantCD4T$pseudotime_bin, useNA = "ifany"))
Naive-like TCM-like TEM-like Temra-like Treg-like
2287 13298 23485 1546 79
cat("\nPseudotime bin per cell line:\n")
Pseudotime bin per cell line:
print(table(mapped_MalignantCD4T$cell_line,
mapped_MalignantCD4T$pseudotime_bin, useNA = "ifany"))
Naive-like TCM-like TEM-like Temra-like Treg-like
L1 1407 1104 2630 682 2
L2 833 1593 3324 185 0
L3 4 2357 3613 447 7
L4 37 1384 4569 11 5
L5 3 3419 2414 178 8
L6 0 1815 3244 43 46
L7 3 1626 3691 0 11
6. Build Shared Data
Frames
query_df <- data.frame(
Embeddings(mapped_MalignantCD4T, "ref.umap"),
pseudotime = mapped_MalignantCD4T$predicted.pseudotime,
cell_line = factor(mapped_MalignantCD4T$cell_line, levels = LINE_ORDER),
label = mapped_MalignantCD4T$predicted.predicted.celltype.l2,
pseudotime_bin = mapped_MalignantCD4T$pseudotime_bin,
stringsAsFactors = FALSE
)
colnames(query_df)[1:2] <- c("UMAP_1", "UMAP_2")
PT_LIMITS <- range(query_df$pseudotime, na.rm = TRUE)
cat(sprintf("Pseudotime colour limits: %.3f to %.3f\n", PT_LIMITS[1], PT_LIMITS[2]))
Pseudotime colour limits: 1.156 to 25.553
label_df <- query_df %>%
filter(!is.na(label), !is.na(cell_line)) %>%
count(cell_line, label) %>%
group_by(cell_line) %>%
mutate(pct = 100 * n / sum(n)) %>%
ungroup() %>%
mutate(label = factor(label, levels = STATE_ORDER))
bin_df <- query_df %>%
filter(!is.na(pseudotime_bin), !is.na(cell_line)) %>%
count(cell_line, pseudotime_bin) %>%
group_by(cell_line) %>%
mutate(pct = 100 * n / sum(n)) %>%
ungroup()
cat("Data frames ready.\n")
Data frames ready.
7. Figure 7 —
Four-Panel Summary
bg_layer <- geom_point(
data = ref_bg_plot, aes(x = UMAP_1, y = UMAP_2),
colour = "grey68", size = 0.25, alpha = 0.45, inherit.aes = FALSE
)
p7a <- ggplot(query_df[sample(nrow(query_df)), ],
aes(UMAP_1, UMAP_2, colour = factor(label, levels = STATE_ORDER))) +
bg_layer +
geom_point(size = 0.35, alpha = 0.7) +
scale_colour_manual(values = STATE_COLORS, name = "Transferred\nLabel",
na.value = "grey80") +
guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1))) +
theme_classic() +
labs(x = "UMAP-1", y = "UMAP-2",
title = "A. Sézary Cells — Transferred CD4 T Cell Labels") +
theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"))
p7b <- ggplot(query_df[order(query_df$pseudotime, na.last = FALSE), ],
aes(UMAP_1, UMAP_2, colour = pseudotime)) +
bg_layer +
geom_point(size = 0.35, alpha = 0.8) +
scale_colour_gradientn(
colours = c("lightblue", "yellow", "red"),
name = "Pseudotime",
limits = PT_LIMITS,
na.value = "grey85"
) +
theme_classic() +
labs(x = "UMAP-1", y = "UMAP-2",
title = "B. Sézary Cells — Transferred Pseudotime [FIXED]") +
theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"))
p7c <- ggplot(query_df[sample(nrow(query_df)), ],
aes(UMAP_1, UMAP_2, colour = pseudotime_bin)) +
bg_layer +
geom_point(size = 0.35, alpha = 0.7) +
scale_colour_manual(values = BIN_COLORS, name = "Pseudotime\nBin",
na.value = "grey80") +
guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1))) +
theme_classic() +
labs(x = "UMAP-1", y = "UMAP-2",
title = "C. Sézary Cells — Pseudotime Bin") +
theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"))
p7d <- ggplot(label_df, aes(cell_line, pct, fill = label)) +
geom_col(width = 0.75) +
geom_text(data = label_df %>% filter(pct > 3),
aes(label = sprintf("%.1f%%", pct)),
position = position_stack(vjust = 0.5),
size = 3, colour = "white", fontface = "bold") +
scale_fill_manual(values = STATE_COLORS, name = "Label") +
scale_y_continuous(expand = c(0, 0), limits = c(0, 101)) +
theme_classic() +
labs(x = "Cell Line", y = "% Cells",
title = "D. Transferred Label per Cell Line") +
theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"),
axis.text.x = element_text(size = 10, face = "bold"))
fig7 <- (p7a | p7b) / (p7c | p7d)
print(fig7)

save_fig(fig7, "Fig7_FourPanel_Summary_FIXED", w = 18, h = 14)
✓ Fig7_FourPanel_Summary_FIXED [PNG + PDF]
save_fig(p7a, "Fig7A_Transferred_Labels", w = 9, h = 7)
✓ Fig7A_Transferred_Labels [PNG + PDF]
save_fig(p7b, "Fig7B_Transferred_Pseudotime_FIXED", w = 9, h = 7)
✓ Fig7B_Transferred_Pseudotime_FIXED [PNG + PDF]
save_fig(p7c, "Fig7C_Pseudotime_Bins_UMAP", w = 9, h = 7)
✓ Fig7C_Pseudotime_Bins_UMAP [PNG + PDF]
save_fig(p7d, "Fig7D_Label_BarChart_per_Line", w = 9, h = 7)
✓ Fig7D_Label_BarChart_per_Line [PNG + PDF]
8. Figure 1 —
Pseudotime Violin per Cell Line
fig1 <- ggplot(
query_df %>% filter(!is.na(pseudotime), !is.na(cell_line)),
aes(cell_line, pseudotime, fill = cell_line)
) +
geom_violin(scale = "width", trim = FALSE, alpha = 0.85, colour = "white") +
geom_boxplot(width = 0.06, fill = "white", colour = "grey40",
outlier.size = 0.3, outlier.alpha = 0.3) +
geom_hline(data = REF_MEDIANS,
aes(yintercept = med_pt, colour = state, linetype = state),
linewidth = 0.7, alpha = 0.9) +
scale_fill_brewer(palette = "Set2", guide = "none") +
scale_colour_manual(values = STATE_COLORS, name = "Reference\nmedian") +
scale_linetype_manual(
values = c("CD4 Naive" = "dotted", "CD4 TCM" = "dashed",
"CD4 TEM" = "longdash", "CD4 Temra/CTL" = "solid",
"Treg" = "twodash"),
name = "Reference\nmedian"
) +
scale_y_continuous(breaks = seq(0, 25, 5)) +
theme_classic() +
labs(
x = "Cell Line",
y = "Transferred Pseudotime",
title = "Figure 1 — Pseudotime Distribution per Sézary Cell Line",
subtitle = sprintf(
"Reference medians: Naive=%.2f | TCM=%.2f | Treg=%.2f | TEM=%.2f | Temra=%.2f",
naive_med, tcm_med, treg_med, tem_med, temra_med)
) +
theme(plot.title = element_text(size = 13, face = "bold"),
plot.subtitle = element_text(size = 9, colour = "grey40"),
axis.text.x = element_text(size = 11, face = "bold"))
print(fig1)

save_fig(fig1, "Fig1_Pseudotime_Violin_per_Line", w = 12, h = 6)
✓ Fig1_Pseudotime_Violin_per_Line [PNG + PDF]
fig1v2 <- ggplot(
query_df %>% filter(!is.na(pseudotime), !is.na(cell_line)),
aes(cell_line, pseudotime, fill = cell_line)
) +
geom_violin(scale = "width", trim = FALSE, alpha = 0.85, colour = "white") +
geom_boxplot(width = 0.06, fill = "white", colour = "grey40",
outlier.size = 0.3, outlier.alpha = 0.3) +
geom_hline(
data = REF_MEDIANS_EFFECTOR,
aes(yintercept = med_pt, colour = state, linetype = state),
linewidth = 0.7, alpha = 0.9
) +
scale_fill_brewer(palette = "Set2", guide = "none") +
scale_colour_manual(
values = STATE_COLORS[names(STATE_COLORS) != "Treg"],
name = "Reference\nmedian"
) +
scale_linetype_manual(
values = LINETYPE_EFFECTOR,
name = "Reference\nmedian"
) +
scale_y_continuous(breaks = seq(0, 25, 5)) +
theme_classic() +
labs(
x = "Cell Line",
y = "Transferred Pseudotime",
title = "Figure 1 — Pseudotime Distribution per Sézary Cell Line",
subtitle = sprintf(
"Effector axis medians: Naive=%.2f | TCM=%.2f | TEM=%.2f | Temra=%.2f\nTreg line omitted — Treg (PT=%.2f) overlaps TCM (PT=%.2f); Treg-like assigned by label",
naive_med, tcm_med, tem_med, temra_med, treg_med, tcm_med)
) +
theme(
plot.title = element_text(size = 13, face = "bold"),
plot.subtitle = element_text(size = 9, colour = "grey40"),
axis.text.x = element_text(size = 11, face = "bold")
)
print(fig1v2)

save_fig(fig1v2, "Fig1v2_Pseudotime_Violin_NoTreg", w = 12, h = 6)
✓ Fig1v2_Pseudotime_Violin_NoTreg [PNG + PDF]
9. Figure 2 — UMAP:
Pseudotime + Label Side by Side
p2a <- ggplot(query_df[order(query_df$pseudotime, na.last = FALSE), ],
aes(UMAP_1, UMAP_2, colour = pseudotime)) +
geom_point(data = ref_bg_plot, aes(UMAP_1, UMAP_2),
colour = "grey68", size = 0.25, alpha = 0.45, inherit.aes = FALSE) +
geom_point(size = 0.25, alpha = 0.8) +
scale_colour_gradientn(
colours = c("#0D0887","#6A00A8","#B12A90","#E16462","#FCA636","#F0F921"),
name = "Pseudotime", limits = PT_LIMITS, na.value = "grey85"
) +
theme_classic() +
labs(x = "UMAP-1", y = "UMAP-2",
title = "A. Transferred pseudotime gradient",
subtitle = "Grey = healthy reference | Coloured = Sézary cells") +
theme(plot.title = element_text(size = 12, face = "bold"))
p2b <- ggplot(query_df[sample(nrow(query_df)), ],
aes(UMAP_1, UMAP_2, colour = factor(label, levels = STATE_ORDER))) +
geom_point(data = ref_bg_plot, aes(UMAP_1, UMAP_2),
colour = "grey88", size = 0.25, alpha = 0.45, inherit.aes = FALSE) +
geom_point(size = 0.25, alpha = 0.7) +
scale_colour_manual(values = STATE_COLORS, name = "Transferred\nlabel",
na.value = "grey80") +
guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1))) +
theme_classic() +
labs(x = "UMAP-1", y = "UMAP-2",
title = "B. Transferred label identity",
subtitle = "96-99.9% CD4 TCM across all seven lines") +
theme(plot.title = element_text(size = 12, face = "bold"))
fig2 <- p2a | p2b
print(fig2)

save_fig(fig2, "Fig2_UMAP_Pseudotime_and_Label", w = 16, h = 7)
✓ Fig2_UMAP_Pseudotime_and_Label [PNG + PDF]
10. Figure 3 — Label
Bar Chart + Per Cell Line UMAP Facets
p3a <- ggplot(label_df, aes(cell_line, pct, fill = label)) +
geom_col(width = 0.75) +
geom_text(data = label_df %>% filter(pct > 5),
aes(label = sprintf("%.0f%%", pct)),
position = position_stack(vjust = 0.5),
size = 3, colour = "white", fontface = "bold") +
scale_fill_manual(values = STATE_COLORS, name = "Label") +
scale_y_continuous(expand = c(0, 0), limits = c(0, 101)) +
theme_classic() +
labs(x = "Cell Line", y = "% Cells", title = "A. Transferred Azimuth l2 Labels") +
theme(plot.title = element_text(size = 12, face = "bold"),
axis.text.x = element_text(size = 10, face = "bold"))
p3b <- ggplot(
query_df %>% filter(!is.na(cell_line)) %>% arrange(pseudotime),
aes(UMAP_1, UMAP_2, colour = pseudotime)
) +
geom_point(
data = ref_bg_plot %>%
slice(rep(1:n(), times = length(levels(query_df$cell_line)))),
aes(UMAP_1, UMAP_2), colour = "grey88", size = 0.2, alpha = 0.4,
inherit.aes = FALSE
) +
geom_point(size = 0.2, alpha = 0.8) +
scale_colour_gradientn(
colours = c("#0D0887","#6A00A8","#B12A90","#E16462","#FCA636","#F0F921"),
name = "Pseudotime", limits = PT_LIMITS, na.value = "grey85"
) +
facet_wrap(~ cell_line, nrow = 2) +
theme_classic() +
theme(strip.text = element_text(face = "bold", size = 10),
plot.title = element_text(size = 12, face = "bold"),
axis.text = element_blank(), axis.ticks = element_blank()) +
labs(x = NULL, y = NULL,
title = "B. Per Cell Line UMAP — Projected Pseudotime (Grey = healthy reference)")
fig3 <- p3a + p3b + plot_layout(widths = c(1, 2))
print(fig3)

save_fig(fig3, "Fig3_Labels_PerLine_UMAP", w = 18, h = 6)
✓ Fig3_Labels_PerLine_UMAP [PNG + PDF]
11. Figure 4 — Per
Cell Line Facet UMAP (Key Heterogeneity Figure)
fig4 <- ggplot(
query_df %>% filter(!is.na(cell_line)) %>% arrange(pseudotime),
aes(UMAP_1, UMAP_2, colour = pseudotime)
) +
geom_point(
data = ref_bg_plot %>%
slice(rep(1:n(), times = length(levels(query_df$cell_line)))),
aes(UMAP_1, UMAP_2), colour = "grey88", size = 0.2, alpha = 0.4,
inherit.aes = FALSE
) +
geom_point(size = 0.25, alpha = 0.8) +
scale_colour_gradientn(
colours = c("#0D0887","#6A00A8","#B12A90","#E16462","#FCA636","#F0F921"),
name = "Pseudotime\n(Naive->Temra)", limits = PT_LIMITS, na.value = "grey85"
) +
facet_wrap(~ cell_line, nrow = 2) +
theme_classic() +
theme(
strip.text = element_text(face = "bold", size = 12),
strip.background = element_rect(fill = "grey95", colour = "grey60"),
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 10, colour = "grey40"),
axis.text = element_blank(), axis.ticks = element_blank(),
legend.position = "right"
) +
labs(x = NULL, y = NULL,
title = "Figure 4 — Per Cell Line Facet UMAP: Differentiation Position",
subtitle = "L1 most heterogeneous (spans full trajectory) | L6 most arrested (tight TCM cluster)")
print(fig4)

save_fig(fig4, "Fig4_PerLine_Facet_UMAP", w = 18, h = 10)
✓ Fig4_PerLine_Facet_UMAP [PNG + PDF]
12. Figure 5 — Label
vs Pseudotime Bins Bar Charts
p5a <- ggplot(label_df, aes(cell_line, pct, fill = label)) +
geom_col(width = 0.7) +
geom_text(data = label_df %>% filter(pct > 3),
aes(label = sprintf("%.1f%%", pct)),
position = position_stack(vjust = 0.5),
size = 3.2, colour = "white", fontface = "bold") +
scale_fill_manual(values = STATE_COLORS, name = "Label") +
scale_y_continuous(expand = c(0, 0), limits = c(0, 101)) +
theme_classic() +
labs(x = NULL, y = "% Cells",
title = "A. Transferred Azimuth l2 Label (Cell of Origin)",
subtitle = "96-99.9% CD4 TCM across all lines — TCM origin confirmed") +
theme(plot.title = element_text(size = 12, face = "bold"),
plot.subtitle = element_text(size = 10, colour = "grey40"),
axis.text.x = element_blank(), axis.ticks.x = element_blank())
p5b <- ggplot(bin_df, aes(cell_line, pct, fill = pseudotime_bin)) +
geom_col(width = 0.7) +
geom_text(data = bin_df %>% filter(pct > 3),
aes(label = sprintf("%.1f%%", pct)),
position = position_stack(vjust = 0.5),
size = 3.2, colour = "white", fontface = "bold") +
scale_fill_manual(values = BIN_COLORS, name = "Pseudotime bin") +
scale_y_continuous(expand = c(0, 0), limits = c(0, 101)) +
theme_classic() +
labs(x = "Cell Line", y = "% Cells",
title = "B. Pseudotime Bin Assignment (Differentiation Position)",
subtitle = sprintf(
"Bin boundaries — Naive|TCM: %.2f | TCM|TEM: %.2f | TEM|Temra: %.2f | Treg: label-based",
BIN_NAIVE_TCM, BIN_TCM_TEM, BIN_TEM_TEMRA)) +
theme(plot.title = element_text(size = 12, face = "bold"),
plot.subtitle = element_text(size = 9, colour = "grey40"),
axis.text.x = element_text(size = 11, face = "bold"))
fig5 <- p5a / p5b
print(fig5)

save_fig(fig5, "Fig5_Label_vs_Pseudotime_Bins", w = 16, h = 12)
✓ Fig5_Label_vs_Pseudotime_Bins [PNG + PDF]
save_fig(p5a, "Fig5A_Label_Transfer_BarChart", w = 10, h = 5)
✓ Fig5A_Label_Transfer_BarChart [PNG + PDF]
save_fig(p5b, "Fig5B_Pseudotime_Bins_BarChart", w = 10, h = 5)
✓ Fig5B_Pseudotime_Bins_BarChart [PNG + PDF]
13. Figure 6 —
Pseudotime Density + Ridgeplot
p6a <- ggplot(query_df %>% filter(!is.na(pseudotime)), aes(x = pseudotime)) +
geom_density(fill = "#c0392b", colour = "#c0392b", alpha = 0.4, linewidth = 0.9) +
geom_vline(data = REF_MEDIANS,
aes(xintercept = med_pt, colour = state),
linetype = "dashed", linewidth = 0.7) +
geom_text(data = REF_MEDIANS,
aes(x = med_pt, y = Inf,
label = gsub("CD4 ", "", as.character(state))),
inherit.aes = FALSE,
angle = 90, hjust = 1.1, vjust = -0.3, size = 2.8, colour = "grey30") +
scale_colour_manual(values = STATE_COLORS, name = "Reference\nstate median") +
scale_x_continuous(limits = c(0, 26), breaks = seq(0, 25, 5)) +
theme_classic() +
labs(
x = "Transferred Pseudotime",
y = "Density",
title = "A. Sézary Pseudotime Distribution — All Lines Combined",
subtitle = sprintf(
"Reference medians: Naive=%.2f | TCM=%.2f | Treg=%.2f | TEM=%.2f | Temra=%.2f",
naive_med, tcm_med, treg_med, tem_med, temra_med)
) +
theme(plot.title = element_text(size = 12, face = "bold"),
plot.subtitle = element_text(size = 9, colour = "grey40"),
legend.position = "right")
p6b <- ggplot(
query_df %>% filter(!is.na(pseudotime), !is.na(cell_line)),
aes(x = pseudotime, y = fct_rev(cell_line), fill = cell_line)
) +
geom_density_ridges(scale = 0.9, rel_min_height = 0.01,
alpha = 0.85, colour = "white") +
geom_vline(xintercept = c(BIN_NAIVE_TCM, BIN_TCM_TEM, BIN_TEM_TEMRA),
linetype = "dashed", colour = "grey50", linewidth = 0.5) +
annotate("text", x = BIN_NAIVE_TCM, y = 0.5, label = "Naive|TCM",
size = 2.5, hjust = 1.05, colour = "grey40", angle = 90) +
annotate("text", x = BIN_TCM_TEM, y = 0.5, label = "TCM|TEM",
size = 2.5, hjust = 1.05, colour = "grey40", angle = 90) +
annotate("text", x = BIN_TEM_TEMRA, y = 0.5, label = "TEM|Temra",
size = 2.5, hjust = 1.05, colour = "grey40", angle = 90) +
scale_fill_brewer(palette = "Set2", guide = "none") +
scale_x_continuous(limits = c(0, 26), breaks = seq(0, 25, 5)) +
theme_ridges(grid = FALSE) +
labs(x = "Transferred Pseudotime", y = "Cell Line",
title = "B. Per Cell Line Pseudotime Ridgeplot",
subtitle = "L1 multimodal | L3-L7 narrow peaks at TCM-TEM boundary") +
theme(plot.title = element_text(size = 12, face = "bold"),
plot.subtitle = element_text(size = 10, colour = "grey40"))
fig6 <- p6a | p6b
print(fig6)

save_fig(fig6, "Fig6_Density_and_Ridgeplot", w = 16, h = 7)
✓ Fig6_Density_and_Ridgeplot [PNG + PDF]
save_fig(p6a, "Fig6A_Density_Overall", w = 8, h = 6)
✓ Fig6A_Density_Overall [PNG + PDF]
save_fig(p6b, "Fig6B_Ridgeplot_PerLine", w = 8, h = 6)
✓ Fig6B_Ridgeplot_PerLine [PNG + PDF]
p6a_v2 <- ggplot(
query_df %>% filter(!is.na(pseudotime)),
aes(x = pseudotime)
) +
geom_density(fill = "#c0392b", colour = "#c0392b", alpha = 0.4, linewidth = 0.9) +
geom_vline(
data = REF_MEDIANS_EFFECTOR,
aes(xintercept = med_pt, colour = state),
linetype = "dashed", linewidth = 0.7
) +
geom_text(
data = REF_MEDIANS_EFFECTOR,
aes(x = med_pt, y = Inf,
label = gsub("CD4 ", "", as.character(state))),
inherit.aes = FALSE,
angle = 90, hjust = 1.1, vjust = -0.3, size = 2.8, colour = "grey30"
) +
scale_colour_manual(
values = STATE_COLORS[names(STATE_COLORS) != "Treg"],
name = "Reference\nstate median"
) +
scale_x_continuous(limits = c(0, 26), breaks = seq(0, 25, 5)) +
theme_classic() +
labs(
x = "Transferred Pseudotime",
y = "Density",
title = "A. Sézary Pseudotime Distribution — All Lines Combined",
subtitle = sprintf(
"Effector axis medians: Naive=%.2f | TCM=%.2f | TEM=%.2f | Temra=%.2f\nTreg line omitted — overlaps TCM; Treg-like assigned by label not pseudotime",
naive_med, tcm_med, tem_med, temra_med)
) +
theme(
plot.title = element_text(size = 12, face = "bold"),
plot.subtitle = element_text(size = 9, colour = "grey40"),
legend.position = "right"
)
print(p6a_v2)

save_fig(p6a_v2, "Fig6Av2_Density_NoTreg", w = 8, h = 6)
✓ Fig6Av2_Density_NoTreg [PNG + PDF]
14. Figure 8 — Violin:
Transferred Pseudotime per Label
fig8 <- ggplot(
query_df %>%
filter(!is.na(pseudotime), !is.na(label)) %>%
mutate(label = factor(label, levels = STATE_ORDER)),
aes(label, pseudotime, fill = label)
) +
geom_violin(scale = "width", trim = FALSE, alpha = 0.85, colour = "white") +
geom_boxplot(width = 0.07, fill = "white", colour = "grey40",
outlier.size = 0.4, outlier.alpha = 0.3) +
geom_hline(data = REF_MEDIANS, aes(yintercept = med_pt),
linetype = "dotted", colour = "grey55", linewidth = 0.6) +
# Annotations driven entirely from REF_MEDIANS — no hard-coded y values
geom_text(
data = REF_MEDIANS,
aes(x = 0.55,
y = med_pt,
label = sprintf("Ref: %s (%.2f)",
gsub("CD4 ", "", as.character(state)), med_pt)),
inherit.aes = FALSE,
hjust = 0, size = 2.8, colour = "grey40"
) +
scale_fill_manual(values = STATE_COLORS, guide = "none") +
scale_y_continuous(breaks = seq(0, 25, 5)) +
theme_classic() +
labs(
x = "Transferred Azimuth l2 Label",
y = "Transferred Pseudotime",
title = "Figure 8 — Transferred Pseudotime per Label (All Cell Lines)",
subtitle = "CD4 Naive internal control at PT~4 | TCM broad distribution | TEM at terminal PT~25"
) +
theme(plot.title = element_text(size = 13, face = "bold"),
plot.subtitle = element_text(size = 10, colour = "grey40"),
axis.text.x = element_text(size = 10, face = "bold"))
print(fig8)

save_fig(fig8, "Fig8_Violin_Pseudotime_per_Label", w = 12, h = 6)
✓ Fig8_Violin_Pseudotime_per_Label [PNG + PDF]
# For TEM and Temra: median=24.84 for both (Temra n=10, all at same PT).
# Use actual reference max values to separate the lines visually:
# TEM max = 25.00 (from diagnostic)
# Temra max = 24.84 (all 10 cells identical)
# Better approach: show TEM median and Temra median separately but
# acknowledge in subtitle they are nearly identical.
# Override: use mean instead of median for TEM/Temra to separate lines
REF_MEDIANS_EFFECTOR_PLOT <- REF_MEDIANS_EFFECTOR %>%
mutate(med_pt = case_when(
state == "CD4 TEM" ~ 23.962, # mean from diagnostic (median=24.84 same as Temra)
state == "CD4 Temra/CTL" ~ 24.840, # all 10 cells identical
TRUE ~ med_pt
),
label_text = case_when(
state == "CD4 TEM" ~ sprintf("Ref: TEM (mean=23.96)"),
state == "CD4 Temra/CTL" ~ sprintf("Ref: Temra (24.84, n=10)"),
state == "CD4 Naive" ~ sprintf("Ref: Naive (%.2f)", med_pt),
state == "CD4 TCM" ~ sprintf("Ref: TCM (%.2f)", med_pt),
TRUE ~ as.character(state)
))
fig8v2 <- ggplot(
query_df %>%
filter(!is.na(pseudotime), !is.na(label)) %>%
mutate(label = factor(label, levels = STATE_ORDER)), # drop=FALSE keeps Temra axis
aes(label, pseudotime, fill = label)
) +
geom_violin(scale = "width", trim = FALSE, alpha = 0.85, colour = "white") +
geom_boxplot(width = 0.07, fill = "white", colour = "grey40",
outlier.size = 0.4, outlier.alpha = 0.3) +
geom_hline(
data = REF_MEDIANS_EFFECTOR_PLOT,
aes(yintercept = med_pt),
linetype = "dotted", colour = "grey55", linewidth = 0.6
) +
geom_text(
data = REF_MEDIANS_EFFECTOR_PLOT,
aes(x = 0.55,
y = med_pt,
label = label_text),
inherit.aes = FALSE,
hjust = 0, size = 2.8, colour = "grey40"
) +
scale_fill_manual(values = STATE_COLORS, na.value = "grey80", guide = "none",
drop = FALSE) +
scale_x_discrete(drop = FALSE) + # ← forces CD4 Temra/CTL to appear
scale_y_continuous(breaks = seq(0, 25, 5)) +
theme_classic() +
labs(
x = "Transferred Azimuth l2 Label",
y = "Transferred Pseudotime",
title = "Figure 8 — Transferred Pseudotime per Label (All Cell Lines)",
subtitle = sprintf(
"Effector axis: Naive=%.2f | TCM=%.2f | TEM~24.84 | Temra~24.84 (n=10, identical)\nCD4 Temra/CTL: no Sézary cells received this label — consistent with TCM arrest phenotype\nTreg line omitted — overlaps TCM; Treg-like assigned by label",
naive_med, tcm_med)
) +
theme(
plot.title = element_text(size = 13, face = "bold"),
plot.subtitle = element_text(size = 9, colour = "grey40"),
axis.text.x = element_text(size = 10, face = "bold")
)
print(fig8v2)

save_fig(fig8v2, "Fig8v2_Violin_NoTreg", w = 12, h = 6)
✓ Fig8v2_Violin_NoTreg [PNG + PDF]
fig8v2 <- ggplot(
query_df %>%
filter(!is.na(pseudotime), !is.na(label)) %>%
mutate(label = factor(label, levels = STATE_ORDER)),
aes(label, pseudotime, fill = label)
) +
geom_violin(scale = "width", trim = FALSE, alpha = 0.85, colour = "white") +
geom_boxplot(width = 0.07, fill = "white", colour = "grey40",
outlier.size = 0.4, outlier.alpha = 0.3) +
geom_hline(
data = REF_MEDIANS_EFFECTOR,
aes(yintercept = med_pt),
linetype = "dotted", colour = "grey55", linewidth = 0.6
) +
geom_text(
data = REF_MEDIANS_EFFECTOR,
aes(x = 0.55,
y = med_pt,
label = sprintf("Ref: %s (%.2f)",
gsub("CD4 ", "", as.character(state)), med_pt)),
inherit.aes = FALSE,
hjust = 0, size = 2.8, colour = "grey40"
) +
scale_fill_manual(values = STATE_COLORS, guide = "none") +
scale_y_continuous(breaks = seq(0, 25, 5)) +
theme_classic() +
labs(
x = "Transferred Azimuth l2 Label",
y = "Transferred Pseudotime",
title = "Figure 8 — Transferred Pseudotime per Label (All Cell Lines)",
subtitle = sprintf(
"Effector axis reference lines: Naive=%.2f | TCM=%.2f | TEM=%.2f | Temra=%.2f\nTreg line omitted — Treg (PT=%.2f) overlaps TCM; Treg-like violin shown as biological confirmation",
naive_med, tcm_med, tem_med, temra_med, treg_med)
) +
theme(
plot.title = element_text(size = 13, face = "bold"),
plot.subtitle = element_text(size = 9, colour = "grey40"),
axis.text.x = element_text(size = 10, face = "bold")
)
print(fig8v2)

save_fig(fig8v2, "Fig8v2_Violin_NoTreg", w = 12, h = 6)
✓ Fig8v2_Violin_NoTreg [PNG + PDF]
15. Figure 9 —
Cross-Table: Label vs Pseudotime Bin (PRIMARY RESULT)
cross_df <- query_df %>%
filter(!is.na(label), !is.na(pseudotime_bin)) %>%
mutate(label = factor(label, levels = STATE_ORDER)) %>%
count(label, pseudotime_bin) %>%
group_by(label) %>%
mutate(pct = 100 * n / sum(n)) %>%
ungroup()
tcm_discordant <- cross_df %>%
filter(label == "CD4 TCM",
pseudotime_bin %in% c("TEM-like", "Temra-like")) %>%
summarise(pct = sum(pct)) %>%
pull(pct)
fig9 <- ggplot(cross_df, aes(pseudotime_bin, pct, fill = pseudotime_bin)) +
geom_col(width = 0.7) +
geom_text(aes(label = sprintf("%.1f%%", pct)),
vjust = -0.3, size = 3.5, fontface = "bold") +
scale_fill_manual(values = BIN_COLORS, guide = "none") +
scale_y_continuous(expand = c(0, 0), limits = c(0, 115)) +
facet_wrap(~ label, nrow = 1, scales = "free_x") +
theme_classic() +
theme(
strip.text = element_text(face = "bold", size = 11),
strip.background = element_rect(fill = "grey95", colour = "grey60"),
axis.text.x = element_text(size = 9, face = "bold", angle = 30, hjust = 1),
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 10, colour = "grey40")
) +
labs(
x = "Pseudotime Bin",
y = "% of Cells with This Label",
title = "Figure 9 — Cross-Table: Transferred Label vs Pseudotime Bin [PRIMARY RESULT]",
subtitle = sprintf(
"TCM-labelled cells: %.1f%% discordant (TEM-like or Temra-like)\nTreg panel: 100%% Treg-like by design",
tcm_discordant)
)
print(fig9)

save_fig(fig9, "Fig9_CrossTable_Label_vs_Bin_PRIMARY", w = 18, h = 7)
✓ Fig9_CrossTable_Label_vs_Bin_PRIMARY [PNG + PDF]
16. Supplementary
Figures
Supp S1 — Per Cell
Line Pseudotime Density (Faceted)
suppS1 <- ggplot(
query_df %>% filter(!is.na(pseudotime), !is.na(cell_line)),
aes(x = pseudotime, fill = cell_line, colour = cell_line)
) +
geom_density(alpha = 0.35, linewidth = 0.8) +
geom_vline(data = REF_MEDIANS, aes(xintercept = med_pt),
colour = "grey45", linetype = "dashed", linewidth = 0.5) +
scale_fill_brewer(palette = "Set2", guide = "none") +
scale_colour_brewer(palette = "Set2", guide = "none") +
scale_x_continuous(limits = c(0, 26), breaks = seq(0, 25, 5)) +
facet_wrap(~ cell_line, nrow = 2) +
theme_classic() +
theme(strip.text = element_text(face = "bold"),
plot.title = element_text(size = 12, face = "bold"),
plot.subtitle = element_text(size = 9, colour = "grey40")) +
labs(x = "Transferred Pseudotime", y = "Density",
title = "Supp S1 — Per Cell Line Pseudotime Density",
subtitle = "Grey dashed lines = data-derived reference state medians")
print(suppS1)

save_fig(suppS1, "SuppS1_Density_PerLine_Facet", w = 12, h = 7)
✓ SuppS1_Density_PerLine_Facet [PNG + PDF]
Supp S2 — Ridgeplot
Standalone
suppS2 <- p6b +
ggtitle(
"Supp S2 — Per Cell Line Pseudotime Ridgeplot",
subtitle = sprintf(
"Bin boundaries: Naive|TCM=%.2f | TCM|TEM=%.2f | TEM|Temra=%.2f",
BIN_NAIVE_TCM, BIN_TCM_TEM, BIN_TEM_TEMRA)
)
print(suppS2)

save_fig(suppS2, "SuppS2_Ridgeplot_PerLine", w = 10, h = 7)
✓ SuppS2_Ridgeplot_PerLine [PNG + PDF]
Supp S3 — Violin per
Label x Cell Line (Faceted)
suppS3 <- ggplot(
query_df %>%
filter(!is.na(label), !is.na(cell_line), !is.na(pseudotime)) %>%
mutate(label = factor(label, levels = STATE_ORDER)),
aes(cell_line, pseudotime, fill = cell_line)
) +
geom_violin(scale = "width", trim = FALSE, alpha = 0.8, colour = "white") +
geom_boxplot(width = 0.07, fill = "white", colour = "grey40",
outlier.size = 0.2, outlier.alpha = 0.3) +
scale_fill_brewer(palette = "Set2", guide = "none") +
scale_y_continuous(breaks = seq(0, 25, 5)) +
facet_wrap(~ label, nrow = 1) +
theme_classic() +
theme(
strip.text = element_text(face = "bold", size = 10),
strip.background = element_rect(fill = "grey95", colour = "grey60"),
axis.text.x = element_text(size = 9, face = "bold", angle = 30, hjust = 1),
plot.title = element_text(size = 12, face = "bold"),
plot.subtitle = element_text(size = 9, colour = "grey40")
) +
labs(x = "Cell Line", y = "Transferred Pseudotime",
title = "Supp S3 — Violin: Pseudotime per Label x Cell Line",
subtitle = "Complements Fig 9 — shows within-state per-line heterogeneity")
print(suppS3)

save_fig(suppS3, "SuppS3_Violin_Label_x_Line", w = 16, h = 10)
✓ SuppS3_Violin_Label_x_Line [PNG + PDF]
Supp S4 — Pseudotime
Bin Bar Chart with Full Percentages
suppS4 <- ggplot(bin_df, aes(cell_line, pct, fill = pseudotime_bin)) +
geom_col(width = 0.7) +
geom_text(data = bin_df %>% filter(pct > 2),
aes(label = sprintf("%.1f%%", pct)),
position = position_stack(vjust = 0.5),
size = 3.5, colour = "white", fontface = "bold") +
scale_fill_manual(values = BIN_COLORS, name = "Pseudotime bin") +
scale_y_continuous(expand = c(0, 0), limits = c(0, 101)) +
theme_classic() +
labs(
x = "Cell Line",
y = "% Cells",
title = "Supp S4 — Pseudotime Bin Composition per Cell Line",
subtitle = sprintf(
"Bin boundaries: Naive|TCM=%.2f | TCM|TEM=%.2f | TEM|Temra=%.2f | Treg=label-based",
BIN_NAIVE_TCM, BIN_TCM_TEM, BIN_TEM_TEMRA)
) +
theme(plot.title = element_text(size = 12, face = "bold"),
plot.subtitle = element_text(size = 9, colour = "grey40"),
axis.text.x = element_text(size = 11, face = "bold"))
print(suppS4)

save_fig(suppS4, "SuppS4_Bins_BarChart_Detailed", w = 14, h = 6)
✓ SuppS4_Bins_BarChart_Detailed [PNG + PDF]
18. Session Info
sessionInfo()
R version 4.5.2 (2025-10-31)
Platform: x86_64-redhat-linux-gnu
Running under: Rocky Linux 9.7 (Blue Onyx)
Matrix products: default
BLAS/LAPACK: FlexiBLAS OPENBLAS-OPENMP; LAPACK version 3.9.0
locale:
[1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8 LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
[7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
time zone: Europe/Paris
tzcode source: system (glibc)
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] kableExtra_1.4.0 knitr_1.51 ggridges_0.5.7 RColorBrewer_1.1-3 scales_1.4.0 patchwork_1.3.2 forcats_1.0.1
[8] tidyr_1.3.2 dplyr_1.2.0 ggplot2_4.0.2 Seurat_5.4.0 SeuratObject_5.3.0 sp_2.2-1
loaded via a namespace (and not attached):
[1] rstudioapi_0.18.0 jsonlite_2.0.0 magrittr_2.0.4 spatstat.utils_3.2-1 farver_2.1.2 rmarkdown_2.30
[7] ragg_1.5.0 vctrs_0.7.1 ROCR_1.0-12 spatstat.explore_3.7-0 htmltools_0.5.9 sass_0.4.10
[13] sctransform_0.4.3 parallelly_1.46.1 KernSmooth_2.23-26 bslib_0.10.0 htmlwidgets_1.6.4 ica_1.0-3
[19] plyr_1.8.9 plotly_4.12.0 zoo_1.8-15 cachem_1.1.0 igraph_2.2.2 mime_0.13
[25] lifecycle_1.0.5 pkgconfig_2.0.3 Matrix_1.7-4 R6_2.6.1 fastmap_1.2.0 fitdistrplus_1.2-6
[31] future_1.69.0 shiny_1.12.1 digest_0.6.39 S4Vectors_0.48.0 tensor_1.5.1 RSpectra_0.16-2
[37] irlba_2.3.7 textshaping_1.0.4 labeling_0.4.3 progressr_0.18.0 spatstat.sparse_3.1-0 httr_1.4.8
[43] polyclip_1.10-7 abind_1.4-8 compiler_4.5.2 withr_3.0.2 S7_0.2.1 fastDummies_1.7.5
[49] MASS_7.3-65 tools_4.5.2 lmtest_0.9-40 otel_0.2.0 httpuv_1.6.16 future.apply_1.20.1
[55] goftest_1.2-3 glue_1.8.0 nlme_3.1-168 promises_1.5.0 grid_4.5.2 Rtsne_0.17
[61] cluster_2.1.8.2 reshape2_1.4.5 generics_0.1.4 gtable_0.3.6 spatstat.data_3.1-9 data.table_1.18.2.1
[67] xml2_1.5.2 XVector_0.50.0 BiocGenerics_0.56.0 spatstat.geom_3.7-0 RcppAnnoy_0.0.23 ggrepel_0.9.6
[73] RANN_2.6.2 pillar_1.11.1 stringr_1.6.0 spam_2.11-3 RcppHNSW_0.6.0 later_1.4.6
[79] splines_4.5.2 lattice_0.22-9 survival_3.8-6 deldir_2.0-4 tidyselect_1.2.1 miniUI_0.1.2
[85] pbapply_1.7-4 gridExtra_2.3 IRanges_2.44.0 svglite_2.2.2 scattermore_1.2 stats4_4.5.2
[91] xfun_0.56 Biobase_2.70.0 matrixStats_1.5.0 stringi_1.8.7 lazyeval_0.2.2 yaml_2.3.12
[97] evaluate_1.0.5 codetools_0.2-20 tibble_3.3.1 cli_3.6.5 uwot_0.2.4 xtable_1.8-4
[103] reticulate_1.45.0 systemfonts_1.3.1 jquerylib_0.1.4 dichromat_2.0-0.1 Rcpp_1.1.1 globals_0.19.0
[109] spatstat.random_3.4-4 png_0.1-8 spatstat.univar_3.1-6 parallel_4.5.2 dotCall64_1.2 listenv_0.10.0
[115] viridisLite_0.4.3 purrr_1.2.1 rlang_1.1.7 cowplot_1.2.0
---
title: "Sézary Syndrome — Pseudotime Trajectory Figures"
subtitle: "CD4+ T Cell Reference Projection | State-Anchored Pseudotime Bins | PNG + PDF output"
author: "Nasir Mahmood Abbasi"
date: "`r Sys.Date()`"
output:
  html_notebook:
    number_sections: true
    toc: true
    toc_float:
      collapsed: false
    theme: journal
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(
  echo      = TRUE,
  message   = FALSE,
  warning   = FALSE,
  fig.align = "center",
  dpi       = 150
)
```

---

# Overview

This script regenerates **all manuscript figures** from the saved projection object.
The reference object is loaded **solely** to extract UMAP coordinates for the grey background.

## Objects required

| Object | Path | Role |
|--------|------|------|
| `reference_integrated` | `../../1-Final_Custom_MST_Monocle3_Trajectory_and_mapping/1-Custom_MST/Objects/cd4_ref_dual_trajectory.rds` | **UMAP background only** — grey reference dots |
| `mapped_MalignantCD4T` | `results/MalignantCD4T_Monocle3_projection/MalignantCD4T_mapped_monocle3_pseudotime_noProlif_ref.Rds` | **All biology** — pseudotime, labels, bins, colour scales |

## Key design decisions

| Item | Source |
|------|--------|
| Pseudotime colour-scale limits (`PT_LIMITS`) | `mapped_MalignantCD4T$predicted.pseudotime` |
| Bin boundaries | Midpoints between **actual** reference state medians — computed at runtime from `ref_bg` |
| State medians (`REF_MEDIANS`) | Computed at runtime from `ref_bg` — **never hard-coded** |
| All bar charts, violins, density plots | `mapped_MalignantCD4T` only |

## Panel B fix

Previously called `FeaturePlot(reference_integrated, ...)` without `limits`,
producing a spurious 1.0–2.0 colour scale. Fixed: Panel B plots Sézary cells
coloured by `predicted.pseudotime` with `limits = PT_LIMITS` (correct 0–25 scale).

## Bin design

Bins are anchored to **actual reference state medians** computed from `ref_bg`.
Boundaries = midpoint between adjacent state medians (biology-principled, not arbitrary).
Treg-like bin is assigned by Azimuth label (Treg is a branch, not a linear position).

---

# 1. Setup & Libraries

```{r libraries}
suppressPackageStartupMessages({
  library(Seurat)
  library(ggplot2)
  library(dplyr)
  library(tidyr)
  library(forcats)
  library(patchwork)
  library(scales)
  library(RColorBrewer)
  library(ggridges)
  library(knitr)
  library(kableExtra)
})

dir.create("Figures/PNG", recursive = TRUE, showWarnings = FALSE)
dir.create("Figures/PDF", recursive = TRUE, showWarnings = FALSE)

save_fig <- function(p, name, w = 10, h = 8) {
  ggsave(file.path("Figures/PNG", paste0(name, ".png")),
         p, width = w, height = h, dpi = 300, bg = "white")
  ggsave(file.path("Figures/PDF", paste0(name, ".pdf")),
         p, width = w, height = h, useDingbats = FALSE)
  invisible(cat(sprintf("  \u2713  %s  [PNG + PDF]\n", name)))
}

STATE_COLORS <- c(
  "CD4 Naive"     = "#4472C4",
  "CD4 TCM"       = "#70AD47",
  "CD4 TEM"       = "#ED7D31",
  "CD4 Temra/CTL" = "#C00000",
  "Treg"          = "#7030A0"
)

BIN_COLORS <- c(
  "Naive-like"  = "#4472C4",
  "TCM-like"    = "#70AD47",
  "TEM-like"    = "#ED7D31",
  "Temra-like"  = "#C00000",
  "Treg-like"   = "#7030A0"
)

STATE_ORDER <- c("CD4 Naive", "CD4 TCM", "CD4 TEM", "CD4 Temra/CTL", "Treg")
BIN_ORDER   <- c("Naive-like", "TCM-like", "TEM-like", "Temra-like", "Treg-like")
LINE_ORDER  <- paste0("L", 1:7)

cat("Setup complete.\n")
```

---

# 2. Load & Validate Objects

```{r load-objects}
ref_path <- paste0(
  "../../1-Final_Custom_MST_Monocle3_Trajectory_and_mapping/",
  "1-Custom_MST/Objects/cd4_ref_dual_trajectory.rds"
)
reference_integrated <- readRDS(ref_path)
cat("Reference loaded:", ncol(reference_integrated), "cells\n")

stopifnot(
  "umap reduction missing from reference" =
    "umap" %in% names(reference_integrated@reductions)
)

obj_path <- paste0(
  "../results/MalignantCD4T_Monocle3_projection/",
  "MalignantCD4T_mapped_monocle3_pseudotime_noProlif_ref.Rds"
)
mapped_MalignantCD4T <- readRDS(obj_path)
cat("Query loaded:    ", ncol(mapped_MalignantCD4T), "cells\n")

stopifnot(
  "ref.umap reduction missing — re-run MapQuery" =
    "ref.umap" %in% names(mapped_MalignantCD4T@reductions),
  "predicted.pseudotime column missing" =
    "predicted.pseudotime" %in% colnames(mapped_MalignantCD4T@meta.data),
  "predicted.predicted.celltype.l2 column missing" =
    "predicted.predicted.celltype.l2" %in% colnames(mapped_MalignantCD4T@meta.data)
)

if (!"cell_line" %in% colnames(mapped_MalignantCD4T@meta.data)) {
  if ("orig.ident" %in% colnames(mapped_MalignantCD4T@meta.data)) {
    mapped_MalignantCD4T$cell_line <- mapped_MalignantCD4T$orig.ident
    cat("NOTE: using orig.ident as cell_line\n")
  } else {
    stop("Cannot find cell_line or orig.ident in metadata")
  }
}

cat("\npredicted.pseudotime summary (expect Min~0, Max~25):\n")
print(summary(mapped_MalignantCD4T$predicted.pseudotime))
cat("\nTransferred label distribution:\n")
print(sort(table(mapped_MalignantCD4T$predicted.predicted.celltype.l2,
                 useNA = "ifany"), decreasing = TRUE))
cat("\nCell line sizes:\n")
print(sort(table(mapped_MalignantCD4T$cell_line, useNA = "ifany")))
```

---

# 3. Build Reference Background (`ref_bg`)

```{r build-ref-bg}
# ref_bg serves two purposes:
#   1. Grey background dots in all projection UMAPs
#   2. Source for computing actual reference state medians (REF_MEDIANS)
ref_bg <- data.frame(
  Embeddings(reference_integrated, "umap"),
  state      = reference_integrated$predicted.celltype.l2,
  pseudotime = reference_integrated$monocle3_pseudotime,
  stringsAsFactors = FALSE
)
colnames(ref_bg)[1:2] <- c("UMAP_1", "UMAP_2")
ref_bg$state <- factor(ref_bg$state, levels = STATE_ORDER)

cat("Reference monocle3_pseudotime (should be 0-25):\n")
print(summary(ref_bg$pseudotime))

ref_bg_plot <- ref_bg[sample(nrow(ref_bg)), ]
cat(sprintf("\nref_bg ready: %d reference cells\n", nrow(ref_bg)))
```

---

# 4. Compute Reference State Medians & Bin Boundaries

```{r compute-medians-bins}
# ══════════════════════════════════════════════════════════════════════════
# REF_MEDIANS computed at runtime from actual ref_bg — NOT hard-coded.
#
# Diagnostic confirmed these actual values:
#   CD4 Naive     median =  3.293
#   CD4 TCM       median =  9.377  (was hard-coded as 13.36 — WRONG)
#   CD4 TEM       median = 24.840
#   CD4 Temra/CTL median = 24.840  (only 10 cells)
#   Treg          median = 11.150  (was hard-coded as 18.75 — above Treg max of 17.40, WRONG)
#
# Bin boundaries derived as midpoints between adjacent state medians.
# ══════════════════════════════════════════════════════════════════════════

ref_state_medians <- ref_bg %>%
  filter(!is.na(pseudotime), !is.na(state)) %>%
  group_by(state) %>%
  summarise(
    n      = n(),
    med_pt = round(median(pseudotime, na.rm = TRUE), 3),
    .groups = "drop"
  ) %>%
  arrange(med_pt)

cat("=== Reference state medians (data-derived) ===\n")
print(ref_state_medians)

get_med <- function(s) {
  v <- ref_state_medians$med_pt[ref_state_medians$state == s]
  if (length(v) == 0) stop(paste("State not found in ref_bg:", s))
  v
}

naive_med  <- get_med("CD4 Naive")
tcm_med    <- get_med("CD4 TCM")
tem_med    <- get_med("CD4 TEM")
temra_med  <- get_med("CD4 Temra/CTL")
treg_med   <- get_med("Treg")

BIN_NAIVE_TCM  <- round((naive_med + tcm_med)  / 2, 3)
BIN_TCM_TEM    <- round((tcm_med   + tem_med)   / 2, 3)
BIN_TEM_TEMRA  <- round((tem_med   + temra_med) / 2, 3)

cat("\n=== Bin boundaries (midpoints between adjacent state medians) ===\n")
cat(sprintf("  Naive | TCM   : %.3f\n", BIN_NAIVE_TCM))
cat(sprintf("  TCM   | TEM   : %.3f\n", BIN_TCM_TEM))
cat(sprintf("  TEM   | Temra : %.3f\n", BIN_TEM_TEMRA))
cat(sprintf("  Treg           : label-based (branch logic)\n"))

if (abs(tem_med - temra_med) < 0.5) {
  cat("\nNOTE: TEM and Temra medians are very close (n=10 Temra cells in reference).\n")
  cat("      Temra-like bin will be negligible — biologically expected.\n")
}

REF_MEDIANS <- data.frame(
  state  = factor(STATE_ORDER, levels = STATE_ORDER),
  med_pt = c(naive_med, tcm_med, tem_med, temra_med, treg_med)
)

cat("\n=== REF_MEDIANS (used for reference lines in all figures) ===\n")
print(REF_MEDIANS)
```

```{r}
# ── REF_MEDIANS_EFFECTOR: Treg excluded ───────────────────────────────────
# Treg (median PT=11.15) and TCM (median PT=9.38) overlap entirely in
# pseudotime space — both states share the Naive→TCM graph segment.
# Showing a Treg line at PT=11.15 implies a pseudotime boundary that does
# not exist and misleads interpretation. Treg-like cells are assigned by
# transferred Azimuth label (KNN), not by pseudotime position.
REF_MEDIANS_EFFECTOR <- REF_MEDIANS %>%
  filter(state != "Treg")

LINETYPE_EFFECTOR <- c(
  "CD4 Naive"     = "dotted",
  "CD4 TCM"       = "dashed",
  "CD4 TEM"       = "longdash",
  "CD4 Temra/CTL" = "solid"
)

cat("=== REF_MEDIANS_EFFECTOR (Treg excluded) ===\n")
print(REF_MEDIANS_EFFECTOR)
```


# 5. Assign Pseudotime Bins

```{r assign-bins}
assign_bin <- function(pt, lbl) {
  if (isTRUE(grepl("Treg", lbl, ignore.case = TRUE))) return("Treg-like")
  if (is.na(pt))             return(NA_character_)
  if (pt <= BIN_NAIVE_TCM)   return("Naive-like")
  if (pt <= BIN_TCM_TEM)     return("TCM-like")
  if (pt <= BIN_TEM_TEMRA)   return("TEM-like")
  return("Temra-like")
}

mapped_MalignantCD4T$pseudotime_bin <- factor(
  mapply(assign_bin,
         pt  = mapped_MalignantCD4T$predicted.pseudotime,
         lbl = mapped_MalignantCD4T$predicted.predicted.celltype.l2),
  levels = BIN_ORDER
)

cat("Pseudotime bin distribution (all cells):\n")
print(table(mapped_MalignantCD4T$pseudotime_bin, useNA = "ifany"))
cat("\nPseudotime bin per cell line:\n")
print(table(mapped_MalignantCD4T$cell_line,
            mapped_MalignantCD4T$pseudotime_bin, useNA = "ifany"))
```

---

# 6. Build Shared Data Frames

```{r build-dataframes}
query_df <- data.frame(
  Embeddings(mapped_MalignantCD4T, "ref.umap"),
  pseudotime     = mapped_MalignantCD4T$predicted.pseudotime,
  cell_line      = factor(mapped_MalignantCD4T$cell_line, levels = LINE_ORDER),
  label          = mapped_MalignantCD4T$predicted.predicted.celltype.l2,
  pseudotime_bin = mapped_MalignantCD4T$pseudotime_bin,
  stringsAsFactors = FALSE
)
colnames(query_df)[1:2] <- c("UMAP_1", "UMAP_2")

PT_LIMITS <- range(query_df$pseudotime, na.rm = TRUE)
cat(sprintf("Pseudotime colour limits: %.3f to %.3f\n", PT_LIMITS[1], PT_LIMITS[2]))

label_df <- query_df %>%
  filter(!is.na(label), !is.na(cell_line)) %>%
  count(cell_line, label) %>%
  group_by(cell_line) %>%
  mutate(pct = 100 * n / sum(n)) %>%
  ungroup() %>%
  mutate(label = factor(label, levels = STATE_ORDER))

bin_df <- query_df %>%
  filter(!is.na(pseudotime_bin), !is.na(cell_line)) %>%
  count(cell_line, pseudotime_bin) %>%
  group_by(cell_line) %>%
  mutate(pct = 100 * n / sum(n)) %>%
  ungroup()

cat("Data frames ready.\n")
```

---

# 7. Figure 7 — Four-Panel Summary

```{r fig7-four-panel, fig.width=18, fig.height=14}

bg_layer <- geom_point(
  data = ref_bg_plot, aes(x = UMAP_1, y = UMAP_2),
  colour = "grey68", size = 0.25, alpha = 0.45, inherit.aes = FALSE
)

p7a <- ggplot(query_df[sample(nrow(query_df)), ],
              aes(UMAP_1, UMAP_2, colour = factor(label, levels = STATE_ORDER))) +
  bg_layer +
  geom_point(size = 0.35, alpha = 0.7) +
  scale_colour_manual(values = STATE_COLORS, name = "Transferred\nLabel",
                      na.value = "grey80") +
  guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1))) +
  theme_classic() +
  labs(x = "UMAP-1", y = "UMAP-2",
       title = "A. Sézary Cells — Transferred CD4 T Cell Labels") +
  theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"))

p7b <- ggplot(query_df[order(query_df$pseudotime, na.last = FALSE), ],
              aes(UMAP_1, UMAP_2, colour = pseudotime)) +
  bg_layer +
  geom_point(size = 0.35, alpha = 0.8) +
  scale_colour_gradientn(
    colours  = c("lightblue", "yellow", "red"),
    name     = "Pseudotime",
    limits   = PT_LIMITS,
    na.value = "grey85"
  ) +
  theme_classic() +
  labs(x = "UMAP-1", y = "UMAP-2",
       title = "B. Sézary Cells — Transferred Pseudotime  [FIXED]") +
  theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"))

p7c <- ggplot(query_df[sample(nrow(query_df)), ],
              aes(UMAP_1, UMAP_2, colour = pseudotime_bin)) +
  bg_layer +
  geom_point(size = 0.35, alpha = 0.7) +
  scale_colour_manual(values = BIN_COLORS, name = "Pseudotime\nBin",
                      na.value = "grey80") +
  guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1))) +
  theme_classic() +
  labs(x = "UMAP-1", y = "UMAP-2",
       title = "C. Sézary Cells — Pseudotime Bin") +
  theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"))

p7d <- ggplot(label_df, aes(cell_line, pct, fill = label)) +
  geom_col(width = 0.75) +
  geom_text(data = label_df %>% filter(pct > 3),
            aes(label = sprintf("%.1f%%", pct)),
            position = position_stack(vjust = 0.5),
            size = 3, colour = "white", fontface = "bold") +
  scale_fill_manual(values = STATE_COLORS, name = "Label") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 101)) +
  theme_classic() +
  labs(x = "Cell Line", y = "% Cells",
       title = "D. Transferred Label per Cell Line") +
  theme(plot.title  = element_text(hjust = 0.5, size = 12, face = "bold"),
        axis.text.x = element_text(size = 10, face = "bold"))

fig7 <- (p7a | p7b) / (p7c | p7d)
print(fig7)
save_fig(fig7, "Fig7_FourPanel_Summary_FIXED",       w = 18, h = 14)
save_fig(p7a,  "Fig7A_Transferred_Labels",           w = 9,  h = 7)
save_fig(p7b,  "Fig7B_Transferred_Pseudotime_FIXED", w = 9,  h = 7)
save_fig(p7c,  "Fig7C_Pseudotime_Bins_UMAP",         w = 9,  h = 7)
save_fig(p7d,  "Fig7D_Label_BarChart_per_Line",      w = 9,  h = 7)
```

---

# 8. Figure 1 — Pseudotime Violin per Cell Line

```{r fig1-violin, fig.width=12, fig.height=6}

fig1 <- ggplot(
    query_df %>% filter(!is.na(pseudotime), !is.na(cell_line)),
    aes(cell_line, pseudotime, fill = cell_line)
  ) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.85, colour = "white") +
  geom_boxplot(width = 0.06, fill = "white", colour = "grey40",
               outlier.size = 0.3, outlier.alpha = 0.3) +
  geom_hline(data = REF_MEDIANS,
             aes(yintercept = med_pt, colour = state, linetype = state),
             linewidth = 0.7, alpha = 0.9) +
  scale_fill_brewer(palette = "Set2", guide = "none") +
  scale_colour_manual(values = STATE_COLORS, name = "Reference\nmedian") +
  scale_linetype_manual(
    values = c("CD4 Naive" = "dotted", "CD4 TCM" = "dashed",
               "CD4 TEM" = "longdash", "CD4 Temra/CTL" = "solid",
               "Treg" = "twodash"),
    name = "Reference\nmedian"
  ) +
  scale_y_continuous(breaks = seq(0, 25, 5)) +
  theme_classic() +
  labs(
    x        = "Cell Line",
    y        = "Transferred Pseudotime",
    title    = "Figure 1 — Pseudotime Distribution per Sézary Cell Line",
    subtitle = sprintf(
      "Reference medians: Naive=%.2f | TCM=%.2f | Treg=%.2f | TEM=%.2f | Temra=%.2f",
      naive_med, tcm_med, treg_med, tem_med, temra_med)
  ) +
  theme(plot.title    = element_text(size = 13, face = "bold"),
        plot.subtitle = element_text(size = 9,  colour = "grey40"),
        axis.text.x   = element_text(size = 11, face = "bold"))

print(fig1)
save_fig(fig1, "Fig1_Pseudotime_Violin_per_Line", w = 12, h = 6)
```

```{r , fig.width=16, fig.height=7}
fig1v2 <- ggplot(
    query_df %>% filter(!is.na(pseudotime), !is.na(cell_line)),
    aes(cell_line, pseudotime, fill = cell_line)
  ) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.85, colour = "white") +
  geom_boxplot(width = 0.06, fill = "white", colour = "grey40",
               outlier.size = 0.3, outlier.alpha = 0.3) +
  geom_hline(
    data      = REF_MEDIANS_EFFECTOR,
    aes(yintercept = med_pt, colour = state, linetype = state),
    linewidth = 0.7, alpha = 0.9
  ) +
  scale_fill_brewer(palette = "Set2", guide = "none") +
  scale_colour_manual(
    values = STATE_COLORS[names(STATE_COLORS) != "Treg"],
    name   = "Reference\nmedian"
  ) +
  scale_linetype_manual(
    values = LINETYPE_EFFECTOR,
    name   = "Reference\nmedian"
  ) +
  scale_y_continuous(breaks = seq(0, 25, 5)) +
  theme_classic() +
  labs(
    x        = "Cell Line",
    y        = "Transferred Pseudotime",
    title    = "Figure 1 — Pseudotime Distribution per Sézary Cell Line",
    subtitle = sprintf(
      "Effector axis medians: Naive=%.2f | TCM=%.2f | TEM=%.2f | Temra=%.2f\nTreg line omitted — Treg (PT=%.2f) overlaps TCM (PT=%.2f); Treg-like assigned by label",
      naive_med, tcm_med, tem_med, temra_med, treg_med, tcm_med)
  ) +
  theme(
    plot.title    = element_text(size = 13, face = "bold"),
    plot.subtitle = element_text(size = 9,  colour = "grey40"),
    axis.text.x   = element_text(size = 11, face = "bold")
  )

print(fig1v2)
save_fig(fig1v2, "Fig1v2_Pseudotime_Violin_NoTreg", w = 12, h = 6)
```


---

# 9. Figure 2 — UMAP: Pseudotime + Label Side by Side

```{r fig2-umap, fig.width=16, fig.height=7}

p2a <- ggplot(query_df[order(query_df$pseudotime, na.last = FALSE), ],
              aes(UMAP_1, UMAP_2, colour = pseudotime)) +
  geom_point(data = ref_bg_plot, aes(UMAP_1, UMAP_2),
             colour = "grey68", size = 0.25, alpha = 0.45, inherit.aes = FALSE) +
  geom_point(size = 0.25, alpha = 0.8) +
  scale_colour_gradientn(
    colours = c("#0D0887","#6A00A8","#B12A90","#E16462","#FCA636","#F0F921"),
    name = "Pseudotime", limits = PT_LIMITS, na.value = "grey85"
  ) +
  theme_classic() +
  labs(x = "UMAP-1", y = "UMAP-2",
       title    = "A. Transferred pseudotime gradient",
       subtitle = "Grey = healthy reference  |  Coloured = Sézary cells") +
  theme(plot.title = element_text(size = 12, face = "bold"))

p2b <- ggplot(query_df[sample(nrow(query_df)), ],
              aes(UMAP_1, UMAP_2, colour = factor(label, levels = STATE_ORDER))) +
  geom_point(data = ref_bg_plot, aes(UMAP_1, UMAP_2),
             colour = "grey88", size = 0.25, alpha = 0.45, inherit.aes = FALSE) +
  geom_point(size = 0.25, alpha = 0.7) +
  scale_colour_manual(values = STATE_COLORS, name = "Transferred\nlabel",
                      na.value = "grey80") +
  guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1))) +
  theme_classic() +
  labs(x = "UMAP-1", y = "UMAP-2",
       title    = "B. Transferred label identity",
       subtitle = "96-99.9% CD4 TCM across all seven lines") +
  theme(plot.title = element_text(size = 12, face = "bold"))

fig2 <- p2a | p2b
print(fig2)
save_fig(fig2, "Fig2_UMAP_Pseudotime_and_Label", w = 16, h = 7)
```

---

# 10. Figure 3 — Label Bar Chart + Per Cell Line UMAP Facets

```{r fig3-label-umap, fig.width=18, fig.height=6}

p3a <- ggplot(label_df, aes(cell_line, pct, fill = label)) +
  geom_col(width = 0.75) +
  geom_text(data = label_df %>% filter(pct > 5),
            aes(label = sprintf("%.0f%%", pct)),
            position = position_stack(vjust = 0.5),
            size = 3, colour = "white", fontface = "bold") +
  scale_fill_manual(values = STATE_COLORS, name = "Label") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 101)) +
  theme_classic() +
  labs(x = "Cell Line", y = "% Cells", title = "A. Transferred Azimuth l2 Labels") +
  theme(plot.title  = element_text(size = 12, face = "bold"),
        axis.text.x = element_text(size = 10, face = "bold"))

p3b <- ggplot(
    query_df %>% filter(!is.na(cell_line)) %>% arrange(pseudotime),
    aes(UMAP_1, UMAP_2, colour = pseudotime)
  ) +
  geom_point(
    data = ref_bg_plot %>%
      slice(rep(1:n(), times = length(levels(query_df$cell_line)))),
    aes(UMAP_1, UMAP_2), colour = "grey88", size = 0.2, alpha = 0.4,
    inherit.aes = FALSE
  ) +
  geom_point(size = 0.2, alpha = 0.8) +
  scale_colour_gradientn(
    colours = c("#0D0887","#6A00A8","#B12A90","#E16462","#FCA636","#F0F921"),
    name = "Pseudotime", limits = PT_LIMITS, na.value = "grey85"
  ) +
  facet_wrap(~ cell_line, nrow = 2) +
  theme_classic() +
  theme(strip.text = element_text(face = "bold", size = 10),
        plot.title = element_text(size = 12, face = "bold"),
        axis.text  = element_blank(), axis.ticks = element_blank()) +
  labs(x = NULL, y = NULL,
       title = "B. Per Cell Line UMAP — Projected Pseudotime  (Grey = healthy reference)")

fig3 <- p3a + p3b + plot_layout(widths = c(1, 2))
print(fig3)
save_fig(fig3, "Fig3_Labels_PerLine_UMAP", w = 18, h = 6)
```

---

# 11. Figure 4 — Per Cell Line Facet UMAP (Key Heterogeneity Figure)

```{r fig4-facet-umap, fig.width=18, fig.height=10}

fig4 <- ggplot(
    query_df %>% filter(!is.na(cell_line)) %>% arrange(pseudotime),
    aes(UMAP_1, UMAP_2, colour = pseudotime)
  ) +
  geom_point(
    data = ref_bg_plot %>%
      slice(rep(1:n(), times = length(levels(query_df$cell_line)))),
    aes(UMAP_1, UMAP_2), colour = "grey88", size = 0.2, alpha = 0.4,
    inherit.aes = FALSE
  ) +
  geom_point(size = 0.25, alpha = 0.8) +
  scale_colour_gradientn(
    colours = c("#0D0887","#6A00A8","#B12A90","#E16462","#FCA636","#F0F921"),
    name = "Pseudotime\n(Naive->Temra)", limits = PT_LIMITS, na.value = "grey85"
  ) +
  facet_wrap(~ cell_line, nrow = 2) +
  theme_classic() +
  theme(
    strip.text       = element_text(face = "bold", size = 12),
    strip.background = element_rect(fill = "grey95", colour = "grey60"),
    plot.title       = element_text(size = 14, face = "bold"),
    plot.subtitle    = element_text(size = 10, colour = "grey40"),
    axis.text = element_blank(), axis.ticks = element_blank(),
    legend.position = "right"
  ) +
  labs(x = NULL, y = NULL,
       title    = "Figure 4 — Per Cell Line Facet UMAP: Differentiation Position",
       subtitle = "L1 most heterogeneous (spans full trajectory)  |  L6 most arrested (tight TCM cluster)")

print(fig4)
save_fig(fig4, "Fig4_PerLine_Facet_UMAP", w = 18, h = 10)
```

---

# 12. Figure 5 — Label vs Pseudotime Bins Bar Charts

```{r fig5-barcharts, fig.width=16, fig.height=12}

p5a <- ggplot(label_df, aes(cell_line, pct, fill = label)) +
  geom_col(width = 0.7) +
  geom_text(data = label_df %>% filter(pct > 3),
            aes(label = sprintf("%.1f%%", pct)),
            position = position_stack(vjust = 0.5),
            size = 3.2, colour = "white", fontface = "bold") +
  scale_fill_manual(values = STATE_COLORS, name = "Label") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 101)) +
  theme_classic() +
  labs(x = NULL, y = "% Cells",
       title    = "A. Transferred Azimuth l2 Label (Cell of Origin)",
       subtitle = "96-99.9% CD4 TCM across all lines — TCM origin confirmed") +
  theme(plot.title    = element_text(size = 12, face = "bold"),
        plot.subtitle = element_text(size = 10, colour = "grey40"),
        axis.text.x   = element_blank(), axis.ticks.x = element_blank())

p5b <- ggplot(bin_df, aes(cell_line, pct, fill = pseudotime_bin)) +
  geom_col(width = 0.7) +
  geom_text(data = bin_df %>% filter(pct > 3),
            aes(label = sprintf("%.1f%%", pct)),
            position = position_stack(vjust = 0.5),
            size = 3.2, colour = "white", fontface = "bold") +
  scale_fill_manual(values = BIN_COLORS, name = "Pseudotime bin") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 101)) +
  theme_classic() +
  labs(x = "Cell Line", y = "% Cells",
       title    = "B. Pseudotime Bin Assignment (Differentiation Position)",
       subtitle = sprintf(
         "Bin boundaries — Naive|TCM: %.2f  |  TCM|TEM: %.2f  |  TEM|Temra: %.2f  |  Treg: label-based",
         BIN_NAIVE_TCM, BIN_TCM_TEM, BIN_TEM_TEMRA)) +
  theme(plot.title    = element_text(size = 12, face = "bold"),
        plot.subtitle = element_text(size = 9,  colour = "grey40"),
        axis.text.x   = element_text(size = 11, face = "bold"))

fig5 <- p5a / p5b
print(fig5)
save_fig(fig5, "Fig5_Label_vs_Pseudotime_Bins",  w = 16, h = 12)
save_fig(p5a,  "Fig5A_Label_Transfer_BarChart",  w = 10, h = 5)
save_fig(p5b,  "Fig5B_Pseudotime_Bins_BarChart", w = 10, h = 5)
```

---

# 13. Figure 6 — Pseudotime Density + Ridgeplot

```{r fig6-density, fig.width=16, fig.height=7}

p6a <- ggplot(query_df %>% filter(!is.na(pseudotime)), aes(x = pseudotime)) +
  geom_density(fill = "#c0392b", colour = "#c0392b", alpha = 0.4, linewidth = 0.9) +
  geom_vline(data = REF_MEDIANS,
             aes(xintercept = med_pt, colour = state),
             linetype = "dashed", linewidth = 0.7) +
  geom_text(data = REF_MEDIANS,
            aes(x = med_pt, y = Inf,
                label = gsub("CD4 ", "", as.character(state))),
            inherit.aes = FALSE,
            angle = 90, hjust = 1.1, vjust = -0.3, size = 2.8, colour = "grey30") +
  scale_colour_manual(values = STATE_COLORS, name = "Reference\nstate median") +
  scale_x_continuous(limits = c(0, 26), breaks = seq(0, 25, 5)) +
  theme_classic() +
  labs(
    x        = "Transferred Pseudotime",
    y        = "Density",
    title    = "A. Sézary Pseudotime Distribution — All Lines Combined",
    subtitle = sprintf(
      "Reference medians: Naive=%.2f | TCM=%.2f | Treg=%.2f | TEM=%.2f | Temra=%.2f",
      naive_med, tcm_med, treg_med, tem_med, temra_med)
  ) +
  theme(plot.title    = element_text(size = 12, face = "bold"),
        plot.subtitle = element_text(size = 9,  colour = "grey40"),
        legend.position = "right")

p6b <- ggplot(
    query_df %>% filter(!is.na(pseudotime), !is.na(cell_line)),
    aes(x = pseudotime, y = fct_rev(cell_line), fill = cell_line)
  ) +
  geom_density_ridges(scale = 0.9, rel_min_height = 0.01,
                      alpha = 0.85, colour = "white") +
  geom_vline(xintercept = c(BIN_NAIVE_TCM, BIN_TCM_TEM, BIN_TEM_TEMRA),
             linetype = "dashed", colour = "grey50", linewidth = 0.5) +
  annotate("text", x = BIN_NAIVE_TCM, y = 0.5, label = "Naive|TCM",
           size = 2.5, hjust = 1.05, colour = "grey40", angle = 90) +
  annotate("text", x = BIN_TCM_TEM,   y = 0.5, label = "TCM|TEM",
           size = 2.5, hjust = 1.05, colour = "grey40", angle = 90) +
  annotate("text", x = BIN_TEM_TEMRA, y = 0.5, label = "TEM|Temra",
           size = 2.5, hjust = 1.05, colour = "grey40", angle = 90) +
  scale_fill_brewer(palette = "Set2", guide = "none") +
  scale_x_continuous(limits = c(0, 26), breaks = seq(0, 25, 5)) +
  theme_ridges(grid = FALSE) +
  labs(x = "Transferred Pseudotime", y = "Cell Line",
       title    = "B. Per Cell Line Pseudotime Ridgeplot",
       subtitle = "L1 multimodal  |  L3-L7 narrow peaks at TCM-TEM boundary") +
  theme(plot.title    = element_text(size = 12, face = "bold"),
        plot.subtitle = element_text(size = 10, colour = "grey40"))

fig6 <- p6a | p6b
print(fig6)
save_fig(fig6, "Fig6_Density_and_Ridgeplot", w = 16, h = 7)
save_fig(p6a,  "Fig6A_Density_Overall",     w = 8,  h = 6)
save_fig(p6b,  "Fig6B_Ridgeplot_PerLine",   w = 8,  h = 6)
```



```{r, fig.width=12, fig.height=6}
p6a_v2 <- ggplot(
    query_df %>% filter(!is.na(pseudotime)),
    aes(x = pseudotime)
  ) +
  geom_density(fill = "#c0392b", colour = "#c0392b", alpha = 0.4, linewidth = 0.9) +
  geom_vline(
    data     = REF_MEDIANS_EFFECTOR,
    aes(xintercept = med_pt, colour = state),
    linetype = "dashed", linewidth = 0.7
  ) +
  geom_text(
    data        = REF_MEDIANS_EFFECTOR,
    aes(x = med_pt, y = Inf,
        label = gsub("CD4 ", "", as.character(state))),
    inherit.aes = FALSE,
    angle = 90, hjust = 1.1, vjust = -0.3, size = 2.8, colour = "grey30"
  ) +
  scale_colour_manual(
    values = STATE_COLORS[names(STATE_COLORS) != "Treg"],
    name   = "Reference\nstate median"
  ) +
  scale_x_continuous(limits = c(0, 26), breaks = seq(0, 25, 5)) +
  theme_classic() +
  labs(
    x        = "Transferred Pseudotime",
    y        = "Density",
    title    = "A. Sézary Pseudotime Distribution — All Lines Combined",
    subtitle = sprintf(
      "Effector axis medians: Naive=%.2f | TCM=%.2f | TEM=%.2f | Temra=%.2f\nTreg line omitted — overlaps TCM; Treg-like assigned by label not pseudotime",
      naive_med, tcm_med, tem_med, temra_med)
  ) +
  theme(
    plot.title      = element_text(size = 12, face = "bold"),
    plot.subtitle   = element_text(size = 9,  colour = "grey40"),
    legend.position = "right"
  )

print(p6a_v2)
save_fig(p6a_v2, "Fig6Av2_Density_NoTreg", w = 8, h = 6)

```

---

# 14. Figure 8 — Violin: Transferred Pseudotime per Label

```{r fig8-violin-per-label, fig.width=12, fig.height=6}

fig8 <- ggplot(
    query_df %>%
      filter(!is.na(pseudotime), !is.na(label)) %>%
      mutate(label = factor(label, levels = STATE_ORDER)),
    aes(label, pseudotime, fill = label)
  ) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.85, colour = "white") +
  geom_boxplot(width = 0.07, fill = "white", colour = "grey40",
               outlier.size = 0.4, outlier.alpha = 0.3) +
  geom_hline(data = REF_MEDIANS, aes(yintercept = med_pt),
             linetype = "dotted", colour = "grey55", linewidth = 0.6) +
  # Annotations driven entirely from REF_MEDIANS — no hard-coded y values
  geom_text(
    data        = REF_MEDIANS,
    aes(x       = 0.55,
        y       = med_pt,
        label   = sprintf("Ref: %s (%.2f)",
                          gsub("CD4 ", "", as.character(state)), med_pt)),
    inherit.aes = FALSE,
    hjust = 0, size = 2.8, colour = "grey40"
  ) +
  scale_fill_manual(values = STATE_COLORS, guide = "none") +
  scale_y_continuous(breaks = seq(0, 25, 5)) +
  theme_classic() +
  labs(
    x        = "Transferred Azimuth l2 Label",
    y        = "Transferred Pseudotime",
    title    = "Figure 8 — Transferred Pseudotime per Label (All Cell Lines)",
    subtitle = "CD4 Naive internal control at PT~4  |  TCM broad distribution  |  TEM at terminal PT~25"
  ) +
  theme(plot.title    = element_text(size = 13, face = "bold"),
        plot.subtitle = element_text(size = 10, colour = "grey40"),
        axis.text.x   = element_text(size = 10, face = "bold"))

print(fig8)
save_fig(fig8, "Fig8_Violin_Pseudotime_per_Label", w = 12, h = 6)
```




```{r , fig.width=12, fig.height=6}

# For TEM and Temra: median=24.84 for both (Temra n=10, all at same PT).
# Use actual reference max values to separate the lines visually:
#   TEM   max = 25.00  (from diagnostic)
#   Temra max = 24.84  (all 10 cells identical)
# Better approach: show TEM median and Temra median separately but
# acknowledge in subtitle they are nearly identical.

# Override: use mean instead of median for TEM/Temra to separate lines
REF_MEDIANS_EFFECTOR_PLOT <- REF_MEDIANS_EFFECTOR %>%
  mutate(med_pt = case_when(
    state == "CD4 TEM"       ~ 23.962,  # mean from diagnostic (median=24.84 same as Temra)
    state == "CD4 Temra/CTL" ~ 24.840,  # all 10 cells identical
    TRUE ~ med_pt
  ),
  label_text = case_when(
    state == "CD4 TEM"       ~ sprintf("Ref: TEM (mean=23.96)"),
    state == "CD4 Temra/CTL" ~ sprintf("Ref: Temra (24.84, n=10)"),
    state == "CD4 Naive"     ~ sprintf("Ref: Naive (%.2f)", med_pt),
    state == "CD4 TCM"       ~ sprintf("Ref: TCM (%.2f)", med_pt),
    TRUE ~ as.character(state)
  ))

fig8v2 <- ggplot(
    query_df %>%
      filter(!is.na(pseudotime), !is.na(label)) %>%
      mutate(label = factor(label, levels = STATE_ORDER)),  # drop=FALSE keeps Temra axis
    aes(label, pseudotime, fill = label)
  ) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.85, colour = "white") +
  geom_boxplot(width = 0.07, fill = "white", colour = "grey40",
               outlier.size = 0.4, outlier.alpha = 0.3) +
  geom_hline(
    data      = REF_MEDIANS_EFFECTOR_PLOT,
    aes(yintercept = med_pt),
    linetype  = "dotted", colour = "grey55", linewidth = 0.6
  ) +
  geom_text(
    data        = REF_MEDIANS_EFFECTOR_PLOT,
    aes(x       = 0.55,
        y       = med_pt,
        label   = label_text),
    inherit.aes = FALSE,
    hjust = 0, size = 2.8, colour = "grey40"
  ) +
  scale_fill_manual(values = STATE_COLORS, na.value = "grey80", guide = "none",
                    drop = FALSE) +
  scale_x_discrete(drop = FALSE) +   # ← forces CD4 Temra/CTL to appear
  scale_y_continuous(breaks = seq(0, 25, 5)) +
  theme_classic() +
  labs(
    x        = "Transferred Azimuth l2 Label",
    y        = "Transferred Pseudotime",
    title    = "Figure 8 — Transferred Pseudotime per Label (All Cell Lines)",
    subtitle = sprintf(
      "Effector axis: Naive=%.2f | TCM=%.2f | TEM~24.84 | Temra~24.84 (n=10, identical)\nCD4 Temra/CTL: no Sézary cells received this label — consistent with TCM arrest phenotype\nTreg line omitted — overlaps TCM; Treg-like assigned by label",
      naive_med, tcm_med)
  ) +
  theme(
    plot.title    = element_text(size = 13, face = "bold"),
    plot.subtitle = element_text(size = 9,  colour = "grey40"),
    axis.text.x   = element_text(size = 10, face = "bold")
  )

print(fig8v2)
save_fig(fig8v2, "Fig8v2_Violin_NoTreg", w = 12, h = 6)

```

```{r , fig.width=18, fig.height=7}
fig8v2 <- ggplot(
    query_df %>%
      filter(!is.na(pseudotime), !is.na(label)) %>%
      mutate(label = factor(label, levels = STATE_ORDER)),
    aes(label, pseudotime, fill = label)
  ) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.85, colour = "white") +
  geom_boxplot(width = 0.07, fill = "white", colour = "grey40",
               outlier.size = 0.4, outlier.alpha = 0.3) +
  geom_hline(
    data      = REF_MEDIANS_EFFECTOR,
    aes(yintercept = med_pt),
    linetype  = "dotted", colour = "grey55", linewidth = 0.6
  ) +
  geom_text(
    data        = REF_MEDIANS_EFFECTOR,
    aes(x       = 0.55,
        y       = med_pt,
        label   = sprintf("Ref: %s (%.2f)",
                          gsub("CD4 ", "", as.character(state)), med_pt)),
    inherit.aes = FALSE,
    hjust = 0, size = 2.8, colour = "grey40"
  ) +
  scale_fill_manual(values = STATE_COLORS, guide = "none") +
  scale_y_continuous(breaks = seq(0, 25, 5)) +
  theme_classic() +
  labs(
    x        = "Transferred Azimuth l2 Label",
    y        = "Transferred Pseudotime",
    title    = "Figure 8 — Transferred Pseudotime per Label (All Cell Lines)",
    subtitle = sprintf(
      "Effector axis reference lines: Naive=%.2f | TCM=%.2f | TEM=%.2f | Temra=%.2f\nTreg line omitted — Treg (PT=%.2f) overlaps TCM; Treg-like violin shown as biological confirmation",
      naive_med, tcm_med, tem_med, temra_med, treg_med)
  ) +
  theme(
    plot.title    = element_text(size = 13, face = "bold"),
    plot.subtitle = element_text(size = 9,  colour = "grey40"),
    axis.text.x   = element_text(size = 10, face = "bold")
  )

print(fig8v2)
save_fig(fig8v2, "Fig8v2_Violin_NoTreg", w = 12, h = 6)

```


---

# 15. Figure 9 — Cross-Table: Label vs Pseudotime Bin (PRIMARY RESULT)

```{r , fig.width=18, fig.height=7}

cross_df <- query_df %>%
  filter(!is.na(label), !is.na(pseudotime_bin)) %>%
  mutate(label = factor(label, levels = STATE_ORDER)) %>%
  count(label, pseudotime_bin) %>%
  group_by(label) %>%
  mutate(pct = 100 * n / sum(n)) %>%
  ungroup()

tcm_discordant <- cross_df %>%
  filter(label == "CD4 TCM",
         pseudotime_bin %in% c("TEM-like", "Temra-like")) %>%
  summarise(pct = sum(pct)) %>%
  pull(pct)

fig9 <- ggplot(cross_df, aes(pseudotime_bin, pct, fill = pseudotime_bin)) +
  geom_col(width = 0.7) +
  geom_text(aes(label = sprintf("%.1f%%", pct)),
            vjust = -0.3, size = 3.5, fontface = "bold") +
  scale_fill_manual(values = BIN_COLORS, guide = "none") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 115)) +
  facet_wrap(~ label, nrow = 1, scales = "free_x") +
  theme_classic() +
  theme(
    strip.text       = element_text(face = "bold", size = 11),
    strip.background = element_rect(fill = "grey95", colour = "grey60"),
    axis.text.x      = element_text(size = 9, face = "bold", angle = 30, hjust = 1),
    plot.title       = element_text(size = 14, face = "bold"),
    plot.subtitle    = element_text(size = 10, colour = "grey40")
  ) +
  labs(
    x        = "Pseudotime Bin",
    y        = "% of Cells with This Label",
    title    = "Figure 9 — Cross-Table: Transferred Label vs Pseudotime Bin  [PRIMARY RESULT]",
    subtitle = sprintf(
      "TCM-labelled cells: %.1f%% discordant (TEM-like or Temra-like)\nTreg panel: 100%% Treg-like by design",
      tcm_discordant)
  )

print(fig9)
save_fig(fig9, "Fig9_CrossTable_Label_vs_Bin_PRIMARY", w = 18, h = 7)
```

---

# 16. Supplementary Figures

## Supp S1 — Per Cell Line Pseudotime Density (Faceted)

```{r suppS1-density-facet, fig.width=12, fig.height=7}

suppS1 <- ggplot(
    query_df %>% filter(!is.na(pseudotime), !is.na(cell_line)),
    aes(x = pseudotime, fill = cell_line, colour = cell_line)
  ) +
  geom_density(alpha = 0.35, linewidth = 0.8) +
  geom_vline(data = REF_MEDIANS, aes(xintercept = med_pt),
             colour = "grey45", linetype = "dashed", linewidth = 0.5) +
  scale_fill_brewer(palette = "Set2", guide = "none") +
  scale_colour_brewer(palette = "Set2", guide = "none") +
  scale_x_continuous(limits = c(0, 26), breaks = seq(0, 25, 5)) +
  facet_wrap(~ cell_line, nrow = 2) +
  theme_classic() +
  theme(strip.text    = element_text(face = "bold"),
        plot.title    = element_text(size = 12, face = "bold"),
        plot.subtitle = element_text(size = 9,  colour = "grey40")) +
  labs(x = "Transferred Pseudotime", y = "Density",
       title    = "Supp S1 — Per Cell Line Pseudotime Density",
       subtitle = "Grey dashed lines = data-derived reference state medians")

print(suppS1)
save_fig(suppS1, "SuppS1_Density_PerLine_Facet", w = 12, h = 7)
```

## Supp S2 — Ridgeplot Standalone

```{r suppS2-ridgeplot, fig.width=10, fig.height=7}

suppS2 <- p6b +
  ggtitle(
    "Supp S2 — Per Cell Line Pseudotime Ridgeplot",
    subtitle = sprintf(
      "Bin boundaries: Naive|TCM=%.2f | TCM|TEM=%.2f | TEM|Temra=%.2f",
      BIN_NAIVE_TCM, BIN_TCM_TEM, BIN_TEM_TEMRA)
  )

print(suppS2)
save_fig(suppS2, "SuppS2_Ridgeplot_PerLine", w = 10, h = 7)
```

## Supp S3 — Violin per Label x Cell Line (Faceted)

```{r suppS3-violin-label-line, fig.width=16, fig.height=10}

suppS3 <- ggplot(
    query_df %>%
      filter(!is.na(label), !is.na(cell_line), !is.na(pseudotime)) %>%
      mutate(label = factor(label, levels = STATE_ORDER)),
    aes(cell_line, pseudotime, fill = cell_line)
  ) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.8, colour = "white") +
  geom_boxplot(width = 0.07, fill = "white", colour = "grey40",
               outlier.size = 0.2, outlier.alpha = 0.3) +
  scale_fill_brewer(palette = "Set2", guide = "none") +
  scale_y_continuous(breaks = seq(0, 25, 5)) +
  facet_wrap(~ label, nrow = 1) +
  theme_classic() +
  theme(
    strip.text       = element_text(face = "bold", size = 10),
    strip.background = element_rect(fill = "grey95", colour = "grey60"),
    axis.text.x      = element_text(size = 9, face = "bold", angle = 30, hjust = 1),
    plot.title       = element_text(size = 12, face = "bold"),
    plot.subtitle    = element_text(size = 9,  colour = "grey40")
  ) +
  labs(x = "Cell Line", y = "Transferred Pseudotime",
       title    = "Supp S3 — Violin: Pseudotime per Label x Cell Line",
       subtitle = "Complements Fig 9 — shows within-state per-line heterogeneity")

print(suppS3)
save_fig(suppS3, "SuppS3_Violin_Label_x_Line", w = 16, h = 10)
```

## Supp S4 — Pseudotime Bin Bar Chart with Full Percentages

```{r suppS4-bins-detailed, fig.width=14, fig.height=6}

suppS4 <- ggplot(bin_df, aes(cell_line, pct, fill = pseudotime_bin)) +
  geom_col(width = 0.7) +
  geom_text(data = bin_df %>% filter(pct > 2),
            aes(label = sprintf("%.1f%%", pct)),
            position = position_stack(vjust = 0.5),
            size = 3.5, colour = "white", fontface = "bold") +
  scale_fill_manual(values = BIN_COLORS, name = "Pseudotime bin") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 101)) +
  theme_classic() +
  labs(
    x        = "Cell Line",
    y        = "% Cells",
    title    = "Supp S4 — Pseudotime Bin Composition per Cell Line",
    subtitle = sprintf(
      "Bin boundaries: Naive|TCM=%.2f  |  TCM|TEM=%.2f  |  TEM|Temra=%.2f  |  Treg=label-based",
      BIN_NAIVE_TCM, BIN_TCM_TEM, BIN_TEM_TEMRA)
  ) +
  theme(plot.title    = element_text(size = 12, face = "bold"),
        plot.subtitle = element_text(size = 9,  colour = "grey40"),
        axis.text.x   = element_text(size = 11, face = "bold"))

print(suppS4)
save_fig(suppS4, "SuppS4_Bins_BarChart_Detailed", w = 14, h = 6)
```

---

# 17. Figure Manifest

```{r manifest, echo=FALSE}
manifest <- data.frame(
  File = c(
    "Fig7_FourPanel_Summary_FIXED",
    "Fig7A_Transferred_Labels",
    "Fig7B_Transferred_Pseudotime_FIXED",
    "Fig7C_Pseudotime_Bins_UMAP",
    "Fig7D_Label_BarChart_per_Line",
    "Fig1_Pseudotime_Violin_per_Line",
    "Fig2_UMAP_Pseudotime_and_Label",
    "Fig3_Labels_PerLine_UMAP",
    "Fig4_PerLine_Facet_UMAP",
    "Fig5_Label_vs_Pseudotime_Bins",
    "Fig5A_Label_Transfer_BarChart",
    "Fig5B_Pseudotime_Bins_BarChart",
    "Fig6_Density_and_Ridgeplot",
    "Fig6A_Density_Overall",
    "Fig6B_Ridgeplot_PerLine",
    "Fig8_Violin_Pseudotime_per_Label",
    "Fig9_CrossTable_Label_vs_Bin_PRIMARY",
    "SuppS1_Density_PerLine_Facet",
    "SuppS2_Ridgeplot_PerLine",
    "SuppS3_Violin_Label_x_Line",
    "SuppS4_Bins_BarChart_Detailed"
  ),
  Description = c(
    "Four-panel: labels | pseudotime (FIXED) | bins | label bar",
    "Sezary UMAP — transferred Azimuth l2 label",
    "Sezary UMAP — transferred pseudotime, FIXED 0-25 colour scale",
    "Sezary UMAP — pseudotime bin colour",
    "Transferred label proportions per cell line bar chart",
    "Pseudotime violin per cell line with data-derived reference medians",
    "UMAP pseudotime gradient + label identity side by side",
    "Label bar chart + per cell line UMAP facets",
    "Per cell line facet UMAP — key inter-line heterogeneity figure",
    "Dual bar chart: labels (top) and pseudotime bins (bottom)",
    "Label transfer proportions bar chart only",
    "Pseudotime bin proportions bar chart only",
    "Overall pseudotime density + per-line ridgeplot",
    "Overall pseudotime density with data-derived reference median lines",
    "Per cell line pseudotime ridgeplot with bin boundaries",
    "Violin: transferred pseudotime per label — data-derived annotations",
    "Cross-table: label vs pseudotime bin — PRIMARY RESULT",
    "Per cell line pseudotime density faceted",
    "Per cell line ridgeplot standalone",
    "Violin: pseudotime per label x cell line faceted",
    "Pseudotime bin bar chart with full percentages labelled"
  ),
  Manuscript = c(
    "Fig 1 combined", "Fig 1A", "Fig 1B (FIXED)", "Fig 1C", "Fig 1D",
    "Supp", "Fig 2", "Fig 3", "Fig 4 (KEY)",
    "Fig 5", "—", "Fig 5B",
    "Supp", "Supp S3", "Supp S4",
    "Supp S5", "Fig 2 (PRIMARY)",
    "Supp S1", "Supp S2", "Supp S6", "Supp"
  ),
  stringsAsFactors = FALSE
)

kable(manifest,
      caption = "All 21 figures — Figures/PNG/*.png (300 dpi) and Figures/PDF/*.pdf") %>%
  kable_styling(bootstrap_options = c("striped","condensed","hover"),
                full_width = TRUE) %>%
  row_spec(which(grepl("FIXED|PRIMARY", manifest$File)),
           bold = TRUE, background = "#fff3cd")
```

---

# 18. Session Info

```{r session-info}
sessionInfo()
```
