1 Overview

This script regenerates all manuscript figures from the saved projection object. The reference object is loaded solely to extract UMAP coordinates for the grey background.

1.1 Objects required

Object Path Role
reference_integrated ../../1-Final_Custom_MST_Monocle3_Trajectory_and_mapping/1-Custom_MST/Objects/cd4_ref_dual_trajectory.rds UMAP background only — grey reference dots
mapped_MalignantCD4T results/MalignantCD4T_Monocle3_projection/MalignantCD4T_mapped_monocle3_pseudotime_noProlif_ref.Rds All biology — pseudotime, labels, bins, colour scales

1.2 Key design decisions

Item Source
Pseudotime colour-scale limits (PT_LIMITS) mapped_MalignantCD4T$predicted.pseudotime
Bin boundaries Midpoints between actual reference state medians — computed at runtime from ref_bg
State medians (REF_MEDIANS) Computed at runtime from ref_bgnever hard-coded
All bar charts, violins, density plots mapped_MalignantCD4T only

1.3 Panel B fix

Previously called FeaturePlot(reference_integrated, ...) without limits, producing a spurious 1.0–2.0 colour scale. Fixed: Panel B plots Sézary cells coloured by predicted.pseudotime with limits = PT_LIMITS (correct 0–25 scale).

1.4 Bin design

Bins are anchored to actual reference state medians computed from ref_bg. Boundaries = midpoint between adjacent state medians (biology-principled, not arbitrary). Treg-like bin is assigned by Azimuth label (Treg is a branch, not a linear position).


2 1. Setup & Libraries

suppressPackageStartupMessages({
  library(Seurat)
  library(ggplot2)
  library(dplyr)
  library(tidyr)
  library(forcats)
  library(patchwork)
  library(scales)
  library(RColorBrewer)
  library(ggridges)
  library(knitr)
  library(kableExtra)
})
Registered S3 methods overwritten by 'htmltools':
  method               from         
  print.html           tools:rstudio
  print.shiny.tag      tools:rstudio
  print.shiny.tag.list tools:rstudio
Registered S3 method overwritten by 'rmarkdown':
  method         from
  print.paged_df     
dir.create("Figures/PNG", recursive = TRUE, showWarnings = FALSE)
dir.create("Figures/PDF", recursive = TRUE, showWarnings = FALSE)

save_fig <- function(p, name, w = 10, h = 8) {
  ggsave(file.path("Figures/PNG", paste0(name, ".png")),
         p, width = w, height = h, dpi = 300, bg = "white")
  ggsave(file.path("Figures/PDF", paste0(name, ".pdf")),
         p, width = w, height = h, useDingbats = FALSE)
  invisible(cat(sprintf("  \u2713  %s  [PNG + PDF]\n", name)))
}

STATE_COLORS <- c(
  "CD4 Naive"     = "#4472C4",
  "CD4 TCM"       = "#70AD47",
  "CD4 TEM"       = "#ED7D31",
  "CD4 Temra/CTL" = "#C00000",
  "Treg"          = "#7030A0"
)

BIN_COLORS <- c(
  "Naive-like"  = "#4472C4",
  "TCM-like"    = "#70AD47",
  "TEM-like"    = "#ED7D31",
  "Temra-like"  = "#C00000",
  "Treg-like"   = "#7030A0"
)

STATE_ORDER <- c("CD4 Naive", "CD4 TCM", "CD4 TEM", "CD4 Temra/CTL", "Treg")
BIN_ORDER   <- c("Naive-like", "TCM-like", "TEM-like", "Temra-like", "Treg-like")
LINE_ORDER  <- paste0("L", 1:7)

cat("Setup complete.\n")
Setup complete.

3 2. Load & Validate Objects

ref_path <- paste0(
  "../../1-Final_Custom_MST_Monocle3_Trajectory_and_mapping/",
  "1-Custom_MST/Objects/cd4_ref_dual_trajectory.rds"
)
reference_integrated <- readRDS(ref_path)
cat("Reference loaded:", ncol(reference_integrated), "cells\n")
Reference loaded: 11466 cells
stopifnot(
  "umap reduction missing from reference" =
    "umap" %in% names(reference_integrated@reductions)
)

obj_path <- paste0(
  "../results/MalignantCD4T_Monocle3_projection/",
  "MalignantCD4T_mapped_monocle3_pseudotime_noProlif_ref.Rds"
)
mapped_MalignantCD4T <- readRDS(obj_path)
cat("Query loaded:    ", ncol(mapped_MalignantCD4T), "cells\n")
Query loaded:     40695 cells
stopifnot(
  "ref.umap reduction missing — re-run MapQuery" =
    "ref.umap" %in% names(mapped_MalignantCD4T@reductions),
  "predicted.pseudotime column missing" =
    "predicted.pseudotime" %in% colnames(mapped_MalignantCD4T@meta.data),
  "predicted.predicted.celltype.l2 column missing" =
    "predicted.predicted.celltype.l2" %in% colnames(mapped_MalignantCD4T@meta.data)
)

if (!"cell_line" %in% colnames(mapped_MalignantCD4T@meta.data)) {
  if ("orig.ident" %in% colnames(mapped_MalignantCD4T@meta.data)) {
    mapped_MalignantCD4T$cell_line <- mapped_MalignantCD4T$orig.ident
    cat("NOTE: using orig.ident as cell_line\n")
  } else {
    stop("Cannot find cell_line or orig.ident in metadata")
  }
}

cat("\npredicted.pseudotime summary (expect Min~0, Max~25):\n")

predicted.pseudotime summary (expect Min~0, Max~25):
print(summary(mapped_MalignantCD4T$predicted.pseudotime))
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  1.156  16.431  17.703  17.655  19.487  25.553 
cat("\nTransferred label distribution:\n")

Transferred label distribution:
print(sort(table(mapped_MalignantCD4T$predicted.predicted.celltype.l2,
                 useNA = "ifany"), decreasing = TRUE))

  CD4 TCM   CD4 TEM      Treg CD4 Naive 
    40291       304        79        21 
cat("\nCell line sizes:\n")

Cell line sizes:
print(sort(table(mapped_MalignantCD4T$cell_line, useNA = "ifany")))

  L6   L7   L1   L2   L4   L5   L3 
5148 5331 5825 5935 6006 6022 6428 

4 3. Build Reference Background (ref_bg)

# ref_bg serves two purposes:
#   1. Grey background dots in all projection UMAPs
#   2. Source for computing actual reference state medians (REF_MEDIANS)
ref_bg <- data.frame(
  Embeddings(reference_integrated, "umap"),
  state      = reference_integrated$predicted.celltype.l2,
  pseudotime = reference_integrated$monocle3_pseudotime,
  stringsAsFactors = FALSE
)
colnames(ref_bg)[1:2] <- c("UMAP_1", "UMAP_2")
ref_bg$state <- factor(ref_bg$state, levels = STATE_ORDER)

cat("Reference monocle3_pseudotime (should be 0-25):\n")
Reference monocle3_pseudotime (should be 0-25):
print(summary(ref_bg$pseudotime))
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  0.000   3.855   7.404   9.926  15.525  25.356 
ref_bg_plot <- ref_bg[sample(nrow(ref_bg)), ]
cat(sprintf("\nref_bg ready: %d reference cells\n", nrow(ref_bg)))

ref_bg ready: 11466 reference cells

5 4. Compute Reference State Medians & Bin Boundaries

# ══════════════════════════════════════════════════════════════════════════
# REF_MEDIANS computed at runtime from actual ref_bg — NOT hard-coded.
#
# Diagnostic confirmed these actual values:
#   CD4 Naive     median =  3.293
#   CD4 TCM       median =  9.377  (was hard-coded as 13.36 — WRONG)
#   CD4 TEM       median = 24.840
#   CD4 Temra/CTL median = 24.840  (only 10 cells)
#   Treg          median = 11.150  (was hard-coded as 18.75 — above Treg max of 17.40, WRONG)
#
# Bin boundaries derived as midpoints between adjacent state medians.
# ══════════════════════════════════════════════════════════════════════════

ref_state_medians <- ref_bg %>%
  filter(!is.na(pseudotime), !is.na(state)) %>%
  group_by(state) %>%
  summarise(
    n      = n(),
    med_pt = round(median(pseudotime, na.rm = TRUE), 3),
    .groups = "drop"
  ) %>%
  arrange(med_pt)

cat("=== Reference state medians (data-derived) ===\n")
=== Reference state medians (data-derived) ===
print(ref_state_medians)

get_med <- function(s) {
  v <- ref_state_medians$med_pt[ref_state_medians$state == s]
  if (length(v) == 0) stop(paste("State not found in ref_bg:", s))
  v
}

naive_med  <- get_med("CD4 Naive")
tcm_med    <- get_med("CD4 TCM")
tem_med    <- get_med("CD4 TEM")
temra_med  <- get_med("CD4 Temra/CTL")
treg_med   <- get_med("Treg")

BIN_NAIVE_TCM  <- round((naive_med + tcm_med)  / 2, 3)
BIN_TCM_TEM    <- round((tcm_med   + tem_med)   / 2, 3)
BIN_TEM_TEMRA  <- round((tem_med   + temra_med) / 2, 3)

cat("\n=== Bin boundaries (midpoints between adjacent state medians) ===\n")

=== Bin boundaries (midpoints between adjacent state medians) ===
cat(sprintf("  Naive | TCM   : %.3f\n", BIN_NAIVE_TCM))
  Naive | TCM   : 6.335
cat(sprintf("  TCM   | TEM   : %.3f\n", BIN_TCM_TEM))
  TCM   | TEM   : 17.108
cat(sprintf("  TEM   | Temra : %.3f\n", BIN_TEM_TEMRA))
  TEM   | Temra : 24.840
cat(sprintf("  Treg           : label-based (branch logic)\n"))
  Treg           : label-based (branch logic)
if (abs(tem_med - temra_med) < 0.5) {
  cat("\nNOTE: TEM and Temra medians are very close (n=10 Temra cells in reference).\n")
  cat("      Temra-like bin will be negligible — biologically expected.\n")
}

NOTE: TEM and Temra medians are very close (n=10 Temra cells in reference).
      Temra-like bin will be negligible — biologically expected.
REF_MEDIANS <- data.frame(
  state  = factor(STATE_ORDER, levels = STATE_ORDER),
  med_pt = c(naive_med, tcm_med, tem_med, temra_med, treg_med)
)

cat("\n=== REF_MEDIANS (used for reference lines in all figures) ===\n")

=== REF_MEDIANS (used for reference lines in all figures) ===
print(REF_MEDIANS)
# ── REF_MEDIANS_EFFECTOR: Treg excluded ───────────────────────────────────
# Treg (median PT=11.15) and TCM (median PT=9.38) overlap entirely in
# pseudotime space — both states share the Naive→TCM graph segment.
# Showing a Treg line at PT=11.15 implies a pseudotime boundary that does
# not exist and misleads interpretation. Treg-like cells are assigned by
# transferred Azimuth label (KNN), not by pseudotime position.
REF_MEDIANS_EFFECTOR <- REF_MEDIANS %>%
  filter(state != "Treg")

LINETYPE_EFFECTOR <- c(
  "CD4 Naive"     = "dotted",
  "CD4 TCM"       = "dashed",
  "CD4 TEM"       = "longdash",
  "CD4 Temra/CTL" = "solid"
)

cat("=== REF_MEDIANS_EFFECTOR (Treg excluded) ===\n")
=== REF_MEDIANS_EFFECTOR (Treg excluded) ===
print(REF_MEDIANS_EFFECTOR)

6 5. Assign Pseudotime Bins

assign_bin <- function(pt, lbl) {
  if (isTRUE(grepl("Treg", lbl, ignore.case = TRUE))) return("Treg-like")
  if (is.na(pt))             return(NA_character_)
  if (pt <= BIN_NAIVE_TCM)   return("Naive-like")
  if (pt <= BIN_TCM_TEM)     return("TCM-like")
  if (pt <= BIN_TEM_TEMRA)   return("TEM-like")
  return("Temra-like")
}

mapped_MalignantCD4T$pseudotime_bin <- factor(
  mapply(assign_bin,
         pt  = mapped_MalignantCD4T$predicted.pseudotime,
         lbl = mapped_MalignantCD4T$predicted.predicted.celltype.l2),
  levels = BIN_ORDER
)

cat("Pseudotime bin distribution (all cells):\n")
Pseudotime bin distribution (all cells):
print(table(mapped_MalignantCD4T$pseudotime_bin, useNA = "ifany"))

Naive-like   TCM-like   TEM-like Temra-like  Treg-like 
      2287      13298      23485       1546         79 
cat("\nPseudotime bin per cell line:\n")

Pseudotime bin per cell line:
print(table(mapped_MalignantCD4T$cell_line,
            mapped_MalignantCD4T$pseudotime_bin, useNA = "ifany"))
    
     Naive-like TCM-like TEM-like Temra-like Treg-like
  L1       1407     1104     2630        682         2
  L2        833     1593     3324        185         0
  L3          4     2357     3613        447         7
  L4         37     1384     4569         11         5
  L5          3     3419     2414        178         8
  L6          0     1815     3244         43        46
  L7          3     1626     3691          0        11

7 6. Build Shared Data Frames

query_df <- data.frame(
  Embeddings(mapped_MalignantCD4T, "ref.umap"),
  pseudotime     = mapped_MalignantCD4T$predicted.pseudotime,
  cell_line      = factor(mapped_MalignantCD4T$cell_line, levels = LINE_ORDER),
  label          = mapped_MalignantCD4T$predicted.predicted.celltype.l2,
  pseudotime_bin = mapped_MalignantCD4T$pseudotime_bin,
  stringsAsFactors = FALSE
)
colnames(query_df)[1:2] <- c("UMAP_1", "UMAP_2")

PT_LIMITS <- range(query_df$pseudotime, na.rm = TRUE)
cat(sprintf("Pseudotime colour limits: %.3f to %.3f\n", PT_LIMITS[1], PT_LIMITS[2]))
Pseudotime colour limits: 1.156 to 25.553
label_df <- query_df %>%
  filter(!is.na(label), !is.na(cell_line)) %>%
  count(cell_line, label) %>%
  group_by(cell_line) %>%
  mutate(pct = 100 * n / sum(n)) %>%
  ungroup() %>%
  mutate(label = factor(label, levels = STATE_ORDER))

bin_df <- query_df %>%
  filter(!is.na(pseudotime_bin), !is.na(cell_line)) %>%
  count(cell_line, pseudotime_bin) %>%
  group_by(cell_line) %>%
  mutate(pct = 100 * n / sum(n)) %>%
  ungroup()

cat("Data frames ready.\n")
Data frames ready.

8 7. Figure 7 — Four-Panel Summary


bg_layer <- geom_point(
  data = ref_bg_plot, aes(x = UMAP_1, y = UMAP_2),
  colour = "grey68", size = 0.25, alpha = 0.45, inherit.aes = FALSE
)

p7a <- ggplot(query_df[sample(nrow(query_df)), ],
              aes(UMAP_1, UMAP_2, colour = factor(label, levels = STATE_ORDER))) +
  bg_layer +
  geom_point(size = 0.35, alpha = 0.7) +
  scale_colour_manual(values = STATE_COLORS, name = "Transferred\nLabel",
                      na.value = "grey80") +
  guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1))) +
  theme_classic() +
  labs(x = "UMAP-1", y = "UMAP-2",
       title = "A. Sézary Cells — Transferred CD4 T Cell Labels") +
  theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"))

p7b <- ggplot(query_df[order(query_df$pseudotime, na.last = FALSE), ],
              aes(UMAP_1, UMAP_2, colour = pseudotime)) +
  bg_layer +
  geom_point(size = 0.35, alpha = 0.8) +
  scale_colour_gradientn(
    colours  = c("lightblue", "yellow", "red"),
    name     = "Pseudotime",
    limits   = PT_LIMITS,
    na.value = "grey85"
  ) +
  theme_classic() +
  labs(x = "UMAP-1", y = "UMAP-2",
       title = "B. Sézary Cells — Transferred Pseudotime  [FIXED]") +
  theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"))

p7c <- ggplot(query_df[sample(nrow(query_df)), ],
              aes(UMAP_1, UMAP_2, colour = pseudotime_bin)) +
  bg_layer +
  geom_point(size = 0.35, alpha = 0.7) +
  scale_colour_manual(values = BIN_COLORS, name = "Pseudotime\nBin",
                      na.value = "grey80") +
  guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1))) +
  theme_classic() +
  labs(x = "UMAP-1", y = "UMAP-2",
       title = "C. Sézary Cells — Pseudotime Bin") +
  theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"))

p7d <- ggplot(label_df, aes(cell_line, pct, fill = label)) +
  geom_col(width = 0.75) +
  geom_text(data = label_df %>% filter(pct > 3),
            aes(label = sprintf("%.1f%%", pct)),
            position = position_stack(vjust = 0.5),
            size = 3, colour = "white", fontface = "bold") +
  scale_fill_manual(values = STATE_COLORS, name = "Label") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 101)) +
  theme_classic() +
  labs(x = "Cell Line", y = "% Cells",
       title = "D. Transferred Label per Cell Line") +
  theme(plot.title  = element_text(hjust = 0.5, size = 12, face = "bold"),
        axis.text.x = element_text(size = 10, face = "bold"))

fig7 <- (p7a | p7b) / (p7c | p7d)
print(fig7)

save_fig(fig7, "Fig7_FourPanel_Summary_FIXED",       w = 18, h = 14)
  ✓  Fig7_FourPanel_Summary_FIXED  [PNG + PDF]
save_fig(p7a,  "Fig7A_Transferred_Labels",           w = 9,  h = 7)
  ✓  Fig7A_Transferred_Labels  [PNG + PDF]
save_fig(p7b,  "Fig7B_Transferred_Pseudotime_FIXED", w = 9,  h = 7)
  ✓  Fig7B_Transferred_Pseudotime_FIXED  [PNG + PDF]
save_fig(p7c,  "Fig7C_Pseudotime_Bins_UMAP",         w = 9,  h = 7)
  ✓  Fig7C_Pseudotime_Bins_UMAP  [PNG + PDF]
save_fig(p7d,  "Fig7D_Label_BarChart_per_Line",      w = 9,  h = 7)
  ✓  Fig7D_Label_BarChart_per_Line  [PNG + PDF]

9 8. Figure 1 — Pseudotime Violin per Cell Line


fig1 <- ggplot(
    query_df %>% filter(!is.na(pseudotime), !is.na(cell_line)),
    aes(cell_line, pseudotime, fill = cell_line)
  ) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.85, colour = "white") +
  geom_boxplot(width = 0.06, fill = "white", colour = "grey40",
               outlier.size = 0.3, outlier.alpha = 0.3) +
  geom_hline(data = REF_MEDIANS,
             aes(yintercept = med_pt, colour = state, linetype = state),
             linewidth = 0.7, alpha = 0.9) +
  scale_fill_brewer(palette = "Set2", guide = "none") +
  scale_colour_manual(values = STATE_COLORS, name = "Reference\nmedian") +
  scale_linetype_manual(
    values = c("CD4 Naive" = "dotted", "CD4 TCM" = "dashed",
               "CD4 TEM" = "longdash", "CD4 Temra/CTL" = "solid",
               "Treg" = "twodash"),
    name = "Reference\nmedian"
  ) +
  scale_y_continuous(breaks = seq(0, 25, 5)) +
  theme_classic() +
  labs(
    x        = "Cell Line",
    y        = "Transferred Pseudotime",
    title    = "Figure 1 — Pseudotime Distribution per Sézary Cell Line",
    subtitle = sprintf(
      "Reference medians: Naive=%.2f | TCM=%.2f | Treg=%.2f | TEM=%.2f | Temra=%.2f",
      naive_med, tcm_med, treg_med, tem_med, temra_med)
  ) +
  theme(plot.title    = element_text(size = 13, face = "bold"),
        plot.subtitle = element_text(size = 9,  colour = "grey40"),
        axis.text.x   = element_text(size = 11, face = "bold"))

print(fig1)

save_fig(fig1, "Fig1_Pseudotime_Violin_per_Line", w = 12, h = 6)
  ✓  Fig1_Pseudotime_Violin_per_Line  [PNG + PDF]
fig1v2 <- ggplot(
    query_df %>% filter(!is.na(pseudotime), !is.na(cell_line)),
    aes(cell_line, pseudotime, fill = cell_line)
  ) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.85, colour = "white") +
  geom_boxplot(width = 0.06, fill = "white", colour = "grey40",
               outlier.size = 0.3, outlier.alpha = 0.3) +
  geom_hline(
    data      = REF_MEDIANS_EFFECTOR,
    aes(yintercept = med_pt, colour = state, linetype = state),
    linewidth = 0.7, alpha = 0.9
  ) +
  scale_fill_brewer(palette = "Set2", guide = "none") +
  scale_colour_manual(
    values = STATE_COLORS[names(STATE_COLORS) != "Treg"],
    name   = "Reference\nmedian"
  ) +
  scale_linetype_manual(
    values = LINETYPE_EFFECTOR,
    name   = "Reference\nmedian"
  ) +
  scale_y_continuous(breaks = seq(0, 25, 5)) +
  theme_classic() +
  labs(
    x        = "Cell Line",
    y        = "Transferred Pseudotime",
    title    = "Figure 1 — Pseudotime Distribution per Sézary Cell Line",
    subtitle = sprintf(
      "Effector axis medians: Naive=%.2f | TCM=%.2f | TEM=%.2f | Temra=%.2f\nTreg line omitted — Treg (PT=%.2f) overlaps TCM (PT=%.2f); Treg-like assigned by label",
      naive_med, tcm_med, tem_med, temra_med, treg_med, tcm_med)
  ) +
  theme(
    plot.title    = element_text(size = 13, face = "bold"),
    plot.subtitle = element_text(size = 9,  colour = "grey40"),
    axis.text.x   = element_text(size = 11, face = "bold")
  )

print(fig1v2)

save_fig(fig1v2, "Fig1v2_Pseudotime_Violin_NoTreg", w = 12, h = 6)
  ✓  Fig1v2_Pseudotime_Violin_NoTreg  [PNG + PDF]

10 9. Figure 2 — UMAP: Pseudotime + Label Side by Side


p2a <- ggplot(query_df[order(query_df$pseudotime, na.last = FALSE), ],
              aes(UMAP_1, UMAP_2, colour = pseudotime)) +
  geom_point(data = ref_bg_plot, aes(UMAP_1, UMAP_2),
             colour = "grey68", size = 0.25, alpha = 0.45, inherit.aes = FALSE) +
  geom_point(size = 0.25, alpha = 0.8) +
  scale_colour_gradientn(
    colours = c("#0D0887","#6A00A8","#B12A90","#E16462","#FCA636","#F0F921"),
    name = "Pseudotime", limits = PT_LIMITS, na.value = "grey85"
  ) +
  theme_classic() +
  labs(x = "UMAP-1", y = "UMAP-2",
       title    = "A. Transferred pseudotime gradient",
       subtitle = "Grey = healthy reference  |  Coloured = Sézary cells") +
  theme(plot.title = element_text(size = 12, face = "bold"))

p2b <- ggplot(query_df[sample(nrow(query_df)), ],
              aes(UMAP_1, UMAP_2, colour = factor(label, levels = STATE_ORDER))) +
  geom_point(data = ref_bg_plot, aes(UMAP_1, UMAP_2),
             colour = "grey88", size = 0.25, alpha = 0.45, inherit.aes = FALSE) +
  geom_point(size = 0.25, alpha = 0.7) +
  scale_colour_manual(values = STATE_COLORS, name = "Transferred\nlabel",
                      na.value = "grey80") +
  guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1))) +
  theme_classic() +
  labs(x = "UMAP-1", y = "UMAP-2",
       title    = "B. Transferred label identity",
       subtitle = "96-99.9% CD4 TCM across all seven lines") +
  theme(plot.title = element_text(size = 12, face = "bold"))

fig2 <- p2a | p2b
print(fig2)

save_fig(fig2, "Fig2_UMAP_Pseudotime_and_Label", w = 16, h = 7)
  ✓  Fig2_UMAP_Pseudotime_and_Label  [PNG + PDF]

11 10. Figure 3 — Label Bar Chart + Per Cell Line UMAP Facets


p3a <- ggplot(label_df, aes(cell_line, pct, fill = label)) +
  geom_col(width = 0.75) +
  geom_text(data = label_df %>% filter(pct > 5),
            aes(label = sprintf("%.0f%%", pct)),
            position = position_stack(vjust = 0.5),
            size = 3, colour = "white", fontface = "bold") +
  scale_fill_manual(values = STATE_COLORS, name = "Label") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 101)) +
  theme_classic() +
  labs(x = "Cell Line", y = "% Cells", title = "A. Transferred Azimuth l2 Labels") +
  theme(plot.title  = element_text(size = 12, face = "bold"),
        axis.text.x = element_text(size = 10, face = "bold"))

p3b <- ggplot(
    query_df %>% filter(!is.na(cell_line)) %>% arrange(pseudotime),
    aes(UMAP_1, UMAP_2, colour = pseudotime)
  ) +
  geom_point(
    data = ref_bg_plot %>%
      slice(rep(1:n(), times = length(levels(query_df$cell_line)))),
    aes(UMAP_1, UMAP_2), colour = "grey88", size = 0.2, alpha = 0.4,
    inherit.aes = FALSE
  ) +
  geom_point(size = 0.2, alpha = 0.8) +
  scale_colour_gradientn(
    colours = c("#0D0887","#6A00A8","#B12A90","#E16462","#FCA636","#F0F921"),
    name = "Pseudotime", limits = PT_LIMITS, na.value = "grey85"
  ) +
  facet_wrap(~ cell_line, nrow = 2) +
  theme_classic() +
  theme(strip.text = element_text(face = "bold", size = 10),
        plot.title = element_text(size = 12, face = "bold"),
        axis.text  = element_blank(), axis.ticks = element_blank()) +
  labs(x = NULL, y = NULL,
       title = "B. Per Cell Line UMAP — Projected Pseudotime  (Grey = healthy reference)")

fig3 <- p3a + p3b + plot_layout(widths = c(1, 2))
print(fig3)

save_fig(fig3, "Fig3_Labels_PerLine_UMAP", w = 18, h = 6)
  ✓  Fig3_Labels_PerLine_UMAP  [PNG + PDF]

12 11. Figure 4 — Per Cell Line Facet UMAP (Key Heterogeneity Figure)


fig4 <- ggplot(
    query_df %>% filter(!is.na(cell_line)) %>% arrange(pseudotime),
    aes(UMAP_1, UMAP_2, colour = pseudotime)
  ) +
  geom_point(
    data = ref_bg_plot %>%
      slice(rep(1:n(), times = length(levels(query_df$cell_line)))),
    aes(UMAP_1, UMAP_2), colour = "grey88", size = 0.2, alpha = 0.4,
    inherit.aes = FALSE
  ) +
  geom_point(size = 0.25, alpha = 0.8) +
  scale_colour_gradientn(
    colours = c("#0D0887","#6A00A8","#B12A90","#E16462","#FCA636","#F0F921"),
    name = "Pseudotime\n(Naive->Temra)", limits = PT_LIMITS, na.value = "grey85"
  ) +
  facet_wrap(~ cell_line, nrow = 2) +
  theme_classic() +
  theme(
    strip.text       = element_text(face = "bold", size = 12),
    strip.background = element_rect(fill = "grey95", colour = "grey60"),
    plot.title       = element_text(size = 14, face = "bold"),
    plot.subtitle    = element_text(size = 10, colour = "grey40"),
    axis.text = element_blank(), axis.ticks = element_blank(),
    legend.position = "right"
  ) +
  labs(x = NULL, y = NULL,
       title    = "Figure 4 — Per Cell Line Facet UMAP: Differentiation Position",
       subtitle = "L1 most heterogeneous (spans full trajectory)  |  L6 most arrested (tight TCM cluster)")

print(fig4)

save_fig(fig4, "Fig4_PerLine_Facet_UMAP", w = 18, h = 10)
  ✓  Fig4_PerLine_Facet_UMAP  [PNG + PDF]

13 12. Figure 5 — Label vs Pseudotime Bins Bar Charts


p5a <- ggplot(label_df, aes(cell_line, pct, fill = label)) +
  geom_col(width = 0.7) +
  geom_text(data = label_df %>% filter(pct > 3),
            aes(label = sprintf("%.1f%%", pct)),
            position = position_stack(vjust = 0.5),
            size = 3.2, colour = "white", fontface = "bold") +
  scale_fill_manual(values = STATE_COLORS, name = "Label") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 101)) +
  theme_classic() +
  labs(x = NULL, y = "% Cells",
       title    = "A. Transferred Azimuth l2 Label (Cell of Origin)",
       subtitle = "96-99.9% CD4 TCM across all lines — TCM origin confirmed") +
  theme(plot.title    = element_text(size = 12, face = "bold"),
        plot.subtitle = element_text(size = 10, colour = "grey40"),
        axis.text.x   = element_blank(), axis.ticks.x = element_blank())

p5b <- ggplot(bin_df, aes(cell_line, pct, fill = pseudotime_bin)) +
  geom_col(width = 0.7) +
  geom_text(data = bin_df %>% filter(pct > 3),
            aes(label = sprintf("%.1f%%", pct)),
            position = position_stack(vjust = 0.5),
            size = 3.2, colour = "white", fontface = "bold") +
  scale_fill_manual(values = BIN_COLORS, name = "Pseudotime bin") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 101)) +
  theme_classic() +
  labs(x = "Cell Line", y = "% Cells",
       title    = "B. Pseudotime Bin Assignment (Differentiation Position)",
       subtitle = sprintf(
         "Bin boundaries — Naive|TCM: %.2f  |  TCM|TEM: %.2f  |  TEM|Temra: %.2f  |  Treg: label-based",
         BIN_NAIVE_TCM, BIN_TCM_TEM, BIN_TEM_TEMRA)) +
  theme(plot.title    = element_text(size = 12, face = "bold"),
        plot.subtitle = element_text(size = 9,  colour = "grey40"),
        axis.text.x   = element_text(size = 11, face = "bold"))

fig5 <- p5a / p5b
print(fig5)

save_fig(fig5, "Fig5_Label_vs_Pseudotime_Bins",  w = 16, h = 12)
  ✓  Fig5_Label_vs_Pseudotime_Bins  [PNG + PDF]
save_fig(p5a,  "Fig5A_Label_Transfer_BarChart",  w = 10, h = 5)
  ✓  Fig5A_Label_Transfer_BarChart  [PNG + PDF]
save_fig(p5b,  "Fig5B_Pseudotime_Bins_BarChart", w = 10, h = 5)
  ✓  Fig5B_Pseudotime_Bins_BarChart  [PNG + PDF]

14 13. Figure 6 — Pseudotime Density + Ridgeplot


p6a <- ggplot(query_df %>% filter(!is.na(pseudotime)), aes(x = pseudotime)) +
  geom_density(fill = "#c0392b", colour = "#c0392b", alpha = 0.4, linewidth = 0.9) +
  geom_vline(data = REF_MEDIANS,
             aes(xintercept = med_pt, colour = state),
             linetype = "dashed", linewidth = 0.7) +
  geom_text(data = REF_MEDIANS,
            aes(x = med_pt, y = Inf,
                label = gsub("CD4 ", "", as.character(state))),
            inherit.aes = FALSE,
            angle = 90, hjust = 1.1, vjust = -0.3, size = 2.8, colour = "grey30") +
  scale_colour_manual(values = STATE_COLORS, name = "Reference\nstate median") +
  scale_x_continuous(limits = c(0, 26), breaks = seq(0, 25, 5)) +
  theme_classic() +
  labs(
    x        = "Transferred Pseudotime",
    y        = "Density",
    title    = "A. Sézary Pseudotime Distribution — All Lines Combined",
    subtitle = sprintf(
      "Reference medians: Naive=%.2f | TCM=%.2f | Treg=%.2f | TEM=%.2f | Temra=%.2f",
      naive_med, tcm_med, treg_med, tem_med, temra_med)
  ) +
  theme(plot.title    = element_text(size = 12, face = "bold"),
        plot.subtitle = element_text(size = 9,  colour = "grey40"),
        legend.position = "right")

p6b <- ggplot(
    query_df %>% filter(!is.na(pseudotime), !is.na(cell_line)),
    aes(x = pseudotime, y = fct_rev(cell_line), fill = cell_line)
  ) +
  geom_density_ridges(scale = 0.9, rel_min_height = 0.01,
                      alpha = 0.85, colour = "white") +
  geom_vline(xintercept = c(BIN_NAIVE_TCM, BIN_TCM_TEM, BIN_TEM_TEMRA),
             linetype = "dashed", colour = "grey50", linewidth = 0.5) +
  annotate("text", x = BIN_NAIVE_TCM, y = 0.5, label = "Naive|TCM",
           size = 2.5, hjust = 1.05, colour = "grey40", angle = 90) +
  annotate("text", x = BIN_TCM_TEM,   y = 0.5, label = "TCM|TEM",
           size = 2.5, hjust = 1.05, colour = "grey40", angle = 90) +
  annotate("text", x = BIN_TEM_TEMRA, y = 0.5, label = "TEM|Temra",
           size = 2.5, hjust = 1.05, colour = "grey40", angle = 90) +
  scale_fill_brewer(palette = "Set2", guide = "none") +
  scale_x_continuous(limits = c(0, 26), breaks = seq(0, 25, 5)) +
  theme_ridges(grid = FALSE) +
  labs(x = "Transferred Pseudotime", y = "Cell Line",
       title    = "B. Per Cell Line Pseudotime Ridgeplot",
       subtitle = "L1 multimodal  |  L3-L7 narrow peaks at TCM-TEM boundary") +
  theme(plot.title    = element_text(size = 12, face = "bold"),
        plot.subtitle = element_text(size = 10, colour = "grey40"))

fig6 <- p6a | p6b
print(fig6)

save_fig(fig6, "Fig6_Density_and_Ridgeplot", w = 16, h = 7)
  ✓  Fig6_Density_and_Ridgeplot  [PNG + PDF]
save_fig(p6a,  "Fig6A_Density_Overall",     w = 8,  h = 6)
  ✓  Fig6A_Density_Overall  [PNG + PDF]
save_fig(p6b,  "Fig6B_Ridgeplot_PerLine",   w = 8,  h = 6)
  ✓  Fig6B_Ridgeplot_PerLine  [PNG + PDF]
p6a_v2 <- ggplot(
    query_df %>% filter(!is.na(pseudotime)),
    aes(x = pseudotime)
  ) +
  geom_density(fill = "#c0392b", colour = "#c0392b", alpha = 0.4, linewidth = 0.9) +
  geom_vline(
    data     = REF_MEDIANS_EFFECTOR,
    aes(xintercept = med_pt, colour = state),
    linetype = "dashed", linewidth = 0.7
  ) +
  geom_text(
    data        = REF_MEDIANS_EFFECTOR,
    aes(x = med_pt, y = Inf,
        label = gsub("CD4 ", "", as.character(state))),
    inherit.aes = FALSE,
    angle = 90, hjust = 1.1, vjust = -0.3, size = 2.8, colour = "grey30"
  ) +
  scale_colour_manual(
    values = STATE_COLORS[names(STATE_COLORS) != "Treg"],
    name   = "Reference\nstate median"
  ) +
  scale_x_continuous(limits = c(0, 26), breaks = seq(0, 25, 5)) +
  theme_classic() +
  labs(
    x        = "Transferred Pseudotime",
    y        = "Density",
    title    = "A. Sézary Pseudotime Distribution — All Lines Combined",
    subtitle = sprintf(
      "Effector axis medians: Naive=%.2f | TCM=%.2f | TEM=%.2f | Temra=%.2f\nTreg line omitted — overlaps TCM; Treg-like assigned by label not pseudotime",
      naive_med, tcm_med, tem_med, temra_med)
  ) +
  theme(
    plot.title      = element_text(size = 12, face = "bold"),
    plot.subtitle   = element_text(size = 9,  colour = "grey40"),
    legend.position = "right"
  )

print(p6a_v2)

save_fig(p6a_v2, "Fig6Av2_Density_NoTreg", w = 8, h = 6)
  ✓  Fig6Av2_Density_NoTreg  [PNG + PDF]

15 14. Figure 8 — Violin: Transferred Pseudotime per Label


fig8 <- ggplot(
    query_df %>%
      filter(!is.na(pseudotime), !is.na(label)) %>%
      mutate(label = factor(label, levels = STATE_ORDER)),
    aes(label, pseudotime, fill = label)
  ) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.85, colour = "white") +
  geom_boxplot(width = 0.07, fill = "white", colour = "grey40",
               outlier.size = 0.4, outlier.alpha = 0.3) +
  geom_hline(data = REF_MEDIANS, aes(yintercept = med_pt),
             linetype = "dotted", colour = "grey55", linewidth = 0.6) +
  # Annotations driven entirely from REF_MEDIANS — no hard-coded y values
  geom_text(
    data        = REF_MEDIANS,
    aes(x       = 0.55,
        y       = med_pt,
        label   = sprintf("Ref: %s (%.2f)",
                          gsub("CD4 ", "", as.character(state)), med_pt)),
    inherit.aes = FALSE,
    hjust = 0, size = 2.8, colour = "grey40"
  ) +
  scale_fill_manual(values = STATE_COLORS, guide = "none") +
  scale_y_continuous(breaks = seq(0, 25, 5)) +
  theme_classic() +
  labs(
    x        = "Transferred Azimuth l2 Label",
    y        = "Transferred Pseudotime",
    title    = "Figure 8 — Transferred Pseudotime per Label (All Cell Lines)",
    subtitle = "CD4 Naive internal control at PT~4  |  TCM broad distribution  |  TEM at terminal PT~25"
  ) +
  theme(plot.title    = element_text(size = 13, face = "bold"),
        plot.subtitle = element_text(size = 10, colour = "grey40"),
        axis.text.x   = element_text(size = 10, face = "bold"))

print(fig8)

save_fig(fig8, "Fig8_Violin_Pseudotime_per_Label", w = 12, h = 6)
  ✓  Fig8_Violin_Pseudotime_per_Label  [PNG + PDF]

# For TEM and Temra: median=24.84 for both (Temra n=10, all at same PT).
# Use actual reference max values to separate the lines visually:
#   TEM   max = 25.00  (from diagnostic)
#   Temra max = 24.84  (all 10 cells identical)
# Better approach: show TEM median and Temra median separately but
# acknowledge in subtitle they are nearly identical.

# Override: use mean instead of median for TEM/Temra to separate lines
REF_MEDIANS_EFFECTOR_PLOT <- REF_MEDIANS_EFFECTOR %>%
  mutate(med_pt = case_when(
    state == "CD4 TEM"       ~ 23.962,  # mean from diagnostic (median=24.84 same as Temra)
    state == "CD4 Temra/CTL" ~ 24.840,  # all 10 cells identical
    TRUE ~ med_pt
  ),
  label_text = case_when(
    state == "CD4 TEM"       ~ sprintf("Ref: TEM (mean=23.96)"),
    state == "CD4 Temra/CTL" ~ sprintf("Ref: Temra (24.84, n=10)"),
    state == "CD4 Naive"     ~ sprintf("Ref: Naive (%.2f)", med_pt),
    state == "CD4 TCM"       ~ sprintf("Ref: TCM (%.2f)", med_pt),
    TRUE ~ as.character(state)
  ))

fig8v2 <- ggplot(
    query_df %>%
      filter(!is.na(pseudotime), !is.na(label)) %>%
      mutate(label = factor(label, levels = STATE_ORDER)),  # drop=FALSE keeps Temra axis
    aes(label, pseudotime, fill = label)
  ) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.85, colour = "white") +
  geom_boxplot(width = 0.07, fill = "white", colour = "grey40",
               outlier.size = 0.4, outlier.alpha = 0.3) +
  geom_hline(
    data      = REF_MEDIANS_EFFECTOR_PLOT,
    aes(yintercept = med_pt),
    linetype  = "dotted", colour = "grey55", linewidth = 0.6
  ) +
  geom_text(
    data        = REF_MEDIANS_EFFECTOR_PLOT,
    aes(x       = 0.55,
        y       = med_pt,
        label   = label_text),
    inherit.aes = FALSE,
    hjust = 0, size = 2.8, colour = "grey40"
  ) +
  scale_fill_manual(values = STATE_COLORS, na.value = "grey80", guide = "none",
                    drop = FALSE) +
  scale_x_discrete(drop = FALSE) +   # ← forces CD4 Temra/CTL to appear
  scale_y_continuous(breaks = seq(0, 25, 5)) +
  theme_classic() +
  labs(
    x        = "Transferred Azimuth l2 Label",
    y        = "Transferred Pseudotime",
    title    = "Figure 8 — Transferred Pseudotime per Label (All Cell Lines)",
    subtitle = sprintf(
      "Effector axis: Naive=%.2f | TCM=%.2f | TEM~24.84 | Temra~24.84 (n=10, identical)\nCD4 Temra/CTL: no Sézary cells received this label — consistent with TCM arrest phenotype\nTreg line omitted — overlaps TCM; Treg-like assigned by label",
      naive_med, tcm_med)
  ) +
  theme(
    plot.title    = element_text(size = 13, face = "bold"),
    plot.subtitle = element_text(size = 9,  colour = "grey40"),
    axis.text.x   = element_text(size = 10, face = "bold")
  )

print(fig8v2)

save_fig(fig8v2, "Fig8v2_Violin_NoTreg", w = 12, h = 6)
  ✓  Fig8v2_Violin_NoTreg  [PNG + PDF]
fig8v2 <- ggplot(
    query_df %>%
      filter(!is.na(pseudotime), !is.na(label)) %>%
      mutate(label = factor(label, levels = STATE_ORDER)),
    aes(label, pseudotime, fill = label)
  ) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.85, colour = "white") +
  geom_boxplot(width = 0.07, fill = "white", colour = "grey40",
               outlier.size = 0.4, outlier.alpha = 0.3) +
  geom_hline(
    data      = REF_MEDIANS_EFFECTOR,
    aes(yintercept = med_pt),
    linetype  = "dotted", colour = "grey55", linewidth = 0.6
  ) +
  geom_text(
    data        = REF_MEDIANS_EFFECTOR,
    aes(x       = 0.55,
        y       = med_pt,
        label   = sprintf("Ref: %s (%.2f)",
                          gsub("CD4 ", "", as.character(state)), med_pt)),
    inherit.aes = FALSE,
    hjust = 0, size = 2.8, colour = "grey40"
  ) +
  scale_fill_manual(values = STATE_COLORS, guide = "none") +
  scale_y_continuous(breaks = seq(0, 25, 5)) +
  theme_classic() +
  labs(
    x        = "Transferred Azimuth l2 Label",
    y        = "Transferred Pseudotime",
    title    = "Figure 8 — Transferred Pseudotime per Label (All Cell Lines)",
    subtitle = sprintf(
      "Effector axis reference lines: Naive=%.2f | TCM=%.2f | TEM=%.2f | Temra=%.2f\nTreg line omitted — Treg (PT=%.2f) overlaps TCM; Treg-like violin shown as biological confirmation",
      naive_med, tcm_med, tem_med, temra_med, treg_med)
  ) +
  theme(
    plot.title    = element_text(size = 13, face = "bold"),
    plot.subtitle = element_text(size = 9,  colour = "grey40"),
    axis.text.x   = element_text(size = 10, face = "bold")
  )

print(fig8v2)

save_fig(fig8v2, "Fig8v2_Violin_NoTreg", w = 12, h = 6)
  ✓  Fig8v2_Violin_NoTreg  [PNG + PDF]

16 15. Figure 9 — Cross-Table: Label vs Pseudotime Bin (PRIMARY RESULT)


cross_df <- query_df %>%
  filter(!is.na(label), !is.na(pseudotime_bin)) %>%
  mutate(label = factor(label, levels = STATE_ORDER)) %>%
  count(label, pseudotime_bin) %>%
  group_by(label) %>%
  mutate(pct = 100 * n / sum(n)) %>%
  ungroup()

tcm_discordant <- cross_df %>%
  filter(label == "CD4 TCM",
         pseudotime_bin %in% c("TEM-like", "Temra-like")) %>%
  summarise(pct = sum(pct)) %>%
  pull(pct)

fig9 <- ggplot(cross_df, aes(pseudotime_bin, pct, fill = pseudotime_bin)) +
  geom_col(width = 0.7) +
  geom_text(aes(label = sprintf("%.1f%%", pct)),
            vjust = -0.3, size = 3.5, fontface = "bold") +
  scale_fill_manual(values = BIN_COLORS, guide = "none") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 115)) +
  facet_wrap(~ label, nrow = 1, scales = "free_x") +
  theme_classic() +
  theme(
    strip.text       = element_text(face = "bold", size = 11),
    strip.background = element_rect(fill = "grey95", colour = "grey60"),
    axis.text.x      = element_text(size = 9, face = "bold", angle = 30, hjust = 1),
    plot.title       = element_text(size = 14, face = "bold"),
    plot.subtitle    = element_text(size = 10, colour = "grey40")
  ) +
  labs(
    x        = "Pseudotime Bin",
    y        = "% of Cells with This Label",
    title    = "Figure 9 — Cross-Table: Transferred Label vs Pseudotime Bin  [PRIMARY RESULT]",
    subtitle = sprintf(
      "TCM-labelled cells: %.1f%% discordant (TEM-like or Temra-like)\nTreg panel: 100%% Treg-like by design",
      tcm_discordant)
  )

print(fig9)

save_fig(fig9, "Fig9_CrossTable_Label_vs_Bin_PRIMARY", w = 18, h = 7)
  ✓  Fig9_CrossTable_Label_vs_Bin_PRIMARY  [PNG + PDF]

17 16. Supplementary Figures

17.1 Supp S1 — Per Cell Line Pseudotime Density (Faceted)


suppS1 <- ggplot(
    query_df %>% filter(!is.na(pseudotime), !is.na(cell_line)),
    aes(x = pseudotime, fill = cell_line, colour = cell_line)
  ) +
  geom_density(alpha = 0.35, linewidth = 0.8) +
  geom_vline(data = REF_MEDIANS, aes(xintercept = med_pt),
             colour = "grey45", linetype = "dashed", linewidth = 0.5) +
  scale_fill_brewer(palette = "Set2", guide = "none") +
  scale_colour_brewer(palette = "Set2", guide = "none") +
  scale_x_continuous(limits = c(0, 26), breaks = seq(0, 25, 5)) +
  facet_wrap(~ cell_line, nrow = 2) +
  theme_classic() +
  theme(strip.text    = element_text(face = "bold"),
        plot.title    = element_text(size = 12, face = "bold"),
        plot.subtitle = element_text(size = 9,  colour = "grey40")) +
  labs(x = "Transferred Pseudotime", y = "Density",
       title    = "Supp S1 — Per Cell Line Pseudotime Density",
       subtitle = "Grey dashed lines = data-derived reference state medians")

print(suppS1)

save_fig(suppS1, "SuppS1_Density_PerLine_Facet", w = 12, h = 7)
  ✓  SuppS1_Density_PerLine_Facet  [PNG + PDF]

17.2 Supp S2 — Ridgeplot Standalone


suppS2 <- p6b +
  ggtitle(
    "Supp S2 — Per Cell Line Pseudotime Ridgeplot",
    subtitle = sprintf(
      "Bin boundaries: Naive|TCM=%.2f | TCM|TEM=%.2f | TEM|Temra=%.2f",
      BIN_NAIVE_TCM, BIN_TCM_TEM, BIN_TEM_TEMRA)
  )

print(suppS2)

save_fig(suppS2, "SuppS2_Ridgeplot_PerLine", w = 10, h = 7)
  ✓  SuppS2_Ridgeplot_PerLine  [PNG + PDF]

17.3 Supp S3 — Violin per Label x Cell Line (Faceted)


suppS3 <- ggplot(
    query_df %>%
      filter(!is.na(label), !is.na(cell_line), !is.na(pseudotime)) %>%
      mutate(label = factor(label, levels = STATE_ORDER)),
    aes(cell_line, pseudotime, fill = cell_line)
  ) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.8, colour = "white") +
  geom_boxplot(width = 0.07, fill = "white", colour = "grey40",
               outlier.size = 0.2, outlier.alpha = 0.3) +
  scale_fill_brewer(palette = "Set2", guide = "none") +
  scale_y_continuous(breaks = seq(0, 25, 5)) +
  facet_wrap(~ label, nrow = 1) +
  theme_classic() +
  theme(
    strip.text       = element_text(face = "bold", size = 10),
    strip.background = element_rect(fill = "grey95", colour = "grey60"),
    axis.text.x      = element_text(size = 9, face = "bold", angle = 30, hjust = 1),
    plot.title       = element_text(size = 12, face = "bold"),
    plot.subtitle    = element_text(size = 9,  colour = "grey40")
  ) +
  labs(x = "Cell Line", y = "Transferred Pseudotime",
       title    = "Supp S3 — Violin: Pseudotime per Label x Cell Line",
       subtitle = "Complements Fig 9 — shows within-state per-line heterogeneity")

print(suppS3)

save_fig(suppS3, "SuppS3_Violin_Label_x_Line", w = 16, h = 10)
  ✓  SuppS3_Violin_Label_x_Line  [PNG + PDF]

17.4 Supp S4 — Pseudotime Bin Bar Chart with Full Percentages


suppS4 <- ggplot(bin_df, aes(cell_line, pct, fill = pseudotime_bin)) +
  geom_col(width = 0.7) +
  geom_text(data = bin_df %>% filter(pct > 2),
            aes(label = sprintf("%.1f%%", pct)),
            position = position_stack(vjust = 0.5),
            size = 3.5, colour = "white", fontface = "bold") +
  scale_fill_manual(values = BIN_COLORS, name = "Pseudotime bin") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 101)) +
  theme_classic() +
  labs(
    x        = "Cell Line",
    y        = "% Cells",
    title    = "Supp S4 — Pseudotime Bin Composition per Cell Line",
    subtitle = sprintf(
      "Bin boundaries: Naive|TCM=%.2f  |  TCM|TEM=%.2f  |  TEM|Temra=%.2f  |  Treg=label-based",
      BIN_NAIVE_TCM, BIN_TCM_TEM, BIN_TEM_TEMRA)
  ) +
  theme(plot.title    = element_text(size = 12, face = "bold"),
        plot.subtitle = element_text(size = 9,  colour = "grey40"),
        axis.text.x   = element_text(size = 11, face = "bold"))

print(suppS4)

save_fig(suppS4, "SuppS4_Bins_BarChart_Detailed", w = 14, h = 6)
  ✓  SuppS4_Bins_BarChart_Detailed  [PNG + PDF]

18 17. Figure Manifest

All 21 figures — Figures/PNG/*.png (300 dpi) and Figures/PDF/*.pdf
File Description Manuscript
Fig7_FourPanel_Summary_FIXED Four-panel: labels &#124; pseudotime (FIXED) &#124; bins &#124; label bar Fig 1 combined
Fig7A_Transferred_Labels Sezary UMAP — transferred Azimuth l2 label Fig 1A
Fig7B_Transferred_Pseudotime_FIXED Sezary UMAP — transferred pseudotime, FIXED 0-25 colour scale Fig 1B (FIXED)
Fig7C_Pseudotime_Bins_UMAP Sezary UMAP — pseudotime bin colour Fig 1C
Fig7D_Label_BarChart_per_Line Transferred label proportions per cell line bar chart Fig 1D
Fig1_Pseudotime_Violin_per_Line Pseudotime violin per cell line with data-derived reference medians Supp
Fig2_UMAP_Pseudotime_and_Label UMAP pseudotime gradient + label identity side by side Fig 2
Fig3_Labels_PerLine_UMAP Label bar chart + per cell line UMAP facets Fig 3
Fig4_PerLine_Facet_UMAP Per cell line facet UMAP — key inter-line heterogeneity figure Fig 4 (KEY)
Fig5_Label_vs_Pseudotime_Bins Dual bar chart: labels (top) and pseudotime bins (bottom) Fig 5
Fig5A_Label_Transfer_BarChart Label transfer proportions bar chart only
Fig5B_Pseudotime_Bins_BarChart Pseudotime bin proportions bar chart only Fig 5B
Fig6_Density_and_Ridgeplot Overall pseudotime density + per-line ridgeplot Supp
Fig6A_Density_Overall Overall pseudotime density with data-derived reference median lines Supp S3
Fig6B_Ridgeplot_PerLine Per cell line pseudotime ridgeplot with bin boundaries Supp S4
Fig8_Violin_Pseudotime_per_Label Violin: transferred pseudotime per label — data-derived annotations Supp S5
Fig9_CrossTable_Label_vs_Bin_PRIMARY Cross-table: label vs pseudotime bin — PRIMARY RESULT Fig 2 (PRIMARY)
SuppS1_Density_PerLine_Facet Per cell line pseudotime density faceted Supp S1
SuppS2_Ridgeplot_PerLine Per cell line ridgeplot standalone Supp S2
SuppS3_Violin_Label_x_Line Violin: pseudotime per label x cell line faceted Supp S6
SuppS4_Bins_BarChart_Detailed Pseudotime bin bar chart with full percentages labelled Supp

19 18. Session Info

sessionInfo()
R version 4.5.2 (2025-10-31)
Platform: x86_64-redhat-linux-gnu
Running under: Rocky Linux 9.7 (Blue Onyx)

Matrix products: default
BLAS/LAPACK: FlexiBLAS OPENBLAS-OPENMP;  LAPACK version 3.9.0

locale:
 [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8        LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
 [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C           LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   

time zone: Europe/Paris
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] kableExtra_1.4.0   knitr_1.51         ggridges_0.5.7     RColorBrewer_1.1-3 scales_1.4.0       patchwork_1.3.2    forcats_1.0.1     
 [8] tidyr_1.3.2        dplyr_1.2.0        ggplot2_4.0.2      Seurat_5.4.0       SeuratObject_5.3.0 sp_2.2-1          

loaded via a namespace (and not attached):
  [1] rstudioapi_0.18.0      jsonlite_2.0.0         magrittr_2.0.4         spatstat.utils_3.2-1   farver_2.1.2           rmarkdown_2.30        
  [7] ragg_1.5.0             vctrs_0.7.1            ROCR_1.0-12            spatstat.explore_3.7-0 htmltools_0.5.9        sass_0.4.10           
 [13] sctransform_0.4.3      parallelly_1.46.1      KernSmooth_2.23-26     bslib_0.10.0           htmlwidgets_1.6.4      ica_1.0-3             
 [19] plyr_1.8.9             plotly_4.12.0          zoo_1.8-15             cachem_1.1.0           igraph_2.2.2           mime_0.13             
 [25] lifecycle_1.0.5        pkgconfig_2.0.3        Matrix_1.7-4           R6_2.6.1               fastmap_1.2.0          fitdistrplus_1.2-6    
 [31] future_1.69.0          shiny_1.12.1           digest_0.6.39          S4Vectors_0.48.0       tensor_1.5.1           RSpectra_0.16-2       
 [37] irlba_2.3.7            textshaping_1.0.4      labeling_0.4.3         progressr_0.18.0       spatstat.sparse_3.1-0  httr_1.4.8            
 [43] polyclip_1.10-7        abind_1.4-8            compiler_4.5.2         withr_3.0.2            S7_0.2.1               fastDummies_1.7.5     
 [49] MASS_7.3-65            tools_4.5.2            lmtest_0.9-40          otel_0.2.0             httpuv_1.6.16          future.apply_1.20.1   
 [55] goftest_1.2-3          glue_1.8.0             nlme_3.1-168           promises_1.5.0         grid_4.5.2             Rtsne_0.17            
 [61] cluster_2.1.8.2        reshape2_1.4.5         generics_0.1.4         gtable_0.3.6           spatstat.data_3.1-9    data.table_1.18.2.1   
 [67] xml2_1.5.2             XVector_0.50.0         BiocGenerics_0.56.0    spatstat.geom_3.7-0    RcppAnnoy_0.0.23       ggrepel_0.9.6         
 [73] RANN_2.6.2             pillar_1.11.1          stringr_1.6.0          spam_2.11-3            RcppHNSW_0.6.0         later_1.4.6           
 [79] splines_4.5.2          lattice_0.22-9         survival_3.8-6         deldir_2.0-4           tidyselect_1.2.1       miniUI_0.1.2          
 [85] pbapply_1.7-4          gridExtra_2.3          IRanges_2.44.0         svglite_2.2.2          scattermore_1.2        stats4_4.5.2          
 [91] xfun_0.56              Biobase_2.70.0         matrixStats_1.5.0      stringi_1.8.7          lazyeval_0.2.2         yaml_2.3.12           
 [97] evaluate_1.0.5         codetools_0.2-20       tibble_3.3.1           cli_3.6.5              uwot_0.2.4             xtable_1.8-4          
[103] reticulate_1.45.0      systemfonts_1.3.1      jquerylib_0.1.4        dichromat_2.0-0.1      Rcpp_1.1.1             globals_0.19.0        
[109] spatstat.random_3.4-4  png_0.1-8              spatstat.univar_3.1-6  parallel_4.5.2         dotCall64_1.2          listenv_0.10.0        
[115] viridisLite_0.4.3      purrr_1.2.1            rlang_1.1.7            cowplot_1.2.0         
---
title: "Sézary Syndrome — Pseudotime Trajectory Figures"
subtitle: "CD4+ T Cell Reference Projection | State-Anchored Pseudotime Bins | PNG + PDF output"
author: "Nasir Mahmood Abbasi"
date: "`r Sys.Date()`"
output:
  html_notebook:
    number_sections: true
    toc: true
    toc_float:
      collapsed: false
    theme: journal
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(
  echo      = TRUE,
  message   = FALSE,
  warning   = FALSE,
  fig.align = "center",
  dpi       = 150
)
```

---

# Overview

This script regenerates **all manuscript figures** from the saved projection object.
The reference object is loaded **solely** to extract UMAP coordinates for the grey background.

## Objects required

| Object | Path | Role |
|--------|------|------|
| `reference_integrated` | `../../1-Final_Custom_MST_Monocle3_Trajectory_and_mapping/1-Custom_MST/Objects/cd4_ref_dual_trajectory.rds` | **UMAP background only** — grey reference dots |
| `mapped_MalignantCD4T` | `results/MalignantCD4T_Monocle3_projection/MalignantCD4T_mapped_monocle3_pseudotime_noProlif_ref.Rds` | **All biology** — pseudotime, labels, bins, colour scales |

## Key design decisions

| Item | Source |
|------|--------|
| Pseudotime colour-scale limits (`PT_LIMITS`) | `mapped_MalignantCD4T$predicted.pseudotime` |
| Bin boundaries | Midpoints between **actual** reference state medians — computed at runtime from `ref_bg` |
| State medians (`REF_MEDIANS`) | Computed at runtime from `ref_bg` — **never hard-coded** |
| All bar charts, violins, density plots | `mapped_MalignantCD4T` only |

## Panel B fix

Previously called `FeaturePlot(reference_integrated, ...)` without `limits`,
producing a spurious 1.0–2.0 colour scale. Fixed: Panel B plots Sézary cells
coloured by `predicted.pseudotime` with `limits = PT_LIMITS` (correct 0–25 scale).

## Bin design

Bins are anchored to **actual reference state medians** computed from `ref_bg`.
Boundaries = midpoint between adjacent state medians (biology-principled, not arbitrary).
Treg-like bin is assigned by Azimuth label (Treg is a branch, not a linear position).

---

# 1. Setup & Libraries

```{r libraries}
suppressPackageStartupMessages({
  library(Seurat)
  library(ggplot2)
  library(dplyr)
  library(tidyr)
  library(forcats)
  library(patchwork)
  library(scales)
  library(RColorBrewer)
  library(ggridges)
  library(knitr)
  library(kableExtra)
})

dir.create("Figures/PNG", recursive = TRUE, showWarnings = FALSE)
dir.create("Figures/PDF", recursive = TRUE, showWarnings = FALSE)

save_fig <- function(p, name, w = 10, h = 8) {
  ggsave(file.path("Figures/PNG", paste0(name, ".png")),
         p, width = w, height = h, dpi = 300, bg = "white")
  ggsave(file.path("Figures/PDF", paste0(name, ".pdf")),
         p, width = w, height = h, useDingbats = FALSE)
  invisible(cat(sprintf("  \u2713  %s  [PNG + PDF]\n", name)))
}

STATE_COLORS <- c(
  "CD4 Naive"     = "#4472C4",
  "CD4 TCM"       = "#70AD47",
  "CD4 TEM"       = "#ED7D31",
  "CD4 Temra/CTL" = "#C00000",
  "Treg"          = "#7030A0"
)

BIN_COLORS <- c(
  "Naive-like"  = "#4472C4",
  "TCM-like"    = "#70AD47",
  "TEM-like"    = "#ED7D31",
  "Temra-like"  = "#C00000",
  "Treg-like"   = "#7030A0"
)

STATE_ORDER <- c("CD4 Naive", "CD4 TCM", "CD4 TEM", "CD4 Temra/CTL", "Treg")
BIN_ORDER   <- c("Naive-like", "TCM-like", "TEM-like", "Temra-like", "Treg-like")
LINE_ORDER  <- paste0("L", 1:7)

cat("Setup complete.\n")
```

---

# 2. Load & Validate Objects

```{r load-objects}
ref_path <- paste0(
  "../../1-Final_Custom_MST_Monocle3_Trajectory_and_mapping/",
  "1-Custom_MST/Objects/cd4_ref_dual_trajectory.rds"
)
reference_integrated <- readRDS(ref_path)
cat("Reference loaded:", ncol(reference_integrated), "cells\n")

stopifnot(
  "umap reduction missing from reference" =
    "umap" %in% names(reference_integrated@reductions)
)

obj_path <- paste0(
  "../results/MalignantCD4T_Monocle3_projection/",
  "MalignantCD4T_mapped_monocle3_pseudotime_noProlif_ref.Rds"
)
mapped_MalignantCD4T <- readRDS(obj_path)
cat("Query loaded:    ", ncol(mapped_MalignantCD4T), "cells\n")

stopifnot(
  "ref.umap reduction missing — re-run MapQuery" =
    "ref.umap" %in% names(mapped_MalignantCD4T@reductions),
  "predicted.pseudotime column missing" =
    "predicted.pseudotime" %in% colnames(mapped_MalignantCD4T@meta.data),
  "predicted.predicted.celltype.l2 column missing" =
    "predicted.predicted.celltype.l2" %in% colnames(mapped_MalignantCD4T@meta.data)
)

if (!"cell_line" %in% colnames(mapped_MalignantCD4T@meta.data)) {
  if ("orig.ident" %in% colnames(mapped_MalignantCD4T@meta.data)) {
    mapped_MalignantCD4T$cell_line <- mapped_MalignantCD4T$orig.ident
    cat("NOTE: using orig.ident as cell_line\n")
  } else {
    stop("Cannot find cell_line or orig.ident in metadata")
  }
}

cat("\npredicted.pseudotime summary (expect Min~0, Max~25):\n")
print(summary(mapped_MalignantCD4T$predicted.pseudotime))
cat("\nTransferred label distribution:\n")
print(sort(table(mapped_MalignantCD4T$predicted.predicted.celltype.l2,
                 useNA = "ifany"), decreasing = TRUE))
cat("\nCell line sizes:\n")
print(sort(table(mapped_MalignantCD4T$cell_line, useNA = "ifany")))
```

---

# 3. Build Reference Background (`ref_bg`)

```{r build-ref-bg}
# ref_bg serves two purposes:
#   1. Grey background dots in all projection UMAPs
#   2. Source for computing actual reference state medians (REF_MEDIANS)
ref_bg <- data.frame(
  Embeddings(reference_integrated, "umap"),
  state      = reference_integrated$predicted.celltype.l2,
  pseudotime = reference_integrated$monocle3_pseudotime,
  stringsAsFactors = FALSE
)
colnames(ref_bg)[1:2] <- c("UMAP_1", "UMAP_2")
ref_bg$state <- factor(ref_bg$state, levels = STATE_ORDER)

cat("Reference monocle3_pseudotime (should be 0-25):\n")
print(summary(ref_bg$pseudotime))

ref_bg_plot <- ref_bg[sample(nrow(ref_bg)), ]
cat(sprintf("\nref_bg ready: %d reference cells\n", nrow(ref_bg)))
```

---

# 4. Compute Reference State Medians & Bin Boundaries

```{r compute-medians-bins}
# ══════════════════════════════════════════════════════════════════════════
# REF_MEDIANS computed at runtime from actual ref_bg — NOT hard-coded.
#
# Diagnostic confirmed these actual values:
#   CD4 Naive     median =  3.293
#   CD4 TCM       median =  9.377  (was hard-coded as 13.36 — WRONG)
#   CD4 TEM       median = 24.840
#   CD4 Temra/CTL median = 24.840  (only 10 cells)
#   Treg          median = 11.150  (was hard-coded as 18.75 — above Treg max of 17.40, WRONG)
#
# Bin boundaries derived as midpoints between adjacent state medians.
# ══════════════════════════════════════════════════════════════════════════

ref_state_medians <- ref_bg %>%
  filter(!is.na(pseudotime), !is.na(state)) %>%
  group_by(state) %>%
  summarise(
    n      = n(),
    med_pt = round(median(pseudotime, na.rm = TRUE), 3),
    .groups = "drop"
  ) %>%
  arrange(med_pt)

cat("=== Reference state medians (data-derived) ===\n")
print(ref_state_medians)

get_med <- function(s) {
  v <- ref_state_medians$med_pt[ref_state_medians$state == s]
  if (length(v) == 0) stop(paste("State not found in ref_bg:", s))
  v
}

naive_med  <- get_med("CD4 Naive")
tcm_med    <- get_med("CD4 TCM")
tem_med    <- get_med("CD4 TEM")
temra_med  <- get_med("CD4 Temra/CTL")
treg_med   <- get_med("Treg")

BIN_NAIVE_TCM  <- round((naive_med + tcm_med)  / 2, 3)
BIN_TCM_TEM    <- round((tcm_med   + tem_med)   / 2, 3)
BIN_TEM_TEMRA  <- round((tem_med   + temra_med) / 2, 3)

cat("\n=== Bin boundaries (midpoints between adjacent state medians) ===\n")
cat(sprintf("  Naive | TCM   : %.3f\n", BIN_NAIVE_TCM))
cat(sprintf("  TCM   | TEM   : %.3f\n", BIN_TCM_TEM))
cat(sprintf("  TEM   | Temra : %.3f\n", BIN_TEM_TEMRA))
cat(sprintf("  Treg           : label-based (branch logic)\n"))

if (abs(tem_med - temra_med) < 0.5) {
  cat("\nNOTE: TEM and Temra medians are very close (n=10 Temra cells in reference).\n")
  cat("      Temra-like bin will be negligible — biologically expected.\n")
}

REF_MEDIANS <- data.frame(
  state  = factor(STATE_ORDER, levels = STATE_ORDER),
  med_pt = c(naive_med, tcm_med, tem_med, temra_med, treg_med)
)

cat("\n=== REF_MEDIANS (used for reference lines in all figures) ===\n")
print(REF_MEDIANS)
```

```{r}
# ── REF_MEDIANS_EFFECTOR: Treg excluded ───────────────────────────────────
# Treg (median PT=11.15) and TCM (median PT=9.38) overlap entirely in
# pseudotime space — both states share the Naive→TCM graph segment.
# Showing a Treg line at PT=11.15 implies a pseudotime boundary that does
# not exist and misleads interpretation. Treg-like cells are assigned by
# transferred Azimuth label (KNN), not by pseudotime position.
REF_MEDIANS_EFFECTOR <- REF_MEDIANS %>%
  filter(state != "Treg")

LINETYPE_EFFECTOR <- c(
  "CD4 Naive"     = "dotted",
  "CD4 TCM"       = "dashed",
  "CD4 TEM"       = "longdash",
  "CD4 Temra/CTL" = "solid"
)

cat("=== REF_MEDIANS_EFFECTOR (Treg excluded) ===\n")
print(REF_MEDIANS_EFFECTOR)
```


# 5. Assign Pseudotime Bins

```{r assign-bins}
assign_bin <- function(pt, lbl) {
  if (isTRUE(grepl("Treg", lbl, ignore.case = TRUE))) return("Treg-like")
  if (is.na(pt))             return(NA_character_)
  if (pt <= BIN_NAIVE_TCM)   return("Naive-like")
  if (pt <= BIN_TCM_TEM)     return("TCM-like")
  if (pt <= BIN_TEM_TEMRA)   return("TEM-like")
  return("Temra-like")
}

mapped_MalignantCD4T$pseudotime_bin <- factor(
  mapply(assign_bin,
         pt  = mapped_MalignantCD4T$predicted.pseudotime,
         lbl = mapped_MalignantCD4T$predicted.predicted.celltype.l2),
  levels = BIN_ORDER
)

cat("Pseudotime bin distribution (all cells):\n")
print(table(mapped_MalignantCD4T$pseudotime_bin, useNA = "ifany"))
cat("\nPseudotime bin per cell line:\n")
print(table(mapped_MalignantCD4T$cell_line,
            mapped_MalignantCD4T$pseudotime_bin, useNA = "ifany"))
```

---

# 6. Build Shared Data Frames

```{r build-dataframes}
query_df <- data.frame(
  Embeddings(mapped_MalignantCD4T, "ref.umap"),
  pseudotime     = mapped_MalignantCD4T$predicted.pseudotime,
  cell_line      = factor(mapped_MalignantCD4T$cell_line, levels = LINE_ORDER),
  label          = mapped_MalignantCD4T$predicted.predicted.celltype.l2,
  pseudotime_bin = mapped_MalignantCD4T$pseudotime_bin,
  stringsAsFactors = FALSE
)
colnames(query_df)[1:2] <- c("UMAP_1", "UMAP_2")

PT_LIMITS <- range(query_df$pseudotime, na.rm = TRUE)
cat(sprintf("Pseudotime colour limits: %.3f to %.3f\n", PT_LIMITS[1], PT_LIMITS[2]))

label_df <- query_df %>%
  filter(!is.na(label), !is.na(cell_line)) %>%
  count(cell_line, label) %>%
  group_by(cell_line) %>%
  mutate(pct = 100 * n / sum(n)) %>%
  ungroup() %>%
  mutate(label = factor(label, levels = STATE_ORDER))

bin_df <- query_df %>%
  filter(!is.na(pseudotime_bin), !is.na(cell_line)) %>%
  count(cell_line, pseudotime_bin) %>%
  group_by(cell_line) %>%
  mutate(pct = 100 * n / sum(n)) %>%
  ungroup()

cat("Data frames ready.\n")
```

---

# 7. Figure 7 — Four-Panel Summary

```{r fig7-four-panel, fig.width=18, fig.height=14}

bg_layer <- geom_point(
  data = ref_bg_plot, aes(x = UMAP_1, y = UMAP_2),
  colour = "grey68", size = 0.25, alpha = 0.45, inherit.aes = FALSE
)

p7a <- ggplot(query_df[sample(nrow(query_df)), ],
              aes(UMAP_1, UMAP_2, colour = factor(label, levels = STATE_ORDER))) +
  bg_layer +
  geom_point(size = 0.35, alpha = 0.7) +
  scale_colour_manual(values = STATE_COLORS, name = "Transferred\nLabel",
                      na.value = "grey80") +
  guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1))) +
  theme_classic() +
  labs(x = "UMAP-1", y = "UMAP-2",
       title = "A. Sézary Cells — Transferred CD4 T Cell Labels") +
  theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"))

p7b <- ggplot(query_df[order(query_df$pseudotime, na.last = FALSE), ],
              aes(UMAP_1, UMAP_2, colour = pseudotime)) +
  bg_layer +
  geom_point(size = 0.35, alpha = 0.8) +
  scale_colour_gradientn(
    colours  = c("lightblue", "yellow", "red"),
    name     = "Pseudotime",
    limits   = PT_LIMITS,
    na.value = "grey85"
  ) +
  theme_classic() +
  labs(x = "UMAP-1", y = "UMAP-2",
       title = "B. Sézary Cells — Transferred Pseudotime  [FIXED]") +
  theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"))

p7c <- ggplot(query_df[sample(nrow(query_df)), ],
              aes(UMAP_1, UMAP_2, colour = pseudotime_bin)) +
  bg_layer +
  geom_point(size = 0.35, alpha = 0.7) +
  scale_colour_manual(values = BIN_COLORS, name = "Pseudotime\nBin",
                      na.value = "grey80") +
  guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1))) +
  theme_classic() +
  labs(x = "UMAP-1", y = "UMAP-2",
       title = "C. Sézary Cells — Pseudotime Bin") +
  theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"))

p7d <- ggplot(label_df, aes(cell_line, pct, fill = label)) +
  geom_col(width = 0.75) +
  geom_text(data = label_df %>% filter(pct > 3),
            aes(label = sprintf("%.1f%%", pct)),
            position = position_stack(vjust = 0.5),
            size = 3, colour = "white", fontface = "bold") +
  scale_fill_manual(values = STATE_COLORS, name = "Label") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 101)) +
  theme_classic() +
  labs(x = "Cell Line", y = "% Cells",
       title = "D. Transferred Label per Cell Line") +
  theme(plot.title  = element_text(hjust = 0.5, size = 12, face = "bold"),
        axis.text.x = element_text(size = 10, face = "bold"))

fig7 <- (p7a | p7b) / (p7c | p7d)
print(fig7)
save_fig(fig7, "Fig7_FourPanel_Summary_FIXED",       w = 18, h = 14)
save_fig(p7a,  "Fig7A_Transferred_Labels",           w = 9,  h = 7)
save_fig(p7b,  "Fig7B_Transferred_Pseudotime_FIXED", w = 9,  h = 7)
save_fig(p7c,  "Fig7C_Pseudotime_Bins_UMAP",         w = 9,  h = 7)
save_fig(p7d,  "Fig7D_Label_BarChart_per_Line",      w = 9,  h = 7)
```

---

# 8. Figure 1 — Pseudotime Violin per Cell Line

```{r fig1-violin, fig.width=12, fig.height=6}

fig1 <- ggplot(
    query_df %>% filter(!is.na(pseudotime), !is.na(cell_line)),
    aes(cell_line, pseudotime, fill = cell_line)
  ) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.85, colour = "white") +
  geom_boxplot(width = 0.06, fill = "white", colour = "grey40",
               outlier.size = 0.3, outlier.alpha = 0.3) +
  geom_hline(data = REF_MEDIANS,
             aes(yintercept = med_pt, colour = state, linetype = state),
             linewidth = 0.7, alpha = 0.9) +
  scale_fill_brewer(palette = "Set2", guide = "none") +
  scale_colour_manual(values = STATE_COLORS, name = "Reference\nmedian") +
  scale_linetype_manual(
    values = c("CD4 Naive" = "dotted", "CD4 TCM" = "dashed",
               "CD4 TEM" = "longdash", "CD4 Temra/CTL" = "solid",
               "Treg" = "twodash"),
    name = "Reference\nmedian"
  ) +
  scale_y_continuous(breaks = seq(0, 25, 5)) +
  theme_classic() +
  labs(
    x        = "Cell Line",
    y        = "Transferred Pseudotime",
    title    = "Figure 1 — Pseudotime Distribution per Sézary Cell Line",
    subtitle = sprintf(
      "Reference medians: Naive=%.2f | TCM=%.2f | Treg=%.2f | TEM=%.2f | Temra=%.2f",
      naive_med, tcm_med, treg_med, tem_med, temra_med)
  ) +
  theme(plot.title    = element_text(size = 13, face = "bold"),
        plot.subtitle = element_text(size = 9,  colour = "grey40"),
        axis.text.x   = element_text(size = 11, face = "bold"))

print(fig1)
save_fig(fig1, "Fig1_Pseudotime_Violin_per_Line", w = 12, h = 6)
```

```{r , fig.width=16, fig.height=7}
fig1v2 <- ggplot(
    query_df %>% filter(!is.na(pseudotime), !is.na(cell_line)),
    aes(cell_line, pseudotime, fill = cell_line)
  ) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.85, colour = "white") +
  geom_boxplot(width = 0.06, fill = "white", colour = "grey40",
               outlier.size = 0.3, outlier.alpha = 0.3) +
  geom_hline(
    data      = REF_MEDIANS_EFFECTOR,
    aes(yintercept = med_pt, colour = state, linetype = state),
    linewidth = 0.7, alpha = 0.9
  ) +
  scale_fill_brewer(palette = "Set2", guide = "none") +
  scale_colour_manual(
    values = STATE_COLORS[names(STATE_COLORS) != "Treg"],
    name   = "Reference\nmedian"
  ) +
  scale_linetype_manual(
    values = LINETYPE_EFFECTOR,
    name   = "Reference\nmedian"
  ) +
  scale_y_continuous(breaks = seq(0, 25, 5)) +
  theme_classic() +
  labs(
    x        = "Cell Line",
    y        = "Transferred Pseudotime",
    title    = "Figure 1 — Pseudotime Distribution per Sézary Cell Line",
    subtitle = sprintf(
      "Effector axis medians: Naive=%.2f | TCM=%.2f | TEM=%.2f | Temra=%.2f\nTreg line omitted — Treg (PT=%.2f) overlaps TCM (PT=%.2f); Treg-like assigned by label",
      naive_med, tcm_med, tem_med, temra_med, treg_med, tcm_med)
  ) +
  theme(
    plot.title    = element_text(size = 13, face = "bold"),
    plot.subtitle = element_text(size = 9,  colour = "grey40"),
    axis.text.x   = element_text(size = 11, face = "bold")
  )

print(fig1v2)
save_fig(fig1v2, "Fig1v2_Pseudotime_Violin_NoTreg", w = 12, h = 6)
```


---

# 9. Figure 2 — UMAP: Pseudotime + Label Side by Side

```{r fig2-umap, fig.width=16, fig.height=7}

p2a <- ggplot(query_df[order(query_df$pseudotime, na.last = FALSE), ],
              aes(UMAP_1, UMAP_2, colour = pseudotime)) +
  geom_point(data = ref_bg_plot, aes(UMAP_1, UMAP_2),
             colour = "grey68", size = 0.25, alpha = 0.45, inherit.aes = FALSE) +
  geom_point(size = 0.25, alpha = 0.8) +
  scale_colour_gradientn(
    colours = c("#0D0887","#6A00A8","#B12A90","#E16462","#FCA636","#F0F921"),
    name = "Pseudotime", limits = PT_LIMITS, na.value = "grey85"
  ) +
  theme_classic() +
  labs(x = "UMAP-1", y = "UMAP-2",
       title    = "A. Transferred pseudotime gradient",
       subtitle = "Grey = healthy reference  |  Coloured = Sézary cells") +
  theme(plot.title = element_text(size = 12, face = "bold"))

p2b <- ggplot(query_df[sample(nrow(query_df)), ],
              aes(UMAP_1, UMAP_2, colour = factor(label, levels = STATE_ORDER))) +
  geom_point(data = ref_bg_plot, aes(UMAP_1, UMAP_2),
             colour = "grey88", size = 0.25, alpha = 0.45, inherit.aes = FALSE) +
  geom_point(size = 0.25, alpha = 0.7) +
  scale_colour_manual(values = STATE_COLORS, name = "Transferred\nlabel",
                      na.value = "grey80") +
  guides(colour = guide_legend(override.aes = list(size = 3, alpha = 1))) +
  theme_classic() +
  labs(x = "UMAP-1", y = "UMAP-2",
       title    = "B. Transferred label identity",
       subtitle = "96-99.9% CD4 TCM across all seven lines") +
  theme(plot.title = element_text(size = 12, face = "bold"))

fig2 <- p2a | p2b
print(fig2)
save_fig(fig2, "Fig2_UMAP_Pseudotime_and_Label", w = 16, h = 7)
```

---

# 10. Figure 3 — Label Bar Chart + Per Cell Line UMAP Facets

```{r fig3-label-umap, fig.width=18, fig.height=6}

p3a <- ggplot(label_df, aes(cell_line, pct, fill = label)) +
  geom_col(width = 0.75) +
  geom_text(data = label_df %>% filter(pct > 5),
            aes(label = sprintf("%.0f%%", pct)),
            position = position_stack(vjust = 0.5),
            size = 3, colour = "white", fontface = "bold") +
  scale_fill_manual(values = STATE_COLORS, name = "Label") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 101)) +
  theme_classic() +
  labs(x = "Cell Line", y = "% Cells", title = "A. Transferred Azimuth l2 Labels") +
  theme(plot.title  = element_text(size = 12, face = "bold"),
        axis.text.x = element_text(size = 10, face = "bold"))

p3b <- ggplot(
    query_df %>% filter(!is.na(cell_line)) %>% arrange(pseudotime),
    aes(UMAP_1, UMAP_2, colour = pseudotime)
  ) +
  geom_point(
    data = ref_bg_plot %>%
      slice(rep(1:n(), times = length(levels(query_df$cell_line)))),
    aes(UMAP_1, UMAP_2), colour = "grey88", size = 0.2, alpha = 0.4,
    inherit.aes = FALSE
  ) +
  geom_point(size = 0.2, alpha = 0.8) +
  scale_colour_gradientn(
    colours = c("#0D0887","#6A00A8","#B12A90","#E16462","#FCA636","#F0F921"),
    name = "Pseudotime", limits = PT_LIMITS, na.value = "grey85"
  ) +
  facet_wrap(~ cell_line, nrow = 2) +
  theme_classic() +
  theme(strip.text = element_text(face = "bold", size = 10),
        plot.title = element_text(size = 12, face = "bold"),
        axis.text  = element_blank(), axis.ticks = element_blank()) +
  labs(x = NULL, y = NULL,
       title = "B. Per Cell Line UMAP — Projected Pseudotime  (Grey = healthy reference)")

fig3 <- p3a + p3b + plot_layout(widths = c(1, 2))
print(fig3)
save_fig(fig3, "Fig3_Labels_PerLine_UMAP", w = 18, h = 6)
```

---

# 11. Figure 4 — Per Cell Line Facet UMAP (Key Heterogeneity Figure)

```{r fig4-facet-umap, fig.width=18, fig.height=10}

fig4 <- ggplot(
    query_df %>% filter(!is.na(cell_line)) %>% arrange(pseudotime),
    aes(UMAP_1, UMAP_2, colour = pseudotime)
  ) +
  geom_point(
    data = ref_bg_plot %>%
      slice(rep(1:n(), times = length(levels(query_df$cell_line)))),
    aes(UMAP_1, UMAP_2), colour = "grey88", size = 0.2, alpha = 0.4,
    inherit.aes = FALSE
  ) +
  geom_point(size = 0.25, alpha = 0.8) +
  scale_colour_gradientn(
    colours = c("#0D0887","#6A00A8","#B12A90","#E16462","#FCA636","#F0F921"),
    name = "Pseudotime\n(Naive->Temra)", limits = PT_LIMITS, na.value = "grey85"
  ) +
  facet_wrap(~ cell_line, nrow = 2) +
  theme_classic() +
  theme(
    strip.text       = element_text(face = "bold", size = 12),
    strip.background = element_rect(fill = "grey95", colour = "grey60"),
    plot.title       = element_text(size = 14, face = "bold"),
    plot.subtitle    = element_text(size = 10, colour = "grey40"),
    axis.text = element_blank(), axis.ticks = element_blank(),
    legend.position = "right"
  ) +
  labs(x = NULL, y = NULL,
       title    = "Figure 4 — Per Cell Line Facet UMAP: Differentiation Position",
       subtitle = "L1 most heterogeneous (spans full trajectory)  |  L6 most arrested (tight TCM cluster)")

print(fig4)
save_fig(fig4, "Fig4_PerLine_Facet_UMAP", w = 18, h = 10)
```

---

# 12. Figure 5 — Label vs Pseudotime Bins Bar Charts

```{r fig5-barcharts, fig.width=16, fig.height=12}

p5a <- ggplot(label_df, aes(cell_line, pct, fill = label)) +
  geom_col(width = 0.7) +
  geom_text(data = label_df %>% filter(pct > 3),
            aes(label = sprintf("%.1f%%", pct)),
            position = position_stack(vjust = 0.5),
            size = 3.2, colour = "white", fontface = "bold") +
  scale_fill_manual(values = STATE_COLORS, name = "Label") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 101)) +
  theme_classic() +
  labs(x = NULL, y = "% Cells",
       title    = "A. Transferred Azimuth l2 Label (Cell of Origin)",
       subtitle = "96-99.9% CD4 TCM across all lines — TCM origin confirmed") +
  theme(plot.title    = element_text(size = 12, face = "bold"),
        plot.subtitle = element_text(size = 10, colour = "grey40"),
        axis.text.x   = element_blank(), axis.ticks.x = element_blank())

p5b <- ggplot(bin_df, aes(cell_line, pct, fill = pseudotime_bin)) +
  geom_col(width = 0.7) +
  geom_text(data = bin_df %>% filter(pct > 3),
            aes(label = sprintf("%.1f%%", pct)),
            position = position_stack(vjust = 0.5),
            size = 3.2, colour = "white", fontface = "bold") +
  scale_fill_manual(values = BIN_COLORS, name = "Pseudotime bin") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 101)) +
  theme_classic() +
  labs(x = "Cell Line", y = "% Cells",
       title    = "B. Pseudotime Bin Assignment (Differentiation Position)",
       subtitle = sprintf(
         "Bin boundaries — Naive|TCM: %.2f  |  TCM|TEM: %.2f  |  TEM|Temra: %.2f  |  Treg: label-based",
         BIN_NAIVE_TCM, BIN_TCM_TEM, BIN_TEM_TEMRA)) +
  theme(plot.title    = element_text(size = 12, face = "bold"),
        plot.subtitle = element_text(size = 9,  colour = "grey40"),
        axis.text.x   = element_text(size = 11, face = "bold"))

fig5 <- p5a / p5b
print(fig5)
save_fig(fig5, "Fig5_Label_vs_Pseudotime_Bins",  w = 16, h = 12)
save_fig(p5a,  "Fig5A_Label_Transfer_BarChart",  w = 10, h = 5)
save_fig(p5b,  "Fig5B_Pseudotime_Bins_BarChart", w = 10, h = 5)
```

---

# 13. Figure 6 — Pseudotime Density + Ridgeplot

```{r fig6-density, fig.width=16, fig.height=7}

p6a <- ggplot(query_df %>% filter(!is.na(pseudotime)), aes(x = pseudotime)) +
  geom_density(fill = "#c0392b", colour = "#c0392b", alpha = 0.4, linewidth = 0.9) +
  geom_vline(data = REF_MEDIANS,
             aes(xintercept = med_pt, colour = state),
             linetype = "dashed", linewidth = 0.7) +
  geom_text(data = REF_MEDIANS,
            aes(x = med_pt, y = Inf,
                label = gsub("CD4 ", "", as.character(state))),
            inherit.aes = FALSE,
            angle = 90, hjust = 1.1, vjust = -0.3, size = 2.8, colour = "grey30") +
  scale_colour_manual(values = STATE_COLORS, name = "Reference\nstate median") +
  scale_x_continuous(limits = c(0, 26), breaks = seq(0, 25, 5)) +
  theme_classic() +
  labs(
    x        = "Transferred Pseudotime",
    y        = "Density",
    title    = "A. Sézary Pseudotime Distribution — All Lines Combined",
    subtitle = sprintf(
      "Reference medians: Naive=%.2f | TCM=%.2f | Treg=%.2f | TEM=%.2f | Temra=%.2f",
      naive_med, tcm_med, treg_med, tem_med, temra_med)
  ) +
  theme(plot.title    = element_text(size = 12, face = "bold"),
        plot.subtitle = element_text(size = 9,  colour = "grey40"),
        legend.position = "right")

p6b <- ggplot(
    query_df %>% filter(!is.na(pseudotime), !is.na(cell_line)),
    aes(x = pseudotime, y = fct_rev(cell_line), fill = cell_line)
  ) +
  geom_density_ridges(scale = 0.9, rel_min_height = 0.01,
                      alpha = 0.85, colour = "white") +
  geom_vline(xintercept = c(BIN_NAIVE_TCM, BIN_TCM_TEM, BIN_TEM_TEMRA),
             linetype = "dashed", colour = "grey50", linewidth = 0.5) +
  annotate("text", x = BIN_NAIVE_TCM, y = 0.5, label = "Naive|TCM",
           size = 2.5, hjust = 1.05, colour = "grey40", angle = 90) +
  annotate("text", x = BIN_TCM_TEM,   y = 0.5, label = "TCM|TEM",
           size = 2.5, hjust = 1.05, colour = "grey40", angle = 90) +
  annotate("text", x = BIN_TEM_TEMRA, y = 0.5, label = "TEM|Temra",
           size = 2.5, hjust = 1.05, colour = "grey40", angle = 90) +
  scale_fill_brewer(palette = "Set2", guide = "none") +
  scale_x_continuous(limits = c(0, 26), breaks = seq(0, 25, 5)) +
  theme_ridges(grid = FALSE) +
  labs(x = "Transferred Pseudotime", y = "Cell Line",
       title    = "B. Per Cell Line Pseudotime Ridgeplot",
       subtitle = "L1 multimodal  |  L3-L7 narrow peaks at TCM-TEM boundary") +
  theme(plot.title    = element_text(size = 12, face = "bold"),
        plot.subtitle = element_text(size = 10, colour = "grey40"))

fig6 <- p6a | p6b
print(fig6)
save_fig(fig6, "Fig6_Density_and_Ridgeplot", w = 16, h = 7)
save_fig(p6a,  "Fig6A_Density_Overall",     w = 8,  h = 6)
save_fig(p6b,  "Fig6B_Ridgeplot_PerLine",   w = 8,  h = 6)
```



```{r, fig.width=12, fig.height=6}
p6a_v2 <- ggplot(
    query_df %>% filter(!is.na(pseudotime)),
    aes(x = pseudotime)
  ) +
  geom_density(fill = "#c0392b", colour = "#c0392b", alpha = 0.4, linewidth = 0.9) +
  geom_vline(
    data     = REF_MEDIANS_EFFECTOR,
    aes(xintercept = med_pt, colour = state),
    linetype = "dashed", linewidth = 0.7
  ) +
  geom_text(
    data        = REF_MEDIANS_EFFECTOR,
    aes(x = med_pt, y = Inf,
        label = gsub("CD4 ", "", as.character(state))),
    inherit.aes = FALSE,
    angle = 90, hjust = 1.1, vjust = -0.3, size = 2.8, colour = "grey30"
  ) +
  scale_colour_manual(
    values = STATE_COLORS[names(STATE_COLORS) != "Treg"],
    name   = "Reference\nstate median"
  ) +
  scale_x_continuous(limits = c(0, 26), breaks = seq(0, 25, 5)) +
  theme_classic() +
  labs(
    x        = "Transferred Pseudotime",
    y        = "Density",
    title    = "A. Sézary Pseudotime Distribution — All Lines Combined",
    subtitle = sprintf(
      "Effector axis medians: Naive=%.2f | TCM=%.2f | TEM=%.2f | Temra=%.2f\nTreg line omitted — overlaps TCM; Treg-like assigned by label not pseudotime",
      naive_med, tcm_med, tem_med, temra_med)
  ) +
  theme(
    plot.title      = element_text(size = 12, face = "bold"),
    plot.subtitle   = element_text(size = 9,  colour = "grey40"),
    legend.position = "right"
  )

print(p6a_v2)
save_fig(p6a_v2, "Fig6Av2_Density_NoTreg", w = 8, h = 6)

```

---

# 14. Figure 8 — Violin: Transferred Pseudotime per Label

```{r fig8-violin-per-label, fig.width=12, fig.height=6}

fig8 <- ggplot(
    query_df %>%
      filter(!is.na(pseudotime), !is.na(label)) %>%
      mutate(label = factor(label, levels = STATE_ORDER)),
    aes(label, pseudotime, fill = label)
  ) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.85, colour = "white") +
  geom_boxplot(width = 0.07, fill = "white", colour = "grey40",
               outlier.size = 0.4, outlier.alpha = 0.3) +
  geom_hline(data = REF_MEDIANS, aes(yintercept = med_pt),
             linetype = "dotted", colour = "grey55", linewidth = 0.6) +
  # Annotations driven entirely from REF_MEDIANS — no hard-coded y values
  geom_text(
    data        = REF_MEDIANS,
    aes(x       = 0.55,
        y       = med_pt,
        label   = sprintf("Ref: %s (%.2f)",
                          gsub("CD4 ", "", as.character(state)), med_pt)),
    inherit.aes = FALSE,
    hjust = 0, size = 2.8, colour = "grey40"
  ) +
  scale_fill_manual(values = STATE_COLORS, guide = "none") +
  scale_y_continuous(breaks = seq(0, 25, 5)) +
  theme_classic() +
  labs(
    x        = "Transferred Azimuth l2 Label",
    y        = "Transferred Pseudotime",
    title    = "Figure 8 — Transferred Pseudotime per Label (All Cell Lines)",
    subtitle = "CD4 Naive internal control at PT~4  |  TCM broad distribution  |  TEM at terminal PT~25"
  ) +
  theme(plot.title    = element_text(size = 13, face = "bold"),
        plot.subtitle = element_text(size = 10, colour = "grey40"),
        axis.text.x   = element_text(size = 10, face = "bold"))

print(fig8)
save_fig(fig8, "Fig8_Violin_Pseudotime_per_Label", w = 12, h = 6)
```




```{r , fig.width=12, fig.height=6}

# For TEM and Temra: median=24.84 for both (Temra n=10, all at same PT).
# Use actual reference max values to separate the lines visually:
#   TEM   max = 25.00  (from diagnostic)
#   Temra max = 24.84  (all 10 cells identical)
# Better approach: show TEM median and Temra median separately but
# acknowledge in subtitle they are nearly identical.

# Override: use mean instead of median for TEM/Temra to separate lines
REF_MEDIANS_EFFECTOR_PLOT <- REF_MEDIANS_EFFECTOR %>%
  mutate(med_pt = case_when(
    state == "CD4 TEM"       ~ 23.962,  # mean from diagnostic (median=24.84 same as Temra)
    state == "CD4 Temra/CTL" ~ 24.840,  # all 10 cells identical
    TRUE ~ med_pt
  ),
  label_text = case_when(
    state == "CD4 TEM"       ~ sprintf("Ref: TEM (mean=23.96)"),
    state == "CD4 Temra/CTL" ~ sprintf("Ref: Temra (24.84, n=10)"),
    state == "CD4 Naive"     ~ sprintf("Ref: Naive (%.2f)", med_pt),
    state == "CD4 TCM"       ~ sprintf("Ref: TCM (%.2f)", med_pt),
    TRUE ~ as.character(state)
  ))

fig8v2 <- ggplot(
    query_df %>%
      filter(!is.na(pseudotime), !is.na(label)) %>%
      mutate(label = factor(label, levels = STATE_ORDER)),  # drop=FALSE keeps Temra axis
    aes(label, pseudotime, fill = label)
  ) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.85, colour = "white") +
  geom_boxplot(width = 0.07, fill = "white", colour = "grey40",
               outlier.size = 0.4, outlier.alpha = 0.3) +
  geom_hline(
    data      = REF_MEDIANS_EFFECTOR_PLOT,
    aes(yintercept = med_pt),
    linetype  = "dotted", colour = "grey55", linewidth = 0.6
  ) +
  geom_text(
    data        = REF_MEDIANS_EFFECTOR_PLOT,
    aes(x       = 0.55,
        y       = med_pt,
        label   = label_text),
    inherit.aes = FALSE,
    hjust = 0, size = 2.8, colour = "grey40"
  ) +
  scale_fill_manual(values = STATE_COLORS, na.value = "grey80", guide = "none",
                    drop = FALSE) +
  scale_x_discrete(drop = FALSE) +   # ← forces CD4 Temra/CTL to appear
  scale_y_continuous(breaks = seq(0, 25, 5)) +
  theme_classic() +
  labs(
    x        = "Transferred Azimuth l2 Label",
    y        = "Transferred Pseudotime",
    title    = "Figure 8 — Transferred Pseudotime per Label (All Cell Lines)",
    subtitle = sprintf(
      "Effector axis: Naive=%.2f | TCM=%.2f | TEM~24.84 | Temra~24.84 (n=10, identical)\nCD4 Temra/CTL: no Sézary cells received this label — consistent with TCM arrest phenotype\nTreg line omitted — overlaps TCM; Treg-like assigned by label",
      naive_med, tcm_med)
  ) +
  theme(
    plot.title    = element_text(size = 13, face = "bold"),
    plot.subtitle = element_text(size = 9,  colour = "grey40"),
    axis.text.x   = element_text(size = 10, face = "bold")
  )

print(fig8v2)
save_fig(fig8v2, "Fig8v2_Violin_NoTreg", w = 12, h = 6)

```

```{r , fig.width=18, fig.height=7}
fig8v2 <- ggplot(
    query_df %>%
      filter(!is.na(pseudotime), !is.na(label)) %>%
      mutate(label = factor(label, levels = STATE_ORDER)),
    aes(label, pseudotime, fill = label)
  ) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.85, colour = "white") +
  geom_boxplot(width = 0.07, fill = "white", colour = "grey40",
               outlier.size = 0.4, outlier.alpha = 0.3) +
  geom_hline(
    data      = REF_MEDIANS_EFFECTOR,
    aes(yintercept = med_pt),
    linetype  = "dotted", colour = "grey55", linewidth = 0.6
  ) +
  geom_text(
    data        = REF_MEDIANS_EFFECTOR,
    aes(x       = 0.55,
        y       = med_pt,
        label   = sprintf("Ref: %s (%.2f)",
                          gsub("CD4 ", "", as.character(state)), med_pt)),
    inherit.aes = FALSE,
    hjust = 0, size = 2.8, colour = "grey40"
  ) +
  scale_fill_manual(values = STATE_COLORS, guide = "none") +
  scale_y_continuous(breaks = seq(0, 25, 5)) +
  theme_classic() +
  labs(
    x        = "Transferred Azimuth l2 Label",
    y        = "Transferred Pseudotime",
    title    = "Figure 8 — Transferred Pseudotime per Label (All Cell Lines)",
    subtitle = sprintf(
      "Effector axis reference lines: Naive=%.2f | TCM=%.2f | TEM=%.2f | Temra=%.2f\nTreg line omitted — Treg (PT=%.2f) overlaps TCM; Treg-like violin shown as biological confirmation",
      naive_med, tcm_med, tem_med, temra_med, treg_med)
  ) +
  theme(
    plot.title    = element_text(size = 13, face = "bold"),
    plot.subtitle = element_text(size = 9,  colour = "grey40"),
    axis.text.x   = element_text(size = 10, face = "bold")
  )

print(fig8v2)
save_fig(fig8v2, "Fig8v2_Violin_NoTreg", w = 12, h = 6)

```


---

# 15. Figure 9 — Cross-Table: Label vs Pseudotime Bin (PRIMARY RESULT)

```{r , fig.width=18, fig.height=7}

cross_df <- query_df %>%
  filter(!is.na(label), !is.na(pseudotime_bin)) %>%
  mutate(label = factor(label, levels = STATE_ORDER)) %>%
  count(label, pseudotime_bin) %>%
  group_by(label) %>%
  mutate(pct = 100 * n / sum(n)) %>%
  ungroup()

tcm_discordant <- cross_df %>%
  filter(label == "CD4 TCM",
         pseudotime_bin %in% c("TEM-like", "Temra-like")) %>%
  summarise(pct = sum(pct)) %>%
  pull(pct)

fig9 <- ggplot(cross_df, aes(pseudotime_bin, pct, fill = pseudotime_bin)) +
  geom_col(width = 0.7) +
  geom_text(aes(label = sprintf("%.1f%%", pct)),
            vjust = -0.3, size = 3.5, fontface = "bold") +
  scale_fill_manual(values = BIN_COLORS, guide = "none") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 115)) +
  facet_wrap(~ label, nrow = 1, scales = "free_x") +
  theme_classic() +
  theme(
    strip.text       = element_text(face = "bold", size = 11),
    strip.background = element_rect(fill = "grey95", colour = "grey60"),
    axis.text.x      = element_text(size = 9, face = "bold", angle = 30, hjust = 1),
    plot.title       = element_text(size = 14, face = "bold"),
    plot.subtitle    = element_text(size = 10, colour = "grey40")
  ) +
  labs(
    x        = "Pseudotime Bin",
    y        = "% of Cells with This Label",
    title    = "Figure 9 — Cross-Table: Transferred Label vs Pseudotime Bin  [PRIMARY RESULT]",
    subtitle = sprintf(
      "TCM-labelled cells: %.1f%% discordant (TEM-like or Temra-like)\nTreg panel: 100%% Treg-like by design",
      tcm_discordant)
  )

print(fig9)
save_fig(fig9, "Fig9_CrossTable_Label_vs_Bin_PRIMARY", w = 18, h = 7)
```

---

# 16. Supplementary Figures

## Supp S1 — Per Cell Line Pseudotime Density (Faceted)

```{r suppS1-density-facet, fig.width=12, fig.height=7}

suppS1 <- ggplot(
    query_df %>% filter(!is.na(pseudotime), !is.na(cell_line)),
    aes(x = pseudotime, fill = cell_line, colour = cell_line)
  ) +
  geom_density(alpha = 0.35, linewidth = 0.8) +
  geom_vline(data = REF_MEDIANS, aes(xintercept = med_pt),
             colour = "grey45", linetype = "dashed", linewidth = 0.5) +
  scale_fill_brewer(palette = "Set2", guide = "none") +
  scale_colour_brewer(palette = "Set2", guide = "none") +
  scale_x_continuous(limits = c(0, 26), breaks = seq(0, 25, 5)) +
  facet_wrap(~ cell_line, nrow = 2) +
  theme_classic() +
  theme(strip.text    = element_text(face = "bold"),
        plot.title    = element_text(size = 12, face = "bold"),
        plot.subtitle = element_text(size = 9,  colour = "grey40")) +
  labs(x = "Transferred Pseudotime", y = "Density",
       title    = "Supp S1 — Per Cell Line Pseudotime Density",
       subtitle = "Grey dashed lines = data-derived reference state medians")

print(suppS1)
save_fig(suppS1, "SuppS1_Density_PerLine_Facet", w = 12, h = 7)
```

## Supp S2 — Ridgeplot Standalone

```{r suppS2-ridgeplot, fig.width=10, fig.height=7}

suppS2 <- p6b +
  ggtitle(
    "Supp S2 — Per Cell Line Pseudotime Ridgeplot",
    subtitle = sprintf(
      "Bin boundaries: Naive|TCM=%.2f | TCM|TEM=%.2f | TEM|Temra=%.2f",
      BIN_NAIVE_TCM, BIN_TCM_TEM, BIN_TEM_TEMRA)
  )

print(suppS2)
save_fig(suppS2, "SuppS2_Ridgeplot_PerLine", w = 10, h = 7)
```

## Supp S3 — Violin per Label x Cell Line (Faceted)

```{r suppS3-violin-label-line, fig.width=16, fig.height=10}

suppS3 <- ggplot(
    query_df %>%
      filter(!is.na(label), !is.na(cell_line), !is.na(pseudotime)) %>%
      mutate(label = factor(label, levels = STATE_ORDER)),
    aes(cell_line, pseudotime, fill = cell_line)
  ) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.8, colour = "white") +
  geom_boxplot(width = 0.07, fill = "white", colour = "grey40",
               outlier.size = 0.2, outlier.alpha = 0.3) +
  scale_fill_brewer(palette = "Set2", guide = "none") +
  scale_y_continuous(breaks = seq(0, 25, 5)) +
  facet_wrap(~ label, nrow = 1) +
  theme_classic() +
  theme(
    strip.text       = element_text(face = "bold", size = 10),
    strip.background = element_rect(fill = "grey95", colour = "grey60"),
    axis.text.x      = element_text(size = 9, face = "bold", angle = 30, hjust = 1),
    plot.title       = element_text(size = 12, face = "bold"),
    plot.subtitle    = element_text(size = 9,  colour = "grey40")
  ) +
  labs(x = "Cell Line", y = "Transferred Pseudotime",
       title    = "Supp S3 — Violin: Pseudotime per Label x Cell Line",
       subtitle = "Complements Fig 9 — shows within-state per-line heterogeneity")

print(suppS3)
save_fig(suppS3, "SuppS3_Violin_Label_x_Line", w = 16, h = 10)
```

## Supp S4 — Pseudotime Bin Bar Chart with Full Percentages

```{r suppS4-bins-detailed, fig.width=14, fig.height=6}

suppS4 <- ggplot(bin_df, aes(cell_line, pct, fill = pseudotime_bin)) +
  geom_col(width = 0.7) +
  geom_text(data = bin_df %>% filter(pct > 2),
            aes(label = sprintf("%.1f%%", pct)),
            position = position_stack(vjust = 0.5),
            size = 3.5, colour = "white", fontface = "bold") +
  scale_fill_manual(values = BIN_COLORS, name = "Pseudotime bin") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 101)) +
  theme_classic() +
  labs(
    x        = "Cell Line",
    y        = "% Cells",
    title    = "Supp S4 — Pseudotime Bin Composition per Cell Line",
    subtitle = sprintf(
      "Bin boundaries: Naive|TCM=%.2f  |  TCM|TEM=%.2f  |  TEM|Temra=%.2f  |  Treg=label-based",
      BIN_NAIVE_TCM, BIN_TCM_TEM, BIN_TEM_TEMRA)
  ) +
  theme(plot.title    = element_text(size = 12, face = "bold"),
        plot.subtitle = element_text(size = 9,  colour = "grey40"),
        axis.text.x   = element_text(size = 11, face = "bold"))

print(suppS4)
save_fig(suppS4, "SuppS4_Bins_BarChart_Detailed", w = 14, h = 6)
```

---

# 17. Figure Manifest

```{r manifest, echo=FALSE}
manifest <- data.frame(
  File = c(
    "Fig7_FourPanel_Summary_FIXED",
    "Fig7A_Transferred_Labels",
    "Fig7B_Transferred_Pseudotime_FIXED",
    "Fig7C_Pseudotime_Bins_UMAP",
    "Fig7D_Label_BarChart_per_Line",
    "Fig1_Pseudotime_Violin_per_Line",
    "Fig2_UMAP_Pseudotime_and_Label",
    "Fig3_Labels_PerLine_UMAP",
    "Fig4_PerLine_Facet_UMAP",
    "Fig5_Label_vs_Pseudotime_Bins",
    "Fig5A_Label_Transfer_BarChart",
    "Fig5B_Pseudotime_Bins_BarChart",
    "Fig6_Density_and_Ridgeplot",
    "Fig6A_Density_Overall",
    "Fig6B_Ridgeplot_PerLine",
    "Fig8_Violin_Pseudotime_per_Label",
    "Fig9_CrossTable_Label_vs_Bin_PRIMARY",
    "SuppS1_Density_PerLine_Facet",
    "SuppS2_Ridgeplot_PerLine",
    "SuppS3_Violin_Label_x_Line",
    "SuppS4_Bins_BarChart_Detailed"
  ),
  Description = c(
    "Four-panel: labels | pseudotime (FIXED) | bins | label bar",
    "Sezary UMAP — transferred Azimuth l2 label",
    "Sezary UMAP — transferred pseudotime, FIXED 0-25 colour scale",
    "Sezary UMAP — pseudotime bin colour",
    "Transferred label proportions per cell line bar chart",
    "Pseudotime violin per cell line with data-derived reference medians",
    "UMAP pseudotime gradient + label identity side by side",
    "Label bar chart + per cell line UMAP facets",
    "Per cell line facet UMAP — key inter-line heterogeneity figure",
    "Dual bar chart: labels (top) and pseudotime bins (bottom)",
    "Label transfer proportions bar chart only",
    "Pseudotime bin proportions bar chart only",
    "Overall pseudotime density + per-line ridgeplot",
    "Overall pseudotime density with data-derived reference median lines",
    "Per cell line pseudotime ridgeplot with bin boundaries",
    "Violin: transferred pseudotime per label — data-derived annotations",
    "Cross-table: label vs pseudotime bin — PRIMARY RESULT",
    "Per cell line pseudotime density faceted",
    "Per cell line ridgeplot standalone",
    "Violin: pseudotime per label x cell line faceted",
    "Pseudotime bin bar chart with full percentages labelled"
  ),
  Manuscript = c(
    "Fig 1 combined", "Fig 1A", "Fig 1B (FIXED)", "Fig 1C", "Fig 1D",
    "Supp", "Fig 2", "Fig 3", "Fig 4 (KEY)",
    "Fig 5", "—", "Fig 5B",
    "Supp", "Supp S3", "Supp S4",
    "Supp S5", "Fig 2 (PRIMARY)",
    "Supp S1", "Supp S2", "Supp S6", "Supp"
  ),
  stringsAsFactors = FALSE
)

kable(manifest,
      caption = "All 21 figures — Figures/PNG/*.png (300 dpi) and Figures/PDF/*.pdf") %>%
  kable_styling(bootstrap_options = c("striped","condensed","hover"),
                full_width = TRUE) %>%
  row_spec(which(grepl("FIXED|PRIMARY", manifest$File)),
           bold = TRUE, background = "#fff3cd")
```

---

# 18. Session Info

```{r session-info}
sessionInfo()
```
