Load Seurat Object
# Load your Seurat Object
seurat_obj <- readRDS("Output_Objects/Seurat_Object_With_TF_Activity.rds")
Idents(seurat_obj) <- "seurat_clusters"
cat("✓ Seurat object loaded\n")
✓ Seurat object loaded
cat(sprintf(" - %d cells across %d clusters\n",
ncol(seurat_obj),
length(unique(seurat_obj$seurat_clusters))))
- 49305 cells across 14 clusters
Run this code block
to restore activities instantly:
# If 'activities' is missing but 'dorothea' assay exists, reconstruct it:
if (!exists("activities") && "dorothea" %in% names(seurat_obj@assays)) {
print("Reconstructing 'activities' dataframe from Seurat object...")
# Extract the matrix (Seurat v5 uses 'layer' instead of 'slot')
# Since you ran ScaleData, we use 'scale.data'
mat <- GetAssayData(seurat_obj, assay = "dorothea", layer = "scale.data")
# Convert to long format (what SCpubr needs)
activities <- as.data.frame(mat) %>%
rownames_to_column("source") %>%
pivot_longer(cols = -source, names_to = "condition", values_to = "score") %>%
mutate(statistic = "norm_wmean") # SCpubr requires this column
print("Activities dataframe restored!")
}
SCpubr Heatmap
Visualization-Heatmap of averaged scores
library(SCpubr)
# General heatmap (Top Variable TFs)
out <- SCpubr::do_TFActivityPlot(sample = seurat_obj,
activities = activities)
p1 <- out$heatmaps$average_scores
print(p1)
# 1. Save as PDF
pdf("Output_Figures/SCpubr_Heatmap_Default.pdf", width = 10, height = 8)
print(p1) # ComplexHeatmap requires explicit print() inside pdf()
dev.off()
png
2
# 2. Save as PNG
png("Output_Figures/SCpubr_Heatmap_Default.png", width = 10 * 300, height = 8 * 300, res = 300)
print(p1)
dev.off()
png
2

Set the scale
limits
out <- SCpubr::do_TFActivityPlot(sample = seurat_obj,
activities = activities,
min.cutoff = -1.5,
max.cutoff = 1.5)
p2 <- out$heatmaps$average_scores
print(p2)
# Save ComplexHeatmap properly
pdf("Output_Figures/SCpubr_Heatmap_Scaled.pdf", width = 10, height = 8)
print(p2)
dev.off()
png
2
png("Output_Figures/SCpubr_Heatmap_Scaled.png", width = 10 * 300, height = 8 * 300, res = 300)
print(p2)
dev.off()
png
2

Enforce Symmetry
(Best for Manuscript)
out <- SCpubr::do_TFActivityPlot(sample = seurat_obj,
activities = activities,
min.cutoff = -1.5,
max.cutoff = 1.5,
enforce_symmetry = TRUE)
p3 <- out$heatmaps$average_scores
print(p3)
pdf("Output_Figures/SCpubr_Heatmap_Symmetric.pdf", width = 10, height = 8)
print(p3)
dev.off()
png
2
png("Output_Figures/SCpubr_Heatmap_Symmetric.png", width = 10 * 300, height = 8 * 300, res = 300)
print(p3)
dev.off()
png
2

Top 40 TFs
out <- SCpubr::do_TFActivityPlot(sample = seurat_obj,
activities = activities,
n_tfs = 40)
p4 <- out$heatmaps$average_scores
print(p4)
pdf("Output_Figures/SCpubr_Heatmap_Top40.pdf", width = 14, height = 6)
print(p4)
dev.off()
png
2
png("Output_Figures/SCpubr_Heatmap_Top40.png", width = 14 * 300, height = 6 * 300, res = 300)
print(p4)
dev.off()
png
2

FIGURE 3.16B:
Expression-Activity Concordance (2-Row Grid)
# Select 6 TFs representing key heterogeneity axes
selected_tfs <- c(
"FOXO1", # Homeostasis
"MYC", # Oncogenic
"TBX21", # Th1/Stem
"GATA3", # Th2
"RELA", # Inflammatory
"IRF1" # Interferon
)
cat("\n✓ Selected TFs for Figure 3.16B:\n")
✓ Selected TFs for Figure 3.16B:
print(selected_tfs)
[1] "FOXO1" "MYC" "TBX21" "GATA3" "RELA" "IRF1"
# ---- TOP ROW: TF ACTIVITY (dorothea assay) ----
DefaultAssay(seurat_obj) <- "dorothea"
act_plots <- lapply(selected_tfs, function(tf) {
FeaturePlot(seurat_obj,
features = tf,
reduction = "umap",
order = TRUE,,label = T,
cols = c("grey90", "red3"),
pt.size = 0.3) +
ggtitle(paste0(tf, " Activity")) +
theme_minimal() +
theme(plot.title = element_text(size = 11, face = "bold"),
axis.title = element_text(size = 9),
axis.text = element_text(size = 7),
legend.position = "right",
legend.text = element_text(size = 8),
legend.title = element_text(size = 9))
})
cat("✓ TF Activity plots created (TOP ROW)\n")
✓ TF Activity plots created (TOP ROW)
# ---- BOTTOM ROW: Gene EXPRESSION (SCT assay) ----
DefaultAssay(seurat_obj) <- "SCT"
expr_plots <- lapply(selected_tfs, function(tf) {
FeaturePlot(seurat_obj,
features = tf,
reduction = "umap",
order = TRUE,label = T,
cols = c("grey90", "darkblue"),
pt.size = 0.3) +
ggtitle(paste0(tf, " Expression")) +
theme_minimal() +
theme(plot.title = element_text(size = 11, face = "bold"),
axis.title = element_text(size = 9),
axis.text = element_text(size = 7),
legend.position = "right",
legend.text = element_text(size = 8),
legend.title = element_text(size = 9))
})
cat("✓ Gene Expression plots created (BOTTOM ROW)\n")
✓ Gene Expression plots created (BOTTOM ROW)
# ---- COMBINE: 2 rows × 6 columns ----
top_row <- wrap_plots(act_plots, nrow = 1)
bottom_row <- wrap_plots(expr_plots, nrow = 1)
p_3.16B <- top_row / bottom_row +
plot_annotation(
title = "TF Activity and Expression Patterns",
subtitle = "Top: Inferred TF Activity (DoRothEA) | Bottom: Gene Expression (SCT)",
theme = theme(plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
plot.subtitle = element_text(size = 11, hjust = 0.5, color = "grey30"))
)
ggsave("Output_Figures/Figure_3.16B_Activity_vs_Expression_Grid.pdf",
p_3.16B,
width = 22,
height = 8,
units = "in")
ggsave("Output_Figures/Figure_3.16B_Activity_vs_Expression_Grid.png",
p_3.16B,
width = 22,
height = 8,
units = "in",
dpi = 300)
print(p_3.16B)

cat("\n✓✓✓ FIGURE 3.16B SAVED (20×8 inches, 2 rows × 6 columns) ✓✓✓\n")
✓✓✓ FIGURE 3.16B SAVED (20×8 inches, 2 rows × 6 columns) ✓✓✓
SUPPLEMENTARY
FIGURES
SUPPLEMENTARY FIGURE
1: Differential TF Activity Volcano Plot
library(EnhancedVolcano)
# 1. Perform Differential Analysis
non_malignant_clusters <- c(3, 10)
seurat_obj$Condition <- ifelse(seurat_obj$seurat_clusters %in% non_malignant_clusters, "Non-Malignant", "Malignant")
DefaultAssay(seurat_obj) <- "dorothea"
Idents(seurat_obj) <- "Condition"
cat("Running FindMarkers...\n")
Running FindMarkers...
diff_tfs <- FindMarkers(seurat_obj, ident.1="Malignant", ident.2="Non-Malignant", test.use="t", logfc.threshold=0.1, min.pct=0)
| | 0 % ~calculating
|+ | 2 % ~01s
|++ | 4 % ~01s
|+++ | 5 % ~01s
|++++ | 7 % ~01s
|+++++ | 9 % ~01s
|++++++ | 11% ~01s
|+++++++ | 12% ~01s
|++++++++ | 14% ~01s
|+++++++++ | 16% ~01s
|+++++++++ | 18% ~01s
|++++++++++ | 20% ~01s
|+++++++++++ | 21% ~01s
|++++++++++++ | 23% ~01s
|+++++++++++++ | 25% ~01s
|++++++++++++++ | 27% ~01s
|+++++++++++++++ | 29% ~01s
|++++++++++++++++ | 30% ~01s
|+++++++++++++++++ | 32% ~01s
|+++++++++++++++++ | 34% ~01s
|++++++++++++++++++ | 36% ~00s
|+++++++++++++++++++ | 38% ~00s
|++++++++++++++++++++ | 39% ~00s
|+++++++++++++++++++++ | 41% ~00s
|++++++++++++++++++++++ | 43% ~00s
|+++++++++++++++++++++++ | 45% ~00s
|++++++++++++++++++++++++ | 46% ~00s
|+++++++++++++++++++++++++ | 48% ~00s
|+++++++++++++++++++++++++ | 50% ~00s
|++++++++++++++++++++++++++ | 52% ~00s
|+++++++++++++++++++++++++++ | 54% ~00s
|++++++++++++++++++++++++++++ | 55% ~00s
|+++++++++++++++++++++++++++++ | 57% ~00s
|++++++++++++++++++++++++++++++ | 59% ~00s
|+++++++++++++++++++++++++++++++ | 61% ~00s
|++++++++++++++++++++++++++++++++ | 62% ~00s
|+++++++++++++++++++++++++++++++++ | 64% ~00s
|++++++++++++++++++++++++++++++++++ | 66% ~00s
|++++++++++++++++++++++++++++++++++ | 68% ~00s
|+++++++++++++++++++++++++++++++++++ | 70% ~00s
|++++++++++++++++++++++++++++++++++++ | 71% ~00s
|+++++++++++++++++++++++++++++++++++++ | 73% ~00s
|++++++++++++++++++++++++++++++++++++++ | 75% ~00s
|+++++++++++++++++++++++++++++++++++++++ | 77% ~00s
|++++++++++++++++++++++++++++++++++++++++ | 79% ~00s
|+++++++++++++++++++++++++++++++++++++++++ | 80% ~00s
|++++++++++++++++++++++++++++++++++++++++++ | 82% ~00s
|++++++++++++++++++++++++++++++++++++++++++ | 84% ~00s
|+++++++++++++++++++++++++++++++++++++++++++ | 86% ~00s
|++++++++++++++++++++++++++++++++++++++++++++ | 88% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++ | 89% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 93% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 95% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 96% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 98% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=01s
diff_tfs$gene <- rownames(diff_tfs)
# 2. Plot EnhancedVolcano
p_volcano <- EnhancedVolcano(diff_tfs,
lab = rownames(diff_tfs), x = 'avg_log2FC', y = 'p_val_adj',
title = 'Differential TF Activity: Malignant vs. Non-Malignant',
subtitle = 'DecoupleR Inferred Activity',
pCutoff = 1e-5, FCcutoff = 0.5, pointSize = 3.0, labSize = 5.0, colAlpha = 0.8,
legendPosition = 'right', drawConnectors = TRUE, widthConnectors = 0.5,
col = c("grey30", "forestgreen", "royalblue", "firebrick2"))
ggsave("Output_Figures/Supplementary_Differential_TF_Activity_Volcano.pdf", p_volcano, width=12, height=10)
print(p_volcano)

cat("✓ Supplementary Volcano Plot Saved\n")
✓ Supplementary Volcano Plot Saved
SUPPLEMENTARY FIGURE:
CHECK TF AVAILABILITY
# ============================================================================
# CHECK TF AVAILABILITY
# ============================================================================
# 1. Define Desired Modules
modules <- list(
"Homeostasis" = c("FOXO1", "TCF7", "LEF1"),
"Oncogenic/Cell Cycle" = c("MYC", "E2F1", "E2F4", "FOXM1"),
"Inflammation (NF-kB)" = c("RELA", "NFKB1", "REL", "STAT3"),
"Treg Signature" = c("FOXP3", "STAT5B", "IKZF2"),
"Th2 Differentiation" = c("GATA3", "STAT6", "MAF"),
"Th1 Differentiation" = c("TBX21", "STAT1", "IRF1"),
"Exhaustion" = c("TOX", "PRDM1", "BATF")
)
# 2. Get available TFs in the object
DefaultAssay(seurat_obj) <- "dorothea"
available_tfs <- rownames(seurat_obj)
# 3. Check each module
cat("Checking TF availability in DoRothEA assay...\n")
Checking TF availability in DoRothEA assay...
cat("------------------------------------------------\n")
------------------------------------------------
final_modules <- list()
for (mod_name in names(modules)) {
tfs <- modules[[mod_name]]
# Find intersection
present <- intersect(tfs, available_tfs)
missing <- setdiff(tfs, available_tfs)
# Report
cat(sprintf("Module: %s\n", mod_name))
cat(sprintf(" ✓ Found (%d): %s\n", length(present), paste(present, collapse=", ")))
if(length(missing) > 0) {
cat(sprintf(" ⚠ MISSING (%d): %s\n", length(missing), paste(missing, collapse=", ")))
}
# Only keep module if at least 1 TF exists
if(length(present) > 0) {
final_modules[[mod_name]] <- present
}
cat("\n")
}
Module: Homeostasis
✓ Found (3): FOXO1, TCF7, LEF1
Module: Oncogenic/Cell Cycle
✓ Found (4): MYC, E2F1, E2F4, FOXM1
Module: Inflammation (NF-kB)
✓ Found (4): RELA, NFKB1, REL, STAT3
Module: Treg Signature
✓ Found (1): STAT5B
⚠ MISSING (2): FOXP3, IKZF2
Module: Th2 Differentiation
✓ Found (3): GATA3, STAT6, MAF
Module: Th1 Differentiation
✓ Found (3): TBX21, STAT1, IRF1
Module: Exhaustion
✓ Found (2): PRDM1, BATF
⚠ MISSING (1): TOX
# 4. Update the 'modules' list to only use valid TFs
modules <- final_modules
cat("------------------------------------------------\n")
------------------------------------------------
cat("Ready to plot with validated TFs.\n")
Ready to plot with validated TFs.
SUPPLEMENTARY FIGURE
2: Variance Explained by Key Regulatory Modules
library(ggplot2)
library(dplyr)
library(Seurat)
# 1. Define Modules
modules <- list(
"Homeostasis" = c("FOXO1", "TCF7", "LEF1"),
"Oncogenic/Cell Cycle" = c("MYC", "E2F1", "E2F4", "FOXM1"),
"Inflammation (NF-kB)" = c("RELA", "NFKB1", "REL", "STAT3"),
"Th2 Differentiation" = c("GATA3", "STAT6", "MAF"),
"Th1 Differentiation" = c("TBX21", "STAT1", "IRF1"),
"Exhaustion" = c("TOX", "PRDM1", "BATF")
)
# 2. Calculate Variance using 'data' layer (NOT scale.data)
# IMPORTANT: scale.data sets variance to 1. We need 'data' for biological variance.
DefaultAssay(seurat_obj) <- "dorothea"
mat_raw <- GetAssayData(seurat_obj, assay = "dorothea", layer = "data")
all_vars <- apply(mat_raw, 1, var)
# 3. Calculate Mean Variance per Module
module_variance <- sapply(modules, function(tfs) {
valid_tfs <- intersect(tfs, names(all_vars))
if(length(valid_tfs) > 0) {
mean(all_vars[valid_tfs], na.rm = TRUE)
} else {
0
}
})
# 4. Create Plot Data
plot_data <- data.frame(
Module = names(module_variance),
Variance = module_variance
) %>% arrange(desc(Variance))
# 5. Plot
p_var <- ggplot(plot_data, aes(x = reorder(Module, Variance), y = Variance, fill = Module)) +
geom_bar(stat = "identity", width = 0.7) +
coord_flip() +
scale_fill_brewer(palette = "Set2") +
theme_minimal() +
labs(title = "Drivers of Heterogeneity: Variance Explained by TF Modules",
subtitle = "Calculated on unscaled TF activity scores (DoRothEA)",
y = "Mean Variance", x = "") +
theme(legend.position = "none", plot.title = element_text(face="bold"))
ggsave("Output_Figures/Supplementary_TF_Module_Variance.pdf", p_var, width=8, height=6)
print(p_var)

cat("✓ Supplementary Variance Plot Saved\n")
✓ Supplementary Variance Plot Saved
SUPPLEMENTARY FIGURE
3: Regulatory Module Activity Heatmap
library(ComplexHeatmap)
library(circlize)
library(Seurat)
# 1. Define Modules (Added Treg)
modules <- list(
"Homeostasis" = c("FOXO1", "TCF7", "LEF1"),
"Oncogenic/Cell Cycle" = c("MYC", "E2F1", "E2F4", "FOXM1"),
"Inflammation (NF-kB)" = c("RELA", "NFKB1", "REL", "STAT3"),
"Th2 Differentiation" = c("GATA3", "STAT6", "MAF"),
"Th1 Differentiation" = c("TBX21", "STAT1", "IRF1"),
"Exhaustion" = c("TOX", "PRDM1", "BATF")
)
# 2. Get Average Activity of ALL TFs per Cluster (using 'data' layer)
# We use AverageExpression to get the mean activity of every TF in every cluster
cluster_avg <- AverageExpression(seurat_obj, assays = "dorothea", layer = "data")$dorothea
# 3. Aggregate TFs into Module Scores
# For each module, we take the mean of its TFs' average activity
module_mat <- sapply(names(modules), function(mod_name) {
tfs <- modules[[mod_name]]
valid_tfs <- intersect(tfs, rownames(cluster_avg))
if(length(valid_tfs) > 0) {
colMeans(cluster_avg[valid_tfs, , drop=FALSE])
} else {
rep(0, ncol(cluster_avg))
}
})
# Result is Clusters x Modules. Transpose to Modules x Clusters for heatmap
module_mat <- t(module_mat)
# 4. Scale (Z-score) row-wise for visualization
# This highlights relative enrichment (Red = High for that module, Blue = Low)
module_mat_z <- t(scale(t(module_mat)))
module_mat_z[is.na(module_mat_z)] <- 0
# 5. Plot Heatmap
col_fun <- circlize::colorRamp2(c(-2, 0, 2), c("#313695", "white", "#A50026"))
ht_mod <- Heatmap(module_mat_z,
name = "Activity (z)",
col = col_fun,
cluster_rows = TRUE,
cluster_columns = TRUE,
rect_gp = gpar(col = "white", lwd = 1), # Add white borders
row_names_side = "left",
column_title = "Regulatory Module Activity Across Clusters",
row_names_gp = gpar(fontsize = 11, fontface = "bold"),
column_names_gp = gpar(fontsize = 10))
# Draw and Save
draw(ht_mod)
pdf("Output_Figures/Supplementary_TF_Module_Heatmap.pdf", width=8, height=6)
draw(ht_mod)
dev.off()
png
2
png("Output_Figures/Supplementary_TF_Module_Heatmap.png", width=8*300, height=6*300, res=300)
draw(ht_mod)
dev.off()
png
2

cat("✓ Supplementary Module Heatmap Saved\n")
✓ Supplementary Module Heatmap Saved
SUPPLEMENTARY FIGURE
4: Regulatory Module Activity by Cell Line
library(ComplexHeatmap)
library(circlize)
library(Seurat)
# 1. Define Modules
modules <- list(
"Homeostasis" = c("FOXO1", "TCF7", "LEF1"),
"Oncogenic/Cell Cycle" = c("MYC", "E2F1", "E2F4", "FOXM1"),
"Inflammation (NF-kB)" = c("RELA", "NFKB1", "REL", "STAT3"),
"Th2 Differentiation" = c("GATA3", "STAT6", "MAF"),
"Th1 Differentiation" = c("TBX21", "STAT1", "IRF1"),
"Exhaustion" = c("TOX", "PRDM1", "BATF")
)
# 2. Get Average Activity per Cell Line (orig.ident)
cell_line_avg <- AverageExpression(seurat_obj,
assays = "dorothea",
layer = "data",
group.by = "orig.ident")$dorothea
# 3. Aggregate TFs into Module Scores
module_mat <- sapply(names(modules), function(mod_name) {
tfs <- modules[[mod_name]]
valid_tfs <- intersect(tfs, rownames(cell_line_avg))
if(length(valid_tfs) > 0) {
colMeans(cell_line_avg[valid_tfs, , drop=FALSE])
} else {
rep(0, ncol(cell_line_avg))
}
})
# Transpose to [Modules x Cell Lines]
module_mat <- t(module_mat)
# 4. Scale (Z-score) row-wise
module_mat_z <- t(scale(t(module_mat)))
module_mat_z[is.na(module_mat_z)] <- 0
# 5. Plot Heatmap
col_fun <- circlize::colorRamp2(c(-2, 0, 2), c("#313695", "white", "#A50026"))
ht_mod <- Heatmap(module_mat_z,
name = "Activity (z)",
col = col_fun,
cluster_rows = TRUE,
cluster_columns = TRUE, # Group similar cell lines together
rect_gp = gpar(col = "white", lwd = 1),
row_names_side = "left",
column_title = "Regulatory Landscape Across Cell Lines",
row_names_gp = gpar(fontsize = 11, fontface = "bold"),
column_names_gp = gpar(fontsize = 10))
# Draw and Save
draw(ht_mod)
pdf("Output_Figures/Supplementary_TF_Module_Heatmap_CellLine.pdf", width=8, height=6)
draw(ht_mod)
dev.off()
png
2
png("Output_Figures/Supplementary_TF_Module_Heatmap_CellLine.png", width=8*300, height=6*300, res=300)
draw(ht_mod)
dev.off()
png
2

cat("✓ Supplementary Cell Line Heatmap Saved\n")
✓ Supplementary Cell Line Heatmap Saved
SUPPLEMENTARY FIGURE
4: Regulatory Module Activity by Cell Line
library(ComplexHeatmap)
library(circlize)
library(Seurat)
# 1. Define Modules
modules <- list(
"Homeostasis" = c("FOXO1", "TCF7", "LEF1"),
"Oncogenic/Cell Cycle" = c("MYC", "E2F1", "E2F4", "FOXM1"),
"Inflammation (NF-kB)" = c("RELA", "NFKB1", "REL", "STAT3"),
"Th2 Differentiation" = c("GATA3", "STAT6", "MAF"),
"Th1 Differentiation" = c("TBX21", "STAT1", "IRF1"),
"Exhaustion" = c("TOX", "PRDM1", "BATF")
)
# 2. Get Average Activity per Cell Line (orig.ident)
cell_line_avg <- AverageExpression(seurat_obj,
assays = "dorothea",
layer = "data",
group.by = "seurat_clusters")$dorothea
# 3. Aggregate TFs into Module Scores
module_mat <- sapply(names(modules), function(mod_name) {
tfs <- modules[[mod_name]]
valid_tfs <- intersect(tfs, rownames(cell_line_avg))
if(length(valid_tfs) > 0) {
colMeans(cell_line_avg[valid_tfs, , drop=FALSE])
} else {
rep(0, ncol(cell_line_avg))
}
})
# Transpose to [Modules x Cell Lines]
module_mat <- t(module_mat)
# 4. Scale (Z-score) row-wise
module_mat_z <- t(scale(t(module_mat)))
module_mat_z[is.na(module_mat_z)] <- 0
# 5. Plot Heatmap
col_fun <- circlize::colorRamp2(c(-2, 0, 2), c("#313695", "white", "#A50026"))
ht_mod <- Heatmap(module_mat_z,
name = "Activity (z)",
col = col_fun,
cluster_rows = TRUE,
cluster_columns = TRUE, # Group similar cell lines together
rect_gp = gpar(col = "white", lwd = 1),
row_names_side = "left",
column_title = "Regulatory Landscape Across Clusters",
row_names_gp = gpar(fontsize = 11, fontface = "bold"),
column_names_gp = gpar(fontsize = 10))
# Draw and Save
draw(ht_mod)
pdf("Output_Figures/Supplementary_TF_Module_Heatmap_CellLine.pdf", width=8, height=6)
draw(ht_mod)
dev.off()
png
2
png("Output_Figures/Supplementary_TF_Module_Heatmap_CellLine.png", width=8*300, height=6*300, res=300)
draw(ht_mod)
dev.off()
png
2

cat("✓ Supplementary Cell Line Heatmap Saved\n")
✓ Supplementary Cell Line Heatmap Saved
NEW FIGURE:
Regulatory Trajectory Analysis (PCA on TF Activity)
library(Seurat)
library(ggplot2)
library(dplyr)
# 1. Run PCA specifically on TF Activity (DoRothEA)
DefaultAssay(seurat_obj) <- "dorothea"
seurat_obj <- ScaleData(seurat_obj)
|
| | 0%
|
|===========================================================================================================| 100%
seurat_obj <- RunPCA(seurat_obj, features = rownames(seurat_obj), verbose = FALSE)
# 2. Extract PCA Embeddings
pca_data <- Embeddings(seurat_obj, reduction = "pca")[, 1:2] %>%
as.data.frame() %>%
mutate(Cluster = seurat_obj$seurat_clusters,
Condition = ifelse(seurat_obj$seurat_clusters %in% c(3, 10), "Non-Malignant", "Malignant"))
# 3. Calculate Cluster Centroids (for arrows)
centroids <- pca_data %>%
group_by(Cluster) %>%
summarise(PC1 = mean(PC_1), PC2 = mean(PC_2))
# 4. Define Key Drivers for PC1 and PC2 (Loadings)
# This tells us WHAT drives the trajectory (e.g., PC1 = Malignancy, PC2 = Th1 vs Th2)
loadings <- Loadings(seurat_obj, reduction = "pca")
pc1_drivers <- names(sort(abs(loadings[, 1]), decreasing = T))[1:5]
pc2_drivers <- names(sort(abs(loadings[, 2]), decreasing = T))[1:5]
cat("PC1 Drivers (X-axis):", paste(pc1_drivers, collapse=", "), "\n")
PC1 Drivers (X-axis): MYC, TBX21, E2F4, ZNF263, E2F1
cat("PC2 Drivers (Y-axis):", paste(pc2_drivers, collapse=", "), "\n")
PC2 Drivers (Y-axis): NFKB1, STAT1, RELA, IRF1, HNF4A
# 5. Plot the Trajectory
p_traj <- ggplot(pca_data, aes(x = PC_1, y = PC_2, color = Cluster)) +
# Points
geom_point(alpha = 0.6, size = 1.5) +
# Centroids and Labels
geom_point(data = centroids, aes(x = PC1, y = PC2), size = 5, color = "black", shape = 21, fill = "white") +
geom_text(data = centroids, aes(x = PC1, y = PC2, label = Cluster), color = "black", fontface = "bold") +
# Styling
scale_color_manual(values = Seurat::DiscretePalette(14)) +
labs(title = "Regulatory Trajectory of Sézary Cells",
subtitle = paste0("PC1 driven by: ", paste(pc1_drivers[1:3], collapse=", "),
"\nPC2 driven by: ", paste(pc2_drivers[1:3], collapse=", ")),
x = "PC1: Regulatory Axis 1",
y = "PC2: Regulatory Axis 2") +
theme_minimal() +
theme(plot.title = element_text(face = "bold", size = 14),
legend.position = "right")
# Save
ggsave("Output_Figures/Supplementary_TF_Trajectory_PCA.pdf", p_traj, width = 8, height = 7)
ggsave("Output_Figures/Supplementary_TF_Trajectory_PCA.png", p_traj, width = 8, height = 7, dpi = 300)
print(p_traj)

cat("✓ Regulatory Trajectory Plot Saved\n")
✓ Regulatory Trajectory Plot Saved
REFINED FIGURE:
Regulatory Bi-plot (Trajectory + TF Drivers))
library(Seurat)
library(ggplot2)
library(dplyr)
library(ggrepel)
# 1. Run PCA on TF Activity
DefaultAssay(seurat_obj) <- "dorothea"
seurat_obj <- ScaleData(seurat_obj)
|
| | 0%
|
|===========================================================================================================| 100%
seurat_obj <- RunPCA(seurat_obj, features = rownames(seurat_obj), verbose = FALSE)
# 2. Extract Cell Embeddings (Points)
pca_data <- Embeddings(seurat_obj, reduction = "pca")[, 1:2] %>%
as.data.frame() %>%
mutate(Cluster = as.factor(seurat_obj$seurat_clusters))
# 3. Extract Feature Loadings (Arrows)
loadings <- Loadings(seurat_obj, reduction = "pca")
# Select top 5 drivers for PC1 and PC2
top_pc1 <- names(sort(abs(loadings[, 1]), decreasing = TRUE))[1:5]
top_pc2 <- names(sort(abs(loadings[, 2]), decreasing = TRUE))[1:5]
top_drivers <- unique(c(top_pc1, top_pc2))
arrow_data <- as.data.frame(loadings[top_drivers, 1:2])
arrow_data$TF <- rownames(arrow_data)
# Scale arrows to match plot dimensions (scaling factor for visibility)
scale_factor <- max(abs(pca_data$PC_1)) / max(abs(arrow_data$PC_1)) * 0.8
arrow_data$PC_1 <- arrow_data$PC_1 * scale_factor
arrow_data$PC_2 <- arrow_data$PC_2 * scale_factor
# 4. Plot
p_biplot <- ggplot(pca_data, aes(x = PC_1, y = PC_2)) +
# Cell Points
geom_point(aes(color = Cluster), alpha = 0.5, size = 1.5) +
# TF Arrows
geom_segment(data = arrow_data, aes(x = 0, y = 0, xend = PC_1, yend = PC_2),
arrow = arrow(length = unit(0.2, "cm")), color = "black", linewidth = 0.8) +
# TF Labels
geom_text_repel(data = arrow_data, aes(x = PC_1, y = PC_2, label = TF),
fontface = "bold", color = "black", size = 4, box.padding = 0.5) +
# Styling
scale_color_manual(values = Seurat::DiscretePalette(14)) +
labs(title = "Regulatory Landscape Bi-plot",
subtitle = "Arrows indicate direction of TF activity driving heterogeneity",
x = "PC1: Malignancy Axis (Cell Cycle)",
y = "PC2: Identity Axis (Inflammation vs. Differentiation)") +
theme_minimal() +
theme(plot.title = element_text(face = "bold", size = 14),
legend.position = "right")
# Save
ggsave("Output_Figures/Supplementary_TF_Biplot.pdf", p_biplot, width = 10, height = 8)
ggsave("Output_Figures/Supplementary_TF_Biplot.png", p_biplot, width = 10, height = 8, dpi = 300)
print(p_biplot)

cat("✓ Bi-plot Saved. This visualizes exactly WHICH TFs pull clusters apart.\n")
✓ Bi-plot Saved. This visualizes exactly WHICH TFs pull clusters apart.
REFINED FIGURE:
Regulatory Bi-plot (Cleaned)
library(Seurat)
library(ggplot2)
library(dplyr)
library(ggrepel)
# 1. Run PCA on TF Activity (if not already done)
DefaultAssay(seurat_obj) <- "dorothea"
seurat_obj <- ScaleData(seurat_obj)
|
| | 0%
|
|===========================================================================================================| 100%
seurat_obj <- RunPCA(seurat_obj, features = rownames(seurat_obj), verbose = FALSE)
# 2. Extract Cell Embeddings
pca_data <- Embeddings(seurat_obj, reduction = "pca")[, 1:2] %>%
as.data.frame() %>%
mutate(Cluster = as.factor(seurat_obj$seurat_clusters))
# 3. Extract Feature Loadings (The "Arrows")
loadings <- Loadings(seurat_obj, reduction = "pca")
# --- FILTERING STEP ---
# Select only the top 3 positive and top 3 negative drivers for PC1 and PC2
pc1_top <- names(sort(loadings[, 1], decreasing = TRUE))[1:3]
pc1_bottom <- names(sort(loadings[, 1], decreasing = FALSE))[1:3]
pc2_top <- names(sort(loadings[, 2], decreasing = TRUE))[1:3]
pc2_bottom <- names(sort(loadings[, 2], decreasing = FALSE))[1:3]
top_drivers <- unique(c(pc1_top, pc1_bottom, pc2_top, pc2_bottom))
arrow_data <- as.data.frame(loadings[top_drivers, 1:2])
arrow_data$TF <- rownames(arrow_data)
# Scale arrows to be visible on the plot (multiply by factor)
scale_factor <- max(abs(pca_data$PC_1)) / max(abs(arrow_data$PC_1)) * 0.8
arrow_data$PC_1 <- arrow_data$PC_1 * scale_factor
arrow_data$PC_2 <- arrow_data$PC_2 * scale_factor
# 4. Plot
p_biplot <- ggplot(pca_data, aes(x = PC_1, y = PC_2)) +
# Points (Cells)
geom_point(aes(color = Cluster), alpha = 0.6, size = 1.5) +
# Arrows (TFs)
geom_segment(data = arrow_data, aes(x = 0, y = 0, xend = PC_1, yend = PC_2),
arrow = arrow(length = unit(0.2, "cm")), color = "black", linewidth = 0.8) +
# Labels (TFs) - using ggrepel to avoid overlap
geom_text_repel(data = arrow_data, aes(x = PC_1, y = PC_2, label = TF),
fontface = "bold", color = "black", size = 4,
box.padding = 0.5, point.padding = 0.2) +
# Styling
scale_color_manual(values = Seurat::DiscretePalette(14)) +
labs(title = "Regulatory Drivers of Heterogeneity",
subtitle = "Arrows indicate TFs driving separation along PC1 (Malignancy) and PC2 (Identity)",
x = "PC1: Malignancy Axis",
y = "PC2: Identity Axis") +
theme_minimal() +
theme(plot.title = element_text(face = "bold", size = 14),
legend.position = "right")
# Save
ggsave("Output_Figures/Supplementary_TF_Biplot_Clean.pdf", p_biplot, width = 10, height = 8)
print(p_biplot)

cat("✓ Clean Bi-plot Saved. Shows only top drivers.\n")
✓ Clean Bi-plot Saved. Shows only top drivers.
---
title: "TF Activity Inference Analysis (DecoupleR + DoRothEA) Visualization"
author: "Nasir Mahmood Abbasi"
date: "`r Sys.Date()`"
output:
  html_notebook:
    number_sections: true
    toc: true
    toc_float:
      collapsed: true
    theme: journal
---


# load libraries
```{r setup, include=TRUE}
# Data Processing
library(dplyr)
library(Seurat)
library(tibble)
library(tidyr)
library(stringr)

# Visualization
library(ggplot2)
library(ComplexHeatmap)
library(patchwork)
library(SCpubr)

# Regulatory Network Inference
library(decoupleR)
library(dorothea)
data(dorothea_hs, package = "dorothea")
library(tictoc)


```

# Load Seurat Object 
```{r}

# Load your Seurat Object
seurat_obj <- readRDS("Output_Objects/Seurat_Object_With_TF_Activity.rds")

Idents(seurat_obj) <- "seurat_clusters"
cat("✓ Seurat object loaded\n")
cat(sprintf("  - %d cells across %d clusters\n", 
            ncol(seurat_obj), 
            length(unique(seurat_obj$seurat_clusters))))
```

## Run this code block to restore activities instantly:
```{r}

# If 'activities' is missing but 'dorothea' assay exists, reconstruct it:
if (!exists("activities") && "dorothea" %in% names(seurat_obj@assays)) {
  
  print("Reconstructing 'activities' dataframe from Seurat object...")
  
  # Extract the matrix (Seurat v5 uses 'layer' instead of 'slot')
  # Since you ran ScaleData, we use 'scale.data'
  mat <- GetAssayData(seurat_obj, assay = "dorothea", layer = "scale.data")
  
  # Convert to long format (what SCpubr needs)
  activities <- as.data.frame(mat) %>%
    rownames_to_column("source") %>%
    pivot_longer(cols = -source, names_to = "condition", values_to = "score") %>%
    mutate(statistic = "norm_wmean") # SCpubr requires this column
    
  print("Activities dataframe restored!")
}
```

## SCpubr Heatmap Visualization-Heatmap of averaged scores
```{r, fig.height=8, fig.width=10}
library(SCpubr)
# General heatmap (Top Variable TFs)
out <- SCpubr::do_TFActivityPlot(sample = seurat_obj,
                                 activities = activities)
p1 <- out$heatmaps$average_scores
print(p1)

# 1. Save as PDF
pdf("Output_Figures/SCpubr_Heatmap_Default.pdf", width = 10, height = 8)
print(p1) # ComplexHeatmap requires explicit print() inside pdf()
dev.off()

# 2. Save as PNG
png("Output_Figures/SCpubr_Heatmap_Default.png", width = 10 * 300, height = 8 * 300, res = 300)
print(p1)
dev.off()
```


## Set the scale limits
```{r, fig.height=8, fig.width=10}

out <- SCpubr::do_TFActivityPlot(sample = seurat_obj,
                                 activities = activities,
                                 min.cutoff = -1.5,
                                 max.cutoff = 1.5)
p2 <- out$heatmaps$average_scores
print(p2)

# Save ComplexHeatmap properly
pdf("Output_Figures/SCpubr_Heatmap_Scaled.pdf", width = 10, height = 8)
print(p2)
dev.off()

png("Output_Figures/SCpubr_Heatmap_Scaled.png", width = 10 * 300, height = 8 * 300, res = 300)
print(p2)
dev.off()


```

## Enforce Symmetry (Best for Manuscript)
```{r, fig.height=8, fig.width=10}

out <- SCpubr::do_TFActivityPlot(sample = seurat_obj,
                                 activities = activities,
                                 min.cutoff = -1.5,
                                 max.cutoff = 1.5,
                                 enforce_symmetry = TRUE)
p3 <- out$heatmaps$average_scores
print(p3)

pdf("Output_Figures/SCpubr_Heatmap_Symmetric.pdf", width = 10, height = 8)
print(p3)
dev.off()

png("Output_Figures/SCpubr_Heatmap_Symmetric.png", width = 10 * 300, height = 8 * 300, res = 300)
print(p3)
dev.off()

```

## Top 40 TFs
```{r, fig.height=6, fig.width=14}
out <- SCpubr::do_TFActivityPlot(sample = seurat_obj,
                                 activities = activities,
                                 n_tfs = 40)
p4 <- out$heatmaps$average_scores
print(p4)

pdf("Output_Figures/SCpubr_Heatmap_Top40.pdf", width = 14, height = 6)
print(p4)
dev.off()

png("Output_Figures/SCpubr_Heatmap_Top40.png", width = 14 * 300, height = 6 * 300, res = 300)
print(p4)
dev.off()
```


## FIGURE 3.16A: Global TF Activity Heatmap (Top 100 TFs)
```{r, fig.height=12, fig.width=32}

out <- SCpubr::do_TFActivityPlot(sample = seurat_obj,
                                 activities = activities,
                                 n_tfs = 100)
p5 <- out$heatmaps$average_scores
print(p5)

pdf("Output_Figures/Figure_3.16A_Global_TF_Heatmap_Top100.pdf", width = 32, height = 12)
print(p5)
dev.off()

png("Output_Figures/Figure_3.16A_Global_TF_Heatmap_Top100.png", width = 32 * 300, height = 12 * 300, res = 300)
print(p5)
dev.off()
```

# Check TF Availability
```{r, fig.height=12, fig.width=32}

# Get TF names from dorothea assay
DefaultAssay(seurat_obj) <- "dorothea"
tfs_in_activity <- rownames(seurat_obj)

# Get gene names from SCT assay
DefaultAssay(seurat_obj) <- "SCT"
genes_in_expression <- rownames(seurat_obj)

# Find TFs present in BOTH
tfs_in_both <- intersect(tfs_in_activity, genes_in_expression)

cat("\n═══════════════════════════════════════════════════════════\n")
cat(sprintf("✓ %d TFs available in both assays\n", length(tfs_in_both)))
cat("═══════════════════════════════════════════════════════════\n")
```


# FIGURE 3.16B: Expression-Activity Concordance (2-Row Grid)
```{r, fig.height=8, fig.width=22}

# Select 6 TFs representing key heterogeneity axes
selected_tfs <- c(
  "FOXO1",   # Homeostasis
  "MYC",     # Oncogenic
  "TBX21",   # Th1/Stem
  "GATA3",   # Th2
  "RELA",    # Inflammatory
  "IRF1"     # Interferon
)

cat("\n✓ Selected TFs for Figure 3.16B:\n")
print(selected_tfs)

# ---- TOP ROW: TF ACTIVITY (dorothea assay) ----
DefaultAssay(seurat_obj) <- "dorothea"

act_plots <- lapply(selected_tfs, function(tf) {
  FeaturePlot(seurat_obj, 
              features = tf,
              reduction = "umap",
              order = TRUE,,label = T,
              cols = c("grey90", "red3"),
              pt.size = 0.3) +
    ggtitle(paste0(tf, " Activity")) +
    theme_minimal() +
    theme(plot.title = element_text(size = 11, face = "bold"),
          axis.title = element_text(size = 9),
          axis.text = element_text(size = 7),
          legend.position = "right",
          legend.text = element_text(size = 8),
          legend.title = element_text(size = 9))
})

cat("✓ TF Activity plots created (TOP ROW)\n")

# ---- BOTTOM ROW: Gene EXPRESSION (SCT assay) ----
DefaultAssay(seurat_obj) <- "SCT"

expr_plots <- lapply(selected_tfs, function(tf) {
  FeaturePlot(seurat_obj, 
              features = tf, 
              reduction = "umap",
              order = TRUE,label = T,
              cols = c("grey90", "darkblue"),
              pt.size = 0.3) +
    ggtitle(paste0(tf, " Expression")) +
    theme_minimal() +
    theme(plot.title = element_text(size = 11, face = "bold"),
          axis.title = element_text(size = 9),
          axis.text = element_text(size = 7),
          legend.position = "right",
          legend.text = element_text(size = 8),
          legend.title = element_text(size = 9))
})

cat("✓ Gene Expression plots created (BOTTOM ROW)\n")

# ---- COMBINE: 2 rows × 6 columns ----
top_row <- wrap_plots(act_plots, nrow = 1)
bottom_row <- wrap_plots(expr_plots, nrow = 1)

p_3.16B <- top_row / bottom_row +
           plot_annotation(
             title = "TF Activity and Expression Patterns",
             subtitle = "Top: Inferred TF Activity (DoRothEA) | Bottom: Gene Expression (SCT)",
             theme = theme(plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
                          plot.subtitle = element_text(size = 11, hjust = 0.5, color = "grey30"))
           )

ggsave("Output_Figures/Figure_3.16B_Activity_vs_Expression_Grid.pdf", 
       p_3.16B, 
       width = 22, 
       height = 8, 
       units = "in")

ggsave("Output_Figures/Figure_3.16B_Activity_vs_Expression_Grid.png", 
       p_3.16B, 
       width = 22, 
       height = 8, 
       units = "in", 
       dpi = 300)

print(p_3.16B)
cat("\n✓✓✓ FIGURE 3.16B SAVED (20×8 inches, 2 rows × 6 columns) ✓✓✓\n")

```

# FIGURE 3.16C: Literature-Validated TF Modules
```{r, fig.height=6, fig.width=10}
library(ComplexHeatmap)
library(circlize)  # <--- Essential for colorRamp2

# Curated list of Sézary-relevant TFs
literature_tfs <- c("GATA3", "TBX21", "RORC", "FOXP3", "TOX", "SATB1", "IKZF2",
                    "STAT3", "STAT5B", "STAT6", "RELA", "NFKB1", "MYC", "E2F1")

# Check availability
available_tfs <- intersect(literature_tfs, rownames(seurat_obj[["dorothea"]]))

# Extract data
mat_scaled <- GetAssayData(seurat_obj, assay="dorothea", layer="scale.data")
mat_use <- mat_scaled[available_tfs, , drop=FALSE]

# Calculate average activity per cluster
clusters <- as.factor(seurat_obj$seurat_clusters)
avg_mat_z <- t(scale(t(sapply(levels(clusters), function(cl) Matrix::rowMeans(mat_use[, clusters == cl, drop=FALSE])))))
avg_mat_z[is.na(avg_mat_z)] <- 0

# Create Heatmap
ht_3.16C <- Heatmap(avg_mat_z, 
                    name = "TF activity (z)", 
                    col = circlize::colorRamp2(c(-2, 0, 2), c("#313695", "white", "#A50026")),
                    cluster_rows = TRUE, 
                    cluster_columns = TRUE, 
                    column_title = "Literature-validated Sézary TF modules")

# Save
pdf("Output_Figures/Figure_3.16C_Literature_TF_Heatmap.pdf", width=10, height=6)
draw(ht_3.16C)
dev.off()

png("Output_Figures/Figure_3.16C_Literature_TF_Heatmap.png", width=10*300, height=6*300, res=300)
draw(ht_3.16C)
dev.off()

cat("✓ Figure 3.16C Saved\n")

ht_3.16C

```

# SUPPLEMENTARY FIGURES
## SUPPLEMENTARY FIGURE 1: Differential TF Activity Volcano Plot
```{r, fig.height=12, fig.width=16}
library(EnhancedVolcano)
# 1. Perform Differential Analysis
non_malignant_clusters <- c(3, 10)
seurat_obj$Condition <- ifelse(seurat_obj$seurat_clusters %in% non_malignant_clusters, "Non-Malignant", "Malignant")
DefaultAssay(seurat_obj) <- "dorothea"
Idents(seurat_obj) <- "Condition"

cat("Running FindMarkers...\n")
diff_tfs <- FindMarkers(seurat_obj, ident.1="Malignant", ident.2="Non-Malignant", test.use="t", logfc.threshold=0.1, min.pct=0)
diff_tfs$gene <- rownames(diff_tfs)

# 2. Plot EnhancedVolcano
p_volcano <- EnhancedVolcano(diff_tfs,
    lab = rownames(diff_tfs), x = 'avg_log2FC', y = 'p_val_adj',
    title = 'Differential TF Activity: Malignant vs. Non-Malignant',
    subtitle = 'DecoupleR Inferred Activity',
    pCutoff = 1e-5, FCcutoff = 0.5, pointSize = 3.0, labSize = 5.0, colAlpha = 0.8,
    legendPosition = 'right', drawConnectors = TRUE, widthConnectors = 0.5,
    col = c("grey30", "forestgreen", "royalblue", "firebrick2"))

ggsave("Output_Figures/Supplementary_Differential_TF_Activity_Volcano.pdf", p_volcano, width=12, height=10)
print(p_volcano)
cat("✓ Supplementary Volcano Plot Saved\n")
```
## SUPPLEMENTARY FIGURE:  CHECK TF AVAILABILITY
```{r, fig.height=12, fig.width=16}
# ============================================================================
# CHECK TF AVAILABILITY
# ============================================================================

# 1. Define Desired Modules
modules <- list(
  "Homeostasis"          = c("FOXO1", "TCF7", "LEF1"),
  "Oncogenic/Cell Cycle" = c("MYC", "E2F1", "E2F4", "FOXM1"),
  "Inflammation (NF-kB)" = c("RELA", "NFKB1", "REL", "STAT3"),
  "Treg Signature"       = c("FOXP3", "STAT5B", "IKZF2"),
  "Th2 Differentiation"  = c("GATA3", "STAT6", "MAF"),
  "Th1 Differentiation"  = c("TBX21", "STAT1", "IRF1"),
  "Exhaustion"           = c("TOX", "PRDM1", "BATF")
)

# 2. Get available TFs in the object
DefaultAssay(seurat_obj) <- "dorothea"
available_tfs <- rownames(seurat_obj)

# 3. Check each module
cat("Checking TF availability in DoRothEA assay...\n")
cat("------------------------------------------------\n")

final_modules <- list()

for (mod_name in names(modules)) {
  tfs <- modules[[mod_name]]
  
  # Find intersection
  present <- intersect(tfs, available_tfs)
  missing <- setdiff(tfs, available_tfs)
  
  # Report
  cat(sprintf("Module: %s\n", mod_name))
  cat(sprintf("  ✓ Found (%d): %s\n", length(present), paste(present, collapse=", ")))
  
  if(length(missing) > 0) {
    cat(sprintf("  ⚠ MISSING (%d): %s\n", length(missing), paste(missing, collapse=", ")))
  }
  
  # Only keep module if at least 1 TF exists
  if(length(present) > 0) {
    final_modules[[mod_name]] <- present
  }
  cat("\n")
}

# 4. Update the 'modules' list to only use valid TFs
modules <- final_modules
cat("------------------------------------------------\n")
cat("Ready to plot with validated TFs.\n")
```

## SUPPLEMENTARY FIGURE 2:  Variance Explained by Key Regulatory Modules
```{r, fig.height=12, fig.width=16}
library(ggplot2)
library(dplyr)
library(Seurat)

# 1. Define Modules
modules <- list(
  "Homeostasis" = c("FOXO1", "TCF7", "LEF1"),
  "Oncogenic/Cell Cycle" = c("MYC", "E2F1", "E2F4", "FOXM1"),
  "Inflammation (NF-kB)" = c("RELA", "NFKB1", "REL", "STAT3"),
  "Th2 Differentiation" = c("GATA3", "STAT6", "MAF"),
  "Th1 Differentiation" = c("TBX21", "STAT1", "IRF1"),
  "Exhaustion" = c("TOX", "PRDM1", "BATF")
)

# 2. Calculate Variance using 'data' layer (NOT scale.data)
# IMPORTANT: scale.data sets variance to 1. We need 'data' for biological variance.
DefaultAssay(seurat_obj) <- "dorothea"
mat_raw <- GetAssayData(seurat_obj, assay = "dorothea", layer = "data")
all_vars <- apply(mat_raw, 1, var)

# 3. Calculate Mean Variance per Module
module_variance <- sapply(modules, function(tfs) {
  valid_tfs <- intersect(tfs, names(all_vars))
  if(length(valid_tfs) > 0) {
    mean(all_vars[valid_tfs], na.rm = TRUE)
  } else {
    0
  }
})

# 4. Create Plot Data
plot_data <- data.frame(
  Module = names(module_variance),
  Variance = module_variance
) %>% arrange(desc(Variance))

# 5. Plot
p_var <- ggplot(plot_data, aes(x = reorder(Module, Variance), y = Variance, fill = Module)) +
  geom_bar(stat = "identity", width = 0.7) +
  coord_flip() +
  scale_fill_brewer(palette = "Set2") +
  theme_minimal() +
  labs(title = "Drivers of Heterogeneity: Variance Explained by TF Modules",
       subtitle = "Calculated on unscaled TF activity scores (DoRothEA)",
       y = "Mean Variance", x = "") +
  theme(legend.position = "none", plot.title = element_text(face="bold"))

ggsave("Output_Figures/Supplementary_TF_Module_Variance.pdf", p_var, width=8, height=6)
print(p_var)
cat("✓ Supplementary Variance Plot Saved\n")
```

## SUPPLEMENTARY FIGURE 3: Regulatory Module Activity Heatmap
```{r, fig.height=12, fig.width=16}
library(ComplexHeatmap)
library(circlize)
library(Seurat)

# 1. Define Modules (Added Treg)
modules <- list(
  "Homeostasis" = c("FOXO1", "TCF7", "LEF1"),
  "Oncogenic/Cell Cycle" = c("MYC", "E2F1", "E2F4", "FOXM1"),
  "Inflammation (NF-kB)" = c("RELA", "NFKB1", "REL", "STAT3"),
  "Th2 Differentiation" = c("GATA3", "STAT6", "MAF"),
  "Th1 Differentiation" = c("TBX21", "STAT1", "IRF1"),
  "Exhaustion" = c("TOX", "PRDM1", "BATF")
)


# 2. Get Average Activity of ALL TFs per Cluster (using 'data' layer)
# We use AverageExpression to get the mean activity of every TF in every cluster
cluster_avg <- AverageExpression(seurat_obj, assays = "dorothea", layer = "data")$dorothea

# 3. Aggregate TFs into Module Scores
# For each module, we take the mean of its TFs' average activity
module_mat <- sapply(names(modules), function(mod_name) {
  tfs <- modules[[mod_name]]
  valid_tfs <- intersect(tfs, rownames(cluster_avg))
  
  if(length(valid_tfs) > 0) {
    colMeans(cluster_avg[valid_tfs, , drop=FALSE])
  } else {
    rep(0, ncol(cluster_avg))
  }
})

# Result is Clusters x Modules. Transpose to Modules x Clusters for heatmap
module_mat <- t(module_mat)

# 4. Scale (Z-score) row-wise for visualization
# This highlights relative enrichment (Red = High for that module, Blue = Low)
module_mat_z <- t(scale(t(module_mat)))
module_mat_z[is.na(module_mat_z)] <- 0

# 5. Plot Heatmap
col_fun <- circlize::colorRamp2(c(-2, 0, 2), c("#313695", "white", "#A50026"))

ht_mod <- Heatmap(module_mat_z,
                  name = "Activity (z)",
                  col = col_fun,
                  cluster_rows = TRUE, 
                  cluster_columns = TRUE,
                  rect_gp = gpar(col = "white", lwd = 1), # Add white borders
                  row_names_side = "left",
                  column_title = "Regulatory Module Activity Across Clusters",
                  row_names_gp = gpar(fontsize = 11, fontface = "bold"),
                  column_names_gp = gpar(fontsize = 10))

# Draw and Save
draw(ht_mod)

pdf("Output_Figures/Supplementary_TF_Module_Heatmap.pdf", width=8, height=6)
draw(ht_mod)
dev.off()

png("Output_Figures/Supplementary_TF_Module_Heatmap.png", width=8*300, height=6*300, res=300)
draw(ht_mod)
dev.off()

cat("✓ Supplementary Module Heatmap Saved\n")
```

## SUPPLEMENTARY FIGURE 4: Regulatory Module Activity by Cell Line
```{r, fig.height=12, fig.width=16}
library(ComplexHeatmap)
library(circlize)
library(Seurat)

# 1. Define Modules
modules <- list(
  "Homeostasis" = c("FOXO1", "TCF7", "LEF1"),
  "Oncogenic/Cell Cycle" = c("MYC", "E2F1", "E2F4", "FOXM1"),
  "Inflammation (NF-kB)" = c("RELA", "NFKB1", "REL", "STAT3"),
  "Th2 Differentiation" = c("GATA3", "STAT6", "MAF"),
  "Th1 Differentiation" = c("TBX21", "STAT1", "IRF1"),
  "Exhaustion" = c("TOX", "PRDM1", "BATF")
)


# 2. Get Average Activity per Cell Line (orig.ident)
cell_line_avg <- AverageExpression(seurat_obj, 
                                   assays = "dorothea", 
                                   layer = "data", 
                                   group.by = "orig.ident")$dorothea

# 3. Aggregate TFs into Module Scores
module_mat <- sapply(names(modules), function(mod_name) {
  tfs <- modules[[mod_name]]
  valid_tfs <- intersect(tfs, rownames(cell_line_avg))
  
  if(length(valid_tfs) > 0) {
    colMeans(cell_line_avg[valid_tfs, , drop=FALSE])
  } else {
    rep(0, ncol(cell_line_avg))
  }
})

# Transpose to [Modules x Cell Lines]
module_mat <- t(module_mat)

# 4. Scale (Z-score) row-wise
module_mat_z <- t(scale(t(module_mat)))
module_mat_z[is.na(module_mat_z)] <- 0

# 5. Plot Heatmap
col_fun <- circlize::colorRamp2(c(-2, 0, 2), c("#313695", "white", "#A50026"))

ht_mod <- Heatmap(module_mat_z,
                  name = "Activity (z)",
                  col = col_fun,
                  cluster_rows = TRUE, 
                  cluster_columns = TRUE, # Group similar cell lines together
                  rect_gp = gpar(col = "white", lwd = 1), 
                  row_names_side = "left",
                  column_title = "Regulatory Landscape Across Cell Lines",
                  row_names_gp = gpar(fontsize = 11, fontface = "bold"),
                  column_names_gp = gpar(fontsize = 10))

# Draw and Save
draw(ht_mod)

pdf("Output_Figures/Supplementary_TF_Module_Heatmap_CellLine.pdf", width=8, height=6)
draw(ht_mod)
dev.off()

png("Output_Figures/Supplementary_TF_Module_Heatmap_CellLine.png", width=8*300, height=6*300, res=300)
draw(ht_mod)
dev.off()

cat("✓ Supplementary Cell Line Heatmap Saved\n")
```

## SUPPLEMENTARY FIGURE 4: Regulatory Module Activity by Cell Line
```{r, fig.height=12, fig.width=16}
library(ComplexHeatmap)
library(circlize)
library(Seurat)

# 1. Define Modules
modules <- list(
  "Homeostasis" = c("FOXO1", "TCF7", "LEF1"),
  "Oncogenic/Cell Cycle" = c("MYC", "E2F1", "E2F4", "FOXM1"),
  "Inflammation (NF-kB)" = c("RELA", "NFKB1", "REL", "STAT3"),
  "Th2 Differentiation" = c("GATA3", "STAT6", "MAF"),
  "Th1 Differentiation" = c("TBX21", "STAT1", "IRF1"),
  "Exhaustion" = c("TOX", "PRDM1", "BATF")
)


# 2. Get Average Activity per Cell Line (orig.ident)
cell_line_avg <- AverageExpression(seurat_obj, 
                                   assays = "dorothea", 
                                   layer = "data", 
                                   group.by = "seurat_clusters")$dorothea

# 3. Aggregate TFs into Module Scores
module_mat <- sapply(names(modules), function(mod_name) {
  tfs <- modules[[mod_name]]
  valid_tfs <- intersect(tfs, rownames(cell_line_avg))
  
  if(length(valid_tfs) > 0) {
    colMeans(cell_line_avg[valid_tfs, , drop=FALSE])
  } else {
    rep(0, ncol(cell_line_avg))
  }
})

# Transpose to [Modules x Cell Lines]
module_mat <- t(module_mat)

# 4. Scale (Z-score) row-wise
module_mat_z <- t(scale(t(module_mat)))
module_mat_z[is.na(module_mat_z)] <- 0

# 5. Plot Heatmap
col_fun <- circlize::colorRamp2(c(-2, 0, 2), c("#313695", "white", "#A50026"))

ht_mod <- Heatmap(module_mat_z,
                  name = "Activity (z)",
                  col = col_fun,
                  cluster_rows = TRUE, 
                  cluster_columns = TRUE, # Group similar cell lines together
                  rect_gp = gpar(col = "white", lwd = 1), 
                  row_names_side = "left",
                  column_title = "Regulatory Landscape Across Clusters",
                  row_names_gp = gpar(fontsize = 11, fontface = "bold"),
                  column_names_gp = gpar(fontsize = 10))

# Draw and Save
draw(ht_mod)

pdf("Output_Figures/Supplementary_TF_Module_Heatmap_CellLine.pdf", width=8, height=6)
draw(ht_mod)
dev.off()

png("Output_Figures/Supplementary_TF_Module_Heatmap_CellLine.png", width=8*300, height=6*300, res=300)
draw(ht_mod)
dev.off()

cat("✓ Supplementary Cell Line Heatmap Saved\n")
```



## NEW FIGURE: Regulatory Trajectory Analysis (PCA on TF Activity)
```{r, fig.height=12, fig.width=16}
library(Seurat)
library(ggplot2)
library(dplyr)

# 1. Run PCA specifically on TF Activity (DoRothEA)
DefaultAssay(seurat_obj) <- "dorothea"
seurat_obj <- ScaleData(seurat_obj)
seurat_obj <- RunPCA(seurat_obj, features = rownames(seurat_obj), verbose = FALSE)

# 2. Extract PCA Embeddings
pca_data <- Embeddings(seurat_obj, reduction = "pca")[, 1:2] %>% 
  as.data.frame() %>% 
  mutate(Cluster = seurat_obj$seurat_clusters,
         Condition = ifelse(seurat_obj$seurat_clusters %in% c(3, 10), "Non-Malignant", "Malignant"))

# 3. Calculate Cluster Centroids (for arrows)
centroids <- pca_data %>% 
  group_by(Cluster) %>% 
  summarise(PC1 = mean(PC_1), PC2 = mean(PC_2))

# 4. Define Key Drivers for PC1 and PC2 (Loadings)
# This tells us WHAT drives the trajectory (e.g., PC1 = Malignancy, PC2 = Th1 vs Th2)
loadings <- Loadings(seurat_obj, reduction = "pca")
pc1_drivers <- names(sort(abs(loadings[, 1]), decreasing = T))[1:5]
pc2_drivers <- names(sort(abs(loadings[, 2]), decreasing = T))[1:5]

cat("PC1 Drivers (X-axis):", paste(pc1_drivers, collapse=", "), "\n")
cat("PC2 Drivers (Y-axis):", paste(pc2_drivers, collapse=", "), "\n")

# 5. Plot the Trajectory
p_traj <- ggplot(pca_data, aes(x = PC_1, y = PC_2, color = Cluster)) +
  # Points
  geom_point(alpha = 0.6, size = 1.5) +
  
  # Centroids and Labels
  geom_point(data = centroids, aes(x = PC1, y = PC2), size = 5, color = "black", shape = 21, fill = "white") +
  geom_text(data = centroids, aes(x = PC1, y = PC2, label = Cluster), color = "black", fontface = "bold") +
  
  # Styling
  scale_color_manual(values = Seurat::DiscretePalette(14)) +
  labs(title = "Regulatory Trajectory of Sézary Cells",
       subtitle = paste0("PC1 driven by: ", paste(pc1_drivers[1:3], collapse=", "), 
                         "\nPC2 driven by: ", paste(pc2_drivers[1:3], collapse=", ")),
       x = "PC1: Regulatory Axis 1", 
       y = "PC2: Regulatory Axis 2") +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold", size = 14),
        legend.position = "right")

# Save
ggsave("Output_Figures/Supplementary_TF_Trajectory_PCA.pdf", p_traj, width = 8, height = 7)
ggsave("Output_Figures/Supplementary_TF_Trajectory_PCA.png", p_traj, width = 8, height = 7, dpi = 300)

print(p_traj)
cat("✓ Regulatory Trajectory Plot Saved\n")
```











## REFINED FIGURE: Regulatory Bi-plot (Trajectory + TF Drivers))
```{r, fig.height=12, fig.width=16}
library(Seurat)
library(ggplot2)
library(dplyr)
library(ggrepel)

# 1. Run PCA on TF Activity
DefaultAssay(seurat_obj) <- "dorothea"
seurat_obj <- ScaleData(seurat_obj)
seurat_obj <- RunPCA(seurat_obj, features = rownames(seurat_obj), verbose = FALSE)

# 2. Extract Cell Embeddings (Points)
pca_data <- Embeddings(seurat_obj, reduction = "pca")[, 1:2] %>% 
  as.data.frame() %>% 
  mutate(Cluster = as.factor(seurat_obj$seurat_clusters))

# 3. Extract Feature Loadings (Arrows)
loadings <- Loadings(seurat_obj, reduction = "pca")
# Select top 5 drivers for PC1 and PC2
top_pc1 <- names(sort(abs(loadings[, 1]), decreasing = TRUE))[1:5]
top_pc2 <- names(sort(abs(loadings[, 2]), decreasing = TRUE))[1:5]
top_drivers <- unique(c(top_pc1, top_pc2))

arrow_data <- as.data.frame(loadings[top_drivers, 1:2])
arrow_data$TF <- rownames(arrow_data)

# Scale arrows to match plot dimensions (scaling factor for visibility)
scale_factor <- max(abs(pca_data$PC_1)) / max(abs(arrow_data$PC_1)) * 0.8
arrow_data$PC_1 <- arrow_data$PC_1 * scale_factor
arrow_data$PC_2 <- arrow_data$PC_2 * scale_factor

# 4. Plot
p_biplot <- ggplot(pca_data, aes(x = PC_1, y = PC_2)) +
  # Cell Points
  geom_point(aes(color = Cluster), alpha = 0.5, size = 1.5) +
  
  # TF Arrows
  geom_segment(data = arrow_data, aes(x = 0, y = 0, xend = PC_1, yend = PC_2),
               arrow = arrow(length = unit(0.2, "cm")), color = "black", linewidth = 0.8) +
  
  # TF Labels
  geom_text_repel(data = arrow_data, aes(x = PC_1, y = PC_2, label = TF),
                  fontface = "bold", color = "black", size = 4, box.padding = 0.5) +
  
  # Styling
  scale_color_manual(values = Seurat::DiscretePalette(14)) +
  labs(title = "Regulatory Landscape Bi-plot",
       subtitle = "Arrows indicate direction of TF activity driving heterogeneity",
       x = "PC1: Malignancy Axis (Cell Cycle)", 
       y = "PC2: Identity Axis (Inflammation vs. Differentiation)") +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold", size = 14),
        legend.position = "right")

# Save
ggsave("Output_Figures/Supplementary_TF_Biplot.pdf", p_biplot, width = 10, height = 8)
ggsave("Output_Figures/Supplementary_TF_Biplot.png", p_biplot, width = 10, height = 8, dpi = 300)

print(p_biplot)
cat("✓ Bi-plot Saved. This visualizes exactly WHICH TFs pull clusters apart.\n")
```

## REFINED FIGURE: Regulatory Bi-plot (Cleaned)
```{r, fig.height=12, fig.width=16}
library(Seurat)
library(ggplot2)
library(dplyr)
library(ggrepel)

# 1. Run PCA on TF Activity (if not already done)
DefaultAssay(seurat_obj) <- "dorothea"
seurat_obj <- ScaleData(seurat_obj)
seurat_obj <- RunPCA(seurat_obj, features = rownames(seurat_obj), verbose = FALSE)

# 2. Extract Cell Embeddings
pca_data <- Embeddings(seurat_obj, reduction = "pca")[, 1:2] %>% 
  as.data.frame() %>% 
  mutate(Cluster = as.factor(seurat_obj$seurat_clusters))

# 3. Extract Feature Loadings (The "Arrows")
loadings <- Loadings(seurat_obj, reduction = "pca")

# --- FILTERING STEP ---
# Select only the top 3 positive and top 3 negative drivers for PC1 and PC2
pc1_top <- names(sort(loadings[, 1], decreasing = TRUE))[1:3]
pc1_bottom <- names(sort(loadings[, 1], decreasing = FALSE))[1:3]
pc2_top <- names(sort(loadings[, 2], decreasing = TRUE))[1:3]
pc2_bottom <- names(sort(loadings[, 2], decreasing = FALSE))[1:3]

top_drivers <- unique(c(pc1_top, pc1_bottom, pc2_top, pc2_bottom))
arrow_data <- as.data.frame(loadings[top_drivers, 1:2])
arrow_data$TF <- rownames(arrow_data)

# Scale arrows to be visible on the plot (multiply by factor)
scale_factor <- max(abs(pca_data$PC_1)) / max(abs(arrow_data$PC_1)) * 0.8
arrow_data$PC_1 <- arrow_data$PC_1 * scale_factor
arrow_data$PC_2 <- arrow_data$PC_2 * scale_factor

# 4. Plot
p_biplot <- ggplot(pca_data, aes(x = PC_1, y = PC_2)) +
  # Points (Cells)
  geom_point(aes(color = Cluster), alpha = 0.6, size = 1.5) +
  
  # Arrows (TFs)
  geom_segment(data = arrow_data, aes(x = 0, y = 0, xend = PC_1, yend = PC_2),
               arrow = arrow(length = unit(0.2, "cm")), color = "black", linewidth = 0.8) +
  
  # Labels (TFs) - using ggrepel to avoid overlap
  geom_text_repel(data = arrow_data, aes(x = PC_1, y = PC_2, label = TF),
                  fontface = "bold", color = "black", size = 4, 
                  box.padding = 0.5, point.padding = 0.2) +
  
  # Styling
  scale_color_manual(values = Seurat::DiscretePalette(14)) +
  labs(title = "Regulatory Drivers of Heterogeneity",
       subtitle = "Arrows indicate TFs driving separation along PC1 (Malignancy) and PC2 (Identity)",
       x = "PC1: Malignancy Axis", 
       y = "PC2: Identity Axis") +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold", size = 14),
        legend.position = "right")

# Save
ggsave("Output_Figures/Supplementary_TF_Biplot_Clean.pdf", p_biplot, width = 10, height = 8)
print(p_biplot)
cat("✓ Clean Bi-plot Saved. Shows only top drivers.\n")
```

# SUPPLEMENTARY FIGURE: Slingshot Trajectory Analysis
```{r, fig.height=12, fig.width=16}
# 1. Load Libraries
library(slingshot)
library(SingleCellExperiment)
library(RColorBrewer)
library(scales)

# 2. Convert Seurat to SingleCellExperiment
# We use the 'dorothea' assay for TF-based trajectory, or 'SCT' for gene-based.
# TF-based is better for "regulatory" trajectories.
sce <- as.SingleCellExperiment(seurat_obj, assay = "dorothea")

# 3. Run Slingshot
# We specify the reduction (UMAP) and the cluster labels
# 'start.clus' = 5 sets the root at the Stem-like cluster
sce <- slingshot(sce, clusterLabels = seurat_obj$seurat_clusters, 
                 reducedDim = 'UMAP', start.clus = '5')

# 4. Extract Trajectory Data for Plotting in ggplot2
# Get the curves (lineages)
slingshot_curves <- SlingshotDataSet(sce)
curve_coords <- data.frame()

for (i in seq_along(slingCurves(slingshot_curves))) {
  c <- slingCurves(slingshot_curves)[[i]]
  df <- as.data.frame(c$s[c$ord, ])
  df$Lineage <- paste0("Lineage", i)
  curve_coords <- rbind(curve_coords, df)
}

# Get cell embeddings
umap_data <- as.data.frame(Embeddings(seurat_obj, "umap"))
umap_data$Cluster <- seurat_obj$seurat_clusters

# 5. Plot
p_slingshot <- ggplot(umap_data, aes(x = umap_1, y = umap_2)) +
  # Cell points
  geom_point(aes(color = Cluster), size = 0.8, alpha = 0.6) +
  
  # Trajectory paths
  geom_path(data = curve_coords, aes(x = umap_1, y = umap_2, group = Lineage), 
            color = "black", linewidth = 1.2, arrow = arrow(type = "closed", length = unit(0.1, "inches"))) +
  
  # Styling
  scale_color_manual(values = Seurat::DiscretePalette(14)) +
  labs(title = "Regulatory Trajectory Inference (Slingshot)",
       subtitle = "Root set to Cluster 5 (Stem-like State)",
       x = "UMAP 1", y = "UMAP 2") +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold", size = 14))

# 6. Save
ggsave("Output_Figures/Supplementary_Slingshot_Trajectory.pdf", p_slingshot, width = 8, height = 7)
ggsave("Output_Figures/Supplementary_Slingshot_Trajectory.png", p_slingshot, width = 8, height = 7, dpi = 300)

print(p_slingshot)
cat("✓ Slingshot Trajectory Saved. Arrows indicate developmental flow from Cluster 5.\n")
```


# SUPPLEMENTARY FIGURE:  Monocle3 (If you prefer pseudotime coloring)
```{r, fig.height=12, fig.width=16}
library(monocle3)
library(SeuratWrappers)
library(ggplot2)
library(dplyr)

# --- 1. Define Helper Function (Required for Monocle3) ---
get_earliest_principal_node <- function(cds, start_cells, reduction_method = "UMAP"){
  # Get the closest principal node to the geometric center of start_cells
  cell_ids <- colnames(cds)
  
  closest_vertex <- cds@principal_graph_aux[[reduction_method]]$pr_graph_cell_proj_closest_vertex
  closest_vertex <- as.matrix(closest_vertex[colnames(cds), ])
  
  root_pr_nodes <- igraph::V(principal_graph(cds)[[reduction_method]])$name[as.numeric(names(which.max(table(closest_vertex[start_cells,]))))]
  
  return(root_pr_nodes)
}

# --- 2. Convert Seurat to CellDataSet (CDS) ---
# We use the 'dorothea' assay for TF-based trajectory (or 'SCT' for gene-based)
cds <- as.cell_data_set(seurat_obj)

# --- 3. Preprocess & Learn Graph ---
# Monocle needs to re-cluster to learn the graph structure
cds <- cluster_cells(cds, reduction_method = "UMAP")
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)

# --- 4. Order Cells (Root = Cluster 5) ---
# Identify cells in Cluster 5 (Stem-like)
stem_cells <- colnames(seurat_obj)[seurat_obj$seurat_clusters == "5"]

# Find the root node
closest_node <- get_earliest_principal_node(cds, start_cells = stem_cells)

# Order cells based on pseudotime starting from that node
cds <- order_cells(cds, root_pr_nodes = closest_node)

# --- 5. Plot Pseudotime ---
p_monocle <- plot_cells(cds,
           color_cells_by = "pseudotime",
           label_cell_groups = FALSE,
           label_leaves = FALSE,
           label_branch_points = FALSE,
           graph_label_size = 1.5,
           trajectory_graph_color = "black",
           trajectory_graph_segment_size = 1.0) +
  labs(title = "Regulatory Trajectory (Pseudotime)",
       subtitle = "Root: Cluster 5 (Stem-like State)") +
  theme(plot.title = element_text(face = "bold", size = 14))

# Save
ggsave("Output_Figures/Supplementary_Monocle3_Pseudotime.pdf", p_monocle, width = 8, height = 7)
ggsave("Output_Figures/Supplementary_Monocle3_Pseudotime.png", p_monocle, width = 8, height = 7, dpi = 300)

print(p_monocle)
cat("✓ Monocle3 Trajectory Saved. Cells colored by pseudotime (Purple = Early, Yellow = Late).\n")

```


# SUPPLEMENTARY FIGURE:  R Code: PAGA-like Connectivity Graph (TF Activity)
```{r, fig.height=8, fig.width=8}
# ============================================================================
# SUPPLEMENTARY FIGURE: PAGA-like Connectivity Graph (TF Activity)
# ============================================================================

library(Seurat)
library(ggraph) # Make sure ggraph is installed: install.packages("ggraph")
library(dplyr)

# 1. Build a k-Nearest Neighbor (KNN) Graph on TF Activity
# We use the 'dorothea' assay to ensure connections are based on REGULATION
DefaultAssay(seurat_obj) <- "dorothea"
seurat_obj <- FindNeighbors(seurat_obj, features = rownames(seurat_obj), k.param = 20, verbose = FALSE)

# 2. Construct the PAGA Graph (Cluster-to-Cluster Connectivity)
# We calculate the number of edges between cells of different clusters
# Access the graph directly using igraph functions (already available via Seurat)
adj_mat <- seurat_obj@graphs$dorothea_snn
clusters <- seurat_obj$seurat_clusters
n_clusters <- length(levels(clusters))

# Initialize connectivity matrix
connect_mat <- matrix(0, nrow = n_clusters, ncol = n_clusters)
rownames(connect_mat) <- levels(clusters)
colnames(connect_mat) <- levels(clusters)

# Fill matrix with edge weights (sum of shared NN links)
cluster_indices <- split(1:ncol(seurat_obj), clusters)

for (i in 1:n_clusters) {
  for (j in i:n_clusters) {
    if (i == j) next # Skip self-loops
    
    cells_i <- cluster_indices[[i]]
    cells_j <- cluster_indices[[j]]
    
    # Calculate total weight of connections between Cluster i and Cluster j
    sub_mat <- adj_mat[cells_i, cells_j]
    weight <- sum(sub_mat)
    
    # Normalize by cluster size to avoid bias towards large clusters
    norm_weight <- weight / (length(cells_i) * length(cells_j))
    
    connect_mat[i, j] <- norm_weight
    connect_mat[j, i] <- norm_weight
  }
}

# 3. Create Igraph Object
# Threshold edges to show only strong connections (top 20%)
threshold <- quantile(connect_mat[connect_mat > 0], 0.80) 
connect_mat[connect_mat < threshold] <- 0

# Use igraph::graph_from_adjacency_matrix explicitly
graph <- igraph::graph_from_adjacency_matrix(connect_mat, mode = "undirected", weighted = TRUE)

# Add Node Attributes (Size = Number of Cells)
igraph::V(graph)$size <- as.numeric(table(clusters))
igraph::V(graph)$label <- levels(clusters)

# 4. Plot PAGA Graph
p_paga <- ggraph(graph, layout = "fr") + # Fruchterman-Reingold layout
  # Edges (Thickness = Connectivity Strength)
  geom_edge_link(aes(width = weight), color = "grey50", alpha = 0.6) +
  scale_edge_width(range = c(0.5, 3)) +
  
  # Nodes (Clusters)
  geom_node_point(aes(size = size, color = label)) +
  scale_size(range = c(5, 15)) +
  
  # Labels
  geom_node_text(aes(label = label), fontface = "bold", color = "white") +
  
  # Styling
  # Use scales::hue_pal() for colors to avoid function errors
  scale_color_manual(values = scales::hue_pal()(n_clusters)) +
  labs(title = "Regulatory PAGA Graph (TF Connectivity)",
       subtitle = "Nodes = Clusters; Edges = Regulatory Similarity (DoRothEA)",
       caption = "Thick lines indicate strong regulatory transitions") +
  theme_void() +
  theme(legend.position = "none", plot.title = element_text(face="bold", size=14))

# Save
ggsave("Output_Figures/Supplementary_TF_PAGA_Graph.pdf", p_paga, width = 8, height = 8)
ggsave("Output_Figures/Supplementary_TF_PAGA_Graph.png", p_paga, width = 8, height = 8, dpi = 300)

print(p_paga)
cat("✓ PAGA Graph Saved. Look for connections from Cluster 5 (Stem) to 11/4/7.\n")

```

# Save
```{r, fig.height=12, fig.width=16}

saveRDS(seurat_obj, "temp_seurat_obj.rds")




```
