load libraries
# Data Processing
library(dplyr)
library(Seurat)
library(tibble)
library(tidyr)
library(stringr)
# Visualization
library(ggplot2)
library(ComplexHeatmap)
library(patchwork)
library(SCpubr)
# Regulatory Network Inference
library(decoupleR)
library(dorothea)
data(dorothea_hs, package = "dorothea")
library(tictoc)
Load Seurat Object
library(Seurat)
library(reticulate)
library(ggplot2)
# 1. Load Object
seurat_obj <- readRDS("temp_seurat_obj.rds")
Verification of TF
Availability
library(Seurat)
library(dplyr)
cat("=== CHECKING TF AVAILABILITY IN DOROTHEA ASSAY ===\n\n")
=== CHECKING TF AVAILABILITY IN DOROTHEA ASSAY ===
# 1. Identify the TF assay
available_assays <- Assays(seurat_obj)
tf_assay_name <- grep("dorothea|viper|TF", available_assays, value=TRUE)[1]
if(is.na(tf_assay_name)) {
stop("❌ No TF assay found! Please run DoRothEA first.")
}
cat("✓ Found TF assay:", tf_assay_name, "\n")
✓ Found TF assay: dorothea
# 2. Get all available TFs
all_tfs <- rownames(seurat_obj[[tf_assay_name]])
cat("✓ Total TFs inferred:", length(all_tfs), "\n\n")
✓ Total TFs inferred: 271
# 3. Define our Desired Panel (Literature-Based)
desired_tfs <- list(
"Stemness" = c("TCF7", "LEF1", "BACH2", "FOXO1", "MYC"),
"Malignancy" = c("TOX", "STAT5A", "STAT3", "STAT5B", "GATA3", "IKZF2"),
"Exhaustion" = c("EOMES", "TBX21", "PRDM1", "NFATC1", "BATF"),
"Treg_Like" = c("FOXP3", "IKZF2", "SATB1")
)
# 4. Check Presence
available_panel <- list()
missing_tfs <- c()
cat("--- PANEL VERIFICATION ---\n")
--- PANEL VERIFICATION ---
for(category in names(desired_tfs)) {
tfs <- desired_tfs[[category]]
present <- intersect(tfs, all_tfs)
missing <- setdiff(tfs, all_tfs)
available_panel[[category]] <- present
missing_tfs <- c(missing_tfs, missing)
cat(sprintf("%s: %d/%d found\n", category, length(present), length(tfs)))
if(length(present) > 0) cat(" ✅ Present:", paste(present, collapse=", "), "\n")
if(length(missing) > 0) cat(" ❌ Missing:", paste(missing, collapse=", "), "\n")
cat("\n")
}
Stemness: 5/5 found
✅ Present: TCF7, LEF1, BACH2, FOXO1, MYC
Malignancy: 4/6 found
✅ Present: STAT5A, STAT3, STAT5B, GATA3
❌ Missing: TOX, IKZF2
Exhaustion: 5/5 found
✅ Present: EOMES, TBX21, PRDM1, NFATC1, BATF
Treg_Like: 0/3 found
❌ Missing: FOXP3, IKZF2, SATB1
# 5. Summary
final_tf_list <- unlist(available_panel)
cat("=== FINAL TF LIST FOR TRAJECTORY ===\n")
=== FINAL TF LIST FOR TRAJECTORY ===
cat("Total TFs to plot:", length(final_tf_list), "\n")
Total TFs to plot: 14
print(final_tf_list)
Stemness1 Stemness2 Stemness3 Stemness4 Stemness5 Malignancy1 Malignancy2 Malignancy3 Malignancy4 Exhaustion1 Exhaustion2 Exhaustion3
"TCF7" "LEF1" "BACH2" "FOXO1" "MYC" "STAT5A" "STAT3" "STAT5B" "GATA3" "EOMES" "TBX21" "PRDM1"
Exhaustion4 Exhaustion5
"NFATC1" "BATF"
if("TOX" %in% final_tf_list) cat("\n✓ CRITICAL: TOX is available!\n")
if("TCF7" %in% final_tf_list) cat("✓ CRITICAL: TCF7 is available!\n")
✓ CRITICAL: TCF7 is available!
if("STAT5A" %in% final_tf_list) cat("✓ CRITICAL: STAT5A is available!\n")
✓ CRITICAL: STAT5A is available!
# 6. Save valid list for next step
saveRDS(final_tf_list, "valid_tf_panel.rds")
PAGA Gene Dynamics for
Malignant Sézary Cells (TF)
library(reticulate)
library(Seurat)
library(dplyr)
# Setup Python
sc <- import("scanpy")
ad <- import("anndata")
pl <- import("matplotlib.pyplot")
cat("=== TF REGULATORY DYNAMICS (5 TRAJECTORIES) ===\n\n")
=== TF REGULATORY DYNAMICS (5 TRAJECTORIES) ===
# 1. IDENTIFY TF ASSAY & CHECK AVAILABILITY
available_assays <- Assays(seurat_obj)
tf_assay_name <- grep("dorothea|viper|TF", available_assays, value=TRUE)[1]
if(is.na(tf_assay_name)) {
stop("❌ No TF assay found! Please run DoRothEA first.")
}
cat("✓ Using TF assay:", tf_assay_name, "\n")
✓ Using TF assay: dorothea
# Get all available TFs
all_tfs_in_object <- rownames(seurat_obj[[tf_assay_name]])
cat("✓ Total TFs inferred:", length(all_tfs_in_object), "\n\n")
✓ Total TFs inferred: 271
# 2. DEFINE & VALIDATE TF PANEL
desired_tfs <- list(
"Stemness" = c("TCF7", "LEF1", "BACH2", "FOXO1", "MYC"),
"Malignancy" = c("TOX", "STAT5A", "STAT3", "STAT5B", "GATA3", "IKZF2"),
"Exhaustion" = c("EOMES", "TBX21", "PRDM1", "NFATC1", "BATF"),
"Treg_Like" = c("FOXP3", "SATB1")
)
flat_valid_tfs <- c()
cat("--- TF PANEL VALIDATION ---\n")
--- TF PANEL VALIDATION ---
for(category in names(desired_tfs)) {
tfs <- desired_tfs[[category]]
present <- intersect(tfs, all_tfs_in_object)
missing <- setdiff(tfs, all_tfs_in_object)
if(length(present) > 0) {
flat_valid_tfs <- c(flat_valid_tfs, present)
cat(sprintf("✅ %s: %s\n", category, paste(present, collapse=", ")))
}
if(length(missing) > 0) {
cat(sprintf("❌ %s (Missing): %s\n", category, paste(missing, collapse=", ")))
}
}
✅ Stemness: TCF7, LEF1, BACH2, FOXO1, MYC
✅ Malignancy: STAT5A, STAT3, STAT5B, GATA3
❌ Malignancy (Missing): TOX, IKZF2
✅ Exhaustion: EOMES, TBX21, PRDM1, NFATC1, BATF
❌ Treg_Like (Missing): FOXP3, SATB1
if(length(flat_valid_tfs) == 0) stop("No TFs found!")
cat("\n✓ Using", length(flat_valid_tfs), "TFs\n\n")
✓ Using 14 TFs
# 3. PREPARE DATA (Malignant Only) - CLEAN RECREATION
malignant_clusters <- setdiff(unique(seurat_obj$seurat_clusters), c(3, 10))
seurat_malignant <- subset(seurat_obj, subset = seurat_clusters %in% malignant_clusters)
DefaultAssay(seurat_malignant) <- tf_assay_name
tf_matrix <- GetAssayData(seurat_malignant, assay = tf_assay_name, layer = "data")
tf_matrix <- tf_matrix[flat_valid_tfs, ]
# Convert to dense matrix and transpose
expr_matrix <- t(as.matrix(tf_matrix))
# Create clean obs dataframe
obs_df <- data.frame(
seurat_clusters = as.character(seurat_malignant$seurat_clusters),
row.names = colnames(seurat_malignant),
stringsAsFactors = FALSE
)
# Create AnnData using Python directly to avoid reticulate issues
cat("Creating AnnData object...\n")
Creating AnnData object...
# Export data to Python environment
py$expr_matrix <- expr_matrix
py$obs_df <- obs_df
py$var_names <- flat_valid_tfs
py_run_string("
import scanpy as sc
import anndata as ad
import pandas as pd
import numpy as np
# Create AnnData cleanly in Python
adata_tf = ad.AnnData(
X=r.expr_matrix,
obs=r.obs_df
)
# Set variable names
adata_tf.var_names = r.var_names
print(f'✓ AnnData created: {adata_tf.n_obs} cells x {adata_tf.n_vars} TFs')
")
Error in eval(parse(text = as_r_value(code)), envir = envir) :
RuntimeError: object 'var_names' not found
Run ]8;;rstudio:run:reticulate::py_last_error()`reticulate::py_last_error()`]8;; for details.
PAGA Gene Dynamics for
Malignant Sézary Cells (SCT)
---
title: "PAGA using TF Activity"
author: "Nasir Mahmood Abbasi"
date: "`r Sys.Date()`"
output:
  html_notebook:
    number_sections: true
    toc: true
    toc_float:
      collapsed: true
    theme: journal
---


# load libraries
```{r setup, include=TRUE}
# Data Processing
library(dplyr)
library(Seurat)
library(tibble)
library(tidyr)
library(stringr)

# Visualization
library(ggplot2)
library(ComplexHeatmap)
library(patchwork)
library(SCpubr)

# Regulatory Network Inference
library(decoupleR)
library(dorothea)
data(dorothea_hs, package = "dorothea")
library(tictoc)


```

# Load Seurat Object 
```{r}
library(Seurat)
library(reticulate)
library(ggplot2)

# 1. Load Object
seurat_obj <- readRDS("temp_seurat_obj.rds")



```

# SUPPLEMENTARY FIGURE: PAGA Trajectory Analysis (via Reticulate/Scanpy)-TF based
```{r, fig.height=16, fig.width=16}
library(Seurat)
library(reticulate)
library(ggplot2)

# 1. FORCE USE of the specific python binary where scanpy is installed
# This overrides the default "auto" discovery which is failing you
Sys.setenv(RETICULATE_PYTHON = "/home/bioinfo/.virtualenvs/r-reticulate/bin/python")
use_python("/home/bioinfo/.virtualenvs/r-reticulate/bin/python", required = TRUE)

# 2. Verify it's working (Optional but good for debugging)
cat("Using Python:", py_config()$python, "\n")

# 3. Import Libraries
sc <- import("scanpy")
ad <- import("anndata")

cat("✓ Python libraries imported successfully!\n")

# 4. Prepare Data for PAGA
cat("Converting Seurat object to AnnData...\n")
tf_matrix <- t(GetAssayData(seurat_obj, assay = "dorothea", layer = "data"))
obs <- data.frame(row.names = colnames(seurat_obj), 
                  louvain = as.character(seurat_obj$seurat_clusters))

adata <- ad$AnnData(X = tf_matrix, obs = obs)

# 5. Run Scanpy Workflow
cat("Running PAGA...\n")
sc$pp$neighbors(adata, use_rep = "X", n_neighbors = 15L)
sc$tl$paga(adata, groups = "louvain")

# 6. Plot and Save
output_file <- "Output_Figures/Supplementary_PAGA_TF_Activity.png"
if(!dir.exists("Output_Figures")) dir.create("Output_Figures")

# CORRECT ORDER: Draw PAGA first to compute positions
sc$pl$paga(adata, threshold = 0.15, show = FALSE)

# Now UMAP can use those positions
sc$tl$umap(adata, init_pos = "paga")

# Plot PAGA with connectivity threshold
sc$pl$paga(adata, threshold = 0.15, show = FALSE, save = "_TF_Activity.png")

# Scanpy saves to "figures/" by default
if(file.exists("figures/paga_TF_Activity.png")){
  file.rename("figures/paga_TF_Activity.png", output_file)
  cat(sprintf("✓ PAGA plot saved to: %s\n", output_file))
} else {
  cat("Note: Check './figures/' directory for output\n")
}

# Display in notebook
if(file.exists(output_file)){
  knitr::include_graphics(output_file)
}

```

# SUPPLEMENTARY FIGURE: PAGA on Gene Expression (SCT Assay)
```{r, fig.height=16, fig.width=16}
library(Seurat)
library(reticulate)

# Use same python environment
Sys.setenv(RETICULATE_PYTHON = "/home/bioinfo/.virtualenvs/r-reticulate/bin/python")
use_python("/home/bioinfo/.virtualenvs/r-reticulate/bin/python", required = TRUE)

sc <- import("scanpy")
ad <- import("anndata")

cat("✓ Python libraries imported\n")

# 1. Extract SCT-normalized gene expression
DefaultAssay(seurat_obj) <- "SCT"

# Use highly variable features from SCT
hvgs <- VariableFeatures(seurat_obj)

# Get normalized data (log1p of corrected counts)
gene_matrix <- t(GetAssayData(seurat_obj, assay = "SCT", layer = "data")[hvgs, ])

cat(sprintf("Using %d highly variable genes from SCT assay\n", length(hvgs)))

# 2. Prepare AnnData object
obs <- data.frame(
  row.names = colnames(seurat_obj),
  louvain = as.character(seurat_obj$seurat_clusters)
)

adata_sct <- ad$AnnData(X = gene_matrix, obs = obs)

# 3. Run PAGA workflow
cat("Computing neighbors and PAGA on SCT-normalized expression...\n")
sc$pp$neighbors(adata_sct, use_rep = "X", n_neighbors = 15L)
sc$tl$paga(adata_sct, groups = "louvain")

# 4. Plot and save
output_file_sct <- "Output_Figures/Supplementary_PAGA_GeneExpression_SCT.png"

# Draw PAGA first to compute positions
sc$pl$paga(adata_sct, threshold = 0.15, show = FALSE)

# Compute UMAP initialized by PAGA
sc$tl$umap(adata_sct, init_pos = "paga")

# Final PAGA plot
sc$pl$paga(adata_sct, threshold = 0.15, show = FALSE, save = "_GeneExpression_SCT.png")

# Move from figures/ to Output_Figures/
if(file.exists("figures/paga_GeneExpression_SCT.png")){
  file.rename("figures/paga_GeneExpression_SCT.png", output_file_sct)
  cat(sprintf("✓ SCT Gene Expression PAGA saved to: %s\n", output_file_sct))
}

# Display
if(file.exists(output_file_sct)){
  knitr::include_graphics(output_file_sct)
}
```
# SUPPLEMENTARY FIGURE: Malignant-Only PAGA-SCT Assay
```{r, fig.height=16, fig.width=16}
# Run PAGA on malignant cells only
library(reticulate)
library(Seurat)

# 1. Subset to malignant cells
malignant_clusters <- setdiff(unique(seurat_obj$seurat_clusters), c(3, 10))
seurat_malignant <- subset(seurat_obj, subset = seurat_clusters %in% malignant_clusters)

# 2. Prepare for PAGA
sc <- import("scanpy")
ad <- import("anndata")

# Get expression matrix and metadata (malignant only)
expr_matrix <- t(GetAssayData(seurat_malignant, assay = "SCT", layer = "data"))
obs <- data.frame(
  row.names = colnames(seurat_malignant),
  louvain = as.character(seurat_malignant$seurat_clusters)
)

adata_malignant <- ad$AnnData(X = expr_matrix, obs = obs)

# 3. Run PAGA
sc$pp$neighbors(adata_malignant, use_rep = "X", n_neighbors = 15L)
sc$tl$paga(adata_malignant, groups = "louvain")

# 4. Plot
sc$pl$paga(adata_malignant, threshold = 0.15, show = FALSE)

# Compute UMAP initialized by PAGA
sc$tl$umap(adata_malignant, init_pos = "paga")

# Save figure
plt <- import("matplotlib.pyplot")
sc$pl$paga(adata_malignant, threshold = 0.15, show = FALSE)  # Draw the plot
plt$savefig("Output_Figures/PAGA_Malignant_Only.png", dpi = 300L, bbox_inches = "tight")
plt$close()

cat("✓ Malignant-only PAGA saved to Output_Figures/PAGA_Malignant_Only.png\n")


output_file <- "Output_Figures/PAGA_Malignant_Only.png"

if (file.exists(output_file)) {
  knitr::include_graphics(output_file)
}

```
# SUPPLEMENTARY FIGURE: Malignant-Only PAGA-TF Assay
```{r, fig.height=16, fig.width=16}
# Run PAGA on malignant cells using TF activity
library(reticulate)
library(Seurat)

cat("=== PAGA ANALYSIS USING TF ACTIVITY ===\n\n")

# 1. Subset to malignant cells
malignant_clusters <- setdiff(unique(seurat_obj$seurat_clusters), c(3, 10))
seurat_malignant <- subset(seurat_obj, subset = seurat_clusters %in% malignant_clusters)

cat("Malignant cells:", ncol(seurat_malignant), "\n")
cat("Clusters:", paste(sort(unique(seurat_malignant$seurat_clusters)), collapse = ", "), "\n\n")

# 2. Check for TF assay
available_assays <- Assays(seurat_malignant)
tf_assay_name <- NULL
possible_tf_names <- c("dorothea", "DoRothEA", "TF", "viper", "VIPER", "regulon")

for(name in possible_tf_names) {
  if(name %in% available_assays) {
    tf_assay_name <- name
    break
  }
}

if(is.null(tf_assay_name)) {
  cat("⚠️ No TF assay found. Available assays:", paste(available_assays, collapse = ", "), "\n")
  cat("Please run DoRothEA first. Stopping...\n")
  knitr::knit_exit()
}

cat("✓ Using TF assay:", tf_assay_name, "\n")
cat("TFs available:", nrow(seurat_malignant[[tf_assay_name]]), "\n\n")

# 3. Prepare for PAGA
sc <- import("scanpy")
ad <- import("anndata")

# Get TF activity matrix and metadata (malignant only)
DefaultAssay(seurat_malignant) <- tf_assay_name
tf_matrix <- GetAssayData(seurat_malignant, assay = tf_assay_name, layer = "data")

# Transpose: cells x TFs
expr_matrix <- t(as.matrix(tf_matrix))

obs <- data.frame(
  row.names = colnames(seurat_malignant),
  louvain = as.character(seurat_malignant$seurat_clusters)
)

adata_malignant_tf <- ad$AnnData(X = expr_matrix, obs = obs)

cat("AnnData created:", nrow(adata_malignant_tf$obs), "cells x", 
    ncol(adata_malignant_tf$X), "TFs\n\n")

# 4. Run PAGA on TF activity
cat("Computing PAGA on TF activity...\n")
sc$pp$neighbors(adata_malignant_tf, use_rep = "X", n_neighbors = 15L)
sc$tl$paga(adata_malignant_tf, groups = "louvain")

# 5. Plot
sc$pl$paga(adata_malignant_tf, threshold = 0.15, show = FALSE)

# Compute UMAP initialized by PAGA
cat("Computing PAGA-initialized UMAP...\n")
sc$tl$umap(adata_malignant_tf, init_pos = "paga")

# Save figure
plt <- import("matplotlib.pyplot")
sc$pl$paga(adata_malignant_tf, threshold = 0.15, show = FALSE)  # Draw the plot
plt$savefig("Output_Figures/PAGA_Malignant_TF.png", dpi = 300L, bbox_inches = "tight")
plt$close()

cat("✓ Malignant-only PAGA (TF) saved to Output_Figures/PAGA_Malignant_TF.png\n\n")

# Display
output_file <- "Output_Figures/PAGA_Malignant_TF.png"

if (file.exists(output_file)) {
  knitr::include_graphics(output_file)
}

cat("=== TF-BASED PAGA COMPLETE ===\n")

```


# Verification of TF Availability
```{r , fig.height=16, fig.width=16}
library(Seurat)
library(dplyr)

cat("=== CHECKING TF AVAILABILITY IN DOROTHEA ASSAY ===\n\n")

# 1. Identify the TF assay
available_assays <- Assays(seurat_obj)
tf_assay_name <- grep("dorothea|viper|TF", available_assays, value=TRUE)[1]

if(is.na(tf_assay_name)) {
  stop("❌ No TF assay found! Please run DoRothEA first.")
}
cat("✓ Found TF assay:", tf_assay_name, "\n")

# 2. Get all available TFs
all_tfs <- rownames(seurat_obj[[tf_assay_name]])
cat("✓ Total TFs inferred:", length(all_tfs), "\n\n")

# 3. Define our Desired Panel (Literature-Based)
desired_tfs <- list(
  "Stemness" = c("TCF7", "LEF1", "BACH2", "FOXO1", "MYC"),
  "Malignancy" = c("TOX", "STAT5A", "STAT3", "STAT5B", "GATA3", "IKZF2"),
  "Exhaustion" = c("EOMES", "TBX21", "PRDM1", "NFATC1", "BATF"),
  "Treg_Like" = c("FOXP3", "IKZF2", "SATB1") 
)

# 4. Check Presence
available_panel <- list()
missing_tfs <- c()

cat("--- PANEL VERIFICATION ---\n")
for(category in names(desired_tfs)) {
  tfs <- desired_tfs[[category]]
  present <- intersect(tfs, all_tfs)
  missing <- setdiff(tfs, all_tfs)
  
  available_panel[[category]] <- present
  missing_tfs <- c(missing_tfs, missing)
  
  cat(sprintf("%s: %d/%d found\n", category, length(present), length(tfs)))
  if(length(present) > 0) cat("  ✅ Present:", paste(present, collapse=", "), "\n")
  if(length(missing) > 0) cat("  ❌ Missing:", paste(missing, collapse=", "), "\n")
  cat("\n")
}

# 5. Summary
final_tf_list <- unlist(available_panel)
cat("=== FINAL TF LIST FOR TRAJECTORY ===\n")
cat("Total TFs to plot:", length(final_tf_list), "\n")
print(final_tf_list)

if("TOX" %in% final_tf_list) cat("\n✓ CRITICAL: TOX is available!\n")
if("TCF7" %in% final_tf_list) cat("✓ CRITICAL: TCF7 is available!\n")
if("STAT5A" %in% final_tf_list) cat("✓ CRITICAL: STAT5A is available!\n")

# 6. Save valid list for next step
saveRDS(final_tf_list, "valid_tf_panel.rds")

```


# PAGA Gene Dynamics for Malignant Sézary Cells (TF)
```{r , fig.height=14, fig.width=25}
library(reticulate)
library(Seurat)
library(dplyr)

# Setup Python
sc <- import("scanpy")
ad <- import("anndata")
pl <- import("matplotlib.pyplot")

cat("=== TF REGULATORY DYNAMICS (5 TRAJECTORIES) ===\n\n")

# 1. IDENTIFY TF ASSAY & CHECK AVAILABILITY
available_assays <- Assays(seurat_obj)
tf_assay_name <- grep("dorothea|viper|TF", available_assays, value=TRUE)[1]

if(is.na(tf_assay_name)) {
  stop("❌ No TF assay found! Please run DoRothEA first.")
}
cat("✓ Using TF assay:", tf_assay_name, "\n")

# Get all available TFs
all_tfs_in_object <- rownames(seurat_obj[[tf_assay_name]])
cat("✓ Total TFs inferred:", length(all_tfs_in_object), "\n\n")

# 2. DEFINE & VALIDATE TF PANEL
desired_tfs <- list(
  "Stemness" = c("TCF7", "LEF1", "BACH2", "FOXO1", "MYC"),
  "Malignancy" = c("TOX", "STAT5A", "STAT3", "STAT5B", "GATA3", "IKZF2"),
  "Exhaustion" = c("EOMES", "TBX21", "PRDM1", "NFATC1", "BATF"),
  "Treg_Like" = c("FOXP3", "SATB1")
)

flat_valid_tfs <- c()
cat("--- TF PANEL VALIDATION ---\n")
for(category in names(desired_tfs)) {
  tfs <- desired_tfs[[category]]
  present <- intersect(tfs, all_tfs_in_object)
  missing <- setdiff(tfs, all_tfs_in_object)
  
  if(length(present) > 0) {
    flat_valid_tfs <- c(flat_valid_tfs, present)
    cat(sprintf("✅ %s: %s\n", category, paste(present, collapse=", ")))
  }
  if(length(missing) > 0) {
    cat(sprintf("❌ %s (Missing): %s\n", category, paste(missing, collapse=", ")))
  }
}

if(length(flat_valid_tfs) == 0) stop("No TFs found!")
cat("\n✓ Using", length(flat_valid_tfs), "TFs\n\n")

# 3. PREPARE DATA (Malignant Only) - CLEAN RECREATION
malignant_clusters <- setdiff(unique(seurat_obj$seurat_clusters), c(3, 10))
seurat_malignant <- subset(seurat_obj, subset = seurat_clusters %in% malignant_clusters)

DefaultAssay(seurat_malignant) <- tf_assay_name
tf_matrix <- GetAssayData(seurat_malignant, assay = tf_assay_name, layer = "data")
tf_matrix <- tf_matrix[flat_valid_tfs, ]

# Convert to dense matrix and transpose
expr_matrix <- t(as.matrix(tf_matrix))

# Create clean obs dataframe
obs_df <- data.frame(
  seurat_clusters = as.character(seurat_malignant$seurat_clusters),
  row.names = colnames(seurat_malignant),
  stringsAsFactors = FALSE
)

# Create AnnData using Python directly to avoid reticulate issues
cat("Creating AnnData object...\n")

# Export data to Python environment
py$expr_matrix <- expr_matrix
py$obs_df <- obs_df
py$var_names <- flat_valid_tfs

py_run_string("
import scanpy as sc
import anndata as ad
import pandas as pd
import numpy as np

# Create AnnData cleanly in Python
adata_tf = ad.AnnData(
    X=r.expr_matrix,
    obs=r.obs_df
)

# Set variable names
adata_tf.var_names = r.var_names

print(f'✓ AnnData created: {adata_tf.n_obs} cells x {adata_tf.n_vars} TFs')
")

# Retrieve the clean adata object back to R
adata_tf <- py$adata_tf

cat("✓ AnnData object ready\n\n")

# 4. RUN PAGA & PSEUDOTIME - ALL IN PYTHON
cat("Computing PAGA on TF activity...\n")

py_run_string("
import scanpy as sc

adata = adata_tf  # Use the clean object we just created

# Compute neighbors
sc.pp.neighbors(adata, n_neighbors=15, use_rep='X')
print('✓ Neighbors computed')

# Compute PAGA
sc.tl.paga(adata, groups='seurat_clusters')
print('✓ PAGA computed')

# Plot PAGA to generate positions
sc.pl.paga(adata, threshold=0.15, show=False)
print('✓ PAGA layout computed')

# Compute UMAP with PAGA initialization
sc.tl.umap(adata, init_pos='paga')
print('✓ UMAP computed')

# Compute force-directed layout
sc.tl.draw_graph(adata, layout='fr')
print('✓ Graph layout computed')
")

# Update R reference
adata_tf <- py$adata_tf

# ENHANCED ROOT SELECTION - FIXED
cat("\n--- ROOT CELL SELECTION ---\n")
stem_tfs_for_root <- intersect(c('TCF7', 'LEF1', 'BACH2', 'FOXO1'), flat_valid_tfs)
cat("Stem TFs available:", paste(stem_tfs_for_root, collapse=", "), "\n")

cluster_5_indices <- which(adata_tf$obs$seurat_clusters == "5")

if(length(cluster_5_indices) > 0 && length(stem_tfs_for_root) > 0) {
  stem_expr <- adata_tf$X[cluster_5_indices, ]
  if(inherits(stem_expr, "dgCMatrix")) stem_expr <- as.matrix(stem_expr)
  
  if(length(stem_tfs_for_root) > 1) {
    stem_indices <- match(stem_tfs_for_root, colnames(adata_tf$X))
    avg_expr <- rowMeans(stem_expr[, stem_indices])
  } else {
    avg_expr <- stem_expr[, match(stem_tfs_for_root, colnames(adata_tf$X))]
  }
  
  root_cell <- cluster_5_indices[which.max(avg_expr)] - 1L
  
  # Set iroot in Python
  py_run_string(sprintf("adata_tf.uns['iroot'] = %d", root_cell))
  
  cat("✓ Root cell:", root_cell, "(Cluster 5)\n")
} else {
  py_run_string("adata_tf.uns['iroot'] = 0")
  cat("⚠️ Using cell 0 as fallback\n")
}

# DIFFUSION PSEUDOTIME
cat("Computing diffusion pseudotime...\n")

py_run_string("
import scanpy as sc

# Compute diffusion map and pseudotime
sc.tl.diffmap(adata_tf, n_comps=15)
sc.tl.dpt(adata_tf)

print('✓ Diffusion pseudotime computed')
")

# Update R reference
adata_tf <- py$adata_tf

cat("✓ Pseudotime computation complete\n\n")

# 5. VISUALIZE PSEUDOTIME
cat("Plotting pseudotime...\n")
sc$pl$draw_graph(
  adata_tf,
  color = list('seurat_clusters', 'dpt_pseudotime'),
  legend_loc = 'on data',
  legend_fontsize = 10,
  cmap = 'plasma',
  size = 50,
  alpha = 0.8,
  show = FALSE
)

plt_file <- "Output_Figures/PAGA_TF_Pseudotime_5paths.png"
if(!dir.exists("Output_Figures")) dir.create("Output_Figures")
pl$savefig(plt_file, dpi = 300L, bbox_inches = "tight")
pl$close()
cat("✓ Pseudotime saved\n\n")
knitr::include_graphics(plt_file)

# 6. DEFINE 5 TRAJECTORIES
paths <- list(
  list('Stem_to_Terminal_via_Hub', c('5', '11', '1')),
  list('Stem_to_Exhausted_State', c('5', '7', '9')),
  list('Stem_to_Activated_Effector', c('5', '12', '0', '4')),
  list('Stem_via_Hub8_to_Terminal', c('5', '11', '8', '2')),
  list('Stem_to_Treg_Like', c('5', '11', '2', '6'))
)

cat("=== DEFINED TRAJECTORIES ===\n")
for(i in seq_along(paths)) {
  cat(sprintf("%d. %s: %s\n", i, paths[[i]][], paste(paths[[i]][], collapse = " → ")))[1][2]
}
cat("\n")

# 7. PLOT TF DYNAMICS
cat("Plotting TF dynamics...\n")

n_paths <- length(paths)
fig_width <- 5 * n_paths

fig_obj <- pl$subplots(
  ncols = as.integer(n_paths),
  figsize = tuple(fig_width, 12),
  gridspec_kw = dict(wspace = 0.12, left = 0.08)
)

axs_list <- if(n_paths == 1) list(fig_obj[]) else fig_obj[][2]
pl$subplots_adjust(top = 0.92, bottom = 0.1)

for(ipath in seq_along(paths)) {
  path_info <- paths[[ipath]]
  path_name <- path_info[][1]
  path_nodes <- path_info[][2]
  
  cat(sprintf("  Path %d: %s\n", ipath, path_name))
  
  current_ax <- if(n_paths == 1) axs_list[] else axs_list[[ipath]][1]
  
  sc$pl$paga_path(
    adata = adata_tf,
    nodes = path_nodes,
    keys = flat_valid_tfs,
    show_node_names = FALSE,
    ax = current_ax,
    ytick_fontsize = 10L,
    left_margin = 0.15,
    n_avg = 50L,
    annotations = list('dpt_pseudotime'),
    show_yticks = if(ipath == 1) TRUE else FALSE,
    show_colorbar = FALSE,
    color_map = 'RdBu_r',
    groups_key = 'seurat_clusters',
    color_maps_annotations = dict(dpt_pseudotime = 'viridis'),
    title = gsub("_", " ", path_name),
    use_raw = FALSE,
    show = FALSE
  )
}

# Save
output_file <- "Output_Figures/PAGA_TF_Dynamics_5Paths.png"
pl$savefig(output_file, dpi = 300L, bbox_inches = "tight")
pl$close()

cat("\n✓ TF dynamics saved:", output_file, "\n")
knitr::include_graphics(output_file)

cat("\n=== TF TRAJECTORY ANALYSIS COMPLETE ===\n")

```

# PAGA Gene Dynamics for Malignant Sézary Cells (SCT)
```{r , fig.height=16, fig.width=16}


```




