load libraries
Download required
cisTarget motif databases
# Load Bioconductor motif rankings (replaces feather files)
data(hg19_500bpUpstream_motifRanking_cispbOnly)
motifRankings <- hg19_500bpUpstream_motifRanking_cispbOnly
# Load motif annotations (motif -> TF mapping)
data(hg19_motifAnnotation_cisbpOnly)
motifAnnotations <- hg19_motifAnnotation_cisbpOnly
cat("Motif database loaded successfully\n")
Load Seurat Object
All_samples_Merged <- readRDS("/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/0-Seurat_RDS_Final_OBJECT/All_samples_Merged_with_Renamed_Clusters_Cell_state-03-12-2025.rds.rds")
DefaultAssay(All_samples_Merged) <- "RNA"
Idents(All_samples_Merged) <- "seurat_clusters"
cat("Seurat object loaded:", ncol(All_samples_Merged), "cells,", nrow(All_samples_Merged), "genes\n")
Step 1: Gene
filtering
# SCENIC-style gene filtering
minCountsPerGene <- 3 * 0.01 * ncol(expr_mat)
genesLeft <- rownames(expr_mat)[rowSums(expr_mat > 0) >= minCountsPerGene]
cat("Genes before filtering:", nrow(expr_mat), "\n")
cat("Genes after filtering:", length(genesLeft), "\n")
expr_mat_filtered <- expr_mat[genesLeft, ]
expr_mat_filtered_log <- log2(expr_mat_filtered + 1)
Step 2: Co-expression
network with GENIE3
# Run GENIE3 (this may take 30-60 minutes for large datasets)
cat("Running GENIE3 co-expression network inference...\n")
cat("This may take 30-60 minutes. Please wait...\n")
set.seed(123)
weightMatrix <- GENIE3(
as.matrix(expr_mat_filtered_log),
nCores = 10,
verbose = TRUE
)
# Convert to link list
linkList <- getLinkList(weightMatrix)
colnames(linkList) <- c("TF", "Target", "weight")
# Filter for meaningful links
linkList <- linkList[linkList$weight > 0.001, ]
cat("GENIE3 complete:", nrow(linkList), "TF-target links identified\n")
Step 3: Create TF
modules and run motif enrichment
# Create TF modules (co-expression clusters)
tfModules <- list()
for (tf in unique(linkList$TF)) {
targets <- linkList$Target[linkList$TF == tf]
if (length(targets) >= 10) { # minimum 10 targets per TF
tfModules[[tf]] <- targets
}
}
cat("TF modules created:", length(tfModules), "\n")
# Motif enrichment using Bioconductor rankings
cat("Running motif enrichment analysis...\n")
motif_enrichment_results <- cisTarget(
tfModules,
motifRankings = motifRankings,
motifAnnot = motifAnnotations,
nesThreshold = 3.0,
geneErrThreshold = 0.01,
nCores = 10
)
# Extract regulons (TF + high-confidence targets)
regulons <- list()
for (tf in names(tfModules)) {
if (tf %in% names(motif_enrichment_results)) {
# Use motif-enriched targets
regulons[[tf]] <- motif_enrichment_results[[tf]]
} else {
# Keep top co-expressed targets if no motif enrichment
regulons[[tf]] <- head(tfModules[[tf]], 50)
}
}
cat("Regulons created:", length(regulons), "\n")
Step 4: Score cells
with AUCell
# Build cell rankings
cat("Building cell rankings for AUCell...\n")
cells_rankings <- AUCell_buildRankings(
as.matrix(expr_mat_log),
nCores = 10,
plotStats = FALSE
)
# Calculate AUC per regulon per cell
cat("Calculating regulon activity (AUC) per cell...\n")
cells_AUC <- AUCell_calcAUC(
regulons,
cells_rankings,
nCores = 10
)
# Extract AUC matrix (regulons x cells)
regulonAUC <- getAUC(cells_AUC)
cat("AUCell complete:", nrow(regulonAUC), "regulons scored across", ncol(regulonAUC), "cells\n")
Add regulon activity to
Seurat object
# Add as new assay
All_samples_Merged[["Regulons"]] <- CreateAssayObject(
counts = regulonAUC,
data = regulonAUC
)
# Store regulon definitions
All_samples_Merged@misc$SCENIC_Regulons <- regulons
# Set as default assay
DefaultAssay(All_samples_Merged) <- "Regulons"
cat("Regulon activity added to Seurat object\n")
Define key Sézary
syndrome TFs
# Comprehensive list of Sézary-relevant TFs from literature + your heatmap
sezary_tfs <- c(
# Core malignant drivers
"MYC", "JUNB", "IRF4", "BCL11B", "RUNX3",
# Th2/cytokine signaling
"GATA3", "STAT3", "STAT6", "STAT5A", "STAT5B",
# T-cell identity & stemness
"TCF7", "LEF1", "TOX", "BCL6",
# Exhaustion & evasion
"NR4A1", "FOXP3", "IKZF2", "BATF",
# Th1/cytotoxic
"TBX21", "EOMES", "STAT4", "IRF8",
# Migration & homing
"KLF2", "KLF10", "ZEB1",
# Additional SS-specific
"AIRE", "CCND2", "BCL2"
)
# Check which TFs are available in your regulons
existing_tfs <- intersect(sezary_tfs, rownames(All_samples_Merged[["Regulons"]]))
cat("Available TFs in regulons:", length(existing_tfs), "/", length(sezary_tfs), "\n")
cat("TFs found:", paste(head(existing_tfs, 10), collapse = ", "), "...\n")
Visualization: Violin
plots
# Violin plots for key TFs (up to 16 at a time)
n_plot <- min(16, length(existing_tfs))
VlnPlot(
All_samples_Merged,
features = existing_tfs[1:n_plot],
ncol = 4,
pt.size = 0
) +
plot_annotation(title = "Regulon Activity by Cluster (Top Sézary TFs)")
Visualization: UMAP
FeaturePlots
# FeaturePlot for top 12 TFs
top_12_tfs <- head(existing_tfs, 12)
FeaturePlot(
All_samples_Merged,
features = top_12_tfs,
ncol = 3,
pt.size = 0.1,
cols = c("lightgrey", "red")
) +
plot_annotation(title = "Regulon Activity on UMAP")
Visualization: Heatmap
of regulon activity by cluster
# Aggregate regulon activity by cluster
cluster_regulons <- AggregateExpression(
All_samples_Merged,
assays = "Regulons",
group.by = "seurat_clusters",
return.seurat = FALSE
)
# Select available TFs
plot_tfs <- intersect(existing_tfs, rownames(cluster_regulons$Regulons))
cluster_regulons_subset <- cluster_regulons$Regulons[plot_tfs, ]
# Scale regulons (z-score across clusters)
cluster_scaled <- t(scale(t(cluster_regulons_subset)))
# Heatmap
pheatmap(
cluster_scaled,
cluster_rows = TRUE,
cluster_cols = FALSE,
breaks = seq(-3, 3, length.out = 101),
color = viridis(100),
main = "Sézary syndrome regulons by cluster (z-scored)",
fontsize_row = 7,
fontsize_col = 9,
angle_col = 45
)
Visualization:
DoHeatmap for top 15 regulons
# Get top 15 regulons by max cluster mean
cluster_means <- AggregateExpression(
All_samples_Merged,
assays = "Regulons",
group.by = "seurat_clusters"
)$Regulons
top_15 <- names(sort(apply(cluster_means, 1, max), decreasing = TRUE)[1:15])
DoHeatmap(
All_samples_Merged,
features = top_15,
group.by = "seurat_clusters"
) +
NoLegend() +
ggtitle("Top 15 Regulons by Maximum Cluster Activity")
Differential regulon
activity analysis
# Check your cluster IDs first
cat("Available clusters:\n")
print(table(Idents(All_samples_Merged)))
# Clusters 3 and 10 are non-malignant, rest (0-13) are malignant
All_samples_Merged$malignancy_group <- ifelse(
Idents(All_samples_Merged) %in% c("3", "10"), # Non-malignant clusters
"Non_Malignant",
"Malignant"
)
# Verify assignments
cat("\nMalignancy group distribution:\n")
print(table(All_samples_Merged$malignancy_group, Idents(All_samples_Merged)))
# Set identity to malignancy group
Idents(All_samples_Merged) <- "malignancy_group"
# Find differential regulons
de_regulons <- FindMarkers(
All_samples_Merged,
assay = "Regulons",
ident.1 = "Malignant",
ident.2 = "Non_Malignant",
test.use = "wilcox",
min.pct = 0.1,
logfc.threshold = 0.1
)
# Add gene names as column
de_regulons$gene <- rownames(de_regulons)
# Show top 20 differential regulons
cat("\nTop 20 differential regulons:\n")
top_de <- head(de_regulons[order(de_regulons$p_val_adj), ], 20)
print(top_de[, c("gene", "avg_log2FC", "p_val_adj", "pct.1", "pct.2")])
# Volcano plot
ggplot(de_regulons, aes(x = avg_log2FC, y = -log10(p_val_adj))) +
geom_point(aes(color = p_val_adj < 0.05 & abs(avg_log2FC) > 0.25), size = 2, alpha = 0.6) +
scale_color_manual(values = c("TRUE" = "red", "FALSE" = "gray50"), name = "Significant") +
geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "red", alpha = 0.5) +
geom_vline(xintercept = c(-0.25, 0.25), linetype = "dashed", color = "blue", alpha = 0.5) +
geom_text_repel(
data = head(de_regulons[order(de_regulons$p_val_adj), ], 10),
aes(label = gene),
size = 3,
max.overlaps = 10
) +
theme_minimal() +
theme(legend.position = "top") +
labs(
title = "Differential Regulon Activity: Malignant vs Non-Malignant",
subtitle = "Malignant: clusters 0,1,2,4,5,6,7,8,9,11,12,13 | Non-malignant: clusters 3,10",
x = "Log2 Fold Change (Regulon AUC)",
y = "-log10(Adjusted P-value)"
)
# Reset identity to clusters
Idents(All_samples_Merged) <- "seurat_clusters"
Top regulon per
cluster
# Find top regulon per cluster
top_per_cluster <- data.frame()
for (cluster in unique(Idents(All_samples_Merged))) {
cluster_cells <- WhichCells(All_samples_Merged, idents = cluster)
cluster_auc <- regulonAUC[, cluster_cells, drop = FALSE]
# Calculate mean AUC per regulon in this cluster
mean_auc <- rowMeans(cluster_auc)
top_tf <- names(which.max(mean_auc))
top_auc <- max(mean_auc)
top_per_cluster <- rbind(
top_per_cluster,
data.frame(
cluster = cluster,
top_regulon = top_tf,
mean_AUC = round(top_auc, 3)
)
)
}
cat("\nTop regulon per cluster:\n")
print(top_per_cluster)
Save all results
# Save Seurat object with regulons
saveRDS(All_samples_Merged, "All_samples_Merged_with_Regulons_SCENIC.rds")
cat("Seurat object saved: All_samples_Merged_with_Regulons_SCENIC.rds\n")
# Export regulon activity matrix
write.csv(as.data.frame(regulonAUC), "Regulon_AUC_matrix.csv")
cat("AUC matrix saved: Regulon_AUC_matrix.csv\n")
# Export regulon definitions (TF -> target genes)
regulon_df <- data.frame(
TF = rep(names(regulons), sapply(regulons, length)),
Target = unlist(regulons),
row.names = NULL
)
write.csv(regulon_df, "Regulon_TF_Target_List.csv", row.names = FALSE)
cat("Regulon definitions saved: Regulon_TF_Target_List.csv\n")
# Export differential regulon analysis
if (exists("de_regulons")) {
write.csv(de_regulons, "Differential_Regulons_Malignant_vs_NonMalignant.csv")
cat("Differential analysis saved: Differential_Regulons_Malignant_vs_NonMalignant.csv\n")
}
# Export top regulon per cluster
write.csv(top_per_cluster, "Top_Regulon_Per_Cluster.csv", row.names = FALSE)
cat("Top regulons per cluster saved: Top_Regulon_Per_Cluster.csv\n")
cat("\n=== SCENIC Analysis Complete ===\n")
cat("Total regulons identified:", nrow(regulonAUC), "\n")
cat("Cells analyzed:", ncol(regulonAUC), "\n")
cat("Key Sézary TFs available:", length(existing_tfs), "\n")
---
title: "GRN Analysis (SCENIC)"
author: "Nasir Mahmood Abbasi"
date: "`r format(Sys.time(), '%B %d, %Y')`"
output:
  html_notebook:
    number_sections: true
    toc: true
    toc_float:
      collapsed: true
    theme: journal
---


# load libraries
```{r setup, include=FALSE}
# Load below libraries
library(SCENIC)
library(AUCell)
library(RcisTarget)
library(RcisTarget.hg19.motifDBs.cisbpOnly.500bp)
library(GENIE3)
library(Seurat)
library(dplyr)
library(ggplot2)
library(ggrepel)  # Added for volcano plot labels
library(pheatmap)
library(patchwork)
library(viridis)
```

# Download required cisTarget motif databases
```{r}
# Load Bioconductor motif rankings (replaces feather files)
data(hg19_500bpUpstream_motifRanking_cispbOnly)
motifRankings <- hg19_500bpUpstream_motifRanking_cispbOnly

# Load motif annotations (motif -> TF mapping)
data(hg19_motifAnnotation_cisbpOnly)
motifAnnotations <- hg19_motifAnnotation_cisbpOnly

cat("Motif database loaded successfully\n")
```

# Load Seurat Object 
```{r}

All_samples_Merged <- readRDS("/home/bioinfo/1-Thesis_Final_Year_2025/2025-Year3_Analysis/1-scRNA_RESULTS-19-11-2025/0-Seurat_RDS_Final_OBJECT/All_samples_Merged_with_Renamed_Clusters_Cell_state-03-12-2025.rds.rds")

DefaultAssay(All_samples_Merged) <- "RNA"
Idents(All_samples_Merged) <- "seurat_clusters"

cat("Seurat object loaded:", ncol(All_samples_Merged), "cells,", nrow(All_samples_Merged), "genes\n")  
```

# Extract expression matrix 
```{r}

# Seurat v5: Extract counts using LayerData
expr_mat <- LayerData(All_samples_Merged[["RNA"]], layer = "counts")
dim(expr_mat)

# Log-transform
expr_mat_log <- log2(expr_mat + 1)

cat("Expression matrix extracted:", nrow(expr_mat), "genes,", ncol(expr_mat), "cells\n")

```

# Step 1: Gene filtering
```{r}

# SCENIC-style gene filtering
minCountsPerGene <- 3 * 0.01 * ncol(expr_mat)
genesLeft <- rownames(expr_mat)[rowSums(expr_mat > 0) >= minCountsPerGene]

cat("Genes before filtering:", nrow(expr_mat), "\n")
cat("Genes after filtering:", length(genesLeft), "\n")

expr_mat_filtered <- expr_mat[genesLeft, ]
expr_mat_filtered_log <- log2(expr_mat_filtered + 1)


```

# Step 2: Co-expression network with GENIE3
```{r}
# Run GENIE3 (this may take 30-60 minutes for large datasets)
cat("Running GENIE3 co-expression network inference...\n")
cat("This may take 30-60 minutes. Please wait...\n")

set.seed(123)
weightMatrix <- GENIE3(
  as.matrix(expr_mat_filtered_log),
  nCores = 10,
  verbose = TRUE
)

# Convert to link list
linkList <- getLinkList(weightMatrix)
colnames(linkList) <- c("TF", "Target", "weight")

# Filter for meaningful links
linkList <- linkList[linkList$weight > 0.001, ]

cat("GENIE3 complete:", nrow(linkList), "TF-target links identified\n")

```

# Step 3: Create TF modules and run motif enrichment
```{r}

# Create TF modules (co-expression clusters)
tfModules <- list()
for (tf in unique(linkList$TF)) {
  targets <- linkList$Target[linkList$TF == tf]
  if (length(targets) >= 10) {  # minimum 10 targets per TF
    tfModules[[tf]] <- targets
  }
}

cat("TF modules created:", length(tfModules), "\n")

# Motif enrichment using Bioconductor rankings
cat("Running motif enrichment analysis...\n")

motif_enrichment_results <- cisTarget(
  tfModules,
  motifRankings = motifRankings,
  motifAnnot = motifAnnotations,
  nesThreshold = 3.0,
  geneErrThreshold = 0.01,
  nCores = 10
)

# Extract regulons (TF + high-confidence targets)
regulons <- list()
for (tf in names(tfModules)) {
  if (tf %in% names(motif_enrichment_results)) {
    # Use motif-enriched targets
    regulons[[tf]] <- motif_enrichment_results[[tf]]
  } else {
    # Keep top co-expressed targets if no motif enrichment
    regulons[[tf]] <- head(tfModules[[tf]], 50)
  }
}

cat("Regulons created:", length(regulons), "\n")

```

# Step 4: Score cells with AUCell
```{r}

# Build cell rankings
cat("Building cell rankings for AUCell...\n")
cells_rankings <- AUCell_buildRankings(
  as.matrix(expr_mat_log),
  nCores = 10,
  plotStats = FALSE
)

# Calculate AUC per regulon per cell
cat("Calculating regulon activity (AUC) per cell...\n")
cells_AUC <- AUCell_calcAUC(
  regulons,
  cells_rankings,
  nCores = 10
)

# Extract AUC matrix (regulons x cells)
regulonAUC <- getAUC(cells_AUC)

cat("AUCell complete:", nrow(regulonAUC), "regulons scored across", ncol(regulonAUC), "cells\n")

```


# Add regulon activity to Seurat object
```{r}



# Add as new assay
All_samples_Merged[["Regulons"]] <- CreateAssayObject(
  counts = regulonAUC,
  data = regulonAUC
)

# Store regulon definitions
All_samples_Merged@misc$SCENIC_Regulons <- regulons

# Set as default assay
DefaultAssay(All_samples_Merged) <- "Regulons"

cat("Regulon activity added to Seurat object\n")  


```


# Define key Sézary syndrome TFs
```{r}

# Comprehensive list of Sézary-relevant TFs from literature + your heatmap
sezary_tfs <- c(
  # Core malignant drivers
  "MYC", "JUNB", "IRF4", "BCL11B", "RUNX3", 
  
  # Th2/cytokine signaling
  "GATA3", "STAT3", "STAT6", "STAT5A", "STAT5B",
  
  # T-cell identity & stemness
  "TCF7", "LEF1", "TOX", "BCL6",
  
  # Exhaustion & evasion
  "NR4A1", "FOXP3", "IKZF2", "BATF",
  
  # Th1/cytotoxic
  "TBX21", "EOMES", "STAT4", "IRF8",
  
  # Migration & homing
  "KLF2", "KLF10", "ZEB1",
  
  # Additional SS-specific
  "AIRE", "CCND2", "BCL2"
)

# Check which TFs are available in your regulons
existing_tfs <- intersect(sezary_tfs, rownames(All_samples_Merged[["Regulons"]]))

cat("Available TFs in regulons:", length(existing_tfs), "/", length(sezary_tfs), "\n")
cat("TFs found:", paste(head(existing_tfs, 10), collapse = ", "), "...\n")


```


# Visualization: Violin plots
```{r}

# Violin plots for key TFs (up to 16 at a time)
n_plot <- min(16, length(existing_tfs))

VlnPlot(
  All_samples_Merged, 
  features = existing_tfs[1:n_plot], 
  ncol = 4, 
  pt.size = 0
) + 
  plot_annotation(title = "Regulon Activity by Cluster (Top Sézary TFs)")

```

# Visualization: UMAP FeaturePlots
```{r}
# FeaturePlot for top 12 TFs
top_12_tfs <- head(existing_tfs, 12)

FeaturePlot(
  All_samples_Merged, 
  features = top_12_tfs, 
  ncol = 3, 
  pt.size = 0.1,
  cols = c("lightgrey", "red")
) + 
  plot_annotation(title = "Regulon Activity on UMAP")
```

# Visualization: Heatmap of regulon activity by cluster
```{r}

# Aggregate regulon activity by cluster
cluster_regulons <- AggregateExpression(
  All_samples_Merged,
  assays = "Regulons",
  group.by = "seurat_clusters",
  return.seurat = FALSE
)

# Select available TFs
plot_tfs <- intersect(existing_tfs, rownames(cluster_regulons$Regulons))
cluster_regulons_subset <- cluster_regulons$Regulons[plot_tfs, ]

# Scale regulons (z-score across clusters)
cluster_scaled <- t(scale(t(cluster_regulons_subset)))

# Heatmap
pheatmap(
  cluster_scaled,
  cluster_rows = TRUE,
  cluster_cols = FALSE,
  breaks = seq(-3, 3, length.out = 101),
  color = viridis(100),
  main = "Sézary syndrome regulons by cluster (z-scored)",
  fontsize_row = 7,
  fontsize_col = 9,
  angle_col = 45
)

```


# Visualization: DoHeatmap for top 15 regulons
```{r}
# Get top 15 regulons by max cluster mean
cluster_means <- AggregateExpression(
  All_samples_Merged, 
  assays = "Regulons", 
  group.by = "seurat_clusters"
)$Regulons

top_15 <- names(sort(apply(cluster_means, 1, max), decreasing = TRUE)[1:15])

DoHeatmap(
  All_samples_Merged, 
  features = top_15, 
  group.by = "seurat_clusters"
) + 
  NoLegend() +
  ggtitle("Top 15 Regulons by Maximum Cluster Activity")

```


# Differential regulon activity analysis
```{r}

# Check your cluster IDs first
cat("Available clusters:\n")
print(table(Idents(All_samples_Merged)))

# Clusters 3 and 10 are non-malignant, rest (0-13) are malignant
All_samples_Merged$malignancy_group <- ifelse(
  Idents(All_samples_Merged) %in% c("3", "10"),  # Non-malignant clusters
  "Non_Malignant", 
  "Malignant"
)

# Verify assignments
cat("\nMalignancy group distribution:\n")
print(table(All_samples_Merged$malignancy_group, Idents(All_samples_Merged)))

# Set identity to malignancy group
Idents(All_samples_Merged) <- "malignancy_group"

# Find differential regulons
de_regulons <- FindMarkers(
  All_samples_Merged,
  assay = "Regulons",
  ident.1 = "Malignant",
  ident.2 = "Non_Malignant",
  test.use = "wilcox",
  min.pct = 0.1,
  logfc.threshold = 0.1
)

# Add gene names as column
de_regulons$gene <- rownames(de_regulons)

# Show top 20 differential regulons
cat("\nTop 20 differential regulons:\n")
top_de <- head(de_regulons[order(de_regulons$p_val_adj), ], 20)
print(top_de[, c("gene", "avg_log2FC", "p_val_adj", "pct.1", "pct.2")])

# Volcano plot
ggplot(de_regulons, aes(x = avg_log2FC, y = -log10(p_val_adj))) +
  geom_point(aes(color = p_val_adj < 0.05 & abs(avg_log2FC) > 0.25), size = 2, alpha = 0.6) +
  scale_color_manual(values = c("TRUE" = "red", "FALSE" = "gray50"), name = "Significant") +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "red", alpha = 0.5) +
  geom_vline(xintercept = c(-0.25, 0.25), linetype = "dashed", color = "blue", alpha = 0.5) +
  geom_text_repel(
    data = head(de_regulons[order(de_regulons$p_val_adj), ], 10),
    aes(label = gene),
    size = 3,
    max.overlaps = 10
  ) +
  theme_minimal() +
  theme(legend.position = "top") +
  labs(
    title = "Differential Regulon Activity: Malignant vs Non-Malignant",
    subtitle = "Malignant: clusters 0,1,2,4,5,6,7,8,9,11,12,13 | Non-malignant: clusters 3,10",
    x = "Log2 Fold Change (Regulon AUC)", 
    y = "-log10(Adjusted P-value)"
  )

# Reset identity to clusters
Idents(All_samples_Merged) <- "seurat_clusters"

```

# Top regulon per cluster
```{r}
# Find top regulon per cluster
top_per_cluster <- data.frame()

for (cluster in unique(Idents(All_samples_Merged))) {
  cluster_cells <- WhichCells(All_samples_Merged, idents = cluster)
  cluster_auc <- regulonAUC[, cluster_cells, drop = FALSE]
  
  # Calculate mean AUC per regulon in this cluster
  mean_auc <- rowMeans(cluster_auc)
  top_tf <- names(which.max(mean_auc))
  top_auc <- max(mean_auc)
  
  top_per_cluster <- rbind(
    top_per_cluster, 
    data.frame(
      cluster = cluster, 
      top_regulon = top_tf,
      mean_AUC = round(top_auc, 3)
    )
  )
}

cat("\nTop regulon per cluster:\n")
print(top_per_cluster)

```

# Multi-panel publication figure
```{r}
# Panel 1: Top 6 TFs FeaturePlot
p1 <- FeaturePlot(
  All_samples_Merged, 
  features = head(existing_tfs, 6), 
  ncol = 2, 
  pt.size = 0.1
) + 
  plot_annotation(title = "A. Regulon Activity on UMAP")

# Panel 2: Top 6 TFs violin by cluster
p2 <- VlnPlot(
  All_samples_Merged, 
  features = head(existing_tfs, 6), 
  ncol = 2, 
  pt.size = 0
) + 
  plot_annotation(title = "B. Regulon Activity by Cluster")

# Panel 3: Heatmap of top 15 regulons
p3 <- DoHeatmap(
  All_samples_Merged, 
  features = top_15, 
  group.by = "seurat_clusters"
) + 
  NoLegend() +
  ggtitle("C. Top 15 Regulons Heatmap")

# Combine panels
combined_plot <- p1 / p2 / p3
print(combined_plot)

# Save figure
ggsave(
  "Sezary_SCENIC_Figure.pdf", 
  plot = combined_plot, 
  width = 16, 
  height = 22,
  limitsize = FALSE
)

cat("\nFigure saved as: Sezary_SCENIC_Figure.pdf\n")

```

# Save all results
```{r, fig.width=12, fig.height=6}
# Save Seurat object with regulons
saveRDS(All_samples_Merged, "All_samples_Merged_with_Regulons_SCENIC.rds")
cat("Seurat object saved: All_samples_Merged_with_Regulons_SCENIC.rds\n")

# Export regulon activity matrix
write.csv(as.data.frame(regulonAUC), "Regulon_AUC_matrix.csv")
cat("AUC matrix saved: Regulon_AUC_matrix.csv\n")

# Export regulon definitions (TF -> target genes)
regulon_df <- data.frame(
  TF = rep(names(regulons), sapply(regulons, length)),
  Target = unlist(regulons),
  row.names = NULL
)
write.csv(regulon_df, "Regulon_TF_Target_List.csv", row.names = FALSE)
cat("Regulon definitions saved: Regulon_TF_Target_List.csv\n")

# Export differential regulon analysis
if (exists("de_regulons")) {
  write.csv(de_regulons, "Differential_Regulons_Malignant_vs_NonMalignant.csv")
  cat("Differential analysis saved: Differential_Regulons_Malignant_vs_NonMalignant.csv\n")
}

# Export top regulon per cluster
write.csv(top_per_cluster, "Top_Regulon_Per_Cluster.csv", row.names = FALSE)
cat("Top regulons per cluster saved: Top_Regulon_Per_Cluster.csv\n")

cat("\n=== SCENIC Analysis Complete ===\n")
cat("Total regulons identified:", nrow(regulonAUC), "\n")
cat("Cells analyzed:", ncol(regulonAUC), "\n")
cat("Key Sézary TFs available:", length(existing_tfs), "\n")

```



