1 load libraries

# Data Processing
library(dplyr)
library(Seurat)
library(tibble)
library(tidyr)
library(stringr)

# Visualization
library(ggplot2)
library(ComplexHeatmap)
library(patchwork)
library(SCpubr)

# Regulatory Network Inference
library(decoupleR)
library(dorothea)
data(dorothea_hs, package = "dorothea")
library(tictoc)

2 Load Seurat Object


# Load your Seurat Object
seurat_obj <- readRDS("Output_Objects/Seurat_Object_With_TF_Activity.rds")

Idents(seurat_obj) <- "seurat_clusters"
print("Object Loaded.")
[1] "Object Loaded."

2.1 Run this code block to restore activities instantly:


# If 'activities' is missing but 'dorothea' assay exists, reconstruct it:
if (!exists("activities") && "dorothea" %in% names(seurat_obj@assays)) {
  
  print("Reconstructing 'activities' dataframe from Seurat object...")
  
  # Extract the matrix (Seurat v5 uses 'layer' instead of 'slot')
  # Since you ran ScaleData, we use 'scale.data'
  mat <- GetAssayData(seurat_obj, assay = "dorothea", layer = "scale.data")
  
  # Convert to long format (what SCpubr needs)
  activities <- as.data.frame(mat) %>%
    rownames_to_column("source") %>%
    pivot_longer(cols = -source, names_to = "condition", values_to = "score") %>%
    mutate(statistic = "norm_wmean") # SCpubr requires this column
    
  print("Activities dataframe restored!")
}
[1] "Reconstructing 'activities' dataframe from Seurat object..."
[1] "Activities dataframe restored!"

2.2 SCpubr Heatmap Visualization-Heatmap of averaged scores

library(SCpubr)
# General heatmap (Top Variable TFs)
out <- SCpubr::do_TFActivityPlot(sample = seurat_obj,
                                 activities = activities)
p1 <- out$heatmaps$average_scores
print(p1)

# 1. Save as PDF
pdf("Output_Figures/SCpubr_Heatmap_Default.pdf", width = 10, height = 8)
print(p1) # ComplexHeatmap requires explicit print() inside pdf()
dev.off()
png 
  2 
# 2. Save as PNG
png("Output_Figures/SCpubr_Heatmap_Default.png", width = 10 * 300, height = 8 * 300, res = 300)
print(p1)
dev.off()
png 
  2 

2.3 Set the scale limits


out <- SCpubr::do_TFActivityPlot(sample = seurat_obj,
                                 activities = activities,
                                 min.cutoff = -1.5,
                                 max.cutoff = 1.5)
p2 <- out$heatmaps$average_scores
print(p2)

# Save ComplexHeatmap properly
pdf("Output_Figures/SCpubr_Heatmap_Scaled.pdf", width = 10, height = 8)
print(p2)
dev.off()
png 
  2 
png("Output_Figures/SCpubr_Heatmap_Scaled.png", width = 10 * 300, height = 8 * 300, res = 300)
print(p2)
dev.off()
png 
  2 

2.4 Enforce Symmetry (Best for Manuscript)


out <- SCpubr::do_TFActivityPlot(sample = seurat_obj,
                                 activities = activities,
                                 min.cutoff = -1.5,
                                 max.cutoff = 1.5,
                                 enforce_symmetry = TRUE)
p3 <- out$heatmaps$average_scores
print(p3)

pdf("Output_Figures/SCpubr_Heatmap_Symmetric.pdf", width = 10, height = 8)
print(p3)
dev.off()
png 
  2 
png("Output_Figures/SCpubr_Heatmap_Symmetric.png", width = 10 * 300, height = 8 * 300, res = 300)
print(p3)
dev.off()
png 
  2 

2.5 Top 40 TFs

out <- SCpubr::do_TFActivityPlot(sample = seurat_obj,
                                 activities = activities,
                                 n_tfs = 40)
p4 <- out$heatmaps$average_scores
print(p4)

pdf("Output_Figures/SCpubr_Heatmap_Top40.pdf", width = 14, height = 6)
print(p4)
dev.off()
png 
  2 
png("Output_Figures/SCpubr_Heatmap_Top40.png", width = 14 * 300, height = 6 * 300, res = 300)
print(p4)
dev.off()
png 
  2 

2.6 Top 100 TFs (Figure A for Manuscript)


out <- SCpubr::do_TFActivityPlot(sample = seurat_obj,
                                 activities = activities,
                                 n_tfs = 100)
p5 <- out$heatmaps$average_scores
print(p5)

pdf("Output_Figures/Figure_3.16A_Global_TF_Heatmap_Top100.pdf", width = 32, height = 12)
print(p5)
dev.off()
png 
  2 
png("Output_Figures/Figure_3.16A_Global_TF_Heatmap_Top100.png", width = 32 * 300, height = 12 * 300, res = 300)
print(p5)
dev.off()
png 
  2 

3 Differential TF Activity (Malignant vs. Normal)


# Define Comparison: Clusters 3 & 10 (Normal) vs Rest (Malignant)
non_malignant_clusters <- c(3, 10)
seurat_obj$Condition <- ifelse(seurat_obj$seurat_clusters %in% non_malignant_clusters, "Non-Malignant", "Malignant")

# Perform Differential Analysis on TF Activity
DefaultAssay(seurat_obj) <- "dorothea"
Idents(seurat_obj) <- "Condition"

print("Running FindMarkers on TF Activity...")
[1] "Running FindMarkers on TF Activity..."
diff_tfs <- FindMarkers(seurat_obj, 
                        ident.1 = "Malignant", 
                        ident.2 = "Non-Malignant", 
                        logfc.threshold = 0, # Get all for volcano
                        min.pct = 0)

# Add gene column for labeling
diff_tfs$gene <- rownames(diff_tfs)

# Save Results
write.csv(diff_tfs, "Output_Tables/Differential_TF_Activity_Malignant_vs_Normal.csv")
print("Differential analysis complete.")
[1] "Differential analysis complete."

4 Figure C: Volcano Plot (Loss of Homeostasis)

# Highlight key drivers mentioned in text
highlight_tfs <- c("FOXO1", "MYC", "E2F1", "E2F4", "FOXM1", "RELA", "IRF1", "STAT1")

p_volcano <- SCpubr::do_VolcanoPlot(sample = seurat_obj,
                                    de_genes = diff_tfs
                                   )

ggsave("Output_Figures/Figure_3.16C_Volcano_TF_Activity.pdf", plot = p_volcano, width = 8, height = 6)
ggsave("Output_Figures/Figure_3.16C_Volcano_TF_Activity.png", plot = p_volcano, width = 8, height = 6, dpi = 300)
print(p_volcano)

5 Updated Figure C: EnhancedVolcano

library(EnhancedVolcano)

# Highlight key drivers mentioned in text
highlight_tfs <- c("FOXO1", "MYC", "E2F1", "E2F4", "FOXM1", "RELA", "IRF1", "STAT1", "TOX", "GATA3")

# Create the EnhancedVolcano Plot
p_volcano <- EnhancedVolcano(diff_tfs,
    lab = rownames(diff_tfs),
    x = 'avg_log2FC',
    y = 'p_val_adj',
    
    title = 'Differential TF Activity: Malignant vs. Non-Malignant',
    subtitle = 'DecoupleR Inferred Activity',
    pCutoff = 1e-5,
    FCcutoff = 0.5,
    pointSize = 3.0,
    labSize = 5.0,
    colAlpha = 0.8,
    legendPosition = 'right',
    legendLabSize = 12,
    legendIconSize = 4.0,
    drawConnectors = TRUE, # Draw lines to labels to avoid overlap
    widthConnectors = 0.5,
    colConnectors = 'grey30',
    # Custom Colors: Down (Blue), Up (Red), NS (Grey)
    col = c("grey30", "forestgreen", "royalblue", "firebrick2")
)

# Print
print(p_volcano)


# Save
ggsave("Output_Figures/Figure_3.16C_EnhancedVolcano_TF_Activity.pdf", plot = p_volcano, width = 10, height = 8)
ggsave("Output_Figures/Figure_3.16C_EnhancedVolcano_TF_Activity.png", plot = p_volcano, width = 10, height = 8, dpi = 300)

6 Figure D: Mixed Feature Plots (Activity vs Expression)


# We manually construct this to mix Assays

# Part 1: TF Activity Plots (Assay: dorothea)
DefaultAssay(seurat_obj) <- "dorothea"

p1 <- FeaturePlot(seurat_obj, features = "FOXO1", order = T, reduction = "umap") + 
      scale_color_gradientn(colors = c("grey90", "firebrick")) + ggtitle("FOXO1 Activity (Homeostasis)")
p2 <- FeaturePlot(seurat_obj, features = "RELA", order = T, reduction = "umap") + 
      scale_color_gradientn(colors = c("grey90", "firebrick")) + ggtitle("RELA Activity (Inflammatory)")
p3 <- FeaturePlot(seurat_obj, features = "IRF1", order = T, reduction = "umap") + 
      scale_color_gradientn(colors = c("grey90", "firebrick")) + ggtitle("IRF1 Activity (IFN-Response)")
p4 <- FeaturePlot(seurat_obj, features = "FOXM1", order = T, reduction = "umap") + 
      scale_color_gradientn(colors = c("grey90", "firebrick")) + ggtitle("FOXM1 Activity (Proliferation)")

# Part 2: Gene Expression Plots (Assay: SCT/RNA)
DefaultAssay(seurat_obj) <- "SCT"

p5 <- FeaturePlot(seurat_obj, features = "HMGA2", order = T, reduction = "umap") + 
      scale_color_gradientn(colors = c("grey90", "darkblue")) + ggtitle("HMGA2 Expression (Stem-like)")
p6 <- FeaturePlot(seurat_obj, features = "SOX4", order = T, reduction = "umap") + 
      scale_color_gradientn(colors = c("grey90", "darkblue")) + ggtitle("SOX4 Expression (Stem-like)")

# Combine
final_figure_D <- (p1 | p2 | p3) / (p4 | p5 | p6) + 
                  plot_annotation(title = "Figure 3.16D: Key Drivers (Red=Activity, Blue=Expression)")

ggsave("Output_Figures/Figure_3.16D_Mixed_Features.pdf", plot = final_figure_D, width = 14, height = 10)
ggsave("Output_Figures/Figure_3.16D_Mixed_Features.png", plot = final_figure_D, width = 14, height = 10, dpi = 300)
print(final_figure_D)

7 Figure E (ComplexHeatmap) chunk

8 Final Save

print("Analysis pipeline complete. All figures and objects saved in Output_Figures folder.")
[1] "Analysis pipeline complete. All figures and objects saved in Output_Figures folder."
---
title: "TF Activity Inference Analysis (DecoupleR + DoRothEA) Visualization"
author: "Nasir Mahmood Abbasi"
date: "`r Sys.Date()`"
output:
  html_notebook:
    number_sections: true
    toc: true
    toc_float:
      collapsed: true
    theme: journal
---


# load libraries
```{r setup, include=TRUE}
# Data Processing
library(dplyr)
library(Seurat)
library(tibble)
library(tidyr)
library(stringr)

# Visualization
library(ggplot2)
library(ComplexHeatmap)
library(patchwork)
library(SCpubr)

# Regulatory Network Inference
library(decoupleR)
library(dorothea)
data(dorothea_hs, package = "dorothea")
library(tictoc)


```

# Load Seurat Object 
```{r}

# Load your Seurat Object
seurat_obj <- readRDS("Output_Objects/Seurat_Object_With_TF_Activity.rds")

Idents(seurat_obj) <- "seurat_clusters"
print("Object Loaded.")
```

## Run this code block to restore activities instantly:
```{r}

# If 'activities' is missing but 'dorothea' assay exists, reconstruct it:
if (!exists("activities") && "dorothea" %in% names(seurat_obj@assays)) {
  
  print("Reconstructing 'activities' dataframe from Seurat object...")
  
  # Extract the matrix (Seurat v5 uses 'layer' instead of 'slot')
  # Since you ran ScaleData, we use 'scale.data'
  mat <- GetAssayData(seurat_obj, assay = "dorothea", layer = "scale.data")
  
  # Convert to long format (what SCpubr needs)
  activities <- as.data.frame(mat) %>%
    rownames_to_column("source") %>%
    pivot_longer(cols = -source, names_to = "condition", values_to = "score") %>%
    mutate(statistic = "norm_wmean") # SCpubr requires this column
    
  print("Activities dataframe restored!")
}
```
## SCpubr Heatmap Visualization-Heatmap of averaged scores
```{r, fig.height=8, fig.width=10}
library(SCpubr)
# General heatmap (Top Variable TFs)
out <- SCpubr::do_TFActivityPlot(sample = seurat_obj,
                                 activities = activities)
p1 <- out$heatmaps$average_scores
print(p1)

# 1. Save as PDF
pdf("Output_Figures/SCpubr_Heatmap_Default.pdf", width = 10, height = 8)
print(p1) # ComplexHeatmap requires explicit print() inside pdf()
dev.off()

# 2. Save as PNG
png("Output_Figures/SCpubr_Heatmap_Default.png", width = 10 * 300, height = 8 * 300, res = 300)
print(p1)
dev.off()
```


## Set the scale limits
```{r, fig.height=8, fig.width=10}

out <- SCpubr::do_TFActivityPlot(sample = seurat_obj,
                                 activities = activities,
                                 min.cutoff = -1.5,
                                 max.cutoff = 1.5)
p2 <- out$heatmaps$average_scores
print(p2)

# Save ComplexHeatmap properly
pdf("Output_Figures/SCpubr_Heatmap_Scaled.pdf", width = 10, height = 8)
print(p2)
dev.off()

png("Output_Figures/SCpubr_Heatmap_Scaled.png", width = 10 * 300, height = 8 * 300, res = 300)
print(p2)
dev.off()


```

## Enforce Symmetry (Best for Manuscript)
```{r, fig.height=8, fig.width=10}

out <- SCpubr::do_TFActivityPlot(sample = seurat_obj,
                                 activities = activities,
                                 min.cutoff = -1.5,
                                 max.cutoff = 1.5,
                                 enforce_symmetry = TRUE)
p3 <- out$heatmaps$average_scores
print(p3)

pdf("Output_Figures/SCpubr_Heatmap_Symmetric.pdf", width = 10, height = 8)
print(p3)
dev.off()

png("Output_Figures/SCpubr_Heatmap_Symmetric.png", width = 10 * 300, height = 8 * 300, res = 300)
print(p3)
dev.off()

```

## Top 40 TFs
```{r, fig.height=6, fig.width=14}
out <- SCpubr::do_TFActivityPlot(sample = seurat_obj,
                                 activities = activities,
                                 n_tfs = 40)
p4 <- out$heatmaps$average_scores
print(p4)

pdf("Output_Figures/SCpubr_Heatmap_Top40.pdf", width = 14, height = 6)
print(p4)
dev.off()

png("Output_Figures/SCpubr_Heatmap_Top40.png", width = 14 * 300, height = 6 * 300, res = 300)
print(p4)
dev.off()
```


## Top 100 TFs (Figure A for Manuscript)
```{r, fig.height=12, fig.width=32}

out <- SCpubr::do_TFActivityPlot(sample = seurat_obj,
                                 activities = activities,
                                 n_tfs = 100)
p5 <- out$heatmaps$average_scores
print(p5)

pdf("Output_Figures/Figure_3.16A_Global_TF_Heatmap_Top100.pdf", width = 32, height = 12)
print(p5)
dev.off()

png("Output_Figures/Figure_3.16A_Global_TF_Heatmap_Top100.png", width = 32 * 300, height = 12 * 300, res = 300)
print(p5)
dev.off()
```


# Differential TF Activity (Malignant vs. Normal)
```{r, fig.height=12, fig.width=16}

# Define Comparison: Clusters 3 & 10 (Normal) vs Rest (Malignant)
non_malignant_clusters <- c(3, 10)
seurat_obj$Condition <- ifelse(seurat_obj$seurat_clusters %in% non_malignant_clusters, "Non-Malignant", "Malignant")

# Perform Differential Analysis on TF Activity
DefaultAssay(seurat_obj) <- "dorothea"
Idents(seurat_obj) <- "Condition"

print("Running FindMarkers on TF Activity...")
diff_tfs <- FindMarkers(seurat_obj, 
                        ident.1 = "Malignant", 
                        ident.2 = "Non-Malignant", 
                        logfc.threshold = 0, # Get all for volcano
                        min.pct = 0)

# Add gene column for labeling
diff_tfs$gene <- rownames(diff_tfs)

# Save Results
write.csv(diff_tfs, "Output_Tables/Differential_TF_Activity_Malignant_vs_Normal.csv")
print("Differential analysis complete.")

```


# Figure C: Volcano Plot (Loss of Homeostasis)
```{r, fig.height=6, fig.width=8}
# Highlight key drivers mentioned in text
highlight_tfs <- c("FOXO1", "MYC", "E2F1", "E2F4", "FOXM1", "RELA", "IRF1", "STAT1")

p_volcano <- SCpubr::do_VolcanoPlot(sample = seurat_obj,
                                    de_genes = diff_tfs
                                   )

ggsave("Output_Figures/Figure_3.16C_Volcano_TF_Activity.pdf", plot = p_volcano, width = 8, height = 6)
ggsave("Output_Figures/Figure_3.16C_Volcano_TF_Activity.png", plot = p_volcano, width = 8, height = 6, dpi = 300)
print(p_volcano)
```


# Updated Figure C: EnhancedVolcano
```{r, fig.height=12, fig.width=16}
library(EnhancedVolcano)

# Highlight key drivers mentioned in text
highlight_tfs <- c("FOXO1", "MYC", "E2F1", "E2F4", "FOXM1", "RELA", "IRF1", "STAT1", "TOX", "GATA3")

# Create the EnhancedVolcano Plot
p_volcano <- EnhancedVolcano(diff_tfs,
    lab = rownames(diff_tfs),
    x = 'avg_log2FC',
    y = 'p_val_adj',
    
    title = 'Differential TF Activity: Malignant vs. Non-Malignant',
    subtitle = 'DecoupleR Inferred Activity',
    pCutoff = 1e-5,
    FCcutoff = 0.5,
    pointSize = 3.0,
    labSize = 5.0,
    colAlpha = 0.8,
    legendPosition = 'right',
    legendLabSize = 12,
    legendIconSize = 4.0,
    drawConnectors = TRUE, # Draw lines to labels to avoid overlap
    widthConnectors = 0.5,
    colConnectors = 'grey30',
    # Custom Colors: Down (Blue), Up (Red), NS (Grey)
    col = c("grey30", "forestgreen", "royalblue", "firebrick2")
)

# Print
print(p_volcano)

# Save
ggsave("Output_Figures/Figure_3.16C_EnhancedVolcano_TF_Activity.pdf", plot = p_volcano, width = 10, height = 8)
ggsave("Output_Figures/Figure_3.16C_EnhancedVolcano_TF_Activity.png", plot = p_volcano, width = 10, height = 8, dpi = 300)
```








# Figure D: Mixed Feature Plots (Activity vs Expression)
```{r, fig.height=12, fig.width=16}

# We manually construct this to mix Assays

# Part 1: TF Activity Plots (Assay: dorothea)
DefaultAssay(seurat_obj) <- "dorothea"

p1 <- FeaturePlot(seurat_obj, features = "FOXO1", order = T, reduction = "umap") + 
      scale_color_gradientn(colors = c("grey90", "firebrick")) + ggtitle("FOXO1 Activity (Homeostasis)")
p2 <- FeaturePlot(seurat_obj, features = "RELA", order = T, reduction = "umap") + 
      scale_color_gradientn(colors = c("grey90", "firebrick")) + ggtitle("RELA Activity (Inflammatory)")
p3 <- FeaturePlot(seurat_obj, features = "IRF1", order = T, reduction = "umap") + 
      scale_color_gradientn(colors = c("grey90", "firebrick")) + ggtitle("IRF1 Activity (IFN-Response)")
p4 <- FeaturePlot(seurat_obj, features = "FOXM1", order = T, reduction = "umap") + 
      scale_color_gradientn(colors = c("grey90", "firebrick")) + ggtitle("FOXM1 Activity (Proliferation)")

# Part 2: Gene Expression Plots (Assay: SCT/RNA)
DefaultAssay(seurat_obj) <- "SCT"

p5 <- FeaturePlot(seurat_obj, features = "HMGA2", order = T, reduction = "umap") + 
      scale_color_gradientn(colors = c("grey90", "darkblue")) + ggtitle("HMGA2 Expression (Stem-like)")
p6 <- FeaturePlot(seurat_obj, features = "SOX4", order = T, reduction = "umap") + 
      scale_color_gradientn(colors = c("grey90", "darkblue")) + ggtitle("SOX4 Expression (Stem-like)")

# Combine
final_figure_D <- (p1 | p2 | p3) / (p4 | p5 | p6) + 
                  plot_annotation(title = "Figure 3.16D: Key Drivers (Red=Activity, Blue=Expression)")

ggsave("Output_Figures/Figure_3.16D_Mixed_Features.pdf", plot = final_figure_D, width = 14, height = 10)
ggsave("Output_Figures/Figure_3.16D_Mixed_Features.png", plot = final_figure_D, width = 14, height = 10, dpi = 300)
print(final_figure_D)
```




# Figure E (ComplexHeatmap) chunk
```{r, fig.height=6, fig.width=10}

library(ComplexHeatmap)
library(circlize)
library(Matrix)

# Curated list of Sezary drivers
literature_tfs <- c(
  "GATA3", "TBX21", "RORC", "FOXP3",
  "TOX", "SATB1", "IKZF2",
  "STAT3", "STAT5B", "STAT6",
  "RELA", "NFKB1", "MYC", "E2F1"
)

# Keep only TFs present in the dorothea assay
available_tfs <- intersect(literature_tfs, rownames(seurat_obj[["dorothea"]]))
if (length(available_tfs) < 5) stop("Too few TFs found in dorothea assay. Check TF naming / assay content.")

# Extract TF activity matrix (TFs x cells)
# Use scale.data if available; otherwise fall back to data layer.
mat_scaled <- tryCatch(
  SeuratObject::GetAssayData(seurat_obj, assay = "dorothea", layer = "scale.data"),
  error = function(e) NULL
)
mat_data <- SeuratObject::GetAssayData(seurat_obj, assay = "dorothea", layer = "data")

mat_use <- if (!is.null(mat_scaled) && nrow(mat_scaled) > 0) mat_scaled else mat_data
mat_use <- mat_use[available_tfs, , drop = FALSE]

# Average per cluster (TF x cluster)
clusters <- as.factor(seurat_obj$seurat_clusters)
avg_mat <- sapply(levels(clusters), function(cl) {
  Matrix::rowMeans(mat_use[, clusters == cl, drop = FALSE])
})
colnames(avg_mat) <- levels(clusters)

# Optional: z-score across clusters (helps readability if you used raw 'data' instead of 'scale.data')
avg_mat_z <- t(scale(t(avg_mat)))
avg_mat_z[is.na(avg_mat_z)] <- 0

# Colors
col_fun <- circlize::colorRamp2(c(-2, 0, 2), c("#313695", "white", "#A50026"))

ht <- Heatmap(
  avg_mat_z,
  name = "TF activity (z)",
  col = col_fun,
  cluster_rows = TRUE,
  cluster_columns = TRUE,
  show_row_dend = TRUE,
  show_column_dend = TRUE,
  row_names_gp = grid::gpar(fontsize = 10),
  column_names_gp = grid::gpar(fontsize = 10),
  column_title = "Literature-validated Sézary TF modules (DoRothEA/decoupleR)",
  heatmap_legend_param = list(direction = "vertical")
)

# Draw to notebook
draw(ht)

# Save PDF (vector)
pdf("Output_Figures/Figure_3.16E_Literature_TF_Heatmap_ComplexHeatmap.pdf", width = 10, height = 8)
draw(ht)
dev.off()

# Save PNG (raster, publication-ready)
png("Output_Figures/Figure_3.16E_Literature_TF_Heatmap_ComplexHeatmap.png",
    width = 10 * 300, height = 8 * 300, res = 300)
draw(ht)
dev.off()
```


# Final Save
```{r, fig.height=12, fig.width=16}
print("Analysis pipeline complete. All figures and objects saved in Output_Figures folder.")
```




























