load libraries————————————

1. Load DE results

obj <- readRDS("../../0-Most_important_Seurat_objects/All_samples_Merged_with_STCAT_Annotation_final-5-09-2025.rds")
Loading required namespace: SeuratObject

2. Volcano plot (log2FC vs -log10 adjP)

# Choose significance thresholds (tunable)
fc_cut <- 1        # >=1 log2 fold for highlighting (change as appropriate)
padj_cut <- 0.05   # significance threshold

de <- de %>% mutate(
  sig = case_when(
    p_val_adj < padj_cut & avg_logFC >= fc_cut  ~ "Up",
    p_val_adj < padj_cut & avg_logFC <= -fc_cut ~ "Down",
    TRUE ~ "NS"
  ),
  neglog10padj = -log10(pmax(p_val_adj, 1e-300))
)

# volcano plot
volcano_plot <- ggplot(de, aes(x = avg_logFC, y = neglog10padj)) +
  geom_point(aes(color = sig), alpha = 0.6, size = 1.5) +
  scale_color_manual(values = c("Up"="red","Down"="blue","NS"="grey70")) +
  geom_vline(xintercept = c(-fc_cut, fc_cut), linetype=2, color="black") +
  geom_hline(yintercept = -log10(padj_cut), linetype=2, color="black") +
  theme_bw() +
  labs(x = "avg_logFC", y = "-log10(adj p-value)", color = "Direction",
       title = "Volcano plot: pseudobulk DE (Malignant vs Control)") +
  theme(plot.title = element_text(hjust = 0.5))

# label top hits (customize genes to label)
genes_to_label <- c("CLIC1","YWHAH","COX5A","MYBL2", "NME1", "MYLB6", "SLC25A5", "TXNIP", "BTG1", "CDKN1B", "ZFP36")
volcano_plot <- volcano_plot +
  geom_text_repel(data = filter(de, gene %in% genes_to_label),
                  aes(label = gene),
                  size = 3.5, max.overlaps = 30)

# # Save
ggsave("volcano_pseudobulk.png", volcano_plot, width = 7, height = 6, dpi = 300)


volcano_plot

Volcano plot2

library(EnhancedVolcano)

genes_to_label <- c("CLIC1","YWHAH","COX5A","MYBL2", "NME1", 
                    "MYLB6", "SLC25A5", "TXNIP", "BTG1", 
                    "CDKN1B", "ZFP36")

fc_cut <- 1
padj_cut <- 0.05

# Determine fold change range for x-axis
fc_range <- range(de$avg_logFC, na.rm = TRUE)
fc_buffer <- 1  # add small buffer for aesthetics

EnhancedVolcano(de,
                lab = de$gene,
                x = 'avg_logFC',
                y = 'p_val_adj',
                selectLab = genes_to_label,    # only label these genes
                xlab = bquote(~Log[2]~ 'fold change'),
                pCutoff = 10e-14,
                FCcutoff = 2.0,
                pointSize = 4.0,
                labSize = 6.0,
                labCol = 'black',
                labFace = 'bold',
                boxedLabels = TRUE,
                colAlpha = 4/5,
                legendPosition = 'right',
                legendLabSize = 14,
                legendIconSize = 4.0,
                drawConnectors = TRUE,
                widthConnectors = 1.0,
                colConnectors = 'black')

NA
NA

Volcano plot3

library(EnhancedVolcano)

genes_to_label <- c("CLIC1","YWHAH","COX5A","MYBL2", "NME1", 
                    "MYLB6", "SLC25A5", "TXNIP", "BTG1", 
                    "CDKN1B", "ZFP36")

fc_cut <- 1
padj_cut <- 0.05

# Determine fold change range for x-axis
fc_range <- range(de$avg_logFC, na.rm = TRUE)
fc_buffer <- 1  # add small buffer for aesthetics

EnhancedVolcano(de,
                lab = de$gene,
                x = 'avg_logFC',
                y = 'p_val_adj',
                selectLab = genes_to_label,    # only label these genes
                xlab = bquote(~Log[2]~ 'fold change'),
                pCutoff = 0.05,
                FCcutoff = 1.0,
                pointSize = 3.0,
                labSize = 6.0,
                labCol = 'black',
                labFace = 'bold',
                boxedLabels = TRUE,
                parseLabels = TRUE,
                col = c('black', 'pink', 'purple', 'red3'),
                colAlpha = 4/5,
                legendPosition = 'bottom',
                legendLabSize = 14,
                legendIconSize = 4.0,
                drawConnectors = TRUE,
                widthConnectors = 1.0,
                colConnectors = 'black') + coord_flip()

NA
NA

Volcano plot4

library(EnhancedVolcano)


# Example thresholds
pval_cut_high <- 1e-10      # extremely significant genes
fc_cut_high <- 2.0           # very high fold-change
fc_cut_signif <- 1.0         # regular significant fold-change
pval_cut_signif <- 0.05      # regular significance

# Select genes to label
selectLab_italics <- de$gene[ 
  (de$p_val_adj < pval_cut_high) |                        # extremely significant
  ((abs(de$avg_logFC) > fc_cut_signif) & (de$p_val_adj < pval_cut_signif))  # large effect + sig
]


EnhancedVolcano(de,
                lab = de$gene,
                x = 'avg_logFC',
                y = 'p_val_adj',
                selectLab = selectLab_italics,
                xlab = bquote(~Log[2]~ 'fold change'),
                pCutoff = pval_cut_signif,
                FCcutoff = fc_cut_signif,
                pointSize = 3.0,
                labSize = 6.0,
                labCol = 'black',
                labFace = 'bold',
                boxedLabels = TRUE,
                parseLabels = FALSE,   # <-- important
                col = c('black', 'pink', 'purple', 'red3'),
                colAlpha = 0.8,
                legendPosition = 'bottom',
                legendLabSize = 14,
                legendIconSize = 4.0,
                drawConnectors = TRUE,
                widthConnectors = 1.0,
                colConnectors = 'black'
)

3. Prepare preranked list for GSEA (fgsea)

4. Dotplot function

Dotplot function

5. Enrichment map / cnetplot for leading-edge genes


#### Enrichment map / cnetplot for leading-edge genes ####
library(clusterProfiler)
library(enrichplot)
library(tidyverse)

make_cnet_emap <- function(fgsea_res,
                           dataset_name,
                           padj_cutoff = 0.25,
                           topN_fallback = 10,
                           showCategory = 15,
                           fold_change_vec = NULL,
                           save_plots = TRUE) {
  
  message("Processing dataset: ", dataset_name)
  
  # 1) Try to filter by padj
  leading_list <- fgsea_res %>%
    filter(padj < padj_cutoff) %>%
    select(pathway, leadingEdge) %>%
    mutate(leadingEdge = map(leadingEdge, as.character)) %>%
    deframe()
  
  # 2) Fallback to topN
  if (length(leading_list) < 2) {
    message("⚠ No significant pathways. Using top ", topN_fallback, " by NES.")
    leading_list <- fgsea_res %>%
      arrange(padj, desc(abs(NES))) %>%
      slice_head(n = topN_fallback) %>%
      select(pathway, leadingEdge) %>%
      mutate(leadingEdge = map(leadingEdge, as.character)) %>%
      deframe()
  }
  
  if (length(leading_list) < 2) {
    message("❌ Still too few pathways for ", dataset_name)
    return(NULL)
  }
  
  # Convert to TERM2GENE for plotting
  lead_term2gene <- enframe(leading_list, name = "TERM", value = "GENE") %>%
    unnest(cols = c(GENE))
  
  # ---- Build fake enrichmentResult object ----
  enr <- enricher(
    gene = unique(unlist(leading_list)),  # union of leading-edge genes
    TERM2GENE = lead_term2gene,
    minGSSize = 1, maxGSSize = 5000
  )
  
  # ---- cnetplot ----
  cnet <- cnetplot(
    enr,
    foldChange = fold_change_vec,
    showCategory = showCategory,
    circular = FALSE,
    colorEdge = TRUE
  ) + ggtitle(paste("Cnetplot —", dataset_name))
  
  if (save_plots) {
    ggsave(paste0("cnetplot_leadingEdge_", dataset_name, ".png"),
           cnet, width = 12, height = 8, dpi = 300)
  }
  
  # ---- emapplot ----
  emap <- pairwise_termsim(enr)
  emap_plot <- emapplot(emap, showCategory = showCategory) +
    ggtitle(paste("Enrichment Map —", dataset_name))
  
  if (save_plots) {
    ggsave(paste0("emapplot_leadingEdge_", dataset_name, ".png"),
           emap_plot, width = 12, height = 8, dpi = 300)
  }
  
  return(list(enrich_res = enr, cnet = cnet, emap = emap_plot))
}

library(Seurat)
library(cowplot)
library(SeuratObject)


topPathways <- fg_h[["pathway"]] %>% head(10)
titles <- sub("HALLMARK_", "", topPathways)


# Make sure topPathways are in the Hallmark list
topPathways <- intersect(topPathways, names(pathways_h))

# Titles for plots
titles <- sub("HALLMARK_", "", topPathways)

# Use SCT assay explicitly
DefaultAssay(obj) <- "SCT"

# Run plotCoregulationProfileReduction
ps <- plotCoregulationProfileReduction(
  pathways_h[topPathways],
  obj,
  assay = "SCT",         # explicitly use SCT assay
  title = titles,
  reduction = "umap"
)

# Combine plots
cowplot::plot_grid(plotlist = ps[1:length(ps)], ncol = 2)

NA
NA
NA

library(dplyr)
library(Seurat)
library(cowplot)
library(SeuratObject)

# 1️⃣ Sort the Hallmark FGSEA table by NES
fg_h_sorted <- fg_h %>% arrange(desc(NES))  # descending NES

# 2️⃣ Get top 10 up and top 10 down pathways
top_up   <- fg_h_sorted$pathway[1:10]       # top 10 up by NES
top_down <- fg_h_sorted %>% arrange(NES) %>% slice(1:10) %>% pull(pathway)  # top 10 down

# 3️⃣ Combine up and down
topPathways <- c(top_up, top_down)

# Keep only pathways that exist in the Hallmark gene sets
topPathways <- intersect(topPathways, names(pathways_h))

# Titles for plotting
titles <- sub("HALLMARK_", "", topPathways)

# Use SCT assay explicitly
DefaultAssay(obj) <- "SCT"

# 4️⃣ Run co-regulation UMAP
ps <- plotCoregulationProfileReduction(
  pathways_h[topPathways],
  obj,
  assay = "SCT",
  title = titles,
  reduction = "umap"
)

# 5️⃣ Combine plots in a grid
cowplot::plot_grid(plotlist = ps[1:length(ps)], ncol = 4)

NA
NA

library(Seurat)
library(cowplot)
library(SeuratObject)


topPathways <- fg_kegg[["pathway"]] %>% head(10)
titles <- sub("KEGG_", "", topPathways)


# Make sure topPathways are in the Hallmark list
topPathways <- intersect(topPathways, names(pathways_kegg))

# Titles for plots
titles <- sub("KEGG_", "", topPathways)

# Use SCT assay explicitly
DefaultAssay(obj) <- "SCT"

# Run plotCoregulationProfileReduction
ps <- plotCoregulationProfileReduction(
  pathways_kegg[topPathways],
  obj,
  assay = "SCT",         # explicitly use SCT assay
  title = titles,
  reduction = "umap"
)

# Combine plots
cowplot::plot_grid(plotlist = ps[1:length(ps)], ncol = 2)

NA
NA
NA

library(Seurat)
library(cowplot)
library(SeuratObject)


topPathways <- fg_react[["pathway"]] %>% head(10)
titles <- sub("REACTOME_", "", topPathways)


# Make sure topPathways are in the Hallmark list
topPathways <- intersect(topPathways, names(pathways_react))

# Titles for plots
titles <- sub("REACTOME_", "", topPathways)

# Use SCT assay explicitly
DefaultAssay(obj) <- "SCT"

# Run plotCoregulationProfileReduction
ps <- plotCoregulationProfileReduction(
  pathways_react[topPathways],
  obj,
  assay = "SCT",         # explicitly use SCT assay
  title = titles,
  reduction = "umap"
)

# Combine plots
cowplot::plot_grid(plotlist = ps[1:length(ps)], ncol = 2)

NA
NA
NA

# Sort Reactome FGSEA table by NES
fg_react_sorted <- fg_react %>% arrange(desc(NES))

# Top 10 up and top 10 down
top_up   <- fg_react_sorted$pathway[1:10]
top_down <- fg_react_sorted %>% arrange(NES) %>% slice(1:10) %>% pull(pathway)

# Combine and keep only pathways present in Reactome gene sets
topPathways <- intersect(c(top_up, top_down), names(pathways_react))

# Titles
titles <- sub("REACTOME_", "", topPathways)

# SCT assay
DefaultAssay(obj) <- "SCT"

# Run co-regulation UMAP
ps <- plotCoregulationProfileReduction(
  pathways_react[topPathways],
  obj,
  assay = "SCT",
  title = titles,
  reduction = "umap"
)

# Combine plots
cowplot::plot_grid(plotlist = ps[1:length(ps)], ncol = 2)

NA
NA
NA

library(Seurat)
library(cowplot)
library(SeuratObject)


topPathways <- fg_bp[["pathway"]] %>% head(10)
titles <- sub("GO_", "", topPathways)


# Make sure topPathways are in the Hallmark list
topPathways <- intersect(topPathways, names(pathways_bp))

# Titles for plots
titles <- sub("GO_", "", topPathways)

# Use SCT assay explicitly
DefaultAssay(obj) <- "SCT"

# Run plotCoregulationProfileReduction
ps <- plotCoregulationProfileReduction(
  pathways_bp[topPathways],
  obj,
  assay = "SCT",         # explicitly use SCT assay
  title = titles,
  reduction = "umap"
)

# Combine plots
cowplot::plot_grid(plotlist = ps[1:length(ps)], ncol = 2)

NA
NA
NA

# Sort GO:BP FGSEA table by NES
fg_bp_sorted <- fg_bp %>% arrange(desc(NES))

# Top 10 up and top 10 down
top_up   <- fg_bp_sorted$pathway[1:10]
top_down <- fg_bp_sorted %>% arrange(NES) %>% slice(1:10) %>% pull(pathway)

# Combine and keep only pathways present in GO:BP gene sets
topPathways <- intersect(c(top_up, top_down), names(pathways_bp))

# Titles
titles <- sub("GO_BP_", "", topPathways)

# SCT assay
DefaultAssay(obj) <- "SCT"

# Run co-regulation UMAP
ps <- plotCoregulationProfileReduction(
  pathways_bp[topPathways],
  obj,
  assay = "SCT",
  title = titles,
  reduction = "umap"
)

# Combine plots
cowplot::plot_grid(plotlist = ps[1:length(ps)], ncol = 2)

NA
NA
---
title: "fgsea Analysis for Newmoon Preparation"
author: Nasir Mahmood Abbasi
date: "`r Sys.Date()`"
output:
  #rmdformats::readthedown
  html_notebook:
    toc: true
    toc_float: true
    toc_collapsed: true
---


## load libraries------------------------------------
```{r setup, include=FALSE}

# 1. Load libraries
library(tidyverse)
library(fgsea)
library(msigdbr)
library(enrichplot)
library(clusterProfiler)
library(ggrepel)

```


# 1. Load DE results
```{r loadSeurat}

de_file <- "../1-LIBRA_Pseudobulk_Deseq2_LRT_filtered_on_mean.csv"
de <- read.csv(de_file, stringsAsFactors = FALSE)
head(de)


obj <- readRDS("../../0-Most_important_Seurat_objects/All_samples_Merged_with_STCAT_Annotation_final-5-09-2025.rds")

```


# 2. Volcano plot (log2FC vs -log10 adjP)
```{r, fig.height= 6, fig.width= 10}
# Choose significance thresholds (tunable)
fc_cut <- 1        # >=1 log2 fold for highlighting (change as appropriate)
padj_cut <- 0.05   # significance threshold

de <- de %>% mutate(
  sig = case_when(
    p_val_adj < padj_cut & avg_logFC >= fc_cut  ~ "Up",
    p_val_adj < padj_cut & avg_logFC <= -fc_cut ~ "Down",
    TRUE ~ "NS"
  ),
  neglog10padj = -log10(pmax(p_val_adj, 1e-300))
)

# volcano plot
volcano_plot <- ggplot(de, aes(x = avg_logFC, y = neglog10padj)) +
  geom_point(aes(color = sig), alpha = 0.6, size = 1.5) +
  scale_color_manual(values = c("Up"="red","Down"="blue","NS"="grey70")) +
  geom_vline(xintercept = c(-fc_cut, fc_cut), linetype=2, color="black") +
  geom_hline(yintercept = -log10(padj_cut), linetype=2, color="black") +
  theme_bw() +
  labs(x = "avg_logFC", y = "-log10(adj p-value)", color = "Direction",
       title = "Volcano plot: pseudobulk DE (Malignant vs Control)") +
  theme(plot.title = element_text(hjust = 0.5))

# label top hits (customize genes to label)
genes_to_label <- c("CLIC1","YWHAH","COX5A","MYBL2", "NME1", "MYLB6", "SLC25A5", "TXNIP", "BTG1", "CDKN1B", "ZFP36")
volcano_plot <- volcano_plot +
  geom_text_repel(data = filter(de, gene %in% genes_to_label),
                  aes(label = gene),
                  size = 3.5, max.overlaps = 30)

# # Save
ggsave("volcano_pseudobulk.png", volcano_plot, width = 7, height = 6, dpi = 300)


volcano_plot

```
## Volcano plot2
```{r, fig.height= 12, fig.width= 14}
library(EnhancedVolcano)

genes_to_label <- c("CLIC1","YWHAH","COX5A","MYBL2", "NME1", 
                    "MYLB6", "SLC25A5", "TXNIP", "BTG1", 
                    "CDKN1B", "ZFP36")

fc_cut <- 1
padj_cut <- 0.05

# Determine fold change range for x-axis
fc_range <- range(de$avg_logFC, na.rm = TRUE)
fc_buffer <- 1  # add small buffer for aesthetics

EnhancedVolcano(de,
                lab = de$gene,
                x = 'avg_logFC',
                y = 'p_val_adj',
                selectLab = genes_to_label,    # only label these genes
                xlab = bquote(~Log[2]~ 'fold change'),
                pCutoff = 10e-14,
                FCcutoff = 2.0,
                pointSize = 4.0,
                labSize = 6.0,
                labCol = 'black',
                labFace = 'bold',
                boxedLabels = TRUE,
                colAlpha = 4/5,
                legendPosition = 'right',
                legendLabSize = 14,
                legendIconSize = 4.0,
                drawConnectors = TRUE,
                widthConnectors = 1.0,
                colConnectors = 'black')


```
## Volcano plot3
```{r, fig.height= 12, fig.width= 14}
library(EnhancedVolcano)

genes_to_label <- c("CLIC1","YWHAH","COX5A","MYBL2", "NME1", 
                    "MYLB6", "SLC25A5", "TXNIP", "BTG1", 
                    "CDKN1B", "ZFP36")

fc_cut <- 1
padj_cut <- 0.05

# Determine fold change range for x-axis
fc_range <- range(de$avg_logFC, na.rm = TRUE)
fc_buffer <- 1  # add small buffer for aesthetics

EnhancedVolcano(de,
                lab = de$gene,
                x = 'avg_logFC',
                y = 'p_val_adj',
                selectLab = genes_to_label,    # only label these genes
                xlab = bquote(~Log[2]~ 'fold change'),
                pCutoff = 0.05,
                FCcutoff = 1.0,
                pointSize = 3.0,
                labSize = 6.0,
                labCol = 'black',
                labFace = 'bold',
                boxedLabels = TRUE,
                parseLabels = TRUE,
                col = c('black', 'pink', 'purple', 'red3'),
                colAlpha = 4/5,
                legendPosition = 'bottom',
                legendLabSize = 14,
                legendIconSize = 4.0,
                drawConnectors = TRUE,
                widthConnectors = 1.0,
                colConnectors = 'black') + coord_flip()


```

## Volcano plot4
```{r, fig.height= 12, fig.width= 14}
library(EnhancedVolcano)


# Example thresholds
pval_cut_high <- 1e-10      # extremely significant genes
fc_cut_high <- 2.0           # very high fold-change
fc_cut_signif <- 1.0         # regular significant fold-change
pval_cut_signif <- 0.05      # regular significance

# Select genes to label
selectLab_italics <- de$gene[ 
  (de$p_val_adj < pval_cut_high) |                        # extremely significant
  ((abs(de$avg_logFC) > fc_cut_signif) & (de$p_val_adj < pval_cut_signif))  # large effect + sig
]


EnhancedVolcano(de,
                lab = de$gene,
                x = 'avg_logFC',
                y = 'p_val_adj',
                selectLab = selectLab_italics,
                xlab = bquote(~Log[2]~ 'fold change'),
                pCutoff = pval_cut_signif,
                FCcutoff = fc_cut_signif,
                pointSize = 3.0,
                labSize = 6.0,
                labCol = 'black',
                labFace = 'bold',
                boxedLabels = TRUE,
                parseLabels = FALSE,   # <-- important
                col = c('black', 'pink', 'purple', 'red3'),
                colAlpha = 0.8,
                legendPosition = 'bottom',
                legendLabSize = 14,
                legendIconSize = 4.0,
                drawConnectors = TRUE,
                widthConnectors = 1.0,
                colConnectors = 'black'
)

```


# 3. Prepare preranked list for GSEA (fgsea)
```{r, fig.height=12, fig.width= 16}

# Ranking: signed_stat = logFC * -log10(padj)
de <- de %>% mutate(signed_stat = avg_logFC * (-log10(p_val_adj + 1e-300)))
ranks <- de %>% arrange(desc(signed_stat)) %>% distinct(gene, .keep_all=TRUE)
ranks_vec <- ranks$signed_stat; names(ranks_vec) <- ranks$gene
ranks_vec <- ranks_vec[!is.na(names(ranks_vec)) & !duplicated(names(ranks_vec))]

# Load MSigDB gene sets

# Hallmark
msig_h <- msigdbr(species = "Homo sapiens", category = "H") %>%
  dplyr::select(gs_name, gene_symbol)
pathways_h <- split(msig_h$gene_symbol, msig_h$gs_name)

# C2 canonical pathways: KEGG + Reactome
msig_c2 <- msigdbr(species = "Homo sapiens", category = "C2") %>%
  dplyr::select(gs_name, gs_subcollection, gene_symbol)

# KEGG - choose Medicus or Legacy
msig_kegg <- msigdbr(
  species = "Homo sapiens",
  category = "C2",
  subcategory = "CP:KEGG_LEGACY"   # or "CP:KEGG_LEGACY"
) %>%
  select(gs_name, gene_symbol)

pathways_kegg <- split(msig_kegg$gene_symbol, msig_kegg$gs_name)

# Reactome
msig_react <- msig_c2 %>% filter(gs_subcollection == "CP:REACTOME") %>% select(gs_name, gene_symbol)
pathways_react <- split(msig_react$gene_symbol, msig_react$gs_name)

# GO Biological Process
msig_bp <- msigdbr(species = "Homo sapiens", category = "C5", subcategory = "BP") %>%
  dplyr::select(gs_name, gene_symbol)
pathways_bp <- split(msig_bp$gene_symbol, msig_bp$gs_name)

# fgsea function
run_fgsea <- function(pathways, ranks_vec, label){
  set.seed(42)
  fg <- fgseaMultilevel(pathways = pathways, stats = ranks_vec)
  fg <- as_tibble(fg) %>%
    arrange(padj) %>%
    mutate(
      dataset = label,
      leadingEdgeCount = lengths(leadingEdge),
      leadingEdge = sapply(leadingEdge, function(x) paste(x, collapse = ";"))
    ) %>%
    dplyr::select(dataset, pathway, NES, padj, pval, size, leadingEdge, leadingEdgeCount)
  write.csv(fg, paste0("fgsea_", label, "_results.csv"), row.names = FALSE)
  return(fg)
}

# Run GSEA for each dataset
fg_h     <- run_fgsea(pathways_h, ranks_vec, "hallmark")
fg_kegg  <- run_fgsea(pathways_kegg, ranks_vec, "kegg")
fg_react <- run_fgsea(pathways_react, ranks_vec, "reactome")
fg_bp    <- run_fgsea(pathways_bp, ranks_vec, "go_bp")

```



# 4. Dotplot function
```{r, fig.height=6, fig.width= 12}

# Dotplot function
dotplot_fgsea <- function(fg_tbl, label, topN=10, width=10, height=6){
  fg_top_up <- fg_tbl %>% filter(NES > 0) %>% arrange(desc(NES)) %>% slice_head(n = topN)
  fg_top_down <- fg_tbl %>% filter(NES < 0) %>% arrange(NES) %>% slice_head(n = topN)
  fg_top <- bind_rows(fg_top_up, fg_top_down)
  
  p <- ggplot(fg_top, aes(x = NES, y = reorder(pathway, NES))) +
    geom_point(aes(size = leadingEdgeCount, color = padj)) +
    scale_color_viridis_c(option = "magma", direction = -1) +
    theme_minimal() +
    labs(title = paste0("Preranked GSEA — ", label, " (Top ", topN, " up & down)"),
         x = "Normalized Enrichment Score (NES)",
         y = "",
         size = "Leading edge genes",
         color = "FDR (padj)") +
    theme(plot.title = element_text(hjust = 0.5))
  
  ggsave(paste0("fgsea_", label, "_dotplot.png"), p, width = width, height = height, dpi = 300)
  return(p)
}

# Make dotplots
p_h     <- dotplot_fgsea(fg_h, "Hallmark")
p_kegg  <- dotplot_fgsea(fg_kegg, "KEGG")
p_react <- dotplot_fgsea(fg_react, "Reactome")
p_bp    <- dotplot_fgsea(fg_bp, "GO:BP")



fg_h$leadingEdge     <- as.character(fg_h$leadingEdge)
fg_kegg$leadingEdge  <- as.character(fg_kegg$leadingEdge)
fg_react$leadingEdge <- as.character(fg_react$leadingEdge)
fg_bp$leadingEdge    <- as.character(fg_bp$leadingEdge)

fg_all <- bind_rows(fg_h, fg_kegg, fg_react, fg_bp)
write.csv(fg_all, "fgsea_all_results.csv", row.names = FALSE)

library(tidytext)

fg_all <- bind_rows(fg_h, fg_kegg, fg_react, fg_bp)
write.csv(fg_all, "fgsea_all_results.csv", row.names = FALSE)

fg_all %>%
  group_by(dataset) %>% slice_min(padj, n = 10) %>%
  ggplot(aes(x = NES, y = reorder_within(pathway, NES, dataset),
             color = padj, size = leadingEdgeCount)) +
  geom_point() +
  scale_y_reordered() +
  facet_wrap(~dataset, scales = "free_y") +
  scale_color_viridis_c(option = "magma", direction = -1) +
  theme_minimal() +
  labs(title = "Top GSEA pathways across Hallmark, KEGG, Reactome, and GO-BP",
       x = "NES", y = "", size = "Leading edge", color = "FDR (padj)")

```

## Dotplot function
```{r, fig.height=4, fig.width= 6}
# Prepare top pathways

topN <- 3  # top 3 up and down per dataset

get_top_updown <- function(fg_tbl, dataset_name, topN=3){
  fg_top_up <- fg_tbl %>% filter(NES > 0) %>% arrange(desc(NES)) %>% slice_head(n=topN)
  fg_top_down <- fg_tbl %>% filter(NES < 0) %>% arrange(NES) %>% slice_head(n=topN)
  fg_top <- bind_rows(fg_top_up, fg_top_down)
  fg_top$dataset <- dataset_name
  fg_top$direction <- ifelse(fg_top$NES > 0, "Up", "Down")
  fg_top$direction <- as.character(fg_top$direction)   # <--- force character
  return(fg_top)
}


# Combine top pathways from each dataset
fg_combined <- bind_rows(
  get_top_updown(fg_h, "Hallmark", topN),
  get_top_updown(fg_kegg, "KEGG", topN),
  get_top_updown(fg_react, "Reactome", topN),
  get_top_updown(fg_bp, "GO:BP", topN)
)

# Ensure leadingEdge is character for CSV
fg_combined$leadingEdge <- as.character(fg_combined$leadingEdge)

# Plot combined dotplot
library(ggplot2)

p_combined <- ggplot(fg_combined, 
                     aes(x = NES, 
                         y = reorder(pathway, NES),
                         color = direction,  # Up = red, Down = blue
                         shape = dataset,
                         size = leadingEdgeCount)) +
  geom_point() +
  scale_color_manual(values = c("Up"="red", "Down"="blue")) +
  theme_minimal() +
  labs(title="",
       x="Normalized Enrichment Score (NES)",
       y="Pathway",
       color="Direction",
       shape="Dataset",
       size="Leading edge genes") +
  theme(axis.text.y = element_text(size=8, face="bold"))  # <--- make labels bold

# Save figure
ggsave("fgsea_top3_updown_combined.png", p_combined, width=9, height=6, dpi=300)

p_combined




```


# 5. Enrichment map / cnetplot for leading-edge genes
```{r, fig.height=12, fig.width= 16}

#### Enrichment map / cnetplot for leading-edge genes ####
library(clusterProfiler)
library(enrichplot)
library(tidyverse)

# ---- Function ----
make_cnet_emap <- function(fgsea_res,
                           pathways,
                           dataset_name,
                           padj_cutoff = 0.25,
                           topN_fallback = 10,
                           showCategory = 15,
                           fold_change_vec = NULL,
                           save_plots = TRUE) {
  
  message("Processing dataset: ", dataset_name)
  
  # 1) Try to filter by padj
  leading_list <- fgsea_res %>%
    filter(padj < padj_cutoff) %>%
    select(pathway, leadingEdge) %>%
    mutate(leadingEdge = map(leadingEdge, as.character)) %>%
    deframe()
  
  # 2) Fallback: if too few, take topN pathways by NES
  if (length(leading_list) < 2) {
    message("⚠ No pathways pass padj <", padj_cutoff, 
            " for ", dataset_name, ". Using top ", topN_fallback, " pathways by NES.")
    
    leading_list <- fgsea_res %>%
      arrange(padj, desc(abs(NES))) %>%
      slice_head(n = topN_fallback) %>%
      select(pathway, leadingEdge) %>%
      mutate(leadingEdge = map(leadingEdge, as.character)) %>%
      deframe()
  }
  
  if (length(leading_list) < 2) {
    message("❌ Still too few pathways for ", dataset_name, " — skipping.")
    return(NULL)
  }
  
  # TERM2GENE conversion
  lead_term2gene <- enframe(leading_list, name = "TERM", value = "GENE") %>%
    unnest(cols = c(GENE))
  
  # Union of leading-edge genes
  union_leading_genes <- unique(unlist(leading_list))
  
  if (length(union_leading_genes) < 2) {
    message("❌ Too few leading-edge genes for ", dataset_name, " — skipping.")
    return(NULL)
  }
  
  # Run enrichment (very relaxed min/max sizes)
  enr <- enricher(
    union_leading_genes,
    TERM2GENE = lead_term2gene,
    minGSSize = 1,
    maxGSSize = 5000
  )
  
  if (is.null(enr) || nrow(enr) == 0) {
    message("❌ No enrichment result for ", dataset_name)
    return(NULL)
  }
  
  # ---- cnetplot ----
  cnet <- cnetplot(
    enr,
    foldChange = fold_change_vec,
    showCategory = showCategory,
    circular = FALSE,
    colorEdge = TRUE
  ) + ggtitle(paste("Cnetplot —", dataset_name))
  
  if (save_plots) {
    ggsave(
      paste0("cnetplot_leadingEdge_", dataset_name, ".png"),
      cnet,
      width = 12, height = 8, dpi = 300
    )
  }
  
  # ---- emapplot ----
  emap <- pairwise_termsim(enr)
  emap_plot <- emapplot(emap, showCategory = showCategory) +
    ggtitle(paste("Enrichment Map —", dataset_name))
  
  if (save_plots) {
    ggsave(
      paste0("emapplot_leadingEdge_", dataset_name, ".png"),
      emap_plot,
      width = 12, height = 8, dpi = 300
    )
  }
  
  return(list(enrich_res = enr, cnet = cnet, emap = emap_plot))
}

# ---- Run for all datasets ----
# Optional: fold-change vector from DE results
fold_change_vec <- setNames(de$avg_logFC, de$gene)

res_h     <- make_cnet_emap(fg_h,     pathways_h,     "Hallmark", fold_change_vec = fold_change_vec)
res_kegg  <- make_cnet_emap(fg_kegg,  pathways_kegg,  "KEGG",     fold_change_vec = fold_change_vec)
res_react <- make_cnet_emap(fg_react, pathways_react, "Reactome", fold_change_vec = fold_change_vec)
res_bp    <- make_cnet_emap(fg_bp,    pathways_bp,    "GO_BP",    fold_change_vec = fold_change_vec)

message("✅ Finished Cnet/Enrichment map plots for all datasets")

```


```{r,  fig.height=22, fig.width= 26}

library(Seurat)
library(cowplot)
library(SeuratObject)


topPathways <- fg_h[["pathway"]] %>% head(10)
titles <- sub("HALLMARK_", "", topPathways)


# Make sure topPathways are in the Hallmark list
topPathways <- intersect(topPathways, names(pathways_h))

# Titles for plots
titles <- sub("HALLMARK_", "", topPathways)

# Use SCT assay explicitly
DefaultAssay(obj) <- "SCT"

# Run plotCoregulationProfileReduction
ps <- plotCoregulationProfileReduction(
  pathways_h[topPathways],
  obj,
  assay = "SCT",         # explicitly use SCT assay
  title = titles,
  reduction = "umap"
)

# Combine plots
cowplot::plot_grid(plotlist = ps[1:length(ps)], ncol = 2)



```
```{r,  fig.height=22, fig.width= 26}

library(dplyr)
library(Seurat)
library(cowplot)
library(SeuratObject)

# 1️⃣ Sort the Hallmark FGSEA table by NES
fg_h_sorted <- fg_h %>% arrange(desc(NES))  # descending NES

# 2️⃣ Get top 10 up and top 10 down pathways
top_up   <- fg_h_sorted$pathway[1:10]       # top 10 up by NES
top_down <- fg_h_sorted %>% arrange(NES) %>% slice(1:10) %>% pull(pathway)  # top 10 down

# 3️⃣ Combine up and down
topPathways <- c(top_up, top_down)

# Keep only pathways that exist in the Hallmark gene sets
topPathways <- intersect(topPathways, names(pathways_h))

# Titles for plotting
titles <- sub("HALLMARK_", "", topPathways)

# Use SCT assay explicitly
DefaultAssay(obj) <- "SCT"

# 4️⃣ Run co-regulation UMAP
ps <- plotCoregulationProfileReduction(
  pathways_h[topPathways],
  obj,
  assay = "SCT",
  title = titles,
  reduction = "umap"
)

# 5️⃣ Combine plots in a grid
cowplot::plot_grid(plotlist = ps[1:length(ps)], ncol = 4)


```
```{r,  fig.height=22, fig.width= 26}

library(Seurat)
library(cowplot)
library(SeuratObject)


topPathways <- fg_kegg[["pathway"]] %>% head(10)
titles <- sub("KEGG_", "", topPathways)


# Make sure topPathways are in the Hallmark list
topPathways <- intersect(topPathways, names(pathways_kegg))

# Titles for plots
titles <- sub("KEGG_", "", topPathways)

# Use SCT assay explicitly
DefaultAssay(obj) <- "SCT"

# Run plotCoregulationProfileReduction
ps <- plotCoregulationProfileReduction(
  pathways_kegg[topPathways],
  obj,
  assay = "SCT",         # explicitly use SCT assay
  title = titles,
  reduction = "umap"
)

# Combine plots
cowplot::plot_grid(plotlist = ps[1:length(ps)], ncol = 2)



```



```{r,  fig.height=22, fig.width= 26}

library(Seurat)
library(cowplot)
library(SeuratObject)


topPathways <- fg_react[["pathway"]] %>% head(10)
titles <- sub("REACTOME_", "", topPathways)


# Make sure topPathways are in the Hallmark list
topPathways <- intersect(topPathways, names(pathways_react))

# Titles for plots
titles <- sub("REACTOME_", "", topPathways)

# Use SCT assay explicitly
DefaultAssay(obj) <- "SCT"

# Run plotCoregulationProfileReduction
ps <- plotCoregulationProfileReduction(
  pathways_react[topPathways],
  obj,
  assay = "SCT",         # explicitly use SCT assay
  title = titles,
  reduction = "umap"
)

# Combine plots
cowplot::plot_grid(plotlist = ps[1:length(ps)], ncol = 2)



```

```{r,  fig.height=30, fig.width= 20}

# Sort Reactome FGSEA table by NES
fg_react_sorted <- fg_react %>% arrange(desc(NES))

# Top 10 up and top 10 down
top_up   <- fg_react_sorted$pathway[1:10]
top_down <- fg_react_sorted %>% arrange(NES) %>% slice(1:10) %>% pull(pathway)

# Combine and keep only pathways present in Reactome gene sets
topPathways <- intersect(c(top_up, top_down), names(pathways_react))

# Titles
titles <- sub("REACTOME_", "", topPathways)

# SCT assay
DefaultAssay(obj) <- "SCT"

# Run co-regulation UMAP
ps <- plotCoregulationProfileReduction(
  pathways_react[topPathways],
  obj,
  assay = "SCT",
  title = titles,
  reduction = "umap"
)

# Combine plots
cowplot::plot_grid(plotlist = ps[1:length(ps)], ncol = 2)



```



```{r,  fig.height=22, fig.width= 26}

library(Seurat)
library(cowplot)
library(SeuratObject)


topPathways <- fg_bp[["pathway"]] %>% head(10)
titles <- sub("GO_", "", topPathways)


# Make sure topPathways are in the Hallmark list
topPathways <- intersect(topPathways, names(pathways_bp))

# Titles for plots
titles <- sub("GO_", "", topPathways)

# Use SCT assay explicitly
DefaultAssay(obj) <- "SCT"

# Run plotCoregulationProfileReduction
ps <- plotCoregulationProfileReduction(
  pathways_bp[topPathways],
  obj,
  assay = "SCT",         # explicitly use SCT assay
  title = titles,
  reduction = "umap"
)

# Combine plots
cowplot::plot_grid(plotlist = ps[1:length(ps)], ncol = 2)



```

```{r, fig.height=30, fig.width= 20}

# Sort GO:BP FGSEA table by NES
fg_bp_sorted <- fg_bp %>% arrange(desc(NES))

# Top 10 up and top 10 down
top_up   <- fg_bp_sorted$pathway[1:10]
top_down <- fg_bp_sorted %>% arrange(NES) %>% slice(1:10) %>% pull(pathway)

# Combine and keep only pathways present in GO:BP gene sets
topPathways <- intersect(c(top_up, top_down), names(pathways_bp))

# Titles
titles <- sub("GO_BP_", "", topPathways)

# SCT assay
DefaultAssay(obj) <- "SCT"

# Run co-regulation UMAP
ps <- plotCoregulationProfileReduction(
  pathways_bp[topPathways],
  obj,
  assay = "SCT",
  title = titles,
  reduction = "umap"
)

# Combine plots
cowplot::plot_grid(plotlist = ps[1:length(ps)], ncol = 2)


```