1 LOAD LIBRARIES

2 LOAD DATA & SETUP

# Load the combined fgsea results table.
# Make sure the file name matches exactly what you have
fg_all <- read.csv("../../../../fgsea_all_results.csv", stringsAsFactors = FALSE)

# Fail fast if a column used by the radar-plot code below is absent, instead
# of surfacing later as an "object not found" error inside a pipeline.
if (!all(c("pathway", "pval", "NES", "dataset") %in% names(fg_all))) {
  stop(
    "fgsea_all_results.csv must contain columns: pathway, pval, NES, dataset",
    call. = FALSE
  )
}

cat("Loaded", nrow(fg_all), "pathways from fgsea_all_results.csv\n")
Loaded 9200 pathways from fgsea_all_results.csv
# STRICT EXCLUSION LIST (Updated)
# Name fragments used to drop pathways from the radar plots. The data-prep
# functions join these with "|" and match the result case-insensitively
# anywhere in the pathway name, so the longer entries (for example
# "MITOTIC_SPINDLE") are already matched by shorter ones ("MITOTIC",
# "SPINDLE") and do not change which pathways are removed.
prolif_terms <- c(
  "CELL_CYCLE",
  "MITOTIC",
  "G2M",
  "E2F",
  "SPINDLE",
  "CHROMOSOME",
  "DNA_REPLICATION",
  "NUCLEAR_DIVISION",
  "ORGANELLE_FISSION",
  "KINETOCHORE",
  "CENTROSOME",
  "REPLICATION",
  "SEGREGATION",
  "DIVISION",
  "M_PHASE",
  "KINESINS",
  "MEIOSIS",
  "OOCYTE",
  "MICROTUBULE",
  "CYTOSKELETON",
  "TRAFFIC",
  "GOLGI",
  "CYCLIN",
  "RECOMBINATION",
  "REPAIR",
  "REPLICATIVE",
  "POLO_LIKE",
  "CHECKPOINTS",
  "TRANSCRIPTION",
  "S_PHASE",
  "ANAPHASE",
  "TELOPHASE",
  "PROPHASE",
  "CYTOKINESIS",
  "SPINDLE_ASSEMBLY",
  "SPINDLE_CHECKPOINT",
  "MITOTIC_SPINDLE",
  "MITOTIC_CHECKPOINT",
  "MITOTIC_G1",
  "MITOTIC_S_PHASE",
  "MITOTIC_G2M"
)

3 PREPARE DATA FOR RADAR PLOT (WITH DATABASE PREFIX) - V1

#' Prepare radar-plot data: top pathways per database (V1)
#'
#' Keeps pathways with raw `pval < 0.05`, takes the `topN_per_db` smallest
#' p-values from each of the hallmark, kegg, reactome and go_bp datasets, and
#' adds the columns the radar plot needs.
#'
#' @param fg_tbl Data frame with columns `pathway`, `pval`, `NES`, `dataset`.
#' @param topN_per_db Number of pathways to keep per database.
#' @param exclude_prolif If `TRUE`, drop pathways whose name matches any of
#'   `prolif_patterns` (case-insensitive, matched anywhere in the name).
#' @param prolif_patterns Character vector of regex fragments to exclude.
#'   Defaults to the script-level `prolif_terms`; an empty vector disables
#'   the exclusion.
#' @return `fg_tbl` rows for the selected pathways, sorted by ascending
#'   `log_pval`, with added columns `log_pval`, `db_prefix`, `clean_pathway`,
#'   `plot_label` (factor whose levels follow the row order) and `direction`
#'   ("Up" when `NES > 0`, otherwise "Down", so `NES == 0` is labelled "Down").
prepare_radar_data_v1 <- function(fg_tbl, topN_per_db = 3,
                                  exclude_prolif = TRUE,
                                  prolif_patterns = prolif_terms) {

  # Validate up front so a wrong input fails with a readable message
  required_cols <- c("pathway", "pval", "NES", "dataset")
  missing_cols <- setdiff(required_cols, names(fg_tbl))
  if (length(missing_cols) > 0) {
    stop(
      "fg_tbl is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Databases (in stacking order) and the label prefix used for each one
  db_labels <- c(
    hallmark = "HALLMARK",
    kegg = "KEGG",
    reactome = "REACTOME",
    go_bp = "GOBP"
  )

  # A. Filter Proliferation (if requested)
  # An empty pattern list would collapse to "" and match every pathway,
  # so it is treated as "nothing to exclude".
  if (exclude_prolif && length(prolif_patterns) > 0) {
    prolif_regex <- paste(prolif_patterns, collapse = "|")
    fg_tbl <- fg_tbl %>%
      filter(!grepl(.env$prolif_regex, pathway, ignore.case = TRUE))
  }

  # B. Filter for significant pathways only (raw p-value)
  fg_tbl <- fg_tbl %>% filter(pval < 0.05)

  # C. Select Top N pathways PER DATABASE (balanced)
  per_db_top <- lapply(names(db_labels), function(db) {
    fg_tbl %>%
      filter(dataset == .env$db) %>%
      arrange(pval) %>%
      slice_head(n = topN_per_db)
  })

  bind_rows(per_db_top) %>%
    mutate(
      log_pval = -log10(pval + 1e-15), # Offset guards against log10(0)
      # Add Database Prefix
      db_prefix = unname(.env$db_labels[as.character(dataset)]),
      # Clean pathway name
      clean_pathway = gsub("^HALLMARK_|^KEGG_|^REACTOME_|^GOBP_", "", pathway),
      clean_pathway = str_trunc(clean_pathway, 25),
      # Combine: DB_PATHWAY format
      plot_label = paste0(db_prefix, "_", clean_pathway),
      direction = ifelse(NES > 0, "Up", "Down")
    ) %>%
    # D. Truncation can produce identical labels; make them unique
    mutate(plot_label = make.unique(as.character(plot_label), sep = " ")) %>%
    # E. Sort by log_pval for smooth spiral
    arrange(log_pval) %>%
    mutate(plot_label = factor(plot_label, levels = plot_label))
}

4 PREPARE DATA FOR RADAR PLOT (BALANCED UP/DOWN FROM EACH DATABASE) - V2

#' Prepare radar-plot data: balanced up/down pathways per database (V2)
#'
#' Keeps pathways with raw `pval < 0.05`, then from each of the hallmark,
#' kegg, reactome and go_bp datasets takes the `ceiling(topN_per_db / 2)`
#' smallest p-values with `NES > 0` and the `floor(topN_per_db / 2)` smallest
#' with `NES < 0`. A shortfall in one direction is not filled from the other,
#' and pathways with `NES == 0` are never selected.
#'
#' @param fg_tbl Data frame with columns `pathway`, `pval`, `NES`, `dataset`.
#' @param topN_per_db Total number of pathways targeted per database.
#' @param exclude_prolif If `TRUE`, drop pathways whose name matches any of
#'   `prolif_patterns` (case-insensitive, matched anywhere in the name).
#' @param prolif_patterns Character vector of regex fragments to exclude.
#'   Defaults to the script-level `prolif_terms`; an empty vector disables
#'   the exclusion.
#' @return `fg_tbl` rows for the selected pathways, sorted by ascending
#'   `log_pval`, with added columns `log_pval`, `db_prefix`, `clean_pathway`,
#'   `plot_label` (factor whose levels follow the row order) and `direction`
#'   ("Up" or "Down").
prepare_radar_data_v2 <- function(fg_tbl, topN_per_db = 3,
                                  exclude_prolif = TRUE,
                                  prolif_patterns = prolif_terms) {

  # Validate up front so a wrong input fails with a readable message
  required_cols <- c("pathway", "pval", "NES", "dataset")
  missing_cols <- setdiff(required_cols, names(fg_tbl))
  if (length(missing_cols) > 0) {
    stop(
      "fg_tbl is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Databases (in stacking order) and the label prefix used for each one
  db_labels <- c(
    hallmark = "HALLMARK",
    kegg = "KEGG",
    reactome = "REACTOME",
    go_bp = "GOBP"
  )

  # A. Filter Proliferation (if requested)
  # An empty pattern list would collapse to "" and match every pathway,
  # so it is treated as "nothing to exclude".
  if (exclude_prolif && length(prolif_patterns) > 0) {
    prolif_regex <- paste(prolif_patterns, collapse = "|")
    fg_tbl <- fg_tbl %>%
      filter(!grepl(.env$prolif_regex, pathway, ignore.case = TRUE))
  }

  # B. Filter for significant pathways only (raw p-value)
  fg_tbl <- fg_tbl %>% filter(pval < 0.05)

  # C. Select Top N UPREGULATED and DOWNREGULATED pathways PER DATABASE
  # With an odd topN_per_db the extra slot goes to the upregulated side.
  n_up <- ceiling(topN_per_db / 2)
  n_down <- floor(topN_per_db / 2)

  per_db_top <- lapply(names(db_labels), function(db) {
    db_hits <- fg_tbl %>% filter(dataset == .env$db)
    bind_rows(
      db_hits %>% filter(NES > 0) %>% arrange(pval) %>% slice_head(n = n_up),
      db_hits %>% filter(NES < 0) %>% arrange(pval) %>% slice_head(n = n_down)
    )
  })

  bind_rows(per_db_top) %>%
    mutate(
      log_pval = -log10(pval + 1e-15), # Offset guards against log10(0)
      db_prefix = unname(.env$db_labels[as.character(dataset)]),
      clean_pathway = gsub("^HALLMARK_|^KEGG_|^REACTOME_|^GOBP_", "", pathway),
      clean_pathway = str_trunc(clean_pathway, 25),
      plot_label = paste0(db_prefix, "_", clean_pathway),
      direction = ifelse(NES > 0, "Up", "Down")
    ) %>%
    # Truncation can produce identical labels; make them unique
    mutate(plot_label = make.unique(as.character(plot_label), sep = " ")) %>%
    # Ascending order gives the plot its spiral shape
    arrange(log_pval) %>%
    mutate(plot_label = factor(plot_label, levels = plot_label))
}

4.1 GGPLOT RADAR FUNCTION (SPIRAL STYLE)

#' Draw a polar "spiral" radar plot of pathway significance
#'
#' Plots `log_pval` against `plot_label` as a filled area, outline and points
#' in polar coordinates, with a dashed ring at the significance threshold.
#' Pathways appear in the order of the `plot_label` factor levels.
#'
#' @param data Data frame with columns `plot_label`, `log_pval` and
#'   `direction` ("Up" / "Down"); must have at least one row.
#' @param title_text Plot title.
#' @param p_threshold P-value marked by the dashed ring and its label.
#' @return A ggplot object.
create_ggplot_radar <- function(data, title_text, p_threshold = 0.05) {

  # Validate up front: an empty table would otherwise give max() = -Inf and
  # an unusable y scale that only fails when the plot is rendered.
  required_cols <- c("plot_label", "log_pval", "direction")
  missing_cols <- setdiff(required_cols, names(data))
  if (length(missing_cols) > 0) {
    stop(
      "data is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }
  if (nrow(data) == 0) {
    stop("data has no rows to plot", call. = FALSE)
  }
  stopifnot(
    is.numeric(p_threshold),
    length(p_threshold) == 1L,
    p_threshold > 0
  )

  # Threshold ring on the -log10 scale (-log10(0.05) ~= 1.3)
  threshold_val <- -log10(p_threshold)

  # Max limit for the plot: 20% headroom, and never below the threshold ring
  # so the ring is not dropped as out-of-range.
  max_val <- max(c(data$log_pval, threshold_val), na.rm = TRUE) * 1.2

  ggplot(data, aes(x = plot_label, y = log_pval)) +

    # A) The Shaded Area (Spiral Effect); group = 1 joins the discrete labels
    geom_area(aes(group = 1), fill = "#DDA0DD", alpha = 0.4) +

    # B) The Line Border
    geom_line(aes(group = 1), color = "#800080", linewidth = 1) +

    # C) The Points (Colored by Up/Down)
    geom_point(aes(color = direction), size = 3) +
    scale_color_manual(values = c("Up" = "purple", "Down" = "blue")) +

    # D) The significance threshold circle
    geom_hline(
      yintercept = threshold_val, linetype = "dashed",
      color = "black", linewidth = 0.8
    ) +
    annotate(
      "text", x = 1, y = threshold_val, label = paste0("p < ", p_threshold),
      color = "black", fontface = "bold", size = 3, vjust = -1
    ) +

    # E) Radial Transformation
    coord_polar(start = 0, clip = "off") +

    # F) Theme Adjustments
    theme_minimal() +
    labs(
      title = title_text,
      x = NULL,
      y = "-log10(p-value)",
      color = "Regulation"
    ) +
    scale_y_continuous(limits = c(0, max_val)) +
    theme(
      axis.text.x = element_text(size = 9, face = "bold", color = "black"),
      axis.text.y = element_text(size = 8, color = "gray50"),
      panel.grid.major = element_line(color = "gray85", linetype = "dotted"),
      plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
      legend.position = "bottom",
      plot.margin = margin(t = 20, r = 20, b = 20, l = 20)
    )
}

4.2 GENERATE FIGURES 1 & 2 (Version 1 - Without and With Proliferation)

# Figure 1: V1 selection (up to 5 significant pathways per database) with
# pathways matching the proliferation exclusion list removed.
radar_df_fig1 <- prepare_radar_data_v1(fg_all, topN_per_db = 5, exclude_prolif = TRUE)
p1 <- create_ggplot_radar(radar_df_fig1, "Top Enriched Pathways (Non-Proliferative)")

# Write the same plot as PNG (300 dpi) and PDF, both with width = height = 18.
ggsave("Fig1_Radar_NoProlif_AllSig.png", p1, width = 18, height = 18, dpi = 300)
ggsave("Fig1_Radar_NoProlif_AllSig.pdf", p1, width = 18, height = 18)

print("✅ Created Figure 1: Fig1_Radar_NoProlif_AllSig.png")
[1] "✅ Created Figure 1: Fig1_Radar_NoProlif_AllSig.png"
# Also display the figure.
print(p1)


# Figure 2: same V1 selection as Figure 1, but with exclude_prolif = FALSE so
# pathways matching the proliferation exclusion list are kept.
radar_df_fig2 <- prepare_radar_data_v1(fg_all, topN_per_db = 5, exclude_prolif = FALSE)
p2 <- create_ggplot_radar(radar_df_fig2, "Top Enriched Pathways (Including Proliferation)")

# Write the same plot as PNG (300 dpi) and PDF, both with width = height = 18.
ggsave("Fig2_Radar_WithProlif_AllSig.png", p2, width = 18, height = 18, dpi = 300)
ggsave("Fig2_Radar_WithProlif_AllSig.pdf", p2, width = 18, height = 18)

print("✅ Created Figure 2: Fig2_Radar_WithProlif_AllSig.png")
[1] "✅ Created Figure 2: Fig2_Radar_WithProlif_AllSig.png"
# Also display the figure.
print(p2)

4.3 GENERATE FIGURES 3 & 4 (Version 2 - Balanced Up/Down, Without and With Proliferation)

# Figure 3: V2 selection with topN_per_db = 5, i.e. up to 3 upregulated and
# 2 downregulated significant pathways per database, with pathways matching
# the proliferation exclusion list removed.
radar_df_fig3 <- prepare_radar_data_v2(fg_all, topN_per_db = 5, exclude_prolif = TRUE)
p3 <- create_ggplot_radar(radar_df_fig3, "Top Enriched Pathways (Non-Proliferative, Balanced)")

# Write the same plot as PNG (300 dpi) and PDF, both with width = height = 18.
ggsave("Fig3_Radar_NoProlif_Balanced.png", p3, width = 18, height = 18, dpi = 300)
ggsave("Fig3_Radar_NoProlif_Balanced.pdf", p3, width = 18, height = 18)

print("✅ Created Figure 3: Fig3_Radar_NoProlif_Balanced.png")
[1] "✅ Created Figure 3: Fig3_Radar_NoProlif_Balanced.png"
# Also display the figure.
print(p3)


# Figure 4: same V2 balanced selection as Figure 3, but with
# exclude_prolif = FALSE so pathways matching the proliferation exclusion
# list are kept.
radar_df_fig4 <- prepare_radar_data_v2(fg_all, topN_per_db = 5, exclude_prolif = FALSE)
p4 <- create_ggplot_radar(radar_df_fig4, "Top Enriched Pathways (Including Proliferation, Balanced)")

# Write the same plot as PNG (300 dpi) and PDF, both with width = height = 18.
ggsave("Fig4_Radar_WithProlif_Balanced.png", p4, width = 18, height = 18, dpi = 300)
ggsave("Fig4_Radar_WithProlif_Balanced.pdf", p4, width = 18, height = 18)

print("✅ Created Figure 4: Fig4_Radar_WithProlif_Balanced.png")
[1] "✅ Created Figure 4: Fig4_Radar_WithProlif_Balanced.png"
# Also display the figure.
print(p4)

