load libraries
LOAD DATA & SETUP
# Read the combined fgsea results table. The path is relative to the working
# directory, so the CSV must be present there under exactly this name.
fg_all <- utils::read.csv(
  file = "fgsea_all_results.csv",
  stringsAsFactors = FALSE
)

# Report how many rows were read as a quick sanity check.
cat("Loaded", nrow(fg_all), "pathways from fgsea_all_results.csv\n", sep = " ")
Loaded 9200 pathways from fgsea_all_results.csv
# Keywords used to recognise proliferation-related pathways (strict exclusion
# list). prepare_data() joins these with "|" and matches the result
# case-insensitively against pathway names, so every entry behaves as a
# substring pattern. Some entries overlap (e.g. "REPLICATION" already covers
# "DNA_REPLICATION"); the overlap is harmless for matching.
# The groups below only organise the list; the flattened vector is unnamed.
prolif_terms <- unlist(
  list(
    cell_cycle = c("CELL_CYCLE", "MITOTIC", "G2M", "E2F", "SPINDLE"),
    chromosome = c("CHROMOSOME", "DNA_REPLICATION", "NUCLEAR_DIVISION"),
    division_structures = c("ORGANELLE_FISSION", "KINETOCHORE", "CENTROSOME"),
    generic_division = c("REPLICATION", "SEGREGATION", "DIVISION", "M_PHASE"),
    kinesin_meiosis = c("KINESINS", "MEIOSIS", "OOCYTE"),
    cytoskeleton_transport = c(
      "MICROTUBULE", "CYTOSKELETON", "TRAFFIC", "GOLGI", "CYCLIN"
    ),
    dna_maintenance = c("RECOMBINATION", "REPAIR", "REPLICATIVE", "POLO_LIKE")
  ),
  use.names = FALSE
)
DATA PREPARATION FUNCTION
# Select the most extreme pathways per database and build ordered plot labels.
#
# For each supported database (hallmark, kegg, reactome, go_bp) the `topN`
# highest-NES and `topN` lowest-NES rows are kept. Rows from any other
# `dataset` value are dropped.
#
# Arguments:
#   fg_tbl          Data frame of enrichment results. Must contain the columns
#                   `pathway`, `dataset` and `NES`.
#   topN            Number of rows to keep from each end of the NES ranking,
#                   per database. Ties are kept (slice_max()/slice_min()
#                   defaults), so more than `topN` rows per end are possible.
#   exclude_prolif  If TRUE, first remove every pathway whose name matches any
#                   entry of `prolif_patterns` (case-insensitive).
#   prolif_patterns Character vector of regular-expression fragments, joined
#                   with "|". Defaults to the script-level `prolif_terms` and
#                   is only evaluated when `exclude_prolif` is TRUE.
#
# Returns:
#   The selected rows sorted by ascending NES, with three added columns:
#   `db_prefix`, `clean_pathway` (pathway name without its database prefix)
#   and `plot_label` (a factor whose levels follow the NES order, so a plot
#   axis lists pathways from lowest to highest NES).
prepare_data <- function(fg_tbl, topN = 3, exclude_prolif = FALSE,
                         prolif_patterns = prolif_terms) {
  required_cols <- c("pathway", "dataset", "NES")
  missing_cols <- setdiff(required_cols, names(fg_tbl))
  if (length(missing_cols) > 0) {
    stop(
      "fg_tbl is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # An empty pattern vector would collapse to "", which matches every name and
  # would silently discard the whole table, so it is treated as "no exclusion".
  if (exclude_prolif && length(prolif_patterns) > 0) {
    prolif_regex <- paste(prolif_patterns, collapse = "|")
    fg_tbl <- fg_tbl %>%
      filter(!grepl(.env$prolif_regex, pathway, ignore.case = TRUE))
  }

  # Single source of truth for the supported databases: the names are the
  # values expected in `dataset`, the values are the label prefixes.
  db_prefixes <- c(
    hallmark = "HALLMARK",
    kegg = "KEGG",
    reactome = "REACTOME",
    go_bp = "GOBP"
  )

  # Top N up and down per database. distinct() removes rows picked by both
  # slice_max() and slice_min(), which happens whenever a database has fewer
  # than 2 * topN rows left; without it those pathways would be plotted twice.
  top_per_db <- lapply(names(db_prefixes), function(db_name) {
    db_tbl <- filter(fg_tbl, dataset == .env$db_name)
    bind_rows(
      slice_max(db_tbl, NES, n = topN),
      slice_min(db_tbl, NES, n = topN)
    ) %>%
      distinct()
  })
  fg_plot <- bind_rows(top_per_db)

  # Evaluates to "^HALLMARK_|^KEGG_|^REACTOME_|^GOBP_".
  prefix_regex <- paste0("^", db_prefixes, "_", collapse = "|")

  # Format labels as <DB>_<pathway without its original prefix>.
  fg_plot %>%
    mutate(
      # as.character() keeps the lookup name-based even if `dataset` is a
      # factor (indexing by a factor would use its integer codes instead).
      db_prefix = unname(.env$db_prefixes[as.character(dataset)]),
      clean_pathway = gsub(.env$prefix_regex, "", pathway),
      plot_label = paste0(db_prefix, "_", clean_pathway)
    ) %>%
    arrange(NES) %>%
    mutate(plot_label = factor(plot_label, levels = unique(plot_label)))
}
PLOTTING FUNCTION
# Draw a dot plot of pathways (y axis) against their NES (x axis).
#
# Point shape encodes the source database (legend hidden), point size encodes
# `leadingEdgeCount`, and point colour encodes the column named by
# `color_var` on a log10 scale.
#
# Arguments:
#   data         Data frame with the columns `NES`, `plot_label`, `dataset`,
#                `leadingEdgeCount` and the column named by `color_var`.
#   color_var    Name (single string) of the column mapped to colour,
#                e.g. "padj" or "pval".
#   color_label  Title shown above the colour bar.
#   title_text   Plot title.
#
# Returns:
#   A ggplot object.
create_plot <- function(data, color_var, color_label, title_text) {
  if (!is.character(color_var) || length(color_var) != 1L || is.na(color_var)) {
    stop("color_var must be a single column name given as a string",
         call. = FALSE)
  }
  if (!color_var %in% names(data)) {
    stop("Column '", color_var, "' not found in data", call. = FALSE)
  }

  ggplot(data, aes(x = NES, y = plot_label)) +
    # .data[[...]] looks the column up in `data` only, so a missing column
    # cannot silently resolve to a same-named variable in the environment.
    geom_point(
      aes(
        shape = dataset,
        size = leadingEdgeCount,
        color = .data[[color_var]]
      ),
      alpha = 0.9
    ) +
    # Reference line at NES = 0 separating negative from positive enrichment.
    geom_vline(
      xintercept = 0, linetype = "solid", color = "gray80", linewidth = 0.5
    ) +
    # Gradient runs red -> orange -> blue from the smallest to the largest
    # value on a log10 scale; the colour bar itself is drawn reversed.
    # NOTE(review): newer ggplot2 releases name this argument `transform`;
    # `trans` is kept for compatibility - confirm against the installed version.
    scale_color_gradientn(
      colors = c("red", "orange", "blue"),
      trans = "log10",
      name = color_label,
      guide = guide_colorbar(reverse = TRUE)
    ) +
    # One fixed shape per database; no shape legend is drawn.
    scale_shape_manual(
      values = c("hallmark" = 17, "kegg" = 15, "reactome" = 3, "go_bp" = 16),
      guide = "none"
    ) +
    scale_size_continuous(range = c(3, 8), name = "Leading edge genes") +
    theme_minimal() +
    labs(
      x = "Normalized Enrichment Score (NES)", y = NULL, title = title_text
    ) +
    theme(
      # Y-axis labels (pathway names) - bold, largest text on the plot
      axis.text.y = element_text(size = 14, face = "bold", color = "black"),
      # X-axis labels (numbers) - bold, smaller than the pathway names
      axis.text.x = element_text(size = 10, face = "bold", color = "black"),
      # X-axis title - bold, with extra space above it
      axis.title.x = element_text(
        size = 14, face = "bold", color = "black", margin = margin(t = 10)
      ),
      # Plot title - bold and centred
      plot.title = element_text(face = "bold", size = 13, hjust = 0.5),
      # Legends stacked vertically to the right of the panel
      legend.position = "right",
      legend.box = "vertical",
      legend.title = element_text(face = "bold", size = 10),
      # Faint horizontal guides to help match points to their labels
      panel.grid.major.y = element_line(color = "gray95")
    )
}
GENERATE 4 PLOTS
# Two pathway selections: everything, and everything except pathways matching
# the proliferation keyword list.
df_all <- prepare_data(fg_all, exclude_prolif = FALSE)
df_no_prolif <- prepare_data(fg_all, exclude_prolif = TRUE)

# Four figures: each selection coloured once by padj and once by pval.
# A) All pathways (padj)
p1 <- create_plot(
  data = df_all,
  color_var = "padj",
  color_label = "FDR (padj)",
  title_text = "Global Pathway Alterations (Malignant vs. Normal CD4+)"
)
# B) All pathways (p-value)
p2 <- create_plot(
  data = df_all,
  color_var = "pval",
  color_label = "P-value",
  title_text = "Nominally Significant Pathway Alterations"
)
# C) Non-proliferation (padj)
p3 <- create_plot(
  data = df_no_prolif,
  color_var = "padj",
  color_label = "FDR (padj)",
  title_text = "Functional & Immune Signatures (Non-Proliferative)"
)
# D) Non-proliferation (p-value)
p4 <- create_plot(
  data = df_no_prolif,
  color_var = "pval",
  color_label = "P-value",
  title_text = "Exploratory Functional Signatures (Non-Proliferative)"
)

# Write every figure as a 16 x 8 PNG (300 dpi) and as a 16 x 8 PDF, in the
# order Fig1 .. Fig4.
invisible(Map(
  function(figure, png_file, pdf_file) {
    ggsave(png_file, figure, width = 16, height = 8, dpi = 300)
    ggsave(pdf_file, figure, width = 16, height = 8)
  },
  list(p1, p2, p3, p4),
  c(
    "Fig1_All_padj.png",
    "Fig2_All_pval.png",
    "Fig3_NonProlif_padj.png",
    "Fig4_NonProlif_pval.png"
  ),
  c(
    "Fig1_All_padj.pdf",
    "Fig2_All_pval.pdf",
    "Fig3_NonProlif_padj.pdf",
    "Fig4_NonProlif_pval.pdf"
  )
))

print("Created 4 figures: Fig1..Fig4 (.png and .pdf)")
[1] "Created 4 figures: Fig1..Fig4 (.png and .pdf)"
# Evaluate each plot object at top level so it is auto-printed and the four
# figures are displayed in order (Fig1 .. Fig4) in the notebook output.
p1

p2

p3

p4

LS0tCnRpdGxlOiAiZmdzZWEgQW5hbHlzaXMgZm9yIE1hbnVTY3JpcHRfRmViMjAyNiIKYXV0aG9yOiBOYXNpciBNYWhtb29kIEFiYmFzaQpkYXRlOiAiYHIgU3lzLkRhdGUoKWAiCm91dHB1dDoKICBodG1sX25vdGVib29rOgogICAgbnVtYmVyX3NlY3Rpb25zOiB0cnVlCiAgICB0b2M6IHRydWUKICAgIHRvY19mbG9hdDoKICAgICAgY29sbGFwc2VkOiB0cnVlCiAgICB0aGVtZTogam91cm5hbAotLS0KCgojIGxvYWQgbGlicmFyaWVzCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQojIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIwojIEN1c3RvbSBHU0VBIFBsb3R0aW5nIFNjcmlwdCAtIEZPVVIgVkVSU0lPTlMKIyAxLiBBbGwgUGF0aHdheXMgKHBhZGopCiMgMi4gQWxsIFBhdGh3YXlzIChwLXZhbHVlKQojIDMuIE5vbi1Qcm9saWZlcmF0aW9uIChwYWRqKQojIDQuIE5vbi1Qcm9saWZlcmF0aW9uIChwLXZhbHVlKQojCiMgU3R5bGU6IEJvbGQgTGFiZWxzIChEQl9QQVRIV0FZKSwgTm8gRGF0YXNldCBMZWdlbmQsIFRvcCAzIFVwL0Rvd24KIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMKCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KGZnc2VhKQpsaWJyYXJ5KG1zaWdkYnIpCmxpYnJhcnkoZW5yaWNocGxvdCkKbGlicmFyeShjbHVzdGVyUHJvZmlsZXIpCmxpYnJhcnkoZ2dyZXBlbCkKCmBgYAoKCiMgTE9BRCBEQVRBICYgU0VUVVAgCmBgYHtyIGxvYWRTZXVyYXR9CiMgTWFrZSBzdXJlIHRoZSBmaWxlIG5hbWUgbWF0Y2hlcyBleGFjdGx5IHdoYXQgeW91IGhhdmUKZmdfYWxsIDwtIHJlYWQuY3N2KCJmZ3NlYV9hbGxfcmVzdWx0cy5jc3YiLCBzdHJpbmdzQXNGYWN0b3JzID0gRkFMU0UpCgpjYXQoIkxvYWRlZCIsIG5yb3coZmdfYWxsKSwgInBhdGh3YXlzIGZyb20gZmdzZWFfYWxsX3Jlc3VsdHMuY3N2XG4iKQoKIyBFeHBhbmRlZCBQcm9saWZlcmF0aW9uIEtleXdvcmRzCiMgU1RSSUNUIEVYQ0xVU0lPTiBMSVNUIChVcGRhdGVkKQpwcm9saWZfdGVybXMgPC0gYygKICAiQ0VMTF9DWUNMRSIsICJNSVRPVElDIiwgIkcyTSIsICJFMkYiLCAiU1BJTkRMRSIsIAogICJDSFJPTU9TT01FIiwgIkROQV9SRVBMSUNBVElPTiIsICJOVUNMRUFSX0RJVklTSU9OIiwKICAiT1JHQU5FTExFX0ZJU1NJT04iLCAiS0lORVRPQ0hPUkUiLCAiQ0VOVFJPU09NRSIsCiAgIlJFUExJQ0FUSU9OIiwgIlNFR1JFR0FUSU9OIiwgIkRJVklTSU9OIiwgIk1fUEhBU0UiLCAKICAiS0lORVNJTlMiLCAiTUVJT1NJUyIsICJPT0NZVEUiLCAKICAiTUlDUk9UVUJVTEUiLCAiQ1lUT1NLRUxFVE9OIiwgIlRSQUZGSUMiLCAiR09MR0kiLCAiQ1lDTElOIiwKICAiUkVDT01CSU5BVElPTiIsICJSRVBBSVIiLCAiUkVQTElDQVRJVkUiLCAiUE9MT19MSUtFIgopCmBgYAoKCiMgREFUQSBQ
UkVQQVJBVElPTiBGVU5DVElPTgpgYGB7ciwgZmlnLmhlaWdodD0gNiwgZmlnLndpZHRoPSAxMH0KcHJlcGFyZV9kYXRhIDwtIGZ1bmN0aW9uKGZnX3RibCwgdG9wTiA9IDMsIGV4Y2x1ZGVfcHJvbGlmID0gRkFMU0UpIHsKICBpZiAoZXhjbHVkZV9wcm9saWYpIHsKICAgIGZnX3RibCA8LSBmZ190YmwgJT4lIGZpbHRlcighZ3JlcGwocGFzdGUocHJvbGlmX3Rlcm1zLCBjb2xsYXBzZSA9ICJ8IiksIHBhdGh3YXksIGlnbm9yZS5jYXNlID0gVFJVRSkpCiAgfQoKICAjIEdldCB0b3AgTiB1cCBhbmQgZG93biBwZXIgZGF0YWJhc2UKICBmZ19wbG90IDwtIGJpbmRfcm93cygKICAgIGZnX3RibCAlPiUgZmlsdGVyKGRhdGFzZXQgPT0gImhhbGxtYXJrIikgJT4lIHsgYmluZF9yb3dzKHNsaWNlX21heCguLCBORVMsIG4gPSB0b3BOKSwgc2xpY2VfbWluKC4sIE5FUywgbiA9IHRvcE4pKSB9LAogICAgZmdfdGJsICU+JSBmaWx0ZXIoZGF0YXNldCA9PSAia2VnZyIpICU+JSB7IGJpbmRfcm93cyhzbGljZV9tYXgoLiwgTkVTLCBuID0gdG9wTiksIHNsaWNlX21pbiguLCBORVMsIG4gPSB0b3BOKSkgfSwKICAgIGZnX3RibCAlPiUgZmlsdGVyKGRhdGFzZXQgPT0gInJlYWN0b21lIikgJT4lIHsgYmluZF9yb3dzKHNsaWNlX21heCguLCBORVMsIG4gPSB0b3BOKSwgc2xpY2VfbWluKC4sIE5FUywgbiA9IHRvcE4pKSB9LAogICAgZmdfdGJsICU+JSBmaWx0ZXIoZGF0YXNldCA9PSAiZ29fYnAiKSAlPiUgeyBiaW5kX3Jvd3Moc2xpY2VfbWF4KC4sIE5FUywgbiA9IHRvcE4pLCBzbGljZV9taW4oLiwgTkVTLCBuID0gdG9wTikpIH0KICApCgogICMgRm9ybWF0IExhYmVscwogIGZnX3Bsb3QgJT4lCiAgICBtdXRhdGUoCiAgICAgIGRiX3ByZWZpeCA9IGNhc2Vfd2hlbigKICAgICAgICBkYXRhc2V0ID09ICJoYWxsbWFyayIgfiAiSEFMTE1BUksiLAogICAgICAgIGRhdGFzZXQgPT0gImtlZ2ciIH4gIktFR0ciLAogICAgICAgIGRhdGFzZXQgPT0gInJlYWN0b21lIiB+ICJSRUFDVE9NRSIsCiAgICAgICAgZGF0YXNldCA9PSAiZ29fYnAiIH4gIkdPQlAiCiAgICAgICksCiAgICAgIGNsZWFuX3BhdGh3YXkgPSBnc3ViKCJeSEFMTE1BUktffF5LRUdHX3xeUkVBQ1RPTUVffF5HT0JQXyIsICIiLCBwYXRod2F5KSwKICAgICAgcGxvdF9sYWJlbCA9IHBhc3RlMChkYl9wcmVmaXgsICJfIiwgY2xlYW5fcGF0aHdheSkKICAgICkgJT4lCiAgICBhcnJhbmdlKE5FUykgJT4lCiAgICBtdXRhdGUocGxvdF9sYWJlbCA9IGZhY3RvcihwbG90X2xhYmVsLCBsZXZlbHMgPSB1bmlxdWUocGxvdF9sYWJlbCkpKQp9CgpgYGAKCiMgUExPVFRJTkcgRlVOQ1RJT04gCmBgYHtyLCBmaWcuaGVpZ2h0PSA2LCBmaWcud2lkdGg9IDEwfQpjcmVhdGVfcGxvdCA8LSBmdW5jdGlvbihkYXRhLCBjb2xvcl92YXIsIGNvbG9yX2xhYmVsLCB0aXRsZV90ZXh0KSB7CiAgZ2dwbG90KGRhdGEsIGFlcyh4ID0gTkVTLCB5ID0gcGxvdF9sYWJlbCkpICsKICAgIGdlb21fcG9pbnQoYWVzKHNoYXBlID0g
ZGF0YXNldCwgc2l6ZSA9IGxlYWRpbmdFZGdlQ291bnQsIGNvbG9yID0gISFzeW0oY29sb3JfdmFyKSksIGFscGhhID0gMC45KSArCiAgICBnZW9tX3ZsaW5lKHhpbnRlcmNlcHQgPSAwLCBsaW5ldHlwZSA9ICJzb2xpZCIsIGNvbG9yID0gImdyYXk4MCIsIGxpbmV3aWR0aCA9IDAuNSkgKwoKICAgIHNjYWxlX2NvbG9yX2dyYWRpZW50bigKICAgICAgY29sb3JzID0gYygicmVkIiwgIm9yYW5nZSIsICJibHVlIiksCiAgICAgIHRyYW5zID0gImxvZzEwIiwKICAgICAgbmFtZSA9IGNvbG9yX2xhYmVsLAogICAgICBndWlkZSA9IGd1aWRlX2NvbG9yYmFyKHJldmVyc2UgPSBUUlVFKQogICAgKSArCgogICAgc2NhbGVfc2hhcGVfbWFudWFsKAogICAgICB2YWx1ZXMgPSBjKCJoYWxsbWFyayIgPSAxNywgImtlZ2ciID0gMTUsICJyZWFjdG9tZSIgPSAzLCAiZ29fYnAiID0gMTYpLAogICAgICBndWlkZSA9ICJub25lIgogICAgKSArCgogICAgc2NhbGVfc2l6ZV9jb250aW51b3VzKHJhbmdlID0gYygzLCA4KSwgbmFtZSA9ICJMZWFkaW5nIGVkZ2UgZ2VuZXMiKSArCgogICAgdGhlbWVfbWluaW1hbCgpICsKICAgIGxhYnMoeCA9ICJOb3JtYWxpemVkIEVucmljaG1lbnQgU2NvcmUgKE5FUykiLCB5ID0gTlVMTCwgdGl0bGUgPSB0aXRsZV90ZXh0KSArCiAgICB0aGVtZSgKICAgICAgYXhpcy50ZXh0LnkgPSBlbGVtZW50X3RleHQoc2l6ZSA9IDE0LCBmYWNlID0gImJvbGQiLCBjb2xvciA9ICJibGFjayIpLAogICAgICAjIFgtYXhpcyBsYWJlbHMgKE51bWJlcnMpIC0gQk9MRCAmIEJJR0dFUgogICAgICBheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChzaXplID0gMTAsIGZhY2UgPSAiYm9sZCIsIGNvbG9yID0gImJsYWNrIiksCiAgICAgICMgWC1heGlzIFRJVExFIChUaGUgVGV4dCAiTm9ybWFsaXplZCBFbnJpY2htZW50IFNjb3JlLi4uIikgLSBCT0xECiAgICAgIGF4aXMudGl0bGUueCA9IGVsZW1lbnRfdGV4dChzaXplID0gMTQsIGZhY2UgPSAiYm9sZCIsIGNvbG9yID0gImJsYWNrIiwgbWFyZ2luID0gbWFyZ2luKHQgPSAxMCkpLAogICAgICAKICAgICAgIyBQbG90IFRpdGxlCiAgICAgIHBsb3QudGl0bGUgPSBlbGVtZW50X3RleHQoZmFjZSA9ICJib2xkIiwgc2l6ZSA9IDEzLCBoanVzdCA9IDAuNSksCiAgICAgIAogICAgICAjIExlZ2VuZAogICAgICBsZWdlbmQucG9zaXRpb24gPSAicmlnaHQiLAogICAgICBsZWdlbmQuYm94ID0gInZlcnRpY2FsIiwKICAgICAgbGVnZW5kLnRpdGxlID0gZWxlbWVudF90ZXh0KGZhY2UgPSAiYm9sZCIsIHNpemUgPSAxMCksCgogICAgICBwYW5lbC5ncmlkLm1ham9yLnkgPSBlbGVtZW50X2xpbmUoY29sb3IgPSAiZ3JheTk1IikKICAgICkKfQoKYGBgCgoKIyBHRU5FUkFURSA0IFBMT1RTIApgYGB7ciwgZmlnLmhlaWdodD0gOCwgZmlnLndpZHRoPSAxNn0KIyBBKSBBbGwgUGF0aHdheXMgKHBhZGopCmRmX2FsbCA8LSBwcmVwYXJlX2RhdGEoZmdfYWxsLCBleGNsdWRlX3Byb2xpZiA9IEZBTFNFKQpwMSA8LSBj
cmVhdGVfcGxvdChkZl9hbGwsICJwYWRqIiwgIkZEUiAocGFkaikiLCAiR2xvYmFsIFBhdGh3YXkgQWx0ZXJhdGlvbnMgKE1hbGlnbmFudCB2cy4gTm9ybWFsIENENCspIikKZ2dzYXZlKCJGaWcxX0FsbF9wYWRqLnBuZyIsIHAxLCB3aWR0aCA9IDE2LCBoZWlnaHQgPSA4LCBkcGkgPSAzMDApCmdnc2F2ZSgiRmlnMV9BbGxfcGFkai5wZGYiLCBwMSwgd2lkdGggPSAxNiwgaGVpZ2h0ID0gOCkKCiMgQikgQWxsIFBhdGh3YXlzIChwLXZhbHVlKQpwMiA8LSBjcmVhdGVfcGxvdChkZl9hbGwsICJwdmFsIiwgIlAtdmFsdWUiLCAiTm9taW5hbGx5IFNpZ25pZmljYW50IFBhdGh3YXkgQWx0ZXJhdGlvbnMiKQpnZ3NhdmUoIkZpZzJfQWxsX3B2YWwucG5nIiwgcDIsIHdpZHRoID0gMTYsIGhlaWdodCA9IDgsIGRwaSA9IDMwMCkKZ2dzYXZlKCJGaWcyX0FsbF9wdmFsLnBkZiIsIHAyLCB3aWR0aCA9IDE2LCBoZWlnaHQgPSA4KQoKIyBDKSBOb24tUHJvbGlmZXJhdGlvbiAocGFkaikKZGZfbm9fcHJvbGlmIDwtIHByZXBhcmVfZGF0YShmZ19hbGwsIGV4Y2x1ZGVfcHJvbGlmID0gVFJVRSkKcDMgPC0gY3JlYXRlX3Bsb3QoZGZfbm9fcHJvbGlmLCAicGFkaiIsICJGRFIgKHBhZGopIiwgIkZ1bmN0aW9uYWwgJiBJbW11bmUgU2lnbmF0dXJlcyAoTm9uLVByb2xpZmVyYXRpdmUpIikKZ2dzYXZlKCJGaWczX05vblByb2xpZl9wYWRqLnBuZyIsIHAzLCB3aWR0aCA9IDE2LCBoZWlnaHQgPSA4LCBkcGkgPSAzMDApCmdnc2F2ZSgiRmlnM19Ob25Qcm9saWZfcGFkai5wZGYiLCBwMywgd2lkdGggPSAxNiwgaGVpZ2h0ID0gOCkKCiMgRCkgTm9uLVByb2xpZmVyYXRpb24gKHAtdmFsdWUpCnA0IDwtIGNyZWF0ZV9wbG90KGRmX25vX3Byb2xpZiwgInB2YWwiLCAiUC12YWx1ZSIsICJFeHBsb3JhdG9yeSBGdW5jdGlvbmFsIFNpZ25hdHVyZXMgKE5vbi1Qcm9saWZlcmF0aXZlKSIpCmdnc2F2ZSgiRmlnNF9Ob25Qcm9saWZfcHZhbC5wbmciLCBwNCwgd2lkdGggPSAxNiwgaGVpZ2h0ID0gOCwgZHBpID0gMzAwKQpnZ3NhdmUoIkZpZzRfTm9uUHJvbGlmX3B2YWwucGRmIiwgcDQsIHdpZHRoID0gMTYsIGhlaWdodCA9IDgpCgpwcmludCgiQ3JlYXRlZCA0IGZpZ3VyZXM6IEZpZzEuLkZpZzQgKC5wbmcgYW5kIC5wZGYpIikKCnAxCnAyCnAzCnA0CmBgYAo=