# =========================
# Setup
# =========================
library(dplyr)
library(fgsea)
library(msigdbr)
library(ggplot2)
library(writexl)
set.seed(1)
# IMPORTANTE:
# El archivo ESAvsES.tsv debe estar en la MISMA carpeta que este .Rmd
# =========================
# Cargar datos y ranking
# =========================
comp <- "ESAvsES"
infile <- paste0(comp, ".tsv")
stopifnot(file.exists(infile))
data_dge <- read.delim(
infile,
header = TRUE, sep = "\t", check.names = FALSE
)
rank_list <- data_dge[, c(1, 2, 5)] # GeneID, logFC, P.Value
colnames(rank_list) <- c("Gene", "logFC", "P.Value")
rank_list$fcSign <- sign(rank_list$logFC)
rank_list$logP <- -log10(rank_list$P.Value)
rank_list$metric <- rank_list$logP / rank_list$fcSign
ranks <- rank_list$metric
names(ranks) <- rank_list$Gene
# =========================
# Gene sets
# =========================
msig_go_mouse <- msigdbr(
species = "Mus musculus",
collection = "M5",
db_species = "MM"
)
msig_hallmark_mouse <- msigdbr(
species = "Mus musculus",
collection = "MH",
db_species = "MM"
)
msigdb_gene_sets <- bind_rows(msig_go_mouse, msig_hallmark_mouse)
m_list <- msigdb_gene_sets %>%
split(x = .$ensembl_gene, f = .$gs_name)
# =========================
# GSEA
# =========================
fgseaRes <- fgsea(
m_list,
ranks,
minSize = 15,
maxSize = 500
)
# =========================
# Top pathways + tabla GSEA
# =========================
topUp <- fgseaRes %>%
filter(NES > 0) %>%
slice_min(padj, n = 5)
topDown <- fgseaRes %>%
filter(NES < 0) %>%
slice_min(padj, n = 5)
topPathways <- bind_rows(topUp, topDown) %>%
arrange(desc(NES))
plotGseaTable(
pathways = m_list[topPathways$pathway],
stats = ranks,
fgseaRes = fgseaRes,
gseaParam = 0.5
)

pdf(paste0(comp, ".pdf"), width = 14, height = 14)
plotGseaTable(
m_list[topPathways$pathway],
ranks,
fgseaRes,
gseaParam = 0.5
)
# =========================
# Exportar resultados a Excel
# =========================
fgseaRes_export <- fgseaRes
fgseaRes_export$leadingEdge <- vapply(
fgseaRes_export$leadingEdge,
function(x) sprintf('c("%s")', paste(x, collapse = '", "')),
character(1)
)
write_xlsx(
fgseaRes_export,
paste0(comp, "_fgsea_results.xlsx")
)
# =========================
# Dotplot
# =========================
fgsea_plot_df <- fgseaRes %>%
filter(!grepl("^GOCC_", pathway)) %>%
filter(padj <= 0.05) %>%
mutate(
Count = lengths(leadingEdge),
GeneRatio = Count / size,
GeneRatioPct = 100 * GeneRatio
) %>%
filter(!is.na(GeneRatioPct), Count > 0) %>%
arrange(padj, desc(abs(NES))) %>%
slice_head(n = 20)
p <- ggplot(
fgsea_plot_df,
aes(x = GeneRatioPct, y = reorder(pathway, GeneRatioPct))
) +
geom_point(aes(size = Count, color = padj), alpha = 0.9) +
labs(
x = "% de genes del set en leadingEdge (Count/size)",
y = NULL,
color = "p.adj",
size = "Count"
) +
theme_bw() +
theme(
panel.grid.major = element_line(color = "grey85", linewidth = 0.3),
panel.grid.minor = element_line(color = "grey92", linewidth = 0.2),
panel.grid.minor.y = element_blank()
)
p

ggsave(
filename = paste0(comp, "_dotplot.pdf"),
plot = p,
width = 10,
height = 6,
units = "in",
device = cairo_pdf
)