Archivos requeridos

Colocar estos archivos en el mismo directorio que este .Rmd:

1) Librerías

library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tibble)
library(clusterProfiler)
## 
## clusterProfiler v4.12.6 Learn more at https://yulab-smu.top/contribution-knowledge-mining/
## 
## Please cite:
## 
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan,
## X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal
## enrichment tool for interpreting omics data. The Innovation. 2021,
## 2(3):100141
## 
## Adjuntando el paquete: 'clusterProfiler'
## The following object is masked from 'package:stats':
## 
##     filter
library(org.Mm.eg.db)
## Cargando paquete requerido: AnnotationDbi
## Cargando paquete requerido: stats4
## Cargando paquete requerido: BiocGenerics
## 
## Adjuntando el paquete: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, table,
##     tapply, union, unique, unsplit, which.max, which.min
## Cargando paquete requerido: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Cargando paquete requerido: IRanges
## Cargando paquete requerido: S4Vectors
## 
## Adjuntando el paquete: 'S4Vectors'
## The following object is masked from 'package:clusterProfiler':
## 
##     rename
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## 
## Adjuntando el paquete: 'IRanges'
## The following object is masked from 'package:clusterProfiler':
## 
##     slice
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## The following object is masked from 'package:grDevices':
## 
##     windows
## 
## Adjuntando el paquete: 'AnnotationDbi'
## The following object is masked from 'package:clusterProfiler':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
## 
library(ggplot2)

2) Cargar datos y definir universo (ENSEMBL)

ESvsCTA <- read.delim(
  "ESvsCTA.tsv",
  header = TRUE,
  row.names = 1,
  stringsAsFactors = FALSE
)

ESAvsES <- read.delim(
  "ESAvsES.tsv",
  header = TRUE,
  row.names = 1,
  stringsAsFactors = FALSE
)

universe_ensembl <- intersect(
  rownames(ESvsCTA),
  rownames(ESAvsES)
)

length(universe_ensembl)
## [1] 15969

3) Cargar lista de genes con reversión (ENSEMBL)

data_rev <- read.delim(
  "ESvsCTA+ESAvsES-.tsv",
  header = TRUE,
  row.names = 1,
  stringsAsFactors = FALSE
)

rev_ensembl <- rownames(data_rev)
length(rev_ensembl)
## [1] 74

4) Conversión ENSEMBL → ENTREZ (con rescate por SYMBOL)

rev_symbol <- data_rev$SYMBOL
names(rev_symbol) <- rev_ensembl

# Mapeo principal ENSEMBL -> ENTREZID
map_ens <- bitr(
  rev_ensembl,
  fromType = "ENSEMBL",
  toType   = "ENTREZID",
  OrgDb    = org.Mm.eg.db
) %>% distinct(ENSEMBL, ENTREZID)
## 'select()' returned 1:many mapping between keys and columns
## Warning in bitr(rev_ensembl, fromType = "ENSEMBL", toType = "ENTREZID", :
## 39.19% of input gene IDs are fail to map...
# ENSEMBL que no mapearon
missing_ens <- setdiff(rev_ensembl, map_ens$ENSEMBL)

# Rescate por SYMBOL para los faltantes
missing_symbols <- rev_symbol[missing_ens]
missing_symbols <- missing_symbols[!is.na(missing_symbols) & missing_symbols != ""]

map_sym <- bitr(
  unique(missing_symbols),
  fromType = "SYMBOL",
  toType   = "ENTREZID",
  OrgDb    = org.Mm.eg.db
) %>% distinct(SYMBOL, ENTREZID)
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(unique(missing_symbols), fromType = "SYMBOL", toType =
## "ENTREZID", : 65.52% of input gene IDs are fail to map...
rescue_tbl <- tibble(
  ENSEMBL = names(missing_symbols),
  SYMBOL  = as.character(missing_symbols)
) %>%
  dplyr::left_join(map_sym, by = "SYMBOL") %>%
  dplyr::select(ENSEMBL, ENTREZID) %>%
  dplyr::filter(!is.na(ENTREZID)) %>%
  dplyr::distinct()

map_final <- dplyr::bind_rows(map_ens, rescue_tbl) %>%
  dplyr::distinct(ENSEMBL, ENTREZID)

# Lista final en ENTREZ (puede incluir genes que luego no estén en el universo ENTREZ por diferencias de mapeo)
rev_entrez_raw <- unique(map_final$ENTREZID)

5) Universo: ENSEMBL → ENTREZ

universe_conv <- bitr(
  universe_ensembl,
  fromType = "ENSEMBL",
  toType   = "ENTREZID",
  OrgDb    = org.Mm.eg.db
) %>% dplyr::distinct(ENSEMBL, ENTREZID)
## 'select()' returned 1:many mapping between keys and columns
## Warning in bitr(universe_ensembl, fromType = "ENSEMBL", toType = "ENTREZID", :
## 3.45% of input gene IDs are fail to map...
universe_entrez <- unique(universe_conv$ENTREZID)
length(universe_entrez)
## [1] 15475
# Asegurar consistencia: mantener solo genes de la lista presentes en el universo ENTREZ
rev_entrez <- intersect(rev_entrez_raw, universe_entrez)

message("Genes con reversión (ENTREZ) antes de ajustar al universo: ", length(rev_entrez_raw))
## Genes con reversión (ENTREZ) antes de ajustar al universo: 56
message("Genes con reversión (ENTREZ) usados en ORA (presentes en el universo): ", length(rev_entrez))
## Genes con reversión (ENTREZ) usados en ORA (presentes en el universo): 47

6) ORA con enrichGO (BP y MF)

Nota: análisis exploratorio. No se filtra por pvalue/qvalue en enrichGO.

ego_BP <- enrichGO(
  gene          = rev_entrez,
  universe      = universe_entrez,
  OrgDb         = org.Mm.eg.db,
  keyType       = "ENTREZID",
  ont           = "BP",
  pAdjustMethod = "BH",
  pvalueCutoff  = 1,
  qvalueCutoff  = 1,
  readable      = TRUE
)

ego_MF <- enrichGO(
  gene          = rev_entrez,
  universe      = universe_entrez,
  OrgDb         = org.Mm.eg.db,
  keyType       = "ENTREZID",
  ont           = "MF",
  pAdjustMethod = "BH",
  pvalueCutoff  = 1,
  qvalueCutoff  = 1,
  readable      = TRUE
)

# Exportar resultados completos (para Excel)
write.csv(ego_BP@result, "ORA_GO_BP_ESvsCTA+ESAvsES-.csv", row.names = FALSE)
write.csv(ego_MF@result, "ORA_GO_MF_ESvsCTA+ESAvsES-.csv", row.names = FALSE)

# Vista rápida
head(ego_BP@result[, c("ID","Description","GeneRatio","BgRatio","pvalue","p.adjust","Count")], 10)
head(ego_MF@result[, c("ID","Description","GeneRatio","BgRatio","pvalue","p.adjust","Count")], 10)

7) Figuras en porcentaje (GeneRatio × 100) y export a PDF

BP (porcentaje)

df_BP <- ego_BP@result %>% 
  dplyr::filter(Count >= 3) %>%
  dplyr::mutate(
    GeneRatioNum = as.numeric(sub("/.*", "", GeneRatio)) /
      as.numeric(sub(".*/", "", GeneRatio)),
    GenePercent = GeneRatioNum * 100
  )

p_BP <- ggplot(df_BP, aes(x = GenePercent, y = reorder(Description, GenePercent))) +
  geom_point(aes(size = Count, color = p.adjust)) +
  labs(x = "GeneRatio (%)", y = "GO Biological Process") +
  theme_bw()

p_BP

MF (porcentaje)

df_MF <- ego_MF@result %>% 
  dplyr::filter(Count >= 3) %>%
  dplyr::mutate(
    GeneRatioNum = as.numeric(sub("/.*", "", GeneRatio)) /
      as.numeric(sub(".*/", "", GeneRatio)),
    GenePercent = GeneRatioNum * 100
  )

p_MF <- ggplot(df_MF, aes(x = GenePercent, y = reorder(Description, GenePercent))) +
  geom_point(aes(size = Count, color = p.adjust)) +
  labs(x = "GeneRatio (%)", y = "GO Molecular Function") +
  theme_bw()

p_MF