Colocar estos archivos en el mismo directorio que este
.Rmd:
ESvsCTA.tsvESAvsES.tsvESvsCTA+ESAvsES-.tsvlibrary(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tibble)
library(clusterProfiler)
##
## clusterProfiler v4.12.6 Learn more at https://yulab-smu.top/contribution-knowledge-mining/
##
## Please cite:
##
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan,
## X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal
## enrichment tool for interpreting omics data. The Innovation. 2021,
## 2(3):100141
##
## Adjuntando el paquete: 'clusterProfiler'
## The following object is masked from 'package:stats':
##
## filter
library(org.Mm.eg.db)
## Cargando paquete requerido: AnnotationDbi
## Cargando paquete requerido: stats4
## Cargando paquete requerido: BiocGenerics
##
## Adjuntando el paquete: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, table,
## tapply, union, unique, unsplit, which.max, which.min
## Cargando paquete requerido: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Cargando paquete requerido: IRanges
## Cargando paquete requerido: S4Vectors
##
## Adjuntando el paquete: 'S4Vectors'
## The following object is masked from 'package:clusterProfiler':
##
## rename
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
## Adjuntando el paquete: 'IRanges'
## The following object is masked from 'package:clusterProfiler':
##
## slice
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:grDevices':
##
## windows
##
## Adjuntando el paquete: 'AnnotationDbi'
## The following object is masked from 'package:clusterProfiler':
##
## select
## The following object is masked from 'package:dplyr':
##
## select
##
library(ggplot2)
ESvsCTA <- read.delim(
"ESvsCTA.tsv",
header = TRUE,
row.names = 1,
stringsAsFactors = FALSE
)
ESAvsES <- read.delim(
"ESAvsES.tsv",
header = TRUE,
row.names = 1,
stringsAsFactors = FALSE
)
universe_ensembl <- intersect(
rownames(ESvsCTA),
rownames(ESAvsES)
)
length(universe_ensembl)
## [1] 15969
data_rev <- read.delim(
"ESvsCTA+ESAvsES-.tsv",
header = TRUE,
row.names = 1,
stringsAsFactors = FALSE
)
rev_ensembl <- rownames(data_rev)
length(rev_ensembl)
## [1] 74
rev_symbol <- data_rev$SYMBOL
names(rev_symbol) <- rev_ensembl
# Mapeo principal ENSEMBL -> ENTREZID
map_ens <- bitr(
rev_ensembl,
fromType = "ENSEMBL",
toType = "ENTREZID",
OrgDb = org.Mm.eg.db
) %>% distinct(ENSEMBL, ENTREZID)
## 'select()' returned 1:many mapping between keys and columns
## Warning in bitr(rev_ensembl, fromType = "ENSEMBL", toType = "ENTREZID", :
## 39.19% of input gene IDs are fail to map...
# ENSEMBL que no mapearon
missing_ens <- setdiff(rev_ensembl, map_ens$ENSEMBL)
# Rescate por SYMBOL para los faltantes
missing_symbols <- rev_symbol[missing_ens]
missing_symbols <- missing_symbols[!is.na(missing_symbols) & missing_symbols != ""]
map_sym <- bitr(
unique(missing_symbols),
fromType = "SYMBOL",
toType = "ENTREZID",
OrgDb = org.Mm.eg.db
) %>% distinct(SYMBOL, ENTREZID)
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(unique(missing_symbols), fromType = "SYMBOL", toType =
## "ENTREZID", : 65.52% of input gene IDs are fail to map...
rescue_tbl <- tibble(
ENSEMBL = names(missing_symbols),
SYMBOL = as.character(missing_symbols)
) %>%
dplyr::left_join(map_sym, by = "SYMBOL") %>%
dplyr::select(ENSEMBL, ENTREZID) %>%
dplyr::filter(!is.na(ENTREZID)) %>%
dplyr::distinct()
map_final <- dplyr::bind_rows(map_ens, rescue_tbl) %>%
dplyr::distinct(ENSEMBL, ENTREZID)
# Lista final en ENTREZ (puede incluir genes que luego no estén en el universo ENTREZ por diferencias de mapeo)
rev_entrez_raw <- unique(map_final$ENTREZID)
universe_conv <- bitr(
universe_ensembl,
fromType = "ENSEMBL",
toType = "ENTREZID",
OrgDb = org.Mm.eg.db
) %>% dplyr::distinct(ENSEMBL, ENTREZID)
## 'select()' returned 1:many mapping between keys and columns
## Warning in bitr(universe_ensembl, fromType = "ENSEMBL", toType = "ENTREZID", :
## 3.45% of input gene IDs are fail to map...
universe_entrez <- unique(universe_conv$ENTREZID)
length(universe_entrez)
## [1] 15475
# Asegurar consistencia: mantener solo genes de la lista presentes en el universo ENTREZ
rev_entrez <- intersect(rev_entrez_raw, universe_entrez)
message("Genes con reversión (ENTREZ) antes de ajustar al universo: ", length(rev_entrez_raw))
## Genes con reversión (ENTREZ) antes de ajustar al universo: 56
message("Genes con reversión (ENTREZ) usados en ORA (presentes en el universo): ", length(rev_entrez))
## Genes con reversión (ENTREZ) usados en ORA (presentes en el universo): 47
Nota: análisis exploratorio. No se filtra por pvalue/qvalue en enrichGO.
ego_BP <- enrichGO(
gene = rev_entrez,
universe = universe_entrez,
OrgDb = org.Mm.eg.db,
keyType = "ENTREZID",
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 1,
qvalueCutoff = 1,
readable = TRUE
)
ego_MF <- enrichGO(
gene = rev_entrez,
universe = universe_entrez,
OrgDb = org.Mm.eg.db,
keyType = "ENTREZID",
ont = "MF",
pAdjustMethod = "BH",
pvalueCutoff = 1,
qvalueCutoff = 1,
readable = TRUE
)
# Exportar resultados completos (para Excel)
write.csv(ego_BP@result, "ORA_GO_BP_ESvsCTA+ESAvsES-.csv", row.names = FALSE)
write.csv(ego_MF@result, "ORA_GO_MF_ESvsCTA+ESAvsES-.csv", row.names = FALSE)
# Vista rápida
head(ego_BP@result[, c("ID","Description","GeneRatio","BgRatio","pvalue","p.adjust","Count")], 10)
head(ego_MF@result[, c("ID","Description","GeneRatio","BgRatio","pvalue","p.adjust","Count")], 10)
df_BP <- ego_BP@result %>%
dplyr::filter(Count >= 3) %>%
dplyr::mutate(
GeneRatioNum = as.numeric(sub("/.*", "", GeneRatio)) /
as.numeric(sub(".*/", "", GeneRatio)),
GenePercent = GeneRatioNum * 100
)
p_BP <- ggplot(df_BP, aes(x = GenePercent, y = reorder(Description, GenePercent))) +
geom_point(aes(size = Count, color = p.adjust)) +
labs(x = "GeneRatio (%)", y = "GO Biological Process") +
theme_bw()
p_BP
df_MF <- ego_MF@result %>%
dplyr::filter(Count >= 3) %>%
dplyr::mutate(
GeneRatioNum = as.numeric(sub("/.*", "", GeneRatio)) /
as.numeric(sub(".*/", "", GeneRatio)),
GenePercent = GeneRatioNum * 100
)
p_MF <- ggplot(df_MF, aes(x = GenePercent, y = reorder(Description, GenePercent))) +
geom_point(aes(size = Count, color = p.adjust)) +
labs(x = "GeneRatio (%)", y = "GO Molecular Function") +
theme_bw()
p_MF