suppressPackageStartupMessages({
library(magrittr)
library(clusterProfiler)
library(SummarizedExperiment)
library(GenomicSignatures)
library(AnnotationDbi)
library(org.Hs.eg.db)
library(DOSE)
})
dat_dir <- "~/data2/Genomic_Super_Signature/refinebio/methods/5_Hierarchical_Clustering_with_SpikeInTCGA"
PCclusters <- readRDS(file.path(dat_dir, "PCclusters.rds"))
al <- PCclusters$avgLoading[,263] ## feature 1: numeric vector
names(al) <- AnnotationDbi::mapIds(org.Hs.eg.db, keys=names(al), column='ENTREZID', keytype='SYMBOL') ## feature 2: named vector
## 'select()' returned 1:many mapping between keys and columns
al <- sort(al, decreasing = TRUE) ## feature 3: decreasing order
geneList <- al
gene <- names(geneList)[abs(geneList) > mean(abs(geneList))]
keyword <- "breast"
## MSigDB_C2
library(msigdbr)
m_c2 <- msigdbr(species = "Homo sapiens", category = "C2") %>%
dplyr::select(gs_name, entrez_gene)
msC2_2 <- GSEA(geneList, TERM2GENE = m_c2)
x <- msC2_2
## Disease Ontology
edo2 <- gseDO(geneList)
Visualize GSEA result using MSigDB_C2
library(clusterProfiler.dplyr)
## Registered S3 methods overwritten by 'clusterProfiler.dplyr':
## method from
## arrange.compareClusterResult clusterProfiler
## arrange.enrichResult clusterProfiler
## arrange.gseaResult clusterProfiler
## filter.compareClusterResult clusterProfiler
## filter.enrichResult clusterProfiler
## filter.gseaResult clusterProfiler
## group_by.compareClusterResult clusterProfiler
## group_by.enrichResult clusterProfiler
## group_by.gseaResult clusterProfiler
## mutate.compareClusterResult clusterProfiler
## mutate.enrichResult clusterProfiler
## mutate.gseaResult clusterProfiler
## rename.compareClusterResult clusterProfiler
## rename.enrichResult clusterProfiler
## rename.gseaResult clusterProfiler
## select.compareClusterResult clusterProfiler
## select.enrichResult clusterProfiler
## select.gseaResult clusterProfiler
## slice.compareClusterResult clusterProfiler
## slice.enrichResult clusterProfiler
## slice.gseaResult clusterProfiler
## summarise.compareClusterResult clusterProfiler
## summarise.enrichResult clusterProfiler
## summarise.gseaResult clusterProfiler
##
## Attaching package: 'clusterProfiler.dplyr'
## The following object is masked from 'package:AnnotationDbi':
##
## select
## The following object is masked from 'package:IRanges':
##
## slice
## The following object is masked from 'package:S4Vectors':
##
## rename
## The following object is masked from 'package:stats':
##
## filter
y <- mutate(x, ordering = abs(NES)) %>%
arrange(desc(ordering))
library(ggstance)
library(enrichplot)
library(forcats)
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:ggstance':
##
## geom_errorbarh, GeomErrorbarh
n <- 10
y_bar <- group_by(y, sign(NES)) %>%
slice(1:n)
## Warning: The `add` argument of `group_by()` is deprecated as of dplyr 1.0.0.
## Please use the `.add` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
ggplot(y_bar, aes(NES, fct_reorder(Description, NES), fill = qvalues), showCategory=(n*2)) +
geom_barh(stat='identity') +
scale_fill_continuous(low='red', high='blue', guide=guide_colorbar(reverse=TRUE)) +
theme_minimal() + ylab(NULL)
Visualize GSEA results for Disaese Ontology (= output from gseDO
)
dotplot(edo2, showCategory = 10, font.size = 8,
x = "GeneRatio", # option -> c("GeneRatio", "Count")
color = "p.adjust") # option -> c("pvalue", "p.adjust", "qvalue")
The cnetplot
depicts the linkeage of genes and biological concepts (e.g. GO terms
or KEGG pathways) as a network.
n <- 3
## Error with `cnetplot` after applying any dplyr function
## Issue: https://github.com/YuLab-SMU/clusterProfiler.dplyr/issues/5
# y <- mutate(x, ordering = abs(NES)) %>%
# group_by(sign(NES)) %>%
# slice(1:n)
# p1 <- cnetplot(y, showCategory = (n*2), colorEdge = TRUE, node_label = "category")
# cowplot::plot_grid(p1, ncol=1, labels=LETTERS[1], rel_widths=c(1))
p1 <- cnetplot(x, showCategory = (n*2), colorEdge = TRUE, node_label = "category")
cowplot::plot_grid(p1, ncol=1, labels=LETTERS[1], rel_widths=c(1))
p2 <- emapplot(y, showCategory = 10)
cowplot::plot_grid(p2, ncol = 1, lables = LETTERS[1])
## Error in FUN(X[[i]], ...) : object 'size' not found
library(ggupset)
upsetplot(edo2)
ridgeplot(msC2_2)
y2 <- arrange(x, desc(NES))
p1 <- gseaplot(y2, geneSetID = 1, title = y2$Description[1]) # max NES
n <- nrow(y2)
p2 <- gseaplot(y2, geneSetID = n, title = y2$Description[n]) # min NES
cowplot::plot_grid(p1, p2, ncol = 1, labels = LETTERS[1:2])
gseaplot2
p3 <- gseaplot2(y2, geneSetID = 1, title = y2$Description[1])
p4 <- gseaplot2(y2, geneSetID = n, title = y2$Description[n]) # min NES
cowplot::plot_grid(p3, p4, ncol = 1, labels = LETTERS[1:2])
Disease Ontology : “sporadic breast cancer” was ranked 96 out of 277 by NES
ind <- grep(keyword, edo2$Description, ignore.case = TRUE)
gseaplot2(edo2, geneSetID = ind, title = edo2$Description[ind])
gseaplot2(y2, geneSetID = 1:10)
gsearank
The gsearank
function plot the ranked list of genes belong to the specific gene set.
gsearank(y2, geneSetID = 1, title = y2$Description[1])
terms <- edo2$Description[1:3]
pmcplot(terms, 2010:2017, proportion=FALSE)