suppressPackageStartupMessages({
  library(magrittr)
  library(clusterProfiler)
  library(SummarizedExperiment)
  library(GenomicSignatures)
  library(AnnotationDbi)
  library(org.Hs.eg.db)
  library(DOSE)
})

1 GSEA on Cl4935_263

1.1 Load avgLoading

dat_dir <- "~/data2/Genomic_Super_Signature/refinebio/methods/5_Hierarchical_Clustering_with_SpikeInTCGA"
PCclusters <- readRDS(file.path(dat_dir, "PCclusters.rds"))

1.2 Format for enrichment analysis

al <- PCclusters$avgLoading[,263]   ## feature 1: numeric vector
names(al) <- AnnotationDbi::mapIds(org.Hs.eg.db, keys=names(al), column='ENTREZID', keytype='SYMBOL')   ## feature 2: named vector
## 'select()' returned 1:many mapping between keys and columns
al <- sort(al, decreasing = TRUE)   ## feature 3: decreasing order

geneList <- al
gene <- names(geneList)[abs(geneList) > mean(abs(geneList))]
keyword <- "breast"

1.3 GSEA

## MSigDB_C2
library(msigdbr)
m_c2 <- msigdbr(species = "Homo sapiens", category = "C2") %>% 
  dplyr::select(gs_name, entrez_gene)
msC2_2 <- GSEA(geneList, TERM2GENE = m_c2)
x <- msC2_2

## Disease Ontology
edo2 <- gseDO(geneList)

2 Visualization

2.1 Barplot

Visualize GSEA result using MSigDB_C2

library(clusterProfiler.dplyr)
## Registered S3 methods overwritten by 'clusterProfiler.dplyr':
##   method                         from           
##   arrange.compareClusterResult   clusterProfiler
##   arrange.enrichResult           clusterProfiler
##   arrange.gseaResult             clusterProfiler
##   filter.compareClusterResult    clusterProfiler
##   filter.enrichResult            clusterProfiler
##   filter.gseaResult              clusterProfiler
##   group_by.compareClusterResult  clusterProfiler
##   group_by.enrichResult          clusterProfiler
##   group_by.gseaResult            clusterProfiler
##   mutate.compareClusterResult    clusterProfiler
##   mutate.enrichResult            clusterProfiler
##   mutate.gseaResult              clusterProfiler
##   rename.compareClusterResult    clusterProfiler
##   rename.enrichResult            clusterProfiler
##   rename.gseaResult              clusterProfiler
##   select.compareClusterResult    clusterProfiler
##   select.enrichResult            clusterProfiler
##   select.gseaResult              clusterProfiler
##   slice.compareClusterResult     clusterProfiler
##   slice.enrichResult             clusterProfiler
##   slice.gseaResult               clusterProfiler
##   summarise.compareClusterResult clusterProfiler
##   summarise.enrichResult         clusterProfiler
##   summarise.gseaResult           clusterProfiler
## 
## Attaching package: 'clusterProfiler.dplyr'
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## The following object is masked from 'package:IRanges':
## 
##     slice
## The following object is masked from 'package:S4Vectors':
## 
##     rename
## The following object is masked from 'package:stats':
## 
##     filter

y <- mutate(x, ordering = abs(NES)) %>%
        arrange(desc(ordering)) 

library(ggstance)
library(enrichplot)
library(forcats)
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:ggstance':
## 
##     geom_errorbarh, GeomErrorbarh

n <- 10
y_bar <- group_by(y, sign(NES)) %>%
            slice(1:n)
## Warning: The `add` argument of `group_by()` is deprecated as of dplyr 1.0.0.
## Please use the `.add` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
ggplot(y_bar, aes(NES, fct_reorder(Description, NES), fill = qvalues), showCategory=(n*2)) + 
      geom_barh(stat='identity') + 
      scale_fill_continuous(low='red', high='blue', guide=guide_colorbar(reverse=TRUE)) + 
      theme_minimal() + ylab(NULL)

2.2 Dotplot

Visualize GSEA results for Disaese Ontology (= output from gseDO)

dotplot(edo2, showCategory = 10, font.size = 8,
        x = "GeneRatio",   # option -> c("GeneRatio", "Count")
        color = "p.adjust")   # option -> c("pvalue", "p.adjust", "qvalue")

2.3 Gene-Concept Network

The cnetplot depicts the linkeage of genes and biological concepts (e.g. GO terms or KEGG pathways) as a network.

n <- 3
## Error with `cnetplot` after applying any dplyr function
## Issue: https://github.com/YuLab-SMU/clusterProfiler.dplyr/issues/5
# y <- mutate(x, ordering = abs(NES)) %>%
#         group_by(sign(NES)) %>% 
#         slice(1:n)
# p1 <- cnetplot(y, showCategory = (n*2), colorEdge = TRUE, node_label = "category")
# cowplot::plot_grid(p1, ncol=1, labels=LETTERS[1], rel_widths=c(1))

p1 <- cnetplot(x, showCategory = (n*2), colorEdge = TRUE, node_label = "category")
cowplot::plot_grid(p1, ncol=1, labels=LETTERS[1], rel_widths=c(1))

2.4 Heatmap-like functional classification

2.5 Enrichment Map

p2 <- emapplot(y, showCategory = 10)
cowplot::plot_grid(p2, ncol = 1, lables = LETTERS[1])
## Error in FUN(X[[i]], ...) : object 'size' not found

2.6 UpSet Plot

library(ggupset)
upsetplot(edo2)

2.7 ridgeline plot for expressiong distribution

ridgeplot(msC2_2)

2.8 gseaplot

y2 <- arrange(x, desc(NES)) 

p1 <- gseaplot(y2, geneSetID = 1, title = y2$Description[1])   # max NES
n <- nrow(y2)
p2 <- gseaplot(y2, geneSetID = n, title = y2$Description[n])   # min NES
cowplot::plot_grid(p1, p2, ncol = 1, labels = LETTERS[1:2])

2.8.1 gseaplot2

p3 <- gseaplot2(y2, geneSetID = 1, title = y2$Description[1])
p4 <- gseaplot2(y2, geneSetID = n, title = y2$Description[n])   # min NES
cowplot::plot_grid(p3, p4, ncol = 1, labels = LETTERS[1:2])

Disease Ontology : “sporadic breast cancer” was ranked 96 out of 277 by NES

ind <- grep(keyword, edo2$Description, ignore.case = TRUE)
gseaplot2(edo2, geneSetID = ind, title = edo2$Description[ind])

gseaplot2(y2, geneSetID = 1:10)

2.8.2 gsearank

The gsearank function plot the ranked list of genes belong to the specific gene set.

gsearank(y2, geneSetID = 1, title = y2$Description[1])

2.9 PubMed trend of enriched terms

terms <- edo2$Description[1:3]
pmcplot(terms, 2010:2017, proportion=FALSE)