GSEA

从差异基因开始的GSEA分析注意此时应纳入所有的基因,但logFC已经计算完成

require(tidyverse)

## Loading required package: tidyverse

## -- Attaching packages ------------------------------------------- tidyverse 1.2.1 --

## √ ggplot2 3.2.0     √ purrr   0.3.2
## √ tibble  2.1.3     √ dplyr   0.8.3
## √ tidyr   0.8.3     √ stringr 1.4.0
## √ readr   1.3.1     √ forcats 0.4.0

## Warning: package 'dplyr' was built under R version 3.6.1

## -- Conflicts ---------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

require(ggplot2)
require(clusterProfiler)

## Loading required package: clusterProfiler

##

## Registered S3 method overwritten by 'enrichplot':
##   method               from
##   fortify.enrichResult DOSE

## clusterProfiler v3.12.0  For help: https://guangchuangyu.github.io/software/clusterProfiler
## 
## If you use clusterProfiler in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Yanyan Han, Qing-Yu He. clusterProfiler: an R package for comparing biological themes among gene clusters. OMICS: A Journal of Integrative Biology. 2012, 16(5):284-287.

## 
## Attaching package: 'clusterProfiler'

## The following object is masked from 'package:purrr':
## 
##     simplify

require(org.Hs.eg.db)

## Loading required package: org.Hs.eg.db

## Loading required package: AnnotationDbi

## Loading required package: stats4

## Loading required package: BiocGenerics

## Loading required package: parallel

## 
## Attaching package: 'BiocGenerics'

## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB

## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union

## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs

## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter,
##     Find, get, grep, grepl, intersect, is.unsorted, lapply, Map,
##     mapply, match, mget, order, paste, pmax, pmax.int, pmin,
##     pmin.int, Position, rank, rbind, Reduce, rownames, sapply,
##     setdiff, sort, table, tapply, union, unique, unsplit, which,
##     which.max, which.min

## Loading required package: Biobase

## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.

## Loading required package: IRanges

## Loading required package: S4Vectors

## 
## Attaching package: 'S4Vectors'

## The following objects are masked from 'package:dplyr':
## 
##     first, rename

## The following object is masked from 'package:tidyr':
## 
##     expand

## The following object is masked from 'package:base':
## 
##     expand.grid

## 
## Attaching package: 'IRanges'

## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice

## The following object is masked from 'package:purrr':
## 
##     reduce

## The following object is masked from 'package:grDevices':
## 
##     windows

## 
## Attaching package: 'AnnotationDbi'

## The following object is masked from 'package:dplyr':
## 
##     select

##

require(enrichplot)

## Loading required package: enrichplot

### GSEA
## DEG_3读入所有基因
DEG_3<-read.csv("F:/Bioinfor_project/Breast/AS_research/AS/result/TNBC_mRNA_edgeR.csv",row.names = 1,header = TRUE)
DEG_3<-DEG_3 %>% 
  rownames_to_column("genename") 
dim(DEG_3)

## [1] 24166     6

head(DEG_3)

##   genename     logFC   logCPM        F       PValue          FDR
## 1      CA4 -2.081217 5.566371 581.0757 5.922684e-65 1.431276e-60
## 2    VEGFD -1.208131 5.835019 720.0615 1.387734e-64 1.676799e-60
## 3  CD300LG -1.353763 5.918397 502.2992 7.848027e-60 6.321847e-56
## 4 HSD17B13 -1.472940 5.460879 481.1757 2.303186e-58 1.248068e-54
## 5     MYOC -2.603188 5.361846 480.4715 2.582281e-58 1.248068e-54
## 6   ATP1A2 -1.401117 5.864584 474.6783 6.645676e-58 2.676657e-54

genelist<-DEG_3$genename

## ID转换
genelist<- genelist %>%  bitr(fromType = "SYMBOL",
        toType = c("ENSEMBL", "ENTREZID"),
        OrgDb = org.Hs.eg.db)

## 'select()' returned 1:many mapping between keys and columns

## Warning in bitr(., fromType = "SYMBOL", toType = c("ENSEMBL",
## "ENTREZID"), : 2.58% of input gene IDs are fail to map...

head(genelist)

##     SYMBOL         ENSEMBL ENTREZID
## 1      CA4 ENSG00000167434      762
## 2    VEGFD ENSG00000165197     2277
## 3  CD300LG ENSG00000161649   146894
## 4 HSD17B13 ENSG00000170509   345275
## 5     MYOC ENSG00000034971     4653
## 6   ATP1A2 ENSG00000018625      477

dim(genelist)## 1689

## [1] 26062     3

colnames(genelist)[1]<-"genename"
DEG_3<-genelist %>%
  inner_join(DEG_3,by='genename') %>% 
  ## select配合everything排序把，改变变量顺序
  dplyr::select(ENTREZID,logFC,everything())
head(DEG_3)

##   ENTREZID     logFC genename         ENSEMBL   logCPM        F
## 1      762 -2.081217      CA4 ENSG00000167434 5.566371 581.0757
## 2     2277 -1.208131    VEGFD ENSG00000165197 5.835019 720.0615
## 3   146894 -1.353763  CD300LG ENSG00000161649 5.918397 502.2992
## 4   345275 -1.472940 HSD17B13 ENSG00000170509 5.460879 481.1757
## 5     4653 -2.603188     MYOC ENSG00000034971 5.361846 480.4715
## 6      477 -1.401117   ATP1A2 ENSG00000018625 5.864584 474.6783
##         PValue          FDR
## 1 5.922684e-65 1.431276e-60
## 2 1.387734e-64 1.676799e-60
## 3 7.848027e-60 6.321847e-56
## 4 2.303186e-58 1.248068e-54
## 5 2.582281e-58 1.248068e-54
## 6 6.645676e-58 2.676657e-54

### 构建genelist  
geneList<-DEG_3$logFC
names(geneList)<-as.character(DEG_3$ENTREZID)
geneList<-geneList[!duplicated(names(geneList))]
geneList<-sort(geneList,decreasing=TRUE)
head(geneList)##构建geneList对象成功

## 100038246     30012    158511 106146148     53940      3651 
##  3.886999  3.770181  3.593276  3.579486  3.425571  3.283401

Msigdbr

MsigDB数据库包括了多种关键的基因集
MsigDB ### msigdbr-GSEA

require(enrichplot)
library(msigdbr)

## Warning: package 'msigdbr' was built under R version 3.6.1

msigdbr_show_species()

##  [1] "Bos taurus"               "Caenorhabditis elegans"  
##  [3] "Canis lupus familiaris"   "Danio rerio"             
##  [5] "Drosophila melanogaster"  "Gallus gallus"           
##  [7] "Homo sapiens"             "Mus musculus"            
##  [9] "Rattus norvegicus"        "Saccharomyces cerevisiae"
## [11] "Sus scrofa"

## 获取所有人类genesets
#m_df <- msigdbr(species = "Homo sapiens")
#head(m_df, 2) %>% as.data.frame

## 获取感兴趣的genesets
## Msigdbr包括H:hallmark genes, C1-C7等
m_t2g <- msigdbr(species = "Homo sapiens", category = "H") %>% 
  dplyr::select(gs_name, entrez_gene)
head(m_t2g)

## # A tibble: 6 x 2
##   gs_name               entrez_gene
##   <chr>                       <int>
## 1 HALLMARK_ADIPOGENESIS          19
## 2 HALLMARK_ADIPOGENESIS       11194
## 3 HALLMARK_ADIPOGENESIS       10449
## 4 HALLMARK_ADIPOGENESIS          33
## 5 HALLMARK_ADIPOGENESIS          34
## 6 HALLMARK_ADIPOGENESIS          35

##
em <- GSEA(geneList,nPerm = 10000 ,TERM2GENE = m_t2g,pvalueCutoff = 0.05,verbose = FALSE)

## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize = minGSSize, : There are ties in the preranked stats (0.01% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

em[1:5,1:5]

##                                                ID              Description
## HALLMARK_MYC_TARGETS_V1   HALLMARK_MYC_TARGETS_V1  HALLMARK_MYC_TARGETS_V1
## HALLMARK_E2F_TARGETS         HALLMARK_E2F_TARGETS     HALLMARK_E2F_TARGETS
## HALLMARK_G2M_CHECKPOINT   HALLMARK_G2M_CHECKPOINT  HALLMARK_G2M_CHECKPOINT
## HALLMARK_MITOTIC_SPINDLE HALLMARK_MITOTIC_SPINDLE HALLMARK_MITOTIC_SPINDLE
## HALLMARK_MYC_TARGETS_V2   HALLMARK_MYC_TARGETS_V2  HALLMARK_MYC_TARGETS_V2
##                          setSize enrichmentScore      NES
## HALLMARK_MYC_TARGETS_V1      195       0.5475417 1.847455
## HALLMARK_E2F_TARGETS         191       0.6713205 2.258858
## HALLMARK_G2M_CHECKPOINT      192       0.6568366 2.211457
## HALLMARK_MITOTIC_SPINDLE     199       0.4619989 1.562069
## HALLMARK_MYC_TARGETS_V2       57       0.6113801 1.730700

#write.csv(em,"F:/Bioinfor_project/Breast/AS_research/AS/result/GSEA_hallmark_enrich.csv")

## 整体GSEA分析富集情况
dotplot(em,showCategory=12,split=".sign")+facet_grid(~.sign)##

## wrong orderBy parameter; set to default `orderBy = "x"`

#ggsave(filename = "GSEA_dot_plot.pdf",width = 10,height =6)
gseaplot2(em, geneSetID = 1, title = em$ID[1],color="red",pvalue_table=T)

#ggsave(filename = "GSEA_dot_plot_ID1.pdf",width = 8,height =5)
gseaplot2(em, geneSetID = 1:3, title ="Activated Pathway",pvalue_table=T)

#ggsave(filename = "GSEA_dot_plot_Activated.pdf",width = 8,height =5 )

批量绘图-保存为单个文件

for循环法

require(cowplot)

## Loading required package: cowplot

## 
## Attaching package: 'cowplot'

## The following object is masked from 'package:ggplot2':
## 
##     ggsave

library(gridExtra)

## 
## Attaching package: 'gridExtra'

## The following object is masked from 'package:Biobase':
## 
##     combine

## The following object is masked from 'package:BiocGenerics':
## 
##     combine

## The following object is masked from 'package:dplyr':
## 
##     combine

p<-list()
for (i in c(1:11)){
  p[[i]]<-gseaplot2(em, geneSetID = i, title=em$ID[i],color="red",pvalue_table=T)
  ggsave(sprintf("GSEAp%s.pdf", i), width = 8, height = 6, onefile = T)##批量保存
  p[[i]]
  dev.off()
}
p3<-marrangeGrob(p,nrow = 2,ncol=2)##marrangeGrob接受list对象组图，指定ncol,nrow
p3

#ggsave("all_gsea.pdf",width = 14,height = 8,plot = p3)

lapply批量出图

实现一键绘制感兴趣的图

GSEAplot<-function(x){
    gseaplot2(em, geneSetID =x,title=em$ID[x],color="red",subplots = ,
              pvalue_table=T)
}
GSEAplot(1)

nrow(em@result)## GSEA富集结果数

## [1] 11

p4<-lapply(c(1:nrow(em@result)), GSEAplot)## 批量绘制
print(p4)

## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

GSEA

Chard Liu

2019年7月24日

GSEA

Msigdbr

批量绘图-保存为单个文件

lapply批量出图