rm(list = ls())
######################################___1. library
#installApp('WikiPathways')
#installApp('CyTargetLinker')
#installApp('stringApp')
#installApp('enrichmentMap')
load.libs <- c(
"DOSE","GO.db","GSEABase","org.Hs.eg.db","clusterProfiler","dplyr","tidyr",
"ggplot2","stringr",
"RColorBrewer","rWikiPathways","RCy3")
#install.packages("pacman")
#library(pacman)
#BiocManager::install("clusterProfiler")
#p_load(load.libs, update = TRUE, character.only = TRUE)
sapply(load.libs,require,character.only = TRUE)
## 载入需要的程辑包:DOSE
##
## DOSE v3.18.3 For help: https://guangchuangyu.github.io/software/DOSE
##
## If you use DOSE in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Guang-Rong Yan, Qing-Yu He. DOSE: an R/Bioconductor package for Disease Ontology Semantic and Enrichment analysis. Bioinformatics 2015, 31(4):608-609
## 载入需要的程辑包:GO.db
## 载入需要的程辑包:AnnotationDbi
## 载入需要的程辑包:stats4
## 载入需要的程辑包:BiocGenerics
## 载入需要的程辑包:parallel
##
## 载入程辑包:'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## 载入需要的程辑包:Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## 载入需要的程辑包:IRanges
## 载入需要的程辑包:S4Vectors
##
## 载入程辑包:'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
## 载入程辑包:'IRanges'
## The following object is masked from 'package:grDevices':
##
## windows
## 载入需要的程辑包:GSEABase
## 载入需要的程辑包:annotate
## 载入需要的程辑包:XML
## 载入需要的程辑包:graph
##
## 载入程辑包:'graph'
## The following object is masked from 'package:XML':
##
## addNode
## 载入需要的程辑包:org.Hs.eg.db
##
## 载入需要的程辑包:clusterProfiler
## clusterProfiler v4.0.5 For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
##
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141. doi: 10.1016/j.xinn.2021.100141
##
## 载入程辑包:'clusterProfiler'
## The following object is masked from 'package:AnnotationDbi':
##
## select
## The following object is masked from 'package:IRanges':
##
## slice
## The following object is masked from 'package:S4Vectors':
##
## rename
## The following object is masked from 'package:stats':
##
## filter
## 载入需要的程辑包:dplyr
##
## 载入程辑包:'dplyr'
## The following objects are masked from 'package:GSEABase':
##
## intersect, setdiff, union
## The following object is masked from 'package:graph':
##
## union
## The following object is masked from 'package:AnnotationDbi':
##
## select
## The following objects are masked from 'package:IRanges':
##
## collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
##
## first, intersect, rename, setdiff, setequal, union
## The following object is masked from 'package:Biobase':
##
## combine
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## 载入需要的程辑包:tidyr
##
## 载入程辑包:'tidyr'
## The following object is masked from 'package:S4Vectors':
##
## expand
## 载入需要的程辑包:ggplot2
## 载入需要的程辑包:stringr
##
## 载入程辑包:'stringr'
## The following object is masked from 'package:graph':
##
## boundary
## 载入需要的程辑包:RColorBrewer
## 载入需要的程辑包:rWikiPathways
## 载入需要的程辑包:RCy3
##
## 载入程辑包:'RCy3'
## The following object is masked from 'package:XML':
##
## getNodePosition
## DOSE GO.db GSEABase org.Hs.eg.db clusterProfiler
## TRUE TRUE TRUE TRUE TRUE
## dplyr tidyr ggplot2 stringr RColorBrewer
## TRUE TRUE TRUE TRUE TRUE
## rWikiPathways RCy3
## TRUE TRUE
cytoscapePing()
## You are connected to Cytoscape!
############################################____2. Dataset
lung.expr <- read.csv(system.file("extdata","data-lung-cancer.csv", package="rWikiPathways"),stringsAsFactors = FALSE)
dim(lung.expr); head(lung.expr)
## [1] 36841 5
## 锘縂eneID GeneName log2FC P.Value adj.P.Value
## 1 ENSG00000000003 TSPAN6 1.2109203 0.030285815 0.201038947
## 2 ENSG00000000005 TNMD 1.6015344 0.171398244 0.444140353
## 3 ENSG00000000419 DPM1 0.4674649 0.294138000 0.564898506
## 4 ENSG00000000457 SCYL3 0.0544462 0.891635248 1.000000000
## 5 ENSG00000000460 C1orf112 1.4376887 0.004173094 0.100584579
## 6 ENSG00000000938 FGR -2.9959429 0.000004940 0.005027991
up.genes <- lung.expr[lung.expr$log2FC > 1 & lung.expr$adj.P.Value < 0.05, 1]
dn.genes <- lung.expr[lung.expr$log2FC < -1 & lung.expr$adj.P.Value < 0.05, 1]
bkgd.genes <- lung.expr[,1]
class(up.genes); class(dn.genes); class(bkgd.genes)
## [1] "character"
## [1] "character"
## [1] "character"
up.genes[1:5]; dn.genes[1:5]; bkgd.genes[1:5]
## [1] "ENSG00000006047" "ENSG00000006377" "ENSG00000008300" "ENSG00000009709"
## [5] "ENSG00000011347"
## [1] "ENSG00000000938" "ENSG00000004799" "ENSG00000004866" "ENSG00000004939"
## [5] "ENSG00000005844"
## [1] "ENSG00000000003" "ENSG00000000005" "ENSG00000000419" "ENSG00000000457"
## [5] "ENSG00000000460"
###############################################_____3. clusterProfiler convert Entrez IDs.
up.genes.entrez <- clusterProfiler::bitr(up.genes,fromType = "ENSEMBL",toType = "ENTREZID",OrgDb = org.Hs.eg.db)
## 'select()' returned 1:many mapping between keys and columns
## Warning in clusterProfiler::bitr(up.genes, fromType = "ENSEMBL", toType =
## "ENTREZID", : 4.18% of input gene IDs are fail to map...
head(up.genes.entrez)
## ENSEMBL ENTREZID
## 1 ENSG00000006047 51087
## 2 ENSG00000006377 1750
## 3 ENSG00000008300 1951
## 4 ENSG00000009709 5081
## 5 ENSG00000011347 9066
## 6 ENSG00000011426 54443
keytypes(org.Hs.eg.db)
## [1] "ACCNUM" "ALIAS" "ENSEMBL" "ENSEMBLPROT" "ENSEMBLTRANS"
## [6] "ENTREZID" "ENZYME" "EVIDENCE" "EVIDENCEALL" "GENENAME"
## [11] "GENETYPE" "GO" "GOALL" "IPI" "MAP"
## [16] "OMIM" "ONTOLOGY" "ONTOLOGYALL" "PATH" "PFAM"
## [21] "PMID" "PROSITE" "REFSEQ" "SYMBOL" "UCSCKG"
## [26] "UNIPROT"
dn.genes.entrez <- bitr(dn.genes,fromType = "ENSEMBL",toType = "ENTREZID",OrgDb = org.Hs.eg.db)
## 'select()' returned 1:many mapping between keys and columns
## Warning in bitr(dn.genes, fromType = "ENSEMBL", toType = "ENTREZID", OrgDb =
## org.Hs.eg.db): 1.09% of input gene IDs are fail to map...
bkgd.genes.entrez <- bitr(bkgd.genes,fromType = "ENSEMBL",toType = "ENTREZID",OrgDb = org.Hs.eg.db)
## 'select()' returned 1:many mapping between keys and columns
## Warning in bitr(bkgd.genes, fromType = "ENSEMBL", toType = "ENTREZID", OrgDb =
## org.Hs.eg.db): 15.04% of input gene IDs are fail to map...
###############################################______4. Gene Ontology
egobp <- clusterProfiler::enrichGO(
gene = up.genes.entrez[[2]],
universe = bkgd.genes.entrez[[2]],
OrgDb = org.Hs.eg.db,
ont = "BP",
pAdjustMethod = "fdr",
pvalueCutoff = 0.05, #p.adjust cutoff (https://github.com/GuangchuangYu/clusterProfiler/issues/104)
readable = TRUE)
head(egobp,5)
## ID Description GeneRatio BgRatio
## GO:0007059 GO:0007059 chromosome segregation 40/319 330/18374
## GO:0140014 GO:0140014 mitotic nuclear division 37/319 296/18374
## GO:0098813 GO:0098813 nuclear chromosome segregation 34/319 268/18374
## GO:0000819 GO:0000819 sister chromatid segregation 29/319 194/18374
## GO:0000070 GO:0000070 mitotic sister chromatid segregation 27/319 164/18374
## pvalue p.adjust qvalue
## GO:0007059 2.270689e-22 7.397904e-19 6.611290e-19
## GO:0140014 3.234320e-21 5.268707e-18 4.708489e-18
## GO:0098813 8.934918e-20 9.703320e-17 8.671573e-17
## GO:0000819 5.293586e-19 4.311626e-16 3.853174e-16
## GO:0000070 6.966838e-19 4.539591e-16 4.056900e-16
## geneID
## GO:0007059 TRIP13/SPAG5/NDC80/BIRC5/KIF4A/CDC6/FAM83D/OIP5/NCAPG/TTK/ECT2/CDC20/NEK2/CENPF/KIF14/CENPK/HJURP/DLGAP5/SGO1/TOP2A/CCNB1/CDCA8/BRIP1/KIF23/KNL1/CENPE/KIF2C/NUF2/MKI67/EME1/BUB1B/MAD2L1/SKA3/PLK1/BUB1/UBE2C/RMI2/KNTC1/CDCA2/KIF18B
## GO:0140014 ANLN/TRIP13/SPAG5/NDC80/TPX2/KIF4A/CDC6/MYBL2/NCAPG/TTK/CDC20/NEK2/CENPF/KIF14/CENPK/DLGAP5/SGO1/CCNB1/CDCA8/KIF23/KIF11/CENPE/KIF2C/NUF2/MKI67/CHEK1/BUB1B/CCNB2/CDC25C/MAD2L1/PLK1/BUB1/CDK1/MTBP/UBE2C/KNTC1/KIF18B
## GO:0098813 TRIP13/SPAG5/NDC80/KIF4A/CDC6/FAM83D/NCAPG/TTK/ECT2/CDC20/NEK2/CENPF/KIF14/CENPK/DLGAP5/SGO1/TOP2A/CCNB1/CDCA8/BRIP1/KIF23/KNL1/CENPE/KIF2C/NUF2/EME1/BUB1B/MAD2L1/PLK1/BUB1/UBE2C/RMI2/KNTC1/KIF18B
## GO:0000819 TRIP13/SPAG5/NDC80/KIF4A/CDC6/NCAPG/TTK/CDC20/NEK2/CENPF/KIF14/CENPK/DLGAP5/SGO1/TOP2A/CCNB1/CDCA8/KIF23/CENPE/KIF2C/NUF2/BUB1B/MAD2L1/PLK1/BUB1/UBE2C/RMI2/KNTC1/KIF18B
## GO:0000070 TRIP13/SPAG5/NDC80/KIF4A/CDC6/NCAPG/TTK/CDC20/NEK2/CENPF/KIF14/CENPK/DLGAP5/SGO1/CCNB1/CDCA8/KIF23/CENPE/KIF2C/NUF2/BUB1B/MAD2L1/PLK1/BUB1/UBE2C/KNTC1/KIF18B
## Count
## GO:0007059 40
## GO:0140014 37
## GO:0098813 34
## GO:0000819 29
## GO:0000070 27
barplot(egobp, showCategory = 10)

dotplot(egobp, showCategory = 10)

goplot(egobp)
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

ggplot(egobp[1:10], aes(x=reorder(Description, -pvalue), y=Count, fill=-p.adjust)) +
geom_bar(stat = "identity") +
coord_flip() +
scale_fill_continuous(low="blue", high="red") +
labs(x = "", y = "", fill = "p.adjust") +
theme(axis.text=element_text(size=11))

############################################_______5. EnrichmentMap
head(egobp,3)
## ID Description GeneRatio BgRatio
## GO:0007059 GO:0007059 chromosome segregation 40/319 330/18374
## GO:0140014 GO:0140014 mitotic nuclear division 37/319 296/18374
## GO:0098813 GO:0098813 nuclear chromosome segregation 34/319 268/18374
## pvalue p.adjust qvalue
## GO:0007059 2.270689e-22 7.397904e-19 6.611290e-19
## GO:0140014 3.234320e-21 5.268707e-18 4.708489e-18
## GO:0098813 8.934918e-20 9.703320e-17 8.671573e-17
## geneID
## GO:0007059 TRIP13/SPAG5/NDC80/BIRC5/KIF4A/CDC6/FAM83D/OIP5/NCAPG/TTK/ECT2/CDC20/NEK2/CENPF/KIF14/CENPK/HJURP/DLGAP5/SGO1/TOP2A/CCNB1/CDCA8/BRIP1/KIF23/KNL1/CENPE/KIF2C/NUF2/MKI67/EME1/BUB1B/MAD2L1/SKA3/PLK1/BUB1/UBE2C/RMI2/KNTC1/CDCA2/KIF18B
## GO:0140014 ANLN/TRIP13/SPAG5/NDC80/TPX2/KIF4A/CDC6/MYBL2/NCAPG/TTK/CDC20/NEK2/CENPF/KIF14/CENPK/DLGAP5/SGO1/CCNB1/CDCA8/KIF23/KIF11/CENPE/KIF2C/NUF2/MKI67/CHEK1/BUB1B/CCNB2/CDC25C/MAD2L1/PLK1/BUB1/CDK1/MTBP/UBE2C/KNTC1/KIF18B
## GO:0098813 TRIP13/SPAG5/NDC80/KIF4A/CDC6/FAM83D/NCAPG/TTK/ECT2/CDC20/NEK2/CENPF/KIF14/CENPK/DLGAP5/SGO1/TOP2A/CCNB1/CDCA8/BRIP1/KIF23/KNL1/CENPE/KIF2C/NUF2/EME1/BUB1B/MAD2L1/PLK1/BUB1/UBE2C/RMI2/KNTC1/KIF18B
## Count
## GO:0007059 40
## GO:0140014 37
## GO:0098813 34
class(egobp)
## [1] "enrichResult"
## attr(,"package")
## [1] "DOSE"
## extract a dataframe with results from object of type enrichResult
egobp.results.df <- egobp@result
class(egobp.results.df);dim(egobp.results.df) #[1] 3258 9
## [1] "data.frame"
## [1] 3258 9
heads(egobp.results.df,3)
## group group_name
## 1 1 ID
## 2 1 ID
## 3 1 ID
## 4 2 Description
## 5 2 Description
## 6 2 Description
## 7 3 GeneRatio
## 8 3 GeneRatio
## 9 3 GeneRatio
## 10 4 BgRatio
## 11 4 BgRatio
## 12 4 BgRatio
## 13 5 pvalue
## 14 5 pvalue
## 15 5 pvalue
## 16 6 p.adjust
## 17 6 p.adjust
## 18 6 p.adjust
## 19 7 qvalue
## 20 7 qvalue
## 21 7 qvalue
## 22 8 geneID
## 23 8 geneID
## 24 8 geneID
## 25 9 Count
## 26 9 Count
## 27 9 Count
## value
## 1 GO:0007059
## 2 GO:0140014
## 3 GO:0098813
## 4 chromosome segregation
## 5 mitotic nuclear division
## 6 nuclear chromosome segregation
## 7 40/319
## 8 37/319
## 9 34/319
## 10 330/18374
## 11 296/18374
## 12 268/18374
## 13 2.2706888539982e-22
## 14 3.2343196880045e-21
## 15 8.93491754044101e-20
## 16 7.39790428632613e-19
## 17 5.26870677175932e-18
## 18 9.70332044891893e-17
## 19 6.61128986332528e-19
## 20 4.70848855632655e-18
## 21 8.67157260240696e-17
## 22 TRIP13/SPAG5/NDC80/BIRC5/KIF4A/CDC6/FAM83D/OIP5/NCAPG/TTK/ECT2/CDC20/NEK2/CENPF/KIF14/CENPK/HJURP/DLGAP5/SGO1/TOP2A/CCNB1/CDCA8/BRIP1/KIF23/KNL1/CENPE/KIF2C/NUF2/MKI67/EME1/BUB1B/MAD2L1/SKA3/PLK1/BUB1/UBE2C/RMI2/KNTC1/CDCA2/KIF18B
## 23 ANLN/TRIP13/SPAG5/NDC80/TPX2/KIF4A/CDC6/MYBL2/NCAPG/TTK/CDC20/NEK2/CENPF/KIF14/CENPK/DLGAP5/SGO1/CCNB1/CDCA8/KIF23/KIF11/CENPE/KIF2C/NUF2/MKI67/CHEK1/BUB1B/CCNB2/CDC25C/MAD2L1/PLK1/BUB1/CDK1/MTBP/UBE2C/KNTC1/KIF18B
## 24 TRIP13/SPAG5/NDC80/KIF4A/CDC6/FAM83D/NCAPG/TTK/ECT2/CDC20/NEK2/CENPF/KIF14/CENPK/DLGAP5/SGO1/TOP2A/CCNB1/CDCA8/BRIP1/KIF23/KNL1/CENPE/KIF2C/NUF2/EME1/BUB1B/MAD2L1/PLK1/BUB1/UBE2C/RMI2/KNTC1/KIF18B
## 25 40
## 26 37
## 27 34
#View(egobp.results.df)
## create a new column for term size from BgRatio
egobp.results.df$term.size <- gsub("/(\\d+)", "", egobp.results.df$BgRatio)
heads(egobp.results.df,3)
## group group_name
## 1 1 ID
## 2 1 ID
## 3 1 ID
## 4 2 Description
## 5 2 Description
## 6 2 Description
## 7 3 GeneRatio
## 8 3 GeneRatio
## 9 3 GeneRatio
## 10 4 BgRatio
## 11 4 BgRatio
## 12 4 BgRatio
## 13 5 pvalue
## 14 5 pvalue
## 15 5 pvalue
## 16 6 p.adjust
## 17 6 p.adjust
## 18 6 p.adjust
## 19 7 qvalue
## 20 7 qvalue
## 21 7 qvalue
## 22 8 geneID
## 23 8 geneID
## 24 8 geneID
## 25 9 Count
## 26 9 Count
## 27 9 Count
## 28 10 term.size
## 29 10 term.size
## 30 10 term.size
## value
## 1 GO:0007059
## 2 GO:0140014
## 3 GO:0098813
## 4 chromosome segregation
## 5 mitotic nuclear division
## 6 nuclear chromosome segregation
## 7 40/319
## 8 37/319
## 9 34/319
## 10 330/18374
## 11 296/18374
## 12 268/18374
## 13 2.2706888539982e-22
## 14 3.2343196880045e-21
## 15 8.93491754044101e-20
## 16 7.39790428632613e-19
## 17 5.26870677175932e-18
## 18 9.70332044891893e-17
## 19 6.61128986332528e-19
## 20 4.70848855632655e-18
## 21 8.67157260240696e-17
## 22 TRIP13/SPAG5/NDC80/BIRC5/KIF4A/CDC6/FAM83D/OIP5/NCAPG/TTK/ECT2/CDC20/NEK2/CENPF/KIF14/CENPK/HJURP/DLGAP5/SGO1/TOP2A/CCNB1/CDCA8/BRIP1/KIF23/KNL1/CENPE/KIF2C/NUF2/MKI67/EME1/BUB1B/MAD2L1/SKA3/PLK1/BUB1/UBE2C/RMI2/KNTC1/CDCA2/KIF18B
## 23 ANLN/TRIP13/SPAG5/NDC80/TPX2/KIF4A/CDC6/MYBL2/NCAPG/TTK/CDC20/NEK2/CENPF/KIF14/CENPK/DLGAP5/SGO1/CCNB1/CDCA8/KIF23/KIF11/CENPE/KIF2C/NUF2/MKI67/CHEK1/BUB1B/CCNB2/CDC25C/MAD2L1/PLK1/BUB1/CDK1/MTBP/UBE2C/KNTC1/KIF18B
## 24 TRIP13/SPAG5/NDC80/KIF4A/CDC6/FAM83D/NCAPG/TTK/ECT2/CDC20/NEK2/CENPF/KIF14/CENPK/DLGAP5/SGO1/TOP2A/CCNB1/CDCA8/BRIP1/KIF23/KNL1/CENPE/KIF2C/NUF2/EME1/BUB1B/MAD2L1/PLK1/BUB1/UBE2C/RMI2/KNTC1/KIF18B
## 25 40
## 26 37
## 27 34
## 28 330
## 29 296
## 30 268
## filter for term size to keep only term.size => 3, gene count >= 5 and subset
egobp.results.df <- egobp.results.df[which(egobp.results.df[,'term.size'] >= 5 & egobp.results.df[,'Count'] >= 10),]
egobp.results.df <- egobp.results.df[c("ID", "Description", "pvalue", "qvalue", "geneID")]
dim(egobp.results.df) #[1] 308 5
## [1] 17 5
head(egobp.results.df, 6)
## ID Description
## GO:0051983 GO:0051983 regulation of chromosome segregation
## GO:0030071 GO:0030071 regulation of mitotic metaphase/anaphase transition
## GO:0033045 GO:0033045 regulation of sister chromatid segregation
## GO:0007091 GO:0007091 metaphase/anaphase transition of mitotic cell cycle
## GO:1902099 GO:1902099 regulation of metaphase/anaphase transition of cell cycle
## GO:0010965 GO:0010965 regulation of mitotic sister chromatid separation
## pvalue qvalue
## GO:0051983 8.295903e-15 3.019272e-12
## GO:0030071 5.664431e-14 1.832493e-11
## GO:0033045 6.332808e-14 1.843847e-11
## GO:0007091 9.707013e-14 2.569339e-11
## GO:1902099 1.260634e-13 3.058695e-11
## GO:0010965 2.094553e-13 4.356040e-11
## geneID
## GO:0051983 TRIP13/NDC80/CDC6/TTK/CDC20/CENPF/DLGAP5/CCNB1/CENPE/KIF2C/MKI67/BUB1B/MAD2L1/PLK1/BUB1/UBE2C/RMI2/KNTC1
## GO:0030071 TRIP13/NDC80/CDC6/TTK/CDC20/CENPF/DLGAP5/CCNB1/CENPE/BUB1B/MAD2L1/PLK1/BUB1/UBE2C/KNTC1
## GO:0033045 TRIP13/NDC80/CDC6/TTK/CDC20/CENPF/DLGAP5/CCNB1/CENPE/BUB1B/MAD2L1/PLK1/BUB1/UBE2C/RMI2/KNTC1
## GO:0007091 TRIP13/NDC80/CDC6/TTK/CDC20/CENPF/DLGAP5/CCNB1/CENPE/BUB1B/MAD2L1/PLK1/BUB1/UBE2C/KNTC1
## GO:1902099 TRIP13/NDC80/CDC6/TTK/CDC20/CENPF/DLGAP5/CCNB1/CENPE/BUB1B/MAD2L1/PLK1/BUB1/UBE2C/KNTC1
## GO:0010965 TRIP13/NDC80/CDC6/TTK/CDC20/CENPF/DLGAP5/CCNB1/CENPE/BUB1B/MAD2L1/PLK1/BUB1/UBE2C/KNTC1
## format gene list column
egobp.results.df$geneID <- gsub("/", ",", egobp.results.df$geneID)
head(egobp.results.df, 6)
## ID Description
## GO:0051983 GO:0051983 regulation of chromosome segregation
## GO:0030071 GO:0030071 regulation of mitotic metaphase/anaphase transition
## GO:0033045 GO:0033045 regulation of sister chromatid segregation
## GO:0007091 GO:0007091 metaphase/anaphase transition of mitotic cell cycle
## GO:1902099 GO:1902099 regulation of metaphase/anaphase transition of cell cycle
## GO:0010965 GO:0010965 regulation of mitotic sister chromatid separation
## pvalue qvalue
## GO:0051983 8.295903e-15 3.019272e-12
## GO:0030071 5.664431e-14 1.832493e-11
## GO:0033045 6.332808e-14 1.843847e-11
## GO:0007091 9.707013e-14 2.569339e-11
## GO:1902099 1.260634e-13 3.058695e-11
## GO:0010965 2.094553e-13 4.356040e-11
## geneID
## GO:0051983 TRIP13,NDC80,CDC6,TTK,CDC20,CENPF,DLGAP5,CCNB1,CENPE,KIF2C,MKI67,BUB1B,MAD2L1,PLK1,BUB1,UBE2C,RMI2,KNTC1
## GO:0030071 TRIP13,NDC80,CDC6,TTK,CDC20,CENPF,DLGAP5,CCNB1,CENPE,BUB1B,MAD2L1,PLK1,BUB1,UBE2C,KNTC1
## GO:0033045 TRIP13,NDC80,CDC6,TTK,CDC20,CENPF,DLGAP5,CCNB1,CENPE,BUB1B,MAD2L1,PLK1,BUB1,UBE2C,RMI2,KNTC1
## GO:0007091 TRIP13,NDC80,CDC6,TTK,CDC20,CENPF,DLGAP5,CCNB1,CENPE,BUB1B,MAD2L1,PLK1,BUB1,UBE2C,KNTC1
## GO:1902099 TRIP13,NDC80,CDC6,TTK,CDC20,CENPF,DLGAP5,CCNB1,CENPE,BUB1B,MAD2L1,PLK1,BUB1,UBE2C,KNTC1
## GO:0010965 TRIP13,NDC80,CDC6,TTK,CDC20,CENPF,DLGAP5,CCNB1,CENPE,BUB1B,MAD2L1,PLK1,BUB1,UBE2C,KNTC1
## add column for phenotype
egobp.results.df <- cbind(egobp.results.df, phenotype=1)
egobp.results.df <- egobp.results.df[, c(1, 2, 3, 4, 6, 5)]
## change column headers
colnames(egobp.results.df) <- c("Name","Description", "pvalue","qvalue","phenotype", "genes")
head(egobp.results.df, 6)
## Name Description
## GO:0051983 GO:0051983 regulation of chromosome segregation
## GO:0030071 GO:0030071 regulation of mitotic metaphase/anaphase transition
## GO:0033045 GO:0033045 regulation of sister chromatid segregation
## GO:0007091 GO:0007091 metaphase/anaphase transition of mitotic cell cycle
## GO:1902099 GO:1902099 regulation of metaphase/anaphase transition of cell cycle
## GO:0010965 GO:0010965 regulation of mitotic sister chromatid separation
## pvalue qvalue phenotype
## GO:0051983 8.295903e-15 3.019272e-12 1
## GO:0030071 5.664431e-14 1.832493e-11 1
## GO:0033045 6.332808e-14 1.843847e-11 1
## GO:0007091 9.707013e-14 2.569339e-11 1
## GO:1902099 1.260634e-13 3.058695e-11 1
## GO:0010965 2.094553e-13 4.356040e-11 1
## genes
## GO:0051983 TRIP13,NDC80,CDC6,TTK,CDC20,CENPF,DLGAP5,CCNB1,CENPE,KIF2C,MKI67,BUB1B,MAD2L1,PLK1,BUB1,UBE2C,RMI2,KNTC1
## GO:0030071 TRIP13,NDC80,CDC6,TTK,CDC20,CENPF,DLGAP5,CCNB1,CENPE,BUB1B,MAD2L1,PLK1,BUB1,UBE2C,KNTC1
## GO:0033045 TRIP13,NDC80,CDC6,TTK,CDC20,CENPF,DLGAP5,CCNB1,CENPE,BUB1B,MAD2L1,PLK1,BUB1,UBE2C,RMI2,KNTC1
## GO:0007091 TRIP13,NDC80,CDC6,TTK,CDC20,CENPF,DLGAP5,CCNB1,CENPE,BUB1B,MAD2L1,PLK1,BUB1,UBE2C,KNTC1
## GO:1902099 TRIP13,NDC80,CDC6,TTK,CDC20,CENPF,DLGAP5,CCNB1,CENPE,BUB1B,MAD2L1,PLK1,BUB1,UBE2C,KNTC1
## GO:0010965 TRIP13,NDC80,CDC6,TTK,CDC20,CENPF,DLGAP5,CCNB1,CENPE,BUB1B,MAD2L1,PLK1,BUB1,UBE2C,KNTC1
dim(egobp.results.df) #[1] 308 6
## [1] 17 6
#View(head(egobp.results.df, 6))
setwd("C:\\Users\\liyix\\OneDrive\\Desktop\\")
egobp.results.filename <- file.path(getwd(),paste("clusterprofiler_cluster_enr_results.txt",sep="_"))
write.table(egobp.results.df,egobp.results.filename,col.name=TRUE,sep="\t",row.names=FALSE,quote=FALSE)
em_command = paste('enrichmentmap build analysisType="generic" ',
'pvalue=',"0.05", 'qvalue=',"0.05",
'similaritycutoff=',"0.25",
'coeffecients=',"JACCARD",
'enrichmentsDataset1=',egobp.results.filename ,
sep=" ")
em_command
## [1] "enrichmentmap build analysisType=\"generic\" pvalue= 0.05 qvalue= 0.05 similaritycutoff= 0.25 coeffecients= JACCARD enrichmentsDataset1= C:/Users/liyix/OneDrive/Desktop/clusterprofiler_cluster_enr_results.txt"
#enrichment map command will return the suid of newly created network.
em_network_suid <- commandsRun(em_command)
em_network_suid
## [1] "2077"
renameNetwork("Cluster1_enrichmentmap_cp", network=as.numeric(em_network_suid))
#export the network
exportImage(em_network_suid, type = "png")
## file
## "C:\\Users\\liyix\\OneDrive\\Desktop\\2077.png"
# Run the AutoAnnotate command
aa_command <- paste("autoannotate annotate-clusterBoosted",
"clusterAlgorithm=MCL",
"labelColumn=EnrichmentMap::GS_DESCR",
"maxWords=3")
print(aa_command)
## [1] "autoannotate annotate-clusterBoosted clusterAlgorithm=MCL labelColumn=EnrichmentMap::GS_DESCR maxWords=3"
commandsGET(aa_command)
## [1] "{label:anaphase transition regulation"
## [2] "nodes:2198"
## [3] "2192"
## [4] "2189"
## [5] "2186"
## [6] "2183"
## [7] "2177"
## [8] "2174"
## [9] "2165"
## [10] "2162"
## [11] "2159"
## [12] "2156"
## [13] "2153"
## [14] "2150}"
## [15] "{label:establishment chromosome localization"
## [16] "nodes:2195"
## [17] "2180"
## [18] "2171}"
exportImage(em_network_suid, type = "png")
## file
## "C:\\Users\\liyix\\OneDrive\\Desktop\\2077.png"
#View(egobp.results.df)
# Annotate a subnetwork
createSubnetwork(c(1:2),"__mclCluster")
## network
## 2701
commandsGET(aa_command)
## [1] "{label:anaphase transition regulation"
## [2] "nodes:2189"
## [3] "2156"
## [4] "2186"
## [5] "2153"
## [6] "2183"
## [7] "2150"
## [8] "2177"
## [9] "2174"
## [10] "2198"
## [11] "2165"
## [12] "2162"
## [13] "2192"
## [14] "2159}"
## [15] "{label:establishment chromosome localization"
## [16] "nodes:2180"
## [17] "2171"
## [18] "2195}"
exportImage('tutorial_image2', type='PDF') #.pdf
## file
## "C:\\Users\\liyix\\OneDrive\\Desktop\\tutorial_image2.pdf"
#ref https://bioconductor.org/packages/devel/bioc/vignettes/rWikiPathways/inst/doc/Pathway-Analysis.html
#ref https://bioconductor.github.io/BiocWorkshops/cytoscape-automation-in-r-using-rcy3.html