install.packages("remotes", dependencies = T)
remotes::install_github("WTaoUMC/RegEnrich", dependencies = T)
Here we show how to apply RegEnrich on the RNAseq data by analyzing Kang et al’s monocyte-macrophage-IFN stimulation dataset ( GSE130567). There are multiple experiment conditions in this study. But here we would like to focus on partial samples in which monocytes were cultured with 10 ng/ml human macrophage colonystimulating factor (M-CSF) in the presence (IFN-γ-primed macrophages) or absence (resting macrophages) of 100 U/ml human IFN-γ for 48 h. RNA were extracted and reverse transcripted followed by sequencing (50 bp, paired-end) using Illumina HiSeq 4000. Sequenced reads were mapped to reference human genome (hg19 assembly) using STAR aligner with default parameters. We will use the raw HT-seq counts for the RegEnrich analysis.
library(RegEnrich)
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:S4Vectors':
##
## first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: tibble
## Loading required package: BiocSet
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
##
## count
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
##
library(GEOquery)
## Setting options('download.file.method.GEOquery'='auto')
## Setting options('GEOquery.inmemory.gpl'=FALSE)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x IRanges::collapse() masks dplyr::collapse()
## x Biobase::combine() masks dplyr::combine(), BiocGenerics::combine()
## x matrixStats::count() masks dplyr::count()
## x IRanges::desc() masks dplyr::desc()
## x tidyr::expand() masks S4Vectors::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::first() masks S4Vectors::first()
## x dplyr::lag() masks stats::lag()
## x ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## x purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce()
## x dplyr::rename() masks S4Vectors::rename()
## x IRanges::slice() masks dplyr::slice()
eset <- getGEO(GEO = "GSE130567")[[1]] #ExpressionSet unpacked
## Found 1 file(s)
## GSE130567_series_matrix.txt.gz
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
## File stored at:
## /tmp/RtmpnsVA2P/GPL20301.soft
pdata <- pData(eset)[, c("title", "geo_accession", "cultured in:ch1", "treatment:ch1")]
colnames(pdata) <- c("title", "accession", "cultured", "treatment")
pData(eset) <- pdata
# Estructura: eset[featureData (transcritos o genes) , phenoData (muestras: GSM)]
# Only samples cultured with M-CSF in the presence or absence of IFN-γ
eset <- eset[, pdata$treatment %in% c("NT", "IFNG-3h") & pdata$cultured == "M-CSF"]
# Sample information (muestras: GSM)
sampleInfo <- pData(eset)
rownames(sampleInfo) <- paste0(rep(c("Resting", "IFNG"), each = 3), 1:3)
sampleInfo$treatment <- factor(rep(c("Resting", "IFNG"), each = 3),
levels = c("Resting", "IFNG"))
Download read count file and decompose into a temporary folder. Desde https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE130567 Bajamos GSE130567_RAW.tar Botón derecho - copiar ubicación del enlace: https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE130567&format=file
tmpFolder = tempdir()
tmpFile = tempfile(pattern = "GSE130567_", tmpdir = tmpFolder, fileext = ".tar")
download.file("https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE130567&format=file",
destfile = tmpFile, mode = "wb")
untar(tmpFile, exdir = tmpFolder)
files = untar(tmpFile, list = TRUE)
filesFull = file.path(tmpFolder, files)
Then read the raw read counts in these files.
(usaremos funciones de R base preferetement)
dat <- list()
for (file in filesFull){
accID<-gsub(".*(GSM\\d+).*", "\\1", file) # reprint using \\1
if(accID %in% sampleInfo$accession){
zz<-gzfile(file, "rt") #A connection with
zzdata<-read.csv(zz, header = FALSE, sep = "\t", skip = 4, row.names = 1)
close(zz)
zzdata<-zzdata[,1, drop = FALSE] # Extract the first numeric column
colnames(zzdata)<-accID
dat<-c(dat, list(zzdata)) #Grow a list
}
}
# lo mismo que purrr::reduce(dat,cbind)
edata0 = do.call(cbind, dat)
edata = edata0[grep(".*[0-9]+$", rownames(edata0)),] # remove PAR locus genes
#check
rownames(edata0)[!rownames(edata0) %in% rownames(edata)]
## [1] "ENSG00000228572.7_PAR_Y" "ENSG00000182378.13_PAR_Y"
## [3] "ENSG00000178605.13_PAR_Y" "ENSG00000226179.6_PAR_Y"
## [5] "ENSG00000167393.17_PAR_Y" "ENSG00000281849.3_PAR_Y"
## [7] "ENSG00000275287.5_PAR_Y" "ENSG00000280767.3_PAR_Y"
## [9] "ENSG00000234958.6_PAR_Y" "ENSG00000229232.6_PAR_Y"
## [11] "ENSG00000185960.14_PAR_Y" "ENSG00000237531.6_PAR_Y"
## [13] "ENSG00000225661.7_PAR_Y" "ENSG00000205755.11_PAR_Y"
## [15] "ENSG00000198223.16_PAR_Y" "ENSG00000265658.6_PAR_Y"
## [17] "ENSG00000223274.6_PAR_Y" "ENSG00000185291.11_PAR_Y"
## [19] "ENSG00000169100.13_PAR_Y" "ENSG00000236871.7_PAR_Y"
## [21] "ENSG00000236017.8_PAR_Y" "ENSG00000169093.15_PAR_Y"
## [23] "ENSG00000182162.10_PAR_Y" "ENSG00000197976.11_PAR_Y"
## [25] "ENSG00000196433.12_PAR_Y" "ENSG00000223511.7_PAR_Y"
## [27] "ENSG00000234622.6_PAR_Y" "ENSG00000169084.13_PAR_Y"
## [29] "ENSG00000223571.6_PAR_Y" "ENSG00000214717.11_PAR_Y"
## [31] "ENSG00000277120.5_PAR_Y" "ENSG00000223773.7_PAR_Y"
## [33] "ENSG00000230542.6_PAR_Y" "ENSG00000002586.19_PAR_Y"
## [35] "ENSG00000168939.11_PAR_Y" "ENSG00000237801.6_PAR_Y"
## [37] "ENSG00000237040.6_PAR_Y" "ENSG00000124333.15_PAR_Y"
## [39] "ENSG00000228410.6_PAR_Y" "ENSG00000223484.7_PAR_Y"
## [41] "ENSG00000124334.17_PAR_Y" "ENSG00000270726.6_PAR_Y"
## [43] "ENSG00000185203.12_PAR_Y" "ENSG00000182484.15_PAR_Y"
## [45] "ENSG00000227159.8_PAR_Y"
rownames(edata) <- substr(rownames(edata), 1, 15)
colnames(edata) <- rownames(sampleInfo)
# Retain genes with average read counts higher than 1
edata <- edata[rowMeans(edata) > 1,]
ere we randomly take only 5,000 genes to quickly illustrate how to use RegEnrich, but to see the real result from the analysis, you should neglect the following step.
set.seed(1234)
edata <- edata[sample(1:nrow(edata), 5000), ]
expressionMatrix = as.matrix(edata)
expressionMatrix %>% dim
## [1] 5000 6
Database of transcription co-factors and transcription factor interactions: https://tools.sschmeier.com/tcof/home/ http://www.yeastract.com/consensuslist.php https://www.mrc-lmb.cam.ac.uk/genomes/FlyTF/old_index.html
data(TFs)
sample(TFs)
unique(TFs$TF) -> human_regulators
RegenrichSet_Base = RegenrichSet(expr = expressionMatrix, # expression data (matrix)
colData = sampleInfo, # sample information (data frame)
reg = human_regulators, # regulators
method = "LRT_DESeq2", # differential expression analysis method
design = ~ treatment, # desing fomula
reduced = ~ 1, # reduced
networkConstruction = "COEN", # network inference method
enrichTest = "FET") # enrichment analysis method
RegenrichSet_Base %>% regenrich_diffExpr -> object_diffExpr
object_diffExpr %>% results_DEA
## DataFrame with 5000 rows and 3 columns
## gene p logFC
## <character> <numeric> <numeric>
## ENSG00000164675 ENSG00000164675 7.67494e-01 -0.199603
## ENSG00000140396 ENSG00000140396 3.83988e-02 -1.055368
## ENSG00000170092 ENSG00000170092 4.05100e-01 -1.132787
## ENSG00000104312 ENSG00000104312 2.42940e-01 0.587937
## ENSG00000260314 ENSG00000260314 4.19788e-42 -5.014803
## ... ... ... ...
## ENSG00000169418 ENSG00000169418 0.3409367 -1.088527
## ENSG00000260853 ENSG00000260853 0.0901921 -1.428743
## ENSG00000099995 ENSG00000099995 0.1680020 -0.382339
## ENSG00000250267 ENSG00000250267 0.0576274 -3.493389
## ENSG00000108813 ENSG00000108813 0.5648831 0.523186
object_diffExpr %>% regenrich_network -> object_network
object_network %>% results_topNet
## TopNetwork object (37120 edges, networkConstruction: 'COEN', percentage: 5%)
## # A tibble: 37,120 x 3
## set element weight
## <chr> <chr> <dbl>
## 1 ENSG00000155229 ENSG00000007923 0.810
## 2 ENSG00000155229 ENSG00000154839 0.807
## 3 ENSG00000155229 ENSG00000285752 0.804
## 4 ENSG00000155229 ENSG00000196466 0.808
## 5 ENSG00000155229 ENSG00000172936 0.814
## 6 ENSG00000155229 ENSG00000072274 0.808
## 7 ENSG00000155229 ENSG00000280186 0.815
## 8 ENSG00000155229 ENSG00000100151 0.803
## 9 ENSG00000155229 ENSG00000135049 0.804
## 10 ENSG00000155229 ENSG00000172456 0.810
## # … with 37,110 more rows
slot(RegenrichSet_Base, "paramsIn")$reg -> all_regulators
all_regulators %>% str
## chr [1:1712] "ENSG00000001167" "ENSG00000004848" "ENSG00000005007" ...
object_network@topNetwork@set$set -> regulators_found
regulators_found %>% str
## chr [1:201] "ENSG00000155229" "ENSG00000028277" "ENSG00000114416" ...
regulators_found %in% all_regulators
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE
library(BiocParallel)
# on non-Windows computers (use 2 workers)
bpparam = register(MulticoreParam(workers = 8, RNGseed = 1234))
# on Windows computers (use 2 workers)
# bpparam = register(SnowParam(workers = 2, RNGseed = 1234))
# the lower minR is set, the less edges and potential less regulators are retained.
object_GRN_multicore_network = regenrich_network(object_diffExpr, networkConstruction = "GRN",
BPPARAM = bpparam, minR = 0.3)
object_GRN_multicore_network %>% results_topNet
## TopNetwork object (42816 edges, networkConstruction: 'GRN', percentage: 5%)
## # A tibble: 42,816 x 3
## set element weight
## <chr> <chr> <dbl>
## 1 ENSG00000008083 ENSG00000140396 0.00729
## 2 ENSG00000012822 ENSG00000140396 0.00601
## 3 ENSG00000055332 ENSG00000140396 0.00521
## 4 ENSG00000067369 ENSG00000140396 0.00787
## 5 ENSG00000100105 ENSG00000140396 0.00512
## 6 ENSG00000108813 ENSG00000140396 0.00544
## 7 ENSG00000114126 ENSG00000140396 0.00572
## 8 ENSG00000114626 ENSG00000140396 0.00603
## 9 ENSG00000119401 ENSG00000140396 0.00693
## 10 ENSG00000127528 ENSG00000140396 0.00731
## # … with 42,806 more rows
object_network %>% regenrich_enrich -> object_enrich
object_enrich %>%results_enrich
## Enrich object (FET method, 91 regulators are used for enrichment,
## 10 regulators pass the threshold)
## # A tibble: 10 x 9
## ID Description GeneRatio BgRatio pvalue p.adjust qvalue geneID Count
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <chr> <int>
## 1 ENSG0… ENSG000000… 228/308 789/22… 1.06e-49 9.64e-48 8.81e-48 ENSG00… 228
## 2 ENSG0… ENSG000001… 136/308 422/22… 2.34e-28 1.07e-26 9.74e-27 ENSG00… 136
## 3 ENSG0… ENSG000001… 217/308 1033/2… 1.22e-19 3.69e-18 3.37e-18 ENSG00… 217
## 4 ENSG0… ENSG000001… 64/308 181/22… 1.73e-14 3.94e-13 3.60e-13 ENSG00… 64
## 5 ENSG0… ENSG000001… 230/308 1218/2… 2.24e-14 4.08e-13 3.73e-13 ENSG00… 230
## 6 ENSG0… ENSG000001… 98/308 383/22… 1.06e-11 1.60e-10 1.47e-10 ENSG00… 98
## 7 ENSG0… ENSG000001… 185/308 990/22… 5.12e- 9 6.65e- 8 6.08e- 8 ENSG00… 185
## 8 ENSG0… ENSG000001… 182/308 1010/2… 2.73e- 7 3.11e- 6 2.84e- 6 ENSG00… 182
## 9 ENSG0… ENSG000002… 57/308 234/22… 4.17e- 6 4.21e- 5 3.85e- 5 ENSG00… 57
## 10 ENSG0… ENSG000000… 81/308 379/22… 8.23e- 6 7.49e- 5 6.85e- 5 ENSG00… 81
slot(results_enrich(object_enrich), "allResult") -> enrich_FET
enrich_FET$ID %in% all_regulators
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE
enrich_FET
set.seed(123)
object_enrich_GSEA <- regenrich_enrich(object_network, enrichTest = "GSEA", nperm = 5000)
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (1.48% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaFun(pathways = network, stats = namedScores1, minSize =
## minSize, : There were 28 pathways for which P-values were not calculated
## properly due to unbalanced gene-level statistic values
enrich_GSEA <- slot(results_enrich(object_enrich_GSEA), "allResult")
enrich_GSEA
plotOrders(enrich_FET[[1]], enrich_GSEA[[1]])
# Regulator scoring and ranking The RegEnrich score is a summarized information from both differential expression analysis and regulator enrichment analysis for regulators. This step of RegEnrich analysis is done by regenrich_rankScore function.
Above all, the differential expression analysis is performed by Limma method, regulator-target network is infered by COEN method, and enrichment analysis is performed by FET method, so the scores and ranking summarize the importance of regulators by considering regulatory interactions in the studied biological process.
object_enrich %>% regenrich_rankScore -> object_rankScore
res_score = results_score(object_rankScore)
#results_score(object_rankScore)
object_rankScore_GSEA <- regenrich_rankScore(object_enrich_GSEA)
#results_score(object_rankScore_GSEA)
plotRegTarExpr(object_rankScore, reg = "ENSG00000143437")
plotRegTarExpr(object_rankScore_GSEA, reg = "ENSG00000239306")
enrich_FET
library(KEGGREST)
library(tidyverse)
KEGGREST::listDatabases() -> all.kegg.databases
library(org.Hs.eg.db)
## Loading required package: AnnotationDbi
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
##
## select
##
org.Hs.eg.db %>% keytypes
## [1] "ACCNUM" "ALIAS" "ENSEMBL" "ENSEMBLPROT" "ENSEMBLTRANS"
## [6] "ENTREZID" "ENZYME" "EVIDENCE" "EVIDENCEALL" "GENENAME"
## [11] "GO" "GOALL" "IPI" "MAP" "OMIM"
## [16] "ONTOLOGY" "ONTOLOGYALL" "PATH" "PFAM" "PMID"
## [21] "PROSITE" "REFSEQ" "SYMBOL" "UCSCKG" "UNIGENE"
## [26] "UNIPROT"
enrich_FET.ENTREZID <- clusterProfiler::bitr(enrich_FET$ID, "ENSEMBL", "ENTREZID", "org.Hs.eg.db")
## 'select()' returned 1:many mapping between keys and columns
enrich_FET.SYMBOL <- clusterProfiler::bitr(enrich_FET$ID, "ENSEMBL", "SYMBOL", "org.Hs.eg.db")
## 'select()' returned 1:many mapping between keys and columns
enrich_FET.ENTREZID.SYMBOL <- inner_join(enrich_FET.ENTREZID, enrich_FET.SYMBOL, by = 'ENSEMBL')
Regulators_ego <- clusterProfiler::enrichGO(gene = enrich_FET.ENTREZID.SYMBOL$ENTREZID,
OrgDb = org.Hs.eg.db,
keyType = 'ENTREZID',
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.01)
Regulators_ego@result
inner_join(enrich_FET.ENTREZID.SYMBOL, enrich_FET , by = c('ENSEMBL'='ID')) -> enrich_FET_full
enrich_FET_full %>% .[1,] %>% .[['SYMBOL']]
## [1] "POU2F2"
enrich_FET_full %>% .[1,] %>% .[['geneID']]%>% str_split('/') %>% unlist() -> ensembl_ARNT
regulator_ARNT.ENTREZID <- clusterProfiler::bitr(ensembl_ARNT, "ENSEMBL", "ENTREZID", "org.Hs.eg.db")
## 'select()' returned 1:1 mapping between keys and columns
## Warning in clusterProfiler::bitr(ensembl_ARNT, "ENSEMBL", "ENTREZID",
## "org.Hs.eg.db"): 3.07% of input gene IDs are fail to map...
regulator_ARNT.SYMBOL <- clusterProfiler::bitr(ensembl_ARNT, "ENSEMBL", "SYMBOL", "org.Hs.eg.db")
## 'select()' returned 1:1 mapping between keys and columns
## Warning in clusterProfiler::bitr(ensembl_ARNT, "ENSEMBL", "SYMBOL",
## "org.Hs.eg.db"): 3.07% of input gene IDs are fail to map...
regulator_ARNT.ENTREZID.SYMBOL <- inner_join(regulator_ARNT.ENTREZID, regulator_ARNT.SYMBOL, by = 'ENSEMBL')
regulator_ARNT_ego <- clusterProfiler::enrichGO(gene = regulator_ARNT.ENTREZID.SYMBOL$ENTREZID,
OrgDb = org.Hs.eg.db,
keyType = 'ENTREZID',
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.01)
regulator_ARNT_ego@result
expressionMatrix %>% rownames -> all_rna_ensmble
all_rna_ensmble.ENTREZID <- clusterProfiler::bitr(all_rna_ensmble, "ENSEMBL", "ENTREZID", "org.Hs.eg.db")
## 'select()' returned 1:many mapping between keys and columns
## Warning in clusterProfiler::bitr(all_rna_ensmble, "ENSEMBL", "ENTREZID", :
## 19.18% of input gene IDs are fail to map...
all_rna_ensmble.SYMBOL <- clusterProfiler::bitr(all_rna_ensmble, "ENSEMBL", "SYMBOL", "org.Hs.eg.db")
## 'select()' returned 1:many mapping between keys and columns
## Warning in clusterProfiler::bitr(all_rna_ensmble, "ENSEMBL", "SYMBOL",
## "org.Hs.eg.db"): 19.18% of input gene IDs are fail to map...
all_rna_ensmble.ENTREZID.SYMBOL <- inner_join(all_rna_ensmble.ENTREZID, all_rna_ensmble.SYMBOL, by = 'ENSEMBL')
regulator_ARNT_vs_universe <- clusterProfiler::enrichGO(gene = regulator_ARNT.ENTREZID.SYMBOL$ENTREZID,
universe = all_rna_ensmble.ENTREZID.SYMBOL$ENTREZID,
OrgDb = org.Hs.eg.db,
keyType = "ENTREZID",
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.01,
readable = TRUE)
regulator_ARNT_vs_universe@result
regulator_ARNT_mkk <- clusterProfiler::enrichMKEGG(gene = regulator_ARNT.ENTREZID.SYMBOL$ENTREZID,
organism = "hsa",
pvalueCutoff = 0.01,
qvalueCutoff = 0.01)
## Reading KEGG annotation online:
##
## Reading KEGG annotation online:
regulator_ARNT_mkk@result
#BiocManager::install("ReactomePA", checkBuilt = T)
reactome <- ReactomePA::enrichPathway( regulator_ARNT.ENTREZID.SYMBOL$ENTREZID,
minGSSize = 10, organism = "human", pvalueCutoff = 0.01)
reactome@result %>% dplyr::select(matches('descri'))
all_reactome <- ReactomePA::enrichPathway(enrich_FET.ENTREZID.SYMBOL$ENTREZID,
minGSSize = 5, organism = "human", pvalueCutoff = 0.05)
all_reactome@result%>% dplyr::select(matches('descri'))
x <- DOSE::enrichDO(gene = enrich_FET.ENTREZID.SYMBOL$ENTREZID,
ont = "DO",
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
minGSSize = 5,
maxGSSize = 500,
qvalueCutoff = 0.05,
readable = FALSE)
x@result
dgn <-DOSE::enrichDGN(
gene = enrich_FET.ENTREZID.SYMBOL$ENTREZID,
pvalueCutoff = 0.01,
pAdjustMethod = "BH",
qvalueCutoff = 0.01)
dgn %>% as.data.frame
Comparing multiple gene lists
#regulator_ARNT.ENTREZID.SYMBOL
enrich_FET_full %>% .[2,] %>% .[['SYMBOL']]
## [1] "EHMT1"
enrich_FET_full %>% .[2,] %>% .[['geneID']]%>% str_split('/') %>% unlist() -> ensembl_RBM14
ensembl_RBM14.ENTREZID <- clusterProfiler::bitr(ensembl_RBM14, "ENSEMBL", "ENTREZID", "org.Hs.eg.db")
## 'select()' returned 1:1 mapping between keys and columns
## Warning in clusterProfiler::bitr(ensembl_RBM14, "ENSEMBL", "ENTREZID",
## "org.Hs.eg.db"): 2.94% of input gene IDs are fail to map...
ensembl_RBM14.SYMBOL <- clusterProfiler::bitr(ensembl_RBM14, "ENSEMBL", "SYMBOL", "org.Hs.eg.db")
## 'select()' returned 1:1 mapping between keys and columns
## Warning in clusterProfiler::bitr(ensembl_RBM14, "ENSEMBL", "SYMBOL",
## "org.Hs.eg.db"): 2.94% of input gene IDs are fail to map...
regulator_RBM14.ENTREZID.SYMBOL <- inner_join(ensembl_RBM14.ENTREZID, ensembl_RBM14.SYMBOL, by = 'ENSEMBL')
library(tidyverse)
enrich_FET_full %>% .[3,] %>% .[['SYMBOL']]
## [1] "MMS19"
enrich_FET_full %>% .[3,] %>% .[['geneID']]%>% str_split('/') %>% unlist() -> ensembl_EHMT1
ensembl_EHMT1.ENTREZID <- clusterProfiler::bitr(ensembl_EHMT1, "ENSEMBL", "ENTREZID", "org.Hs.eg.db")
## 'select()' returned 1:1 mapping between keys and columns
## Warning in clusterProfiler::bitr(ensembl_EHMT1, "ENSEMBL", "ENTREZID",
## "org.Hs.eg.db"): 1.84% of input gene IDs are fail to map...
ensembl_EHMT1.SYMBOL <- clusterProfiler::bitr(ensembl_EHMT1, "ENSEMBL", "SYMBOL", "org.Hs.eg.db")
## 'select()' returned 1:1 mapping between keys and columns
## Warning in clusterProfiler::bitr(ensembl_EHMT1, "ENSEMBL", "SYMBOL",
## "org.Hs.eg.db"): 1.84% of input gene IDs are fail to map...
regulator_EHMT1.ENTREZID.SYMBOL <- inner_join(ensembl_EHMT1.ENTREZID, ensembl_EHMT1.SYMBOL, by = 'ENSEMBL')
library(tidyverse)
enrich_FET_full %>% .[4,] %>% .[['SYMBOL']]
## [1] "BIN1"
enrich_FET_full %>% .[4,] %>% .[['geneID']]%>% str_split('/') %>% unlist() -> ensembl_BIN1
ensembl_BIN1.ENTREZID <- clusterProfiler::bitr(ensembl_BIN1, "ENSEMBL", "ENTREZID", "org.Hs.eg.db")
## 'select()' returned 1:1 mapping between keys and columns
## Warning in clusterProfiler::bitr(ensembl_BIN1, "ENSEMBL", "ENTREZID",
## "org.Hs.eg.db"): 3.12% of input gene IDs are fail to map...
ensembl_BIN1.SYMBOL <- clusterProfiler::bitr(ensembl_BIN1, "ENSEMBL", "SYMBOL", "org.Hs.eg.db")
## 'select()' returned 1:1 mapping between keys and columns
## Warning in clusterProfiler::bitr(ensembl_BIN1, "ENSEMBL", "SYMBOL",
## "org.Hs.eg.db"): 3.12% of input gene IDs are fail to map...
regulator_BIN1.ENTREZID.SYMBOL <- inner_join(ensembl_BIN1.ENTREZID, ensembl_BIN1.SYMBOL, by = 'ENSEMBL')
my_regulated_genes.ENTREZID <- list(RBM14 = regulator_RBM14.ENTREZID.SYMBOL[['ENTREZID']],
ARNT = regulator_ARNT.ENTREZID.SYMBOL[['ENTREZID']],
EHMT1 = regulator_EHMT1.ENTREZID.SYMBOL[['ENTREZID']],
BIN1 = regulator_BIN1.ENTREZID.SYMBOL[['ENTREZID']])
library(clusterProfiler)
## clusterProfiler v3.18.0 For help: https://guangchuangyu.github.io/software/clusterProfiler
##
## If you use clusterProfiler in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Yanyan Han, Qing-Yu He. clusterProfiler: an R package for comparing biological themes among gene clusters. OMICS: A Journal of Integrative Biology. 2012, 16(5):284-287.
##
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:AnnotationDbi':
##
## select
## The following object is masked from 'package:purrr':
##
## simplify
## The following object is masked from 'package:IRanges':
##
## slice
## The following object is masked from 'package:S4Vectors':
##
## rename
## The following object is masked from 'package:stats':
##
## filter
library(ReactomePA)
## ReactomePA v1.34.0 For help: https://guangchuangyu.github.io/ReactomePA
##
## If you use ReactomePA in published research, please cite:
## Guangchuang Yu, Qing-Yu He. ReactomePA: an R/Bioconductor package for reactome pathway analysis and visualization. Molecular BioSystems 2016, 12(2):477-479
ck <- compareCluster(geneCluster = my_regulated_genes.ENTREZID, fun = 'enrichPathway', pvalueCutoff=5)
dotplot(ck)
library(org.Hs.eg.db)
ck2 <- compareCluster(geneCluster = my_regulated_genes.ENTREZID, fun = enrichGO, OrgDb = org.Hs.eg.db, pvalueCutoff=5)
ck2 <- setReadable(ck2, OrgDb = org.Hs.eg.db, keyType="ENTREZID")
dotplot(ck2)
ck3 <- compareCluster(my_regulated_genes.ENTREZID, fun="enrichMKEGG",
organism="hsa", pvalueCutoff=5)
dotplot(ck3)
#scRNAseq
#BiocManager::install("scRNAseq", checkBuilt = T)
#?ReprocessedFluidigmData
library(tidyverse)
library(scRNAseq)
## Loading required package: SingleCellExperiment
out <- scRNAseq::listDatasets()
out %>% as.data.frame
out %>% as.data.frame %>% dplyr::filter(str_detect(Part,'brain') & Taxonomy==10090)
out %>% as.data.frame %>% dplyr::filter(str_detect(Part,'stem') & Taxonomy==10090)
marques2016oligodendrocyte <- MarquesBrainData(ensembl=TRUE)
oligodendrocyte <- marques2016oligodendrocyte
keep_transcript <- rowSums(counts(oligodendrocyte) > 10) > 10
oligodendrocyte <- oligodendrocyte[keep_transcript, ]
oligodendrocyte %>% rowData() -> oligodendrocyte.transc
buettner2015computational <- BuettnerESCData()
## snapshotDate(): 2020-10-02
## see ?scRNAseq and browseVignettes('scRNAseq') for documentation
## loading from cache
## see ?scRNAseq and browseVignettes('scRNAseq') for documentation
## loading from cache
## see ?scRNAseq and browseVignettes('scRNAseq') for documentation
## loading from cache
## snapshotDate(): 2020-10-02
## see ?scRNAseq and browseVignettes('scRNAseq') for documentation
## loading from cache
## snapshotDate(): 2020-10-27
## loading from cache
ESC <- buettner2015computational
keep_transcript <- rowSums(counts(ESC) > 1) > 1
ESC <- ESC[keep_transcript, ]
ESC %>% rowData() -> ESC.transc
library(org.Mm.eg.db)
##
org.Mm.eg.db %>% keytypes
## [1] "ACCNUM" "ALIAS" "ENSEMBL" "ENSEMBLPROT" "ENSEMBLTRANS"
## [6] "ENTREZID" "ENZYME" "EVIDENCE" "EVIDENCEALL" "GENENAME"
## [11] "GO" "GOALL" "IPI" "MGI" "ONTOLOGY"
## [16] "ONTOLOGYALL" "PATH" "PFAM" "PMID" "PROSITE"
## [21] "REFSEQ" "SYMBOL" "UNIGENE" "UNIPROT"
ESC.ENTREZID <- clusterProfiler::bitr(rownames(ESC.transc), "ENSEMBL", "ENTREZID", "org.Mm.eg.db")
## 'select()' returned 1:many mapping between keys and columns
## Warning in clusterProfiler::bitr(rownames(ESC.transc), "ENSEMBL", "ENTREZID", :
## 19.18% of input gene IDs are fail to map...
oligodendrocyte.ENTREZID <- clusterProfiler::bitr(rownames(oligodendrocyte.transc), "ENSEMBL", "ENTREZID", "org.Mm.eg.db")
## 'select()' returned 1:many mapping between keys and columns
## Warning in clusterProfiler::bitr(rownames(oligodendrocyte.transc), "ENSEMBL", :
## 0.69% of input gene IDs are fail to map...
ESC_ego <- clusterProfiler::enrichGO(gene = ESC.ENTREZID$ENTREZID,
OrgDb = org.Mm.eg.db,
keyType = 'ENTREZID',
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.01)
head(ESC_ego,10)
OLIGO_ego <- clusterProfiler::enrichGO(gene = oligodendrocyte.ENTREZID$ENTREZID,
OrgDb = org.Mm.eg.db,
keyType = 'ENTREZID',
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.01)
head(OLIGO_ego,10)
OLIGO_vs_ESC <- clusterProfiler::enrichGO(gene = oligodendrocyte.ENTREZID$ENTREZID,
universe = ESC.ENTREZID$ENTREZID,
OrgDb = org.Mm.eg.db,
keyType = "ENTREZID",
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.01,
readable = TRUE)
head(OLIGO_vs_ESC,10)
oligo_mkk <- clusterProfiler::enrichMKEGG(gene = ESC.ENTREZID$ENTREZID,
organism = "mmu",
pvalueCutoff = 0.01,
qvalueCutoff = 0.01)
## Reading KEGG annotation online:
##
## Reading KEGG annotation online:
oligo_mkk@result
mkk <- clusterProfiler::enrichMKEGG(gene = oligodendrocyte.ENTREZID$ENTREZID,
universe = ESC.ENTREZID$ENTREZID,
organism = "mmu",
pvalueCutoff = 0.01,
qvalueCutoff = 0.01)
mkk@result
#BiocManager::install("ReactomePA", checkBuilt = T)
ecs_reactome <- ReactomePA::enrichPathway(ESC.ENTREZID$ENTREZID,
minGSSize = 15, organism = "mouse", pvalueCutoff = 0.01)
ecs_reactome@result %>% dplyr::select(matches('descri'))
oligo_ecs_reactome <- ReactomePA::enrichPathway(oligodendrocyte.ENTREZID$ENTREZID,
universe = ESC.ENTREZID$ENTREZID,
minGSSize = 5, organism = "mouse", pvalueCutoff = 0.01)
oligo_ecs_reactome@result%>% dplyr::select(matches('descri'))
out %>% as.data.frame() %>% dplyr::filter(str_detect(Part,'lung') & Taxonomy==9606)
out %>% as.data.frame() %>% dplyr::filter(str_detect(Part,'plur') & Taxonomy==9606)
zilionis2019singlecell <- ZilionisLungData(ensembl = T)
## snapshotDate(): 2020-10-02
## see ?scRNAseq and browseVignettes('scRNAseq') for documentation
## loading from cache
## see ?scRNAseq and browseVignettes('scRNAseq') for documentation
## loading from cache
## snapshotDate(): 2020-10-27
## loading from cache
## Warning: Unable to map 1467 of 41861 requested IDs.
lamanno2016molecular <- LaMannoBrainData('human-ips', ensembl = T)
## snapshotDate(): 2020-10-02
## see ?scRNAseq and browseVignettes('scRNAseq') for documentation
## loading from cache
## see ?scRNAseq and browseVignettes('scRNAseq') for documentation
## loading from cache
## snapshotDate(): 2020-10-27
## loading from cache
## Warning: Unable to map 1904 of 14725 requested IDs.
lung <- zilionis2019singlecell
human_ips <- lamanno2016molecular
keep_transcript <- rowSums(counts(lung) > 1) > 1
lung <- lung[keep_transcript, ]
lung %>% rowData() -> lung.transc
keep_transcript <- rowSums(counts(human_ips) > 1) > 1
human_ips <- human_ips[keep_transcript, ]
human_ips %>% rowData() -> human_ips.transc
lung.ENTREZID <- clusterProfiler::bitr(rownames(lung.transc), "ENSEMBL", "ENTREZID", "org.Hs.eg.db")
## 'select()' returned 1:many mapping between keys and columns
## Warning in clusterProfiler::bitr(rownames(lung.transc), "ENSEMBL", "ENTREZID", :
## 17.54% of input gene IDs are fail to map...
human_ips.ENTREZID <- clusterProfiler::bitr(rownames(human_ips.transc), "ENSEMBL", "ENTREZID", "org.Hs.eg.db")
## 'select()' returned 1:many mapping between keys and columns
## Warning in clusterProfiler::bitr(rownames(human_ips.transc), "ENSEMBL", : 0.8%
## of input gene IDs are fail to map...
x <- DOSE::enrichDO(gene = lung.ENTREZID$ENTREZID,
ont = "DO",
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
minGSSize = 5,
maxGSSize = 500,
qvalueCutoff = 0.05,
readable = FALSE)
x@result
x <- DOSE::enrichDO(gene = human_ips.ENTREZID$ENTREZID,
ont = "DO",
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
minGSSize = 5,
maxGSSize = 500,
qvalueCutoff = 0.05,
readable = FALSE)
x@result
dgn <-DOSE::enrichDGN(
gene = lung.ENTREZID$ENTREZID,
pvalueCutoff = 0.01,
pAdjustMethod = "BH",
minGSSize = 400,
maxGSSize = 700,
qvalueCutoff = 0.01)
dgn %>% as.data.frame
#devtools::install_dev("vroom")
cell_marker_data <- vroom::vroom('http://bio-bigdata.hrbmu.edu.cn/CellMarker/download/Human_cell_markers.txt')
## Rows: 2,868
## Columns: 15
## Delimiter: "\t"
## chr [15]: speciesType, tissueType, UberonOntologyID, cancerType, cellType, cellName, CellO...
##
## Use `spec()` to retrieve the guessed column specification
## Pass a specification to the `col_types` argument to quiet this message
## instead of `cellName`, users can use other features (e.g. `cancerType`)
cells <- cell_marker_data %>%
dplyr::select(cellName, geneID) %>%
dplyr::mutate(geneID = strsplit(geneID, ', ')) %>%
tidyr::unnest()
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(geneID)`
x <- clusterProfiler::enricher(lung.ENTREZID$ENTREZID, TERM2GENE = cells)
x@result
#MSigDb analysis Molecular Signatures Database is a collection of annotated gene sets. It contains 8 major collections:
H: hallmark gene sets
C1: positional gene sets
C2: curated gene sets
C3: motif gene sets
C4: computational gene sets
C5: GO gene sets
C6: oncogenic signatures
C7: immunologic signatures
library(msigdbr)
msigdbr_show_species()
## Warning: 'msigdbr_show_species' is deprecated.
## Use 'msigdbr_species' instead.
## See help("Deprecated")
## [1] "Bos taurus" "Caenorhabditis elegans"
## [3] "Canis lupus familiaris" "Danio rerio"
## [5] "Drosophila melanogaster" "Gallus gallus"
## [7] "Homo sapiens" "Mus musculus"
## [9] "Rattus norvegicus" "Saccharomyces cerevisiae"
## [11] "Sus scrofa"
m_df <- msigdbr(species = "Homo sapiens")
m_df %>% as.data.frame %>% dplyr::select(gs_description) %>% unique %>% tail
m_t2g <- msigdbr(species = "Homo sapiens", category = "C7") %>%
dplyr::select(gs_name, entrez_gene)
m_t2g
em <- clusterProfiler::enricher(lung.ENTREZID$ENTREZID, TERM2GENE=m_t2g)
head(em)
Comparing multiple gene lists
out %>% as.data.frame() %>% dplyr::filter(str_detect(Part,'embryonic stem cells') & Taxonomy==9606)
out %>% as.data.frame() %>% dplyr::filter(str_detect(Part,'induced pluripotent stem cells') & Taxonomy==9606)
out %>% as.data.frame() %>% dplyr::filter(str_detect(Part,'lung') & Taxonomy==9606)
out %>% as.data.frame() %>% dplyr::filter(str_detect(Part,'pancreas') & Taxonomy==9606)
out %>% as.data.frame() %>% dplyr::filter(str_detect(Part,'cortex') & Taxonomy==9606)
out %>% as.data.frame() %>% dplyr::filter(str_detect(Part,'.*') & Taxonomy==9606)
Lung <- ZilionisLungData(ensembl = T)
## snapshotDate(): 2020-10-02
## see ?scRNAseq and browseVignettes('scRNAseq') for documentation
## loading from cache
## see ?scRNAseq and browseVignettes('scRNAseq') for documentation
## loading from cache
## snapshotDate(): 2020-10-27
## loading from cache
## Warning: Unable to map 1467 of 41861 requested IDs.
Human_ips <- LaMannoBrainData('human-ips', ensembl = T)
## snapshotDate(): 2020-10-02
## see ?scRNAseq and browseVignettes('scRNAseq') for documentation
## loading from cache
## see ?scRNAseq and browseVignettes('scRNAseq') for documentation
## loading from cache
## snapshotDate(): 2020-10-27
## loading from cache
## Warning: Unable to map 1904 of 14725 requested IDs.
Human_es <- LaMannoBrainData('human-es', ensembl = T)
## snapshotDate(): 2020-10-02
## see ?scRNAseq and browseVignettes('scRNAseq') for documentation
## loading from cache
## see ?scRNAseq and browseVignettes('scRNAseq') for documentation
## loading from cache
## snapshotDate(): 2020-10-27
## loading from cache
## Warning: Unable to map 2778 of 18538 requested IDs.
Pancreas <- MuraroPancreasData(ensembl = T)
## snapshotDate(): 2020-10-02
## see ?scRNAseq and browseVignettes('scRNAseq') for documentation
## loading from cache
## see ?scRNAseq and browseVignettes('scRNAseq') for documentation
## loading from cache
## snapshotDate(): 2020-10-27
## loading from cache
## Warning: Unable to map 2110 of 19059 requested IDs.
#Cortex <- ReprocessedFluidigmData(ensembl=T)
PBMC <- MairPBMCData(mode = 'rna', ensembl = T)
## snapshotDate(): 2020-10-02
## see ?scRNAseq and browseVignettes('scRNAseq') for documentation
## loading from cache
## see ?scRNAseq and browseVignettes('scRNAseq') for documentation
## loading from cache
## snapshotDate(): 2020-10-27
## loading from cache
## Warning: Unable to map 16 of 499 requested IDs.
## see ?scRNAseq and browseVignettes('scRNAseq') for documentation
## loading from cache
keep_transcripts <- function(dataset, min_counts=10){keep_filter <- rowSums(counts(dataset) > min_counts) > min_counts
dataset[keep_filter, ] %>% rowData() -> filtered.transc
return(filtered.transc)}
my.bitr <- function(some_ens){some_ens %>% rownames %>% clusterProfiler::bitr( "ENSEMBL", "ENTREZID", "org.Hs.eg.db") %>% .[['ENTREZID']]}
purrr::compose(my.bitr,keep_transcripts) -> ENSEMBL_to_ENTREZID
list(Pancreas,PBMC,Human_es,Human_ips,Lung) %>% purrr::set_names(c('Pancreas','PBMC','Human_es','Human_ips','Lung')) -> my.cells
purrr::map(my.cells,ENSEMBL_to_ENTREZID) -> my_cells.ENTREZID
## 'select()' returned 1:many mapping between keys and columns
## Warning in clusterProfiler::bitr(., "ENSEMBL", "ENTREZID", "org.Hs.eg.db"):
## 0.35% of input gene IDs are fail to map...
## 'select()' returned 1:many mapping between keys and columns
## Warning in clusterProfiler::bitr(., "ENSEMBL", "ENTREZID", "org.Hs.eg.db"):
## 0.83% of input gene IDs are fail to map...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in clusterProfiler::bitr(., "ENSEMBL", "ENTREZID", "org.Hs.eg.db"):
## 0.53% of input gene IDs are fail to map...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in clusterProfiler::bitr(., "ENSEMBL", "ENTREZID", "org.Hs.eg.db"):
## 1.25% of input gene IDs are fail to map...
## 'select()' returned 1:many mapping between keys and columns
## Warning in clusterProfiler::bitr(., "ENSEMBL", "ENTREZID", "org.Hs.eg.db"):
## 0.67% of input gene IDs are fail to map...
library(clusterProfiler)
library(ReactomePA)
ck <- clusterProfiler::compareCluster(geneCluster = my_cells.ENTREZID, fun = enrichPathway, pvalueCutoff=0.05)
dotplot(ck)
library(clusterProfiler)
library(ReactomePA)
ck2 <- compareCluster(geneCluster = my_cells.ENTREZID, fun = enrichGO, OrgDb = org.Hs.eg.db)
ck2 <- setReadable(ck2, OrgDb = org.Hs.eg.db, keyType="ENTREZID")
dotplot(ck2)
ck3 <- compareCluster(my_cells.ENTREZID, fun="enrichKEGG",
organism="hsa", pvalueCutoff=0.01)
dotplot(ck3)