# NOTE: the workspace is intentionally NOT cleared here. `rm(list = ls())`
# deletes the caller's objects when this script is sourced, yet leaves attached
# packages and options in place, so it does not give a clean session anyway.
# Restart R instead if a fresh environment is needed.

# HGNC gene symbols to annotate.
gene_symbol <- c(
  "CD86", "CD8B", "CDC14A", "CDC45", "CEND1",
  "CENPE", "CENPIP1", "CENPO", "CERS1"
)
length(gene_symbol)
## [1] 9
#########################
library(org.Hs.eg.db)
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:grDevices':
##
## windows
##
#if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#BiocManager::install("org.Hs.eg.db")
# Inspect the annotation database before querying it.
# keytypes(): identifier types accepted as `keytype` (the kind of the input
# keys) in the select() lookup below.
keytypes(org.Hs.eg.db)
## [1] "ACCNUM" "ALIAS" "ENSEMBL" "ENSEMBLPROT" "ENSEMBLTRANS"
## [6] "ENTREZID" "ENZYME" "EVIDENCE" "EVIDENCEALL" "GENENAME"
## [11] "GO" "GOALL" "IPI" "MAP" "OMIM"
## [16] "ONTOLOGY" "ONTOLOGYALL" "PATH" "PFAM" "PMID"
## [21] "PROSITE" "REFSEQ" "SYMBOL" "UCSCKG" "UNIGENE"
## [26] "UNIPROT"
# columns(): fields that can be requested via `columns`; for this database the
# recorded output shows the same 26 names as keytypes().
columns(org.Hs.eg.db)
## [1] "ACCNUM" "ALIAS" "ENSEMBL" "ENSEMBLPROT" "ENSEMBLTRANS"
## [6] "ENTREZID" "ENZYME" "EVIDENCE" "EVIDENCEALL" "GENENAME"
## [11] "GO" "GOALL" "IPI" "MAP" "OMIM"
## [16] "ONTOLOGY" "ONTOLOGYALL" "PATH" "PFAM" "PMID"
## [21] "PROSITE" "REFSEQ" "SYMBOL" "UCSCKG" "UNIGENE"
## [26] "UNIPROT"
# Map every symbol in `gene_symbol` to its Entrez ID, gene name and Ensembl
# gene ID. The call is namespace-qualified because `select` is also exported by
# other commonly attached packages (e.g. dplyr); attaching one of them after
# org.Hs.eg.db would mask the AnnotationDbi generic and break a bare `select()`.
# Symbols without an Ensembl mapping (CENPIP1 in the output below) yield NA in
# the ENSEMBL column rather than being dropped.
gene_1 <- AnnotationDbi::select(
  org.Hs.eg.db,
  keys = gene_symbol,
  columns = c("ENTREZID", "GENENAME", "ENSEMBL"),
  keytype = "SYMBOL"
)
## 'select()' returned 1:1 mapping between keys and columns
# One row per input symbol; four columns (SYMBOL plus the three requested).
dim(gene_1)
## [1] 9 4
gene_1
## SYMBOL ENTREZID GENENAME
## 1 CD86 942 CD86 molecule
## 2 CD8B 926 CD8b molecule
## 3 CDC14A 8556 cell division cycle 14A
## 4 CDC45 8318 cell division cycle 45
## 5 CEND1 51286 cell cycle exit and neuronal differentiation 1
## 6 CENPE 1062 centromere protein E
## 7 CENPIP1 100419337 centromere protein I pseudogene 1
## 8 CENPO 79172 centromere protein O
## 9 CERS1 10715 ceramide synthase 1
## ENSEMBL
## 1 ENSG00000114013
## 2 ENSG00000172116
## 3 ENSG00000079335
## 4 ENSG00000093009
## 5 ENSG00000184524
## 6 ENSG00000138778
## 7 <NA>
## 8 ENSG00000138092
## 9 ENSG00000223802
########################
# Annotate the Ensembl IDs in the `gtexv7` dataset using grex.
library(grex)
data(gtexv7)

# `id` holds the IDs to look up; grex() returns one annotation table for them.
id <- gtexv7
df <- grex(id)

# Size of the annotation table (rows x columns).
dim(df)
## [1] 56202 7

# Count of distinct Ensembl IDs; it equals the row count above, i.e. there is
# one row per ID.
length(unique(df[["ensembl_id"]]))
## [1] 56202

head(df)
## ensembl_id entrez_id hgnc_symbol
## 1 ENSG00000223972 100287102 DDX11L1
## 2 ENSG00000227232 <NA> <NA>
## 3 ENSG00000243485 <NA> <NA>
## 4 ENSG00000237613 645520 FAM138A
## 5 ENSG00000268020 <NA> <NA>
## 6 ENSG00000240361 <NA> <NA>
## hgnc_name cyto_loc uniprot_id
## 1 DEAD/H-box helicase 11 like 1 1p36.33 <NA>
## 2 <NA> <NA> <NA>
## 3 <NA> <NA> <NA>
## 4 family with sequence similarity 138 member A 1p36.33 <NA>
## 5 <NA> <NA> <NA>
## 6 <NA> <NA> <NA>
## gene_biotype
## 1 transcribed_unprocessed_pseudogene
## 2 <NA>
## 3 <NA>
## 4 lincRNA
## 5 <NA>
## 6 <NA>