# Library
library(EnsDb.Hsapiens.v75)
## Loading required package: ensembldb
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind,
## colMeans, colnames, colSums, dirname, do.call, duplicated,
## eval, evalq, Filter, Find, get, grep, grepl, intersect,
## is.unsorted, lapply, lengths, Map, mapply, match, mget, order,
## paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind,
## Reduce, rowMeans, rownames, rowSums, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which, which.max,
## which.min
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:grDevices':
##
## windows
## Loading required package: GenomeInfoDb
## Loading required package: GenomicFeatures
## Loading required package: AnnotationDbi
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: AnnotationFilter
##
## Attaching package: 'ensembldb'
## The following object is masked from 'package:stats':
##
## filter
# Short alias for the human Ensembl release 75 (GRCh37) annotation database.
edb <- EnsDb.Hsapiens.v75
# Print the database summary (backend, versions, gene and transcript counts).
edb
## EnsDb for Ensembl:
## |Backend: SQLite
## |Db type: EnsDb
## |Type of Gene ID: Ensembl Gene ID
## |Supporting package: ensembldb
## |Db created by: ensembldb package from Bioconductor
## |script_version: 0.3.0
## |Creation time: Thu May 18 09:15:45 2017
## |ensembl_version: 75
## |ensembl_host: localhost
## |Organism: homo_sapiens
## |taxonomy_id: 9606
## |genome_build: GRCh37
## |DBSCHEMAVERSION: 2.0
## | No. of genes: 64102.
## | No. of transcripts: 215647.
## |Protein data available.
# List every annotation column that can be requested from edb.
columns(edb) # 33 columns
## [1] "ENTREZID" "EXONID" "EXONIDX"
## [4] "EXONSEQEND" "EXONSEQSTART" "GENEBIOTYPE"
## [7] "GENEID" "GENENAME" "GENESEQEND"
## [10] "GENESEQSTART" "INTERPROACCESSION" "ISCIRCULAR"
## [13] "PROTDOMEND" "PROTDOMSTART" "PROTEINDOMAINID"
## [16] "PROTEINDOMAINSOURCE" "PROTEINID" "PROTEINSEQUENCE"
## [19] "SEQCOORDSYSTEM" "SEQLENGTH" "SEQNAME"
## [22] "SEQSTRAND" "SYMBOL" "TXBIOTYPE"
## [25] "TXCDSSEQEND" "TXCDSSEQSTART" "TXID"
## [28] "TXNAME" "TXSEQEND" "TXSEQSTART"
## [31] "UNIPROTDB" "UNIPROTID" "UNIPROTMAPPINGTYPE"
# Show the columns of interest, chosen by name rather than by position so the
# selection cannot silently shift if columns(edb) gains, loses or reorders
# entries in another database or package version.
# intersect() returns the matches in the order columns(edb) reports them, so
# the printed vector is the same as with the former positional indices.
intersect(
  columns(edb),
  c(
    "ENTREZID", "GENEID", "GENENAME", "PROTEINID", "SEQLENGTH",
    "SEQNAME", "SYMBOL", "TXID", "TXNAME", "TXSEQSTART"
  )
)
## [1] "ENTREZID" "GENEID" "GENENAME" "PROTEINID" "SEQLENGTH"
## [6] "SEQNAME" "SYMBOL" "TXID" "TXNAME" "TXSEQSTART"
# List the key types that keys() and select() accept for edb.
keytypes(edb) # 16 key types
## [1] "ENTREZID" "EXONID" "GENEBIOTYPE"
## [4] "GENEID" "GENENAME" "PROTDOMID"
## [7] "PROTEINDOMAINID" "PROTEINDOMAINSOURCE" "PROTEINID"
## [10] "SEQNAME" "SEQSTRAND" "SYMBOL"
## [13] "TXBIOTYPE" "TXID" "TXNAME"
## [16] "UNIPROTID"
# With no keytype given, keys() returns Ensembl gene IDs here (see output).
head(keys(edb)) # 64102 genes according to the edb summary
## [1] "ENSG00000000003" "ENSG00000000005" "ENSG00000000419" "ENSG00000000457"
## [5] "ENSG00000000460" "ENSG00000000938"
# Every Ensembl gene ID in edb; preview the first ten.
genekeys <- keys(edb, keytype = "GENEID")
genekeys[seq_len(10L)]
## [1] "ENSG00000000003" "ENSG00000000005" "ENSG00000000419"
## [4] "ENSG00000000457" "ENSG00000000460" "ENSG00000000938"
## [7] "ENSG00000000971" "ENSG00000001036" "ENSG00000001084"
## [10] "ENSG00000001167"
# Every Entrez gene ID known to edb; preview the first ten.
entrezkeys <- keys(edb, keytype = "ENTREZID")
entrezkeys[seq_len(10L)]
## [1] 1 2 3 9 10 12 13 14 15 16
# Every gene symbol in edb; preview the first ten.
symbolkeys <- keys(edb, keytype = "SYMBOL")
symbolkeys[seq_len(10L)]
## [1] "5S_rRNA" "7SK" "A1BG" "A1BG-AS1" "A1CF"
## [6] "A2M" "A2M-AS1" "A2ML1" "A2ML1-AS1" "A2ML1-AS2"
# Every gene name in edb; the first ten printed here match the SYMBOL keys.
genenamekeys <- keys(edb, keytype = "GENENAME")
genenamekeys[seq_len(10L)]
## [1] "5S_rRNA" "7SK" "A1BG" "A1BG-AS1" "A1CF"
## [6] "A2M" "A2M-AS1" "A2ML1" "A2ML1-AS1" "A2ML1-AS2"
# Every sequence name in edb (values such as "1", "10", "11"); preview ten.
seqnamekeys <- keys(edb, keytype = "SEQNAME")
seqnamekeys[seq_len(10L)]
## [1] "1" "10" "11" "12" "13" "14" "15" "16" "17" "18"
# Every Ensembl transcript ID in edb; preview the first ten.
txidkeys <- keys(edb, keytype = "TXID")
txidkeys[seq_len(10L)]
## [1] "ENST00000000233" "ENST00000000412" "ENST00000000442"
## [4] "ENST00000001008" "ENST00000001146" "ENST00000002125"
## [7] "ENST00000002165" "ENST00000002501" "ENST00000002596"
## [10] "ENST00000002829"
# Every transcript name in edb; the first ten printed here equal the
# corresponding transcript IDs.
TXnamekeys <- keys(edb, keytype = "TXNAME")
TXnamekeys[seq_len(10L)]
## [1] "ENST00000000233" "ENST00000000412" "ENST00000000442"
## [4] "ENST00000001008" "ENST00000001146" "ENST00000002125"
## [7] "ENST00000002165" "ENST00000002501" "ENST00000002596"
## [10] "ENST00000002829"
# Every Ensembl protein ID in edb; preview the first ten.
proteinidkeys <- keys(edb, keytype = "PROTEINID")
proteinidkeys[seq_len(10L)]
## [1] "ENSP00000000233" "ENSP00000000412" "ENSP00000000442"
## [4] "ENSP00000001008" "ENSP00000001146" "ENSP00000002125"
## [7] "ENSP00000002165" "ENSP00000002501" "ENSP00000002596"
## [10] "ENSP00000002829"
# Every UniProt identifier in edb; preview the first ten.
uniprotkeys <- keys(edb, keytype = "UNIPROTID")
uniprotkeys[seq_len(10L)]
## [1] "1433B_HUMAN" "1433E_HUMAN" "1433F_HUMAN" "1433G_HUMAN" "1433S_HUMAN"
## [6] "1433T_HUMAN" "1433Z_HUMAN" "1A01_HUMAN" "1A02_HUMAN" "1A03_HUMAN"
# Nicotinic acetylcholine receptor (CHRN) subunit genes: alpha, beta, gamma, delta, epsilon
# Ensembl gene IDs for the 16 CHRN subunit genes, looked up by gene name.
# Names are grouped by subunit family: alpha, then beta, then gamma/delta/epsilon.
CHRNkeys <- keys(
  edb,
  filter = GeneNameFilter(c(
    "CHRNA1", "CHRNA2", "CHRNA3", "CHRNA4", "CHRNA5",
    "CHRNA6", "CHRNA7", "CHRNA9", "CHRNA10",
    "CHRNB1", "CHRNB2", "CHRNB3", "CHRNB4",
    "CHRNG", "CHRND", "CHRNE"
  ))
)
CHRNkeys
## [1] "ENSG00000138435" "ENSG00000129749" "ENSG00000120903"
## [4] "ENSG00000080644" "ENSG00000101204" "ENSG00000169684"
## [7] "ENSG00000147434" "ENSG00000175344" "ENSG00000174343"
## [10] "ENSG00000170175" "ENSG00000160716" "ENSG00000147432"
## [13] "ENSG00000117971" "ENSG00000135902" "ENSG00000108556"
## [16] "ENSG00000196811"
# Transcript-level annotation for the CHRN genes, keyed by Ensembl gene ID.
# Each row is one gene / Entrez ID / transcript combination, so a transcript
# appears more than once when its gene carries several ENTREZIDs (in the
# printed table, CHRNA7 is listed under both 1139 and 89832).
chrntxsENS <- select(
  edb,
  keys = CHRNkeys,
  keytype = "GENEID",
  columns = c(
    "GENENAME", "ENTREZID", "TXID",
    "PROTEINID", "TXSEQSTART", "TXBIOTYPE"
  )
)
nrow(chrntxsENS) # 88
## [1] 88
chrntxsENS
## GENEID GENENAME ENTREZID TXID PROTEINID
## 1 ENSG00000138435 CHRNA1 1134 ENST00000348749 ENSP00000261008
## 2 ENSG00000138435 CHRNA1 1134 ENST00000261007 ENSP00000261007
## 3 ENSG00000138435 CHRNA1 1134 ENST00000409542 ENSP00000387026
## 4 ENSG00000138435 CHRNA1 1134 ENST00000409219 ENSP00000386611
## 5 ENSG00000138435 CHRNA1 1134 ENST00000435083 ENSP00000395805
## 6 ENSG00000138435 CHRNA1 1134 ENST00000409323 ENSP00000386684
## 7 ENSG00000129749 CHRNA10 57053 ENST00000250699 ENSP00000250699
## 8 ENSG00000129749 CHRNA10 57053 ENST00000534359 ENSP00000437107
## 9 ENSG00000129749 CHRNA10 57053 ENST00000526599 ENSP00000432757
## 10 ENSG00000129749 CHRNA10 57053 ENST00000493827 <NA>
## 11 ENSG00000120903 CHRNA2 1135 ENST00000520933 ENSP00000429616
## 12 ENSG00000120903 CHRNA2 1135 ENST00000520600 <NA>
## 13 ENSG00000120903 CHRNA2 1135 ENST00000523529 <NA>
## 14 ENSG00000120903 CHRNA2 1135 ENST00000523695 ENSP00000430612
## 15 ENSG00000120903 CHRNA2 1135 ENST00000240132 ENSP00000240132
## 16 ENSG00000120903 CHRNA2 1135 ENST00000522008 <NA>
## 17 ENSG00000120903 CHRNA2 1135 ENST00000524096 ENSP00000430422
## 18 ENSG00000120903 CHRNA2 1135 ENST00000518712 ENSP00000430856
## 19 ENSG00000120903 CHRNA2 1135 ENST00000520650 <NA>
## 20 ENSG00000120903 CHRNA2 1135 ENST00000521921 ENSP00000429953
## 21 ENSG00000120903 CHRNA2 1135 ENST00000520208 ENSP00000430994
## 22 ENSG00000120903 CHRNA2 1135 ENST00000407991 ENSP00000385026
## 23 ENSG00000080644 CHRNA3 1136 ENST00000559002 <NA>
## 24 ENSG00000080644 CHRNA3 1136 ENST00000559658 ENSP00000452896
## 25 ENSG00000080644 CHRNA3 1136 ENST00000348639 ENSP00000267951
## 26 ENSG00000080644 CHRNA3 1136 ENST00000326828 ENSP00000315602
## 27 ENSG00000080644 CHRNA3 1136 ENST00000558903 <NA>
## 28 ENSG00000080644 CHRNA3 1136 ENST00000561128 <NA>
## 29 ENSG00000080644 CHRNA3 1136 ENST00000559080 ENSP00000453993
## 30 ENSG00000080644 CHRNA3 1136 ENST00000559941 <NA>
## 31 ENSG00000101204 CHRNA4 1137 ENST00000370263 ENSP00000359285
## 32 ENSG00000101204 CHRNA4 1137 ENST00000463705 <NA>
## 33 ENSG00000101204 CHRNA4 1137 ENST00000498043 ENSP00000429513
## 34 ENSG00000101204 CHRNA4 1137 ENST00000467563 <NA>
## 35 ENSG00000101204 CHRNA4 1137 ENST00000475033 <NA>
## 36 ENSG00000101204 CHRNA4 1137 ENST00000480012 <NA>
## 37 ENSG00000169684 CHRNA5 1138 ENST00000299565 ENSP00000299565
## 38 ENSG00000169684 CHRNA5 1138 ENST00000559554 ENSP00000453519
## 39 ENSG00000169684 CHRNA5 1138 ENST00000394802 ENSP00000378281
## 40 ENSG00000169684 CHRNA5 1138 ENST00000559576 ENSP00000452641
## 41 ENSG00000147434 CHRNA6 8973 ENST00000276410 ENSP00000276410
## 42 ENSG00000147434 CHRNA6 8973 ENST00000534622 ENSP00000433871
## 43 ENSG00000147434 CHRNA6 8973 ENST00000533810 ENSP00000434659
## 44 ENSG00000147434 CHRNA6 8973 ENST00000530869 <NA>
## 45 ENSG00000147434 CHRNA6 8973 ENST00000529467 <NA>
## 46 ENSG00000175344 CHRNA7 89832 ENST00000454250 ENSP00000407546
## 47 ENSG00000175344 CHRNA7 1139 ENST00000454250 ENSP00000407546
## 48 ENSG00000175344 CHRNA7 89832 ENST00000306901 ENSP00000303727
## 49 ENSG00000175344 CHRNA7 1139 ENST00000306901 ENSP00000303727
## 50 ENSG00000175344 CHRNA7 89832 ENST00000437966 ENSP00000399087
## 51 ENSG00000175344 CHRNA7 1139 ENST00000437966 ENSP00000399087
## 52 ENSG00000175344 CHRNA7 89832 ENST00000455693 ENSP00000405989
## 53 ENSG00000175344 CHRNA7 1139 ENST00000455693 ENSP00000405989
## 54 ENSG00000174343 CHRNA9 55584 ENST00000310169 ENSP00000312663
## 55 ENSG00000174343 CHRNA9 55584 ENST00000502377 <NA>
## 56 ENSG00000174343 CHRNA9 55584 ENST00000509518 <NA>
## 57 ENSG00000170175 CHRNB1 1140 ENST00000306071 ENSP00000304290
## 58 ENSG00000170175 CHRNB1 1140 ENST00000572857 ENSP00000461402
## 59 ENSG00000170175 CHRNB1 1140 ENST00000574054 <NA>
## 60 ENSG00000170175 CHRNB1 1140 ENST00000570557 ENSP00000460648
## 61 ENSG00000170175 CHRNB1 1140 ENST00000536404 ENSP00000439209
## 62 ENSG00000170175 CHRNB1 1140 ENST00000576360 ENSP00000459092
## 63 ENSG00000170175 CHRNB1 1140 ENST00000573209 <NA>
## 64 ENSG00000170175 CHRNB1 1140 ENST00000575379 ENSP00000461751
## 65 ENSG00000160716 CHRNB2 1141 ENST00000368476 ENSP00000357461
## 66 ENSG00000147432 CHRNB3 1142 ENST00000531610 <NA>
## 67 ENSG00000147432 CHRNB3 1142 ENST00000534391 ENSP00000433913
## 68 ENSG00000147432 CHRNB3 1142 ENST00000289957 ENSP00000289957
## 69 ENSG00000117971 CHRNB4 1143 ENST00000261751 ENSP00000261751
## 70 ENSG00000117971 CHRNB4 1143 ENST00000412074 ENSP00000416386
## 71 ENSG00000117971 CHRNB4 1143 ENST00000559849 ENSP00000457404
## 72 ENSG00000117971 CHRNB4 1143 ENST00000560511 <NA>
## 73 ENSG00000117971 CHRNB4 1143 ENST00000560868 <NA>
## 74 ENSG00000117971 CHRNB4 1143 ENST00000558216 <NA>
## 75 ENSG00000135902 CHRND 1144 ENST00000441621 ENSP00000408819
## 76 ENSG00000135902 CHRND 1144 ENST00000449596 ENSP00000404950
## 77 ENSG00000135902 CHRND 1144 ENST00000258385 ENSP00000258385
## 78 ENSG00000135902 CHRND 1144 ENST00000412233 ENSP00000398143
## 79 ENSG00000135902 CHRND 1144 ENST00000446616 ENSP00000410801
## 80 ENSG00000135902 CHRND 1144 ENST00000543200 ENSP00000438380
## 81 ENSG00000135902 CHRND 1144 ENST00000536614 ENSP00000437740
## 82 ENSG00000135902 CHRND 1144 ENST00000457943 ENSP00000391055
## 83 ENSG00000108556 CHRNE 1145 ENST00000293780 ENSP00000293780
## 84 ENSG00000108556 CHRNE 1145 ENST00000572438 <NA>
## 85 ENSG00000108556 CHRNE 1145 ENST00000575637 <NA>
## 86 ENSG00000196811 CHRNG 1146 ENST00000485094 <NA>
## 87 ENSG00000196811 CHRNG 1146 ENST00000389494 ENSP00000374145
## 88 ENSG00000196811 CHRNG 1146 ENST00000389492 ENSP00000374143
## TXSEQSTART TXBIOTYPE
## 1 175612320 protein_coding
## 2 175612388 protein_coding
## 3 175612538 protein_coding
## 4 175612538 protein_coding
## 5 175612538 nonsense_mediated_decay
## 6 175617483 protein_coding
## 7 3686817 protein_coding
## 8 3687266 protein_coding
## 9 3687266 nonsense_mediated_decay
## 10 3689730 processed_transcript
## 11 27317295 protein_coding
## 12 27318093 retained_intron
## 13 27318615 retained_intron
## 14 27318615 nonsense_mediated_decay
## 15 27319120 protein_coding
## 16 27321145 retained_intron
## 17 27326866 protein_coding
## 18 27327329 protein_coding
## 19 27327349 retained_intron
## 20 27327422 protein_coding
## 21 27328519 protein_coding
## 22 27317279 protein_coding
## 23 78885394 processed_transcript
## 24 78885394 nonsense_mediated_decay
## 25 78885394 protein_coding
## 26 78887647 protein_coding
## 27 78894118 processed_transcript
## 28 78910722 retained_intron
## 29 78910972 protein_coding
## 30 78911191 processed_transcript
## 31 61975420 protein_coding
## 32 61975421 processed_transcript
## 33 61977946 nonsense_mediated_decay
## 34 61977946 processed_transcript
## 35 62002105 processed_transcript
## 36 62005773 processed_transcript
## 37 78857862 protein_coding
## 38 78857913 protein_coding
## 39 78873232 protein_coding
## 40 78882704 protein_coding
## 41 42607763 protein_coding
## 42 42608070 protein_coding
## 43 42611772 protein_coding
## 44 42612083 processed_transcript
## 45 42620116 retained_intron
## 46 32322691 protein_coding
## 47 32322691 protein_coding
## 48 32322701 protein_coding
## 49 32322701 protein_coding
## 50 32322759 nonsense_mediated_decay
## 51 32322759 nonsense_mediated_decay
## 52 32322759 protein_coding
## 53 32322759 protein_coding
## 54 40337346 protein_coding
## 55 40339303 processed_transcript
## 56 40351604 processed_transcript
## 57 7348380 protein_coding
## 58 7348383 protein_coding
## 59 7348427 retained_intron
## 60 7348658 protein_coding
## 61 7348822 protein_coding
## 62 7348850 protein_coding
## 63 7348862 retained_intron
## 64 7358913 protein_coding
## 65 154540257 protein_coding
## 66 42552519 processed_transcript
## 67 42552519 protein_coding
## 68 42552562 protein_coding
## 69 78916461 protein_coding
## 70 78916750 protein_coding
## 71 78921695 nonsense_mediated_decay
## 72 78923404 processed_transcript
## 73 78979260 processed_transcript
## 74 78984920 processed_transcript
## 75 233390703 nonsense_mediated_decay
## 76 233390870 protein_coding
## 77 233390894 protein_coding
## 78 233390898 nonsense_mediated_decay
## 79 233390901 nonsense_mediated_decay
## 80 233390870 protein_coding
## 81 233390898 protein_coding
## 82 233390898 protein_coding
## 83 4801069 protein_coding
## 84 4801352 retained_intron
## 85 4804304 processed_transcript
## 86 233404437 retained_intron
## 87 233404437 protein_coding
## 88 233404458 protein_coding
# Count result rows per Entrez gene ID. CHRNA7 rows are tallied under both
# of its Entrez IDs (1139 and 89832).
table(chrntxsENS[["ENTREZID"]])
##
## 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145
## 6 12 8 6 4 4 8 1 3 6 8 3
## 1146 8973 55584 57053 89832
## 3 5 3 4 4