library(biomaRt)
## Connect to the Ensembl BioMart archive host and select the dataset you want
## (here "mmusculus_gene_ensembl")
mouse <- useMart(
  biomart = "ENSEMBL_MART_ENSEMBL",
  dataset = "mmusculus_gene_ensembl",
  host = "jul2016.archive.ensembl.org"
)
## List the attributes that can be requested from this mart; only the first
## 25 rows are shown (via head) to save space
head(listAttributes(mart = mouse), n = 25)
## name
## 1 ensembl_gene_id
## 2 ensembl_transcript_id
## 3 ensembl_peptide_id
## 4 ensembl_exon_id
## 5 description
## 6 chromosome_name
## 7 start_position
## 8 end_position
## 9 strand
## 10 band
## 11 transcript_start
## 12 transcript_end
## 13 transcription_start_site
## 14 transcript_length
## 15 transcript_tsl
## 16 transcript_gencode_basic
## 17 transcript_appris
## 18 external_gene_name
## 19 external_gene_source
## 20 external_transcript_name
## 21 external_transcript_source_name
## 22 transcript_count
## 23 percentage_gc_content
## 24 gene_biotype
## 25 transcript_biotype
## description page
## 1 Ensembl Gene ID feature_page
## 2 Ensembl Transcript ID feature_page
## 3 Ensembl Protein ID feature_page
## 4 Ensembl Exon ID feature_page
## 5 Description feature_page
## 6 Chromosome Name feature_page
## 7 Gene Start (bp) feature_page
## 8 Gene End (bp) feature_page
## 9 Strand feature_page
## 10 Band feature_page
## 11 Transcript Start (bp) feature_page
## 12 Transcript End (bp) feature_page
## 13 Transcription Start Site (TSS) feature_page
## 14 Transcript length (including UTRs and CDS) feature_page
## 15 Transcript Support Level (TSL) feature_page
## 16 GENCODE basic annotation feature_page
## 17 APPRIS annotation feature_page
## 18 Associated Gene Name feature_page
## 19 Associated Gene Source feature_page
## 20 Associated Transcript Name feature_page
## 21 Associated Transcript Source feature_page
## 22 Transcript count feature_page
## 23 % GC content feature_page
## 24 Gene type feature_page
## 25 Transcript type feature_page
## Here's how to pull the data that you want: request the "ensembl_gene_id"
## and "external_gene_name" attributes from the mart selected above
genes.with.id <- getBM(
  attributes = c("ensembl_gene_id", "external_gene_name"),
  mart = mouse
)
## Show only the first 25 rows to save space
head(genes.with.id, n = 25)
## ensembl_gene_id external_gene_name
## 1 ENSMUSG00000064372 mt-Tp
## 2 ENSMUSG00000064371 mt-Tt
## 3 ENSMUSG00000064370 mt-Cytb
## 4 ENSMUSG00000064369 mt-Te
## 5 ENSMUSG00000064368 mt-Nd6
## 6 ENSMUSG00000064367 mt-Nd5
## 7 ENSMUSG00000064366 mt-Tl2
## 8 ENSMUSG00000064365 mt-Ts2
## 9 ENSMUSG00000064364 mt-Th
## 10 ENSMUSG00000064363 mt-Nd4
## 11 ENSMUSG00000065947 mt-Nd4l
## 12 ENSMUSG00000064361 mt-Tr
## 13 ENSMUSG00000064360 mt-Nd3
## 14 ENSMUSG00000064359 mt-Tg
## 15 ENSMUSG00000064358 mt-Co3
## 16 ENSMUSG00000064357 mt-Atp6
## 17 ENSMUSG00000064356 mt-Atp8
## 18 ENSMUSG00000064355 mt-Tk
## 19 ENSMUSG00000064354 mt-Co2
## 20 ENSMUSG00000064353 mt-Td
## 21 ENSMUSG00000064352 mt-Ts1
## 22 ENSMUSG00000064351 mt-Co1
## 23 ENSMUSG00000064350 mt-Ty
## 24 ENSMUSG00000064349 mt-Tc
## 25 ENSMUSG00000064348 mt-Tn
### Then, given a list of names, you should be able to find the indices associated with the genes you want to annotate