Libraries

library("biomaRt")
library("Biostrings")
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind,
##     colMeans, colnames, colSums, dirname, do.call, duplicated,
##     eval, evalq, Filter, Find, get, grep, grepl, intersect,
##     is.unsorted, lapply, lengths, Map, mapply, match, mget, order,
##     paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind,
##     Reduce, rowMeans, rownames, rowSums, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which, which.max,
##     which.min
## Loading required package: S4Vectors
## Loading required package: stats4
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:grDevices':
## 
##     windows
## Loading required package: XVector
## 
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
## 
##     strsplit
library(BSgenome)
## Loading required package: GenomeInfoDb
## Loading required package: GenomicRanges
## Loading required package: rtracklayer

100-base upstream flanking sequences of the nicotinic acetylcholine receptor subunit genes

# Connect to the Ensembl BioMart database without selecting a dataset.
# NOTE(review): `ensembl` is not used again in the code visible here; it is
# kept in case a later section relies on it.
ensembl <- useMart(biomart = "ensembl")
# Same database with the human gene dataset selected; this is the connection
# passed to the sequence query.
hsensembl <- useMart(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")
# Auto-print the Mart object to confirm the database and dataset in use.
hsensembl
## Object of class 'Mart':
##   Using the ENSEMBL_MART_ENSEMBL BioMart database
##   Using the hsapiens_gene_ensembl dataset
# Entrez Gene IDs of the 16 genes to query.
entrez <- c(
  "1134", "1135", "1136", "1137", "1138", "8973", "1139", "55584",
  "57053", "1140", "1141", "1142", "1143", "1144", "1145", "1146"
)

# Request the "coding_gene_flank" sequence type with 100 upstream bases for
# every ID, using the human dataset connection. The result is a data frame
# with one sequence column and one ID column.
# The query can return more rows than IDs were supplied (the recorded run
# gives 17 rows for 16 IDs), so do not assume a one-to-one mapping.
# NOTE(review): newer Ensembl releases may name this filter "entrezgene_id";
# confirm with listFilters(hsensembl) before re-running.
nachr_coding_gene_flank <- getSequence(
  id = entrez,
  type = "entrezgene",
  seqType = "coding_gene_flank",
  upstream = 100,
  mart = hsensembl
)

# Compact structural summary of the returned data frame.
str(nachr_coding_gene_flank)
## 'data.frame':    17 obs. of  2 variables:
##  $ coding_gene_flank: chr  "GATTTTGAACCCCTGTATTTTCTTTTCAAAACCCCCTTTTCCAGTGGAAATGCTCTGTTGTTAAAAAGGAAGAAACTGTCTTTCTGAAACTGACATCACG" "GCGGTGTGGGCTCCTTGTGCCCAGATCCTTTGTATTCATAGGGGGAAGTGGAAGACCACGCTGCCTGACTGAGACTTTATTATAGAGGCTCAGGAAAAAG" "CTTCTGATGATGTCAAGGTTGGATGCATGTGGCTGACTGATAGCTCTTTGTTTTCCACAATCCTTTGCCTAGGAAAAAGGAATCCAAGTGTGTTTTAACC" "GGGAGTGTAGGCTGTTATATGACACCCAGAGCCCATCTCTCTCTGCCCCAGACCTTGGAGCTGTTGTCCCACCCCTGTCACTGCAGAGAGCTGAGGCACC" ...
##  $ entrezgene       : int  1142 55584 8973 1146 57053 1139 1144 1140 1138 1137 ...
# First 10 bases of every flank sequence, as a plain character vector.
substr(x = nachr_coding_gene_flank$coding_gene_flank, start = 1, stop = 10)
##  [1] "GATTTTGAAC" "GCGGTGTGGG" "CTTCTGATGA" "GGGAGTGTAG" "TGTCCACACC"
##  [6] "CGCGAGCCGA" "ACCCCCTCCT" "CCAGGAGAGA" "GTCTCACGAC" "ACGTGCTGGG"
## [11] "CTCGCTTTCC" "CTCGGGGGCC" "GGGCGCGGCT" "GAATCCTCAC" "GTCTGGGGTC"
## [16] "CTCCAGCAAC" "AGCCCGGAAC"
# Same 10-base prefix via subseq(). The column is a plain character vector,
# so the result printed below is also character, not a DNAStringSet; wrap the
# column in DNAStringSet() first if a DNAStringSet is wanted.
subseq(nachr_coding_gene_flank$coding_gene_flank, start = 1, end = 10)
##  [1] "GATTTTGAAC" "GCGGTGTGGG" "CTTCTGATGA" "GGGAGTGTAG" "TGTCCACACC"
##  [6] "CGCGAGCCGA" "ACCCCCTCCT" "CCAGGAGAGA" "GTCTCACGAC" "ACGTGCTGGG"
## [11] "CTCGCTTTCC" "CTCGGGGGCC" "GGGCGCGGCT" "GAATCCTCAC" "GTCTGGGGTC"
## [16] "CTCCAGCAAC" "AGCCCGGAAC"
# Flatten the whole data frame, column by column, into one named vector.
# Both columns end up in the same vector (the integer IDs are coerced to
# character), and each name is the column name followed by the row index.
nachr_unlisted <- unlist(x = nachr_coding_gene_flank)
# Element 2 is therefore the second flank sequence ("coding_gene_flank2").
nachr_unlisted[2L]
##                                                                                     coding_gene_flank2 
## "GCGGTGTGGGCTCCTTGTGCCCAGATCCTTTGTATTCATAGGGGGAAGTGGAAGACCACGCTGCCTGACTGAGACTTTATTATAGAGGCTCAGGAAAAAG"