# Libraries
library("biomaRt")
library("Biostrings")
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind,
## colMeans, colnames, colSums, dirname, do.call, duplicated,
## eval, evalq, Filter, Find, get, grep, grepl, intersect,
## is.unsorted, lapply, lengths, Map, mapply, match, mget, order,
## paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind,
## Reduce, rowMeans, rownames, rowSums, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which, which.max,
## which.min
## Loading required package: S4Vectors
## Loading required package: stats4
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:grDevices':
##
## windows
## Loading required package: XVector
##
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
##
## strsplit
library(BSgenome)
## Loading required package: GenomeInfoDb
## Loading required package: GenomicRanges
## Loading required package: rtracklayer
# 100-base upstream flanking sequences of nicotinic acetylcholine receptor subunit genes
# Open the generic Ensembl BioMart (no dataset selected yet).
ensembl <- useMart(biomart = "ensembl")

# Open the same mart again, this time bound to the human gene dataset;
# this is the connection used for the sequence query.
hsensembl <- useMart(
  biomart = "ensembl",
  dataset = "hsapiens_gene_ensembl"
)

# Echo the Mart object to confirm which database and dataset are in use.
hsensembl
## Object of class 'Mart':
## Using the ENSEMBL_MART_ENSEMBL BioMart database
## Using the hsapiens_gene_ensembl dataset
# Entrez Gene IDs to look up (16 IDs, passed as character strings).
entrez <- c(
  "1134", "1135", "1136", "1137", "1138", "8973", "1139", "55584", "57053",
  "1140", "1141", "1142", "1143", "1144", "1145", "1146"
)

# Query the human Ensembl mart for the `coding_gene_flank` sequence of each
# ID, asking for 100 upstream bases.
nachr_coding_gene_flank <- getSequence(
  id = entrez,
  type = "entrezgene",
  seqType = "coding_gene_flank",
  upstream = 100,
  mart = hsensembl
)

# Inspect the result: a data frame with one sequence column and one ID column.
# NOTE: in the recorded run the 16 IDs produced 17 rows, and the rows are not
# in the same order as `entrez`, so always match on the ID column rather than
# on position.
str(nachr_coding_gene_flank)
## 'data.frame': 17 obs. of 2 variables:
## $ coding_gene_flank: chr "GATTTTGAACCCCTGTATTTTCTTTTCAAAACCCCCTTTTCCAGTGGAAATGCTCTGTTGTTAAAAAGGAAGAAACTGTCTTTCTGAAACTGACATCACG" "GCGGTGTGGGCTCCTTGTGCCCAGATCCTTTGTATTCATAGGGGGAAGTGGAAGACCACGCTGCCTGACTGAGACTTTATTATAGAGGCTCAGGAAAAAG" "CTTCTGATGATGTCAAGGTTGGATGCATGTGGCTGACTGATAGCTCTTTGTTTTCCACAATCCTTTGCCTAGGAAAAAGGAATCCAAGTGTGTTTTAACC" "GGGAGTGTAGGCTGTTATATGACACCCAGAGCCCATCTCTCTCTGCCCCAGACCTTGGAGCTGTTGTCCCACCCCTGTCACTGCAGAGAGCTGAGGCACC" ...
## $ entrezgene : int 1142 55584 8973 1146 57053 1139 1144 1140 1138 1137 ...
# First 10 characters of every flank sequence, using base R on the plain
# character column; the result is a character vector.
substr(nachr_coding_gene_flank[["coding_gene_flank"]], start = 1, stop = 10)
## [1] "GATTTTGAAC" "GCGGTGTGGG" "CTTCTGATGA" "GGGAGTGTAG" "TGTCCACACC"
## [6] "CGCGAGCCGA" "ACCCCCTCCT" "CCAGGAGAGA" "GTCTCACGAC" "ACGTGCTGGG"
## [11] "CTCGCTTTCC" "CTCGGGGGCC" "GGGCGCGGCT" "GAATCCTCAC" "GTCTGGGGTC"
## [16] "CTCCAGCAAC" "AGCCCGGAAC"
# Same 10-base preview via subseq(). Because the input column is a plain
# character vector, the result is also a character vector (identical to the
# substr() result), not a DNAStringSet. Wrap the column in DNAStringSet()
# first if an XStringSet result is wanted.
subseq(nachr_coding_gene_flank[["coding_gene_flank"]], start = 1, end = 10)
## [1] "GATTTTGAAC" "GCGGTGTGGG" "CTTCTGATGA" "GGGAGTGTAG" "TGTCCACACC"
## [6] "CGCGAGCCGA" "ACCCCCTCCT" "CCAGGAGAGA" "GTCTCACGAC" "ACGTGCTGGG"
## [11] "CTCGCTTTCC" "CTCGGGGGCC" "GGGCGCGGCT" "GAATCCTCAC" "GTCTGGGGTC"
## [16] "CTCCAGCAAC" "AGCCCGGAAC"
# Flatten the whole data frame into one named vector. Both columns are
# concatenated (sequences first, then the IDs), so the integer ID column is
# coerced to character, and element names are the column name plus a running
# index (e.g. "coding_gene_flank2").
nachr_unlisted <- unlist(
  nachr_coding_gene_flank,
  recursive = TRUE,
  use.names = TRUE
)

# Second element of the flattened vector, i.e. the second flank sequence.
nachr_unlisted[2L]
## coding_gene_flank2
## "GCGGTGTGGGCTCCTTGTGCCCAGATCCTTTGTATTCATAGGGGGAAGTGGAAGACCACGCTGCCTGACTGAGACTTTATTATAGAGGCTCAGGAAAAAG"