library(GenomicRanges)
gr = GRanges(seqnames =
Rle(c('chr1', 'chr2', 'chr1', 'chr3'), c(1,2,3,4)),
ranges =
IRanges(1:10, end = 7:16, names = head(letters, 10)),
strand =
Rle(strand(c('-', '+', '*', '+', '-')),
c(1,2,2,3,2)),
score = 1:10,
GC = seq(1,0, length=10))
seqlengths(gr) = c(249250621,243199373,198022430)
seqnames(gr)
## factor-Rle of length 10 with 4 runs
## Lengths: 1 2 3 4
## Values : chr1 chr2 chr1 chr3
## Levels(3): chr1 chr2 chr3
ranges(gr)
## IRanges of length 10
## start end width names
## [1] 1 7 7 a
## [2] 2 8 7 b
## [3] 3 9 7 c
## [4] 4 10 7 d
## [5] 5 11 7 e
## [6] 6 12 7 f
## [7] 7 13 7 g
## [8] 8 14 7 h
## [9] 9 15 7 i
## [10] 10 16 7 j
names(gr)
## [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j"
strand(gr)
## factor-Rle of length 10 with 5 runs
## Lengths: 1 2 2 3 2
## Values : - + * + -
## Levels(3): + - *
mcols(gr)$score
## [1] 1 2 3 4 5 6 7 8 9 10
gr$score
## [1] 1 2 3 4 5 6 7 8 9 10
seqlengths(gr)
## chr1 chr2 chr3
## 249250621 243199373 198022430
length(gr)
## [1] 10
sp = split(gr, rep(1:2, each = 5))
sp
## GRangesList object of length 2:
## $1
## GRanges object with 5 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## a chr1 [1, 7] - | 1 1
## b chr2 [2, 8] + | 2 0.888888888888889
## c chr2 [3, 9] + | 3 0.777777777777778
## d chr1 [4, 10] * | 4 0.666666666666667
## e chr1 [5, 11] * | 5 0.555555555555556
##
## $2
## GRanges object with 5 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## f chr1 [ 6, 12] + | 6 0.444444444444444
## g chr3 [ 7, 13] + | 7 0.333333333333333
## h chr3 [ 8, 14] + | 8 0.222222222222222
## i chr3 [ 9, 15] - | 9 0.111111111111111
## j chr3 [10, 16] - | 10 0
##
## -------
## seqinfo: 3 sequences from an unspecified genome
#also check out my exploration time
split(gr, seqnames(gr))
## GRangesList object of length 3:
## $chr1
## GRanges object with 4 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## a chr1 [1, 7] - | 1 1
## d chr1 [4, 10] * | 4 0.666666666666667
## e chr1 [5, 11] * | 5 0.555555555555556
## f chr1 [6, 12] + | 6 0.444444444444444
##
## $chr2
## GRanges object with 2 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## b chr2 [2, 8] + | 2 0.888888888888889
## c chr2 [3, 9] + | 3 0.777777777777778
##
## $chr3
## GRanges object with 4 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## g chr3 [ 7, 13] + | 7 0.333333333333333
## h chr3 [ 8, 14] + | 8 0.222222222222222
## i chr3 [ 9, 15] - | 9 0.111111111111111
## j chr3 [10, 16] - | 10 0
##
## -------
## seqinfo: 3 sequences from an unspecified genome
split(gr, strand(gr))
## GRangesList object of length 3:
## $+
## GRanges object with 5 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## b chr2 [2, 8] + | 2 0.888888888888889
## c chr2 [3, 9] + | 3 0.777777777777778
## f chr1 [6, 12] + | 6 0.444444444444444
## g chr3 [7, 13] + | 7 0.333333333333333
## h chr3 [8, 14] + | 8 0.222222222222222
##
## $-
## GRanges object with 3 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## a chr1 [ 1, 7] - | 1 1
## i chr3 [ 9, 15] - | 9 0.111111111111111
## j chr3 [10, 16] - | 10 0
##
## $*
## GRanges object with 2 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## d chr1 [4, 10] * | 4 0.666666666666667
## e chr1 [5, 11] * | 5 0.555555555555556
##
## -------
## seqinfo: 3 sequences from an unspecified genome
c(sp[[1]], sp[[2]])
## GRanges object with 10 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## a chr1 [ 1, 7] - | 1 1
## b chr2 [ 2, 8] + | 2 0.888888888888889
## c chr2 [ 3, 9] + | 3 0.777777777777778
## d chr1 [ 4, 10] * | 4 0.666666666666667
## e chr1 [ 5, 11] * | 5 0.555555555555556
## f chr1 [ 6, 12] + | 6 0.444444444444444
## g chr3 [ 7, 13] + | 7 0.333333333333333
## h chr3 [ 8, 14] + | 8 0.222222222222222
## i chr3 [ 9, 15] - | 9 0.111111111111111
## j chr3 [10, 16] - | 10 0
## -------
## seqinfo: 3 sequences from an unspecified genome
gr[2:3]
## GRanges object with 2 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## b chr2 [2, 8] + | 2 0.888888888888889
## c chr2 [3, 9] + | 3 0.777777777777778
## -------
## seqinfo: 3 sequences from an unspecified genome
gr[2:3, 'GC']
## GRanges object with 2 ranges and 1 metadata column:
## seqnames ranges strand | GC
## <Rle> <IRanges> <Rle> | <numeric>
## b chr2 [2, 8] + | 0.888888888888889
## c chr2 [3, 9] + | 0.777777777777778
## -------
## seqinfo: 3 sequences from an unspecified genome
gr[2:3, 2]
## GRanges object with 2 ranges and 1 metadata column:
## seqnames ranges strand | GC
## <Rle> <IRanges> <Rle> | <numeric>
## b chr2 [2, 8] + | 0.888888888888889
## c chr2 [3, 9] + | 0.777777777777778
## -------
## seqinfo: 3 sequences from an unspecified genome
#Here is an example where the 2nd row of a GRanges object is replaced with the 1st row of gr
singles <- split(gr, names(gr))
grMod <- gr
grMod[2] <- singles[[1]]
head(grMod, n=3)
## GRanges object with 3 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## a chr1 [1, 7] - | 1 1
## b chr1 [1, 7] - | 1 1
## c chr2 [3, 9] + | 3 0.777777777777778
## -------
## seqinfo: 3 sequences from an unspecified genome
#Here is a second examplefrom the 2nd row etc.
grMod[2,1] <- singles[[3]][,1]
head(grMod, n=3)
## GRanges object with 3 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## a chr1 [1, 7] - | 1 1
## b chr2 [3, 9] + | 3 1
## c chr2 [3, 9] + | 3 0.777777777777778
## -------
## seqinfo: 3 sequences from an unspecified genome
rep(singles[[2]], times = 3)
## GRanges object with 3 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## b chr2 [2, 8] + | 2 0.888888888888889
## b chr2 [2, 8] + | 2 0.888888888888889
## b chr2 [2, 8] + | 2 0.888888888888889
## -------
## seqinfo: 3 sequences from an unspecified genome
rev(gr)
## GRanges object with 10 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## j chr3 [10, 16] - | 10 0
## i chr3 [ 9, 15] - | 9 0.111111111111111
## h chr3 [ 8, 14] + | 8 0.222222222222222
## g chr3 [ 7, 13] + | 7 0.333333333333333
## f chr1 [ 6, 12] + | 6 0.444444444444444
## e chr1 [ 5, 11] * | 5 0.555555555555556
## d chr1 [ 4, 10] * | 4 0.666666666666667
## c chr2 [ 3, 9] + | 3 0.777777777777778
## b chr2 [ 2, 8] + | 2 0.888888888888889
## a chr1 [ 1, 7] - | 1 1
## -------
## seqinfo: 3 sequences from an unspecified genome
#using window
window(gr, start=2,end=4)
## GRanges object with 3 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## b chr2 [2, 8] + | 2 0.888888888888889
## c chr2 [3, 9] + | 3 0.777777777777778
## d chr1 [4, 10] * | 4 0.666666666666667
## -------
## seqinfo: 3 sequences from an unspecified genome
gr[2:4]
## GRanges object with 3 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## b chr2 [2, 8] + | 2 0.888888888888889
## c chr2 [3, 9] + | 3 0.777777777777778
## d chr1 [4, 10] * | 4 0.666666666666667
## -------
## seqinfo: 3 sequences from an unspecified genome
#using ranges
gr[IRanges(start=c(2,7), end=c(3,9))]
## GRanges object with 5 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## b chr2 [2, 8] + | 2 0.888888888888889
## c chr2 [3, 9] + | 3 0.777777777777778
## g chr3 [7, 13] + | 7 0.333333333333333
## h chr3 [8, 14] + | 8 0.222222222222222
## i chr3 [9, 15] - | 9 0.111111111111111
## -------
## seqinfo: 3 sequences from an unspecified genome
gr[c(2,3,7,8,9)]
## GRanges object with 5 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## b chr2 [2, 8] + | 2 0.888888888888889
## c chr2 [3, 9] + | 3 0.777777777777778
## g chr3 [7, 13] + | 7 0.333333333333333
## h chr3 [8, 14] + | 8 0.222222222222222
## i chr3 [9, 15] - | 9 0.111111111111111
## -------
## seqinfo: 3 sequences from an unspecified genome
g <- gr[1:3]
g <- append(g, singles[[10]])
g
## GRanges object with 4 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## a chr1 [ 1, 7] - | 1 1
## b chr2 [ 2, 8] + | 2 0.888888888888889
## c chr2 [ 3, 9] + | 3 0.777777777777778
## j chr3 [10, 16] - | 10 0
## -------
## seqinfo: 3 sequences from an unspecified genome
start(g)
## [1] 1 2 3 10
end(g)
## [1] 7 8 9 16
width(g)
## [1] 7 7 7 7
range(g)
## GRanges object with 3 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr1 [ 1, 7] -
## [2] chr2 [ 2, 9] +
## [3] chr3 [10, 16] -
## -------
## seqinfo: 3 sequences from an unspecified genome
# It looks like range provides the range on each chromosome and strand
sort(gr)
## GRanges object with 10 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## f chr1 [ 6, 12] + | 6 0.444444444444444
## a chr1 [ 1, 7] - | 1 1
## d chr1 [ 4, 10] * | 4 0.666666666666667
## e chr1 [ 5, 11] * | 5 0.555555555555556
## b chr2 [ 2, 8] + | 2 0.888888888888889
## c chr2 [ 3, 9] + | 3 0.777777777777778
## g chr3 [ 7, 13] + | 7 0.333333333333333
## h chr3 [ 8, 14] + | 8 0.222222222222222
## i chr3 [ 9, 15] - | 9 0.111111111111111
## j chr3 [10, 16] - | 10 0
## -------
## seqinfo: 3 sequences from an unspecified genome
range(gr)
## GRanges object with 6 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr1 [6, 12] +
## [2] chr1 [1, 7] -
## [3] chr1 [4, 11] *
## [4] chr2 [2, 9] +
## [5] chr3 [7, 14] +
## [6] chr3 [9, 16] -
## -------
## seqinfo: 3 sequences from an unspecified genome
# flank gives you a width of 10, for new range beginning 1 coordinate after the end
# all this is backwards if start = FALSE
g
## GRanges object with 4 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## a chr1 [ 1, 7] - | 1 1
## b chr2 [ 2, 8] + | 2 0.888888888888889
## c chr2 [ 3, 9] + | 3 0.777777777777778
## j chr3 [10, 16] - | 10 0
## -------
## seqinfo: 3 sequences from an unspecified genome
flank(g,10)
## Warning in valid.GenomicRanges.seqinfo(x, suggest.trim = TRUE): GRanges object contains 2 out-of-bound ranges located on sequence
## chr2. Note that only ranges located on a non-circular sequence
## whose length is not NA can be considered out-of-bound (use
## seqlengths() and isCircular() to get the lengths and circularity
## flags of the underlying sequences). You can use trim() to trim
## these ranges. See ?`trim,GenomicRanges-method` for more
## information.
## GRanges object with 4 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## a chr1 [ 8, 17] - | 1 1
## b chr2 [-8, 1] + | 2 0.888888888888889
## c chr2 [-7, 2] + | 3 0.777777777777778
## j chr3 [17, 26] - | 10 0
## -------
## seqinfo: 3 sequences from an unspecified genome
flank(g,10, start=FALSE)
## Warning in valid.GenomicRanges.seqinfo(x, suggest.trim = TRUE): GRanges object contains 2 out-of-bound ranges located on sequences
## chr1 and chr3. Note that only ranges located on a non-circular
## sequence whose length is not NA can be considered out-of-bound
## (use seqlengths() and isCircular() to get the lengths and
## circularity flags of the underlying sequences). You can use trim()
## to trim these ranges. See ?`trim,GenomicRanges-method` for more
## information.
## GRanges object with 4 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## a chr1 [-9, 0] - | 1 1
## b chr2 [ 9, 18] + | 2 0.888888888888889
## c chr2 [10, 19] + | 3 0.777777777777778
## j chr3 [ 0, 9] - | 10 0
## -------
## seqinfo: 3 sequences from an unspecified genome
# shifting ranges
g
## GRanges object with 4 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## a chr1 [ 1, 7] - | 1 1
## b chr2 [ 2, 8] + | 2 0.888888888888889
## c chr2 [ 3, 9] + | 3 0.777777777777778
## j chr3 [10, 16] - | 10 0
## -------
## seqinfo: 3 sequences from an unspecified genome
shift(g, 5)
## GRanges object with 4 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## a chr1 [ 6, 12] - | 1 1
## b chr2 [ 7, 13] + | 2 0.888888888888889
## c chr2 [ 8, 14] + | 3 0.777777777777778
## j chr3 [15, 21] - | 10 0
## -------
## seqinfo: 3 sequences from an unspecified genome
# resizing: keeps the 5' fixed and change the width to 30.
# default, fix = 'start'. so does start = 5'?
g
## GRanges object with 4 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## a chr1 [ 1, 7] - | 1 1
## b chr2 [ 2, 8] + | 2 0.888888888888889
## c chr2 [ 3, 9] + | 3 0.777777777777778
## j chr3 [10, 16] - | 10 0
## -------
## seqinfo: 3 sequences from an unspecified genome
resize(g, 30)
## Warning in valid.GenomicRanges.seqinfo(x, suggest.trim = TRUE): GRanges object contains 2 out-of-bound ranges located on sequences
## chr1 and chr3. Note that only ranges located on a non-circular
## sequence whose length is not NA can be considered out-of-bound
## (use seqlengths() and isCircular() to get the lengths and
## circularity flags of the underlying sequences). You can use trim()
## to trim these ranges. See ?`trim,GenomicRanges-method` for more
## information.
## GRanges object with 4 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## a chr1 [-22, 7] - | 1 1
## b chr2 [ 2, 31] + | 2 0.888888888888889
## c chr2 [ 3, 32] + | 3 0.777777777777778
## j chr3 [-13, 16] - | 10 0
## -------
## seqinfo: 3 sequences from an unspecified genome
# reduce, disjoin, and gaps
g
## GRanges object with 4 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## a chr1 [ 1, 7] - | 1 1
## b chr2 [ 2, 8] + | 2 0.888888888888889
## c chr2 [ 3, 9] + | 3 0.777777777777778
## j chr3 [10, 16] - | 10 0
## -------
## seqinfo: 3 sequences from an unspecified genome
reduce(g)
## GRanges object with 3 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr1 [ 1, 7] -
## [2] chr2 [ 2, 9] +
## [3] chr3 [10, 16] -
## -------
## seqinfo: 3 sequences from an unspecified genome
disjoin(g)
## GRanges object with 5 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr1 [ 1, 7] -
## [2] chr2 [ 2, 2] +
## [3] chr2 [ 3, 8] +
## [4] chr2 [ 9, 9] +
## [5] chr3 [10, 16] -
## -------
## seqinfo: 3 sequences from an unspecified genome
gaps(g)
## GRanges object with 11 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr1 [ 1, 249250621] +
## [2] chr1 [ 8, 249250621] -
## [3] chr1 [ 1, 249250621] *
## [4] chr2 [ 1, 1] +
## [5] chr2 [10, 243199373] +
## [6] chr2 [ 1, 243199373] -
## [7] chr2 [ 1, 243199373] *
## [8] chr3 [ 1, 198022430] +
## [9] chr3 [ 1, 9] -
## [10] chr3 [17, 198022430] -
## [11] chr3 [ 1, 198022430] *
## -------
## seqinfo: 3 sequences from an unspecified genome
g2 <- head(gr,n=2)
g; g2
## GRanges object with 4 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## a chr1 [ 1, 7] - | 1 1
## b chr2 [ 2, 8] + | 2 0.888888888888889
## c chr2 [ 3, 9] + | 3 0.777777777777778
## j chr3 [10, 16] - | 10 0
## -------
## seqinfo: 3 sequences from an unspecified genome
## GRanges object with 2 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## a chr1 [1, 7] - | 1 1
## b chr2 [2, 8] + | 2 0.888888888888889
## -------
## seqinfo: 3 sequences from an unspecified genome
union(g, g2)
## GRanges object with 3 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr1 [ 1, 7] -
## [2] chr2 [ 2, 9] +
## [3] chr3 [10, 16] -
## -------
## seqinfo: 3 sequences from an unspecified genome
#how is this union thing much different from below???
reduce(g)
## GRanges object with 3 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr1 [ 1, 7] -
## [2] chr2 [ 2, 9] +
## [3] chr3 [10, 16] -
## -------
## seqinfo: 3 sequences from an unspecified genome
reduce(c(g,g2))
## GRanges object with 3 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr1 [ 1, 7] -
## [2] chr2 [ 2, 9] +
## [3] chr3 [10, 16] -
## -------
## seqinfo: 3 sequences from an unspecified genome
#intersect and setdiff
intersect(g, g2)
## GRanges object with 2 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr1 [1, 7] -
## [2] chr2 [2, 8] +
## -------
## seqinfo: 3 sequences from an unspecified genome
setdiff(g, g2)
## GRanges object with 2 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr2 [ 9, 9] +
## [2] chr3 [10, 16] -
## -------
## seqinfo: 3 sequences from an unspecified genome
#punion, pintersect, and psetdiff
g3 <- g[1:2]
ranges(g3[1]) <- IRanges(start=5, end=12)
g2; g3
## GRanges object with 2 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## a chr1 [1, 7] - | 1 1
## b chr2 [2, 8] + | 2 0.888888888888889
## -------
## seqinfo: 3 sequences from an unspecified genome
## GRanges object with 2 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## a chr1 [5, 12] - | 1 1
## b chr2 [2, 8] + | 2 0.888888888888889
## -------
## seqinfo: 3 sequences from an unspecified genome
punion(g2, g3)
## GRanges object with 2 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## a chr1 [1, 12] -
## b chr2 [2, 8] +
## -------
## seqinfo: 3 sequences from an unspecified genome
pintersect(g2, g3)
## GRanges object with 2 ranges and 3 metadata columns:
## seqnames ranges strand | score GC hit
## <Rle> <IRanges> <Rle> | <integer> <numeric> <logical>
## a chr1 [5, 7] - | 1 1 1
## b chr2 [2, 8] + | 2 0.888888888888889 1
## -------
## seqinfo: 3 sequences from an unspecified genome
# remember a zero is represented with the start coordinated being 1 greater than the end coordinage
psetdiff(g2, g3)
## GRanges object with 2 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## a chr1 [1, 4] -
## b chr2 [2, 1] +
## -------
## seqinfo: 3 sequences from an unspecified genome
This might be something useful when representing operons.
gr1 <- GRanges(seqnames = "chr2", ranges = IRanges(3, 6), strand = "+", score = 5L, GC = 0.45)
gr2 <- GRanges(seqnames = c("chr1", "chr1"), ranges = IRanges(c(7,13), width = 3), strand = c("+", "-"), score = 3:4, GC = c(0.3, 0.5))
grl <- GRangesList("txA" = gr1, "txB" = gr2)
grl
## GRangesList object of length 2:
## $txA
## GRanges object with 1 range and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## [1] chr2 [3, 6] + | 5 0.45
##
## $txB
## GRanges object with 2 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## [1] chr1 [ 7, 9] + | 3 0.3
## [2] chr1 [13, 15] - | 4 0.5
##
## -------
## seqinfo: 2 sequences from an unspecified genome; no seqlengths
seqnames(grl)
## RleList of length 2
## $txA
## factor-Rle of length 1 with 1 run
## Lengths: 1
## Values : chr2
## Levels(2): chr2 chr1
##
## $txB
## factor-Rle of length 2 with 1 run
## Lengths: 2
## Values : chr1
## Levels(2): chr2 chr1
ranges(grl)
## IRangesList of length 2
## $txA
## IRanges of length 1
## start end width
## [1] 3 6 4
##
## $txB
## IRanges of length 2
## start end width
## [1] 7 9 3
## [2] 13 15 3
length(grl)
## [1] 2
names(grl)
## [1] "txA" "txB"
seqlengths(grl)
## chr2 chr1
## NA NA
elementLengths(grl)
## txA txB
## 1 2
isEmpty(grl)
## [1] FALSE
mcols(grl) = c('Transcript A', 'Transcript B')
mcols(grl)
## DataFrame with 2 rows and 1 column
## value
## <character>
## 1 Transcript A
## 2 Transcript B
ul = unlist(grl)
ul
## GRanges object with 3 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## txA chr2 [ 3, 6] + | 5 0.45
## txB chr1 [ 7, 9] + | 3 0.3
## txB chr1 [13, 15] - | 4 0.5
## -------
## seqinfo: 2 sequences from an unspecified genome; no seqlengths
start(grl)
## IntegerList of length 2
## [["txA"]] 3
## [["txB"]] 7 13
end(grl)
## IntegerList of length 2
## [["txA"]] 6
## [["txB"]] 9 15
width(grl)
## IntegerList of length 2
## [["txA"]] 4
## [["txB"]] 3 3
shift(grl, 30)
## GRangesList object of length 2:
## $txA
## GRanges object with 1 range and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## [1] chr2 [33, 36] + | 5 0.45
##
## $txB
## GRanges object with 2 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## [1] chr1 [37, 39] + | 3 0.3
## [2] chr1 [43, 45] - | 4 0.5
##
## -------
## seqinfo: 2 sequences from an unspecified genome; no seqlengths
coverage(grl)
## RleList of length 2
## $chr2
## integer-Rle of length 6 with 2 runs
## Lengths: 2 4
## Values : 0 1
##
## $chr1
## integer-Rle of length 15 with 4 runs
## Lengths: 6 3 3 3
## Values : 0 1 0 1
grl[1]
## GRangesList object of length 1:
## $txA
## GRanges object with 1 range and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## [1] chr2 [3, 6] + | 5 0.45
##
## -------
## seqinfo: 2 sequences from an unspecified genome; no seqlengths
grl[[1]]
## GRanges object with 1 range and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## [1] chr2 [3, 6] + | 5 0.45
## -------
## seqinfo: 2 sequences from an unspecified genome; no seqlengths
grl['txA']
## GRangesList object of length 1:
## $txA
## GRanges object with 1 range and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## [1] chr2 [3, 6] + | 5 0.45
##
## -------
## seqinfo: 2 sequences from an unspecified genome; no seqlengths
grl$txB
## GRanges object with 2 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## [1] chr1 [ 7, 9] + | 3 0.3
## [2] chr1 [13, 15] - | 4 0.5
## -------
## seqinfo: 2 sequences from an unspecified genome; no seqlengths
grl[1, 'score'] # is a GRangesList object
## GRangesList object of length 1:
## $txA
## GRanges object with 1 range and 1 metadata column:
## seqnames ranges strand | score
## <Rle> <IRanges> <Rle> | <integer>
## [1] chr2 [3, 6] + | 5
##
## -------
## seqinfo: 2 sequences from an unspecified genome; no seqlengths
grl[1][,'score'] # is a GRangesList object
## GRangesList object of length 1:
## $txA
## GRanges object with 1 range and 1 metadata column:
## seqnames ranges strand | score
## <Rle> <IRanges> <Rle> | <integer>
## [1] chr2 [3, 6] + | 5
##
## -------
## seqinfo: 2 sequences from an unspecified genome; no seqlengths
grl[[1]][,'score'] # is a GRanges object
## GRanges object with 1 range and 1 metadata column:
## seqnames ranges strand | score
## <Rle> <IRanges> <Rle> | <integer>
## [1] chr2 [3, 6] + | 5
## -------
## seqinfo: 2 sequences from an unspecified genome; no seqlengths
rep(grl[[1]], times = 3)
## GRanges object with 3 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## [1] chr2 [3, 6] + | 5 0.45
## [2] chr2 [3, 6] + | 5 0.45
## [3] chr2 [3, 6] + | 5 0.45
## -------
## seqinfo: 2 sequences from an unspecified genome; no seqlengths
rev(grl)
## GRangesList object of length 2:
## $txB
## GRanges object with 2 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## [1] chr1 [ 7, 9] + | 3 0.3
## [2] chr1 [13, 15] - | 4 0.5
##
## $txA
## GRanges object with 1 range and 2 metadata columns:
## seqnames ranges strand | score GC
## [1] chr2 [3, 6] + | 5 0.45
##
## -------
## seqinfo: 2 sequences from an unspecified genome; no seqlengths
# how to retrieve a range of elements from a list
window(grl, start = 1, end = 1)
## GRangesList object of length 1:
## $txA
## GRanges object with 1 range and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## [1] chr2 [3, 6] + | 5 0.45
##
## -------
## seqinfo: 2 sequences from an unspecified genome; no seqlengths
window(grl, start = 2, end = 2)
## GRangesList object of length 1:
## $txB
## GRanges object with 2 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## [1] chr1 [ 7, 9] + | 3 0.3
## [2] chr1 [13, 15] - | 4 0.5
##
## -------
## seqinfo: 2 sequences from an unspecified genome; no seqlengths
window(grl, start = 1, end = 2)
## GRangesList object of length 2:
## $txA
## GRanges object with 1 range and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## [1] chr2 [3, 6] + | 5 0.45
##
## $txB
## GRanges object with 2 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## [1] chr1 [ 7, 9] + | 3 0.3
## [2] chr1 [13, 15] - | 4 0.5
##
## -------
## seqinfo: 2 sequences from an unspecified genome; no seqlengths
# IRanges and grl, like window above
grl[IRanges(start = 2, end = 2)]
## GRangesList object of length 1:
## $txB
## GRanges object with 2 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## [1] chr1 [ 7, 9] + | 3 0.3
## [2] chr1 [13, 15] - | 4 0.5
##
## -------
## seqinfo: 2 sequences from an unspecified genome; no seqlengths
lapply(grl, length)
## $txA
## [1] 1
##
## $txB
## [1] 2
sapply(grl, length)
## txA txB
## 1 2
grl2 = shift(grl, 10)
names(grl2) = c('shiftTxA', 'shiftTxB')
# WHAT THE HELL IS THIS???
# mapply is a multivariate version of sapply. mapply applies FUN to the first elements of each ... argument, the second elements, the third elements, and so on. Arguments are recycled if necessary
mapply(c, grl, grl2)
## $txA
## GRanges object with 2 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## [1] chr2 [ 3, 6] + | 5 0.45
## [2] chr2 [13, 16] + | 5 0.45
## -------
## seqinfo: 2 sequences from an unspecified genome; no seqlengths
##
## $txB
## GRanges object with 4 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## [1] chr1 [ 7, 9] + | 3 0.3
## [2] chr1 [13, 15] - | 4 0.5
## [3] chr1 [17, 19] + | 3 0.3
## [4] chr1 [23, 25] - | 4 0.5
## -------
## seqinfo: 2 sequences from an unspecified genome; no seqlengths
Map(c, grl, grl2)
## $txA
## GRanges object with 2 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## [1] chr2 [ 3, 6] + | 5 0.45
## [2] chr2 [13, 16] + | 5 0.45
## -------
## seqinfo: 2 sequences from an unspecified genome; no seqlengths
##
## $txB
## GRanges object with 4 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## [1] chr1 [ 7, 9] + | 3 0.3
## [2] chr1 [13, 15] - | 4 0.5
## [3] chr1 [17, 19] + | 3 0.3
## [4] chr1 [23, 25] - | 4 0.5
## -------
## seqinfo: 2 sequences from an unspecified genome; no seqlengths
# This is reversing the individual ranges within each GRanges object in the list
# endoapply and mendoapply perform the endomorphic equivalents of lapply and mapply by returning objects of the same class as the inputs rather than a list.
endoapply(grl, rev)
## GRangesList object of length 2:
## $txA
## GRanges object with 1 range and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## [1] chr2 [3, 6] + | 5 0.45
##
## $txB
## GRanges object with 2 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## [1] chr1 [13, 15] - | 4 0.5
## [2] chr1 [ 7, 9] + | 3 0.3
##
## -------
## seqinfo: 2 sequences from an unspecified genome; no seqlengths
# mendoapply(FUN, ..., MoreArgs=NULL)
mendoapply(c, grl, grl2)
## GRangesList object of length 2:
## $txA
## GRanges object with 2 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## [1] chr2 [ 3, 6] + | 5 0.45
## [2] chr2 [13, 16] + | 5 0.45
##
## $txB
## GRanges object with 4 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## [1] chr1 [ 7, 9] + | 3 0.3
## [2] chr1 [13, 15] - | 4 0.5
## [3] chr1 [17, 19] + | 3 0.3
## [4] chr1 [23, 25] - | 4 0.5
##
## -------
## seqinfo: 2 sequences from an unspecified genome; no seqlengths
# Reduce() ... as different from reduce()
Reduce(c, grl)
## GRanges object with 3 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## [1] chr2 [ 3, 6] + | 5 0.45
## [2] chr1 [ 7, 9] + | 3 0.3
## [3] chr1 [13, 15] - | 4 0.5
## -------
## seqinfo: 2 sequences from an unspecified genome; no seqlengths
# findOverlaps, which takes a query and a subject as inputs and returns a Hits object containing the index pairings for the overlapping elements.
mtch = findOverlaps(gr, grl)
as.matrix(mtch)
## queryHits subjectHits
## [1,] 2 1
## [2,] 3 1
## [3,] 4 2
## [4,] 5 2
## [5,] 6 2
findOverlaps(gr, grl, select='first')
## [1] NA 1 1 2 2 2 NA NA NA NA
findOverlaps(grl, gr, select='first')
## [1] 2 4
# countOverlaps, which tabulates the number of overlaps for each element in the query.
countOverlaps(gr, grl)
## a b c d e f g h i j
## 0 1 1 1 1 1 0 0 0 0
countOverlaps(grl, gr)
## txA txB
## 2 3
# subsetByOverlaps, which extracts the elements in the query that overlap at least one element in the subject.
subsetByOverlaps(gr, grl)
## GRanges object with 5 ranges and 2 metadata columns:
## seqnames ranges strand | score GC
## <Rle> <IRanges> <Rle> | <integer> <numeric>
## b chr2 [2, 8] + | 2 0.888888888888889
## c chr2 [3, 9] + | 3 0.777777777777778
## d chr1 [4, 10] * | 4 0.666666666666667
## e chr1 [5, 11] * | 5 0.555555555555556
## f chr1 [6, 12] + | 6 0.444444444444444
## -------
## seqinfo: 3 sequences from an unspecified genome
library(GenomicAlignments)
aln1_file = system.file("extdata", "ex1.bam",package="Rsamtools")
aln1 <- readGAlignments(aln1_file)
aln1
## GAlignments object with 3271 alignments and 0 metadata columns:
## seqnames strand cigar qwidth start end
## <Rle> <Rle> <character> <integer> <integer> <integer>
## [1] seq1 + 36M 36 1 36
## [2] seq1 + 35M 35 3 37
## [3] seq1 + 35M 35 5 39
## [4] seq1 + 36M 36 6 41
## [5] seq1 + 35M 35 9 43
## ... ... ... ... ... ... ...
## [3267] seq2 + 35M 35 1524 1558
## [3268] seq2 + 35M 35 1524 1558
## [3269] seq2 - 35M 35 1528 1562
## [3270] seq2 - 35M 35 1532 1566
## [3271] seq2 - 35M 35 1533 1567
## width njunc
## <integer> <integer>
## [1] 36 0
## [2] 35 0
## [3] 35 0
## [4] 36 0
## [5] 35 0
## ... ... ...
## [3267] 35 0
## [3268] 35 0
## [3269] 35 0
## [3270] 35 0
## [3271] 35 0
## -------
## seqinfo: 2 sequences from an unspecified genome
seqnames(aln1)
## factor-Rle of length 3271 with 2 runs
## Lengths: 1482 1789
## Values : seq1 seq2
## Levels(2): seq1 seq2
seqlevels(aln1)
## [1] "seq1" "seq2"
strand(aln1)
## factor-Rle of length 3271 with 1394 runs
## Lengths: 8 1 63 2 12 1 2 2 6 1 ... 1 1 1 1 1 12 2 2 3 3
## Values : + - + - + - + - + - ... + - + - + - + - + -
## Levels(3): + - *
# what in the hell is this cigar crap?
head(cigar(aln1))
## [1] "36M" "35M" "35M" "36M" "35M" "35M"
# what is the difference between these two widths?
head(qwidth(aln1))
## [1] 36 35 35 36 35 35
head(width(aln1))
## [1] 36 35 35 36 35 35
head(start(aln1))
## [1] 1 3 5 6 9 13
head(end(aln1))
## [1] 36 37 39 41 43 47
# what in the hell is this njunc crap?
head(njunc(aln1))
## [1] 0 0 0 0 0 0