#if (!require("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#BiocManager::install("Biostrings")
require(Biostrings)
## 载入需要的程辑包:Biostrings
## 载入需要的程辑包:BiocGenerics
##
## 载入程辑包:'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## 载入需要的程辑包:S4Vectors
## 载入需要的程辑包:stats4
##
## 载入程辑包:'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## 载入需要的程辑包:IRanges
##
## 载入程辑包:'IRanges'
## The following object is masked from 'package:grDevices':
##
## windows
## 载入需要的程辑包:XVector
## 载入需要的程辑包:GenomeInfoDb
##
## 载入程辑包:'Biostrings'
## The following object is masked from 'package:base':
##
## strsplit
## Nucleotide global, local, and overlap alignments
s1 <-
DNAString("ACTTCACCAGCTCCCTGGCGGTAAGTTGATCAAAGGAAACGCAAAGTTTTCAAG")
s2 <-
DNAString("GTTTCACTACTTCCTTTCGGGTAAGTAAATATATAAATATATAAAAATATAATTTTCATC")
# First use a fixed substitution matrix
mat <- nucleotideSubstitutionMatrix(match = 1, mismatch = -3, baseOnly = TRUE)
globalAlign <-
pairwiseAlignment(s1, s2, substitutionMatrix = mat,
gapOpening = 5, gapExtension = 2)
globalAlign
## Global PairwiseAlignmentsSingleSubject (1 of 1)
## pattern: ACTTCACCAGCTCCCTGGCGGTAAGTTGATC---AAAGG---AAACGCAAAGTTTTCAAG
## subject: GTTTCACTACTTCCTTTCGGGTAAGTAAATATATAAATATATAAAAATATAATTTTCATC
## score: -52
localAlign <-
pairwiseAlignment(s1, s2, type = "local", substitutionMatrix = mat,
gapOpening = 5, gapExtension = 2)
localAlign
## Local PairwiseAlignmentsSingleSubject (1 of 1)
## pattern: [20] GGTAAGT
## subject: [20] GGTAAGT
## score: 7
overlapAlign <-
pairwiseAlignment(s1, s2, type = "overlap", substitutionMatrix = mat,
gapOpening = 5, gapExtension = 2)
overlapAlign
## Overlap PairwiseAlignmentsSingleSubject (1 of 1)
## pattern: [54] G
## subject: [1] G
## score: 1
# Then use quality-based method for generating a substitution matrix
pairwiseAlignment(s1, s2,
patternQuality = SolexaQuality(rep(c(22L, 12L), times = c(36, 18))),
subjectQuality = SolexaQuality(rep(c(22L, 12L), times = c(40, 20))),
scoreOnly = TRUE)
## [1] -77.29538
# Now assume can't distinguish between C/T and G/A
pairwiseAlignment(s1, s2,
patternQuality = SolexaQuality(rep(c(22L, 12L), times = c(36, 18))),
subjectQuality = SolexaQuality(rep(c(22L, 12L), times = c(40, 20))),
type = "local")
## Local PairwiseAlignmentsSingleSubject (1 of 1)
## pattern: [20] GGTAAGT
## subject: [20] GGTAAGT
## score: 13.8731
mapping <- diag(4)
dimnames(mapping) <- list(DNA_BASES, DNA_BASES)
mapping["C", "T"] <- mapping["T", "C"] <- 1
mapping["G", "A"] <- mapping["A", "G"] <- 1
pairwiseAlignment(s1, s2,
patternQuality = SolexaQuality(rep(c(22L, 12L), times = c(36, 18))),
subjectQuality = SolexaQuality(rep(c(22L, 12L), times = c(40, 20))),
fuzzyMatrix = mapping,
type = "local")
## Local PairwiseAlignmentsSingleSubject (1 of 1)
## pattern: [1] ACTTCACCAGCTCCCT
## subject: [1] GTTTCACTACTTCCTT
## score: 23.81974
## Amino acid global alignment
pairwiseAlignment(AAString("PAWHEAE"), AAString("HEAGAWGHEE"),
substitutionMatrix = "BLOSUM50",
gapOpening = 0, gapExtension = 8)
## Global PairwiseAlignmentsSingleSubject (1 of 1)
## pattern: -PA--W-HEAE
## subject: HEAGAWGHE-E
## score: 1
pairwiseAlignment(AAString("HEAGAWGHEE"), AAString("HEAGAWGHEE"),
substitutionMatrix = "BLOSUM50",
gapOpening = 0, gapExtension = 8)
## Global PairwiseAlignmentsSingleSubject (1 of 1)
## pattern: HEAGAWGHEE
## subject: HEAGAWGHEE
## score: 79