rm(list = ls())
#library(devtools)
#BiocManager::install("Biostrings")
#Sys.setenv("TAR" = "internal")
#devtools::install_github("mhahsler/rBLAST")
library(rBLAST)
## Loading required package: Biostrings
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which, which.max, which.min
## Loading required package: S4Vectors
## Loading required package: stats4
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:grDevices':
## 
##     windows
## Loading required package: XVector
## 
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
## 
##     strsplit
##download BLAST #https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST/
##check local BLAST software
#Sys.setenv(PATH = "C:\\Program Files\\NCBI\\blast-2.12.0+\\bin")
Sys.which("blastn"); system("blastn -version")
##                                             blastn 
## "C:\\PROGRA~1\\NCBI\\BLAST-~1.0_\\bin\\blastn.exe"
## [1] 0
#### build DB
dir <- "C:\\Users\\liyix\\OneDrive\\Desktop\\BLAST"
dir
## [1] "C:\\Users\\liyix\\OneDrive\\Desktop\\BLAST"
db = makeblastdb(file.path(dir, "sequence.fasta"), dbtype = "nucl", args="")
db
## [1] 0
#dbtype:"nucl" or "prot"
bl <- blast(file.path(dir, "sequence.fasta"))
bl
## BLAST Database
## Location: C:\Users\liyix\OneDrive\Desktop\BLAST/sequence.fasta 
## BLAST Type: blastn 
## Database: C:\Users\liyix\OneDrive\Desktop\BLAST/sequence.fasta
##  1 sequences; 1,062 total bases
## 
## Date: Aug 21, 2021  12:07 PM Longest sequence: 1,062 bases
## 
## BLASTDB Version: 5
## 
## Volumes:
##  C:\Users\liyix\OneDrive\Desktop\BLAST\sequence.fasta
## load some test data 
fq <- readDNAStringSet(file.path(dir, "sequence.fasta"),format = 'fasta')
fq
## DNAStringSet object of length 1:
##     width seq                                               names               
## [1]  1062 ATGACCGAACCGTTAAAACCACG...GCAAAACGCCCAGCGAAAAATAA NC_000913.3:13855...
#
#setwd(dir)
output <- predict(bl, fq, custom_format = "qseqid bitscore length")
output
##                        qseqid bitscore length
## 1 NC_000913.3:1385511-1386572     1962   1062
cl <- predict(bl, fq, BLAST_args="-max_target_seqs 1")
cl
##                       QueryID                   SubjectID Perc.Ident
## 1 NC_000913.3:1385511-1386572 NC_000913.3:1385511-1386572        100
##   Alignment.Length Mismatches Gap.Openings Q.start Q.end S.start S.end E Bits
## 1             1062          0            0       1  1062       1  1062 0 1962
fq_1 <- subseq(fq, start = 1, width = 50)
#predict(bl, fq_1, BLAST_args="-max_target_seqs 1")
c(fq_1, fq_1)
## DNAStringSet object of length 2:
##     width seq                                               names               
## [1]    50 ATGACCGAACCGTTAAAACCACG...GATTTCGACGGTCCTCTGGAGGT NC_000913.3:13855...
## [2]    50 ATGACCGAACCGTTAAAACCACG...GATTTCGACGGTCCTCTGGAGGT NC_000913.3:13855...
predict(bl, c(fq_1, fq_1), BLAST_args="-max_target_seqs 1")
##                       QueryID                   SubjectID Perc.Ident
## 1 NC_000913.3:1385511-1386572 NC_000913.3:1385511-1386572        100
## 2 NC_000913.3:1385511-1386572 NC_000913.3:1385511-1386572        100
##   Alignment.Length Mismatches Gap.Openings Q.start Q.end S.start S.end        E
## 1               50          0            0       1    50       1    50 3.19e-24
## 2               50          0            0       1    50       1    50 3.19e-24
##   Bits
## 1 93.5
## 2 93.5
#predict(bl, c(fq_1, fq_1))
??rBLAST::predict
## starting httpd help server ...
##  done
write.csv(cl, paste0(dir,"\\",Sys.Date(),"-","fq_1_result.csv"),row.names = FALSE)
##ref https://cloud.tencent.com/developer/article/1636434
## https://bio331.devbioinformatics.org/rBlast.html
##https://github.com/mhahsler/rBLAST/
##download.file("https://ftp.ncbi.nlm.nih.gov/blast/db/16S_ribosomal_RNA.tar.gz",
##"16S_ribosomal_RNA.tar.gz", mode='wb')
#untar("16S_ribosomal_RNA.tar.gz", exdir="16SMicrobialDB")