rm(list = ls())
#####################Getting Data from NCBI
library("seqinr")
## Warning: package 'seqinr' was built under R version 4.0.5
getncbiseq <- function(accession) {
require("seqinr") # this function requires the SeqinR R package
# first find which ACNUC database the accession is stored in:
dbs <- c("genbank","refseq","refseqViruses","bacterial")
numdbs <- length(dbs)
for (i in 1:numdbs) {
db <- dbs[i]
choosebank(db)
# check if the sequence is in ACNUC database â???Tdbâ???T:
resquery <- try(query(".tmpquery", paste("AC=", accession)), silent = TRUE)
if (!(inherits(resquery, "try-error")))
{
print(paste("trying: ",db))
queryname <- "query2"
thequery <- paste("AC=",accession,sep="")
print(thequery)
# query("queryname","thequery")
query2 <- query(`queryname`,`thequery`)
# see if a sequence was retrieved:
seq <- getSequence(query2$req[[1]])
closebank()
return(seq)
}
print(paste("accession not in: ",db))
closebank()
}
print(paste("ERROR: accession",accession,"was not found"))
}
dengueseq <- getncbiseq("NC_001477")
## [1] "accession not in: genbank"
## [1] "accession not in: refseq"
## [1] "trying: refseqViruses"
## [1] "AC=NC_001477"
dengueseq[1:50]; table(dengueseq); length(dengueseq)
## [1] "a" "g" "t" "t" "g" "t" "t" "a" "g" "t" "c" "t" "a" "c" "g" "t" "g" "g" "a"
## [20] "c" "c" "g" "a" "c" "a" "a" "g" "a" "a" "c" "a" "g" "t" "t" "t" "c" "g" "a"
## [39] "a" "t" "c" "g" "g" "a" "a" "g" "c" "t" "t" "g"
## dengueseq
## a c g t
## 3426 2240 2770 2299
## [1] 10735
write.fasta(names="DEN-1", sequences=dengueseq, file.out="den1.fasta")
################################input sequence
library("seqinr")
dengue <- read.fasta(file = "den1.fasta")
length(dengue$`DEN-1`); table(dengue$`DEN-1`); dengue$`DEN-1`[1:50]; GC(dengue$`DEN-1`)
## [1] 10735
##
## a c g t
## 3426 2240 2770 2299
## [1] "a" "g" "t" "t" "g" "t" "t" "a" "g" "t" "c" "t" "a" "c" "g" "t" "g" "g" "a"
## [20] "c" "c" "g" "a" "c" "a" "a" "g" "a" "a" "c" "a" "g" "t" "t" "t" "c" "g" "a"
## [39] "a" "t" "c" "g" "g" "a" "a" "g" "c" "t" "t" "g"
## [1] 0.4666977
tab <- table(dengue); (tab["g"]+tab["c"])/sum(tab)
## g
## 0.4666977
# Get the Vector of Nucleotide sequence
#Get DNA words Counts that are 1 nucleotide long
dengueseq_1 <- dengue[[1]]
count(dengueseq_1, 1); count(dengueseq_1, 2)
##
## a c g t
## 3426 2240 2770 2299
##
## aa ac ag at ca cc cg ct ga gc gg gt ta tc tg tt
## 1108 720 890 708 901 523 261 555 976 500 787 507 440 497 832 529
#ref https://rpubs.com/wenmi01/r_bioconductor_basics
#ref https://rpubs.com/wenmi01/r_bioconductor_basics