rm(list = ls())
#####################Getting Data from NCBI
library("seqinr")
## Warning: package 'seqinr' was built under R version 4.0.5
getncbiseq <- function(accession) {
  require("seqinr") # this function requires the SeqinR R package 
  # first find which ACNUC database the accession is stored in: 
  dbs <- c("genbank","refseq","refseqViruses","bacterial")
  numdbs <- length(dbs)
  for (i in 1:numdbs) {
    db <- dbs[i]
    choosebank(db)
    # check if the sequence is in ACNUC database â???Tdbâ???T:
    resquery <- try(query(".tmpquery", paste("AC=", accession)), silent = TRUE) 
    if (!(inherits(resquery, "try-error")))
    {
      print(paste("trying: ",db))
      queryname <- "query2"
      thequery <- paste("AC=",accession,sep="") 
      print(thequery)
      # query("queryname","thequery")
      query2 <- query(`queryname`,`thequery`)
      # see if a sequence was retrieved:
      seq <- getSequence(query2$req[[1]]) 
      closebank()
      return(seq) 
    }
    print(paste("accession not in: ",db))
    closebank()
  }
  print(paste("ERROR: accession",accession,"was not found"))
}
dengueseq <- getncbiseq("NC_001477")
## [1] "accession not in:  genbank"
## [1] "accession not in:  refseq"
## [1] "trying:  refseqViruses"
## [1] "AC=NC_001477"
dengueseq[1:50]; table(dengueseq); length(dengueseq)
##  [1] "a" "g" "t" "t" "g" "t" "t" "a" "g" "t" "c" "t" "a" "c" "g" "t" "g" "g" "a"
## [20] "c" "c" "g" "a" "c" "a" "a" "g" "a" "a" "c" "a" "g" "t" "t" "t" "c" "g" "a"
## [39] "a" "t" "c" "g" "g" "a" "a" "g" "c" "t" "t" "g"
## dengueseq
##    a    c    g    t 
## 3426 2240 2770 2299
## [1] 10735
write.fasta(names="DEN-1", sequences=dengueseq, file.out="den1.fasta")
################################input sequence
library("seqinr")
dengue <- read.fasta(file = "den1.fasta")
length(dengue$`DEN-1`); table(dengue$`DEN-1`); dengue$`DEN-1`[1:50]; GC(dengue$`DEN-1`)
## [1] 10735
## 
##    a    c    g    t 
## 3426 2240 2770 2299
##  [1] "a" "g" "t" "t" "g" "t" "t" "a" "g" "t" "c" "t" "a" "c" "g" "t" "g" "g" "a"
## [20] "c" "c" "g" "a" "c" "a" "a" "g" "a" "a" "c" "a" "g" "t" "t" "t" "c" "g" "a"
## [39] "a" "t" "c" "g" "g" "a" "a" "g" "c" "t" "t" "g"
## [1] 0.4666977
tab <- table(dengue); (tab["g"]+tab["c"])/sum(tab)
##         g 
## 0.4666977
# Get the Vector of Nucleotide sequence
#Get DNA words Counts that are 1 nucleotide long
dengueseq_1 <- dengue[[1]]
count(dengueseq_1, 1); count(dengueseq_1, 2)
## 
##    a    c    g    t 
## 3426 2240 2770 2299
## 
##   aa   ac   ag   at   ca   cc   cg   ct   ga   gc   gg   gt   ta   tc   tg   tt 
## 1108  720  890  708  901  523  261  555  976  500  787  507  440  497  832  529
#ref https://rpubs.com/wenmi01/r_bioconductor_basics
#ref https://rpubs.com/wenmi01/r_bioconductor_basics