This file can generate a random number of DNA sequences, each of a random length, and then write/save them to a file in FASTA format. The file name will be: date-‘DNA’-number of distinct sequences

TO DO: Set ‘Num_Sequences’ to the number of students

TO DO: Comment out ‘set.seed’ to get different random sequences on every run

# Seed the random number generator first, so that every random draw made
# after this point (including a randomly chosen Num_Sequences, if enabled
# below) is reproducible from run to run.
# This can be commented out for less definable patterns.
set.seed(1000)

# Number of sequences to generate.
Num_Sequences <- 2
# Alternative: choose a random whole number of sequences between 2 and 10.
# sample() is used instead of runif() because runif() returns a fractional
# value, which would then appear verbatim in the output file name.
# Num_Sequences <- sample(2:10, 1)

#install.packages("seqinr")
library(seqinr)

# Output file name: <date>-DNA-<number of sequences>.fasta
# Example file name: 2018-03-29-DNA-2.fasta
file_out <- paste0(Sys.Date(), "-DNA-", Num_Sequences, ".fasta")

# Start from an empty file: file.create() makes the file, or truncates it if
# it already exists. Stop right away if that fails (e.g. unwritable
# directory) instead of discovering the problem on the first write.
if (!file.create(file_out)) {
  stop("Could not create output file: ", file_out, call. = FALSE)
}

# Nucleotide alphabet and the sampling weight of each symbol (same order).
# The trailing comments hold optional extra symbols/weights; if they are
# enabled, both vectors must be extended together.
DNA <- c("A", "T", "C", "G") # , "N", "a", "t", "c", "g", "n")
DNA_probabilities <- c(0.2, 0.2, 0.2, 0.4) # , 0.05, 0.01, 0.01, 0.01, 0.01, 0.01)
stopifnot(length(DNA) == length(DNA_probabilities))

# Generate one random DNA sequence per student and append each one as a FASTA
# record to the output file.
# seq_len() is used so that a count of 0 produces no iterations
# (1:0 would iterate over c(1, 0)).
for (i in seq_len(Num_Sequences)) {
  # Sequence length is drawn uniformly from 100..1000 bases.
  seq_length <- sample(100:1000, 1)

  # FASTA header text identifying the student this record belongs to.
  line_header <- paste0("Student = ", i, " | WPI Bioinformatics-DNA Exercises")

  # Draw seq_length symbols from the alphabet with the configured weights and
  # join them into a single string.
  DNA_sequence <- paste(
    sample(DNA, seq_length, replace = TRUE, prob = DNA_probabilities),
    collapse = ""
  )

  # Append to the file created earlier. Reusing file_out (rather than
  # rebuilding the name from Sys.Date() on every iteration) guarantees the
  # records go to the same file that was created, even if the date changes
  # while the script is running.
  write.fasta(
    sequences = DNA_sequence,
    names = line_header,
    file.out = file_out,
    open = "a",
    nbchar = 60,
    as.string = TRUE
  )
}