This file can generate a random number of sequences, each of a random length, and then write/save them to a file in FASTA format. The file name will be: [date]-RNA-[number of distinct sequences].fasta

TO DO: Set ‘Num_Sequences’ to the number of students

TO DO: Comment out ‘set.seed’ so that the random sequences differ from run to run

# Number of sequences to generate; it is also embedded in the output file name.
Num_Sequences <- 2
# Optional: pick the number of sequences at random instead. Use sample() rather
# than runif(): runif() returns a fractional value, which would end up in the
# file name and in the loop bound.
#Num_Sequences <- sample(2:10, 1)

# Fixed seed so that every run produces the same sequences.
#This can be commented out for less definable patterns
set.seed(1000)

#install.packages("seqinr")
library(seqinr)

# Output path, built once from today's date and the sequence count.
# Example file name: 2018-03-29-RNA-2.fasta
file_out <- paste0(Sys.Date(), "-RNA-", Num_Sequences, ".fasta")

# Create the file (truncating any existing file of the same name) and stop
# right away if that fails, instead of failing later while writing records.
if (!file.create(file_out)) {
    stop("Could not create output file: ", file_out, call. = FALSE)
}

# Alphabet to sample from and the matching per-symbol sampling weights.
# The commented-out tails are an optional extended alphabet (ambiguity code
# and lower-case symbols) with its weights.
RNA <- c("A", "U", "C", "G") #, "N", "a", "u", "c", "g", "n")
RNA_probabilities <- c(0.2, 0.2, 0.2, 0.4) #, 0.05, 0.01, 0.01, 0.01, 0.01, 0.01)

# Generate Num_Sequences random RNA sequences and append each one to the
# FASTA file as its own record.
# seq_len() gives an empty loop when Num_Sequences is 0, whereas 1:0 would
# iterate over 1 and 0.
for (i in seq_len(Num_Sequences)) {
    # Length of this sequence, drawn uniformly from 100..1000.
    seq_length <- sample(100:1000, 1)

    # FASTA header identifying which student the record belongs to.
    line_header <- paste0("Student = ", i, " | WPI Bioinformatics-RNA Exercises")

    # Draw seq_length symbols with replacement, weighted by
    # RNA_probabilities, and collapse them into a single string.
    RNA_sequence <- paste(sample(RNA,
                                 seq_length,
                                 replace = TRUE,
                                 prob = RNA_probabilities),
                          collapse = "")

    # Append to the file created earlier. Reusing file_out (rather than
    # rebuilding the name from Sys.Date() on every pass) guarantees that all
    # records land in the same file, even if the date changes mid-run.
    write.fasta(RNA_sequence,
                line_header,
                file.out = file_out,
                open = "a",      # append, so earlier records are kept
                nbchar = 60,     # wrap sequence lines at 60 characters
                as.string = TRUE)
}