This script generates a chosen (or, optionally, random) number of random protein sequences, each of random length, and then saves them all to a single FASTA file. The file name has the form: date-‘Protein’-number of sequences.fasta

TO DO: Set ‘Num_Sequences’ to the number of students

TO DO: Comment out the ‘set.seed’ call to get different random sequences on every run

# Fix the random seed so every run produces the same sequences.
# Comment this out to get different sequences on each run. It is set before
# anything random happens so that the optional random sequence count below
# is reproducible as well.
set.seed(1000)

# Number of sequences to generate (one FASTA record per student).
Num_Sequences <- 2
# Alternative: pick a random whole number of sequences between 2 and 10.
# sample() is used here because runif() returns a fractional value, which
# is not a valid count and would end up verbatim in the file name.
# Num_Sequences <- sample(2:10, 1)

#install.packages("seqinr")
library(seqinr)

# Output file name, built once and reused for every write.
# Example file name: 2018-03-29-Protein-2.fasta
file_out <- paste0(Sys.Date(), "-Protein-", Num_Sequences, ".fasta")

# Start from an empty file: file.create() truncates an existing file of the
# same name, so records appended later never mix with output from an
# earlier run. Stop right away if the file cannot be created.
if (!file.create(file_out)) {
    stop("Could not create output file: ", file_out, call. = FALSE)
}

# The 20 standard amino acids (one-letter codes).
protein <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
             "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")

# Sampling weight for each residue, in the same order as `protein`.
# Uniform here (0.05 each); edit individual entries to bias the composition.
protein_probabilities <- rep(1 / length(protein), length(protein))

# Generate one random protein sequence per student and append it to the
# FASTA file as its own record.
# seq_len() is used instead of 1:Num_Sequences so that a count of 0 writes
# nothing rather than iterating over c(1, 0).
for (i in seq_len(Num_Sequences)) {
    # Random sequence length between 100 and 1000 residues (inclusive).
    seq_length <- sample(100:1000, 1)

    # FASTA header text for this record; write.fasta() adds the leading ">".
    line_header <- paste0("Student = ", i,
                          " | WPI Bioinformatics-Protein Exercises")

    # Draw residues with replacement using the configured weights and
    # collapse them into a single string.
    Protein_sequence <- paste(sample(protein,
                                     seq_length,
                                     replace = TRUE,
                                     prob = protein_probabilities),
                              collapse = "")

    # Append to the file created above. Reusing `file_out` (rather than
    # rebuilding the name from Sys.Date() on every iteration) guarantees all
    # records land in the same file even if the run crosses midnight.
    write.fasta(sequences = Protein_sequence,
                names = line_header,
                file.out = file_out,
                open = "a",         # append; the file was emptied beforehand
                nbchar = 60,        # wrap sequence lines at 60 characters
                as.string = TRUE)   # sequence is one string, not a char vector
}