# TODO: Set `Num_Sequences` to the number of students.
# TODO: Comment out `set.seed()` to get different random sequences on each run.
# Number of sequences to generate (one per student).
Num_Sequences <- 2
# Alternative: draw a random whole-number count between 2 and 10.
# (sample() is used here because runif() would return a non-integer count.)
# Num_Sequences <- sample(2:10, 1)

# Fixed seed so that repeated runs draw the same random numbers. Comment the
# next line out to get different, less predictable sequences on each run.
set.seed(1000)
#install.packages("seqinr")
library(seqinr)
# Output file, named <date>-Protein-<count>.fasta
# (example: 2018-03-29-Protein-2.fasta).
file_out <- paste0(Sys.Date(), "-Protein-", Num_Sequences, ".fasta")

# file.create() creates the file, or truncates it if it already exists, and
# returns FALSE on failure. Stop here instead of carrying on without a usable
# output file.
if (!file.create(file_out)) {
  stop("Could not create output file: ", file_out, call. = FALSE)
}

# One-letter codes of the 20 amino acids the sequences are drawn from.
protein <- c(
  "A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
  "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"
)
# Sampling weight for each residue: 0.05 apiece, i.e. all equally likely.
protein_probabilities <- rep(0.05, length(protein))
# Generate one random protein sequence per student and append it to the
# FASTA file created above.
# seq_len() yields an empty loop when Num_Sequences is 0, whereas
# 1:Num_Sequences would iterate over c(1, 0).
for (i in seq_len(Num_Sequences)) {
  # Sequence length: a single draw from 100..1000 (inclusive).
  seq_length <- sample(100:1000, 1)

  # FASTA header identifying the student by index.
  line_header <- paste0(
    "Student = ", i, " | WPI Bioinformatics-Protein Exercises"
  )

  # Draw seq_length residues with replacement and join them into one string.
  Protein_sequence <- paste(
    sample(protein, seq_length, replace = TRUE, prob = protein_probabilities),
    collapse = ""
  )

  # Append the record to file_out. Reusing file_out (instead of rebuilding the
  # name from Sys.Date() each time) guarantees every record goes to the file
  # that was created earlier, even if the date changes during the run.
  write.fasta(
    sequences = Protein_sequence,
    names = line_header,
    file.out = file_out,
    open = "a",
    nbchar = 60,
    as.string = TRUE
  )
}