# TODO: Set 'Num_Sequences' to the number of students.
# TODO: Comment out 'set.seed' to get different random sequences on each run.
# Generate one random DNA sequence per student and write them all to a
# single FASTA file named "<date>-DNA-<Num_Sequences>.fasta" in the
# current working directory.

# Number of sequences (one per student) to generate.
Num_Sequences <- 2
# Alternative: pick the count at random. Note that runif() returns a
# non-integer value, so it should be rounded before use.
#Num_Sequences = runif(1, min=2, max=10)

# Fixed seed makes the generated sequences reproducible between runs.
# This can be commented out for less definable patterns
set.seed(1000)

#install.packages("seqinr")
library(seqinr)

# Build the output file name once so that the file that is created and the
# file that is appended to are always the same, even if the date changes
# while the script is running.
# Example file name: 2018-03-29-DNA-2.fasta
file_out <- paste0(Sys.Date(), "-DNA-", Num_Sequences, ".fasta")

# Create (or truncate) the output file up front; the loop below appends to it.
if (!file.create(file_out)) {
  stop("Could not create output file: ", file_out, call. = FALSE)
}

# Alphabet to draw from and the matching sampling weights (G is twice as
# likely as each of the other bases).
DNA <- c("A", "T", "C", "G") #, "N", "a", "t", "c", "g", "n")
DNA_probabilities <- c(0.2, 0.2, 0.2, 0.4) #, 0.05, 0.01, 0.01, 0.01, 0.01, 0.01)

# seq_len() yields an empty loop when Num_Sequences is 0, whereas
# 1:Num_Sequences would iterate over c(1, 0).
for (i in seq_len(Num_Sequences)) {
  # Random sequence length between 100 and 1000 bases (inclusive).
  seq_length <- sample(100:1000, 1)
  line_header <- paste0("Student = ", i, " | WPI Bioinformatics-DNA Exercises")

  # Draw the bases with replacement and collapse them into a single string.
  DNA_sequence <- paste(
    sample(DNA, seq_length, replace = TRUE, prob = DNA_probabilities),
    collapse = ""
  )

  # Append this record to the FASTA file, wrapped at 60 characters per line.
  write.fasta(
    sequences = DNA_sequence,
    names = line_header,
    file.out = file_out,
    open = "a",
    nbchar = 60,
    as.string = TRUE
  )
}