Evolution Simulations One

Author

Dr Andrew Dalby

I wanted to create some simple simulations of sequence evolution to help students understand sequence alignments. The simulations are also meant to show why real evolution does not actually follow such a simple model, and why DNA sequences are not random and should not be completely randomised to create random models.

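The first simulation creates a random sequence of 126 bases (already not realistic) and then, in each of 100 generations, replaces one randomly chosen position with a base drawn at random. Because the new base is drawn from all four bases, it can be the same as the one it replaces, so some generations leave the sequence unchanged.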

library('stringr')
# Simulation 1: point mutations on a random DNA sequence.
# Start from a random sequence and, in each generation, overwrite one randomly
# chosen position with a randomly drawn base. The base is drawn from all four
# bases, so it can equal the base already present and leave the sequence
# unchanged for that generation.
set.seed(1234)  # fixed seed so the run is reproducible

bases <- c("A", "C", "T", "G")
seq_length <- 126
n_generations <- 100

seq1 <- sample(bases, seq_length, replace = TRUE)

# One row per generation. Preallocate as an NA-filled character matrix so the
# storage has the right type from the start, rather than relying on coercion
# of an integer placeholder matrix.
generations1 <- matrix(NA_character_, nrow = n_generations, ncol = seq_length)

for (generation in seq_len(n_generations)) {
  # The position is drawn before the base; this order matters for reproducing
  # the seeded output.
  position <- sample.int(seq_length, 1)
  seq1[position] <- sample(bases, 1)
  # Row `generation` holds the sequence after this generation's mutation step.
  generations1[generation, ] <- seq1
}

# Collapse the first and last recorded rows of generations1 into single
# strings (bases separated by two spaces) and display them one above the other.
# Note: row 1 holds the sequence after the first generation's mutation step,
# not the untouched starting sequence.
initial <- paste(generations1[1, ], collapse = "  ")
final <- paste(generations1[100, ], collapse = "  ")

# Two-row, one-column character matrix with the row labels used in the printout.
sequence <- matrix(
  c(initial, final),
  ncol = 1,
  dimnames = list(c("initial", "final"), NULL)
)
df <- data.frame(sequence)
df
                                                                                                                                                                                                                                                                                                                                                                                        sequence
initial G  G  C  C  A  G  T  A  A  C  G  G  C  T  C  C  C  T  C  G  C  C  G  C  G  G  A  G  G  G  T  G  T  T  A  C  A  C  C  T  G  T  G  G  G  T  T  A  T  C  G  G  G  C  T  C  A  T  C  A  C  G  T  C  A  A  G  A  A  G  C  A  T  T  G  C  G  T  C  T  A  G  T  C  A  C  T  C  G  A  A  T  T  C  A  G  C  T  C  G  G  C  T  T  A  G  T  C  T  A  T  G  G  G  G  A  G  T  G  T  A  C  C  T  G  A
final   G  A  G  C  A  T  T  C  T  C  G  A  C  T  A  C  C  T  C  A  A  A  T  C  T  G  A  G  G  C  T  A  T  T  A  T  A  A  C  T  T  A  T  A  G  G  C  A  T  C  G  G  C  C  T  C  G  C  C  G  C  G  C  C  C  G  G  G  A  T  C  G  G  T  G  A  T  T  C  T  T  C  C  T  A  C  T  C  G  G  A  G  T  T  A  A  G  A  A  T  C  A  T  C  A  G  T  C  T  A  T  T  A  G  A  G  A  T  T  T  A  T  G  T  G  C
# Save every recorded generation of simulation 1 (one row per generation).
write.csv(generations1, file = "seq1.csv")

I then altered the code to run for more generations (1,000 in total), recording the sequence only once every 10 generations so that there are still 100 recorded rows.

# Simulation 2: the same point-mutation model run for longer.
# The sequence is mutated steps_per_sample times between recordings, so the
# n_samples recorded rows span n_samples * steps_per_sample generations.
set.seed(1234)  # fixed seed so the run is reproducible

bases <- c("A", "C", "T", "G")
seq_length <- 126
n_samples <- 100
steps_per_sample <- 10

seq1 <- sample(bases, seq_length, replace = TRUE)

# One row per recorded sample. Preallocate as an NA-filled character matrix so
# the storage has the right type from the start, rather than relying on
# coercion of an integer placeholder matrix.
generations2 <- matrix(NA_character_, nrow = n_samples, ncol = seq_length)

# The outer loop records a row; the inner loop applies the mutations that
# happen between two recordings (10 generations per recorded row).
for (sample_index in seq_len(n_samples)) {
  for (step in seq_len(steps_per_sample)) {
    # The position is drawn before the base; this order matters for
    # reproducing the seeded output. The drawn base may equal the current one.
    position <- sample.int(seq_length, 1)
    seq1[position] <- sample(bases, 1)
  }
  generations2[sample_index, ] <- seq1
}

# Collapse the first and last recorded rows of generations2 into single
# strings (bases separated by two spaces) and display them one above the other.
# Note: row 1 holds the sequence at the first recording, i.e. after the first
# batch of mutation steps, not the untouched starting sequence.
initial <- paste(generations2[1, ], collapse = "  ")
final <- paste(generations2[100, ], collapse = "  ")

# Two-row, one-column character matrix with the row labels used in the printout.
sequence <- matrix(
  c(initial, final),
  ncol = 1,
  dimnames = list(c("initial", "final"), NULL)
)
df <- data.frame(sequence)
df
                                                                                                                                                                                                                                                                                                                                                                                        sequence
initial G  G  C  C  A  G  T  A  A  C  G  G  C  T  C  C  C  T  C  G  C  C  G  C  G  G  A  G  G  G  T  A  T  T  A  C  A  C  C  T  G  T  G  G  G  T  T  A  T  C  G  G  G  C  T  C  A  C  C  A  C  G  T  C  A  A  G  A  A  G  C  A  T  T  G  C  G  T  C  T  A  G  T  C  A  C  T  C  G  A  A  T  T  C  A  G  C  T  C  T  G  C  T  T  A  G  T  C  T  A  T  G  G  G  G  A  G  T  G  T  A  C  C  T  G  C
final   C  A  T  T  G  G  G  C  T  A  T  G  C  G  G  G  C  G  A  G  G  G  G  A  A  G  C  C  G  C  C  T  G  G  C  A  A  T  T  T  G  G  A  C  A  G  G  C  A  G  G  C  A  C  G  A  G  G  T  G  G  G  C  A  A  T  A  A  A  T  C  T  T  C  G  C  G  C  A  G  G  G  T  A  T  T  A  C  T  G  G  G  T  C  G  A  G  T  G  T  A  C  A  G  C  C  A  T  T  G  C  C  G  A  C  C  A  G  G  G  A  C  G  C  A  C
# Save every recorded sample of simulation 2 (one row per recorded sample).
write.csv(generations2, file = "seq2.csv")

Now it is much harder to see the patterns in the two sequences. The sequences are still the same length, because I have not inserted or deleted any bases; allowing insertions and deletions would make the comparison even harder.