#################################
# OneMax with neutral evolution
#################################
## set constants
genome.size <- 20; # length of genome (fit string)
pop.size <- 100; # population size
gamete.per.ind <- 10; # number of gametes
num.gen <- 100; # generations
mu <- 1/genome.size; # mutation rate
## create initial population
pop <- lapply(1:pop.size, function(x){sample(x = c(0,1), prob = c(0.5,0.5), replace = T, size = 20)})
fit <- function(x) {sum(x)}

## for output
out.neutral <- data.frame(gen = integer(), min = integer(), max = integer(), mean = numeric(), div.all=numeric(), div.hap=numeric());
## mutation function (broken stick algorithm)
mutated <- function(ind, mu) {
  for(i in 1:length(ind)) {
    to.mut <- runif(1);    # draw a random number from 0 to 1
    ind[i] <- ifelse(to.mut <= mu, ifelse(ind[i]==0, 1, 0), ind[i]); # mutate only if less than this number
  }
  return(ind);
}

## heterozygosity for haplotypes
hz.hap <- function(pop) {
  hz <- 0;
  haps <- sapply(pop, function(x) paste(x, collapse =""));
  hap.cts <- table(haps);
  hap.freq <- hap.cts/pop.size;
  for(k in 1:length(hap.freq)) {
    hz <- hz + hap.freq[k]^2;
  }
  return(1-hz)
}

## heterozygosity for alleles
hz.allele <- function(pop) {
  hz <- 0;
  for(k in 1:genome.size) {
    cts <- sapply(pop, function(x) x[k]);
    cts.tab <- table(cts);
    frq.tab <- cts.tab/pop.size;
    hz <- hz + (1-frq.tab[1]^2-frq.tab[2]^2);
  }
  return(hz/genome.size)
}

## crossover function
xover <- function(parent1, parent2) {
  pos <- sample(1:(genome.size-1), 1); # random breakpoint
  child1 <- c(parent1[1:pos], parent2[(pos+1):genome.size]);
  child2 <- c(parent2[1:pos], parent1[(pos+1):genome.size]);
  return(list(child1, child2));
}

## evolve
for (i in 1:num.gen) {
  fit <- sapply(pop, sum)/genome.size; # fitness
  out.neutral <- rbind(out.neutral, c(gen=i, min=min(fit), max=max(fit), mean=mean(fit), div.all=hz.allele(pop), div.hap=hz.hap(pop)));
  gamete.pool <- lapply(pop, function(x) { # for each ind
    gametes <- lapply(1:gamete.per.ind, function(n) {mutated(x, mu)}); # for each mutated gamete
   return(gametes); 
});
  pop <- sample(unlist(gamete.pool, recursive = F), pop.size); # sample to create next generation
}

plot(out.neutral[,1], out.neutral[,4], ylim=c(0,1), las=1, type = "l", xlab="generation", ylab="fitness/heterozygosity", main="neutral")
lines(out.neutral[,1], out.neutral[,5], type="l", col=2)
lines(out.neutral[,1], out.neutral[,6], type="l", col=3)
abline(h=0.5, lty=2)

#################################
# OneMax with selection: proportional scheme
#################################
pop <- lapply(1:pop.size, function(x){sample(x = c(0,1), prob = c(0.5,0.5), replace = T, size = genome.size)})
## for output
out.sel <- data.frame(gen = integer(), min = integer(), max = integer(), mean = numeric(), div.all=numeric(), div.hap=numeric());

for (i in 1:num.gen) {
  fit <- sapply(pop, sum)/genome.size; # fitness
  out.sel <- rbind(out.sel, c(gen=i, min=min(fit), max=max(fit), mean=mean(fit), div.all=hz.allele(pop), div.hap=hz.hap(pop)));
  gamete.pool <- lapply(pop, function(x) { # for each ind
    gametes <- lapply(1:length(which(x==1)), function(n) {mutated(x, mu)}); # for each mutated gamete (numb of gametes proportional to number of 1's)
    return(gametes); 
  });
  pop <- sample(unlist(gamete.pool, recursive = F), pop.size); # sample to create next generation
}

plot(out.sel[,1], out.sel[,4], ylim=c(0,1), las=1, type = "l", xlab="generation", ylab="fitness/heterozygosity", main="selection: proportional")
lines(out.sel[,1], out.sel[,5], type="l", col=2)
lines(out.sel[,1], out.sel[,6], type="l", col=3)
abline(h=0.5, lty=2)

#################################
# OneMax with selection: Tournament scheme
#################################
tour.size <-3;
tour.num <- 100;
pop <- lapply(1:pop.size, function(x){sample(x = c(0,1), prob = c(0.5,0.5), replace = T, size = genome.size)})
## for output
out.tour <- data.frame(gen = integer(), min = integer(), max = integer(), mean = numeric(),div.all=numeric(), div.hap=numeric());

for (i in 1:num.gen) {
  fit <- sapply(pop, sum)/genome.size; # fitness
  out.tour <- rbind(out.tour, c(gen=i, min=min(fit), max=max(fit), mean=mean(fit), div.all=hz.allele(pop), div.hap=hz.hap(pop)));
  gamete.pool <- lapply(1:tour.num, function(k) {
  tour <- sample(pop, tour.size);
  fit.tour <- sapply(tour, sum);
  winner.index <- order(fit.tour)[tour.size];
  winner <- tour[[winner.index]];
  gametes <- lapply(1:gamete.per.ind, function(n) {mutated(winner, mu)}); # for each mutated gamete
  return(gametes); 
  });
  pop <- sample(unlist(gamete.pool, recursive = F), pop.size); # sample to create next generation
}

plot(out.tour[,1], out.tour[,4], ylim=c(0,1), las=1, type = "l", xlab="generation", ylab="fitness/heterozygosity", main="selection by tournament")
lines(out.tour[,1], out.tour[,5], type="l", col=2)
lines(out.tour[,1], out.tour[,6], type="l", col=3)
abline(h=0.5, lty=2)

#################################
# OneMax with selection & crossover (to maintain diversity)
#################################
tour.size <-3;
tour.num <- 100;
pop <- lapply(1:pop.size, function(x){sample(x = c(0,1), prob = c(0.5,0.5), replace = T, size = genome.size)})
## for output
out.cross <- data.frame(gen = integer(), min = integer(), max = integer(), mean = numeric(), div.all=numeric(), div.hap=numeric());

for (i in 1:num.gen) {
  pop.cross <- lapply(1:(pop.size/2), function(n) {
    pa <- sample(pop, 2, replace = F);
    xover(pa[[1]], pa[[2]]);
  });

  pop.cross <- unlist(pop.cross, recursive = F);  
  fit <- sapply(pop.cross, sum)/genome.size; # fitness
  
  out.cross <- rbind(out.cross, c(gen=i, min=min(fit), max=max(fit), mean=mean(fit), div.all=hz.allele(pop.cross), div.hap=hz.hap(pop.cross)));
  gamete.pool <- lapply(1:tour.num, function(k) {
    tour <- sample(pop.cross, tour.size);
    fit.tour <- sapply(tour, sum);
    winner.index <- order(fit.tour)[tour.size];
    winner <- tour[[winner.index]];
    gametes <- lapply(1:gamete.per.ind, function(n) {mutated(winner, mu)}); # for each mutated gamete
    return(gametes); 
  });
  pop <- sample(unlist(gamete.pool, recursive = F), pop.size); # sample to create next generation
}

plot(out.cross[,1], out.cross[,4], ylim=c(0,1), las=1, type = "l", xlab="generation", ylab="fitness/heterozygosity", main="selection by tournament & crossover")
lines(out.cross[,1], out.cross[,5], type="l", col=2)
lines(out.cross[,1], out.cross[,6], type="l", col=3)
abline(h=0.5, lty=2)

#############################
# Using GA Package
#############################
library(GA)
## Loading required package: foreach
## Loading required package: iterators
## Package 'GA' version 3.1.1
## Type 'citation("GA")' for citing this R package in publications.
ind <- sample(x = c(0,1), prob = c(0.5,0.5), replace = T, size = 20);
fit <- function(x) {sum(x)/length(x)}
ga.onemax <- ga("binary", fitness = fit, nBits = length(ind))
plot(ga.onemax)

summary(ga.onemax)
## ── Genetic Algorithm ─────────────────── 
## 
## GA settings: 
## Type                  =  binary 
## Population size       =  50 
## Number of generations =  100 
## Elitism               =  2 
## Crossover probability =  0.8 
## Mutation probability  =  0.1 
## 
## GA results: 
## Iterations             = 100 
## Fitness function value = 1 
## Solution = 
##      x1 x2 x3 x4 x5 x6 x7 x8 x9 x10  ...  x19 x20
## [1,]  1  1  1  1  1  1  1  1  1   1         1   1