Setting things up

Let’s load up the data:

library("ggplot2")

# RSWN result files: one data frame per CSV under data/RSWN.
div15run0  <- read.csv(file = "data/RSWN/div15run0.csv")
div15run1  <- read.csv(file = "data/RSWN/div15run1.csv")
div90run6  <- read.csv(file = "data/RSWN/div90run6.csv")
div90run10 <- read.csv(file = "data/RSWN/div90run10.csv")
drop25run0 <- read.csv(file = "data/RSWN/drop25run0.csv")
drop25run8 <- read.csv(file = "data/RSWN/drop25run8.csv")

# Double-letters result files: one data frame per CSV under data/DoubleLetters.
dl_div15run0 <- read.csv(
  file = "data/DoubleLetters/double_letters_div15_run0.csv"
)
dl_div15run1 <- read.csv(
  file = "data/DoubleLetters/double_letters_div15_run1.csv"
)
dl_div90run0 <- read.csv(
  file = "data/DoubleLetters/double_letters_div90_run0.csv"
)
dl_div90run3 <- read.csv(
  file = "data/DoubleLetters/double_letters_div90_run3.csv"
)
dl_drop25run0 <- read.csv(
  file = "data/DoubleLetters/double_letters_drop_25_run0.csv"
)
dl_drop25run21 <- read.csv(
  file = "data/DoubleLetters/double_letters_drop_25_run21.csv"
)

Now create new `selection`, `run`, and `start` columns to make it easier to organize the graphs. The code below is stupid ugly and depressingly repetitious. I tried writing a loop to factor out that repetition but it didn’t work (the new columns got attached to the loop’s local copy instead of the “real” data frame), so I wimped out and copy/pasted like mad. Sorry.

#for (d in list(div15run0, div15run1, div90run6, div90run10, drop25run0, drop25run8)) {
#  d$selection <- sapply(strsplit(as.character(d$method), "-"), function(l) l[[3]])
#  print(names(d))
#}

# Split the dash-separated `method` column of `d` into its first three pieces
# and attach them as character columns:
#   piece 1 -> `start`, piece 2 -> `run`, piece 3 -> `selection`.
#
# d: a data frame with a `method` column whose values contain at least three
#    "-"-separated pieces (indexing a missing piece raises an error).
# Returns a copy of `d` with the three columns added. R copies data frames on
# modification, so the caller has to assign the result back; modifying the
# loop variable of a `for` loop over the data frames is lost for that reason.
add_method_columns <- function(d) {
  # Split once and reuse; as.character() guards against `method` being a factor.
  pieces <- strsplit(as.character(d$method), "-", fixed = TRUE)
  # vapply() pins the result to a character vector, even for zero rows.
  nth_piece <- function(n) {
    vapply(pieces, function(p) p[[n]], character(1))
  }
  d$selection <- nth_piece(3)
  d$run <- nth_piece(2)
  d$start <- nth_piece(1)
  d
}

div15run0 <- add_method_columns(div15run0)
div15run1 <- add_method_columns(div15run1)
div90run6 <- add_method_columns(div90run6)
div90run10 <- add_method_columns(div90run10)
drop25run0 <- add_method_columns(drop25run0)
drop25run8 <- add_method_columns(drop25run8)

dl_div15run0 <- add_method_columns(dl_div15run0)
dl_div15run1 <- add_method_columns(dl_div15run1)
dl_div90run0 <- add_method_columns(dl_div90run0)
dl_div90run3 <- add_method_columns(dl_div90run3)
dl_drop25run0 <- add_method_columns(dl_drop25run0)
dl_drop25run21 <- add_method_columns(dl_drop25run21)

Plotting

This little guy changes the default color of outlier dots from black to whatever color is being used for the boxplot. I also set up a colorblind-friendly palette for use later.

# Override the point geom's defaults: no fixed colour, alpha of 0.05.
update_geom_defaults(geom = "point", new = list(colour = NULL, alpha = 0.05))

# Two-colour palette (orange, black) handed to scale_colour_manual() in the
# plots. An earlier orange/sky-blue assignment, c("#E69F00", "#56B4E9"), was
# overwritten on the very next line and never used, so it has been dropped.
cbbPalette <- c("#E69F00", "#000000")

Replace-space-with-newline

These are runs starting with a point in a tournament selection run where diversity was fairly low.

# Stack the two div15 RSWN data frames so the plots can facet on `run`.
div15 <- rbind(div15run0, div15run1)

# Box plots of error diversity: one box per (generation, selection) pair,
# coloured by selection, one facet column per run.
ggplot(
  div15,
  aes(x = generation, y = error_vector_diversity, color = selection)
) +
  geom_boxplot(
    aes(group = interaction(generation, selection)),
    outlier.size = 1
  ) +
  ylim(0, 1) +
  labs(
    title = "RSWN starting with low diversity",
    x = "Generation",
    y = "Error diversity",
    color = "Selection"
  ) +
  facet_grid(. ~ run) +
  theme_bw() +
  theme(legend.position = "bottom")

# Same box plots (outlier size 0) drawn over faint lines, one line per
# (trial, method) pair, using the manual two-colour palette.
ggplot(
  div15,
  aes(x = generation, y = error_vector_diversity, color = selection)
) +
  geom_line(
    aes(group = interaction(trial, method), linetype = selection),
    alpha = 0.15,
    # `show_guide` was renamed to `show.legend` in ggplot2 2.0.0.
    show.legend = FALSE
  ) +
  geom_boxplot(
    aes(group = interaction(generation, selection)),
    outlier.size = 0
  ) +
  ylim(0, 1) +
  labs(
    title = "RSWN starting with low diversity",
    x = "Generation",
    y = "Error diversity",
    color = "Selection"
  ) +
  facet_grid(. ~ run) +
  scale_colour_manual(values = cbbPalette) +
  theme_bw() +
  theme(legend.position = "bottom", legend.direction = "horizontal")

Now starting with a point in a lexicase selection run where the diversity was nice and high.

# Stack the two div90 RSWN data frames so the plots can facet on `run`.
div90 <- rbind(div90run6, div90run10)

# Box plots of error diversity: one box per (generation, selection) pair,
# coloured by selection, one facet column per run.
ggplot(
  div90,
  aes(x = generation, y = error_vector_diversity, color = selection)
) +
  geom_boxplot(
    aes(group = interaction(generation, selection)),
    outlier.size = 1
  ) +
  ylim(0, 1) +
  labs(
    title = "RSWN starting with high diversity",
    x = "Generation",
    y = "Error diversity",
    color = "Selection"
  ) +
  facet_grid(. ~ run) +
  theme_bw() +
  theme(legend.position = "bottom")

# Same box plots (outlier size 0) drawn over faint lines, one line per
# (trial, method) pair, using the manual two-colour palette.
ggplot(
  div90,
  aes(x = generation, y = error_vector_diversity, color = selection)
) +
  geom_line(
    aes(group = interaction(trial, method), linetype = selection),
    alpha = 0.15,
    # `show_guide` was renamed to `show.legend` in ggplot2 2.0.0.
    show.legend = FALSE
  ) +
  geom_boxplot(
    aes(group = interaction(generation, selection)),
    outlier.size = 0
  ) +
  ylim(0, 1) +
  labs(
    title = "RSWN starting with high diversity",
    x = "Generation",
    y = "Error diversity",
    color = "Selection"
  ) +
  facet_grid(. ~ run) +
  scale_colour_manual(values = cbbPalette) +
  theme_bw() +
  theme(legend.position = "bottom")

Lastly, starting with a point after a dive in diversity in a lexicase selection run, presumably due to a hyperselection event.

# Stack the two drop25 RSWN data frames so the plots can facet on `run`.
drop25 <- rbind(drop25run0, drop25run8)

# Box plots of error diversity: one box per (generation, selection) pair,
# coloured by selection, one facet column per run.
ggplot(
  drop25,
  aes(x = generation, y = error_vector_diversity, color = selection)
) +
  geom_boxplot(
    aes(group = interaction(generation, selection)),
    outlier.size = 1
  ) +
  ylim(0, 1) +
  labs(
    title = "RSWN after diversity crash",
    x = "Generation",
    y = "Error diversity",
    color = "Selection"
  ) +
  facet_grid(. ~ run) +
  theme_bw() +
  theme(legend.position = "bottom")

# Same box plots (outlier size 0) drawn over faint lines, one line per
# (trial, method) pair, using the manual two-colour palette.
ggplot(
  drop25,
  aes(x = generation, y = error_vector_diversity, color = selection)
) +
  geom_line(
    aes(group = interaction(trial, method), linetype = selection),
    alpha = 0.15,
    # `show_guide` was renamed to `show.legend` in ggplot2 2.0.0.
    show.legend = FALSE
  ) +
  geom_boxplot(
    aes(group = interaction(generation, selection)),
    outlier.size = 0
  ) +
  ylim(0, 1) +
  labs(
    title = "RSWN after diversity crash",
    x = "Generation",
    y = "Error diversity",
    color = "Selection"
  ) +
  facet_grid(. ~ run) +
  scale_colour_manual(values = cbbPalette) +
  theme_bw() +
  theme(legend.position = "bottom")

Double letters

These are runs starting with a point in a tournament selection run where diversity was fairly low.

# Stack the two div15 double-letters data frames so the plots can facet on
# `run`.
dl_div15 <- rbind(dl_div15run0, dl_div15run1)

# Box plots of error diversity: one box per (generation, selection) pair,
# coloured by selection, one facet column per run.
ggplot(
  dl_div15,
  aes(x = generation, y = error_vector_diversity, color = selection)
) +
  geom_boxplot(
    aes(group = interaction(generation, selection)),
    outlier.size = 1
  ) +
  ylim(0, 1) +
  labs(
    title = "double-letters starting with low diversity",
    x = "Generation",
    y = "Error diversity",
    color = "Selection"
  ) +
  facet_grid(. ~ run) +
  theme_bw() +
  theme(legend.position = "bottom")

# Same box plots (outlier size 0) drawn over faint lines, one line per
# (trial, method) pair, using the manual two-colour palette.
ggplot(
  dl_div15,
  aes(x = generation, y = error_vector_diversity, color = selection)
) +
  geom_line(
    aes(group = interaction(trial, method), linetype = selection),
    alpha = 0.15,
    # `show_guide` was renamed to `show.legend` in ggplot2 2.0.0.
    show.legend = FALSE
  ) +
  geom_boxplot(
    aes(group = interaction(generation, selection)),
    outlier.size = 0
  ) +
  ylim(0, 1) +
  labs(
    title = "double-letters starting with low diversity",
    x = "Generation",
    y = "Error diversity",
    color = "Selection"
  ) +
  facet_grid(. ~ run) +
  scale_colour_manual(values = cbbPalette) +
  theme_bw() +
  theme(legend.position = "bottom")

Now starting with a point in a lexicase selection run where the diversity was nice and high.

# Stack the two div90 double-letters data frames so the plots can facet on
# `run`.
dl_div90 <- rbind(dl_div90run0, dl_div90run3)

# Box plots of error diversity: one box per (generation, selection) pair,
# coloured by selection, one facet column per run.
ggplot(
  dl_div90,
  aes(x = generation, y = error_vector_diversity, color = selection)
) +
  geom_boxplot(
    aes(group = interaction(generation, selection)),
    outlier.size = 1
  ) +
  ylim(0, 1) +
  labs(
    title = "double-letters starting with high diversity",
    x = "Generation",
    y = "Error diversity",
    color = "Selection"
  ) +
  facet_grid(. ~ run) +
  theme_bw() +
  theme(legend.position = "bottom")

# Same box plots (outlier size 0) drawn over faint lines, one line per
# (trial, method) pair, using the manual two-colour palette.
ggplot(
  dl_div90,
  aes(x = generation, y = error_vector_diversity, color = selection)
) +
  geom_line(
    aes(group = interaction(trial, method), linetype = selection),
    alpha = 0.15,
    # `show_guide` was renamed to `show.legend` in ggplot2 2.0.0.
    show.legend = FALSE
  ) +
  geom_boxplot(
    aes(group = interaction(generation, selection)),
    outlier.size = 0
  ) +
  ylim(0, 1) +
  labs(
    title = "double-letters starting with high diversity",
    x = "Generation",
    y = "Error diversity",
    color = "Selection"
  ) +
  facet_grid(. ~ run) +
  scale_colour_manual(values = cbbPalette) +
  theme_bw() +
  theme(legend.position = "bottom")

Lastly, starting with a point after a dive in diversity in a lexicase selection run, presumably due to a hyperselection event.

# Stack the two drop25 double-letters data frames so the plots can facet on
# `run`.
dl_drop25 <- rbind(dl_drop25run0, dl_drop25run21)

# Box plots of error diversity: one box per (generation, selection) pair,
# coloured by selection, one facet column per run.
ggplot(
  dl_drop25,
  aes(x = generation, y = error_vector_diversity, color = selection)
) +
  geom_boxplot(
    aes(group = interaction(generation, selection)),
    outlier.size = 1
  ) +
  ylim(0, 1) +
  labs(
    title = "double-letters after diversity crash",
    x = "Generation",
    y = "Error diversity",
    color = "Selection"
  ) +
  facet_grid(. ~ run) +
  theme_bw() +
  theme(legend.position = "bottom")

# Same box plots (outlier size 0) drawn over faint lines, one line per
# (trial, method) pair, using the manual two-colour palette.
ggplot(
  dl_drop25,
  aes(x = generation, y = error_vector_diversity, color = selection)
) +
  geom_line(
    aes(group = interaction(trial, method), linetype = selection),
    alpha = 0.15,
    # `show_guide` was renamed to `show.legend` in ggplot2 2.0.0.
    show.legend = FALSE
  ) +
  geom_boxplot(
    aes(group = interaction(generation, selection)),
    outlier.size = 0
  ) +
  ylim(0, 1) +
  labs(
    title = "double-letters after diversity crash",
    x = "Generation",
    y = "Error diversity",
    color = "Selection"
  ) +
  facet_grid(. ~ run) +
  scale_colour_manual(values = cbbPalette) +
  theme_bw() +
  theme(legend.position = "bottom")