Introduction

The goal of this document is to create the plots for the GPTP paper.

Setup Code

library('ggplot2')
library('cluster')
library('Hmisc')
library("gridExtra")
library("plyr")

# NOTE: machine-specific working directory. Every relative path used below
# ("../scripts/...", "../data/...") is resolved against this location.
setwd("~/Documents/R/Clustering/lexicase-clusturing-analysis/R_notebooks")

# Project helper script. Presumably defines the helpers called later in this
# document that are not defined here (e.g. import_from_error_clustering_and_div)
# -- TODO confirm.
source("../scripts/clustering.R")

# Three gray shades handed to the manual colour/fill scales of the plots.
grayPalette3 <- c(
  "#444444",
  "#000000",
  "#666666"
)

Import Data

Replace Space With Newline

# Import the RSWN results, one import per selection treatment.
data_rswn_lexicase <- import_from_error_clustering_and_div(
  "../data/RSWN/lexicase/clustering/"
)
data_rswn_tourney <- import_from_error_clustering_and_div(
  "../data/RSWN/tourney/clustering/"
)
data_rswn_ifs <- import_from_error_clustering_and_div(
  "../data/RSWN/ifs/clustering/"
)

# Stack the three treatments row-wise.
data_rswn <- rbind(
  data_rswn_lexicase,
  data_rswn_tourney,
  data_rswn_ifs
)

# Convert to factors with an explicit level order.
data_rswn$succeeded <- factor(
  data_rswn$succeeded,
  levels = c(TRUE, FALSE)
)
data_rswn$treatment <- factor(
  data_rswn$treatment,
  levels = c("lexicase", "tourney", "ifs")
)

Syllables

# Import the Syllables results, one import per selection treatment.
data_syllables_lexicase <- import_from_error_clustering_and_div(
  "../data/syllables/lexicase/clustering/"
)
data_syllables_tourney <- import_from_error_clustering_and_div(
  "../data/syllables/tourney/clustering/"
)
data_syllables_ifs <- import_from_error_clustering_and_div(
  "../data/syllables/ifs/clustering/"
)

# Stack the three treatments row-wise.
data_syllables <- rbind(
  data_syllables_lexicase,
  data_syllables_tourney,
  data_syllables_ifs
)

# Convert to factors with an explicit level order.
data_syllables$succeeded <- factor(
  data_syllables$succeeded,
  levels = c(TRUE, FALSE)
)
data_syllables$treatment <- factor(
  data_syllables$treatment,
  levels = c("lexicase", "tourney", "ifs")
)

String Lengths Backwards

# Import the String Lengths Backwards results, one import per selection
# treatment.
data_string_lengths_backwards_lexicase <- import_from_error_clustering_and_div(
  "../data/string-lengths-backwards/lexicase/clustering/"
)
data_string_lengths_backwards_tourney <- import_from_error_clustering_and_div(
  "../data/string-lengths-backwards/tourney/clustering/"
)
data_string_lengths_backwards_ifs <- import_from_error_clustering_and_div(
  "../data/string-lengths-backwards/ifs/clustering/"
)

# Stack the three treatments row-wise.
data_string_lengths_backwards <- rbind(
  data_string_lengths_backwards_lexicase,
  data_string_lengths_backwards_tourney,
  data_string_lengths_backwards_ifs
)

# Convert to factors with an explicit level order.
data_string_lengths_backwards$succeeded <- factor(
  data_string_lengths_backwards$succeeded,
  levels = c(TRUE, FALSE)
)
data_string_lengths_backwards$treatment <- factor(
  data_string_lengths_backwards$treatment,
  levels = c("lexicase", "tourney", "ifs")
)

Negative To Zero

# Import the Negative To Zero results, one import per selection treatment.
data_negative_to_zero_lexicase <- import_from_error_clustering_and_div(
  "../data/negative-to-zero/lexicase/clustering/"
)
data_negative_to_zero_tourney <- import_from_error_clustering_and_div(
  "../data/negative-to-zero/tourney/clustering/"
)
data_negative_to_zero_ifs <- import_from_error_clustering_and_div(
  "../data/negative-to-zero/ifs/clustering/"
)

# Stack the three treatments row-wise.
data_negative_to_zero <- rbind(
  data_negative_to_zero_lexicase,
  data_negative_to_zero_tourney,
  data_negative_to_zero_ifs
)

# Convert to factors with an explicit level order.
data_negative_to_zero$succeeded <- factor(
  data_negative_to_zero$succeeded,
  levels = c(TRUE, FALSE)
)
data_negative_to_zero$treatment <- factor(
  data_negative_to_zero$treatment,
  levels = c("lexicase", "tourney", "ifs")
)

Double Letters

# Import the Double Letters results, one import per selection treatment.
data_double_letters_lexicase <- import_from_error_clustering_and_div(
  "../data/double-letters/lexicase/clustering/"
)
data_double_letters_tourney <- import_from_error_clustering_and_div(
  "../data/double-letters/tourney/clustering/"
)
data_double_letters_ifs <- import_from_error_clustering_and_div(
  "../data/double-letters/ifs/clustering/"
)

# Stack the three treatments row-wise.
data_double_letters <- rbind(
  data_double_letters_lexicase,
  data_double_letters_tourney,
  data_double_letters_ifs
)

# Convert to factors with an explicit level order.
data_double_letters$succeeded <- factor(
  data_double_letters$succeeded,
  levels = c(TRUE, FALSE)
)
data_double_letters$treatment <- factor(
  data_double_letters$treatment,
  levels = c("lexicase", "tourney", "ifs")
)

Scrabble Score

# Import the Scrabble Score results, one import per selection treatment.
data_scrabble_score_lexicase <- import_from_error_clustering_and_div(
  "../data/scrabble-score/lexicase/clustering/"
)
data_scrabble_score_tourney <- import_from_error_clustering_and_div(
  "../data/scrabble-score/tourney/clustering/"
)
data_scrabble_score_ifs <- import_from_error_clustering_and_div(
  "../data/scrabble-score/ifs/clustering/"
)

# Stack the three treatments row-wise.
data_scrabble_score <- rbind(
  data_scrabble_score_lexicase,
  data_scrabble_score_tourney,
  data_scrabble_score_ifs
)

# Convert to factors with an explicit level order.
data_scrabble_score$succeeded <- factor(
  data_scrabble_score$succeeded,
  levels = c(TRUE, FALSE)
)
data_scrabble_score$treatment <- factor(
  data_scrabble_score$treatment,
  levels = c("lexicase", "tourney", "ifs")
)

Checksum

# Import the Checksum results, one import per selection treatment.
data_checksum_lexicase <- import_from_error_clustering_and_div(
  "../data/checksum/lexicase/clustering/"
)
data_checksum_tourney <- import_from_error_clustering_and_div(
  "../data/checksum/tourney/clustering/"
)
data_checksum_ifs <- import_from_error_clustering_and_div(
  "../data/checksum/ifs/clustering/"
)

# Stack the three treatments row-wise.
data_checksum <- rbind(
  data_checksum_lexicase,
  data_checksum_tourney,
  data_checksum_ifs
)

# Convert to factors with an explicit level order.
data_checksum$succeeded <- factor(
  data_checksum$succeeded,
  levels = c(TRUE, FALSE)
)
data_checksum$treatment <- factor(
  data_checksum$treatment,
  levels = c("lexicase", "tourney", "ifs")
)

Count Odds

# Import the Count Odds results, one import per selection treatment.
data_count_odds_lexicase <- import_from_error_clustering_and_div(
  "../data/count-odds/lexicase/clustering/"
)
data_count_odds_tourney <- import_from_error_clustering_and_div(
  "../data/count-odds/tourney/clustering/"
)
data_count_odds_ifs <- import_from_error_clustering_and_div(
  "../data/count-odds/ifs/clustering/"
)

# Stack the three treatments row-wise.
data_count_odds <- rbind(
  data_count_odds_lexicase,
  data_count_odds_tourney,
  data_count_odds_ifs
)

# Convert to factors with an explicit level order.
data_count_odds$succeeded <- factor(
  data_count_odds$succeeded,
  levels = c(TRUE, FALSE)
)
data_count_odds$treatment <- factor(
  data_count_odds$treatment,
  levels = c("lexicase", "tourney", "ifs")
)

Modify Some Functions for Publication Plots

# Size of the point markers drawn by the plotting functions below
# (passed as `size` to their point layers).
shape_size <- 4

# Plots the per-generation median of error diversity for each treatment,
# together with a band around the median.
#
# Arguments:
#   data              - must provide the columns generation, error.diversity
#                       and treatment (the aesthetics mapped below).
#   quartiles_percent - what fraction of the center data the band includes;
#                       handed to the "median_hilow" summary as conf.int
#                       (default 0.5).
#   legend.pos        - forwarded to theme(legend.position = ...).
#   legend.just       - forwarded to theme(legend.justification = ...).
#
# Returns the ggplot object.
#
# NOTE(review): `conf.int` is passed directly to stat_summary(); newer ggplot2
# releases expect summary-function arguments via `fun.args` -- confirm against
# the installed ggplot2 version.
plot_diversity_medians_and_quartiles <- function(data, quartiles_percent = 0.5, legend.pos = "right", legend.just = c(1,0.5)){
  p <- ggplot(data, aes(x=generation, y=error.diversity, color=treatment, fill=treatment, shape = treatment)) +
    # Median line plus low/high band per generation.
    stat_summary(fun.data="median_hilow", conf.int=quartiles_percent, geom = "smooth", size=1) +
    # Shape markers only at generations 15, 45, 75, ... instead of every generation.
    stat_summary(data=subset(data, generation %% 30 == 15), fun.y = median, geom="point", size = shape_size) +
    theme_bw() +
    # Fix the visible y range to [0, 1] without dropping data from the summaries.
    coord_cartesian(ylim = c(0,1)) +
    labs(y = "Error Diversity") +
    scale_colour_manual(values = grayPalette3) +
    scale_fill_manual(values = grayPalette3) +
    # Legend styling. The justification now honours the legend.just argument
    # (it was previously hard-coded to c(1,0.5), silently ignoring the caller).
    theme(legend.title=element_blank(), legend.justification = legend.just, legend.position=legend.pos, legend.background = element_rect(colour="black", size=0.1), legend.key = element_blank())

  return(p)
}

# Plots the per-generation median cluster count for each treatment, together
# with a band around the median.
#
# Arguments:
#   data              - must provide the columns generation, cluster.count
#                       and treatment (the aesthetics mapped below).
#   quartiles_percent - what fraction of the center data the band includes;
#                       handed to the "median_hilow" summary as conf.int
#                       (default 0.5).
#   legend.pos        - forwarded to theme(legend.position = ...).
#   legend.just       - forwarded to theme(legend.justification = ...).
#
# Returns the ggplot object.
plot_cluster_count_medians_and_quartiles <- function(data, quartiles_percent = 0.5, legend.pos = "right", legend.just = c(1,0.5)){
  # Rows used for the shape markers: generations 15, 45, 75, ...
  marker_rows <- subset(data, generation %% 30 == 15)

  # Legend styling: no title, boxed background, no key background.
  legend_theme <- theme(
    legend.title = element_blank(),
    legend.justification = legend.just,
    legend.position = legend.pos,
    legend.background = element_rect(colour = "black", size = 0.1),
    legend.key = element_blank()
  )

  mapping <- aes(
    x = generation,
    y = cluster.count,
    color = treatment,
    fill = treatment,
    shape = treatment
  )

  ggplot(data, mapping) +
    stat_summary(
      fun.data = "median_hilow",
      conf.int = quartiles_percent,
      geom = "smooth",
      size = 1
    ) +
    stat_summary(
      data = marker_rows,
      fun.y = median,
      geom = "point",
      size = shape_size
    ) +
    theme_bw() +
    labs(y = "Cluster Count") +
    scale_colour_manual(values = grayPalette3) +
    scale_fill_manual(values = grayPalette3) +
    legend_theme
}


# Draws, per treatment, the number of successes at or before each generation
# as a line with sparse shape markers. The legend is suppressed.
#
# Arguments:
#   data - must provide the columns treatment (a factor) and generation; it is
#          also passed to get_generational_success_counts(), whose result must
#          provide generation, num.successes and treatment.
#
# Returns the ggplot object.
plot_generational_success_counts <- function(data){
  counts_by_generation <- get_generational_success_counts(data)

  # Upper y limit: the number of generation-0 rows of the first treatment
  # level, taken as the number of runs per treatment.
  reference_treatment <- levels(data$treatment)[1]
  generation_zero_rows <- subset(data, treatment==reference_treatment & generation == 0)
  y_upper <- nrow(generation_zero_rows)

  # Markers only at generations 15, 45, 75, ...
  marker_rows <- subset(counts_by_generation, generation %% 30 == 15)

  # plot.margin is given as top, right, bottom, left. `theme_get()` lists all
  # theme properties, including the default margin of c(1, 1, 0.5, 0.5).
  ggplot(
    counts_by_generation,
    aes(x = generation, y = num.successes, color = treatment, shape = treatment)
  ) +
    geom_line(size = 1) +
    geom_point(data = marker_rows, size = shape_size) +
    ylim(c(0, y_upper)) +
    theme_bw() +
    scale_colour_manual(values = grayPalette3) +
    labs(x = "Generation", y = "Successes") +
    theme(
      plot.margin = unit(c(-1, 1, 0.5, 0.5), "lines"),
      legend.position = "none"
    )
}

Plot Medians and Quartiles

RSWN

# Error-diversity plot for RSWN, kept in a variable so it can be shown alone.
rswn_mq <- plot_diversity_medians_and_quartiles(
  data_rswn,
  legend.pos = c(1, 0.5)
)

#rswn_mq

# add_generational_success_counts_plot() is not defined in this document;
# presumably it comes from the sourced helper script -- TODO confirm.
add_generational_success_counts_plot(data_rswn, rswn_mq)

## NULL
add_generational_success_counts_plot(
  data_rswn,
  plot_cluster_count_medians_and_quartiles(data_rswn, legend.pos = c(.2, 0.8))
)

## NULL

Syllables

# Syllables: error diversity, then cluster count.
add_generational_success_counts_plot(
  data_syllables,
  plot_diversity_medians_and_quartiles(data_syllables, legend.pos = c(1, 0.2))
)

## NULL
add_generational_success_counts_plot(
  data_syllables,
  plot_cluster_count_medians_and_quartiles(data_syllables, legend.pos = c(1, 0.5))
)

## NULL

String Lengths Backwards

# String Lengths Backwards: error diversity, then cluster count.
add_generational_success_counts_plot(
  data_string_lengths_backwards,
  plot_diversity_medians_and_quartiles(
    data_string_lengths_backwards,
    legend.pos = c(1, .8),
    legend.just = c(1, 1)
  )
)

## NULL
add_generational_success_counts_plot(
  data_string_lengths_backwards,
  plot_cluster_count_medians_and_quartiles(
    data_string_lengths_backwards,
    legend.pos = c(0.2, 0.8)
  )
)

## NULL

Negative To Zero

# Negative To Zero: error diversity, then cluster count.
add_generational_success_counts_plot(
  data_negative_to_zero,
  plot_diversity_medians_and_quartiles(
    data_negative_to_zero,
    legend.pos = c(1, 0.15)
  )
)

## NULL
add_generational_success_counts_plot(
  data_negative_to_zero,
  plot_cluster_count_medians_and_quartiles(
    data_negative_to_zero,
    legend.pos = c(0.2, 0.8)
  )
)

## NULL

Double Letters

# Double Letters: error diversity, then cluster count.
add_generational_success_counts_plot(
  data_double_letters,
  plot_diversity_medians_and_quartiles(
    data_double_letters,
    legend.pos = c(1, 0.2)
  )
)

## NULL
add_generational_success_counts_plot(
  data_double_letters,
  plot_cluster_count_medians_and_quartiles(
    data_double_letters,
    legend.pos = c(1, 0.8)
  )
)

## NULL

Scrabble Score

# Scrabble Score: error diversity, then cluster count.
add_generational_success_counts_plot(
  data_scrabble_score,
  plot_diversity_medians_and_quartiles(
    data_scrabble_score,
    legend.pos = c(1, 0.2)
  )
)

## NULL
add_generational_success_counts_plot(
  data_scrabble_score,
  plot_cluster_count_medians_and_quartiles(
    data_scrabble_score,
    legend.pos = c(0.2, 0.8)
  )
)

## NULL

Checksum

# Checksum: error diversity, then cluster count.
add_generational_success_counts_plot(
  data_checksum,
  plot_diversity_medians_and_quartiles(data_checksum, legend.pos = c(1, 0.2))
)

## NULL
add_generational_success_counts_plot(
  data_checksum,
  plot_cluster_count_medians_and_quartiles(data_checksum, legend.pos = c(0.2, 0.8))
)

## NULL

Count Odds

# Count Odds: error diversity, then cluster count.
add_generational_success_counts_plot(
  data_count_odds,
  plot_diversity_medians_and_quartiles(data_count_odds, legend.pos = c(1, 0.2))
)

## NULL
add_generational_success_counts_plot(
  data_count_odds,
  plot_cluster_count_medians_and_quartiles(data_count_odds, legend.pos = c(1, 0.36))
)

## NULL