Introduction

The goal of this document is to look at the diversity data from the elitist survival runs.

Setup Code

# Packages attached for this notebook.
library("ggplot2")
library("cluster")
library("Hmisc")
library("gridExtra")
library("plyr")

# NOTE(review): machine-specific working directory; every relative "../" path
# used afterwards is resolved against it.
setwd("~/Documents/R/Clustering/lexicase-clusturing-analysis/R_notebooks")

# Project script; presumably defines the import and plotting helpers that are
# called but not defined in this notebook -- TODO confirm.
source("../scripts/clustering.R")

Import Data

Replace Space With Newline

# Load the RSWN runs: lexicase and tournament selection at each elitist-survival
# ratio. The es100 frames are read from the plain `clustering/` directories
# (presumably the runs without reduced survival -- TODO confirm).
data_rswn_lexicase_es25 <- import_from_error_clustering_and_div(
  "../data/RSWN/lexicase/elitist_survival_25/"
)
data_rswn_lexicase_es50 <- import_from_error_clustering_and_div(
  "../data/RSWN/lexicase/elitist_survival_50/"
)
data_rswn_lexicase_es75 <- import_from_error_clustering_and_div(
  "../data/RSWN/lexicase/elitist_survival_75/"
)
data_rswn_lexicase_es100 <- import_from_error_clustering_and_div(
  "../data/RSWN/lexicase/clustering/"
)
data_rswn_tourney_es25 <- import_from_error_clustering_and_div(
  "../data/RSWN/tourney/elitist_survival_25/"
)
data_rswn_tourney_es50 <- import_from_error_clustering_and_div(
  "../data/RSWN/tourney/elitist_survival_50/"
)
data_rswn_tourney_es75 <- import_from_error_clustering_and_div(
  "../data/RSWN/tourney/elitist_survival_75/"
)
data_rswn_tourney_es100 <- import_from_error_clustering_and_div(
  "../data/RSWN/tourney/clustering/"
)

# Stack the eight treatments into one data frame.
data_rswn <- rbind(
  data_rswn_lexicase_es100,
  data_rswn_lexicase_es75,
  data_rswn_lexicase_es50,
  data_rswn_lexicase_es25,
  data_rswn_tourney_es100,
  data_rswn_tourney_es75,
  data_rswn_tourney_es50,
  data_rswn_tourney_es25
)

# Explicit level order: TRUE before FALSE, treatments from full to 0.25 ratio.
data_rswn$succeeded <- factor(data_rswn$succeeded, levels = c(TRUE, FALSE))
data_rswn$treatment <- factor(
  data_rswn$treatment,
  levels = c(
    "lexicase", "lexicase-ratio-0.75", "lexicase-ratio-0.5", "lexicase-ratio-0.25",
    "tourney", "tourney-7-ratio-0.75", "tourney-7-ratio-0.5", "tourney-7-ratio-0.25"
  )
)

Syllables

# Load the syllables runs: lexicase and tournament selection at each
# elitist-survival ratio. The es100 frames are read from the plain
# `clustering/` directories (presumably the runs without reduced survival --
# TODO confirm).
data_syllables_lexicase_es25 <- import_from_error_clustering_and_div(
  "../data/syllables/lexicase/elitist_survival_25/"
)
data_syllables_lexicase_es50 <- import_from_error_clustering_and_div(
  "../data/syllables/lexicase/elitist_survival_50/"
)
data_syllables_lexicase_es75 <- import_from_error_clustering_and_div(
  "../data/syllables/lexicase/elitist_survival_75/"
)
data_syllables_lexicase_es100 <- import_from_error_clustering_and_div(
  "../data/syllables/lexicase/clustering/"
)
data_syllables_tourney_es25 <- import_from_error_clustering_and_div(
  "../data/syllables/tourney/elitist_survival_25/"
)
data_syllables_tourney_es50 <- import_from_error_clustering_and_div(
  "../data/syllables/tourney/elitist_survival_50/"
)
data_syllables_tourney_es75 <- import_from_error_clustering_and_div(
  "../data/syllables/tourney/elitist_survival_75/"
)
data_syllables_tourney_es100 <- import_from_error_clustering_and_div(
  "../data/syllables/tourney/clustering/"
)

# Stack the eight treatments into one data frame.
data_syllables <- rbind(
  data_syllables_lexicase_es100,
  data_syllables_lexicase_es75,
  data_syllables_lexicase_es50,
  data_syllables_lexicase_es25,
  data_syllables_tourney_es100,
  data_syllables_tourney_es75,
  data_syllables_tourney_es50,
  data_syllables_tourney_es25
)

# Explicit level order: TRUE before FALSE, treatments from full to 0.25 ratio.
data_syllables$succeeded <- factor(data_syllables$succeeded, levels = c(TRUE, FALSE))
data_syllables$treatment <- factor(
  data_syllables$treatment,
  levels = c(
    "lexicase", "lexicase-ratio-0.75", "lexicase-ratio-0.5", "lexicase-ratio-0.25",
    "tourney", "tourney-7-ratio-0.75", "tourney-7-ratio-0.5", "tourney-7-ratio-0.25"
  )
)

String Lengths Backwards

# Load the string-lengths-backwards runs: lexicase and tournament selection at
# each elitist-survival ratio. The es100 frames are read from the plain
# `clustering/` directories (presumably the runs without reduced survival --
# TODO confirm).
data_string_lengths_backwards_lexicase_es25 <- import_from_error_clustering_and_div(
  "../data/string-lengths-backwards/lexicase/elitist_survival_25/"
)
data_string_lengths_backwards_lexicase_es50 <- import_from_error_clustering_and_div(
  "../data/string-lengths-backwards/lexicase/elitist_survival_50/"
)
data_string_lengths_backwards_lexicase_es75 <- import_from_error_clustering_and_div(
  "../data/string-lengths-backwards/lexicase/elitist_survival_75/"
)
data_string_lengths_backwards_lexicase_es100 <- import_from_error_clustering_and_div(
  "../data/string-lengths-backwards/lexicase/clustering/"
)
data_string_lengths_backwards_tourney_es25 <- import_from_error_clustering_and_div(
  "../data/string-lengths-backwards/tourney/elitist_survival_25/"
)
data_string_lengths_backwards_tourney_es50 <- import_from_error_clustering_and_div(
  "../data/string-lengths-backwards/tourney/elitist_survival_50/"
)
data_string_lengths_backwards_tourney_es75 <- import_from_error_clustering_and_div(
  "../data/string-lengths-backwards/tourney/elitist_survival_75/"
)
data_string_lengths_backwards_tourney_es100 <- import_from_error_clustering_and_div(
  "../data/string-lengths-backwards/tourney/clustering/"
)

# Stack the eight treatments into one data frame.
data_string_lengths_backwards <- rbind(
  data_string_lengths_backwards_lexicase_es100,
  data_string_lengths_backwards_lexicase_es75,
  data_string_lengths_backwards_lexicase_es50,
  data_string_lengths_backwards_lexicase_es25,
  data_string_lengths_backwards_tourney_es100,
  data_string_lengths_backwards_tourney_es75,
  data_string_lengths_backwards_tourney_es50,
  data_string_lengths_backwards_tourney_es25
)

# Explicit level order: TRUE before FALSE, treatments from full to 0.25 ratio.
data_string_lengths_backwards$succeeded <- factor(
  data_string_lengths_backwards$succeeded,
  levels = c(TRUE, FALSE)
)
data_string_lengths_backwards$treatment <- factor(
  data_string_lengths_backwards$treatment,
  levels = c(
    "lexicase", "lexicase-ratio-0.75", "lexicase-ratio-0.5", "lexicase-ratio-0.25",
    "tourney", "tourney-7-ratio-0.75", "tourney-7-ratio-0.5", "tourney-7-ratio-0.25"
  )
)

Vector Average

# Load the vector-average runs: lexicase and tournament selection at each
# elitist-survival ratio. The es100 frames are read from the plain
# `clustering/` directories (presumably the runs without reduced survival --
# TODO confirm).
data_vector_average_lexicase_es25 <- import_from_error_clustering_and_div(
  "../data/vector-average/lexicase/elitist_survival_25/"
)
data_vector_average_lexicase_es50 <- import_from_error_clustering_and_div(
  "../data/vector-average/lexicase/elitist_survival_50/"
)
data_vector_average_lexicase_es75 <- import_from_error_clustering_and_div(
  "../data/vector-average/lexicase/elitist_survival_75/"
)
data_vector_average_lexicase_es100 <- import_from_error_clustering_and_div(
  "../data/vector-average/lexicase/clustering/"
)
data_vector_average_tourney_es25 <- import_from_error_clustering_and_div(
  "../data/vector-average/tourney/elitist_survival_25/"
)
data_vector_average_tourney_es50 <- import_from_error_clustering_and_div(
  "../data/vector-average/tourney/elitist_survival_50/"
)
data_vector_average_tourney_es75 <- import_from_error_clustering_and_div(
  "../data/vector-average/tourney/elitist_survival_75/"
)
data_vector_average_tourney_es100 <- import_from_error_clustering_and_div(
  "../data/vector-average/tourney/clustering/"
)

# Stack the eight treatments into one data frame.
data_vector_average <- rbind(
  data_vector_average_lexicase_es100,
  data_vector_average_lexicase_es75,
  data_vector_average_lexicase_es50,
  data_vector_average_lexicase_es25,
  data_vector_average_tourney_es100,
  data_vector_average_tourney_es75,
  data_vector_average_tourney_es50,
  data_vector_average_tourney_es25
)

# Explicit level order: TRUE before FALSE, treatments from full to 0.25 ratio.
data_vector_average$succeeded <- factor(data_vector_average$succeeded, levels = c(TRUE, FALSE))
data_vector_average$treatment <- factor(
  data_vector_average$treatment,
  levels = c(
    "lexicase", "lexicase-ratio-0.75", "lexicase-ratio-0.5", "lexicase-ratio-0.25",
    "tourney", "tourney-7-ratio-0.75", "tourney-7-ratio-0.5", "tourney-7-ratio-0.25"
  )
)

Negative To Zero

# Load the negative-to-zero runs: lexicase and tournament selection at each
# elitist-survival ratio. The es100 frames are read from the plain
# `clustering/` directories (presumably the runs without reduced survival --
# TODO confirm).
data_negative_to_zero_lexicase_es25 <- import_from_error_clustering_and_div(
  "../data/negative-to-zero/lexicase/elitist_survival_25/"
)
data_negative_to_zero_lexicase_es50 <- import_from_error_clustering_and_div(
  "../data/negative-to-zero/lexicase/elitist_survival_50/"
)
data_negative_to_zero_lexicase_es75 <- import_from_error_clustering_and_div(
  "../data/negative-to-zero/lexicase/elitist_survival_75/"
)
data_negative_to_zero_lexicase_es100 <- import_from_error_clustering_and_div(
  "../data/negative-to-zero/lexicase/clustering/"
)
data_negative_to_zero_tourney_es25 <- import_from_error_clustering_and_div(
  "../data/negative-to-zero/tourney/elitist_survival_25/"
)
data_negative_to_zero_tourney_es50 <- import_from_error_clustering_and_div(
  "../data/negative-to-zero/tourney/elitist_survival_50/"
)
data_negative_to_zero_tourney_es75 <- import_from_error_clustering_and_div(
  "../data/negative-to-zero/tourney/elitist_survival_75/"
)
data_negative_to_zero_tourney_es100 <- import_from_error_clustering_and_div(
  "../data/negative-to-zero/tourney/clustering/"
)

# Stack the eight treatments into one data frame.
data_negative_to_zero <- rbind(
  data_negative_to_zero_lexicase_es100,
  data_negative_to_zero_lexicase_es75,
  data_negative_to_zero_lexicase_es50,
  data_negative_to_zero_lexicase_es25,
  data_negative_to_zero_tourney_es100,
  data_negative_to_zero_tourney_es75,
  data_negative_to_zero_tourney_es50,
  data_negative_to_zero_tourney_es25
)

# Explicit level order: TRUE before FALSE, treatments from full to 0.25 ratio.
data_negative_to_zero$succeeded <- factor(data_negative_to_zero$succeeded, levels = c(TRUE, FALSE))
data_negative_to_zero$treatment <- factor(
  data_negative_to_zero$treatment,
  levels = c(
    "lexicase", "lexicase-ratio-0.75", "lexicase-ratio-0.5", "lexicase-ratio-0.25",
    "tourney", "tourney-7-ratio-0.75", "tourney-7-ratio-0.5", "tourney-7-ratio-0.25"
  )
)

Double Letters

# Load the double-letters runs: lexicase and tournament selection at each
# elitist-survival ratio. The es100 frames are read from the plain
# `clustering/` directories (presumably the runs without reduced survival --
# TODO confirm).
data_double_letters_lexicase_es25 <- import_from_error_clustering_and_div(
  "../data/double-letters/lexicase/elitist_survival_25/"
)
data_double_letters_lexicase_es50 <- import_from_error_clustering_and_div(
  "../data/double-letters/lexicase/elitist_survival_50/"
)
data_double_letters_lexicase_es75 <- import_from_error_clustering_and_div(
  "../data/double-letters/lexicase/elitist_survival_75/"
)
data_double_letters_lexicase_es100 <- import_from_error_clustering_and_div(
  "../data/double-letters/lexicase/clustering/"
)
data_double_letters_tourney_es25 <- import_from_error_clustering_and_div(
  "../data/double-letters/tourney/elitist_survival_25/"
)
data_double_letters_tourney_es50 <- import_from_error_clustering_and_div(
  "../data/double-letters/tourney/elitist_survival_50/"
)
data_double_letters_tourney_es75 <- import_from_error_clustering_and_div(
  "../data/double-letters/tourney/elitist_survival_75/"
)
data_double_letters_tourney_es100 <- import_from_error_clustering_and_div(
  "../data/double-letters/tourney/clustering/"
)

# Stack the eight treatments into one data frame.
data_double_letters <- rbind(
  data_double_letters_lexicase_es100,
  data_double_letters_lexicase_es75,
  data_double_letters_lexicase_es50,
  data_double_letters_lexicase_es25,
  data_double_letters_tourney_es100,
  data_double_letters_tourney_es75,
  data_double_letters_tourney_es50,
  data_double_letters_tourney_es25
)

# Explicit level order: TRUE before FALSE, treatments from full to 0.25 ratio.
data_double_letters$succeeded <- factor(data_double_letters$succeeded, levels = c(TRUE, FALSE))
data_double_letters$treatment <- factor(
  data_double_letters$treatment,
  levels = c(
    "lexicase", "lexicase-ratio-0.75", "lexicase-ratio-0.5", "lexicase-ratio-0.25",
    "tourney", "tourney-7-ratio-0.75", "tourney-7-ratio-0.5", "tourney-7-ratio-0.25"
  )
)

Count Odds

# Load the count-odds runs: lexicase and tournament selection at each
# elitist-survival ratio. The es100 frames are read from the plain
# `clustering/` directories (presumably the runs without reduced survival --
# TODO confirm).
data_count_odds_lexicase_es25 <- import_from_error_clustering_and_div(
  "../data/count-odds/lexicase/elitist_survival_25/"
)
data_count_odds_lexicase_es50 <- import_from_error_clustering_and_div(
  "../data/count-odds/lexicase/elitist_survival_50/"
)
data_count_odds_lexicase_es75 <- import_from_error_clustering_and_div(
  "../data/count-odds/lexicase/elitist_survival_75/"
)
data_count_odds_lexicase_es100 <- import_from_error_clustering_and_div(
  "../data/count-odds/lexicase/clustering/"
)
data_count_odds_tourney_es25 <- import_from_error_clustering_and_div(
  "../data/count-odds/tourney/elitist_survival_25/"
)
data_count_odds_tourney_es50 <- import_from_error_clustering_and_div(
  "../data/count-odds/tourney/elitist_survival_50/"
)
data_count_odds_tourney_es75 <- import_from_error_clustering_and_div(
  "../data/count-odds/tourney/elitist_survival_75/"
)
data_count_odds_tourney_es100 <- import_from_error_clustering_and_div(
  "../data/count-odds/tourney/clustering/"
)

# Stack the eight treatments into one data frame.
data_count_odds <- rbind(
  data_count_odds_lexicase_es100,
  data_count_odds_lexicase_es75,
  data_count_odds_lexicase_es50,
  data_count_odds_lexicase_es25,
  data_count_odds_tourney_es100,
  data_count_odds_tourney_es75,
  data_count_odds_tourney_es50,
  data_count_odds_tourney_es25
)

# Explicit level order: TRUE before FALSE, treatments from full to 0.25 ratio.
data_count_odds$succeeded <- factor(data_count_odds$succeeded, levels = c(TRUE, FALSE))
data_count_odds$treatment <- factor(
  data_count_odds$treatment,
  levels = c(
    "lexicase", "lexicase-ratio-0.75", "lexicase-ratio-0.5", "lexicase-ratio-0.25",
    "tourney", "tourney-7-ratio-0.75", "tourney-7-ratio-0.5", "tourney-7-ratio-0.25"
  )
)

Modify Some Functions for Publication Plots

# Marker size passed as `size` to the point layers of the plots defined below.
shape_size <- 4

# Treatment identifiers in display order (full survival first, then the
# 0.75 / 0.5 / 0.25 ratios, lexicase before tournament).
treatment_breaks <- c(
  "lexicase", "lexicase-ratio-0.75", "lexicase-ratio-0.5", "lexicase-ratio-0.25",
  "tourney", "tourney-7-ratio-0.75", "tourney-7-ratio-0.5", "tourney-7-ratio-0.25"
)

# Legend text for each entry of `treatment_breaks`, position by position.
treatment_labels <- c(
  "lex 100%", "lex 75%", "lex 50%", "lex 25%",
  "tourney 100%", "tourney 75%", "tourney 50%", "tourney 25%"
)

# Point shape for each treatment. The values are unnamed, so ggplot2 assigns
# them in the order of the scale's limits (the levels of `treatment` when it is
# a factor); that order is expected to match `treatment_breaks`.
# Every treatment needs its own shape: the last entry is an open circle (1)
# rather than a second filled square (15), so "tourney" and
# "tourney-7-ratio-0.25" can be told apart without relying on colour.
treatment_shapes <- c(0, 2, 23, 16, 15, 17, 25, 1)

# Plots the per-generation median of `error.diversity` for each treatment.
#
# Args:
#   data: data frame with `generation`, `error.diversity` and `treatment`
#     columns.
#   quartiles_percent: share of the central data a quartile band would cover.
#     Currently unused: the `median_hilow` band layer is disabled, so only the
#     median line is drawn. Kept so existing callers keep working.
#   legend.pos: passed to `theme(legend.position = ...)`.
#   legend.just: passed to `theme(legend.justification = ...)`.
#
# Returns: a ggplot object. Colours come from `cbbPalette`, which must be
#   defined elsewhere (it is not defined in this notebook).
#
# NOTE(review): `fun.y` is deprecated in favour of `fun` from ggplot2 3.3.0;
# it is kept so the code still runs on the older ggplot2 this was written for.
plot_diversity_medians_and_quartiles <- function(data, quartiles_percent = 0.5,
                                                 legend.pos = c(1, 0),
                                                 legend.just = c(1, 0)) {
  # Markers are drawn only every 30 generations (15, 45, 75, ...) so they do
  # not bury the median line.
  marker_rows <- subset(data, generation %% 30 == 15)

  ggplot(
    data,
    aes(
      x = generation, y = error.diversity,
      color = treatment, fill = treatment, shape = treatment
    )
  ) +
    stat_summary(fun.y = median, geom = "line", size = 1) +
    stat_summary(data = marker_rows, fun.y = median, geom = "point", size = shape_size) +
    theme_bw() +
    scale_colour_manual(values = cbbPalette, breaks = treatment_breaks, labels = treatment_labels) +
    scale_fill_manual(values = cbbPalette, breaks = treatment_breaks, labels = treatment_labels) +
    scale_shape_manual(values = treatment_shapes, breaks = treatment_breaks, labels = treatment_labels) +
    # Fix the visible y range to [0, 1] without dropping any data.
    coord_cartesian(ylim = c(0, 1)) +
    labs(y = "Error Diversity") +
    # Untitled legend with a thin black frame, placed inside the panel.
    theme(
      legend.title = element_blank(),
      legend.justification = legend.just,
      legend.position = legend.pos,
      legend.background = element_rect(colour = "black", size = 0.1),
      legend.key = element_blank()
    )
}

# Plots the per-generation median of `cluster.count` for each treatment.
#
# Args:
#   data: data frame with `generation`, `cluster.count` and `treatment`
#     columns.
#   quartiles_percent: share of the central data a quartile band would cover.
#     Currently unused: the `median_hilow` band layer is disabled, so only the
#     median line is drawn. Kept so existing callers keep working.
#   legend.pos: passed to `theme(legend.position = ...)`.
#   legend.just: passed to `theme(legend.justification = ...)`.
#
# Returns: a ggplot object. Colours come from `cbbPalette`, which must be
#   defined elsewhere (it is not defined in this notebook).
plot_cluster_count_medians_and_quartiles <- function(data, quartiles_percent = 0.5,
                                                     legend.pos = c(1, 0),
                                                     legend.just = c(1, 0)) {
  # Markers are drawn only every 30 generations (15, 45, 75, ...).
  marker_rows <- subset(data, generation %% 30 == 15)

  ggplot(
    data,
    aes(
      x = generation, y = cluster.count,
      color = treatment, fill = treatment, shape = treatment
    )
  ) +
    stat_summary(fun.y = median, geom = "line", size = 1) +
    stat_summary(data = marker_rows, fun.y = median, geom = "point", size = shape_size) +
    theme_bw() +
    scale_colour_manual(values = cbbPalette, breaks = treatment_breaks, labels = treatment_labels) +
    scale_fill_manual(values = cbbPalette, breaks = treatment_breaks, labels = treatment_labels) +
    scale_shape_manual(values = treatment_shapes, breaks = treatment_breaks, labels = treatment_labels) +
    labs(y = "Cluster Count") +
    # Untitled legend with a thin black frame, placed inside the panel.
    theme(
      legend.title = element_blank(),
      legend.justification = legend.just,
      legend.position = legend.pos,
      legend.background = element_rect(colour = "black", size = 0.1),
      legend.key = element_blank()
    )
}


# Makes a plot giving the number of successes at or before each generation.
#
# Args:
#   data: data frame with `treatment` (a factor) and `generation` columns; it
#     is also handed to `get_generational_success_counts()` (defined
#     elsewhere), whose result must provide `generation`, `num.successes` and
#     `treatment`.
#
# Returns: a ggplot object without a legend. It uses the same colours and
#   shapes as the median plots above, so the two can share one legend.
plot_generational_success_counts <- function(data) {
  success_counts <- get_generational_success_counts(data)

  # The y axis runs up to the number of runs, counted as the generation-0 rows
  # of the first treatment level.
  first_treatment <- levels(data$treatment)[1]
  num_runs_per_treatment <- nrow(
    subset(data, treatment == first_treatment & generation == 0)
  )

  # Markers are drawn only every 30 generations (15, 45, 75, ...).
  marker_rows <- subset(success_counts, generation %% 30 == 15)

  ggplot(
    success_counts,
    aes(
      x = generation, y = num.successes,
      color = treatment, shape = treatment, fill = treatment
    )
  ) +
    geom_line(size = 1) +
    geom_point(data = marker_rows, size = shape_size) +
    ylim(c(0, num_runs_per_treatment)) +
    theme_bw() +
    scale_colour_manual(values = cbbPalette) +
    scale_fill_manual(values = cbbPalette) +
    scale_shape_manual(values = treatment_shapes) +
    labs(x = "Generation", y = "Successes") +
    # Margins are top, right, bottom, left. The negative top margin pulls this
    # panel up against a plot placed above it; `theme_get()` lists the default
    # theme properties for comparison.
    theme(plot.margin = unit(c(-1, 1, 0.5, 0.5), "lines")) +
    theme(legend.position = "none")
}

Plot Medians and Quartiles

RSWN

# RSWN: error-diversity medians handed to add_generational_success_counts_plot()
# (defined elsewhere); legend anchored at the right edge, mid-height.
add_generational_success_counts_plot(
  data_rswn,
  plot_diversity_medians_and_quartiles(
    data_rswn,
    legend.pos = c(1, 0.5),
    legend.just = c(1, 0.5)
  )
)

## NULL
# RSWN: cluster-count medians with the default legend placement
# (alternative tried earlier: legend.pos = c(1, 0.3)).
add_generational_success_counts_plot(
  data_rswn,
  plot_cluster_count_medians_and_quartiles(data_rswn)
)

## NULL
#plot_cluster_count_medians_and_quartiles(data_rswn) + facet_grid(succeeded ~ ., labeller=label_both)

Syllables

# Syllables: error-diversity medians, legend in the bottom-right corner.
add_generational_success_counts_plot(
  data_syllables,
  plot_diversity_medians_and_quartiles(
    data_syllables,
    legend.pos = c(1, 0)
  )
)

## NULL
# Syllables: cluster-count medians with the default legend placement.
add_generational_success_counts_plot(
  data_syllables,
  plot_cluster_count_medians_and_quartiles(data_syllables)
)

## NULL

String Lengths Backwards

# String Lengths Backwards: error-diversity medians, legend in the top-right
# corner.
add_generational_success_counts_plot(
  data_string_lengths_backwards,
  plot_diversity_medians_and_quartiles(
    data_string_lengths_backwards,
    legend.pos = c(1, 1),
    legend.just = c(1, 1)
  )
)

## NULL
#plot_diversity_medians_and_quartiles(data_string_lengths_backwards) + facet_grid(succeeded ~ ., labeller=label_both)

# String Lengths Backwards: cluster-count medians with the default legend
# placement.
add_generational_success_counts_plot(
  data_string_lengths_backwards,
  plot_cluster_count_medians_and_quartiles(data_string_lengths_backwards)
)

## NULL
#plot_cluster_count_medians_and_quartiles(data_string_lengths_backwards) + facet_grid(succeeded ~ ., labeller=label_both)

Vector Average

# Vector Average: error-diversity medians with the default legend placement.
add_generational_success_counts_plot(
  data_vector_average,
  plot_diversity_medians_and_quartiles(data_vector_average)
)

## NULL
# Vector Average: cluster-count medians with the default legend placement.
add_generational_success_counts_plot(
  data_vector_average,
  plot_cluster_count_medians_and_quartiles(data_vector_average)
)

## NULL

Negative To Zero

# Negative To Zero: error-diversity medians with the default legend placement.
add_generational_success_counts_plot(
  data_negative_to_zero,
  plot_diversity_medians_and_quartiles(data_negative_to_zero)
)

## NULL
# Negative To Zero: cluster-count medians with the default legend placement.
add_generational_success_counts_plot(
  data_negative_to_zero,
  plot_cluster_count_medians_and_quartiles(data_negative_to_zero)
)

## NULL
#plot_cluster_count_medians_and_quartiles(data_negative_to_zero) + facet_grid(succeeded ~ ., labeller=label_both)

Double Letters

# Double Letters: error-diversity medians with the default legend placement.
add_generational_success_counts_plot(
  data_double_letters,
  plot_diversity_medians_and_quartiles(data_double_letters)
)

## NULL
# Double Letters: cluster-count medians with the default legend placement.
add_generational_success_counts_plot(
  data_double_letters,
  plot_cluster_count_medians_and_quartiles(data_double_letters)
)

## NULL
#plot_cluster_count_medians_and_quartiles(data_double_letters) + facet_grid(succeeded ~ ., labeller=label_both)

Count Odds

# Count Odds: error-diversity medians with the default legend placement.
add_generational_success_counts_plot(
  data_count_odds,
  plot_diversity_medians_and_quartiles(data_count_odds)
)

## NULL
# Count Odds: cluster-count medians with the default legend placement.
add_generational_success_counts_plot(
  data_count_odds,
  plot_cluster_count_medians_and_quartiles(data_count_odds)
)

## NULL