Introduction

The goal of this document is to look at the diversity data from the elitist survival runs.

Setup Code

# Third-party packages attached for this notebook.
library('ggplot2')
library('cluster')
library('Hmisc')
library("gridExtra")
library("plyr")

# NOTE(review): hard-coded, machine-specific working directory. Every relative
# path in this notebook ("../scripts/...", "../data/...") is resolved against it.
setwd("~/Documents/R/Clustering/lexicase-clusturing-analysis/R_notebooks")

# Project helper script; presumably provides the import and plotting helpers
# called in this notebook but not defined in it - verify against the script.
source('../scripts/clustering.R')

Import Data

Replace Space With Newline

# Import every RSWN run directory: lexicase and tourney selection, with the
# elitist_survival_25/50/75 directories plus the "clustering/" directory, which
# feeds the *_es100 variables.
data_rswn_lexicase_es25 <- import_from_error_clustering_and_div("../data/RSWN/lexicase/elitist_survival_25/")
data_rswn_lexicase_es50 <- import_from_error_clustering_and_div("../data/RSWN/lexicase/elitist_survival_50/")
data_rswn_lexicase_es75 <- import_from_error_clustering_and_div("../data/RSWN/lexicase/elitist_survival_75/")
data_rswn_lexicase_es100 <- import_from_error_clustering_and_div("../data/RSWN/lexicase/clustering/")
data_rswn_tourney_es25 <- import_from_error_clustering_and_div("../data/RSWN/tourney/elitist_survival_25/")
data_rswn_tourney_es50 <- import_from_error_clustering_and_div("../data/RSWN/tourney/elitist_survival_50/")
data_rswn_tourney_es75 <- import_from_error_clustering_and_div("../data/RSWN/tourney/elitist_survival_75/")
data_rswn_tourney_es100 <- import_from_error_clustering_and_div("../data/RSWN/tourney/clustering/")

# Combine the eight imports, lexicase first, each group ordered 100/75/50/25.
data_rswn <- rbind(
  data_rswn_lexicase_es100,
  data_rswn_lexicase_es75,
  data_rswn_lexicase_es50,
  data_rswn_lexicase_es25,
  data_rswn_tourney_es100,
  data_rswn_tourney_es75,
  data_rswn_tourney_es50,
  data_rswn_tourney_es25
)

# Pin the factor level order explicitly (TRUE before FALSE; treatments in the
# same order as the rbind above).
data_rswn$succeeded <- factor(data_rswn$succeeded, levels = c(TRUE, FALSE))
data_rswn$treatment <- factor(
  data_rswn$treatment,
  levels = c(
    "lexicase", "lexicase-ratio-0.75", "lexicase-ratio-0.5", "lexicase-ratio-0.25",
    "tourney", "tourney-7-ratio-0.75", "tourney-7-ratio-0.5", "tourney-7-ratio-0.25"
  )
)

Syllables

# Import every Syllables run directory: lexicase and tourney selection, with the
# elitist_survival_25/50/75 directories plus the "clustering/" directory, which
# feeds the *_es100 variables.
data_syllables_lexicase_es25 <- import_from_error_clustering_and_div("../data/syllables/lexicase/elitist_survival_25/")
data_syllables_lexicase_es50 <- import_from_error_clustering_and_div("../data/syllables/lexicase/elitist_survival_50/")
data_syllables_lexicase_es75 <- import_from_error_clustering_and_div("../data/syllables/lexicase/elitist_survival_75/")
data_syllables_lexicase_es100 <- import_from_error_clustering_and_div("../data/syllables/lexicase/clustering/")
data_syllables_tourney_es25 <- import_from_error_clustering_and_div("../data/syllables/tourney/elitist_survival_25/")
data_syllables_tourney_es50 <- import_from_error_clustering_and_div("../data/syllables/tourney/elitist_survival_50/")
data_syllables_tourney_es75 <- import_from_error_clustering_and_div("../data/syllables/tourney/elitist_survival_75/")
data_syllables_tourney_es100 <- import_from_error_clustering_and_div("../data/syllables/tourney/clustering/")

# Combine the eight imports, lexicase first, each group ordered 100/75/50/25.
data_syllables <- rbind(
  data_syllables_lexicase_es100,
  data_syllables_lexicase_es75,
  data_syllables_lexicase_es50,
  data_syllables_lexicase_es25,
  data_syllables_tourney_es100,
  data_syllables_tourney_es75,
  data_syllables_tourney_es50,
  data_syllables_tourney_es25
)

# Pin the factor level order explicitly (TRUE before FALSE; treatments in the
# same order as the rbind above).
data_syllables$succeeded <- factor(data_syllables$succeeded, levels = c(TRUE, FALSE))
data_syllables$treatment <- factor(
  data_syllables$treatment,
  levels = c(
    "lexicase", "lexicase-ratio-0.75", "lexicase-ratio-0.5", "lexicase-ratio-0.25",
    "tourney", "tourney-7-ratio-0.75", "tourney-7-ratio-0.5", "tourney-7-ratio-0.25"
  )
)

String Lengths Backwards

# Import every String Lengths Backwards run directory: lexicase and tourney
# selection, with the elitist_survival_25/50/75 directories plus the
# "clustering/" directory, which feeds the *_es100 variables.
data_string_lengths_backwards_lexicase_es25 <- import_from_error_clustering_and_div("../data/string-lengths-backwards/lexicase/elitist_survival_25/")
data_string_lengths_backwards_lexicase_es50 <- import_from_error_clustering_and_div("../data/string-lengths-backwards/lexicase/elitist_survival_50/")
data_string_lengths_backwards_lexicase_es75 <- import_from_error_clustering_and_div("../data/string-lengths-backwards/lexicase/elitist_survival_75/")
data_string_lengths_backwards_lexicase_es100 <- import_from_error_clustering_and_div("../data/string-lengths-backwards/lexicase/clustering/")
data_string_lengths_backwards_tourney_es25 <- import_from_error_clustering_and_div("../data/string-lengths-backwards/tourney/elitist_survival_25/")
data_string_lengths_backwards_tourney_es50 <- import_from_error_clustering_and_div("../data/string-lengths-backwards/tourney/elitist_survival_50/")
data_string_lengths_backwards_tourney_es75 <- import_from_error_clustering_and_div("../data/string-lengths-backwards/tourney/elitist_survival_75/")
data_string_lengths_backwards_tourney_es100 <- import_from_error_clustering_and_div("../data/string-lengths-backwards/tourney/clustering/")

# Combine the eight imports, lexicase first, each group ordered 100/75/50/25.
data_string_lengths_backwards <- rbind(
  data_string_lengths_backwards_lexicase_es100,
  data_string_lengths_backwards_lexicase_es75,
  data_string_lengths_backwards_lexicase_es50,
  data_string_lengths_backwards_lexicase_es25,
  data_string_lengths_backwards_tourney_es100,
  data_string_lengths_backwards_tourney_es75,
  data_string_lengths_backwards_tourney_es50,
  data_string_lengths_backwards_tourney_es25
)

# Pin the factor level order explicitly (TRUE before FALSE; treatments in the
# same order as the rbind above).
data_string_lengths_backwards$succeeded <- factor(data_string_lengths_backwards$succeeded, levels = c(TRUE, FALSE))
data_string_lengths_backwards$treatment <- factor(
  data_string_lengths_backwards$treatment,
  levels = c(
    "lexicase", "lexicase-ratio-0.75", "lexicase-ratio-0.5", "lexicase-ratio-0.25",
    "tourney", "tourney-7-ratio-0.75", "tourney-7-ratio-0.5", "tourney-7-ratio-0.25"
  )
)

Vector Average

# Import every Vector Average run directory: lexicase and tourney selection,
# with the elitist_survival_25/50/75 directories plus the "clustering/"
# directory, which feeds the *_es100 variables.
data_vector_average_lexicase_es25 <- import_from_error_clustering_and_div("../data/vector-average/lexicase/elitist_survival_25/")
data_vector_average_lexicase_es50 <- import_from_error_clustering_and_div("../data/vector-average/lexicase/elitist_survival_50/")
data_vector_average_lexicase_es75 <- import_from_error_clustering_and_div("../data/vector-average/lexicase/elitist_survival_75/")
data_vector_average_lexicase_es100 <- import_from_error_clustering_and_div("../data/vector-average/lexicase/clustering/")
data_vector_average_tourney_es25 <- import_from_error_clustering_and_div("../data/vector-average/tourney/elitist_survival_25/")
data_vector_average_tourney_es50 <- import_from_error_clustering_and_div("../data/vector-average/tourney/elitist_survival_50/")
data_vector_average_tourney_es75 <- import_from_error_clustering_and_div("../data/vector-average/tourney/elitist_survival_75/")
data_vector_average_tourney_es100 <- import_from_error_clustering_and_div("../data/vector-average/tourney/clustering/")

# Combine the eight imports, lexicase first, each group ordered 100/75/50/25.
data_vector_average <- rbind(
  data_vector_average_lexicase_es100,
  data_vector_average_lexicase_es75,
  data_vector_average_lexicase_es50,
  data_vector_average_lexicase_es25,
  data_vector_average_tourney_es100,
  data_vector_average_tourney_es75,
  data_vector_average_tourney_es50,
  data_vector_average_tourney_es25
)

# Pin the factor level order explicitly (TRUE before FALSE; treatments in the
# same order as the rbind above).
data_vector_average$succeeded <- factor(data_vector_average$succeeded, levels = c(TRUE, FALSE))
data_vector_average$treatment <- factor(
  data_vector_average$treatment,
  levels = c(
    "lexicase", "lexicase-ratio-0.75", "lexicase-ratio-0.5", "lexicase-ratio-0.25",
    "tourney", "tourney-7-ratio-0.75", "tourney-7-ratio-0.5", "tourney-7-ratio-0.25"
  )
)

Negative To Zero

# Import every Negative To Zero run directory: lexicase and tourney selection,
# with the elitist_survival_25/50/75 directories plus the "clustering/"
# directory, which feeds the *_es100 variables.
data_negative_to_zero_lexicase_es25 <- import_from_error_clustering_and_div("../data/negative-to-zero/lexicase/elitist_survival_25/")
data_negative_to_zero_lexicase_es50 <- import_from_error_clustering_and_div("../data/negative-to-zero/lexicase/elitist_survival_50/")
data_negative_to_zero_lexicase_es75 <- import_from_error_clustering_and_div("../data/negative-to-zero/lexicase/elitist_survival_75/")
data_negative_to_zero_lexicase_es100 <- import_from_error_clustering_and_div("../data/negative-to-zero/lexicase/clustering/")
data_negative_to_zero_tourney_es25 <- import_from_error_clustering_and_div("../data/negative-to-zero/tourney/elitist_survival_25/")
data_negative_to_zero_tourney_es50 <- import_from_error_clustering_and_div("../data/negative-to-zero/tourney/elitist_survival_50/")
data_negative_to_zero_tourney_es75 <- import_from_error_clustering_and_div("../data/negative-to-zero/tourney/elitist_survival_75/")
data_negative_to_zero_tourney_es100 <- import_from_error_clustering_and_div("../data/negative-to-zero/tourney/clustering/")

# Combine the eight imports, lexicase first, each group ordered 100/75/50/25.
data_negative_to_zero <- rbind(
  data_negative_to_zero_lexicase_es100,
  data_negative_to_zero_lexicase_es75,
  data_negative_to_zero_lexicase_es50,
  data_negative_to_zero_lexicase_es25,
  data_negative_to_zero_tourney_es100,
  data_negative_to_zero_tourney_es75,
  data_negative_to_zero_tourney_es50,
  data_negative_to_zero_tourney_es25
)

# Pin the factor level order explicitly (TRUE before FALSE; treatments in the
# same order as the rbind above).
data_negative_to_zero$succeeded <- factor(data_negative_to_zero$succeeded, levels = c(TRUE, FALSE))
data_negative_to_zero$treatment <- factor(
  data_negative_to_zero$treatment,
  levels = c(
    "lexicase", "lexicase-ratio-0.75", "lexicase-ratio-0.5", "lexicase-ratio-0.25",
    "tourney", "tourney-7-ratio-0.75", "tourney-7-ratio-0.5", "tourney-7-ratio-0.25"
  )
)

Double Letters

# Import every Double Letters run directory: lexicase and tourney selection,
# with the elitist_survival_25/50/75 directories plus the "clustering/"
# directory, which feeds the *_es100 variables.
data_double_letters_lexicase_es25 <- import_from_error_clustering_and_div("../data/double-letters/lexicase/elitist_survival_25/")
data_double_letters_lexicase_es50 <- import_from_error_clustering_and_div("../data/double-letters/lexicase/elitist_survival_50/")
data_double_letters_lexicase_es75 <- import_from_error_clustering_and_div("../data/double-letters/lexicase/elitist_survival_75/")
data_double_letters_lexicase_es100 <- import_from_error_clustering_and_div("../data/double-letters/lexicase/clustering/")
data_double_letters_tourney_es25 <- import_from_error_clustering_and_div("../data/double-letters/tourney/elitist_survival_25/")
data_double_letters_tourney_es50 <- import_from_error_clustering_and_div("../data/double-letters/tourney/elitist_survival_50/")
data_double_letters_tourney_es75 <- import_from_error_clustering_and_div("../data/double-letters/tourney/elitist_survival_75/")
data_double_letters_tourney_es100 <- import_from_error_clustering_and_div("../data/double-letters/tourney/clustering/")

# Combine the eight imports, lexicase first, each group ordered 100/75/50/25.
data_double_letters <- rbind(
  data_double_letters_lexicase_es100,
  data_double_letters_lexicase_es75,
  data_double_letters_lexicase_es50,
  data_double_letters_lexicase_es25,
  data_double_letters_tourney_es100,
  data_double_letters_tourney_es75,
  data_double_letters_tourney_es50,
  data_double_letters_tourney_es25
)

# Pin the factor level order explicitly (TRUE before FALSE; treatments in the
# same order as the rbind above).
data_double_letters$succeeded <- factor(data_double_letters$succeeded, levels = c(TRUE, FALSE))
data_double_letters$treatment <- factor(
  data_double_letters$treatment,
  levels = c(
    "lexicase", "lexicase-ratio-0.75", "lexicase-ratio-0.5", "lexicase-ratio-0.25",
    "tourney", "tourney-7-ratio-0.75", "tourney-7-ratio-0.5", "tourney-7-ratio-0.25"
  )
)

Count Odds

# Import every Count Odds run directory: lexicase and tourney selection, with
# the elitist_survival_25/50/75 directories plus the "clustering/" directory,
# which feeds the *_es100 variables.
data_count_odds_lexicase_es25 <- import_from_error_clustering_and_div("../data/count-odds/lexicase/elitist_survival_25/")
data_count_odds_lexicase_es50 <- import_from_error_clustering_and_div("../data/count-odds/lexicase/elitist_survival_50/")
data_count_odds_lexicase_es75 <- import_from_error_clustering_and_div("../data/count-odds/lexicase/elitist_survival_75/")
data_count_odds_lexicase_es100 <- import_from_error_clustering_and_div("../data/count-odds/lexicase/clustering/")
data_count_odds_tourney_es25 <- import_from_error_clustering_and_div("../data/count-odds/tourney/elitist_survival_25/")
data_count_odds_tourney_es50 <- import_from_error_clustering_and_div("../data/count-odds/tourney/elitist_survival_50/")
data_count_odds_tourney_es75 <- import_from_error_clustering_and_div("../data/count-odds/tourney/elitist_survival_75/")
data_count_odds_tourney_es100 <- import_from_error_clustering_and_div("../data/count-odds/tourney/clustering/")

# Combine the eight imports, lexicase first, each group ordered 100/75/50/25.
data_count_odds <- rbind(
  data_count_odds_lexicase_es100,
  data_count_odds_lexicase_es75,
  data_count_odds_lexicase_es50,
  data_count_odds_lexicase_es25,
  data_count_odds_tourney_es100,
  data_count_odds_tourney_es75,
  data_count_odds_tourney_es50,
  data_count_odds_tourney_es25
)

# Pin the factor level order explicitly (TRUE before FALSE; treatments in the
# same order as the rbind above).
data_count_odds$succeeded <- factor(data_count_odds$succeeded, levels = c(TRUE, FALSE))
data_count_odds$treatment <- factor(
  data_count_odds$treatment,
  levels = c(
    "lexicase", "lexicase-ratio-0.75", "lexicase-ratio-0.5", "lexicase-ratio-0.25",
    "tourney", "tourney-7-ratio-0.75", "tourney-7-ratio-0.5", "tourney-7-ratio-0.25"
  )
)

Modify Some Functions for Publication Plots

# Point-marker size read as a global by the publication plot functions
# (passed as `size = shape_size` to their point layers).
shape_size <- 4

# Draws the median error diversity per generation as one line per treatment and
# marks the median with a point at every generation where generation %% 30 == 15.
#
# Arguments:
#   data              - run data with generation, error.diversity and treatment columns
#   quartiles_percent - kept for interface compatibility; unused because the
#                       median_hilow summary layer is no longer drawn
#   legend.pos        - forwarded to theme(legend.position = ...)
#   legend.just       - forwarded to theme(legend.justification = ...)
#
# Reads the globals cbbPalette (colour/fill values) and shape_size (point size).
# Returns the ggplot object; the y axis is zoomed to [0, 1] with coord_cartesian.
#
# NOTE(review): shape 15 appears twice in the shape values (5th and 8th), so two
# treatments share a marker shape - confirm whether that is intended.
# NOTE(review): `fun.y` is deprecated in newer ggplot2 releases in favour of
# `fun` - check the installed version before upgrading.
plot_diversity_medians_and_quartiles <- function(data, quartiles_percent = 0.5, legend.pos = c(1,0), legend.just = c(1,0)){
  # Treatment ids as stored in the data, and the short names shown in the legend.
  legend_breaks <- c("lexicase", "lexicase-ratio-0.75", "lexicase-ratio-0.5", "lexicase-ratio-0.25", "tourney", "tourney-7-ratio-0.75", "tourney-7-ratio-0.5", "tourney-7-ratio-0.25")
  legend_labels <- c("lex 100%", "lex 75%", "lex 50%", "lex 25%", "tourney 100%", "tourney 75%", "tourney 50%", "tourney 25%")
  point_shapes <- c(0, 2, 23, 16, 15, 17, 25, 15)

  # Only these rows contribute point markers on top of the median lines.
  marker_rows <- subset(data, generation %% 30 == 15)

  layers <- ggplot(data, aes(x=generation, y=error.diversity, color=treatment, fill=treatment, shape = treatment)) +
    stat_summary(fun.y="median", geom = "line", size=1) +
    stat_summary(data=marker_rows, fun.y = median, geom="point", size = shape_size)

  scaled <- layers +
    theme_bw() +
    scale_colour_manual(values=cbbPalette, breaks=legend_breaks, labels=legend_labels) +
    scale_fill_manual(values = cbbPalette, breaks=legend_breaks, labels=legend_labels) +
    scale_shape_manual(values = point_shapes, breaks=legend_breaks, labels=legend_labels) +
    coord_cartesian(ylim = c(0,1)) +
    labs(y = "Error Diversity")

  # Legend: no title, boxed with a thin black outline, no key background.
  scaled +
    theme(
      legend.title=element_blank(),
      legend.justification=legend.just,
      legend.position=legend.pos,
      legend.background = element_rect(colour="black", size=0.1),
      legend.key = element_blank()
    )
}

# Draws the median cluster count per generation as one line per treatment and
# marks the median with a point at every generation where generation %% 30 == 15.
#
# Arguments:
#   data              - run data with generation, cluster.count and treatment columns
#   quartiles_percent - kept for interface compatibility; unused because the
#                       median_hilow summary layer is no longer drawn
#   legend.pos        - forwarded to theme(legend.position = ...)
#   legend.just       - forwarded to theme(legend.justification = ...)
#
# Reads the globals cbbPalette (colour/fill values) and shape_size (point size).
# Returns the ggplot object.
#
# NOTE(review): shape 15 appears twice in the shape values (5th and 8th), so two
# treatments share a marker shape - confirm whether that is intended.
# NOTE(review): `fun.y` is deprecated in newer ggplot2 releases in favour of
# `fun` - check the installed version before upgrading.
plot_cluster_count_medians_and_quartiles <- function(data, quartiles_percent = 0.5, legend.pos = c(1,0), legend.just = c(1,0)){
  # Treatment ids as stored in the data, and the short names shown in the legend.
  legend_breaks <- c("lexicase", "lexicase-ratio-0.75", "lexicase-ratio-0.5", "lexicase-ratio-0.25", "tourney", "tourney-7-ratio-0.75", "tourney-7-ratio-0.5", "tourney-7-ratio-0.25")
  legend_labels <- c("lex 100%", "lex 75%", "lex 50%", "lex 25%", "tourney 100%", "tourney 75%", "tourney 50%", "tourney 25%")
  point_shapes <- c(0, 2, 23, 16, 15, 17, 25, 15)

  # Only these rows contribute point markers on top of the median lines.
  marker_rows <- subset(data, generation %% 30 == 15)

  layers <- ggplot(data, aes(x=generation, y=cluster.count, color=treatment, fill=treatment, shape = treatment)) +
    stat_summary(fun.y="median", geom = "line", size=1) +
    stat_summary(data=marker_rows, fun.y = median, geom="point", size = shape_size)

  scaled <- layers +
    theme_bw() +
    scale_colour_manual(values=cbbPalette, breaks=legend_breaks, labels=legend_labels) +
    scale_fill_manual(values = cbbPalette, breaks=legend_breaks, labels=legend_labels) +
    scale_shape_manual(values = point_shapes, breaks=legend_breaks, labels=legend_labels) +
    labs(y = "Cluster Count")

  # Legend: no title, boxed with a thin black outline, no key background.
  scaled +
    theme(
      legend.title=element_blank(),
      legend.justification = legend.just,
      legend.position=legend.pos,
      legend.background = element_rect(colour="black", size=0.1),
      legend.key = element_blank()
    )
}


# Plots num.successes against generation, one line per treatment, using the
# table returned by get_generational_success_counts(data) (defined elsewhere;
# the original comment describes it as successes at or before each generation).
#
# Arguments:
#   data - run data with treatment (a factor) and generation columns
#
# The y axis runs from 0 to the number of generation-0 rows belonging to the
# first treatment level. Point markers are drawn where generation %% 30 == 15.
# The legend is removed and the top plot margin is negative (-1 lines).
# Reads the globals cbbPalette and shape_size. Returns the ggplot object.
#
# NOTE(review): shape 15 appears twice in the shape values (5th and 8th), so two
# treatments share a marker shape - confirm whether that is intended.
plot_generational_success_counts <- function(data){
  counts_by_generation <- get_generational_success_counts(data)

  # Upper y limit: count the generation-0 rows of the first treatment level
  # (rows where the comparison is NA are not counted).
  reference_treatment <- levels(data$treatment)[1]
  runs_per_treatment <- sum(data$treatment == reference_treatment & data$generation == 0, na.rm = TRUE)

  # Rows that receive a point marker on top of the line.
  marker_rows <- subset(counts_by_generation, generation %% 30 == 15)

  layers <- ggplot(counts_by_generation, aes(x=generation, y=num.successes, color=treatment, shape=treatment, fill=treatment)) +
    geom_line(size=1) +
    geom_point(data=marker_rows, size = shape_size) +
    ylim(c(0, runs_per_treatment))

  scaled <- layers +
    theme_bw() +
    scale_colour_manual(values=cbbPalette) +
    scale_fill_manual(values = cbbPalette) +
    scale_shape_manual(values = c(0, 2, 23, 16, 15, 17, 25, 15)) +
    labs(x = "Generation", y = "Successes")

  # plot.margin order is top, right, bottom, left; `theme_get()` lists the
  # theme properties, including the default margin of c(1, 1, 0.5, 0.5).
  scaled +
    theme(plot.margin = unit(c(-1, 1, 0.5, 0.5), "lines")) +
    theme(legend.position="none") # Removes legend
}

Plot Medians and Quartiles

RSWN

# RSWN error diversity; legend position and justification both c(1, 0.5).
add_generational_success_counts_plot(
  data_rswn,
  plot_diversity_medians_and_quartiles(data_rswn, legend.pos = c(1, 0.5), legend.just = c(1, 0.5))
)

## NULL

# RSWN cluster count with the default legend placement
# (alternative tried earlier: legend.pos = c(1, 0.3)).
add_generational_success_counts_plot(
  data_rswn,
  plot_cluster_count_medians_and_quartiles(data_rswn)
)

## NULL

#plot_cluster_count_medians_and_quartiles(data_rswn) + facet_grid(succeeded ~ ., labeller=label_both)

Syllables

# Syllables error diversity; legend position c(1, 0).
add_generational_success_counts_plot(
  data_syllables,
  plot_diversity_medians_and_quartiles(data_syllables, legend.pos = c(1, 0))
)

## NULL

# Syllables cluster count with the default legend placement.
add_generational_success_counts_plot(
  data_syllables,
  plot_cluster_count_medians_and_quartiles(data_syllables)
)

## NULL

String Lengths Backwards

# String Lengths Backwards error diversity; legend position and justification
# both c(1, 1).
add_generational_success_counts_plot(
  data_string_lengths_backwards,
  plot_diversity_medians_and_quartiles(data_string_lengths_backwards, legend.pos = c(1, 1), legend.just = c(1, 1))
)

## NULL

#plot_diversity_medians_and_quartiles(data_string_lengths_backwards) + facet_grid(succeeded ~ ., labeller=label_both)

# String Lengths Backwards cluster count with the default legend placement.
add_generational_success_counts_plot(
  data_string_lengths_backwards,
  plot_cluster_count_medians_and_quartiles(data_string_lengths_backwards)
)

## NULL

#plot_cluster_count_medians_and_quartiles(data_string_lengths_backwards) + facet_grid(succeeded ~ ., labeller=label_both)

Vector Average

# Vector Average error diversity with the default legend placement.
add_generational_success_counts_plot(
  data_vector_average,
  plot_diversity_medians_and_quartiles(data_vector_average)
)

## NULL

# Vector Average cluster count with the default legend placement.
add_generational_success_counts_plot(
  data_vector_average,
  plot_cluster_count_medians_and_quartiles(data_vector_average)
)

## NULL

Negative To Zero

# Negative To Zero error diversity with the default legend placement.
add_generational_success_counts_plot(
  data_negative_to_zero,
  plot_diversity_medians_and_quartiles(data_negative_to_zero)
)

## NULL

# Negative To Zero cluster count with the default legend placement.
add_generational_success_counts_plot(
  data_negative_to_zero,
  plot_cluster_count_medians_and_quartiles(data_negative_to_zero)
)

## NULL

#plot_cluster_count_medians_and_quartiles(data_negative_to_zero) + facet_grid(succeeded ~ ., labeller=label_both)

Double Letters

# Double Letters error diversity with the default legend placement.
add_generational_success_counts_plot(
  data_double_letters,
  plot_diversity_medians_and_quartiles(data_double_letters)
)

## NULL

# Double Letters cluster count with the default legend placement.
add_generational_success_counts_plot(
  data_double_letters,
  plot_cluster_count_medians_and_quartiles(data_double_letters)
)

## NULL

#plot_cluster_count_medians_and_quartiles(data_double_letters) + facet_grid(succeeded ~ ., labeller=label_both)

Count Odds

# Count Odds error diversity with the default legend placement.
add_generational_success_counts_plot(
  data_count_odds,
  plot_diversity_medians_and_quartiles(data_count_odds)
)

## NULL

# Count Odds cluster count with the default legend placement.
add_generational_success_counts_plot(
  data_count_odds,
  plot_cluster_count_medians_and_quartiles(data_count_odds)
)

## NULL

Diversity for Elitist Survival Experiment

Tom Helmuth

July 1, 2015

Introduction

Setup Code

Import Data

Replace Space With Newline

Syllables

String Lengths Backwards

Vector Average

Negative To Zero

Double Letters

Count Odds

Modify Some Functions for Publication Plots

Plot Medians and Quartiles

RSWN

Syllables

String Lengths Backwards

Vector Average

Negative To Zero

Double Letters

Count Odds