Introduction

The goal of this document is to look at the diversity data for the hyperselection paper. In particular, we want to compare lexicase, tournament, and SLT.

Setup Code

library('ggplot2')
library('cluster')
library('Hmisc')
library("gridExtra")
library("plyr")

# Every relative path used below ("../scripts", "../data") is resolved against
# the notebook directory, so make that the working directory first.
setwd("~/Documents/R/Clustering/lexicase-clusturing-analysis/R_notebooks")

# Load the project's shared clustering helpers.
# NOTE(review): presumably this is where import_from_error_clustering_and_div()
# and cbbPalette come from -- confirm against scripts/clustering.R.
source("../scripts/clustering.R")

Import Data

Replace Space With Newline

# Load the Replace Space With Newline runs, one import per selection treatment.
# The IFS and lexicase-tourney-remove treatments are not part of this
# comparison, so their imports stay disabled.
data_rswn_lexicase <- import_from_error_clustering_and_div(
  "../data/RSWN/lexicase/clustering/"
)
data_rswn_tourney <- import_from_error_clustering_and_div(
  "../data/RSWN/tourney/clustering/"
)
# data_rswn_ifs <- import_from_error_clustering_and_div("../data/RSWN/ifs/clustering/")
data_rswn_ltk <- import_from_error_clustering_and_div(
  "../data/RSWN/lexicase-tourney-keep/clustering/"
)
# data_rswn_ltr <- import_from_error_clustering_and_div("../data/RSWN/lexicase-tourney-remove/clustering/")

# Stack the three compared treatments into one table. To bring IFS / LTR back,
# re-enable their imports above and add them to this rbind().
data_rswn <- rbind(
  data_rswn_lexicase,
  data_rswn_tourney,
  data_rswn_ltk
)

# Put TRUE ahead of FALSE in the level order of the success flag.
data_rswn$succeeded <- factor(
  data_rswn$succeeded,
  levels = c(TRUE, FALSE)
)

# Short display labels in the order Lex, SLT, Tourn. The level order decides
# which colour and shape each treatment receives in the plots.
data_rswn$treatment <- factor(
  data_rswn$treatment,
  levels = c("lexicase", "lexicase-tourney-keep", "tourney"),
  labels = c("Lex", "SLT", "Tourn")
)

Syllables

# Load the Syllables runs, one import per selection treatment. The IFS and
# lexicase-tourney-remove imports stay disabled.
data_syllables_lexicase <- import_from_error_clustering_and_div(
  "../data/syllables/lexicase/clustering/"
)
data_syllables_tourney <- import_from_error_clustering_and_div(
  "../data/syllables/tourney/clustering/"
)
# data_syllables_ifs <- import_from_error_clustering_and_div("../data/syllables/ifs/clustering/")
data_syllables_ltk <- import_from_error_clustering_and_div(
  "../data/syllables/lexicase-tourney-keep/clustering/"
)
# data_syllables_ltr <- import_from_error_clustering_and_div("../data/syllables/lexicase-tourney-remove/clustering/")

# Stack the three compared treatments into one table.
data_syllables <- rbind(
  data_syllables_lexicase,
  data_syllables_tourney,
  data_syllables_ltk
)

# Put TRUE ahead of FALSE in the level order of the success flag.
data_syllables$succeeded <- factor(
  data_syllables$succeeded,
  levels = c(TRUE, FALSE)
)

# Short display labels in the order Lex, SLT, Tourn. The level order decides
# which colour and shape each treatment receives in the plots.
data_syllables$treatment <- factor(
  data_syllables$treatment,
  levels = c("lexicase", "lexicase-tourney-keep", "tourney"),
  labels = c("Lex", "SLT", "Tourn")
)

String Lengths Backwards

# Load the String Lengths Backwards runs, one import per selection treatment.
data_string_lengths_backwards_lexicase <- import_from_error_clustering_and_div(
  "../data/string-lengths-backwards/lexicase/clustering/"
)
data_string_lengths_backwards_tourney <- import_from_error_clustering_and_div(
  "../data/string-lengths-backwards/tourney/clustering/"
)
# The IFS import is disabled, as for the other problems. The `#` previously sat
# after the `=`, which left the assignment open: R continued it on the next
# line and silently bound data_string_lengths_backwards_ifs to the
# lexicase-tourney-keep data. The whole statement is now commented out.
# data_string_lengths_backwards_ifs <- import_from_error_clustering_and_div("../data/string-lengths-backwards/ifs/clustering/")
data_string_lengths_backwards_ltk <- import_from_error_clustering_and_div(
  "../data/string-lengths-backwards/lexicase-tourney-keep/clustering/"
)
# data_string_lengths_backwards_ltr <- import_from_error_clustering_and_div("../data/string-lengths-backwards/lexicase-tourney-remove/clustering/")

# Stack the three compared treatments into one table.
data_string_lengths_backwards <- rbind(
  data_string_lengths_backwards_lexicase,
  data_string_lengths_backwards_tourney,
  data_string_lengths_backwards_ltk
)

# Put TRUE ahead of FALSE in the level order of the success flag.
data_string_lengths_backwards$succeeded <- factor(
  data_string_lengths_backwards$succeeded,
  levels = c(TRUE, FALSE)
)

# Level order Lex, SLT, Tourn, matching the RSWN, Syllables and Double Letters
# data. The level order decides which colour and shape each treatment gets, so
# it has to agree across problems for the figures to be comparable.
data_string_lengths_backwards$treatment <- factor(
  data_string_lengths_backwards$treatment,
  levels = c("lexicase", "lexicase-tourney-keep", "tourney"),
  labels = c("Lex", "SLT", "Tourn")
)

Negative To Zero

# Load the Negative To Zero runs, one import per selection treatment. The IFS
# and lexicase-tourney-remove imports stay disabled.
data_negative_to_zero_lexicase <- import_from_error_clustering_and_div(
  "../data/negative-to-zero/lexicase/clustering/"
)
data_negative_to_zero_tourney <- import_from_error_clustering_and_div(
  "../data/negative-to-zero/tourney/clustering/"
)
# data_negative_to_zero_ifs <- import_from_error_clustering_and_div("../data/negative-to-zero/ifs/clustering/")
data_negative_to_zero_ltk <- import_from_error_clustering_and_div(
  "../data/negative-to-zero/lexicase-tourney-keep/clustering/"
)
# data_negative_to_zero_ltr <- import_from_error_clustering_and_div("../data/negative-to-zero/lexicase-tourney-remove/clustering/")

# Stack the three compared treatments into one table.
data_negative_to_zero <- rbind(
  data_negative_to_zero_lexicase,
  data_negative_to_zero_tourney,
  data_negative_to_zero_ltk
)

# Put TRUE ahead of FALSE in the level order of the success flag.
data_negative_to_zero$succeeded <- factor(
  data_negative_to_zero$succeeded,
  levels = c(TRUE, FALSE)
)

# Level order Lex, SLT, Tourn, matching the RSWN, Syllables and Double Letters
# data. The level order decides which colour and shape each treatment gets, so
# it has to agree across problems for the figures to be comparable.
data_negative_to_zero$treatment <- factor(
  data_negative_to_zero$treatment,
  levels = c("lexicase", "lexicase-tourney-keep", "tourney"),
  labels = c("Lex", "SLT", "Tourn")
)

Double Letters

# Load the Double Letters runs, one import per selection treatment. The IFS and
# lexicase-tourney-remove imports stay disabled.
data_double_letters_lexicase <- import_from_error_clustering_and_div(
  "../data/double-letters/lexicase/clustering/"
)
data_double_letters_tourney <- import_from_error_clustering_and_div(
  "../data/double-letters/tourney/clustering/"
)
# data_double_letters_ifs <- import_from_error_clustering_and_div("../data/double-letters/ifs/clustering/")
data_double_letters_ltk <- import_from_error_clustering_and_div(
  "../data/double-letters/lexicase-tourney-keep/clustering/"
)
# data_double_letters_ltr <- import_from_error_clustering_and_div("../data/double-letters/lexicase-tourney-remove/clustering/")

# Stack the three compared treatments into one table.
data_double_letters <- rbind(
  data_double_letters_lexicase,
  data_double_letters_tourney,
  data_double_letters_ltk
)

# Put TRUE ahead of FALSE in the level order of the success flag.
data_double_letters$succeeded <- factor(
  data_double_letters$succeeded,
  levels = c(TRUE, FALSE)
)

# Short display labels in the order Lex, SLT, Tourn. The level order decides
# which colour and shape each treatment receives in the plots.
data_double_letters$treatment <- factor(
  data_double_letters$treatment,
  levels = c("lexicase", "lexicase-tourney-keep", "tourney"),
  labels = c("Lex", "SLT", "Tourn")
)

Count Odds

# Load the Count Odds runs, one import per selection treatment. The IFS and
# lexicase-tourney-remove imports stay disabled.
data_count_odds_lexicase <- import_from_error_clustering_and_div(
  "../data/count-odds/lexicase/clustering/"
)
data_count_odds_tourney <- import_from_error_clustering_and_div(
  "../data/count-odds/tourney/clustering/"
)
# data_count_odds_ifs <- import_from_error_clustering_and_div("../data/count-odds/ifs/clustering/")
data_count_odds_ltk <- import_from_error_clustering_and_div(
  "../data/count-odds/lexicase-tourney-keep/clustering/"
)
# data_count_odds_ltr <- import_from_error_clustering_and_div("../data/count-odds/lexicase-tourney-remove/clustering/")

# Stack the three compared treatments into one table.
data_count_odds <- rbind(
  data_count_odds_lexicase,
  data_count_odds_tourney,
  data_count_odds_ltk
)

# Put TRUE ahead of FALSE in the level order of the success flag.
data_count_odds$succeeded <- factor(
  data_count_odds$succeeded,
  levels = c(TRUE, FALSE)
)

# Level order Lex, SLT, Tourn, matching the RSWN, Syllables and Double Letters
# data. The level order decides which colour and shape each treatment gets, so
# it has to agree across problems for the figures to be comparable.
data_count_odds$treatment <- factor(
  data_count_odds$treatment,
  levels = c("lexicase", "lexicase-tourney-keep", "tourney"),
  labels = c("Lex", "SLT", "Tourn")
)

Tweak functions

# Marker size for the per-generation point layer, which is currently disabled
# inside plot_diversity_medians_and_quartiles().
shape_size <- 4

# Plot behavioral (error) diversity against generation, one series per
# treatment, distinguished by both colour and shape.
#
# For each generation and treatment the summary layer shows the median of
# `error.diversity` together with the central `quartiles_percent` share of the
# values (the default 0.5 spans the lower to the upper quartile).
#
# Arguments:
#   data               table with `generation`, `error.diversity` and
#                      `treatment` columns.
#   quartiles_percent  fraction of the central data covered by each range;
#                      handed to "median_hilow" as `conf.int`.
#   legend.pos         forwarded to theme(legend.position = ...).
#   legend.just        forwarded to theme(legend.justification = ...).
#
# Returns the ggplot object. The colour scale reads `cbbPalette`, which must be
# defined outside this function.
plot_diversity_medians_and_quartiles <- function(data,
                                                 quartiles_percent = 0.5,
                                                 legend.pos = "right",
                                                 legend.just = c(1, 0.5)) {
  # Median plus central range per generation and treatment.
  # Disabled alternatives:
  #   stat_summary(fun.data="median_hilow", fun.args=list(conf.int=quartiles_percent), geom = "line", size=1)
  #   stat_summary(data=subset(data, generation %% 30 == 15), fun.y = median, geom="point", size = shape_size)
  median_range_layer <- stat_summary(
    fun.data = "median_hilow",
    fun.args = list(conf.int = quartiles_percent),
    alpha = 0.6
  )

  # Legend tweaks: no title, thin black frame, no key background. These are
  # added after theme_bw() so the complete theme does not overwrite them.
  legend_styling <- theme(
    legend.title = element_blank(),
    legend.justification = legend.just,
    legend.position = legend.pos,
    legend.background = element_rect(colour = "black", size = 0.1),
    legend.key = element_blank()
  )

  diversity_mapping <- aes(
    x = generation,
    y = error.diversity,
    color = treatment,
    shape = treatment
  )

  # coord_cartesian() zooms the y axis to [0, 1] without dropping data, unlike
  # the disabled ylim(c(0,1)) alternative.
  ggplot(data, diversity_mapping) +
    median_range_layer +
    theme_bw() +
    scale_colour_manual(values = cbbPalette) +
    coord_cartesian(ylim = c(0, 1)) +
    labs(x = "Generation", y = "Behavioral Diversity") +
    legend_styling
}

Plot Medians and Quartiles

RSWN

#add_generational_success_counts_plot(data_rswn, plot_diversity_medians_and_quartiles(data_rswn))

# RSWN diversity plot. The legend is positioned at (1, 0.5) and the function's
# default justification is used.
plot_diversity_medians_and_quartiles(
  data_rswn,
  legend.pos = c(1, 0.5)
)

#add_generational_success_counts_plot(data_rswn, plot_cluster_count_medians_and_quartiles(data_rswn))

#plot_cluster_count_medians_and_quartiles(data_rswn) + facet_grid(succeeded ~ ., labeller=label_both)

Syllables

#add_generational_success_counts_plot(data_syllables, plot_diversity_medians_and_quartiles(data_syllables))

# Syllables diversity plot. The legend is positioned at (1, 0) with matching
# justification (1, 0).
plot_diversity_medians_and_quartiles(
  data_syllables,
  legend.pos = c(1, 0),
  legend.just = c(1, 0)
)

#add_generational_success_counts_plot(data_syllables, plot_cluster_count_medians_and_quartiles(data_syllables))

String Lengths Backwards

#add_generational_success_counts_plot(data_string_lengths_backwards, plot_diversity_medians_and_quartiles(data_string_lengths_backwards))

#plot_diversity_medians_and_quartiles(data_string_lengths_backwards) + facet_grid(succeeded ~ ., labeller=label_both)

#add_generational_success_counts_plot(data_string_lengths_backwards, plot_cluster_count_medians_and_quartiles(data_string_lengths_backwards))

#plot_cluster_count_medians_and_quartiles(data_string_lengths_backwards) + facet_grid(succeeded ~ ., labeller=label_both)

Negative To Zero

#add_generational_success_counts_plot(data_negative_to_zero, plot_diversity_medians_and_quartiles(data_negative_to_zero))

#add_generational_success_counts_plot(data_negative_to_zero, plot_cluster_count_medians_and_quartiles(data_negative_to_zero))

#plot_cluster_count_medians_and_quartiles(data_negative_to_zero) + facet_grid(succeeded ~ ., labeller=label_both)

Double Letters

#add_generational_success_counts_plot(data_double_letters, plot_diversity_medians_and_quartiles(data_double_letters))

# Double Letters diversity plot. The legend is positioned at (1, 0) with
# matching justification (1, 0).
plot_diversity_medians_and_quartiles(
  data_double_letters,
  legend.pos = c(1, 0),
  legend.just = c(1, 0)
)

#add_generational_success_counts_plot(data_double_letters, plot_cluster_count_medians_and_quartiles(data_double_letters))

#plot_cluster_count_medians_and_quartiles(data_double_letters) + facet_grid(succeeded ~ ., labeller=label_both)

Count Odds

#add_generational_success_counts_plot(data_count_odds, plot_diversity_medians_and_quartiles(data_count_odds))

#add_generational_success_counts_plot(data_count_odds, plot_cluster_count_medians_and_quartiles(data_count_odds))