The goal of this document is to examine the error-diversity data, together with the cluster counts and cumulative success counts, from the elitist survival runs.
library('ggplot2')
library('cluster')
library('Hmisc')
library("gridExtra")
library("plyr")
setwd("~/Documents/R/Clustering/lexicase-clusturing-analysis/R_notebooks")
source('../scripts/clustering.R')
# ---- RSWN ----
# One data frame per selection method (lexicase / tourney) and elitist-survival
# directory. The *_es100 frames are read from each method's clustering/
# directory (presumably the runs labelled "100%" in the plots - TODO confirm).
data_rswn_lexicase_es25 <- import_from_error_clustering_and_div(
  "../data/RSWN/lexicase/elitist_survival_25/"
)
data_rswn_lexicase_es50 <- import_from_error_clustering_and_div(
  "../data/RSWN/lexicase/elitist_survival_50/"
)
data_rswn_lexicase_es75 <- import_from_error_clustering_and_div(
  "../data/RSWN/lexicase/elitist_survival_75/"
)
data_rswn_lexicase_es100 <- import_from_error_clustering_and_div(
  "../data/RSWN/lexicase/clustering/"
)
data_rswn_tourney_es25 <- import_from_error_clustering_and_div(
  "../data/RSWN/tourney/elitist_survival_25/"
)
data_rswn_tourney_es50 <- import_from_error_clustering_and_div(
  "../data/RSWN/tourney/elitist_survival_50/"
)
data_rswn_tourney_es75 <- import_from_error_clustering_and_div(
  "../data/RSWN/tourney/elitist_survival_75/"
)
data_rswn_tourney_es100 <- import_from_error_clustering_and_div(
  "../data/RSWN/tourney/clustering/"
)

# Stack all eight frames, lexicase first, from es100 down to es25.
data_rswn <- rbind(
  data_rswn_lexicase_es100,
  data_rswn_lexicase_es75,
  data_rswn_lexicase_es50,
  data_rswn_lexicase_es25,
  data_rswn_tourney_es100,
  data_rswn_tourney_es75,
  data_rswn_tourney_es50,
  data_rswn_tourney_es25
)

# Pin the factor level order: TRUE before FALSE for `succeeded`, and an
# explicit treatment order. Treatment values not listed here become NA.
data_rswn$succeeded <- factor(data_rswn$succeeded, levels = c(TRUE, FALSE))
data_rswn$treatment <- factor(
  data_rswn$treatment,
  levels = c(
    "lexicase", "lexicase-ratio-0.75", "lexicase-ratio-0.5", "lexicase-ratio-0.25",
    "tourney", "tourney-7-ratio-0.75", "tourney-7-ratio-0.5", "tourney-7-ratio-0.25"
  )
)
# ---- syllables ----
# One data frame per selection method (lexicase / tourney) and elitist-survival
# directory. The *_es100 frames are read from each method's clustering/
# directory (presumably the runs labelled "100%" in the plots - TODO confirm).
data_syllables_lexicase_es25 <- import_from_error_clustering_and_div(
  "../data/syllables/lexicase/elitist_survival_25/"
)
data_syllables_lexicase_es50 <- import_from_error_clustering_and_div(
  "../data/syllables/lexicase/elitist_survival_50/"
)
data_syllables_lexicase_es75 <- import_from_error_clustering_and_div(
  "../data/syllables/lexicase/elitist_survival_75/"
)
data_syllables_lexicase_es100 <- import_from_error_clustering_and_div(
  "../data/syllables/lexicase/clustering/"
)
data_syllables_tourney_es25 <- import_from_error_clustering_and_div(
  "../data/syllables/tourney/elitist_survival_25/"
)
data_syllables_tourney_es50 <- import_from_error_clustering_and_div(
  "../data/syllables/tourney/elitist_survival_50/"
)
data_syllables_tourney_es75 <- import_from_error_clustering_and_div(
  "../data/syllables/tourney/elitist_survival_75/"
)
data_syllables_tourney_es100 <- import_from_error_clustering_and_div(
  "../data/syllables/tourney/clustering/"
)

# Stack all eight frames, lexicase first, from es100 down to es25.
data_syllables <- rbind(
  data_syllables_lexicase_es100,
  data_syllables_lexicase_es75,
  data_syllables_lexicase_es50,
  data_syllables_lexicase_es25,
  data_syllables_tourney_es100,
  data_syllables_tourney_es75,
  data_syllables_tourney_es50,
  data_syllables_tourney_es25
)

# Pin the factor level order: TRUE before FALSE for `succeeded`, and an
# explicit treatment order. Treatment values not listed here become NA.
data_syllables$succeeded <- factor(data_syllables$succeeded, levels = c(TRUE, FALSE))
data_syllables$treatment <- factor(
  data_syllables$treatment,
  levels = c(
    "lexicase", "lexicase-ratio-0.75", "lexicase-ratio-0.5", "lexicase-ratio-0.25",
    "tourney", "tourney-7-ratio-0.75", "tourney-7-ratio-0.5", "tourney-7-ratio-0.25"
  )
)
# ---- string-lengths-backwards ----
# One data frame per selection method (lexicase / tourney) and elitist-survival
# directory. The *_es100 frames are read from each method's clustering/
# directory (presumably the runs labelled "100%" in the plots - TODO confirm).
data_string_lengths_backwards_lexicase_es25 <- import_from_error_clustering_and_div(
  "../data/string-lengths-backwards/lexicase/elitist_survival_25/"
)
data_string_lengths_backwards_lexicase_es50 <- import_from_error_clustering_and_div(
  "../data/string-lengths-backwards/lexicase/elitist_survival_50/"
)
data_string_lengths_backwards_lexicase_es75 <- import_from_error_clustering_and_div(
  "../data/string-lengths-backwards/lexicase/elitist_survival_75/"
)
data_string_lengths_backwards_lexicase_es100 <- import_from_error_clustering_and_div(
  "../data/string-lengths-backwards/lexicase/clustering/"
)
data_string_lengths_backwards_tourney_es25 <- import_from_error_clustering_and_div(
  "../data/string-lengths-backwards/tourney/elitist_survival_25/"
)
data_string_lengths_backwards_tourney_es50 <- import_from_error_clustering_and_div(
  "../data/string-lengths-backwards/tourney/elitist_survival_50/"
)
data_string_lengths_backwards_tourney_es75 <- import_from_error_clustering_and_div(
  "../data/string-lengths-backwards/tourney/elitist_survival_75/"
)
data_string_lengths_backwards_tourney_es100 <- import_from_error_clustering_and_div(
  "../data/string-lengths-backwards/tourney/clustering/"
)

# Stack all eight frames, lexicase first, from es100 down to es25.
data_string_lengths_backwards <- rbind(
  data_string_lengths_backwards_lexicase_es100,
  data_string_lengths_backwards_lexicase_es75,
  data_string_lengths_backwards_lexicase_es50,
  data_string_lengths_backwards_lexicase_es25,
  data_string_lengths_backwards_tourney_es100,
  data_string_lengths_backwards_tourney_es75,
  data_string_lengths_backwards_tourney_es50,
  data_string_lengths_backwards_tourney_es25
)

# Pin the factor level order: TRUE before FALSE for `succeeded`, and an
# explicit treatment order. Treatment values not listed here become NA.
data_string_lengths_backwards$succeeded <- factor(
  data_string_lengths_backwards$succeeded,
  levels = c(TRUE, FALSE)
)
data_string_lengths_backwards$treatment <- factor(
  data_string_lengths_backwards$treatment,
  levels = c(
    "lexicase", "lexicase-ratio-0.75", "lexicase-ratio-0.5", "lexicase-ratio-0.25",
    "tourney", "tourney-7-ratio-0.75", "tourney-7-ratio-0.5", "tourney-7-ratio-0.25"
  )
)
# ---- vector-average ----
# One data frame per selection method (lexicase / tourney) and elitist-survival
# directory. The *_es100 frames are read from each method's clustering/
# directory (presumably the runs labelled "100%" in the plots - TODO confirm).
data_vector_average_lexicase_es25 <- import_from_error_clustering_and_div(
  "../data/vector-average/lexicase/elitist_survival_25/"
)
data_vector_average_lexicase_es50 <- import_from_error_clustering_and_div(
  "../data/vector-average/lexicase/elitist_survival_50/"
)
data_vector_average_lexicase_es75 <- import_from_error_clustering_and_div(
  "../data/vector-average/lexicase/elitist_survival_75/"
)
data_vector_average_lexicase_es100 <- import_from_error_clustering_and_div(
  "../data/vector-average/lexicase/clustering/"
)
data_vector_average_tourney_es25 <- import_from_error_clustering_and_div(
  "../data/vector-average/tourney/elitist_survival_25/"
)
data_vector_average_tourney_es50 <- import_from_error_clustering_and_div(
  "../data/vector-average/tourney/elitist_survival_50/"
)
data_vector_average_tourney_es75 <- import_from_error_clustering_and_div(
  "../data/vector-average/tourney/elitist_survival_75/"
)
data_vector_average_tourney_es100 <- import_from_error_clustering_and_div(
  "../data/vector-average/tourney/clustering/"
)

# Stack all eight frames, lexicase first, from es100 down to es25.
data_vector_average <- rbind(
  data_vector_average_lexicase_es100,
  data_vector_average_lexicase_es75,
  data_vector_average_lexicase_es50,
  data_vector_average_lexicase_es25,
  data_vector_average_tourney_es100,
  data_vector_average_tourney_es75,
  data_vector_average_tourney_es50,
  data_vector_average_tourney_es25
)

# Pin the factor level order: TRUE before FALSE for `succeeded`, and an
# explicit treatment order. Treatment values not listed here become NA.
data_vector_average$succeeded <- factor(data_vector_average$succeeded, levels = c(TRUE, FALSE))
data_vector_average$treatment <- factor(
  data_vector_average$treatment,
  levels = c(
    "lexicase", "lexicase-ratio-0.75", "lexicase-ratio-0.5", "lexicase-ratio-0.25",
    "tourney", "tourney-7-ratio-0.75", "tourney-7-ratio-0.5", "tourney-7-ratio-0.25"
  )
)
# ---- negative-to-zero ----
# One data frame per selection method (lexicase / tourney) and elitist-survival
# directory. The *_es100 frames are read from each method's clustering/
# directory (presumably the runs labelled "100%" in the plots - TODO confirm).
data_negative_to_zero_lexicase_es25 <- import_from_error_clustering_and_div(
  "../data/negative-to-zero/lexicase/elitist_survival_25/"
)
data_negative_to_zero_lexicase_es50 <- import_from_error_clustering_and_div(
  "../data/negative-to-zero/lexicase/elitist_survival_50/"
)
data_negative_to_zero_lexicase_es75 <- import_from_error_clustering_and_div(
  "../data/negative-to-zero/lexicase/elitist_survival_75/"
)
data_negative_to_zero_lexicase_es100 <- import_from_error_clustering_and_div(
  "../data/negative-to-zero/lexicase/clustering/"
)
data_negative_to_zero_tourney_es25 <- import_from_error_clustering_and_div(
  "../data/negative-to-zero/tourney/elitist_survival_25/"
)
data_negative_to_zero_tourney_es50 <- import_from_error_clustering_and_div(
  "../data/negative-to-zero/tourney/elitist_survival_50/"
)
data_negative_to_zero_tourney_es75 <- import_from_error_clustering_and_div(
  "../data/negative-to-zero/tourney/elitist_survival_75/"
)
data_negative_to_zero_tourney_es100 <- import_from_error_clustering_and_div(
  "../data/negative-to-zero/tourney/clustering/"
)

# Stack all eight frames, lexicase first, from es100 down to es25.
data_negative_to_zero <- rbind(
  data_negative_to_zero_lexicase_es100,
  data_negative_to_zero_lexicase_es75,
  data_negative_to_zero_lexicase_es50,
  data_negative_to_zero_lexicase_es25,
  data_negative_to_zero_tourney_es100,
  data_negative_to_zero_tourney_es75,
  data_negative_to_zero_tourney_es50,
  data_negative_to_zero_tourney_es25
)

# Pin the factor level order: TRUE before FALSE for `succeeded`, and an
# explicit treatment order. Treatment values not listed here become NA.
data_negative_to_zero$succeeded <- factor(data_negative_to_zero$succeeded, levels = c(TRUE, FALSE))
data_negative_to_zero$treatment <- factor(
  data_negative_to_zero$treatment,
  levels = c(
    "lexicase", "lexicase-ratio-0.75", "lexicase-ratio-0.5", "lexicase-ratio-0.25",
    "tourney", "tourney-7-ratio-0.75", "tourney-7-ratio-0.5", "tourney-7-ratio-0.25"
  )
)
# ---- double-letters ----
# One data frame per selection method (lexicase / tourney) and elitist-survival
# directory. The *_es100 frames are read from each method's clustering/
# directory (presumably the runs labelled "100%" in the plots - TODO confirm).
data_double_letters_lexicase_es25 <- import_from_error_clustering_and_div(
  "../data/double-letters/lexicase/elitist_survival_25/"
)
data_double_letters_lexicase_es50 <- import_from_error_clustering_and_div(
  "../data/double-letters/lexicase/elitist_survival_50/"
)
data_double_letters_lexicase_es75 <- import_from_error_clustering_and_div(
  "../data/double-letters/lexicase/elitist_survival_75/"
)
data_double_letters_lexicase_es100 <- import_from_error_clustering_and_div(
  "../data/double-letters/lexicase/clustering/"
)
data_double_letters_tourney_es25 <- import_from_error_clustering_and_div(
  "../data/double-letters/tourney/elitist_survival_25/"
)
data_double_letters_tourney_es50 <- import_from_error_clustering_and_div(
  "../data/double-letters/tourney/elitist_survival_50/"
)
data_double_letters_tourney_es75 <- import_from_error_clustering_and_div(
  "../data/double-letters/tourney/elitist_survival_75/"
)
data_double_letters_tourney_es100 <- import_from_error_clustering_and_div(
  "../data/double-letters/tourney/clustering/"
)

# Stack all eight frames, lexicase first, from es100 down to es25.
data_double_letters <- rbind(
  data_double_letters_lexicase_es100,
  data_double_letters_lexicase_es75,
  data_double_letters_lexicase_es50,
  data_double_letters_lexicase_es25,
  data_double_letters_tourney_es100,
  data_double_letters_tourney_es75,
  data_double_letters_tourney_es50,
  data_double_letters_tourney_es25
)

# Pin the factor level order: TRUE before FALSE for `succeeded`, and an
# explicit treatment order. Treatment values not listed here become NA.
data_double_letters$succeeded <- factor(data_double_letters$succeeded, levels = c(TRUE, FALSE))
data_double_letters$treatment <- factor(
  data_double_letters$treatment,
  levels = c(
    "lexicase", "lexicase-ratio-0.75", "lexicase-ratio-0.5", "lexicase-ratio-0.25",
    "tourney", "tourney-7-ratio-0.75", "tourney-7-ratio-0.5", "tourney-7-ratio-0.25"
  )
)
# ---- count-odds ----
# One data frame per selection method (lexicase / tourney) and elitist-survival
# directory. The *_es100 frames are read from each method's clustering/
# directory (presumably the runs labelled "100%" in the plots - TODO confirm).
data_count_odds_lexicase_es25 <- import_from_error_clustering_and_div(
  "../data/count-odds/lexicase/elitist_survival_25/"
)
data_count_odds_lexicase_es50 <- import_from_error_clustering_and_div(
  "../data/count-odds/lexicase/elitist_survival_50/"
)
data_count_odds_lexicase_es75 <- import_from_error_clustering_and_div(
  "../data/count-odds/lexicase/elitist_survival_75/"
)
data_count_odds_lexicase_es100 <- import_from_error_clustering_and_div(
  "../data/count-odds/lexicase/clustering/"
)
data_count_odds_tourney_es25 <- import_from_error_clustering_and_div(
  "../data/count-odds/tourney/elitist_survival_25/"
)
data_count_odds_tourney_es50 <- import_from_error_clustering_and_div(
  "../data/count-odds/tourney/elitist_survival_50/"
)
data_count_odds_tourney_es75 <- import_from_error_clustering_and_div(
  "../data/count-odds/tourney/elitist_survival_75/"
)
data_count_odds_tourney_es100 <- import_from_error_clustering_and_div(
  "../data/count-odds/tourney/clustering/"
)

# Stack all eight frames, lexicase first, from es100 down to es25.
data_count_odds <- rbind(
  data_count_odds_lexicase_es100,
  data_count_odds_lexicase_es75,
  data_count_odds_lexicase_es50,
  data_count_odds_lexicase_es25,
  data_count_odds_tourney_es100,
  data_count_odds_tourney_es75,
  data_count_odds_tourney_es50,
  data_count_odds_tourney_es25
)

# Pin the factor level order: TRUE before FALSE for `succeeded`, and an
# explicit treatment order. Treatment values not listed here become NA.
data_count_odds$succeeded <- factor(data_count_odds$succeeded, levels = c(TRUE, FALSE))
data_count_odds$treatment <- factor(
  data_count_odds$treatment,
  levels = c(
    "lexicase", "lexicase-ratio-0.75", "lexicase-ratio-0.5", "lexicase-ratio-0.25",
    "tourney", "tourney-7-ratio-0.75", "tourney-7-ratio-0.5", "tourney-7-ratio-0.25"
  )
)
# Point size used for the periodic shape markers drawn by the plot functions.
shape_size <- 4

# Builds a ggplot of the per-generation median of `error.diversity`, one line
# per treatment, with a shape marker on the generations where
# generation %% 30 == 15.
#
# Arguments:
#   data              - data frame with `generation`, `error.diversity` and
#                       `treatment` columns.
#   quartiles_percent - currently unused; it was only consumed by the disabled
#                       "median_hilow" summary layer noted below.
#   legend.pos        - passed to theme(legend.position = ...).
#   legend.just       - passed to theme(legend.justification = ...).
#
# Returns the ggplot object. Despite the name, only the medians are drawn.
# Relies on `cbbPalette` and `shape_size` being defined outside the function.
plot_diversity_medians_and_quartiles <- function(data,
                                                 quartiles_percent = 0.5,
                                                 legend.pos = c(1,0),
                                                 legend.just = c(1,0)) {
  # Legend entries: raw treatment values and the labels shown for them.
  treatment_keys <- c(
    "lexicase", "lexicase-ratio-0.75", "lexicase-ratio-0.5", "lexicase-ratio-0.25",
    "tourney", "tourney-7-ratio-0.75", "tourney-7-ratio-0.5", "tourney-7-ratio-0.25"
  )
  treatment_labels <- c(
    "lex 100%", "lex 75%", "lex 50%", "lex 25%",
    "tourney 100%", "tourney 75%", "tourney 50%", "tourney 25%"
  )
  # NOTE(review): shape 15 is listed for both the 5th and 8th entries, so two
  # treatments appear to share a marker and differ only by colour - confirm
  # whether that is intended.
  treatment_shapes <- c(0, 2, 23, 16, 15, 17, 25, 15)

  # Only a sparse subset of generations gets a point marker.
  marker_rows <- subset(data, generation %% 30 == 15)

  base_plot <- ggplot(
    data,
    aes(x=generation, y=error.diversity, color=treatment, fill=treatment, shape = treatment)
  )
  # Disabled alternative that would have drawn a quantile band:
  #   stat_summary(fun.data="median_hilow", conf.int=quartiles_percent, geom = "smooth", size=1)
  median_line <- stat_summary(fun.y="median", geom = "line", size=1)
  median_markers <- stat_summary(data=marker_rows, fun.y = median, geom="point", size = shape_size)

  colour_scale <- scale_colour_manual(values=cbbPalette, breaks=treatment_keys, labels=treatment_labels)
  fill_scale <- scale_fill_manual(values = cbbPalette, breaks=treatment_keys, labels=treatment_labels)
  shape_scale <- scale_shape_manual(values = treatment_shapes, breaks=treatment_keys, labels=treatment_labels)

  # Legend styling: no title, boxed background, position supplied by the caller.
  legend_theme <- theme(
    legend.title=element_blank(),
    legend.justification=legend.just,
    legend.position=legend.pos,
    legend.background = element_rect(colour="black", size=0.1),
    legend.key = element_blank()
  )

  base_plot +
    median_line +
    median_markers +
    theme_bw() +
    colour_scale +
    fill_scale +
    shape_scale +
    coord_cartesian(ylim = c(0,1)) +
    labs(y = "Error Diversity") +
    legend_theme
}
# Builds a ggplot of the per-generation median of `cluster.count`, one line per
# treatment, with a shape marker on the generations where
# generation %% 30 == 15.
#
# Arguments:
#   data              - data frame with `generation`, `cluster.count` and
#                       `treatment` columns.
#   quartiles_percent - currently unused; it was only consumed by the disabled
#                       "median_hilow" summary layer noted below.
#   legend.pos        - passed to theme(legend.position = ...).
#   legend.just       - passed to theme(legend.justification = ...).
#
# Returns the ggplot object. Despite the name, only the medians are drawn.
# Relies on `cbbPalette` and `shape_size` being defined outside the function.
plot_cluster_count_medians_and_quartiles <- function(data,
                                                     quartiles_percent = 0.5,
                                                     legend.pos = c(1,0),
                                                     legend.just = c(1,0)) {
  # Legend entries: raw treatment values and the labels shown for them.
  treatment_keys <- c(
    "lexicase", "lexicase-ratio-0.75", "lexicase-ratio-0.5", "lexicase-ratio-0.25",
    "tourney", "tourney-7-ratio-0.75", "tourney-7-ratio-0.5", "tourney-7-ratio-0.25"
  )
  treatment_labels <- c(
    "lex 100%", "lex 75%", "lex 50%", "lex 25%",
    "tourney 100%", "tourney 75%", "tourney 50%", "tourney 25%"
  )
  # NOTE(review): shape 15 is listed for both the 5th and 8th entries, so two
  # treatments appear to share a marker and differ only by colour - confirm
  # whether that is intended.
  treatment_shapes <- c(0, 2, 23, 16, 15, 17, 25, 15)

  # Only a sparse subset of generations gets a point marker.
  marker_rows <- subset(data, generation %% 30 == 15)

  base_plot <- ggplot(
    data,
    aes(x=generation, y=cluster.count, color=treatment, fill=treatment, shape = treatment)
  )
  # Disabled alternative that would have drawn a quantile band:
  #   stat_summary(fun.data="median_hilow", conf.int=quartiles_percent, geom = "smooth", size=1)
  median_line <- stat_summary(fun.y="median", geom = "line", size=1)
  median_markers <- stat_summary(data=marker_rows, fun.y = median, geom="point", size = shape_size)

  colour_scale <- scale_colour_manual(values=cbbPalette, breaks=treatment_keys, labels=treatment_labels)
  fill_scale <- scale_fill_manual(values = cbbPalette, breaks=treatment_keys, labels=treatment_labels)
  shape_scale <- scale_shape_manual(values = treatment_shapes, breaks=treatment_keys, labels=treatment_labels)

  # Legend styling: no title, boxed background, position supplied by the caller.
  legend_theme <- theme(
    legend.title=element_blank(),
    legend.justification = legend.just,
    legend.position=legend.pos,
    legend.background = element_rect(colour="black", size=0.1),
    legend.key = element_blank()
  )

  base_plot +
    median_line +
    median_markers +
    theme_bw() +
    colour_scale +
    fill_scale +
    shape_scale +
    labs(y = "Cluster Count") +
    legend_theme
}
# Builds a ggplot of `num.successes` against `generation`, one line per
# treatment, using the table returned by get_generational_success_counts(data)
# (per the original note: the number of successes at or before each generation).
#
# Arguments:
#   data - data frame with a factor `treatment` column and a `generation`
#          column; it is also passed on to get_generational_success_counts().
#
# Returns the ggplot object, with the legend removed and a negative top margin.
# Relies on `cbbPalette` and `shape_size` being defined outside the function.
plot_generational_success_counts <- function(data) {
  counts_by_generation <- get_generational_success_counts(data)

  # The y axis upper limit is the number of generation-0 rows belonging to the
  # first treatment level (presumably one row per run - TODO confirm).
  reference_treatment <- levels(data$treatment)[1]
  generation_zero_rows <- subset(data, treatment==reference_treatment & generation == 0)
  run_count <- nrow(generation_zero_rows)

  # Only a sparse subset of generations gets a point marker.
  marker_rows <- subset(counts_by_generation, generation %% 30 == 15)

  # NOTE(review): shape 15 is listed for both the 5th and 8th entries, so two
  # treatments appear to share a marker and differ only by colour - confirm
  # whether that is intended.
  treatment_shapes <- c(0, 2, 23, 16, 15, 17, 25, 15)

  base_plot <- ggplot(
    counts_by_generation,
    aes(x=generation, y=num.successes, color=treatment, shape=treatment, fill=treatment)
  )

  # plot.margin is unit(c(top, right, bottom, left), ...); theme_get() lists
  # all theme properties, including the default margin of c(1, 1, 0.5, 0.5).
  margin_theme <- theme(plot.margin = unit(c(-1, 1, 0.5, 0.5), "lines"))
  no_legend_theme <- theme(legend.position="none")

  base_plot +
    geom_line(size=1) +
    geom_point(data=marker_rows, size = shape_size) +
    ylim(c(0, run_count)) +
    theme_bw() +
    scale_colour_manual(values=cbbPalette) +
    scale_fill_manual(values = cbbPalette) +
    scale_shape_manual(values = treatment_shapes) +
    labs(x = "Generation", y = "Successes") +
    margin_theme +
    no_legend_theme
}
# ---- Figures ----
# For every problem, draw the error-diversity plot and then the cluster-count
# plot, each handed to add_generational_success_counts_plot() together with the
# problem's data. The "## NULL" lines are the output recorded for each call.

# RSWN
add_generational_success_counts_plot(
  data_rswn,
  plot_diversity_medians_and_quartiles(
    data_rswn,
    legend.pos = c(1, 0.5),
    legend.just = c(1, 0.5)
  )
)
## NULL
add_generational_success_counts_plot(
  data_rswn,
  plot_cluster_count_medians_and_quartiles(data_rswn) #, legend.pos = c(1, 0.3))
)
## NULL
#plot_cluster_count_medians_and_quartiles(data_rswn) + facet_grid(succeeded ~ ., labeller=label_both)

# syllables
add_generational_success_counts_plot(
  data_syllables,
  plot_diversity_medians_and_quartiles(data_syllables, legend.pos = c(1, 0))
)
## NULL
add_generational_success_counts_plot(
  data_syllables,
  plot_cluster_count_medians_and_quartiles(data_syllables)
)
## NULL

# string-lengths-backwards
add_generational_success_counts_plot(
  data_string_lengths_backwards,
  plot_diversity_medians_and_quartiles(
    data_string_lengths_backwards,
    legend.pos = c(1, 1),
    legend.just = c(1, 1)
  )
)
## NULL
#plot_diversity_medians_and_quartiles(data_string_lengths_backwards) + facet_grid(succeeded ~ ., labeller=label_both)
add_generational_success_counts_plot(
  data_string_lengths_backwards,
  plot_cluster_count_medians_and_quartiles(data_string_lengths_backwards)
)
## NULL
#plot_cluster_count_medians_and_quartiles(data_string_lengths_backwards) + facet_grid(succeeded ~ ., labeller=label_both)

# vector-average
add_generational_success_counts_plot(
  data_vector_average,
  plot_diversity_medians_and_quartiles(data_vector_average)
)
## NULL
add_generational_success_counts_plot(
  data_vector_average,
  plot_cluster_count_medians_and_quartiles(data_vector_average)
)
## NULL

# negative-to-zero
add_generational_success_counts_plot(
  data_negative_to_zero,
  plot_diversity_medians_and_quartiles(data_negative_to_zero)
)
## NULL
add_generational_success_counts_plot(
  data_negative_to_zero,
  plot_cluster_count_medians_and_quartiles(data_negative_to_zero)
)
## NULL
#plot_cluster_count_medians_and_quartiles(data_negative_to_zero) + facet_grid(succeeded ~ ., labeller=label_both)

# double-letters
add_generational_success_counts_plot(
  data_double_letters,
  plot_diversity_medians_and_quartiles(data_double_letters)
)
## NULL
add_generational_success_counts_plot(
  data_double_letters,
  plot_cluster_count_medians_and_quartiles(data_double_letters)
)
## NULL
#plot_cluster_count_medians_and_quartiles(data_double_letters) + facet_grid(succeeded ~ ., labeller=label_both)

# count-odds
add_generational_success_counts_plot(
  data_count_odds,
  plot_diversity_medians_and_quartiles(data_count_odds)
)
## NULL
add_generational_success_counts_plot(
  data_count_odds,
  plot_cluster_count_medians_and_quartiles(data_count_odds)
)
## NULL