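This document plots the number of clusters and the error diversity per generation for selected runs, grouped by selection method (lexicase or tourney) and by outcome (success or failure).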
# Plotting (ggplot2) and clustering (cluster, apcluster) packages.
# NOTE(review): cluster/apcluster are not called directly in the code visible
# here; presumably the sourced script below needs them -- confirm.
library(ggplot2)
library(cluster)
library(apcluster)

# All relative paths in this notebook ("../...") are resolved from here.
setwd("~/Documents/R/Clustering/lexicase-clusturing-analysis/R_notebooks")

# Project script; presumably defines the helpers named in the commented-out
# data-preparation chunks (e.g. make_frame_from_errors_file) -- confirm.
source("../scripts/clustering.R")
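The commented-out chunks below transform the original data files into error files, then turn the error data into data frames we can use and write them to CSV. The plotting chunks further down read those CSV files back in, so the chunks below only need to be re-run when the CSV files have to be regenerated.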
Lexicase, Run 1, a success
# transform_data_file_into_error_file("../data/RSWN/lexicase/data1.csv")
# data_frame1 = make_frame_from_errors_file(
# "../data/RSWN/lexicase/errors_data1.csv", 1,
# "replace-space-with-newline", "lexicase", 20, elitize_generation_data)
# write.csv(data_frame1, "../data/RSWN/lexicase/error_counts_and_div1.csv")
Lexicase, Run 6, a success
# transform_data_file_into_error_file("../data/RSWN/lexicase/data6.csv")
# data_frame6 = make_frame_from_errors_file(
# "../data/RSWN/lexicase/errors_data6.csv", 6,
# "replace-space-with-newline", "lexicase", 20, elitize_generation_data)
# write.csv(data_frame6, "../data/RSWN/lexicase/error_counts_and_div6.csv")
Lexicase, Run 2, a failure
# transform_data_file_into_error_file("../data/RSWN/lexicase/data2.csv")
# data_frame2 = make_frame_from_errors_file(
# "../data/RSWN/lexicase/errors_data2.csv", 2,
# "replace-space-with-newline", "lexicase", 20, elitize_generation_data)
# write.csv(data_frame2, "../data/RSWN/lexicase/error_counts_and_div2.csv")
Lexicase, Run 3, a failure
# transform_data_file_into_error_file("../data/RSWN/lexicase/data3.csv")
# data_frame3 = make_frame_from_errors_file(
# "../data/RSWN/lexicase/errors_data3.csv", 3,
# "replace-space-with-newline", "lexicase", 20, elitize_generation_data)
# write.csv(data_frame3, "../data/RSWN/lexicase/error_counts_and_div3.csv")
Tourney, Run 50, a success
# transform_data_file_into_error_file("../data/RSWN/tourney/data50.csv")
# data_frame50 = make_frame_from_errors_file(
# "../data/RSWN/tourney/errors_data50.csv", 50,
# "replace-space-with-newline", "tourney", 20, elitize_generation_data)
# write.csv(data_frame50, "../data/RSWN/tourney/error_counts_and_div50.csv")
Tourney, Run 52, a success
# transform_data_file_into_error_file("../data/RSWN/tourney/data52.csv")
# data_frame52 = make_frame_from_errors_file(
# "../data/RSWN/tourney/errors_data52.csv", 52,
# "replace-space-with-newline", "tourney", 20, elitize_generation_data)
# write.csv(data_frame52, "../data/RSWN/tourney/error_counts_and_div52.csv")
Tourney, Run 57, a failure
# transform_data_file_into_error_file("../data/RSWN/tourney/data57.csv")
# data_frame57 = make_frame_from_errors_file(
# "../data/RSWN/tourney/errors_data57.csv", 57,
# "replace-space-with-newline", "tourney", 20, elitize_generation_data)
# write.csv(data_frame57, "../data/RSWN/tourney/error_counts_and_div57.csv")
Tourney, Run 58, a failure
# transform_data_file_into_error_file("../data/RSWN/tourney/data58.csv")
# data_frame58 = make_frame_from_errors_file(
# "../data/RSWN/tourney/errors_data58.csv", 58,
# "replace-space-with-newline", "tourney", 20, elitize_generation_data)
# write.csv(data_frame58, "../data/RSWN/tourney/error_counts_and_div58.csv")
# Lexicase, successful runs: read the saved CSVs for runs 1 and 6 and stack
# them row-wise into one data frame.
data_frame1 <- read.csv("../data/RSWN/lexicase/error_counts_and_div1.csv")
data_frame6 <- read.csv("../data/RSWN/lexicase/error_counts_and_div6.csv")
lexicase_success <- rbind(data_frame1, data_frame6)

# Number of clusters per generation. interaction("Run", run.num) turns the
# run number into a factor (levels like "Run.1"), giving one line per run.
ggplot(data = lexicase_success, mapping = aes(x = generation)) +
  geom_line(
    mapping = aes(y = cluster.count, color = interaction("Run", run.num))
  ) +
  coord_cartesian(ylim = c(0, 200)) +
  labs(y = "Number of Clusters", color = "")

# Error diversity per generation for the same runs, y axis fixed to [0, 1].
ggplot(data = lexicase_success, mapping = aes(x = generation)) +
  geom_line(
    mapping = aes(y = error.diversity, color = interaction("Run", run.num))
  ) +
  coord_cartesian(ylim = c(0, 1)) +
  labs(y = "Error Diversity", color = "")
# Lexicase, failed runs: read the saved CSVs for runs 2 and 3 and stack them
# row-wise into one data frame.
data_frame2 <- read.csv("../data/RSWN/lexicase/error_counts_and_div2.csv")
data_frame3 <- read.csv("../data/RSWN/lexicase/error_counts_and_div3.csv")
lexicase_failure <- rbind(data_frame2, data_frame3)

# Number of clusters per generation. interaction("Run", run.num) turns the
# run number into a factor, so each run gets its own coloured line.
ggplot(data = lexicase_failure, mapping = aes(x = generation)) +
  geom_line(
    mapping = aes(y = cluster.count, color = interaction("Run", run.num))
  ) +
  coord_cartesian(ylim = c(0, 200)) +
  labs(y = "Number of Clusters", color = "")

# Error diversity per generation for the same runs, y axis fixed to [0, 1].
ggplot(data = lexicase_failure, mapping = aes(x = generation)) +
  geom_line(
    mapping = aes(y = error.diversity, color = interaction("Run", run.num))
  ) +
  coord_cartesian(ylim = c(0, 1)) +
  labs(y = "Error Diversity", color = "")
# Tourney, successful runs: read the saved CSVs for runs 50 and 52 and stack
# them row-wise into one data frame.
tourney_data_frame50 <-
  read.csv("../data/RSWN/tourney/error_counts_and_div50.csv")
tourney_data_frame52 <-
  read.csv("../data/RSWN/tourney/error_counts_and_div52.csv")
tourney_success <- rbind(tourney_data_frame50, tourney_data_frame52)

# Number of clusters per generation. interaction("Run", run.num) turns the
# run number into a factor, so each run gets its own coloured line.
ggplot(data = tourney_success, mapping = aes(x = generation)) +
  geom_line(
    mapping = aes(y = cluster.count, color = interaction("Run", run.num))
  ) +
  coord_cartesian(ylim = c(0, 200)) +
  labs(y = "Number of Clusters", color = "")

# Error diversity per generation for the same runs, y axis fixed to [0, 1].
ggplot(data = tourney_success, mapping = aes(x = generation)) +
  geom_line(
    mapping = aes(y = error.diversity, color = interaction("Run", run.num))
  ) +
  coord_cartesian(ylim = c(0, 1)) +
  labs(y = "Error Diversity", color = "")
# Tourney, failed runs: read the saved CSVs for runs 57 and 58 and stack them
# row-wise into one data frame.
tourney_data_frame57 <-
  read.csv("../data/RSWN/tourney/error_counts_and_div57.csv")
tourney_data_frame58 <-
  read.csv("../data/RSWN/tourney/error_counts_and_div58.csv")
tourney_failure <- rbind(tourney_data_frame57, tourney_data_frame58)

# Number of clusters per generation. interaction("Run", run.num) turns the
# run number into a factor, so each run gets its own coloured line.
ggplot(data = tourney_failure, mapping = aes(x = generation)) +
  geom_line(
    mapping = aes(y = cluster.count, color = interaction("Run", run.num))
  ) +
  coord_cartesian(ylim = c(0, 200)) +
  labs(y = "Number of Clusters", color = "")

# Error diversity per generation for the same runs, y axis fixed to [0, 1].
ggplot(data = tourney_failure, mapping = aes(x = generation)) +
  geom_line(
    mapping = aes(y = error.diversity, color = interaction("Run", run.num))
  ) +
  coord_cartesian(ylim = c(0, 1)) +
  labs(y = "Error Diversity", color = "")