require(ggplot2)
require(dplyr)

# Re-level a column whose labels are numbers so that its levels are in
# numeric rather than alphabetical order (e.g. "2" before "10").
#
# Args:
#   df:          a data frame.
#   column_name: name (string) of the column to re-level. A factor keeps all
#                of its existing levels (including unused ones); a non-factor
#                column is converted to a factor of its distinct values.
#
# Returns:
#   `df` with `column_name` replaced by a factor whose levels are the original
#   labels sorted by numeric value. Labels are reused verbatim, so values such
#   as "0.5" or "02" are kept instead of turning into NA. Labels that are not
#   numbers are placed last (as.numeric() emits its usual coercion warning).
order_numeric_factor_col <- function(df, column_name) {
    column <- df[[column_name]]
    if (is.null(column)) {
        stop("Column '", column_name, "' not found in data frame", call. = FALSE)
    }

    labels <- if (is.factor(column)) levels(column) else levels(factor(column))

    # Order the existing labels by their numeric value rather than rebuilding
    # them from integers: regenerated labels would not match non-integer or
    # zero-padded originals. order() is stable and puts NA (non-numeric) last.
    numeric_order <- order(as.numeric(labels))

    df[[column_name]] <- factor(column, levels = labels[numeric_order])
    df
}

# Box plot of response time grouped by the number of web servers running
# memcached.
#
# Args:
#   data: data frame with a `memcache_webs` column (x axis and colour) and a
#         numeric `response_time` column (y axis).
#
# Returns:
#   A ggplot object. Groups on the x axis are ordered by their median
#   response time.
plot_memcache_webs <- function(data) {
    # Derive the y-axis breaks from the data actually being plotted instead of
    # a global `df`, so the function works on any data frame passed in.
    # Non-finite values are ignored; with no usable values fall back to
    # ggplot2's default breaks rather than failing inside seq().
    finite_times <- data$response_time[is.finite(data$response_time)]
    y_breaks <- if (length(finite_times) > 0 && max(finite_times) >= 0) {
        seq(0, max(finite_times), by = 500)
    } else {
        waiver()
    }

    ggplot(data, aes(reorder(memcache_webs, response_time, FUN = median), response_time,
        colour = memcache_webs)) +
        scale_y_continuous(breaks = y_breaks) +
        theme_bw() +
        ylab("Response time") +
        xlab("Number of webservers running memcached") +
        geom_boxplot()
}

# Column classes for ded-memcache-test.csv.gz, in file column order.
col_classes <- c("Date", "factor", "numeric", "factor", "factor", "factor",
    "factor", "factor")
df <- read.csv("ded-memcache-test.csv.gz", colClasses = col_classes)

# Change the levels on a numeric factor column so that sort works.
df <- order_numeric_factor_col(df, "fpm_max_children")
df <- order_numeric_factor_col(df, "concurrency")
df <- order_numeric_factor_col(df, "memcache_webs")

# capacity = concurrency / (8 * fpm_max_children), stored both as a factor
# (for exact-label filtering) and as a number (for range filtering).
# NOTE(review): the constant 8 is presumably the number of web servers or
# workers per setting -- confirm.
df$capacity <- as.factor(as.numeric(as.character(df$concurrency))/(8 * as.numeric(as.character(df$fpm_max_children))))
df$capacity_numeric <- as.numeric(as.character(df$capacity))

df <- arrange(df, fpm_max_children)

# Each call below plots one capacity level, ignoring responses of 20000 or
# more. `capacity` is a factor, so `capacity == 0.5` compares against the
# level label "0.5" rather than doing floating-point arithmetic.
plot_memcache_webs(filter(df, response_time < 20000, capacity == 0.5))

# Output: plot of chunk unnamed-chunk-2

plot_memcache_webs(filter(df, response_time < 20000, capacity == 0.75))

# Output: plot of chunk unnamed-chunk-2

# This level is selected by numeric range instead of an exact label.
plot_memcache_webs(filter(df, response_time < 20000, capacity_numeric > 0.9,
    capacity_numeric < 0.96))

# Output: plot of chunk unnamed-chunk-2

plot_memcache_webs(filter(df, response_time < 20000, capacity == 1))

# Output: plot of chunk unnamed-chunk-2

plot_memcache_webs(filter(df, response_time < 20000, capacity == 1.25))

# Output: plot of chunk unnamed-chunk-2