require(ggplot2)
require(plyr)
require(dplyr)

# Switch to the directory holding the input data; the CSV files read later in
# this script are referenced by relative path.
# NOTE(review): this is an absolute, machine-specific path -- the script will
# stop here on any machine where the directory does not exist.
setwd("/Users/msonnabaum/Acquia/fastly_testing")

# Collapse raw MIME type strings into a small set of coarse content categories.
#
# Args:
#   response_content_mimetype: character vector or factor of MIME types
#     (anything as.character() can coerce). May have any length, including 0.
#
# Returns:
#   A character vector of the same length as the input. Each element is the
#   first of "html", "javascript", "css", "image", "text/plain" that occurs
#   as a substring of the MIME type, or "other" when none occurs (NA input
#   also yields "other").
normalize_mime_type <- function(response_content_mimetype) {
    mimetype <- as.character(response_content_mimetype)
    # Listed in precedence order: an earlier category wins when several match.
    categories <- c("html", "javascript", "css", "image", "text/plain")
    normalized <- rep("other", length(mimetype))
    # Apply the lowest-precedence category first so that higher-precedence
    # matches overwrite it.
    for (category in rev(categories)) {
        normalized[grepl(category, mimetype)] <- category
    }
    normalized
}

Time to onLoad event

# Load the per-page measurements; the first two columns are read as factors
# and the last two as numerics.
pages <- read.csv("pages.csv", colClasses = c("factor", "factor", "numeric", 
    "numeric"))

# Map each test job id to the URL it exercised.
# The lookup is keyed by the job id *string*: pages$job is a factor, and
# dispatching on the factor itself would use its integer level code, which
# only lines up with the intended URL when exactly these four jobs are present
# and sort in this order. Job ids not listed here map to NA.
job_urls <- c(
    `06609cd54` = "http://www.acquia.com",
    `264bc7e4d` = "https://www.acquia.com",
    `44a3dfbba` = "https://fastly.acquia.com",
    `6b801bc28` = "http://fastly.acquia.com"
)
pages$url <- unname(job_urls[as.character(pages$job)])

# Box plot of the onload measurement for each URL.
# The measurement is mapped to the y axis (x holds the URL), so the
# "milliseconds to onLoad event" label belongs on y, not x.
ggplot(pages, aes(url, onload, colour = url)) + ylab("milliseconds to onLoad event") + 
    geom_boxplot()

plot of chunk unnamed-chunk-2

# Column classes for entries.csv, given positionally: one entry per column,
# in file order. The trailing comment names the column each class applies to.
entries_col_classes <- c(
    "factor",   # job
    "factor",   # run
    "factor",   # startedDateTime
    "numeric",  # time
    "factor",   # request_method
    "numeric",  # response_content_size
    "factor",   # response_content_mimetype
    "factor",   # response_status
    "numeric",  # response_bodysize
    "numeric",  # timings_blocked
    "numeric",  # timings_dns
    "numeric",  # timings_connect
    "numeric",  # timings_send
    "numeric",  # timings_wait
    "numeric",  # timings_receive
    "numeric",  # timings_ssl
    "factor",   # headers_content_encoding
    "factor",   # headers_cache_control
    "factor",   # headers_content_type
    "factor"    # request_url
)

# Load the per-request entries and keep only requests whose URL starts with
# an http(s)://<name>.acquia.com origin.
entries <- read.csv("entries.csv", colClasses = entries_col_classes)

# Scheme + host of an acquia.com URL, captured as group 1. The dots are
# escaped so they match a literal "." rather than any character (otherwise
# e.g. "http://wwwXacquia.com" would be accepted).
acquia_origin_pattern <- "^(https?://\\w+\\.acquia\\.com).*"
entries <- filter(entries, grepl(acquia_origin_pattern, request_url))

# Coarse content category for each response. normalize_mime_type is applied
# one element at a time; vapply guarantees a character vector even when no
# rows remain.
entries$type <- vapply(as.character(entries$response_content_mimetype),
    normalize_mime_type, character(1), USE.NAMES = FALSE)

# Origin (scheme + host) of each request, with path and query stripped.
entries$url <- sub(acquia_origin_pattern, "\\1", as.character(entries$request_url))

# Response size divided by 1024 and rounded to the nearest 50, as a factor
# (presumably KB buckets, assuming response_content_size is in bytes --
# TODO confirm). round_any is vectorised, so no per-element loop is needed.
entries$rounded_size <- as.factor(round_any(entries$response_content_size / 1024, 50))

# ssl:  whether the request was made over https or plain http.
# host: "fastly" when the request's host contains "fastly", otherwise "us".
#       This tests the extracted origin rather than the full request URL so
#       that "fastly" appearing in a path or query string does not
#       misclassify a request.
entries <- mutate(entries,
    ssl = ifelse(grepl("^https", request_url), "https", "http"),
    host = ifelse(grepl("fastly", url), "fastly", "us"))

All requests

# Box plot of the time column for every request, one box per host group.
ggplot(data = entries, mapping = aes(x = host, y = time, colour = host)) +
    geom_boxplot()

plot of chunk unnamed-chunk-4

# Five-number summary (plus mean) of the time column for each host group.
# The "##" lines are the output recorded when the report was generated.
summary(subset(entries, host == "fastly")$time)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      13     127     192     492    1180    2970
summary(subset(entries, host == "us")$time)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      20     132     216     538    1250    2510

Requests by type

# Box plots of the time column, http vs https on the x axis, coloured by
# origin URL, with one panel per content type laid out five panels per row.
ggplot(data = entries, mapping = aes(x = ssl, y = time, colour = url)) +
    geom_boxplot() +
    facet_wrap(~type, ncol = 5)

plot of chunk unnamed-chunk-5

Request wait time

# Box plot of the timings_wait column, http vs https on the x axis,
# coloured by host group.
ggplot(data = entries, mapping = aes(x = ssl, y = timings_wait, colour = host)) +
    geom_boxplot()

plot of chunk unnamed-chunk-6

Request receive time

# Box plot of timings_receive per origin URL, restricted to requests with
# timings_receive below 5. The URLs on the x axis are ordered by their median
# timings_receive.
quick_receives <- filter(entries, timings_receive < 5)
ggplot(
    quick_receives,
    aes(
        x = reorder(url, timings_receive, FUN = median),
        y = timings_receive,
        colour = url
    )
) +
    geom_boxplot()

plot of chunk unnamed-chunk-7