library(ggplot2)
library(plyr)
library(lubridate)
## Attaching package: 'lubridate'
## The following object is masked from 'package:plyr':
## 
## here

# Load the log export, forcing the type of each of its four columns
# (factor, character, character, factor, in file order).
df <- read.csv(
    file = "~/college_board_logs.csv",
    colClasses = c("factor", "character", "character", "factor")
)

# Replace the datetime column with lubridate's parsed date-times.
df$datetime <- ymd_hms(df$datetime)

# Build grouping keys identifying the calendar minute, the 30-second window
# and the 10-second window that each request falls in.
#
# The minute key is formatted with fixed-width, separated fields so distinct
# timestamps can never collapse into the same key. Pasting unpadded numbers
# with sep = "" was ambiguous: hour 1 / minute 23 and hour 12 / minute 3 both
# ended in "123". The year is included so data spanning a year boundary stays
# separate.
df$minute <- as.factor(format(df$datetime, "%Y-%m-%d %H:%M"))

# Integer division yields the window index within the minute: 0-1 for
# 30-second windows, 0-5 for 10-second windows. findInterval(x, 10) only ever
# returns 0 or 1, which split each minute into [0, 10) and [10, 60) rather
# than six 10-second windows.
df$thirty.seconds <- as.factor(paste(df$minute, second(df$datetime) %/% 30, 
    sep = ":"))
df$ten.seconds <- as.factor(paste(df$minute, second(df$datetime) %/% 10, 
    sep = ":"))

# Narrow working table: one row per log entry, holding the web identifier
# (webname), the site (vhost) and the three time-window keys.
sites.per.interval.df <- data.frame(
    web = df$webname,
    site = df$vhost,
    minute = df$minute,
    thirty.seconds = df$thirty.seconds,
    ten.seconds = df$ten.seconds
)

# For every (time window, web) pair, count how many distinct sites appear.
# One summary table per window length; each has a `numsites` column.
sites.per.minute <- ddply(
    sites.per.interval.df,
    c("minute", "web"),
    summarise,
    numsites = length(unique(site))
)

sites.per.thirty.seconds <- ddply(
    sites.per.interval.df,
    c("thirty.seconds", "web"),
    summarise,
    numsites = length(unique(site))
)

sites.per.ten.seconds <- ddply(
    sites.per.interval.df,
    c("ten.seconds", "web"),
    summarise,
    numsites = length(unique(site))
)

# Distribution of the number of distinct sites seen per 10-second window,
# one panel per web value. numsites is plotted as a factor, so the counting
# layer must be geom_bar(): geom_histogram() bins a continuous x and fails on
# a discrete one in current ggplot2.
ggplot(sites.per.ten.seconds, aes(as.factor(numsites), fill = as.factor(numsites))) + 
    geom_bar() + 
    facet_wrap(~web, ncol = 1) + 
    xlab("Number of sites active within a period of 10 seconds")

## plot of chunk unnamed-chunk-1


# Distribution of the number of distinct sites seen per 30-second window,
# one panel per web value. numsites is plotted as a factor, so the counting
# layer must be geom_bar(): geom_histogram() bins a continuous x and fails on
# a discrete one in current ggplot2.
ggplot(sites.per.thirty.seconds, aes(as.factor(numsites), fill = as.factor(numsites))) + 
    geom_bar() + 
    facet_wrap(~web, ncol = 1) + 
    xlab("Number of sites active within a period of 30 seconds")

## plot of chunk unnamed-chunk-1


# Distribution of the number of distinct sites seen per one-minute window,
# one panel per web value. numsites is plotted as a factor, so the counting
# layer must be geom_bar(): geom_histogram() bins a continuous x and fails on
# a discrete one in current ggplot2.
ggplot(sites.per.minute, aes(as.factor(numsites), fill = as.factor(numsites))) + 
    geom_bar() + 
    facet_wrap(~web, ncol = 1) + 
    xlab("Number of sites active within a period of 1 minute")

## plot of chunk unnamed-chunk-1