library(ggplot2)
library(plyr)
library(lubridate)
## Attaching package: 'lubridate'
## The following object is masked from 'package:plyr':
## 
## here

# Load the raw log export, keeping text columns as character vectors.
df <- read.csv(file = "~/wmg_logs.csv", stringsAsFactors = FALSE)

# Parse the textual `datetime` column (day/abbreviated month/year:H:M:S plus a
# numeric UTC offset) and keep the parsed value next to the raw text.
df$parsed.datetime <- parse_date_time(
    x = df$datetime,
    orders = "%d/%b/%Y:%H:%M:%S %z"
)

# Build one grouping key per row for each time resolution (second, 30 seconds,
# 10 seconds, minute). Rows sharing a key fall into the same time window.
#
# The fields are joined with "-" and include the year: concatenating them
# with no separator made distinct timestamps collide (e.g. month 1 / day 11
# and month 11 / day 1 both produced a key starting "111").
stamp <- df$parsed.datetime
minute.key <- paste(year(stamp), month(stamp), day(stamp), hour(stamp),
    minute(stamp), sep = "-")

df$second <- as.factor(paste(minute.key, second(stamp), sep = "-"))

# A single breakpoint at 30 splits each minute into two halves (0 and 1).
df$thirty.seconds <- as.factor(paste(minute.key,
    findInterval(second(stamp), 30), sep = "-"))

# Ten-second windows need a breakpoint every 10 seconds (indices 0..5); a
# lone breakpoint at 10 would only separate seconds 0-9 from 10-59.
df$ten.seconds <- as.factor(paste(minute.key,
    findInterval(second(stamp), seq(10, 50, by = 10)), sep = "-"))

df$minute <- as.factor(minute.key)

# Keep only the site identifier and the interval keys needed for counting.
sites.per.interval.df <- data.frame(
    site = df$site,
    minute = df$minute,
    thirty.seconds = df$thirty.seconds,
    ten.seconds = df$ten.seconds
)

# For every interval key, count how many distinct sites appear in it.
# Each result has one row per key and a `numsites` column.
sites.per.minute <- ddply(
    .data = sites.per.interval.df,
    .variables = "minute",
    .fun = summarise,
    numsites = length(unique(site))
)
sites.per.thirty.seconds <- ddply(
    .data = sites.per.interval.df,
    .variables = "thirty.seconds",
    .fun = summarise,
    numsites = length(unique(site))
)
sites.per.ten.seconds <- ddply(
    .data = sites.per.interval.df,
    .variables = "ten.seconds",
    .fun = summarise,
    numsites = length(unique(site))
)

# Distribution of the number of distinct sites seen per 10-second window.
# numsites is turned into a factor so every count gets its own bar and fill
# colour. geom_bar() counts rows per discrete value; geom_histogram() bins a
# continuous x and rejects a factor in ggplot2 >= 2.0.0.
ggplot(sites.per.ten.seconds, aes(as.factor(numsites), fill = as.factor(numsites))) + 
    geom_bar() + xlab("Number of sites active within a period of 10 seconds")

## plot of chunk unnamed-chunk-1


# Distribution of the number of distinct sites seen per 30-second window.
# numsites is turned into a factor so every count gets its own bar and fill
# colour. geom_bar() counts rows per discrete value; geom_histogram() bins a
# continuous x and rejects a factor in ggplot2 >= 2.0.0.
ggplot(sites.per.thirty.seconds, aes(as.factor(numsites), fill = as.factor(numsites))) + 
    geom_bar() + xlab("Number of sites active within a period of 30 seconds")

## plot of chunk unnamed-chunk-1


# Distribution of the number of distinct sites seen per one-minute window.
# numsites is turned into a factor so every count gets its own bar and fill
# colour. geom_bar() counts rows per discrete value; geom_histogram() bins a
# continuous x and rejects a factor in ggplot2 >= 2.0.0.
ggplot(sites.per.minute, aes(as.factor(numsites), fill = as.factor(numsites))) + 
    geom_bar() + xlab("Number of sites active within a period of 1 minute")

## plot of chunk unnamed-chunk-1