library(ggplot2)
library(plyr)
library(lubridate)
## Console output captured when lubridate was attached after plyr:
##   Attaching package: 'lubridate'
##   The following object is masked from 'package:plyr':
##       here
# Load the request log. `colClasses` assigns column types by position
# (factor, character, character, factor). The statements in this script read
# the columns `datetime`, `webname` and `vhost` from the result.
df <- read.csv(
  "~/college_board_logs.csv",
  colClasses = c("factor", "character", "character", "factor")
)

# Parse the timestamp strings into date-times.
df$datetime <- ymd_hms(df$datetime)

# Label every row with the time bucket it falls into, at three resolutions.
#
# The key starts with a zero-padded "YYYY-mm-dd HH:MM" stamp so that two
# different instants can never produce the same label. Pasting the unpadded
# month/day/hour/minute numbers together with no separator is ambiguous
# (month 1 + day 12 and month 11 + day 2 both begin "112") and drops the year.
#
# The sub-minute slot is `second %/% width`: 0-1 for 30-second buckets and
# 0-5 for 10-second buckets. `findInterval(second, 10)` is not a substitute:
# with a single break point it only distinguishes "< 10" from ">= 10".
minute.key <- format(df$datetime, "%Y-%m-%d %H:%M")
df$thirty.seconds <- as.factor(
  paste(minute.key, second(df$datetime) %/% 30, sep = ":")
)
df$ten.seconds <- as.factor(
  paste(minute.key, second(df$datetime) %/% 10, sep = ":")
)
df$minute <- as.factor(minute.key)
# Keep only what the counting step needs: the `webname` column (as `web`),
# the `vhost` column (as `site`), and the three time-bucket labels.
sites.per.interval.df <- data.frame(
  web            = df$webname,
  site           = df$vhost,
  minute         = df$minute,
  thirty.seconds = df$thirty.seconds,
  ten.seconds    = df$ten.seconds
)

# For each (time bucket, web) pair, count how many distinct sites appear.
# One result table per bucket width; each has the bucket column, `web`, and
# the count in `numsites`.
sites.per.minute <- ddply(
  .data      = sites.per.interval.df,
  .variables = c("minute", "web"),
  .fun       = summarise,
  numsites   = length(unique(site))
)

sites.per.thirty.seconds <- ddply(
  .data      = sites.per.interval.df,
  .variables = c("thirty.seconds", "web"),
  .fun       = summarise,
  numsites   = length(unique(site))
)

sites.per.ten.seconds <- ddply(
  .data      = sites.per.interval.df,
  .variables = c("ten.seconds", "web"),
  .fun       = summarise,
  numsites   = length(unique(site))
)
# Bar charts, one panel per `web` value stacked vertically, showing how many
# time buckets had 1, 2, 3, ... distinct sites. One chart per bucket width.
#
# `numsites` is converted to a factor, so the x axis is discrete. A discrete
# axis needs `geom_bar()`, which counts rows per level. `geom_histogram()`
# bins a continuous variable and, in ggplot2 >= 2.0, stops with an error when
# given a discrete x.
ggplot(
  sites.per.ten.seconds,
  aes(as.factor(numsites), fill = as.factor(numsites))
) +
  geom_bar() +
  facet_wrap(~web, ncol = 1) +
  xlab("Number of sites active within a period of 10 seconds")

ggplot(
  sites.per.thirty.seconds,
  aes(as.factor(numsites), fill = as.factor(numsites))
) +
  geom_bar() +
  facet_wrap(~web, ncol = 1) +
  xlab("Number of sites active within a period of 30 seconds")

ggplot(
  sites.per.minute,
  aes(as.factor(numsites), fill = as.factor(numsites))
) +
  geom_bar() +
  facet_wrap(~web, ncol = 1) +
  xlab("Number of sites active within a period of 1 minute")