library(ggplot2)
library(plyr)
library(lubridate)
## Note (from the console when this was run): attaching 'lubridate' after
## 'plyr' masks the object `here` from 'package:plyr'.
# Load the log extract from the user's home directory, keeping text columns as
# character vectors rather than factors.
df <- read.csv(file = "~/wmg_logs.csv", stringsAsFactors = FALSE)

# Parse the `datetime` strings (day/abbreviated-month/year:hour:minute:second
# followed by a UTC offset) into a new `parsed.datetime` column.
df[["parsed.datetime"]] <- parse_date_time(
  df$datetime,
  orders = "%d/%b/%Y:%H:%M:%S %z"
)
# Derive one grouping key per row for each time-bucket width (1 second,
# 30 seconds, 10 seconds, 1 minute) from `df$parsed.datetime`, stored as
# factor columns `second`, `thirty.seconds`, `ten.seconds` and `minute`.
#
# Key construction notes:
#   * The components are joined with "-" so that different timestamps can never
#     collapse onto the same key (without a separator, month 1 / day 12 and
#     month 11 / day 2 would both start with "112").
#   * The year is part of the key so that logs spanning more than one year are
#     not merged.
#   * Sub-minute buckets use integer division of the seconds field:
#     `%/% 30` yields 0-1 and `%/% 10` yields 0-5, i.e. true 30 s and 10 s
#     buckets. (A single-breakpoint findInterval(x, 10) only distinguishes
#     "< 10" from ">= 10".)
#
# The work is done inside local() so the temporary helpers do not leak into the
# global environment; only the updated `df` is returned and reassigned.
df <- local({
  ts <- df$parsed.datetime
  sec <- second(ts)

  # Shared prefix identifying the calendar minute of each row.
  minute.key <- paste(year(ts), month(ts), day(ts), hour(ts), minute(ts),
                      sep = "-")

  df$second <- as.factor(paste(minute.key, sec, sep = "-"))
  df$thirty.seconds <- as.factor(paste(minute.key, sec %/% 30, sep = "-"))
  df$ten.seconds <- as.factor(paste(minute.key, sec %/% 10, sep = "-"))
  df$minute <- as.factor(minute.key)
  df
})
# Narrow working table: the site of each row alongside its minute, 30-second
# and 10-second bucket keys.
sites.per.interval.df <- data.frame(
  site = df$site,
  minute = df$minute,
  thirty.seconds = df$thirty.seconds,
  ten.seconds = df$ten.seconds
)

# For every bucket of each width, count how many distinct sites appear in it.
# Each result has one row per bucket and a `numsites` column.
sites.per.minute <- ddply(
  .data = sites.per.interval.df,
  .variables = "minute",
  .fun = summarise,
  numsites = length(unique(site))
)

sites.per.thirty.seconds <- ddply(
  .data = sites.per.interval.df,
  .variables = "thirty.seconds",
  .fun = summarise,
  numsites = length(unique(site))
)

sites.per.ten.seconds <- ddply(
  .data = sites.per.interval.df,
  .variables = "ten.seconds",
  .fun = summarise,
  numsites = length(unique(site))
)
# Distribution of the number of distinct sites seen per time bucket, one chart
# per bucket width (10 seconds, 30 seconds, 1 minute).
#
# `numsites` is converted to a factor so that every distinct count gets its own
# bar and fill colour. geom_bar() is used because it counts rows per discrete
# x value; geom_histogram() bins a continuous x and rejects a factor in current
# ggplot2 releases.
ggplot(sites.per.ten.seconds,
       aes(as.factor(numsites), fill = as.factor(numsites))) +
  geom_bar() +
  xlab("Number of sites active within a period of 10 seconds")

ggplot(sites.per.thirty.seconds,
       aes(as.factor(numsites), fill = as.factor(numsites))) +
  geom_bar() +
  xlab("Number of sites active within a period of 30 seconds")

ggplot(sites.per.minute,
       aes(as.factor(numsites), fill = as.factor(numsites))) +
  geom_bar() +
  xlab("Number of sites active within a period of 1 minute")