library(dplyr)
library(data.table)
library(ggplot2)
library(car)
library(lubridate)
callFiles <- list.files("C:/R/Datasets/clients/", full.names = TRUE)
callsDF <- data.frame()
for (i in 1:length(callFiles)) {
callsDF <- rbind(callsDF, read.csv(callFiles[i], stringsAsFactors = T))
}
callsDF <- rename(callsDF, type = sys_class_name, opened = opened_at)
callsDF$opened <- as.Date(callsDF$opened, "%Y/%m/%d")
callsDF <- arrange(callsDF, opened)
callsDF <- unique(callsDF)
ref <- table(callsDF$number)
dupes <- names(ref[ref > 1])
duplicates <- callsDF[callsDF$number %in% dupes,]
duplicates <- select(duplicates, 1:2)
duplicates <- unique(duplicates)
dupes <- as.vector(rownames(duplicates))
callsDF <- callsDF[!(rownames(callsDF) %in% dupes),]
duplicates
## number opened
## 55243 1155242 2017-09-21
## 55246 1155242 2017-09-22
## 55287 1155242 2017-09-23
## 55288 1155242 2017-09-24
Checks for the last/latest date in range, and subtracts 365 days. If less than 365 days’ data is supplied, then all data will be listed
lastDate <- range(callsDF$opened)[2]
aYearAgo <- lastDate - 365
callsDF <- callsDF[!(callsDF$opened < aYearAgo),]
range(callsDF$opened)
## [1] "2016-10-01" "2017-09-22"
dim(callsDF)
## [1] 55284 3
head(callsDF)
## number opened type
## 1 1100000 2016-10-01 Request
## 2 1100001 2016-10-01 Request
## 3 1100002 2016-10-01 Request
## 4 1100003 2016-10-01 Request
## 5 1100004 2016-10-01 Request
## 6 1100005 2016-10-01 Request
summary(callsDF$type)
## Change request Incident Request
## 457 30167 24660
Build a frequency / contingency table summarising calls
calls <- as.data.frame.matrix(table(callsDF$opened, callsDF$type))
calls$Total <- rowSums(calls)
dates <- rownames(calls)
calls$Opened <- dates
row.names(calls) <- 1:nrow(calls)
calls$Opened <- as.Date(calls$Opened, "%Y-%m-%d")
calls$Month <- month(calls$Opened, label = T)
calls <- rename(calls, Change = `Change request`)
calls$Day <- weekdays(calls$Opened)
calls$Day <- factor(calls$Day, levels = c("Monday", "Tuesday", "Wednesday",
"Thursday", "Friday", "Saturday", "Sunday"))
calls$perHour <- round(calls$Total/24,0)
calls <- select(calls, c(Month, Opened, Day, Change, Incident,
Request, Total, perHour))
callLambdas <- select(calls, c(Day, perHour))
callLambdas <- group_by(callLambdas, Day)
callLambdas <- summarise(callLambdas, Lambda = mean(perHour))
callLambdas$Lambda <- round(callLambdas$Lambda, 0)
callLambdas$dayNum <- as.integer(factor(callLambdas$Day,
levels = c("Monday", "Tuesday",
"Wednesday", "Thursday",
"Friday", "Saturday", "Sunday"),
ordered = T))
callLambdas <- arrange(callLambdas, dayNum)
callLambdas <- within(callLambdas, rm(dayNum))
head(callLambdas)
## # A tibble: 6 x 2
## Day Lambda
## <fctr> <dbl>
## 1 Monday 10
## 2 Tuesday 9
## 3 Wednesday 9
## 4 Thursday 8
## 5 Friday 6
## 6 Saturday 2
wkdays <- c("Monday", "Tuesday", "Wednesday",
"Thursday", "Friday", "Saturday", "Sunday")
mths <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug",
"Sep", "Oct", "Nov", "Dec")
callRates <- data.frame(row.names = mths)
for (m in 1:12) {
for (d in 1:7) {
lambdas <- calls %>%
filter(Month == mths[m]) %>%
filter(Day == wkdays[d]) %>%
select(perHour)
callRates[m,d] <- round(mean(lambdas$perHour),0)
}
}
colnames(callRates) <- wkdays
callRates
## Monday Tuesday Wednesday Thursday Friday Saturday Sunday
## Jan 9 10 10 8 5 0 2
## Feb 11 8 9 8 6 1 2
## Mar 9 9 9 8 6 1 2
## Apr 8 9 8 5 6 1 2
## May 8 9 8 8 6 2 2
## Jun 9 8 9 8 7 2 3
## Jul 13 11 10 9 7 1 2
## Aug 12 11 9 9 8 1 1
## Sep 11 7 8 7 6 1 2
## Oct 9 8 8 7 7 2 3
## Nov 10 9 8 9 8 2 3
## Dec 7 7 6 7 5 2 3