Data Preparation

Library Load

library(dplyr)
library(data.table)
library(ggplot2)
library(car)
library(lubridate)

Data Load

  1. Gather list of files in specified folder;
  2. Iterate list and build data frame.
callFiles <- list.files("C:/R/Datasets/clients/", full.names = TRUE)
callsDF <- data.frame()

for (i in 1:length(callFiles)) {
    callsDF <- rbind(callsDF, read.csv(callFiles[i], stringsAsFactors = T))
}

Data transformations

  1. Rename columns;
  2. Transform dates;
  3. Sort by date.
callsDF <- rename(callsDF, type = sys_class_name, opened = opened_at)
callsDF$opened <- as.Date(callsDF$opened, "%Y/%m/%d")
callsDF <- arrange(callsDF, opened)

Remove duplicate reference numbers

  1. Remove identical rows;
  2. Identify duplicate reference numbers;
  3. Remove duplicates.
callsDF <- unique(callsDF)
ref <- table(callsDF$number)
dupes <- names(ref[ref > 1])
duplicates <- callsDF[callsDF$number %in% dupes,]
duplicates <- select(duplicates, 1:2)
duplicates <- unique(duplicates)
dupes <- as.vector(rownames(duplicates))
callsDF <- callsDF[!(rownames(callsDF) %in% dupes),]
duplicates
##        number     opened
## 55243 1155242 2017-09-21
## 55246 1155242 2017-09-22
## 55287 1155242 2017-09-23
## 55288 1155242 2017-09-24

Remove all but the last 12 months’ data

Checks for the last/latest date in range, and subtracts 365 days. If less than 365 days’ data is supplied, then all data will be listed

lastDate <- range(callsDF$opened)[2]
aYearAgo <- lastDate - 365
callsDF <- callsDF[!(callsDF$opened < aYearAgo),]
range(callsDF$opened)
## [1] "2016-10-01" "2017-09-22"
dim(callsDF)
## [1] 55284     3
head(callsDF)
##    number     opened    type
## 1 1100000 2016-10-01 Request
## 2 1100001 2016-10-01 Request
## 3 1100002 2016-10-01 Request
## 4 1100003 2016-10-01 Request
## 5 1100004 2016-10-01 Request
## 6 1100005 2016-10-01 Request
summary(callsDF$type)
## Change request       Incident        Request 
##            457          30167          24660

Extract Data

Build calls frequency table

Build a frequency / contingency table summarising calls

calls <- as.data.frame.matrix(table(callsDF$opened, callsDF$type))
calls$Total <- rowSums(calls)
dates <- rownames(calls)
calls$Opened <- dates
row.names(calls) <- 1:nrow(calls)
calls$Opened <- as.Date(calls$Opened, "%Y-%m-%d")
calls$Month <- month(calls$Opened, label = T)
calls <- rename(calls, Change = `Change request`)
calls$Day <- weekdays(calls$Opened)
calls$Day <- factor(calls$Day, levels = c("Monday", "Tuesday", "Wednesday",
                                          "Thursday", "Friday", "Saturday", "Sunday"))

Calculate perHour call Rate (Poisson Lambda)

calls$perHour <- round(calls$Total/24,0)

Create calls dataframe

calls <- select(calls, c(Month, Opened, Day, Change, Incident,
                         Request, Total, perHour))

Daily call Lambdas (for all months)

callLambdas <- select(calls, c(Day, perHour))
callLambdas <- group_by(callLambdas, Day)
callLambdas <- summarise(callLambdas, Lambda = mean(perHour))
callLambdas$Lambda <- round(callLambdas$Lambda, 0)
callLambdas$dayNum <- as.integer(factor(callLambdas$Day,
                                        levels = c("Monday", "Tuesday",
                                                   "Wednesday", "Thursday",
                                                   "Friday", "Saturday", "Sunday"),
                                        ordered = T))
callLambdas <- arrange(callLambdas, dayNum)
callLambdas <- within(callLambdas, rm(dayNum))
head(callLambdas)
## # A tibble: 6 x 2
##         Day Lambda
##      <fctr>  <dbl>
## 1    Monday     10
## 2   Tuesday      9
## 3 Wednesday      9
## 4  Thursday      8
## 5    Friday      6
## 6  Saturday      2

Generate Lambdas for month and day

wkdays <- c("Monday", "Tuesday", "Wednesday",
            "Thursday", "Friday", "Saturday", "Sunday")
mths <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug",
          "Sep", "Oct", "Nov", "Dec")
callRates <- data.frame(row.names = mths)


for (m in 1:12) {
    for (d in 1:7) {
        lambdas <- calls %>%
            filter(Month == mths[m]) %>%
            filter(Day == wkdays[d]) %>%
            select(perHour)
        callRates[m,d] <- round(mean(lambdas$perHour),0)
    }
}

colnames(callRates) <- wkdays
callRates
##     Monday Tuesday Wednesday Thursday Friday Saturday Sunday
## Jan      9      10        10        8      5        0      2
## Feb     11       8         9        8      6        1      2
## Mar      9       9         9        8      6        1      2
## Apr      8       9         8        5      6        1      2
## May      8       9         8        8      6        2      2
## Jun      9       8         9        8      7        2      3
## Jul     13      11        10        9      7        1      2
## Aug     12      11         9        9      8        1      1
## Sep     11       7         8        7      6        1      2
## Oct      9       8         8        7      7        2      3
## Nov     10       9         8        9      8        2      3
## Dec      7       7         6        7      5        2      3

Analysis

Total calls per month

Total call volumes per month by weekday

Heatmap of Call Rates