This document describes how to use time series analysis for better resource planning based on the forecasted volume of incident tickets. There are several ways in which analytics can support efficient service delivery and project/program management in the IT services industry, such as defect forecasting, complexity forecasting, and attrition forecasting. This is just one such example.
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(forecast)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Loading required package: timeDate
## This is forecast 6.1
library(tseries)
library(TTR)
## Loading required package: xts
##
## Attaching package: 'xts'
##
## The following objects are masked from 'package:dplyr':
##
## first, last
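## The raw data file isn't bundled with this write-up. To run the code end to
## end, a hypothetical stand-in can be generated first: 191 daily ticket
## counts in a single tab-separated column (matching the dim() output below),
## with a Poisson mean of ~6.6 to mirror the series mean reported by arima()
## later. This is simulated data, not the original tickets.
set.seed(42)  # reproducibility for the simulated stand-in
write.table(rpois(191, lambda = 6.6), file = "ticket.txt",
            sep = "\t", row.names = FALSE, col.names = FALSE)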
p2 <- read.table(file="ticket.txt", sep="\t")
dim(p2)
## [1] 191 1
## Descriptive analysis: look at the time series plot.
## frequency = 7 treats the data as daily observations with a weekly cycle.
p2ts <- ts(p2, frequency = 7)
plot.ts(p2ts, type = 'l')
## The TS plot doesn't seem to indicate any trend or seasonality.
## Let's decompose it using the "additive" method to check.
decom <- decompose(p2ts, type = "additive")
plot(decom)
## decompose() seems to confirm no trend and no seasonality.
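## A rough numeric check on that visual impression: if the variance of the
## seasonal component is small relative to the remainder, seasonality is
## contributing little. (A quick heuristic, not a formal test.)
var(decom$seasonal, na.rm = TRUE) / var(decom$random, na.rm = TRUE)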
## Let's see what we get from an ETS model with bootstrapped intervals.
fit <- ets(p2ts)
fc <- forecast(fit, h = 28, simulate = TRUE, bootstrap = TRUE, npaths = 5000)
plot(fc)
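## Which model did ets() actually select? With no clear trend or seasonality
## we'd expect something like ETS(A,N,N), i.e. simple exponential smoothing,
## but it's worth confirming rather than assuming:
fit$method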
accuracy(fc)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.05776093 6.493462 4.630829 NaN Inf 0.875717 0.2078312
mean(fc$residuals)
## [1] 0.05776093
Box.test(fit$residuals, lag=20, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: fit$residuals
## X-squared = 42.2106, df = 20, p-value = 0.002596
acf(fc$residuals, lag.max = 20)
## Overlay a normal curve on the residual histogram to eyeball normality
x <- fc$residuals
h <- hist(x, breaks = 40, col = "red", main = "Histogram of Residuals")
xfit <- seq(min(x), max(x), length = 20)
yfit <- dnorm(xfit, mean = mean(x), sd = sd(x))
yfit <- yfit * diff(h$mids[1:2]) * length(x)  # scale density to count scale
lines(xfit, yfit, col = "blue", lwd = 2)
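## This histogram-plus-normal-curve check gets repeated for each model below,
## so let's wrap it in a small helper to avoid re-typing it:
plot_resid_hist <- function(x, breaks = 40) {
  h <- hist(x, breaks = breaks, col = "red", main = "Histogram of Residuals")
  xfit <- seq(min(x), max(x), length = 20)
  yfit <- dnorm(xfit, mean = mean(x), sd = sd(x)) * diff(h$mids[1:2]) * length(x)
  lines(xfit, yfit, col = "blue", lwd = 2)
}
## Note: recent releases of the forecast package (>= 8.0, newer than the 6.1
## loaded above) bundle the ACF, Ljung-Box and histogram diagnostics into a
## single checkresiduals() call.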
shapiro.test(fc$residuals)
##
## Shapiro-Wilk normality test
##
## data: fc$residuals
## W = 0.8756, p-value = 1.892e-11
## The above time series model doesn't look good enough:
## mean(fc$residuals) isn't close to zero, and the Ljung-Box test gives a
## p-value close to zero (it should be > 0.05), suggesting strong evidence of
## non-zero autocorrelations at lags 1-20.
## So, we should try other methods.
## Let's see how Holt-Winters looks without trend and seasonality
## (i.e., simple exponential smoothing).
fchw <- HoltWinters(p2ts, beta = FALSE, gamma = FALSE)
plot(fchw)
fchw$SSE
## [1] 9416.805
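## The estimated smoothing parameter shows how much weight recent
## observations get (alpha near 0 = long memory, alpha near 1 = reactive):
fchw$alpha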
## The SSE from above is quite high. So, let's try with trend and seasonality.
fchw1 <- HoltWinters(p2ts)
plot(fchw1)
fchw1$SSE
## [1] 8703.01
## Not much of a difference in SSE. Let's see how the forecast looks.
fchw2 <- forecast(fchw1, h = 28)
mean(fchw2$residuals)
## [1] -0.5247765
accuracy(fchw2)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.5247765 6.877425 4.711368 NaN Inf 0.8890321 0.1705149
plot(fchw2)
acf(fchw2$residuals, lag.max = 20)
Box.test(fchw2$residuals, lag=10, type="Ljung-Box")
##
## Box-Ljung test
##
## data: fchw2$residuals
## X-squared = 13.3876, df = 10, p-value = 0.2028
shapiro.test(fchw2$residuals)
##
## Shapiro-Wilk normality test
##
## data: fchw2$residuals
## W = 0.8921, p-value = 2.813e-10
plot.ts(fchw2$residuals)
plot_resid_hist(fchw2$residuals)
## The above time series model does look better:
## the Ljung-Box test gives a p-value > 0.05, suggesting little evidence of
## non-zero autocorrelations in the forecast errors at lags 1-10.
## The Shapiro test p-value is close to zero, though, so the residuals
## deviate from normality (visible in the histogram's tails).
## Also, the residual mean is negative; it could arguably be treated as zero,
## but a negative ticket count isn't a valid value in this case.
## So, we should try another method and see if the residual mean comes out
## closer to zero.
## Let's try an Autoregressive Integrated Moving Average (ARIMA) model.
## ARIMA models include an explicit model for the irregular component of a
## TS, which allows for non-zero autocorrelations in that component.
## Let's see if the TS is stationary
adf.test(p2ts)
## Warning in adf.test(p2ts): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: p2ts
## Dickey-Fuller = -4.1683, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
## From above test, TS looks stationary as p-value < 0.05
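## A complementary check: the KPSS test (also in tseries) reverses the
## hypotheses (its null is stationarity), so a large p-value here would
## corroborate the ADF result.
kpss.test(p2ts)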
## Let's find the model parameters for ARIMA and forecast using them
auto.arima(p2)
## Series: p2
## ARIMA(1,0,0) with non-zero mean
##
## Coefficients:
## ar1 intercept
## 0.3340 6.5833
## s.e. 0.0681 0.7233
##
## sigma^2 estimated as 44.56: log likelihood=-633.67
## AIC=1273.34 AICc=1273.47 BIC=1283.09
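## In plain terms, the selected ARIMA(1,0,0) with non-zero mean is an AR(1)
## around a constant level. Note that the "intercept" R reports is actually
## the series mean, so the fitted equation is approximately:
##   (y_t - 6.5833) = 0.3340 * (y_(t-1) - 6.5833) + e_t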
p2tsArima <- arima(p2ts, order = c(1,0,0))
p2tsArima
##
## Call:
## arima(x = p2ts, order = c(1, 0, 0))
##
## Coefficients:
## ar1 intercept
## 0.3340 6.5833
## s.e. 0.0681 0.7233
##
## sigma^2 estimated as 44.56: log likelihood = -633.67, aic = 1273.34
fc_p2tsArima <- forecast(p2tsArima, h = 28)
plot(fc_p2tsArima)
mean(fc_p2tsArima$residuals)
## [1] 0.01348488
acf(fc_p2tsArima$residuals, lag.max = 20)
Box.test(fc_p2tsArima$residuals, lag = 20, type="Ljung-Box")
##
## Box-Ljung test
##
## data: fc_p2tsArima$residuals
## X-squared = 28.4148, df = 20, p-value = 0.09994
accuracy(fc_p2tsArima)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.01348488 6.675144 4.988872 -Inf Inf 0.9434249 0.01537802
shapiro.test(fc_p2tsArima$residuals)
##
## Shapiro-Wilk normality test
##
## data: fc_p2tsArima$residuals
## W = 0.8493, p-value = 8.737e-13
plot.ts(fc_p2tsArima$residuals)
plot_resid_hist(fc_p2tsArima$residuals)
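## Before the final verdict, a side-by-side view of the three candidates'
## training-set errors (pulled from the accuracy() calls above) makes the
## comparison easier to eyeball:
rbind(ETS         = accuracy(fc)[1, c("ME", "RMSE", "MAE", "MASE")],
      HoltWinters = accuracy(fchw2)[1, c("ME", "RMSE", "MAE", "MASE")],
      ARIMA       = accuracy(fc_p2tsArima)[1, c("ME", "RMSE", "MAE", "MASE")])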
## The above time series model does look better than the previous ones:
## the Ljung-Box test gives a p-value > 0.05, suggesting little evidence of
## non-zero autocorrelations in the forecast errors at lags 1-20.
## The residual mean is also close to zero.
## The Shapiro test p-value is still near zero, so the residuals aren't
## normal; the prediction intervals (which assume normal errors) should be
## read with some caution.
## So statistically, the above ARIMA model seems to give the best prediction.
## If we ignore the negative mean residual of Holt-Winters, that model also
## looks good. The model should be run against future data and refined.
## But overall, this is an excellent starting point.
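## Finally, tying back to the resource-planning motivation: a sketch of how
## the 28-day ARIMA forecast could translate into staffing. The capacity
## figure of 8 tickets per agent per day is a made-up placeholder; substitute
## your team's actual throughput.
tickets_per_agent_per_day <- 8                 # hypothetical capacity figure
daily_fc <- as.numeric(fc_p2tsArima$mean)      # the 28 point forecasts
weekly_load <- tapply(daily_fc, rep(1:4, each = 7), sum)  # sum per week
ceiling(weekly_load / (tickets_per_agent_per_day * 7))    # agents per week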