This document describes how to use time series analysis for better resource planning based on the forecasted volume of incident tickets. There are several ways in which analytics can support efficient service delivery and project/program management in the IT services industry, such as defect forecasting, complexity forecasting, and attrition forecasting. This is just one such example.
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(forecast)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Loading required package: timeDate
## This is forecast 6.1
library(tseries)
library(TTR)
## Loading required package: xts
##
## Attaching package: 'xts'
##
## The following objects are masked from 'package:dplyr':
##
## first, last
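## The raw data file isn't bundled with this write-up. To run the code end to
## end, a hypothetical stand-in can be generated first: 191 daily ticket
## counts in a single tab-separated column (matching the dim() output below),
## with a Poisson mean of ~6.6 to mirror the series mean reported by arima()
## later. This is simulated data, not the original tickets.
set.seed(42)  # reproducibility for the simulated stand-in
write.table(rpois(191, lambda = 6.6), file = "ticket.txt",
            sep = "\t", row.names = FALSE, col.names = FALSE)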
p2 <- read.table(file="ticket.txt", sep="\t")
dim(p2)
## [1] 191 1
## Descriptive analysis: look at the time series plot.
## frequency = 7 treats the data as daily observations with a weekly cycle.
p2ts <- ts(p2, frequency = 7)
plot.ts(p2ts, type = 'l')
## The TS plot doesn't seem to indicate any trend or seasonality.
## Let's decompose it using the "additive" method to check.
decom <- decompose(p2ts, type = "additive")
plot(decom)
## decompose() seems to confirm no trend and no seasonality.
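## A rough numeric check on that visual impression: if the variance of the
## seasonal component is small relative to the remainder, seasonality is
## contributing little. (A quick heuristic, not a formal test.)
var(decom$seasonal, na.rm = TRUE) / var(decom$random, na.rm = TRUE)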
## Let's see what we get from an ETS model with bootstrapped intervals.
fit <- ets(p2ts)
fc <- forecast(fit, h = 28, simulate = TRUE, bootstrap = TRUE, npaths = 5000)
plot(fc)
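## Which model did ets() actually select? With no clear trend or seasonality
## we'd expect something like ETS(A,N,N), i.e. simple exponential smoothing,
## but it's worth confirming rather than assuming:
fit$method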
accuracy(fc)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.05776093 6.493462 4.630829 NaN Inf 0.875717 0.2078312
mean(fc$residuals)
## [1] 0.05776093
Box.test(fit$residuals, lag=20, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: fit$residuals
## X-squared = 42.2106, df = 20, p-value = 0.002596
acf(fc$residuals, lag.max = 20)
## Overlay a normal curve on the residual histogram to eyeball normality
x <- fc$residuals
h <- hist(x, breaks = 40, col = "red", main = "Histogram of Residuals")
xfit <- seq(min(x), max(x), length = 20)
yfit <- dnorm(xfit, mean = mean(x), sd = sd(x))
yfit <- yfit * diff(h$mids[1:2]) * length(x)  # scale density to count scale
lines(xfit, yfit, col = "blue", lwd = 2)
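## This histogram-plus-normal-curve check gets repeated for each model below,
## so let's wrap it in a small helper to avoid re-typing it:
plot_resid_hist <- function(x, breaks = 40) {
  h <- hist(x, breaks = breaks, col = "red", main = "Histogram of Residuals")
  xfit <- seq(min(x), max(x), length = 20)
  yfit <- dnorm(xfit, mean = mean(x), sd = sd(x)) * diff(h$mids[1:2]) * length(x)
  lines(xfit, yfit, col = "blue", lwd = 2)
}
## Note: recent releases of the forecast package (>= 8.0, newer than the 6.1
## loaded above) bundle the ACF, Ljung-Box and histogram diagnostics into a
## single checkresiduals() call.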
shapiro.test(fc$residuals)
##
## Shapiro-Wilk normality test
##
## data: fc$residuals
## W = 0.8756, p-value = 1.892e-11
## The above time series model doesn't look good enough:
## mean(fc$residuals) isn't close to zero, and the Ljung-Box test gives a
## p-value close to zero (it should be > 0.05), suggesting strong evidence of
## non-zero autocorrelations at lags 1-20.
## So, we should try other methods.
## Let's see how Holt-Winters looks without trend and seasonality
## (i.e., simple exponential smoothing).
fchw <- HoltWinters(p2ts, beta = FALSE, gamma = FALSE)
plot(fchw)
fchw$SSE
## [1] 9416.805
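## The estimated smoothing parameter shows how much weight recent
## observations get (alpha near 0 = long memory, alpha near 1 = reactive):
fchw$alpha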
## The SSE from above is quite high. So, let's try with trend and seasonality.
fchw1 <- HoltWinters(p2ts)
plot(fchw1)
fchw1$SSE
## [1] 8703.01
## Not much of a difference in SSE. Let's see how the forecast looks.
fchw2 <- forecast(fchw1, h = 28)
mean(fchw2$residuals)
## [1] -0.5247765
accuracy(fchw2)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.5247765 6.877425 4.711368 NaN Inf 0.8890321 0.1705149
plot(fchw2)
acf(fchw2$residuals, lag.max = 20)
Box.test(fchw2$residuals, lag=10, type="Ljung-Box")
##
## Box-Ljung test
##
## data: fchw2$residuals
## X-squared = 13.3876, df = 10, p-value = 0.2028
shapiro.test(fchw2$residuals)
##
## Shapiro-Wilk normality test
##
## data: fchw2$residuals
## W = 0.8921, p-value = 2.813e-10
plot.ts(fchw2$residuals)
plot_resid_hist(fchw2$residuals)
## The above time series model does look better:
## the Ljung-Box test gives a p-value > 0.05, suggesting little evidence of
## non-zero autocorrelations in the forecast errors at lags 1-10.
## The Shapiro test p-value is close to zero, though, so the residuals
## deviate from normality (visible in the histogram's tails).
## Also, the residual mean is negative; it could arguably be treated as zero,
## but a negative ticket count isn't a valid value in this case.
## So, we should try another method and see if the residual mean comes out
## closer to zero.
## Let's try an Autoregressive Integrated Moving Average (ARIMA) model.
## ARIMA models include an explicit model for the irregular component of a
## TS, which allows for non-zero autocorrelations in that component.
## Let's see if the TS is stationary
adf.test(p2ts)
## Warning in adf.test(p2ts): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: p2ts
## Dickey-Fuller = -4.1683, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
## From above test, TS looks stationary as p-value < 0.05
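## A complementary check: the KPSS test (also in tseries) reverses the
## hypotheses (its null is stationarity), so a large p-value here would
## corroborate the ADF result.
kpss.test(p2ts)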
## Let's find the model parameters for ARIMA and forecast using them
auto.arima(p2)
## Series: p2
## ARIMA(1,0,0) with non-zero mean
##
## Coefficients:
## ar1 intercept
## 0.3340 6.5833
## s.e. 0.0681 0.7233
##
## sigma^2 estimated as 44.56: log likelihood=-633.67
## AIC=1273.34 AICc=1273.47 BIC=1283.09
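## In plain terms, the selected ARIMA(1,0,0) with non-zero mean is an AR(1)
## around a constant level. Note that the "intercept" R reports is actually
## the series mean, so the fitted equation is approximately:
##   (y_t - 6.5833) = 0.3340 * (y_(t-1) - 6.5833) + e_t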
p2tsArima <- arima(p2ts, order = c(1,0,0))
p2tsArima
##
## Call:
## arima(x = p2ts, order = c(1, 0, 0))
##
## Coefficients:
## ar1 intercept
## 0.3340 6.5833
## s.e. 0.0681 0.7233
##
## sigma^2 estimated as 44.56: log likelihood = -633.67, aic = 1273.34
fc_p2tsArima <- forecast(p2tsArima, h = 28)
plot(fc_p2tsArima)
mean(fc_p2tsArima$residuals)
## [1] 0.01348488
acf(fc_p2tsArima$residuals, lag.max = 20)
Box.test(fc_p2tsArima$residuals, lag = 20, type="Ljung-Box")
##
## Box-Ljung test
##
## data: fc_p2tsArima$residuals
## X-squared = 28.4148, df = 20, p-value = 0.09994
accuracy(fc_p2tsArima)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.01348488 6.675144 4.988872 -Inf Inf 0.9434249 0.01537802
shapiro.test(fc_p2tsArima$residuals)
##
## Shapiro-Wilk normality test
##
## data: fc_p2tsArima$residuals
## W = 0.8493, p-value = 8.737e-13
plot.ts(fc_p2tsArima$residuals)
plot_resid_hist(fc_p2tsArima$residuals)
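## Before the final verdict, a side-by-side view of the three candidates'
## training-set errors (pulled from the accuracy() calls above) makes the
## comparison easier to eyeball:
rbind(ETS         = accuracy(fc)[1, c("ME", "RMSE", "MAE", "MASE")],
      HoltWinters = accuracy(fchw2)[1, c("ME", "RMSE", "MAE", "MASE")],
      ARIMA       = accuracy(fc_p2tsArima)[1, c("ME", "RMSE", "MAE", "MASE")])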
## The above time series model does look better than the previous ones:
## the Ljung-Box test gives a p-value > 0.05, suggesting little evidence of
## non-zero autocorrelations in the forecast errors at lags 1-20.
## The residual mean is also close to zero.
## The Shapiro test p-value is still near zero, so the residuals aren't
## normal; the prediction intervals (which assume normal errors) should be
## read with some caution.
## So statistically, the above ARIMA model seems to give the best prediction.
## If we ignore the negative mean residual of Holt-Winters, that model also
## looks good. The model should be run against future data and refined.
## But overall, this is an excellent starting point.
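## Finally, tying back to the resource-planning motivation: a sketch of how
## the 28-day ARIMA forecast could translate into staffing. The capacity
## figure of 8 tickets per agent per day is a made-up placeholder; substitute
## your team's actual throughput.
tickets_per_agent_per_day <- 8                 # hypothetical capacity figure
daily_fc <- as.numeric(fc_p2tsArima$mean)      # the 28 point forecasts
weekly_load <- tapply(daily_fc, rep(1:4, each = 7), sum)  # sum per week
ceiling(weekly_load / (tickets_per_agent_per_day * 7))    # agents per week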