inst— title: ‘Forecasting Assignment #2’ author: “Jerome Dixon” date: “September 6, 2017” output: html_document —

Develop a forecast for the call volume for products 3 and 4 using leading indicators and/or dummy time and season variables. You should justify the criteria you choose for selecting your best model and the reasoning behind your model development.

library(readr)          # read in data table
library(tidyverse)      # data manipulation and visualization
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Warning: package 'purrr' was built under R version 3.4.1
## Warning: package 'dplyr' was built under R version 3.4.1
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
library(lubridate)      # easily work with dates and times
## Warning: package 'lubridate' was built under R version 3.4.1
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(xts)            # Overarching time series forecasting package
## Warning: package 'xts' was built under R version 3.4.1
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
library(forecast)       # forecasting package
## Warning: package 'forecast' was built under R version 3.4.1
library(TTR)            # decomposing time series trends/components
## Warning: package 'TTR' was built under R version 3.4.1
library(zoo)            # helper package for working with time series data
library(ggfortify)      # visualize time series objects with ggplot
## 
## Attaching package: 'ggfortify'
## The following object is masked from 'package:forecast':
## 
##     gglagplot
Call_Volumes_vs_Nos_Accounts <- read_csv("~/DAPT/Q3/Forecasting Methods/Call Volumes vs Nos Accounts.csv")
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   Date = col_character()
## )
## See spec(...) for full column specifications.
View(Call_Volumes_vs_Nos_Accounts)
Calls <- as.data.frame(Call_Volumes_vs_Nos_Accounts)

Index and format data into time series objects.

as.Date(as.character(Calls$DATE),format="%Y%m%d")
## [1] "Date of length 0"
z <- read.zoo(Calls, index = 20, format = "%m/%d/%Y")
x <- xts(z$CallVolProd3)
x2 <- xts(z$CallVolProd4)
View(z)
View(x)
View(x2)

Visualize data to understand what is going on.

plot(x, col="darkblue", ylab="Call Volume Product #3")

OBSERVATIONS:

  1. Something causes a systemic shift October 2010.
  2. By March 2011 system appears to be back at equilibrium.
  3. Variability appears constant and similar to original variability after systemic shift occurred.
  4. No Visibe Seasonality. No Multiplication. Additive step…positive.

Add Dummy Variables and Seasonality

library(lubridate)
Calls$Date <- mdy(Calls$Date)

## Adding Week
Calls$Week <- as.factor(strftime(Calls$Date, format ="%V"))

## Adding Month
Calls$Month <- factor(month(Calls$Date))

## Pulling out Date for plotting purposes
pDate <- as.Date(Calls$Date)

## Dummy Variables
Month.dummies <- model.matrix(~ 0 + Month, data = Calls)
Week.dummies <- model.matrix(~ 0 + Week, data = Calls)

colnames(Month.dummies) <- gsub("Month", "", colnames(Month.dummies))
colnames(Week.dummies) <- gsub("Week", "", colnames(Week.dummies))

dv <- as.data.frame(cbind(Month.dummies[,-12], Week.dummies[, -53]))

dv3 <- cbind(x,dv)
## Warning in cbind(x, dv): number of rows of result is not a multiple of
## vector length (arg 2)
dv4 <- cbind(x2,dv)
## Warning in cbind(x2, dv): number of rows of result is not a multiple of
## vector length (arg 2)
str(dv3)
## List of 146
##  $ : int 63408
##  $ : int 59471
##  $ : int 60317
##  $ : int 54997
##  $ : int 68149
##  $ : int 70927
##  $ : int 70397
##  $ : int 53244
##  $ : int 53562
##  $ : int 62253
##  $ : int 66824
##  $ : int 66891
##  $ : int 64820
##  $ : int 66797
##  $ : int 62464
##  $ : int 64815
##  $ : int 66241
##  $ : int 69790
##  $ : int 62319
##  $ : int 60715
##  $ : int 64483
##  $ : int 62861
##  $ : int 61335
##  $ : int 63242
##  $ : int 65216
##  $ : int 66229
##  $ : int 72536
##  $ : int 69956
##  $ : int 70034
##  $ : int 69142
##  $ : int 66333
##  $ : int 62948
##  $ : int 71501
##  $ : int 62779
##  $ : int 71892
##  $ : int 68323
##  $ : int 67963
##  $ : int 61336
##  $ : int 66425
##  $ : int 68948
##  $ : int 65443
##  $ : int 65907
##  $ : int 59723
##  $ : int 62315
##  $ : int 54050
##  $ : int 57969
##  $ : int 73747
##  $ : int 72013
##  $ : int 73560
##  $ : int 75002
##  $ : int 84096
##  $ : int 88236
##  $ : int 95022
##  $ : int 89457
##  $ : int 95785
##  $ : int 82841
##  $ : int 107940
##  $ : int 103384
##  $ : int 102892
##  $ : int 83473
##  $ : int 84399
##  $ : int 101109
##  $ : int 95070
##  $ : int 92645
##  $ : int 92411
##  $ : int 93386
##  $ : int 93158
##  $ : int 96814
##  $ : int 94892
##  $ : int 101500
##  $ : int 97388
##  $ : int 95490
##  $ : int 94082
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 1 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 1 1 1 1 1 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##   [list output truncated]
##  - attr(*, "dim")= int [1:2] 73 2
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:2] "x" "dv"
str(dv4)
## List of 146
##  $ : int 36534
##  $ : int 43675
##  $ : int 43644
##  $ : int 35080
##  $ : int 44229
##  $ : int 39342
##  $ : int 37673
##  $ : int 24513
##  $ : int 28547
##  $ : int 35957
##  $ : int 40721
##  $ : int 41202
##  $ : int 40073
##  $ : int 40286
##  $ : int 41939
##  $ : int 51783
##  $ : int 58216
##  $ : int 52163
##  $ : int 39459
##  $ : int 31953
##  $ : int 32340
##  $ : int 29010
##  $ : int 27678
##  $ : int 28420
##  $ : int 29265
##  $ : int 29160
##  $ : int 31414
##  $ : int 33910
##  $ : int 34871
##  $ : int 33136
##  $ : int 33310
##  $ : int 24738
##  $ : int 33108
##  $ : int 26632
##  $ : int 31527
##  $ : int 28825
##  $ : int 30897
##  $ : int 26628
##  $ : int 29930
##  $ : int 30091
##  $ : int 27749
##  $ : int 27584
##  $ : int 22329
##  $ : int 21131
##  $ : int 17141
##  $ : int 21337
##  $ : int 20254
##  $ : int 18414
##  $ : int 18074
##  $ : int 18075
##  $ : int 16466
##  $ : int 14994
##  $ : int 16619
##  $ : int 14306
##  $ : int 15217
##  $ : int 12608
##  $ : int 16389
##  $ : int 14850
##  $ : int 14635
##  $ : int 10856
##  $ : int 11650
##  $ : int 15295
##  $ : int 14846
##  $ : int 15108
##  $ : int 17531
##  $ : int 17282
##  $ : int 16223
##  $ : int 17490
##  $ : int 16246
##  $ : int 17507
##  $ : int 17304
##  $ : int 16463
##  $ : int 17515
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 1 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 1 1 1 1 1 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
##   [list output truncated]
##  - attr(*, "dim")= int [1:2] 73 2
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:2] "x" "dv"

Train and Test Set

nTotal <- length(x)

tValid <- 20
tTrain <- nTotal - tValid

xTrain <- x[1:tTrain, ]
yTrain <- x[1:tTrain]

xValid <- x[(tTrain + 1):nTotal, ]
yValid <- x[(tTrain + 1):nTotal]
plot(x2, col="darkblue", ylab="Call Volume Product #4")

OBSERVATIONS:

  1. Big spike mid February 2010.
  2. After spike gradual decrease until December 2010.
  3. After January 2011 system appears to be at equilibrium.
  4. No Seasonality. No Multiplication. Additive step but negative.

Choose best forecasting model.

require(forecast)

pred <- Arima(x,c(1,1,0), seasonal=list(order=c(1,1,0), period=12), include.mean = FALSE)
plot(forecast(pred, h=10), include = 150, xlab = "Bi-Monthly",ylab = "Call Volume",lwd = 2,
     col = 'red',main="Forecasting using ARIMA Model with Seasonality")

#lines(xValid)

Test Set

summary(pred)
## Series: x 
## ARIMA(1,1,0)(1,1,0)[12]                    
## 
## Coefficients:
##           ar1     sar1
##       -0.2824  -0.5732
## s.e.   0.1278   0.1139
## 
## sigma^2 estimated as 62331780:  log likelihood=-624.99
## AIC=1255.98   AICc=1256.41   BIC=1262.26
## 
## Training set error measures:
##                    ME     RMSE      MAE        MPE     MAPE       MASE
## Training set 236.7129 7037.321 5094.116 0.06774808 6.640522 0.06852125
##                     ACF1
## Training set -0.04815483
require(forecast)

pred2 <- auto.arima(xTrain, approximation = FALSE, trace = FALSE)
plot(forecast(pred2, h=10), include = 150, xlab = "Bi-Monthly",ylab = "Call Volume",lwd = 2,
     col = 'red',main="Forecasting using ARIMA Model - Auto")

#lines(xValid)
summary(pred2)
## Series: xTrain 
## ARIMA(0,1,0)                    
## 
## sigma^2 estimated as 32323831:  log likelihood=-523.36
## AIC=1048.72   AICc=1048.8   BIC=1050.67
## 
## Training set error measures:
##                    ME     RMSE      MAE       MPE     MAPE       MASE
## Training set 597.6869 5631.514 4152.366 0.3842823 6.329305 0.06236258
##                    ACF1
## Training set -0.1497645