inst— title: ‘Forecasting Assignment #2’ author: “Jerome Dixon” date: “September 6, 2017” output: html_document —
Develop a forecast for the call volume for products 3 and 4 using leading indicators and/or dummy time and season variables. You should justify the criteria you choose for selecting your best model and the reasoning behind your model development.
library(readr) # read in data table
library(tidyverse) # data manipulation and visualization
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Warning: package 'purrr' was built under R version 3.4.1
## Warning: package 'dplyr' was built under R version 3.4.1
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
library(lubridate) # easily work with dates and times
## Warning: package 'lubridate' was built under R version 3.4.1
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(xts) # Overarching time series forecasting package
## Warning: package 'xts' was built under R version 3.4.1
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
library(forecast) # forecasting package
## Warning: package 'forecast' was built under R version 3.4.1
library(TTR) # decomposing time series trends/components
## Warning: package 'TTR' was built under R version 3.4.1
library(zoo) # helper package for working with time series data
library(ggfortify) # visualize time series objects with ggplot
##
## Attaching package: 'ggfortify'
## The following object is masked from 'package:forecast':
##
## gglagplot
Call_Volumes_vs_Nos_Accounts <- read_csv("~/DAPT/Q3/Forecasting Methods/Call Volumes vs Nos Accounts.csv")
## Parsed with column specification:
## cols(
## .default = col_integer(),
## Date = col_character()
## )
## See spec(...) for full column specifications.
View(Call_Volumes_vs_Nos_Accounts)
Calls <- as.data.frame(Call_Volumes_vs_Nos_Accounts)
Index and format data into time series objects.
as.Date(as.character(Calls$DATE),format="%Y%m%d")
## [1] "Date of length 0"
z <- read.zoo(Calls, index = 20, format = "%m/%d/%Y")
x <- xts(z$CallVolProd3)
x2 <- xts(z$CallVolProd4)
View(z)
View(x)
View(x2)
Visualize data to understand what is going on.
plot(x, col="darkblue", ylab="Call Volume Product #3")
Add Dummy Variables and Seasonality
library(lubridate)
Calls$Date <- mdy(Calls$Date)
## Adding Week
Calls$Week <- as.factor(strftime(Calls$Date, format ="%V"))
## Adding Month
Calls$Month <- factor(month(Calls$Date))
## Pulling out Date for plotting purposes
pDate <- as.Date(Calls$Date)
## Dummy Variables
Month.dummies <- model.matrix(~ 0 + Month, data = Calls)
Week.dummies <- model.matrix(~ 0 + Week, data = Calls)
colnames(Month.dummies) <- gsub("Month", "", colnames(Month.dummies))
colnames(Week.dummies) <- gsub("Week", "", colnames(Week.dummies))
dv <- as.data.frame(cbind(Month.dummies[,-12], Week.dummies[, -53]))
dv3 <- cbind(x,dv)
## Warning in cbind(x, dv): number of rows of result is not a multiple of
## vector length (arg 2)
dv4 <- cbind(x2,dv)
## Warning in cbind(x2, dv): number of rows of result is not a multiple of
## vector length (arg 2)
str(dv3)
## List of 146
## $ : int 63408
## $ : int 59471
## $ : int 60317
## $ : int 54997
## $ : int 68149
## $ : int 70927
## $ : int 70397
## $ : int 53244
## $ : int 53562
## $ : int 62253
## $ : int 66824
## $ : int 66891
## $ : int 64820
## $ : int 66797
## $ : int 62464
## $ : int 64815
## $ : int 66241
## $ : int 69790
## $ : int 62319
## $ : int 60715
## $ : int 64483
## $ : int 62861
## $ : int 61335
## $ : int 63242
## $ : int 65216
## $ : int 66229
## $ : int 72536
## $ : int 69956
## $ : int 70034
## $ : int 69142
## $ : int 66333
## $ : int 62948
## $ : int 71501
## $ : int 62779
## $ : int 71892
## $ : int 68323
## $ : int 67963
## $ : int 61336
## $ : int 66425
## $ : int 68948
## $ : int 65443
## $ : int 65907
## $ : int 59723
## $ : int 62315
## $ : int 54050
## $ : int 57969
## $ : int 73747
## $ : int 72013
## $ : int 73560
## $ : int 75002
## $ : int 84096
## $ : int 88236
## $ : int 95022
## $ : int 89457
## $ : int 95785
## $ : int 82841
## $ : int 107940
## $ : int 103384
## $ : int 102892
## $ : int 83473
## $ : int 84399
## $ : int 101109
## $ : int 95070
## $ : int 92645
## $ : int 92411
## $ : int 93386
## $ : int 93158
## $ : int 96814
## $ : int 94892
## $ : int 101500
## $ : int 97388
## $ : int 95490
## $ : int 94082
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 1 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 1 1 1 1 1 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## [list output truncated]
## - attr(*, "dim")= int [1:2] 73 2
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:2] "x" "dv"
str(dv4)
## List of 146
## $ : int 36534
## $ : int 43675
## $ : int 43644
## $ : int 35080
## $ : int 44229
## $ : int 39342
## $ : int 37673
## $ : int 24513
## $ : int 28547
## $ : int 35957
## $ : int 40721
## $ : int 41202
## $ : int 40073
## $ : int 40286
## $ : int 41939
## $ : int 51783
## $ : int 58216
## $ : int 52163
## $ : int 39459
## $ : int 31953
## $ : int 32340
## $ : int 29010
## $ : int 27678
## $ : int 28420
## $ : int 29265
## $ : int 29160
## $ : int 31414
## $ : int 33910
## $ : int 34871
## $ : int 33136
## $ : int 33310
## $ : int 24738
## $ : int 33108
## $ : int 26632
## $ : int 31527
## $ : int 28825
## $ : int 30897
## $ : int 26628
## $ : int 29930
## $ : int 30091
## $ : int 27749
## $ : int 27584
## $ : int 22329
## $ : int 21131
## $ : int 17141
## $ : int 21337
## $ : int 20254
## $ : int 18414
## $ : int 18074
## $ : int 18075
## $ : int 16466
## $ : int 14994
## $ : int 16619
## $ : int 14306
## $ : int 15217
## $ : int 12608
## $ : int 16389
## $ : int 14850
## $ : int 14635
## $ : int 10856
## $ : int 11650
## $ : int 15295
## $ : int 14846
## $ : int 15108
## $ : int 17531
## $ : int 17282
## $ : int 16223
## $ : int 17490
## $ : int 16246
## $ : int 17507
## $ : int 17304
## $ : int 16463
## $ : int 17515
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 1 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 1 1 1 1 1 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## $ : num [1:73] 0 0 0 0 0 0 0 0 0 0 ...
## [list output truncated]
## - attr(*, "dim")= int [1:2] 73 2
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:2] "x" "dv"
Train and Test Set
nTotal <- length(x)
tValid <- 20
tTrain <- nTotal - tValid
xTrain <- x[1:tTrain, ]
yTrain <- x[1:tTrain]
xValid <- x[(tTrain + 1):nTotal, ]
yValid <- x[(tTrain + 1):nTotal]
plot(x2, col="darkblue", ylab="Call Volume Product #4")
require(forecast)
pred <- Arima(x,c(1,1,0), seasonal=list(order=c(1,1,0), period=12), include.mean = FALSE)
plot(forecast(pred, h=10), include = 150, xlab = "Bi-Monthly",ylab = "Call Volume",lwd = 2,
col = 'red',main="Forecasting using ARIMA Model with Seasonality")
#lines(xValid)
summary(pred)
## Series: x
## ARIMA(1,1,0)(1,1,0)[12]
##
## Coefficients:
## ar1 sar1
## -0.2824 -0.5732
## s.e. 0.1278 0.1139
##
## sigma^2 estimated as 62331780: log likelihood=-624.99
## AIC=1255.98 AICc=1256.41 BIC=1262.26
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 236.7129 7037.321 5094.116 0.06774808 6.640522 0.06852125
## ACF1
## Training set -0.04815483
require(forecast)
pred2 <- auto.arima(xTrain, approximation = FALSE, trace = FALSE)
plot(forecast(pred2, h=10), include = 150, xlab = "Bi-Monthly",ylab = "Call Volume",lwd = 2,
col = 'red',main="Forecasting using ARIMA Model - Auto")
#lines(xValid)
summary(pred2)
## Series: xTrain
## ARIMA(0,1,0)
##
## sigma^2 estimated as 32323831: log likelihood=-523.36
## AIC=1048.72 AICc=1048.8 BIC=1050.67
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 597.6869 5631.514 4152.366 0.3842823 6.329305 0.06236258
## ACF1
## Training set -0.1497645