Read sections 6.4 and 6.5 of Practical Time Series Forecasting with R.

Develop a forecast for the call volume for products 4 using ARIMA models. You should justify the criteria you choose for selecting your best model and the reasoning behind your model development.

What is the best forecast you have developed for the call volume? Did you use exponential smoothing, regression, or ARIMA models for this forecast?

library(readr)          # read in data table
library(tidyverse)      # data manipulation and visualization
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Warning: package 'purrr' was built under R version 3.4.1
## Warning: package 'dplyr' was built under R version 3.4.1
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
library(lubridate)      # easily work with dates and times
## Warning: package 'lubridate' was built under R version 3.4.1
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(xts)            # Overarching time series forecasting package
## Warning: package 'xts' was built under R version 3.4.1
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
library(forecast)       # forecasting package
## Warning: package 'forecast' was built under R version 3.4.1
library(TTR)            # decomposing time series trends/components
## Warning: package 'TTR' was built under R version 3.4.1
library(zoo)            # helper package for working with time series data
library(ggfortify)      # visualize time series objects with ggplot
## 
## Attaching package: 'ggfortify'
## The following object is masked from 'package:forecast':
## 
##     gglagplot
library(caTools)        # Split data into training and test sets
## Warning: package 'caTools' was built under R version 3.4.2
Call_Volumes <- read_csv("~/DAPT/Q3/Forecasting Methods/Call Volumes vs Nos Accounts.csv")
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   Date = col_character()
## )
## See spec(...) for full column specifications.
View(Call_Volumes)
Calls <- as.data.frame(Call_Volumes)

Index and format data into time series objects.

z <- read.zoo(Calls, index = 20, format = "%m/%d/%Y")

Call_Volume_Product4_ts <- xts(z$CallVolProd4)

plot(Call_Volume_Product4_ts, col="darkblue")

x <- Call_Volume_Product4_ts

Linear Regression Model for Forecasting

Step 1: Prepare dataframe for lm model. Step 2: Create training and test data sets.

x4.lm <- Calls[,19:20]
Calls$Date <- as.Date(Calls$Date,format="%m/%d/%Y")
str(x4.lm)
## 'data.frame':    73 obs. of  2 variables:
##  $ CallVolProd4: int  36534 43675 43644 35080 44229 39342 37673 24513 28547 35957 ...
##  $ Date        : chr  "11/1/2009" "11/8/2009" "11/15/2009" "11/22/2009" ...
set.seed(97)
split <- sample.split(x4.lm$CallVolProd4, SplitRatio = 0.80)

#get training and test data
call_4_train <- subset(x4.lm, split == TRUE)
call_4_test <- subset(x4.lm, split == FALSE)

Create Dummy Variables for Seasonality: Week, Month and Quarter

Calls$Week  <- factor(week(Calls$Date))
Calls$Quarter <- factor(quarter(Calls$Date))
Calls$Month <- factor(month(Calls$Date))

dummies <- model.matrix(~ 0 + Month + Quarter + Week, data = Calls)

call4.model <- data.frame(cbind(call_4_train$CallVolProd4, dummies))
## Warning in cbind(call_4_train$CallVolProd4, dummies): number of rows of
## result is not a multiple of vector length (arg 1)
str(call4.model)
## 'data.frame':    73 obs. of  67 variables:
##  $ V1      : num  36534 43675 35080 28547 35957 ...
##  $ Month1  : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ Month2  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Month3  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Month4  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Month5  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Month6  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Month7  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Month8  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Month9  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Month10 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Month11 : num  1 1 1 1 1 0 0 0 0 0 ...
##  $ Month12 : num  0 0 0 0 0 1 1 1 1 0 ...
##  $ Quarter2: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Quarter3: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Quarter4: num  1 1 1 1 1 1 1 1 1 0 ...
##  $ Week2   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week3   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week4   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week5   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week6   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week7   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week8   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week9   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week10  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week11  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week12  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week13  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week14  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week15  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week16  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week17  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week18  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week19  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week20  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week21  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week22  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week23  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week24  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week25  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week26  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week27  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week28  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week29  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week30  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week31  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week32  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week33  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week34  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week35  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week36  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week37  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week38  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week39  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week40  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week41  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week42  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week43  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week44  : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ Week45  : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ Week46  : num  0 0 1 0 0 0 0 0 0 0 ...
##  $ Week47  : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ Week48  : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ Week49  : num  0 0 0 0 0 1 0 0 0 0 ...
##  $ Week50  : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ Week51  : num  0 0 0 0 0 0 0 1 0 0 ...
##  $ Week52  : num  0 0 0 0 0 0 0 0 1 0 ...
call4.test <- data.frame(cbind(call_4_test$CallVolProd4, dummies))
## Warning in cbind(call_4_test$CallVolProd4, dummies): number of rows of
## result is not a multiple of vector length (arg 1)
str(call4.test)
## 'data.frame':    73 obs. of  67 variables:
##  $ V1      : num  43644 44229 39342 37673 24513 ...
##  $ Month1  : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ Month2  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Month3  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Month4  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Month5  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Month6  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Month7  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Month8  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Month9  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Month10 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Month11 : num  1 1 1 1 1 0 0 0 0 0 ...
##  $ Month12 : num  0 0 0 0 0 1 1 1 1 0 ...
##  $ Quarter2: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Quarter3: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Quarter4: num  1 1 1 1 1 1 1 1 1 0 ...
##  $ Week2   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week3   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week4   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week5   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week6   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week7   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week8   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week9   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week10  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week11  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week12  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week13  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week14  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week15  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week16  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week17  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week18  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week19  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week20  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week21  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week22  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week23  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week24  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week25  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week26  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week27  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week28  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week29  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week30  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week31  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week32  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week33  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week34  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week35  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week36  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week37  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week38  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week39  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week40  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week41  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week42  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week43  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Week44  : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ Week45  : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ Week46  : num  0 0 1 0 0 0 0 0 0 0 ...
##  $ Week47  : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ Week48  : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ Week49  : num  0 0 0 0 0 1 0 0 0 0 ...
##  $ Week50  : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ Week51  : num  0 0 0 0 0 0 0 1 0 0 ...
##  $ Week52  : num  0 0 0 0 0 0 0 0 1 0 ...

Run Linear Regression Model (lm)

lm_CallVolume_Product4 <- lm(V1 ~ 0 + ., data = call4.model)

summary1 <- summary(lm_CallVolume_Product4)

summary1
## 
## Call:
## lm(formula = V1 ~ 0 + ., data = call4.model)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -15269  -1396      0   1396  15269 
## 
## Coefficients: (13 not defined because of singularities)
##          Estimate Std. Error t value Pr(>|t|)    
## Month1    35243.0     8122.4   4.339 0.000318 ***
## Month2    42947.0     8122.4   5.287 3.56e-05 ***
## Month3    33910.0    11486.8   2.952 0.007880 ** 
## Month4    24738.0    11486.8   2.154 0.043650 *  
## Month5    29930.0    11486.8   2.606 0.016920 *  
## Month6    22329.0    11486.8   1.944 0.066113 .  
## Month7    20254.0    11486.8   1.763 0.093133 .  
## Month8    15217.0    11486.8   1.325 0.200195    
## Month9    11650.0    11486.8   1.014 0.322596    
## Month10    7378.5    18162.2   0.406 0.688868    
## Month11   26630.5     8122.4   3.279 0.003756 ** 
## Month12   37683.0     8122.4   4.639 0.000158 ***
## Quarter2       NA         NA      NA       NA    
## Quarter3       NA         NA      NA       NA    
## Quarter4       NA         NA      NA       NA    
## Week2      8627.0    11486.8   0.751 0.461376    
## Week3     14225.5    11486.8   1.238 0.229898    
## Week4     11439.5    11486.8   0.996 0.331203    
## Week5      4523.0    11486.8   0.394 0.697927    
## Week6     -6827.5    11486.8  -0.594 0.558923    
## Week7     -5807.5    11486.8  -0.506 0.618677    
## Week8     -2550.5    11486.8  -0.222 0.826536    
## Week9          NA         NA      NA       NA    
## Week10     6381.5    14068.4   0.454 0.654996    
## Week11      452.0    14068.4   0.032 0.974688    
## Week12    -3353.5    14068.4  -0.238 0.814018    
## Week13         NA         NA      NA       NA    
## Week14    10133.0    16244.7   0.624 0.539828    
## Week15     8398.0    16244.7   0.517 0.610849    
## Week16     8572.0    16244.7   0.528 0.603528    
## Week17         NA         NA      NA       NA    
## Week18     1597.0    16244.7   0.098 0.922666    
## Week19    -1105.0    16244.7  -0.068 0.946444    
## Week20      967.0    16244.7   0.060 0.953123    
## Week21    -3302.0    16244.7  -0.203 0.840981    
## Week22         NA         NA      NA       NA    
## Week23     7762.0    16244.7   0.478 0.637962    
## Week24     5420.0    16244.7   0.334 0.742119    
## Week25     5255.0    16244.7   0.323 0.749683    
## Week26         NA         NA      NA       NA    
## Week27      877.0    16244.7   0.054 0.957481    
## Week28    -3113.0    16244.7  -0.192 0.849963    
## Week29     1083.0    16244.7   0.067 0.947508    
## Week30         NA         NA      NA       NA    
## Week31     3197.0    16244.7   0.197 0.845969    
## Week32     2857.0    16244.7   0.176 0.862163    
## Week33     2858.0    16244.7   0.176 0.862115    
## Week34     1402.0    16244.7   0.086 0.932082    
## Week35         NA         NA      NA       NA    
## Week36      958.0    16244.7   0.059 0.953559    
## Week37     3200.0    16244.7   0.197 0.845826    
## Week38     -794.0    16244.7  -0.049 0.961502    
## Week39         NA         NA      NA       NA    
## Week40     7916.5    21489.8   0.368 0.716457    
## Week41     7467.5    21489.8   0.347 0.731853    
## Week42     7729.5    21489.8   0.360 0.722854    
## Week43    10152.5    21489.8   0.472 0.641731    
## Week44     9903.5    14068.4   0.704 0.489576    
## Week45     3318.5    11486.8   0.289 0.775633    
## Week46     -345.5    11486.8  -0.030 0.976303    
## Week47    -3603.5    11486.8  -0.314 0.756991    
## Week48         NA         NA      NA       NA    
## Week49    -8565.0    11486.8  -0.746 0.464558    
## Week50     1185.0    11486.8   0.103 0.918862    
## Week51     4191.0    11486.8   0.365 0.719050    
## Week52         NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11490 on 20 degrees of freedom
## Multiple R-squared:  0.9656, Adjusted R-squared:  0.8745 
## F-statistic:  10.6 on 53 and 20 DF,  p-value: 2.224e-07

Updated Final Model based on p values

Product4 <- lm(V1 ~ 0 + Month1 + Month2 + Month3 + Month4 + Month5 + Month6 + Month7 + Month11 + Month12, data = call4.model)

summary2 <- summary(Product4)

summary2
## 
## Call:
## lm(formula = V1 ~ 0 + Month1 + Month2 + Month3 + Month4 + Month5 + 
##     Month6 + Month7 + Month11 + Month12, data = call4.model)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -19371  -2933   1371  10856  19065 
## 
## Coefficients:
##         Estimate Std. Error t value Pr(>|t|)    
## Month1     43006       3286  13.089  < 2e-16 ***
## Month2     39151       3674  10.658 8.08e-16 ***
## Month3     34904       3927   8.888 8.88e-13 ***
## Month4     31514       5195   6.066 7.81e-08 ***
## Month5     29561       4647   6.362 2.41e-08 ***
## Month6     26938       5195   5.185 2.36e-06 ***
## Month7     19966       5195   3.843 0.000282 ***
## Month11    27591       3463   7.966 3.70e-11 ***
## Month12    36886       3674  10.041 8.98e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10390 on 64 degrees of freedom
## Multiple R-squared:   0.91,  Adjusted R-squared:  0.8973 
## F-statistic:  71.9 on 9 and 64 DF,  p-value: < 2.2e-16
predict <- predict(Product4, env = call_4_test, se.fit = TRUE)

View(predict)

Call Volume 4 Forecast with Exponential Smoothing

ses(as.numeric(x), alpha=0.2, initial="simple", h=3)
##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 74       16743.06 9724.470 23761.65 6009.052 27477.07
## 75       16743.06 9585.474 23900.65 5796.477 27689.64
## 76       16743.06 9449.127 24036.99 5587.952 27898.17
fit1 <- ses(as.numeric(x), alpha=0.2, initial="simple", h=3)
fit2 <- ses(as.numeric(x), alpha=0.6, initial="simple", h=3)
fit3 <- ses(as.numeric(x), h=3)
plot(fit1,ylab="Product 4 Call Volume",
  xlab="Months", main="", fcol="white", type="o")
lines(fitted(fit1), col="blue", type="o")
lines(fitted(fit2), col="red", type="o")
lines(fitted(fit3), col="green", type="o")
lines(fit1$mean, col="blue", type="o")
lines(fit2$mean, col="red", type="o")
lines(fit3$mean, col="green", type="o")
legend("topleft",lty=1, col=c(1,"blue","red","green"), 
  c("data", expression(alpha == 0.2), expression(alpha == 0.6),
  expression(alpha == 0.89)),pch=1)

Final Smoothing Model

fit1 <- ets(as.numeric(x), model = "ZAZ")
summary(fit1)
## ETS(M,A,N) 
## 
## Call:
##  ets(y = as.numeric(x), model = "ZAZ") 
## 
##   Smoothing parameters:
##     alpha = 0.699 
##     beta  = 1e-04 
## 
##   Initial states:
##     l = 44506.8433 
##     b = -51.872 
## 
##   sigma:  0.1348
## 
##      AIC     AICc      BIC 
## 1511.907 1512.802 1523.359 
## 
## Training set error measures:
##                     ME     RMSE      MAE       MPE     MAPE     MASE
## Training set -456.9208 4403.507 3027.768 -2.862957 10.99699 1.004526
##                   ACF1
## Training set 0.1718114
options(repr.plot.width=12, repr.plot.height=7)
plot(fit1)
lines(fitted(fit1), col="red", lty=2)

plot(forecast(fit1,h=5),
  ylab="Forecasted Call Volumme: Product 4",
  xlab="Months", main="", fcol="blue", type="o")

## ARIMA Forecast for Call Volume Product #4

require(forecast)

pred <- Arima(x,c(0,1,0), seasonal=list(order=c(1,1,0), period=12), include.mean = FALSE)
plot(forecast(pred, h=10), include = 150, xlab = "Index",ylab = "Call Volume",lwd = 2,
     col = 'red',main="Forecasting using ARIMA Model with Seasonality")