Develop a forecast for the call volume for products 4 using ARIMA models. You should justify the criteria you choose for selecting your best model and the reasoning behind your model development.
What is the best forecast you have developed for the call volume? Did you use exponential smoothing, regression, or ARIMA models for this forecast?
library(readr) # read in data table
library(tidyverse) # data manipulation and visualization
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Warning: package 'purrr' was built under R version 3.4.1
## Warning: package 'dplyr' was built under R version 3.4.1
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
library(lubridate) # easily work with dates and times
## Warning: package 'lubridate' was built under R version 3.4.1
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(xts) # Overarching time series forecasting package
## Warning: package 'xts' was built under R version 3.4.1
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
library(forecast) # forecasting package
## Warning: package 'forecast' was built under R version 3.4.1
library(TTR) # decomposing time series trends/components
## Warning: package 'TTR' was built under R version 3.4.1
library(zoo) # helper package for working with time series data
library(ggfortify) # visualize time series objects with ggplot
##
## Attaching package: 'ggfortify'
## The following object is masked from 'package:forecast':
##
## gglagplot
library(caTools) # Split data into training and test sets
## Warning: package 'caTools' was built under R version 3.4.2
Call_Volumes <- read_csv("~/DAPT/Q3/Forecasting Methods/Call Volumes vs Nos Accounts.csv")
## Parsed with column specification:
## cols(
## .default = col_integer(),
## Date = col_character()
## )
## See spec(...) for full column specifications.
View(Call_Volumes)
Calls <- as.data.frame(Call_Volumes)
Index and format data into time series objects.
z <- read.zoo(Calls, index = 20, format = "%m/%d/%Y")
Call_Volume_Product4_ts <- xts(z$CallVolProd4)
plot(Call_Volume_Product4_ts, col="darkblue")
x <- Call_Volume_Product4_ts
Step 1: Prepare dataframe for lm model. Step 2: Create training and test data sets.
x4.lm <- Calls[,19:20]
Calls$Date <- as.Date(Calls$Date,format="%m/%d/%Y")
str(x4.lm)
## 'data.frame': 73 obs. of 2 variables:
## $ CallVolProd4: int 36534 43675 43644 35080 44229 39342 37673 24513 28547 35957 ...
## $ Date : chr "11/1/2009" "11/8/2009" "11/15/2009" "11/22/2009" ...
set.seed(97)
split <- sample.split(x4.lm$CallVolProd4, SplitRatio = 0.80)
#get training and test data
call_4_train <- subset(x4.lm, split == TRUE)
call_4_test <- subset(x4.lm, split == FALSE)
Calls$Week <- factor(week(Calls$Date))
Calls$Quarter <- factor(quarter(Calls$Date))
Calls$Month <- factor(month(Calls$Date))
dummies <- model.matrix(~ 0 + Month + Quarter + Week, data = Calls)
call4.model <- data.frame(cbind(call_4_train$CallVolProd4, dummies))
## Warning in cbind(call_4_train$CallVolProd4, dummies): number of rows of
## result is not a multiple of vector length (arg 1)
str(call4.model)
## 'data.frame': 73 obs. of 67 variables:
## $ V1 : num 36534 43675 35080 28547 35957 ...
## $ Month1 : num 0 0 0 0 0 0 0 0 0 1 ...
## $ Month2 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Month3 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Month4 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Month5 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Month6 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Month7 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Month8 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Month9 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Month10 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Month11 : num 1 1 1 1 1 0 0 0 0 0 ...
## $ Month12 : num 0 0 0 0 0 1 1 1 1 0 ...
## $ Quarter2: num 0 0 0 0 0 0 0 0 0 0 ...
## $ Quarter3: num 0 0 0 0 0 0 0 0 0 0 ...
## $ Quarter4: num 1 1 1 1 1 1 1 1 1 0 ...
## $ Week2 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week3 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week4 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week5 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week6 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week7 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week8 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week9 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week10 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week11 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week12 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week13 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week14 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week15 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week16 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week17 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week18 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week19 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week21 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week22 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week23 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week24 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week25 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week26 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week27 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week28 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week29 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week30 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week31 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week32 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week33 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week34 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week35 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week36 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week37 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week38 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week39 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week40 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week41 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week42 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week43 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week44 : num 1 0 0 0 0 0 0 0 0 0 ...
## $ Week45 : num 0 1 0 0 0 0 0 0 0 0 ...
## $ Week46 : num 0 0 1 0 0 0 0 0 0 0 ...
## $ Week47 : num 0 0 0 1 0 0 0 0 0 0 ...
## $ Week48 : num 0 0 0 0 1 0 0 0 0 0 ...
## $ Week49 : num 0 0 0 0 0 1 0 0 0 0 ...
## $ Week50 : num 0 0 0 0 0 0 1 0 0 0 ...
## $ Week51 : num 0 0 0 0 0 0 0 1 0 0 ...
## $ Week52 : num 0 0 0 0 0 0 0 0 1 0 ...
call4.test <- data.frame(cbind(call_4_test$CallVolProd4, dummies))
## Warning in cbind(call_4_test$CallVolProd4, dummies): number of rows of
## result is not a multiple of vector length (arg 1)
str(call4.test)
## 'data.frame': 73 obs. of 67 variables:
## $ V1 : num 43644 44229 39342 37673 24513 ...
## $ Month1 : num 0 0 0 0 0 0 0 0 0 1 ...
## $ Month2 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Month3 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Month4 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Month5 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Month6 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Month7 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Month8 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Month9 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Month10 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Month11 : num 1 1 1 1 1 0 0 0 0 0 ...
## $ Month12 : num 0 0 0 0 0 1 1 1 1 0 ...
## $ Quarter2: num 0 0 0 0 0 0 0 0 0 0 ...
## $ Quarter3: num 0 0 0 0 0 0 0 0 0 0 ...
## $ Quarter4: num 1 1 1 1 1 1 1 1 1 0 ...
## $ Week2 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week3 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week4 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week5 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week6 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week7 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week8 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week9 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week10 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week11 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week12 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week13 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week14 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week15 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week16 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week17 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week18 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week19 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week20 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week21 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week22 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week23 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week24 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week25 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week26 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week27 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week28 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week29 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week30 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week31 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week32 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week33 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week34 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week35 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week36 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week37 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week38 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week39 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week40 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week41 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week42 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week43 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Week44 : num 1 0 0 0 0 0 0 0 0 0 ...
## $ Week45 : num 0 1 0 0 0 0 0 0 0 0 ...
## $ Week46 : num 0 0 1 0 0 0 0 0 0 0 ...
## $ Week47 : num 0 0 0 1 0 0 0 0 0 0 ...
## $ Week48 : num 0 0 0 0 1 0 0 0 0 0 ...
## $ Week49 : num 0 0 0 0 0 1 0 0 0 0 ...
## $ Week50 : num 0 0 0 0 0 0 1 0 0 0 ...
## $ Week51 : num 0 0 0 0 0 0 0 1 0 0 ...
## $ Week52 : num 0 0 0 0 0 0 0 0 1 0 ...
lm_CallVolume_Product4 <- lm(V1 ~ 0 + ., data = call4.model)
summary1 <- summary(lm_CallVolume_Product4)
summary1
##
## Call:
## lm(formula = V1 ~ 0 + ., data = call4.model)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15269 -1396 0 1396 15269
##
## Coefficients: (13 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## Month1 35243.0 8122.4 4.339 0.000318 ***
## Month2 42947.0 8122.4 5.287 3.56e-05 ***
## Month3 33910.0 11486.8 2.952 0.007880 **
## Month4 24738.0 11486.8 2.154 0.043650 *
## Month5 29930.0 11486.8 2.606 0.016920 *
## Month6 22329.0 11486.8 1.944 0.066113 .
## Month7 20254.0 11486.8 1.763 0.093133 .
## Month8 15217.0 11486.8 1.325 0.200195
## Month9 11650.0 11486.8 1.014 0.322596
## Month10 7378.5 18162.2 0.406 0.688868
## Month11 26630.5 8122.4 3.279 0.003756 **
## Month12 37683.0 8122.4 4.639 0.000158 ***
## Quarter2 NA NA NA NA
## Quarter3 NA NA NA NA
## Quarter4 NA NA NA NA
## Week2 8627.0 11486.8 0.751 0.461376
## Week3 14225.5 11486.8 1.238 0.229898
## Week4 11439.5 11486.8 0.996 0.331203
## Week5 4523.0 11486.8 0.394 0.697927
## Week6 -6827.5 11486.8 -0.594 0.558923
## Week7 -5807.5 11486.8 -0.506 0.618677
## Week8 -2550.5 11486.8 -0.222 0.826536
## Week9 NA NA NA NA
## Week10 6381.5 14068.4 0.454 0.654996
## Week11 452.0 14068.4 0.032 0.974688
## Week12 -3353.5 14068.4 -0.238 0.814018
## Week13 NA NA NA NA
## Week14 10133.0 16244.7 0.624 0.539828
## Week15 8398.0 16244.7 0.517 0.610849
## Week16 8572.0 16244.7 0.528 0.603528
## Week17 NA NA NA NA
## Week18 1597.0 16244.7 0.098 0.922666
## Week19 -1105.0 16244.7 -0.068 0.946444
## Week20 967.0 16244.7 0.060 0.953123
## Week21 -3302.0 16244.7 -0.203 0.840981
## Week22 NA NA NA NA
## Week23 7762.0 16244.7 0.478 0.637962
## Week24 5420.0 16244.7 0.334 0.742119
## Week25 5255.0 16244.7 0.323 0.749683
## Week26 NA NA NA NA
## Week27 877.0 16244.7 0.054 0.957481
## Week28 -3113.0 16244.7 -0.192 0.849963
## Week29 1083.0 16244.7 0.067 0.947508
## Week30 NA NA NA NA
## Week31 3197.0 16244.7 0.197 0.845969
## Week32 2857.0 16244.7 0.176 0.862163
## Week33 2858.0 16244.7 0.176 0.862115
## Week34 1402.0 16244.7 0.086 0.932082
## Week35 NA NA NA NA
## Week36 958.0 16244.7 0.059 0.953559
## Week37 3200.0 16244.7 0.197 0.845826
## Week38 -794.0 16244.7 -0.049 0.961502
## Week39 NA NA NA NA
## Week40 7916.5 21489.8 0.368 0.716457
## Week41 7467.5 21489.8 0.347 0.731853
## Week42 7729.5 21489.8 0.360 0.722854
## Week43 10152.5 21489.8 0.472 0.641731
## Week44 9903.5 14068.4 0.704 0.489576
## Week45 3318.5 11486.8 0.289 0.775633
## Week46 -345.5 11486.8 -0.030 0.976303
## Week47 -3603.5 11486.8 -0.314 0.756991
## Week48 NA NA NA NA
## Week49 -8565.0 11486.8 -0.746 0.464558
## Week50 1185.0 11486.8 0.103 0.918862
## Week51 4191.0 11486.8 0.365 0.719050
## Week52 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11490 on 20 degrees of freedom
## Multiple R-squared: 0.9656, Adjusted R-squared: 0.8745
## F-statistic: 10.6 on 53 and 20 DF, p-value: 2.224e-07
Product4 <- lm(V1 ~ 0 + Month1 + Month2 + Month3 + Month4 + Month5 + Month6 + Month7 + Month11 + Month12, data = call4.model)
summary2 <- summary(Product4)
summary2
##
## Call:
## lm(formula = V1 ~ 0 + Month1 + Month2 + Month3 + Month4 + Month5 +
## Month6 + Month7 + Month11 + Month12, data = call4.model)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19371 -2933 1371 10856 19065
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## Month1 43006 3286 13.089 < 2e-16 ***
## Month2 39151 3674 10.658 8.08e-16 ***
## Month3 34904 3927 8.888 8.88e-13 ***
## Month4 31514 5195 6.066 7.81e-08 ***
## Month5 29561 4647 6.362 2.41e-08 ***
## Month6 26938 5195 5.185 2.36e-06 ***
## Month7 19966 5195 3.843 0.000282 ***
## Month11 27591 3463 7.966 3.70e-11 ***
## Month12 36886 3674 10.041 8.98e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10390 on 64 degrees of freedom
## Multiple R-squared: 0.91, Adjusted R-squared: 0.8973
## F-statistic: 71.9 on 9 and 64 DF, p-value: < 2.2e-16
predict <- predict(Product4, env = call_4_test, se.fit = TRUE)
View(predict)
ses(as.numeric(x), alpha=0.2, initial="simple", h=3)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 74 16743.06 9724.470 23761.65 6009.052 27477.07
## 75 16743.06 9585.474 23900.65 5796.477 27689.64
## 76 16743.06 9449.127 24036.99 5587.952 27898.17
fit1 <- ses(as.numeric(x), alpha=0.2, initial="simple", h=3)
fit2 <- ses(as.numeric(x), alpha=0.6, initial="simple", h=3)
fit3 <- ses(as.numeric(x), h=3)
plot(fit1,ylab="Product 4 Call Volume",
xlab="Months", main="", fcol="white", type="o")
lines(fitted(fit1), col="blue", type="o")
lines(fitted(fit2), col="red", type="o")
lines(fitted(fit3), col="green", type="o")
lines(fit1$mean, col="blue", type="o")
lines(fit2$mean, col="red", type="o")
lines(fit3$mean, col="green", type="o")
legend("topleft",lty=1, col=c(1,"blue","red","green"),
c("data", expression(alpha == 0.2), expression(alpha == 0.6),
expression(alpha == 0.89)),pch=1)
fit1 <- ets(as.numeric(x), model = "ZAZ")
summary(fit1)
## ETS(M,A,N)
##
## Call:
## ets(y = as.numeric(x), model = "ZAZ")
##
## Smoothing parameters:
## alpha = 0.699
## beta = 1e-04
##
## Initial states:
## l = 44506.8433
## b = -51.872
##
## sigma: 0.1348
##
## AIC AICc BIC
## 1511.907 1512.802 1523.359
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -456.9208 4403.507 3027.768 -2.862957 10.99699 1.004526
## ACF1
## Training set 0.1718114
options(repr.plot.width=12, repr.plot.height=7)
plot(fit1)
lines(fitted(fit1), col="red", lty=2)
plot(forecast(fit1,h=5),
ylab="Forecasted Call Volumme: Product 4",
xlab="Months", main="", fcol="blue", type="o")
## ARIMA Forecast for Call Volume Product #4
require(forecast)
pred <- Arima(x,c(0,1,0), seasonal=list(order=c(1,1,0), period=12), include.mean = FALSE)
plot(forecast(pred, h=10), include = 150, xlab = "Index",ylab = "Call Volume",lwd = 2,
col = 'red',main="Forecasting using ARIMA Model with Seasonality")