## Importing the data set
## Source: https://archive.ics.uci.edu/ml/datasets/Daily+Demand+Forecasting+Orders
## Abstract: The dataset was collected during 60 days, this is a real database of a brazilian logistics company.

## Added column called DayTotal to capture total number of days [=(Week-1)*7+Day]

library(readr)
bank <- read_csv("C:/Users/bryce_anderson/Desktop/Boston College/Predictive Analytics and Forecasting/Week 3 (PA&F)/Daily_Demand_Forecasting_Orders3.csv")
## R Packages Utilized
library(ggplot2)
library(forecast)
## Creating a time series of the data set
bankTS <- ts(bank)
library(ggplot2)
attach(bank)
plot(Target~DayTotal, data=bank, xlab="Total Number of Days", ylab="Target (Total Orders)", main="Target (Total Orders) over Number of Days", col="DodgerBlue")
abline(lm(Target~DayTotal), col="orange")

## Creating target forecast
bankFTarget <- (bankTS[,"Target"])
bankFTarget
## Time Series:
## Start = 1 
## End = 60 
## Frequency = 1 
##  [1] 539.577 224.675 129.412 317.120 210.517 207.364 263.043 248.958
##  [9] 344.291 248.428 281.420 243.568 308.178 363.402 336.872 246.992
## [17] 308.880 233.126 404.380 298.560 229.249 236.304 297.174 409.401
## [25] 231.035 238.826 235.598 242.112 490.790 289.657 298.459 323.603
## [33] 616.453 346.035 307.645 253.847 530.944 333.359 306.356 416.830
## [41] 415.187 268.002 234.503 234.724 230.064 357.394 259.246 244.235
## [49] 402.607 255.061 342.606 268.640 188.601 202.022 213.509 316.849
## [57] 286.412 303.447 304.950 331.900

Seasonal ARIMA Model

## Set a lag of 5 to represent a week of orders
autoplot(bankFTarget) + ylab("Daily Target Orders") + xlab("Day")

bankFTarget %>% diff(lag=5) %>% ggtsdisplay()

bankFTarget %>% diff(lag=5) %>% diff() %>% ggtsdisplay()

bankFTarget %>%
  Arima(order=c(0,1,1), seasonal=c(0,1,1)) %>%
  residuals() %>% ggtsdisplay()

arima1 <- Arima(bankFTarget, order=c(0,1,3), seasonal=c(0,1,1))
checkresiduals(arima1)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,3)
## Q* = 7.6437, df = 7, p-value = 0.3651
## 
## Model df: 3.   Total lags used: 10
arima1 %>% forecast(h=12) %>% autoplot()

auto.arima(bankFTarget)
## Series: bankFTarget 
## ARIMA(0,0,0) with non-zero mean 
## 
## Coefficients:
##           mean
##       300.8733
## s.e.   11.4708
## 
## sigma^2 estimated as 8029:  log likelihood=-354.35
## AIC=712.71   AICc=712.92   BIC=716.9

ETS Model

# Generate forecasts from an ETS model
fit.ets <- ets(bankFTarget)
fit.ets
## ETS(A,N,N) 
## 
## Call:
##  ets(y = bankFTarget) 
## 
##   Smoothing parameters:
##     alpha = 1e-04 
## 
##   Initial states:
##     l = 300.8709 
## 
##   sigma:  90.3757
## 
##      AIC     AICc      BIC 
## 790.1036 790.5322 796.3866
checkresiduals(fit.ets)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(A,N,N)
## Q* = 7.467, df = 8, p-value = 0.4872
## 
## Model df: 2.   Total lags used: 10
ets1 <- bankFTarget %>% ets() %>% forecast(h=12) %>% autoplot()
ets1

Test set ARIMA and ETS Comparison

## Holding out half of the data set for a test set
## Limited set of observations so some small overlap over period 3
bankTrain <- subset(bankFTarget, start=1, end=3)
bankTest <- subset(bankFTarget, start=3, end=6)

## Repeating the previous models over the test sets

## ARIMA Test set
autoplot(bankTest) + ylab("Daily Target Orders") + xlab("Day")

bankTest %>%
  Arima(order=c(0,1,1), seasonal=c(0,1,1)) %>%
  residuals() %>% ggtsdisplay()

arima3 <- Arima(bankTest, order=c(0,1,3), seasonal=c(0,1,1))
checkresiduals(arima3)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,3)
## Q* = NA, df = 3, p-value = NA
## 
## Model df: 3.   Total lags used: 6
arima3 %>% forecast(h=12) %>% autoplot()

auto.arima(bankTest)
## Series: bankTest 
## ARIMA(0,0,0) with zero mean 
## 
## sigma^2 estimated as 51157:  log likelihood=-27.36
## AIC=56.72   AICc=58.72   BIC=56.11
## ETS Test set
fit.ets2 <- ets(bankTest)
fit.ets2
## ETS(A,N,N) 
## 
## Call:
##  ets(y = bankTest) 
## 
##   Smoothing parameters:
##     alpha = 0.4248 
## 
##   Initial states:
##     l = 129.412 
## 
##   sigma:  93.8639
checkresiduals(fit.ets2)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(A,N,N)
## Q* = NA, df = 3, p-value = NA
## 
## Model df: 2.   Total lags used: 5
ets2 <- bankTest %>% ets() %>% forecast(h=12) %>% autoplot()
ets2