#setwd("~/wd678/Unit 7")
#install.packages("forecast")
library(forecast, quietly=TRUE, warn.conflicts=FALSE)
## Warning: package 'forecast' was built under R version 3.3.3

Question 1

As a data-driven method, neural networks would be a good consideration for many reasons: There were a few changes in the structures of some wine sales time series that neural networks could capture. Some of the wine sales time series had local patterns that could be “learned” from the data. Since short-term sales forecasts would be repeated frequently (every month) on several flavors of wines (six), automation would be convenient and economical. Neural networks could also capture future structural changes, using roll-forward forecasting of wine sales time series, that model-driven methods wouldn’t.

Question 2

The original series for fortified wine sales was plotted. The series was then partitioned into training and validation periods. Then a neural network using nnetar with 11 non-seasonal lags was run below:

dataFrame <- read.csv("AustralianWines.csv", stringsAsFactors=FALSE, header=TRUE)
wineSales.ts <- ts(dataFrame$Fortified, start=c(1980,1), end = c(1994,12), freq=12)
plot(wineSales.ts, bty="l", main="Original fortified wine sales series", ylab="Wine sales (liters)")

nValid <- 12
nTrain <- length(wineSales.ts) - nValid
train.ts <- window(wineSales.ts, start = c(1980,1), end = c(1980, nTrain))
valid.ts <- window(wineSales.ts, start = c(1980, nTrain + 1), end = c(1980, nTrain + nValid))
plot(train.ts, bty= "l", main="Training period partition for fortified wine sales", ylab="Wine sales (liters)")

The prime number used for set.seed was the same used in lecture.

set.seed(8373493)
sales.nnetar <- nnetar(train.ts, p=11)
sales.nnetar
## Series: train.ts 
## Model:  NNAR(11,1,6)[12] 
## Call:   nnetar(y = train.ts, p = 11)
## 
## Average of 20 networks, each of which is
## a 12-6-1 network with 85 weights
## options were - linear output units 
## 
## sigma^2 estimated as 5734
summary(sales.nnetar$model[[1]])
## a 12-6-1 network with 85 weights
## options were - linear output units 
##   b->h1  i1->h1  i2->h1  i3->h1  i4->h1  i5->h1  i6->h1  i7->h1  i8->h1 
##   -0.14    1.60   -1.88    0.65    4.43   -2.46   -0.67    0.05   -3.61 
##  i9->h1 i10->h1 i11->h1 i12->h1 
##   -2.41    1.52    2.98    1.65 
##   b->h2  i1->h2  i2->h2  i3->h2  i4->h2  i5->h2  i6->h2  i7->h2  i8->h2 
##    0.44   -0.83    1.07    0.71    0.73    0.00    0.66    0.38   -1.07 
##  i9->h2 i10->h2 i11->h2 i12->h2 
##    0.09    0.00    0.09   -1.09 
##   b->h3  i1->h3  i2->h3  i3->h3  i4->h3  i5->h3  i6->h3  i7->h3  i8->h3 
##    1.21    3.92   -0.71    1.73   -2.67    1.49    0.36   -0.03    3.92 
##  i9->h3 i10->h3 i11->h3 i12->h3 
##    0.30   -3.39    1.96   -5.85 
##   b->h4  i1->h4  i2->h4  i3->h4  i4->h4  i5->h4  i6->h4  i7->h4  i8->h4 
##   -2.17    3.83    1.14    0.28   -1.49   -0.99    3.75    2.98   -4.88 
##  i9->h4 i10->h4 i11->h4 i12->h4 
##   -0.52   -1.34    0.72   -1.59 
##   b->h5  i1->h5  i2->h5  i3->h5  i4->h5  i5->h5  i6->h5  i7->h5  i8->h5 
##   -0.89    1.48   -1.48   -0.52   -1.33    0.44   -0.22   -0.45    0.73 
##  i9->h5 i10->h5 i11->h5 i12->h5 
##   -0.91    0.29    0.24   -0.83 
##   b->h6  i1->h6  i2->h6  i3->h6  i4->h6  i5->h6  i6->h6  i7->h6  i8->h6 
##   -0.65   -1.81   -0.12    0.84    0.43   -0.82   -2.19   -1.92    3.76 
##  i9->h6 i10->h6 i11->h6 i12->h6 
##   -0.03   -0.85    0.71   -0.97 
##  b->o h1->o h2->o h3->o h4->o h5->o h6->o 
##  1.98  1.59 -2.75  1.39 -1.27 -1.82 -2.04

Question 2.a

The actual, and forecasted wine sales series using neural networks, were plotted below for the training and validation periods. The residuals series was then plotted for the training period.

The wine sales series was well captured by the neural network, to the point that the network model could have been overfitted. The RMSE and MAPE (2.1%), computed for the training period of the neural network, were relatively small so the peaks and valleys of the residual series had small amplitudes. The residual series had no notable trend or seasonality indicating that the wine sales series had been been well captured by the neural network.

sales.nnetar.pred <- forecast(sales.nnetar, h=nValid)
sales.nnetar.pred$mean
##           Jan      Feb      Mar      Apr      May      Jun      Jul
## 1994 1319.548 1407.169 1855.154 2049.701 2132.175 2376.346 2758.660
##           Aug      Sep      Oct      Nov      Dec
## 1994 2581.042 2139.324 1798.984 2306.188 2608.601

Below are plots of: (1) the original wine sales time series, (2) the neural network model fit in the training period, and (3) the neural network model predictions for the validation period.

yrange=range(wineSales.ts)
plot(c(1980,1995), c(1000,7000), ylab="Wine sales (liters)", xlab="Time", bty="l", xaxt="n", lty=1, main="Plots of the original wine sales series & the neural network model")
#plot(yrange, ylab="Wine sales (liters)", xlab="Time", bty="l", xaxt="n", xlim=c(1980,1995), lty=1, main="Original fortified wine sales series and the neural network model")
axis(1, at=seq(1980,1995,1), labels=format(seq(1980,1995,1)))
lines(wineSales.ts, lwd=2, lty=1)
lines(sales.nnetar$fitted, lwd=1,col="blue", lty=1)
lines(sales.nnetar.pred$mean,lwd=1,col="red",lty=2)
legend(1982.7, 7000, c("Original Series", "Neural network model fitted in training period", "Neural network model forecasted in validation period"), lty=c(1,1,2), col=c("black", "blue", "red"), bty="n")

plot(c(1981,1994), c(-152,209), ylab="Residuals", xlab="Time", bty="l", xaxt="n", main="Residuals of the neural network model fit in the training period")
axis(1, at=seq(1981,1994,1), labels=format(seq(1981,1994,1)))
lines(sales.nnetar$residuals)

accuracy(sales.nnetar$fitted, train.ts) 
##                   ME     RMSE      MAE        MPE     MAPE         ACF1
## Test set -0.09094804 75.71998 57.53445 -0.2129987 2.097739 -0.009408954
##          Theil's U
## Test set 0.1172943

Question 2.b

The forecasted neural network model sales for January 1994 to December 1994 were computed and plotted above (dashed blue line). The computation was repeated and the forecasts were printed below:

sales.nnetar.pred <- forecast(sales.nnetar, h=nValid)
sales.nnetar.pred
##           Jan      Feb      Mar      Apr      May      Jun      Jul
## 1994 1319.548 1407.169 1855.154 2049.701 2132.175 2376.346 2758.660
##           Aug      Sep      Oct      Nov      Dec
## 1994 2581.042 2139.324 1798.984 2306.188 2608.601
accuracy(sales.nnetar.pred,valid.ts)
##                        ME      RMSE       MAE        MPE      MAPE
## Training set  -0.09094804  75.71998  57.53445 -0.2129987  2.097739
## Test set     138.75901697 289.14325 245.23423  5.1737224 10.881000
##                   MASE         ACF1 Theil's U
## Training set 0.2067678 -0.009408954        NA
## Test set     0.8813246 -0.040116767 0.6427012

Question 3.a

The ets function automatically chose the ETS(M,A,M) model to fit in the training period. The ETS(M,A,M) model had multiplicative error and seasonality, and additive trend. The restricted ETS models had been allowed by using “restrict=FALSE” in the ets function argument, but multiplicative trend models were not fit, as directed in lecture (allow.multiplicative.trend was left out of the argument so it was “=FALSE” by default).

esm <- ets(train.ts, model="ZZZ", restrict=FALSE)
esm
## ETS(M,A,M) 
## 
## Call:
##  ets(y = train.ts, model = "ZZZ", restrict = FALSE) 
## 
##   Smoothing parameters:
##     alpha = 0.0555 
##     beta  = 9e-04 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 4040.0811 
##     b = -6.7983 
##     s=1.1316 1.0399 0.8877 0.9505 1.2722 1.3862
##            1.1463 1.1097 0.9345 0.8513 0.6996 0.5903
## 
##   sigma:  0.0859
## 
##      AIC     AICc      BIC 
## 2755.038 2759.118 2808.145

Question 3.b

The wine sales point forecasts using the ETS(M,A,M) model are shown below for each month in 1994:

esm.pred <- forecast(esm, h=nValid, level=0)
esm.pred
##          Point Forecast     Lo 0     Hi 0
## Jan 1994       1289.829 1289.829 1289.829
## Feb 1994       1521.475 1521.475 1521.475
## Mar 1994       1842.645 1842.645 1842.645
## Apr 1994       2013.011 2013.011 2013.011
## May 1994       2379.117 2379.117 2379.117
## Jun 1994       2445.906 2445.906 2445.906
## Jul 1994       2943.532 2943.532 2943.532
## Aug 1994       2688.471 2688.471 2688.471
## Sep 1994       1998.782 1998.782 1998.782
## Oct 1994       1857.773 1857.773 1857.773
## Nov 1994       2165.635 2165.635 2165.635
## Dec 1994       2344.995 2344.995 2344.995
plot(wineSales.ts, xlab="Time", ylab="Wine sales (liters)", bty="l", xaxt="n", lty=2, main="ETS(M,A,M) forecasts for fortified wine sales")
axis(1, at=seq(1980,1995,1), labels=format(seq(1980,1995,1)))
lines(esm$fitted, col="red")
lines(esm.pred$mean, col="green", lty=2)
legend(1987, 6000, c("Original Series", "Fitted in training period", "Forecasted in validation period"), lty=c(2,1,2), col=c("black", "red", "green"), bty="n")

Question 3.c

The predictive performance of the neural networks model was better (lower) for the training period (MAPEs: exponential smoothing model=7.2%, neural network model=2.1%).

The MAPEs of both models were similar for the validation period (both MAPEs=10.9). The neural network model had a lower RMSE in the validation period (exponential smoothing model RMSE=328.9, neural network model RMSE=289.1).

The neural network model was perhaps overfitted explaining its superior performance in the training period (MAPE 2.1%) and large increase in MAPE in the validation period (MAPE 10.9%). Factors that could account for overfitting were the number of neurons in the hidden layer of the neural network, the number of hidden layers, and the number of neural networks fit (epochs).

accuracy(esm$fitted, train.ts)
##                 ME     RMSE      MAE       MPE     MAPE       ACF1
## Test set -25.32466 287.8687 224.6507 -1.317643 7.229271 0.05168201
##          Theil's U
## Test set 0.3930087
accuracy(sales.nnetar$fitted, train.ts)
##                   ME     RMSE      MAE        MPE     MAPE         ACF1
## Test set -0.09094804 75.71998 57.53445 -0.2129987 2.097739 -0.009408954
##          Theil's U
## Test set 0.1172943
accuracy(esm.pred$mean, valid.ts)         
##                ME     RMSE     MAE      MPE     MAPE        ACF1 Theil's U
## Test set 125.5691 328.9246 256.394 4.443793 10.85886 -0.01105575 0.7140459
accuracy(sales.nnetar.pred$mean, valid.ts)
##               ME     RMSE      MAE      MPE   MAPE        ACF1 Theil's U
## Test set 138.759 289.1432 245.2342 5.173722 10.881 -0.04011677 0.6427012
plot(c(1994,1995), c(1000,4000), type="n", xlab="Year 1994", ylab="Wine sales (liters)", bty="l", xaxt="n", yaxt="n", main="Original series, ETS(M,A,M), & neural net models for validation period")
axis(1,at=seq(1994,1995,1/11), labels=c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"))
axis(2,at=seq(1000,4000,1000), labels=format(seq(1000,4000,1000)), las=2)
lines(valid.ts)
lines(esm.pred$mean,col="blue", lty=2)
lines(sales.nnetar.pred$mean, col="red", lty=2)
legend(1994,4000,c("Original wine sales", "Smoothing - ETS(M,A,M)", "Neural network"), lty=c(1,2,2), col=c("black","blue","red"), lwd=c(2,2,2), bty="n")