---
title: "R Notebook"
output: html_notebook
---

Forecasting Australian Wine Sales: You are hired to obtain short-term forecasts (2-3 months ahead) for each of the six series, and this task will be repeated every month. 

```{r, message=FALSE, warning=FALSE}
library(forecast)
library(pander)
library(knitr)
```


```{r, message=FALSE, warning=FALSE}
wine <- read.csv("AustralianWines.csv", header = TRUE, stringsAsFactors = FALSE)
wine <- wine[1:180, ]  # keep Jan 1980 through Dec 1994 (180 monthly observations)
head(wine)
tail(wine)
```
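
Because this task will be repeated every month on a fresh extract, a quick structural check of the incoming file is worthwhile before building the series (a simple, optional sanity check):

```{r}
# Confirm the expected sales columns are present and numeric, and look for
# missing values before constructing the time series.
str(wine)
colSums(is.na(wine))
```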

```{r, fig.height=5, fig.width=8, message=FALSE, warning=FALSE}
#read each column in as a monthly time series and partition it into a
#training period (through December 1993) and a 12-month validation period;
#note that window() accepts a month index greater than 12, counted from Jan 1980
#fortified
Fortified.ts <- ts(wine$Fortified, start = c(1980, 1), frequency = 12)
FortifiedvalidLength <- 12
FortifiedtrainLength <- length(Fortified.ts) - FortifiedvalidLength
FortifiedsalesTrain <- window(Fortified.ts, end = c(1980, FortifiedtrainLength))
FortifiedsalesValid <- window(Fortified.ts, start = c(1980, FortifiedtrainLength + 1))
#Red
Red.ts <- ts(wine$Red, start = c(1980, 1), frequency = 12)
RedvalidLength <- 12
RedtrainLength <- length(Red.ts) - RedvalidLength
RedsalesTrain <- window(Red.ts, end = c(1980, RedtrainLength))
RedsalesValid <- window(Red.ts, start = c(1980, RedtrainLength + 1))
#Rose
Rose.ts <- ts(wine$Rose, start = c(1980, 1), frequency = 12)
RosevalidLength <- 12
RosetrainLength <- length(Rose.ts) - RosevalidLength
RosesalesTrain <- window(Rose.ts, end = c(1980, RosetrainLength))
RosesalesValid <- window(Rose.ts, start = c(1980, RosetrainLength + 1))
#Sparkling
Sparkling.ts <- ts(wine$sparkling, start = c(1980, 1), frequency = 12)
SparklingvalidLength <- 12
SparklingtrainLength <- length(Sparkling.ts) - SparklingvalidLength
SparklingsalesTrain <- window(Sparkling.ts, end = c(1980, SparklingtrainLength))
SparklingsalesValid <- window(Sparkling.ts, start = c(1980, SparklingtrainLength + 1))
#Sweet
Sweet.white.ts <- ts(wine$Sweet.white, start = c(1980, 1), frequency = 12)
SweetvalidLength <- 12
SweettrainLength <- length(Sweet.white.ts) - SweetvalidLength
SweetsalesTrain <- window(Sweet.white.ts, end = c(1980, SweettrainLength))
SweetsalesValid <- window(Sweet.white.ts, start = c(1980, SweettrainLength + 1))
#Dry
Dry.white.ts <- ts(wine$Dry.white, start = c(1980, 1), frequency = 12)
DryvalidLength <- 12
DrytrainLength <- length(Dry.white.ts) - DryvalidLength
DrysalesTrain <- window(Dry.white.ts, end = c(1980, DrytrainLength))
DrysalesValid <- window(Dry.white.ts, start = c(1980, DrytrainLength + 1))

#plots
par(mfrow = c(2,3))
plot(Fortified.ts, xlab = "Year", ylab = "Sales (thousands)", col = "deeppink4", main = "Fortified Wine Sales")
plot(Red.ts, xlab = "Year", ylab = "Sales (thousands)", col = "deeppink4", main = "Red Wine Sales")
plot(Rose.ts, xlab = "Year", ylab = "Sales (thousands)", col = "deeppink4", main = "Rose Sales")
plot(Sparkling.ts, xlab = "Year", ylab = "Sales (thousands)", col = "deeppink4", main = "Sparkling Sales")
plot(Sweet.white.ts, xlab = "Year", ylab = "Sales (thousands)", col = "deeppink4", main = "Sweet Wine Sales")
plot(Dry.white.ts, xlab = "Year", ylab = "Sales (thousands)", col = "deeppink4", main = "Dry Wine Sales")


```
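
The six partitions above are structurally identical, so for the monthly re-runs this task calls for, a small helper would keep the code shorter. A sketch of one possible refactoring (`partitionSeries` is a hypothetical helper, not used elsewhere in this notebook):

```{r}
# Build a monthly series from a data column and split off the last 12 months
# as a validation period, mirroring the blocks above.
partitionSeries <- function(x, validLength = 12) {
  x.ts <- ts(x, start = c(1980, 1), frequency = 12)
  trainLength <- length(x.ts) - validLength
  list(train = window(x.ts, end = c(1980, trainLength)),
       valid = window(x.ts, start = c(1980, trainLength + 1)))
}

# Example: reproduces FortifiedsalesTrain and FortifiedsalesValid.
fortifiedParts <- partitionSeries(wine$Fortified)
```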


1. Would you consider neural networks for this task? Explain why. 

I think neural networks are worth considering here, even for short-term forecasts. Each of the six series plotted above shows its own combination of trend, seasonality, and noise, and a neural network can learn these patterns, including nonlinear ones, without the components having to be specified in advance. Neural networks are computationally heavier and harder to interpret than classical methods, but if the company wants a single flexible method applied to all six series every month, that adaptability is attractive. Even so, simpler methods that model the components explicitly, such as exponential smoothing, should be benchmarked against them, as is done below for fortified wine.

2. Use neural networks to forecast fortified wine sales, as follows: 

* Partition the data using the period until December 1993 as the training period.

```{r}
Fortified.ts <- ts(wine$Fortified, start = c(1980, 1), frequency = 12)
FortifiedvalidLength <- 12
FortifiedtrainLength <- length(Fortified.ts) - FortifiedvalidLength
FortifiedsalesTrain <- window(Fortified.ts, end = c(1980, FortifiedtrainLength))
FortifiedsalesValid <- window(Fortified.ts, start = c(1980, FortifiedtrainLength + 1))
```


* Run a neural network using R’s nnetar with 11 nonseasonal lags (i.e., p = 11). Leave all other arguments at their default. 

```{r}
set.seed(201)  # nnetar averages several randomly initialized networks, so fix the seed for reproducibility
fortified.nn <- nnetar(FortifiedsalesTrain, p = 11)
fortified.nn
summary(fortified.nn$model[[1]])
```
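
The training MAPE quoted in the plot legend below can be checked directly from the in-sample residuals. A minimal sketch (the earliest fitted values are NA because of the lagged inputs, hence `na.rm = TRUE`):

```{r}
# Hand-rolled training MAPE (about 2.3% here); residuals are actual minus fitted.
mean(abs(fortified.nn$residuals / FortifiedsalesTrain), na.rm = TRUE) * 100
```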


  (a) Create a time plot for the actual and forecasted series over the training period.  Interpret what you see in the plots. 

```{r, fig.height=4, fig.width=7}
fortifiedForecast <- forecast(fortified.nn, h = FortifiedvalidLength)
plot(c(1980, 1994), c(1000, 6000), type = "n", xlab = "Year", ylab = "Sales", bty = "l", xaxt = "n", yaxt = "n", main = "Fortified Wine Historical Sales \nNeural Network Model Fit")
lines(FortifiedsalesTrain, col = "deeppink4", lwd = 2)
axis(1, at = seq(1980, 1994, 1))
axis(2, at = seq(1000, 6000, 500), labels = format(seq(1000, 6000, 500)))
lines(fortified.nn$fitted, lty = 1, col = "blue", lwd = 1)
legend(1980, 2000, c("Actual Sales", "NN Model MAPE = 2.29%"), lty = c(1, 1), lwd = c(2,1), col = c("deeppink4", "blue"), bty = "n")
```

  
Create also a time plot of the forecast errors for the training period.  
  
```{r, fig.height=4, fig.width=7}
plot(fortified.nn$residuals, col = "deeppink4", main = "Residuals from Neural Net Model", xlab = "Year", ylab = "Residuals", bty = "l")
abline(h = 0)
```
**Interpretation**

The fitted series tracks the actual training data closely (training MAPE of about 2.3%), and the residual plot shows errors scattered around zero with no obvious remaining trend or seasonal pattern, suggesting the network has captured the systematic structure of the training period. Whether this carries over to new data is assessed against the validation period below.
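
As an optional supplement to the residual plot, the autocorrelations of the training errors can be inspected (the leading NA fitted values must be dropped first):

```{r}
# ACF of the training residuals; spikes inside the confidence bounds support
# the impression that little systematic signal remains.
Acf(na.omit(fortified.nn$residuals), main = "ACF of NN Training Residuals")
```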

  (b) Use the neural network to forecast sales for each month in the validation period (January 1994 to December 1994). 
  
```{r}
fortifiedForecast
```  
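
Since pander is loaded above, the point forecasts can also be rendered as a formatted table (purely a presentation convenience):

```{r}
# Point forecasts for Jan-Dec 1994 as a simple two-column table.
pander(data.frame(Month = month.abb,
                  Forecast = round(as.numeric(fortifiedForecast$mean), 1)))
```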
  
3. Compare your neural network to an exponential smoothing model used to forecast fortified wine sales. 

```{r}
fortified.ets <- ets(FortifiedsalesTrain, model = "ZZZ", restrict = FALSE)  # "ZZZ" lets ets() choose the error, trend, and seasonal components automatically
fortified.ets.forecast <- forecast(fortified.ets, h = FortifiedvalidLength)
```

  (a) Use R’s ets function to automatically select and fit an exponential smoothing model to the training period until December 1993. Which model did ets fit?
  
```{r}
fortified.ets
```  
  

The ets() function selects an ETS(M,A,M) model: multiplicative error, additive trend, and multiplicative seasonality. The fitted smoothing parameters are an alpha of about 0.06 with beta and gamma both close to zero, so the level adapts slowly while the trend and seasonal pattern stay essentially fixed over the training period.
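
The selected specification can also be pulled out programmatically, which is handy when the fit is rerun each month (a small, optional check):

```{r}
# Model label (e.g., "ETS(M,A,M)") and the three smoothing parameters.
fortified.ets$method
coef(fortified.ets)[c("alpha", "beta", "gamma")]
```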

```{r, fig.height=4, fig.width=7}
plot(c(1980, 1994), c(1000, 6000), type = "n", xlab = "Year", ylab = "Sales", bty = "l", xaxt = "n", yaxt = "n", main = "Fortified Wine Historical Sales \nFitted Exponential Smoothing Model")
lines(FortifiedsalesTrain, col = "deeppink4", lwd = 2)
axis(1, at = seq(1980, 1994, 1))
axis(2, at = seq(1000, 6000, 500), labels = format(seq(1000, 6000, 500)))
lines(fortified.ets$fitted, lty = 1, lwd = 1, col = "blue")
legend(1987, 5900, c("Actual Sales", "ETS Model MAPE = 7.23%"), lty = c(1, 1), lwd = c(2,1), col = c("deeppink4", "blue"), bty = "n")
```

  
```{r, fig.height=4, fig.width=7}
plot(c(1994, 1995), c(1000, 6000), type = "n", xlab = "Month", ylab = "Sales", bty = "l", xaxt = "n", yaxt = "n", main = "1994 Fortified Wine Sales \nActual and Forecasted Values")
lines(FortifiedsalesValid, col = "deeppink4", lwd = 2)
axis(1, at = seq(1994, 1994 + 11/12, 1/12), labels = c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"))
axis(2, at = seq(1000, 6000, 500), labels = format(seq(1000, 6000, 500)))
lines(fortifiedForecast$mean, lty = 2, col = "blue", lwd = 2)
lines(fortified.ets.forecast$mean, lty = 3, lwd = 3)
legend(1994 + 1/11, 5900, c("Actual Sales", "NN Model Forecast", "Smoothing Forecast"), lty = c(1, 2, 3), lwd = c(2, 2, 3), col = c("deeppink4", "blue", "black"), bty = "n")
```


  
  
  (b) Use this exponential smoothing model to forecast sales for each month in 1994. 
  
```{r}
fortified.ets.forecast
```
  
  (c) How does the neural network compare to the exponential smoothing model in terms of predictive performance in the training period? In the validation period?
  
  * **Neural Network**
  
```{r}
accuracy(fortifiedForecast, FortifiedsalesValid)
```

  * **ETS Model**
  
```{r}
accuracy(fortified.ets.forecast, FortifiedsalesValid)
```
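
For a quicker read, the validation-period rows of the two accuracy tables can be stacked side by side (an optional convenience):

```{r}
# Validation-set ("Test set") error measures for both models in one table.
rbind(NN  = accuracy(fortifiedForecast, FortifiedsalesValid)["Test set", ],
      ETS = accuracy(fortified.ets.forecast, FortifiedsalesValid)["Test set", ])
```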

Although the neural network fits the training set far more closely than the smoothing model chosen by ets() (training MAPE of roughly 2.3% versus 7.2%), that advantage largely disappears out of sample: in the validation period the exponential smoothing forecast slightly outperforms the neural network on MAPE (roughly 10.9% versus 11.6%), and the two models are otherwise very close. This is an example of a neural network overfitting the training data, which warrants caution when relying on it for forecasting.
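
Finally, since the brief calls for 2-3-month-ahead forecasts refreshed every month, a rolling-origin evaluation is closer to production use than the single 12-month forecast assessed above. A minimal sketch, assuming we keep the non-damped ETS(M,A,M) form selected earlier rather than reselecting a model at every origin:

```{r}
# Roll the forecast origin through 1994: refit on data up to each origin,
# forecast two months ahead, and record the 2-step-ahead error.
errors <- sapply(0:10, function(i) {
  train <- window(Fortified.ts, end = c(1980, FortifiedtrainLength + i))
  fc <- forecast(ets(train, model = "MAM", damped = FALSE), h = 2)
  actual <- window(Fortified.ts,
                   start = c(1980, FortifiedtrainLength + i + 2),
                   end = c(1980, FortifiedtrainLength + i + 2))
  as.numeric(actual) - as.numeric(fc$mean[2])
})
mean(abs(errors))  # mean absolute 2-step-ahead error over 1994
```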
