#Loading necessary libraries:

library(forecast)
library(tidytext)
library(stringr)
library(ggplot2)
library(ggfortify)
library(readr)
library(dplyr)
library(tidyr)
library(knitr)
library(pander)
library(zoo)

Chapter 5: Smoothing Methods

For this assignment I will be completing the assigned problems (Problems 2, 5-updated, and 8) from Chapter 5 of the Practical Time Series Forecasting with R textbook by Galit Shmueli and Kenneth C. Lichtendahl Jr.

Problem 2: Relationship between Moving Average and Exponential Smoothing

Assume that we apply a moving average to a series, using a very short window span. If we wanted to achieve an equivalent result using simple exponential smoothing, what value should the smoothing constant take?

If we apply a moving average with a very short window to a series, then to generate an approximately equivalent result using simple exponential smoothing we need to match the two smoothers’ parameters, the window width (\(w\)) and the smoothing constant (\(\alpha\)), by setting the window width of the moving average equal to \(w = \frac{2}{\alpha} - 1\).

Therefore, we can take the actual window width of the very short window span mentioned in this problem and determine the value of \(\alpha\) by rearranging that equation.

\(w = \frac{2}{\alpha} - 1 \implies \alpha = \frac{2}{w + 1}\)

Then, using the rearranged equation, we can plug the window width into \(\alpha = \frac{2}{w+1}\) to find the smoothing constant that makes simple exponential smoothing approximately equivalent to the moving average. As the book notes, in both cases the parameter (\(w\) or \(\alpha\)) determines the weight given to the most recent information relative to older information. Furthermore, when the trailing moving average (the version used for forecasting) is applied with \(w = 1\), it reduces to a naive forecast.
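
As a hedged illustration of this conversion (the window width \(w = 3\) and the built-in AirPassengers series are my own illustrative choices, not part of the problem), it can be written directly in R:

#Converting a trailing moving average window width into the roughly
#equivalent SES smoothing constant: alpha = 2/(w + 1)
w <- 3
alpha <- 2/(w + 1)
alpha
## [1] 0.5

#SES can then be fit with this fixed smoothing constant, e.g.:
ses_fit <- ses(AirPassengers, alpha = alpha, initial = "simple", h = 4)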

Problem 5 - Updated: Forecasting Department Store Sales

To begin question 5, I will import the file that contains data on the quarterly sales for a department store over a 6-year period.

#Importing the Department Store Sales Data

Store_Sales <- read.csv("DeptStoreSales.csv", stringsAsFactors = FALSE)
str(Store_Sales)
## 'data.frame':    24 obs. of  2 variables:
##  $ Quarter: int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Sales  : int  50147 49325 57048 76781 48617 50898 58517 77691 50862 53028 ...
head(Store_Sales)
##   Quarter Sales
## 1       1 50147
## 2       2 49325
## 3       3 57048
## 4       4 76781
## 5       5 48617
## 6       6 50898
tail(Store_Sales)
##    Quarter  Sales
## 19      19  71486
## 20      20  92183
## 21      21  60800
## 22      22  64900
## 23      23  76997
## 24      24 103337
#Create a time series object from the Store Sales Data

Store_STS <- ts(Store_Sales$Sales, start=c(1,1), frequency = 4)

yrange = range(Store_STS)

Figure 1. Time Series of the Department Store Sales

plot(Store_STS, ylim = c(0,120000), ylab = "Quarterly Sales", xlab = "Year", main = "Time Series of Department Store Sales", bty = "l")

#Decomposing the time series
Components_SS <- decompose(Store_STS)
Components_SS
## $x
##     Qtr1   Qtr2   Qtr3   Qtr4
## 1  50147  49325  57048  76781
## 2  48617  50898  58517  77691
## 3  50862  53028  58849  79660
## 4  51640  54119  65681  85175
## 5  56405  60031  71486  92183
## 6  60800  64900  76997 103337
## 
## $seasonal
##          Qtr1        Qtr2        Qtr3        Qtr4
## 1 -10371.0688  -8603.2938   -175.8687  19150.2313
## 2 -10371.0688  -8603.2938   -175.8687  19150.2313
## 3 -10371.0688  -8603.2938   -175.8687  19150.2313
## 4 -10371.0688  -8603.2938   -175.8687  19150.2313
## 5 -10371.0688  -8603.2938   -175.8687  19150.2313
## 6 -10371.0688  -8603.2938   -175.8687  19150.2313
## 
## $trend
##       Qtr1     Qtr2     Qtr3     Qtr4
## 1       NA       NA 58134.00 58139.38
## 2 58519.62 58817.00 59211.38 59758.25
## 3 60066.00 60353.62 60697.00 60930.62
## 4 61921.00 63464.38 64749.38 66084.00
## 5 67548.62 69150.25 70575.62 71733.62
## 6 73031.12 75114.25       NA       NA
## 
## $random
##          Qtr1        Qtr2        Qtr3        Qtr4
## 1          NA          NA  -910.13125  -508.60625
## 2   468.44375   684.29375  -518.50625 -1217.48125
## 3  1167.06875  1277.66875 -1672.13125  -420.85625
## 4    90.06875  -742.08125  1107.49375   -59.23125
## 5  -772.55625  -515.95625  1086.24375  1299.14375
## 6 -1860.05625 -1610.95625          NA          NA
## 
## $figure
## [1] -10371.0688  -8603.2938   -175.8687  19150.2313
## 
## $type
## [1] "additive"
## 
## attr(,"class")
## [1] "decomposed.ts"

Figure 2. Decomposition of Time Series

plot(Components_SS)

(a) Which of the following methods would not be suitable for forecasting this series? Explain why or why not for each one.

From Figure 1 we can see that a slight trend may exist and that seasonality is likely present. Figure 2 shows us that this time series has both an upward trend and additive seasonality.

Moving average of raw series: A moving average of the raw series would not be suitable because it would not capture the trend or seasonality of the time series; moving averages should be used with stationary series.

Moving average of deseasonalized series: Moving average of the deseasonalized series would be suitable if this series only exhibited seasonality and not trend. However, since trend exists for this series, this method would also not be suitable to forecast the series.

Simple exponential smoothing of the raw series: As with the moving average method, simple exponential smoothing (SES) of the raw series is not suitable because trend and seasonality exist. SES should be used when no trend or seasonality is present, so that the series contains only level (\(L_t\)) and noise.

Double exponential smoothing of the raw series: Holt’s exponential smoothing (double exponential smoothing) would not be suitable for this time series either, again because seasonality exists within the series; this method accounts for trend in the forecasting model but not for seasonality.

Holt-Winters’ exponential smoothing of the raw series: The Holt-Winters’ exponential smoothing method would be suitable for this time series because it accounts for both trend and seasonality (with \(M\) seasons), as well as level and noise, in the forecast; the sketch below cross-checks this choice.
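
As a cross-check on the reasoning above, we can let R’s automated exponential smoothing selection pick the components itself; a minimal sketch (note that the information-criterion-based search is not guaranteed to keep an explicit trend term; the Appendix shows \(ets()\) selecting ETS(M,N,M) on the training data):

#Automated selection over the exponential smoothing family; the chosen
#letters (Error, Trend, Seasonal) show which components the search keeps.
auto_fit <- ets(Store_STS)
auto_fit$method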

(b) A forecaster was tasked to generate forecasts for 4 quarters ahead. She therefore partitioned the data so that the last 4 quarters were designated as the validation period. The forecaster approached the forecasting task by using multiplicative Holt-Winters’ exponential smoothing. Specifically, you should call the hw function with the parameter seasonal=“multiplicative”. Let the method pick the appropriate values for \(\alpha\), \(\beta\), and \(\gamma\).

i. Run this method on the data. Request the forecasts for the validation period. (Note that the forecasted values for the validation set will be different than what the book shows.)

In order to begin to forecast using the time series, the data must be split between the training and validation periods.

#Creating training and validation periods, using the last 4 quarters as the validation period.
validLen<- 4
TrainLeng <- length(Store_STS) - validLen
  
SSTS_Train <- window(Store_STS, start =c(1,1), end=c(1, TrainLeng))
SSTS_Valid <- window(Store_STS, start=c(1,TrainLeng+1), end=c(1, TrainLeng+validLen))

#Creating tables of the actual values in both the training period and the validation period.
pander(SSTS_Train)
     Q1    Q2    Q3    Q4
1 50147 49325 57048 76781
2 48617 50898 58517 77691
3 50862 53028 58849 79660
4 51640 54119 65681 85175
5 56405 60031 71486 92183
pander(SSTS_Valid)
     Q1    Q2    Q3     Q4
6 60800 64900 76997 103337

Figure 3. Time Series of the Department Store Sales Showing Training and Validation Periods

yrange <- range(Store_STS)
plot(c(1, 7.5), c(48000,114500), type="n", xlab="Year",  ylab="Department Store Sales", bty="l", xaxt="n", yaxt="n")


lines(Store_STS, bty="l")

axis(1, at=seq(1,7.5,1), labels=format(seq(1,7.5,1)))


axis(2, at=seq(46000, 120000, 10000), labels=format(seq(46000, 120000, 10000)), las=0)

abline(v=6)
arrows(1, 105000, 6, 105000, code=3, length=0.1)
text(3, 108000, "Training")

abline(v=7)
arrows(6, 105000, 7, 105000, code=3, length=0.1)
text(6.5, 108000, "Validation")

#Performing the Holt-Winters' exponential smoothing method on the department store sales time series, using multiplicative seasonality and letting the method choose the appropriate smoothing parameters.


HW_SSTS <- hw(SSTS_Train, seasonal = "multiplicative", h=4)
summary(HW_SSTS)
## 
## Forecast method: Holt-Winters' multiplicative method
## 
## Model Information:
## Holt-Winters' multiplicative method 
## 
## Call:
##  hw(y = SSTS_Train, h = 4, seasonal = "multiplicative") 
## 
##   Smoothing parameters:
##     alpha = 0.4032 
##     beta  = 0.1429 
##     gamma = 0.4549 
## 
##   Initial states:
##     l = 57401.8119 
##     b = 605.4045 
##     s=1.3012 0.9795 0.8614 0.8579
## 
##   sigma:  0.0258
## 
##      AIC     AICc      BIC 
## 372.3936 390.3936 381.3552 
## 
## Error measures:
##                   ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 246.631 1499.632 977.6487 0.3530739 1.665686 0.3137009
##                     ACF1
## Training set -0.07882461
## 
## Forecasts:
##      Point Forecast    Lo 80     Hi 80    Lo 95     Hi 95
## 6 Q1       61334.90 59303.21  63366.58 58227.70  64442.09
## 6 Q2       64971.30 62529.36  67413.25 61236.67  68705.94
## 6 Q3       76718.11 73376.84  80059.37 71608.08  81828.13
## 6 Q4       99420.55 94372.29 104468.81 91699.90 107141.20

The smoothing parameters selected by the method are \(\alpha = 0.4032\), \(\beta = 0.1429\), and \(\gamma = 0.4549\).

The point forecasts for the validation period are: Q21 = $61,334.90, Q22 = $64,971.30, Q23 = $76,718.11, and Q24 = $99,420.55.

Figure 4. Holt-Winters’ Exponential Smoothing (Multiplicative Method) on Department Store Sales

autoplot(HW_SSTS)

ii. Using the forecasts for the validation set that you came up with in i. above, compute the MAPE values for the forecasts of quarters 21-22.

Performance Evaluation for the Holt-Winters’ Forecast

accuracy(HW_SSTS, SSTS_Valid)
##                   ME     RMSE       MAE       MPE     MAPE      MASE
## Training set 246.631 1499.632  977.6487 0.3530739 1.665686 0.3137009
## Test set     897.285 1981.638 1200.3859 0.7906391 1.285456 0.3851712
##                      ACF1 Theil's U
## Training set -0.078824613        NA
## Test set      0.009540924 0.1291442

Performance Evaluation for Quarter 21

Q21 <- window(Store_STS, start = c(1,21), end= c(1,21))
accuracy(HW_SSTS, Q21)
##                     ME      RMSE      MAE        MPE      MAPE      MASE
## Training set  246.6310 1499.6322 977.6487  0.3530739 1.6656858 0.3137009
## Test set     -534.8988  534.8988 534.8988 -0.8797678 0.8797678 0.1716345
##                     ACF1
## Training set -0.07882461
## Test set              NA

The training-set MAPE is ~1.67% and the test-set (Q21) MAPE is ~0.88%.

Performance Evaluation for Quarter 22

Q22 <- window(Store_STS, start = c(1,22), end= c(1,22))
accuracy(HW_SSTS, Q22)
##                   ME     RMSE      MAE        MPE      MAPE       MASE
## Training set 246.631 1499.632 977.6487  0.3530739 1.6656858 0.31370087
## Test set     -71.303   71.303  71.3030 -0.1098659 0.1098659 0.02287919
##                     ACF1
## Training set -0.07882461
## Test set              NA

The training-set MAPE is ~1.67% and the test-set (Q22) MAPE is ~0.11%.

Performance Evaluation for Quarters 21-22

Q21_22 <- window(Store_STS, start = c(1,21), end= c(1,22))
accuracy(HW_SSTS, Q21_22)
##                     ME      RMSE      MAE        MPE      MAPE       MASE
## Training set  246.6310 1499.6322 977.6487  0.3530739 1.6656858 0.31370087
## Test set     -303.1009  381.5763 303.1009 -0.4948169 0.4948169 0.09725683
##                     ACF1  Theil's U
## Training set -0.07882461         NA
## Test set     -0.50000000 0.01739097

The training-set MAPE is still ~1.67%, while the test-set (Q21-Q22) MAPE is ~0.49%. The MAPE for Q22 was much lower than the MAPE for Q21, but overall the test-set MAPE is very low for Q21, for Q22, and for the two quarters combined.
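
As a sanity check, the test-set MAPE above can be reproduced by hand from its definition, the mean of the absolute percentage errors over the two quarters:

#Hand computation of the Q21-Q22 validation MAPE: mean(|actual - forecast|/actual)*100
mean(abs(Q21_22 - HW_SSTS$mean[1:2])/Q21_22) * 100
## [1] 0.4948169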

(c) The fit and the residuals were displayed in the book. Please reproduce them with R code. Using all the information from (b) and your generated figures, is this model suitable for forecasting quarters 21 and 22?

Reproducing the two graphs in Figure 5.11 on page 111 in the book.

Figure 5. Exponential Smoothing: Actual vs. Forecast (Training Data) for the Department Store Sales

plot(c(1, 6), c(0,100000), type="n", xlab="Year",  ylab="Department Store Sales", bty="l", xaxt="n", yaxt="n")


axis(1, at=seq(1,6,1), labels=format(seq(1,6,1)))


axis(2, at=seq(0, 120000, 10000), labels=format(seq(0, 120000, 10000)), las=0)


lines(SSTS_Train, bty="l", col="black", lwd=2.5)

lines(HW_SSTS$fitted, lwd = 2, col = "red")

legend(4,20000, c("Actual", "Forecast"), lwd=2, lty=1, col=c("black","red"), bty="n")
title("Exp Smoothing: Actual vs. Forecast (Training Data)")

Figure 6. Exponential Smoothing Forecast Errors (Training Data) for the Department Store Sales

plot(HW_SSTS$residuals, xlab = "Year", ylab ="Forecast error", main="Exp Smoothing Forecast Errors (Training Period)")

abline(h=0)

From the information and figures in parts (b) and (c), it becomes clear that the Holt-Winters’ exponential smoothing method is suitable for forecasting quarters 21 and 22: the method captures the seasonality and trend of the data, and the MAPEs were very low.

(d) Another analyst decided to take a much simpler approach, and instead of using exponential smoothing he used differencing. Use differencing to remove the trend and seasonal pattern. Which order works better: first removing trend and then seasonality, or the opposite order? Show the progression of time plots as you difference the data and each final series to provide evidence in support of your answer.

First we will remove the seasonality and then remove the trend.

Figure 7. Differencing the Department Store Sales by Lag-4 then Lag-1

par(mfrow=c(2,2))

plot(Store_STS, ylab="Sales ($)", xlab="Year", bty="l", main="Department Store Sales")

plot(diff(Store_STS, lag=4), ylab="Lag-4", xlab="Year", bty="l", main="Lag 4 Difference")

plot(diff(Store_STS, lag=1), ylab="Lag-1", xlab="Year", bty="l", main="Lag 1 Difference")

lag4ThenLag1 <- diff(diff(Store_STS, lag=4), lag=1)
plot(lag4ThenLag1, ylab="Lag-4 then Lag-1", xlab="Year", bty="l", main="Twice-Differenced (Lag-4, Lag-1)")

Second, we will remove the trend and then remove the seasonality.

Figure 8. Differencing the Department Store Sales by Lag-1 then Lag-4

par(mfrow=c(2,2))

plot(Store_STS, ylab="Sales ($)", xlab="Year", bty="l", main="Department Store Sales")

plot(diff(Store_STS, lag=1), ylab="Lag-1", xlab="Year", bty="l", main="Lag 1 Difference")

plot(diff(Store_STS, lag=4), ylab="Lag-4", xlab="Year", bty="l", main="Lag 4 Difference")

lag1ThenLag4 <- diff(diff(Store_STS, lag=1), lag=4)
plot(lag1ThenLag4, ylab="Lag-1 then Lag-4", xlab="Year", bty="l", main="Twice-Differenced (Lag-1, Lag-4)")

Looking at Figure 7 and Figure 8, there is no difference in the final series whether seasonality is removed before or after the trend. The order is irrelevant, and both orders arrive at the same double-differenced plot. The double-differenced series gives us a more stationary process to use for forecasting.
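
In fact, because differencing is a linear operation the two operators commute, so the two final series should match exactly; a quick numerical check:

#The lag-4-then-lag-1 and lag-1-then-lag-4 series should be identical.
all.equal(lag4ThenLag1, lag1ThenLag4)
## [1] TRUE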

(e) Forecast quarters 21-22 using the average of the double-differenced series from (d). Remember to use only the training period (until quarter 20), and to adjust back for the trend and seasonal pattern.

Now we can apply a mean forecast to the double-differenced data using the \(meanf()\) function. This shows us the point forecasts for the entire validation period (4 quarters).

pointForecasts <- meanf(diff(diff(SSTS_Train, lag=4), lag=1), h=4)

pointForecasts
##      Point Forecast     Lo 80    Hi 80     Lo 95    Hi 95
## 6 Q1          569.2 -2116.935 3255.335 -3714.114 4852.514
## 6 Q2          569.2 -2116.935 3255.335 -3714.114 4852.514
## 6 Q3          569.2 -2116.935 3255.335 -3714.114 4852.514
## 6 Q4          569.2 -2116.935 3255.335 -3714.114 4852.514

Now the point forecast needs to be converted back into a forecast for the original time series.

realForecasts <- vector()

#Undo the double differencing: each forecast adds the mean of the
#double-differenced series back onto the value from four quarters earlier
#(restoring the seasonal pattern) plus the lag-4 change at the previous
#quarter (restoring the trend), using prior forecasts once the training
#actuals run out.
for (i in 1:validLen) {
  if(i == 1) {
    realForecasts[i] <- pointForecasts$mean[i] + SSTS_Train[(TrainLeng+i)-validLen] + (SSTS_Train[TrainLeng] - SSTS_Train[TrainLeng - validLen])
  } else {
    realForecasts[i] <- pointForecasts$mean[i] + SSTS_Train[(TrainLeng+i)-validLen] + (realForecasts[i-1] - SSTS_Train[TrainLeng+i-1-validLen])
  }
}

realForecasts
## [1]  63982.2  68177.4  80201.6 101467.8

Using the double-differenced (twice-differenced) series, we can see that the forecasts for quarters 21 and 22 are $63,982.20 and $68,177.40, respectively.

Figure 9. Plotting the Forecasts for Quarters 21-24 Using the Double-Differenced Series

par(mfrow = c(1,1))
plot(realForecasts, type="l", bty="l")

(f) Compare the forecasts from (e) to the exponential smoothing forecasts found in (b). Which of the two forecasting methods would you choose? Explain.

To compare the forecasts from (e) to the exponential smoothing forecasts found in (b), I will first plot the double-differenced forecast from (e) against the actual values in the validation period.

Figure 10. Plotting the Actual Values and the Double-Differenced Forecasts of the Department Store Sales

yrange = range(Store_STS)

plot(c(1,7), yrange, type="n", xlab="Year", ylab="Department Store Sales", bty="l", xaxt="n", yaxt="n")

lines(Store_STS, bty="l")

axis(1, at=seq(1,7.5,1), labels=format(seq(1,7.5,1)))


axis(2, at=seq(46000, 120000, 10000), labels=format(seq(46000, 120000, 10000)), las=0)

abline(v=6)
arrows(1, 105000, 6, 105000, code=3, length=0.1)
text(3, 108000, "Training")

abline(v=7)
arrows(6, 105000, 7, 105000, code=3, length=0.1)
text(6.5, 108000, "Validation")

lines(seq(6, 7-1/validLen, 1/validLen), realForecasts, col="red", lwd=2, lty=2)
legend(1,100000, c("Actual", "Double-Differenced Forecasts"),lwd=2, lty=c(1,2), col=c("black", "red"), bty="n")

Because we only pass the 4 forecast points for the validation period to accuracy(), there is no way to calculate metrics for the training period. The validation-period MAPE for the Holt-Winters’ method was 1.29% (1.67% on the training set), while the MAPE for the double-differenced method was 4.06%. I would use the Holt-Winters’ method: it not only generated a lower MAPE, it is also easier to apply, since the model selects its own parameters instead of requiring us to difference out both trend and seasonality and then convert the double-differenced forecasts back to the original series. It is a lot easier to just use the \(hw()\) function.

accuracy(realForecasts, SSTS_Valid)
##                ME    RMSE     MAE       MPE     MAPE        ACF1 Theil's U
## Test set -1948.75 2942.41 2883.35 -3.159236 4.063656 -0.07650809 0.1948234

(g) What is an even simpler approach that should be compared as a baseline? Complete that comparison.

As discussed in previous assignments, the simplest approach is the naive forecast, which should act as a baseline against which all other forecasts are compared. Because we know seasonality exists in this time series, we will use the seasonal naive method for the comparison.

snaive_SSTS <- snaive(SSTS_Train, validLen)
snaive_SSTS$mean
##    Qtr1  Qtr2  Qtr3  Qtr4
## 6 56405 60031 71486 92183
accuracy(snaive_SSTS, SSTS_Valid)
##                   ME     RMSE     MAE      MPE     MAPE     MASE
## Training set 2925.25 3878.784 3116.50 4.344358 4.737739 1.000000
## Test set     6482.25 7032.176 6482.25 8.170540 8.170540 2.079978
##                    ACF1 Theil's U
## Training set 0.54855812        NA
## Test set     0.01334402 0.4705349

Figure 11. Plotting Department Store Sales: Actual, Holt-Winters’, and Seasonal Naive Forecasts

yrange = range(Store_STS)

plot(c(1,7), yrange, type="n", xlab="Year", ylab="Department Store Sales", bty="l", xaxt="n", yaxt="n")

lines(Store_STS, bty="l")
lines(snaive_SSTS$mean, lty=2, col="blue", lwd=2)
lines(HW_SSTS$mean, col="red", lty=3, lwd=2)

axis(1, at=seq(1,7.5,1), labels=format(seq(1,7.5,1)))


axis(2, at=seq(46000, 200000, 10000), labels=format(seq(46000, 200000, 10000)), las=0)

abline(v=6)
arrows(1, 100000, 6, 100000, code=3, length=0.1)
text(3, 104000, "Training")

abline(v=7)
arrows(6, 100000, 7, 100000, code=3, length=0.1)
text(6.5, 104000, "Validation")

legend(1,100000, c("Actual", "Holt-Winters'", "Seasonal Naive Forecast"), lwd=2, lty=c(1,3,2), col=c("black", "red", "blue"), bty="n")

Figure 11 shows us that although the seasonal naive forecast follows the same seasonal pattern as the actual validation period, it does not model the data as well as the Holt-Winters’ forecast does. Additionally, the MAPE for the seasonal naive forecast (8.17%) is not as low as the MAPE for the Holt-Winters’ forecast.
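
To make the comparison explicit, the validation-period MAPEs reported by \(accuracy()\) throughout this problem can be gathered into one small table:

#Validation-period MAPEs collected from the accuracy() outputs above.
data.frame(Method = c("Holt-Winters'", "Double-differenced mean", "Seasonal naive"),
           MAPE = c(1.29, 4.06, 8.17))
##                    Method MAPE
## 1           Holt-Winters' 1.29
## 2 Double-differenced mean 4.06
## 3          Seasonal naive 8.17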

Problem 8: Forecasting Australian Wine Sales

We will begin by importing the data from the Australian Wine spreadsheet. This dataset includes monthly sales of six types of Australian wines (red, rose, sweet white, dry white, sparkling, and fortified) from 1980-1994.

Wine <- read.csv("AustralianWines.csv", stringsAsFactors = FALSE)
head(Wine)
##    Month Fortified  Red Rose sparkling Sweet.white Dry.white
## 1 Jan-80      2585  464  112      1686          85      1954
## 2 Feb-80      3368  675  118      1591          89      2302
## 3 Mar-80      3210  703  129      2304         109      3054
## 4 Apr-80      3111  887   99      1712          95      2414
## 5 May-80      3756 1139  116      1471          91      2226
## 6 Jun-80      4216 1077  168      1377          95      2725
tail(Wine)
##     Month Fortified Red Rose sparkling Sweet.white Dry.white
## 183              NA  NA             NA          NA        NA
## 184              NA  NA             NA          NA        NA
## 185              NA  NA             NA          NA        NA
## 186              NA  NA             NA          NA        NA
## 187              NA  NA             NA          NA        NA
## 188              NA  NA             NA          NA        NA
summary(Wine)
##     Month             Fortified         Red           Rose          
##  Length:188         Min.   :1154   Min.   : 464   Length:188        
##  Class :character   1st Qu.:2377   1st Qu.:1123   Class :character  
##  Mode  :character   Median :2894   Median :1559   Mode  :character  
##                     Mean   :2999   Mean   :1630                     
##                     3rd Qu.:3527   3rd Qu.:2106                     
##                     Max.   :5618   Max.   :3670                     
##                     NA's   :8      NA's   :8                        
##    sparkling     Sweet.white      Dry.white   
##  Min.   :1170   Min.   : 85.0   Min.   :1954  
##  1st Qu.:1605   1st Qu.:141.5   1st Qu.:2736  
##  Median :1896   Median :223.5   Median :3090  
##  Mean   :2431   Mean   :247.1   Mean   :3240  
##  3rd Qu.:2599   3rd Qu.:319.2   3rd Qu.:3685  
##  Max.   :7242   Max.   :662.0   Max.   :5725  
##  NA's   :8      NA's   :8       NA's   :8

(a) Which smoothing method would you choose if you had to choose the same method for forecasting all series? Why?

After looking at the wine sales series, it becomes apparent that trend and seasonality exist for at least some of them. Because of this, I would use the Holt-Winters’ exponential smoothing method if I had to choose the same method for forecasting all the series; a sketch of that approach follows.
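
A minimal sketch of that single-method approach (assuming the Wine data frame imported above; the Rose column was read as character, so every column is coerced to numeric, and \(na.interp()\) from the forecast package fills any internal missing values):

#Fit Holt-Winters' multiplicative smoothing to every wine series and
#collect the training-set MAPEs for a rough comparison.
wine_cols <- c("Fortified", "Red", "Rose", "sparkling", "Sweet.white", "Dry.white")
Wine_complete <- Wine[!is.na(Wine$Fortified), ]  #drop the empty trailing rows
sapply(wine_cols, function(col) {
  y <- ts(as.numeric(Wine_complete[[col]]), start = c(1980, 1), frequency = 12)
  y <- na.interp(y)  #interpolate any internal missing values
  fit <- hw(y, seasonal = "multiplicative", h = 2)
  round(accuracy(fit)[1, "MAPE"], 2)
})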

(b) Fortified wine has the largest market share of the six types of wine. You are asked to focus on fortified wine sales alone and produce as accurate a forecast as possible for the next two months:

Start by partitioning the data using the period until December 1993 as the training period.

First I will create a time series for just the fortified wine sales (see the Appendix for the time series of all wine types). The time series for fortified wine is called “Fort”. I will then create the training and validation periods.

Figure 12. Time Series of Fortified Wine Sales

Fort <- ts(Wine$Fortified, start=c(1980,1), frequency=12)
validLen2 <- 12
TrainLen <- length(Fort) - validLen2
autoplot(Fort)
## Warning: Removed 8 rows containing missing values (geom_path).

Fort_Train <- window(Fort, end=c(1993,12))
Fort_Valid <- window(Fort, start=c(1994,1))
Next, apply Holt-Winters’ exponential smoothing with multiplicative seasonality to the sales series.
Fort_HW <- hw(Fort_Train, seasonal="multiplicative", h=12)
summary(Fort_HW)
## 
## Forecast method: Holt-Winters' multiplicative method
## 
## Model Information:
## Holt-Winters' multiplicative method 
## 
## Call:
##  hw(y = Fort_Train, h = 12, seasonal = "multiplicative") 
## 
##   Smoothing parameters:
##     alpha = 0.0539 
##     beta  = 5e-04 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 3977.0021 
##     b = -7.7979 
##     s=1.0924 1.0337 0.8934 0.958 1.2884 1.3862
##            1.1318 1.113 0.9259 0.8547 0.7177 0.6049
## 
##   sigma:  0.0869
## 
##      AIC     AICc      BIC 
## 2759.531 2763.611 2812.638 
## 
## Error measures:
##                     ME     RMSE     MAE      MPE     MAPE      MASE
## Training set -22.06217 280.3027 221.711 -1.45904 7.260039 0.7967865
##                    ACF1
## Training set 0.03407816
## 
## Forecasts:
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Jan 1994       1326.700 1178.965 1474.436 1100.758 1552.642
## Feb 1994       1566.896 1392.151 1741.641 1299.647 1834.145
## Mar 1994       1857.505 1650.032 2064.979 1540.202 2174.809
## Apr 1994       2003.232 1779.128 2227.335 1660.494 2345.969
## May 1994       2396.889 2128.312 2665.466 1986.136 2807.642
## Jun 1994       2426.159 2153.851 2698.467 2009.700 2842.618
## Jul 1994       2957.688 2625.158 3290.219 2449.127 3466.249
## Aug 1994       2736.162 2428.002 3044.322 2264.872 3207.452
## Sep 1994       2025.046 1796.568 2253.523 1675.620 2374.472
## Oct 1994       1879.638 1667.178 2092.097 1554.709 2204.566
## Nov 1994       2164.681 1919.545 2409.818 1789.778 2539.585
## Dec 1994       2276.860 2018.526 2535.193 1881.772 2671.947
plot(Fort_HW)

The forecasts for the next two months, January 1994 and February 1994, are 1,326.70 and 1,566.90, respectively.

accuracy(Fort_HW, Fort_Valid)
##                     ME     RMSE      MAE       MPE      MAPE      MASE
## Training set -22.06217 280.3027 221.7110 -1.459040  7.260039 0.7967865
## Test set     115.04537 337.3427 265.2853  3.739657 11.246490 0.9533845
##                    ACF1 Theil's U
## Training set 0.03407816        NA
## Test set     0.01658673 0.7213012

The MAPE for the Holt-Winters’ forecast is 7.26% on the training set and 11.25% on the validation set.
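
Since the stated goal is a forecast for the next two months only, a sketch restricting the evaluation to January and February of 1994:

#Accuracy computed over just the two months the problem asks about.
accuracy(Fort_HW$mean[1:2], window(Fort_Valid, end = c(1994, 2)))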

(c) Create a plot of the residuals from the Holt-Winters’ exponential smoothing.

Using the \(checkresiduals()\) function, it is easier to see whether the residuals resemble white noise; from the plot, it does not seem that they resemble white noise very well.

Figure 13. Residuals from the Holt-Winters’ Multiplicative Method on Fortified Wine Sales

checkresiduals(Fort_HW)

## 
##  Ljung-Box test
## 
## data:  Residuals from Holt-Winters' multiplicative method
## Q* = 39.543, df = 8, p-value = 3.897e-06
## 
## Model df: 16.   Total lags used: 24
i. Based on this plot, which of the following statements are reasonable?

December (month 12) is not captured well by the model: This statement is somewhat reasonable, since the ACF at lag 12 (December) is elevated and falls outside the blue significance bounds.

There is a strong correlation between sales in the same calendar month: This statement seems reasonable, since seasonality was clearly present when we looked at the original time series.

The model does not capture seasonality well: This is a reasonable statement; the residuals do not resemble white noise, which suggests the Holt-Winters’ method did not fully capture the seasonal structure.

We should first deseasonalize the data and then apply Holt-Winters’ exponential smoothing: Unreasonable. Holt-Winters’ exponential smoothing captures both trend and seasonality, so there would be no reason to deseasonalize the data before applying it.

ii. How can you handle the above effect with exponential smoothing?

The effect seen above could be handled by using different smoothing constants, specifically for seasonality (\(\gamma\)). We could also consider other approaches, such as double differencing the series to obtain a more stationary process and then applying simple exponential smoothing to it, to see whether the seasonality and trend are better controlled.
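
For example, a hedged sketch re-fitting the model with a manually specified seasonal smoothing constant (gamma = 0.2 is an illustrative value of my choosing, not an optimized one):

#Re-fit Holt-Winters' with a larger seasonal smoothing constant so the
#seasonal indices can adapt faster, then re-inspect the residuals.
Fort_HW2 <- hw(Fort_Train, seasonal = "multiplicative", gamma = 0.2, h = 12)
checkresiduals(Fort_HW2)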

Appendix

Problem 5 - Updated: Forecasting Department Store Sales

Figure A. Time Series of Department Store Sales

autoplot(Store_STS)

Figure B. Moving Average Forecasts vs. Actual for Department Store Sales

trailingMA <- rollmean(SSTS_Train, k=4, align="right")

centeredMA <- ma(SSTS_Train, order=4)

yrange = range(Store_STS)

plot(c(1,8), yrange, type="n", xlab="Year", ylab="Department Store Sales", bty="l", xaxt="n", yaxt="n")

lines(Store_STS, bty="l")

axis(1, at=seq(1, 8, 1))

axis(2, at=seq(0, 200000, 50000), labels=format(seq(0, 200000, 50000)), las=2)

lines(trailingMA, col="red", lty=2)
lines(centeredMA)

legend(1, 100000, c("Actual","Centered Moving Average", "Trailing Moving Average"), lty=c(1,1,2), col=c("black","black", "red"), bty="n")

We notice that the trailing and centered moving average lines do not start at the beginning of the series; this is because 4 periods (quarters), i.e., 1 full year of data, are needed to compute each moving average, since that is the window width we set.

Computing the trailing moving average at its first available point (the first possible forecast point), verified in Excel.

trailingMA[1]
## [1] 58325.25
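
The same check can be done directly in R: the first trailing value is simply the mean of quarters 1-4.

#Hand check of the first trailing moving average value.
mean(Store_Sales$Sales[1:4])
## [1] 58325.25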

Figure C. Time Series of the Department Store Sales Showing Training and Validation Periods and Moving Averages

yrange <- range(Store_STS)
plot(c(1, 7.5), c(48000,114500), type="n", xlab="Year",  ylab="Department Store Sales", bty="l", xaxt="n", yaxt="n")


lines(Store_STS, bty="l")

axis(1, at=seq(1,7.5,1), labels=format(seq(1,7.5,1)))

axis(2, at=seq(46000, 120000, 10000), labels=format(seq(46000, 120000, 10000)), las=0)

maForecastedValue <- tail(trailingMA,1)

maForecasts <- ts(rep(maForecastedValue, validLen), start=c(1,TrainLeng+1), freq=4)

lines(trailingMA, col="red")
lines(maForecasts, lwd=2, col="red", lty=2)


abline(v=6)
arrows(1, 105000, 6, 105000, code=3, length=0.1)
text(3, 108000, "Training")

abline(v=7)
arrows(6, 105000, 7, 105000, code=3, length=0.1)
text(6.5, 108000, "Validation")

accuracy(maForecasts, SSTS_Valid)
##               ME     RMSE     MAE      MPE     MAPE      ACF1 Theil's U
## Test set 6482.25 17815.72 13658.5 4.553724 16.09045 0.1722954  1.147438
ses_SS <- ses(SSTS_Train, h=4)
summary(ses_SS)
## 
## Forecast method: Simple exponential smoothing
## 
## Model Information:
## Simple exponential smoothing 
## 
## Call:
##  ses(y = SSTS_Train, h = 4) 
## 
##   Smoothing parameters:
##     alpha = 0.1204 
## 
##   Initial states:
##     l = 59176.5154 
## 
##   sigma:  13229.28
## 
##      AIC     AICc      BIC 
## 445.5222 447.0222 448.5094 
## 
## Error measures:
##                    ME     RMSE      MAE      MPE     MAPE     MASE
## Training set 3043.423 13229.28 10409.03 1.209064 15.54216 3.339973
##                    ACF1
## Training set -0.0578796
## 
## Forecasts:
##      Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 6 Q1       66507.99 49553.99 83462.00 40579.08 92436.90
## 6 Q2       66507.99 49431.45 83584.54 40391.68 92624.31
## 6 Q3       66507.99 49309.79 83706.20 40205.60 92810.38
## 6 Q4       66507.99 49188.97 83827.01 40020.84 92995.15
print(ses_SS$residuals)
##           Qtr1         Qtr2         Qtr3         Qtr4
## 1  -9029.51535  -8763.92906     14.66791  19745.90119
## 2 -10796.45142  -7215.04136   1272.99532  20293.66569
## 3  -8979.66402  -5732.08221    779.33513  21496.46584
## 4  -9112.73863  -5536.12827   6692.68683  25380.56666
## 5  -6446.46965  -2044.00583   9657.19041  29191.00197
SS_ETS <- ets(SSTS_Train)
summary(SS_ETS)
## ETS(M,N,M) 
## 
## Call:
##  ets(y = SSTS_Train) 
## 
##   Smoothing parameters:
##     alpha = 0.9907 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 59697.8836 
##     s=1.3045 0.9951 0.8621 0.8383
## 
##   sigma:  0.0255
## 
##      AIC     AICc      BIC 
## 367.7322 377.0655 374.7023 
## 
## Training set error measures:
##                    ME     RMSE      MAE      MPE     MAPE      MASE
## Training set 546.4286 1597.297 1338.208 0.819708 2.126239 0.4293945
##                   ACF1
## Training set -0.234507
SS_ETS_FORECAST <- forecast(SS_ETS, h=4)

accuracy(SS_ETS_FORECAST, SSTS_Valid)
##                     ME     RMSE      MAE      MPE     MAPE      MASE
## Training set  546.4286 1597.297 1338.208 0.819708 2.126239 0.4293945
## Test set     5829.9855 6830.684 5829.985 7.023908 7.023908 1.8706836
##                    ACF1 Theil's U
## Training set -0.2345070        NA
## Test set      0.2139347 0.4780883

Problem 8: Forecasting Australian Wine Sales

Figure D. Time Series of Australian Wine Sales

Wine_TS <- ts(Wine, start=c(1980,1), frequency=12)
## Warning in data.matrix(data): NAs introduced by coercion

## Warning in data.matrix(data): NAs introduced by coercion
autoplot(Wine_TS, na.rm=TRUE)

Figure E. Time Series of Australian Wine Sales by Wine Type

Wine_TS_Fort <- ts(Wine$Fortified)
autoplot(Wine_TS_Fort)
## Warning: Removed 8 rows containing missing values (geom_path).

Wine_TS_Red <- ts(Wine$Red)
autoplot(Wine_TS_Red)
## Warning: Removed 8 rows containing missing values (geom_path).

Wine_TS_SW <- ts(Wine$Sweet.white)
autoplot(Wine_TS_SW)
## Warning: Removed 8 rows containing missing values (geom_path).

Wine_TS_DW <- ts(Wine$Dry.white)
autoplot(Wine_TS_DW)
## Warning: Removed 8 rows containing missing values (geom_path).

Wine_TS_Spark <- ts(Wine$sparkling)
autoplot(Wine_TS_Spark)
## Warning: Removed 8 rows containing missing values (geom_path).