library(fpp2)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────── fpp2 2.4 ──
## ✓ ggplot2   3.3.2     ✓ fma       2.4  
## ✓ forecast  8.13      ✓ expsmooth 2.3
## 
library(gridExtra)
library(ggplot2)

7.1 Consider the pigs series — the number of pigs slaughtered in Victoria each month.

a. Use the ses() function in R to find the optimal values of α and ℓ_0, and generate forecasts for the next four months.

pigs_forecast <- ses(pigs,h=4)
autoplot(pigs_forecast) +
  autolayer(fitted(pigs_forecast), series="Fitted") +
  ylab("Pigs slaughtered each month") + xlab("Month")

summary(pigs_forecast)
## 
## Forecast method: Simple exponential smoothing
## 
## Model Information:
## Simple exponential smoothing 
## 
## Call:
##  ses(y = pigs, h = 4) 
## 
##   Smoothing parameters:
##     alpha = 0.2971 
## 
##   Initial states:
##     l = 77260.0561 
## 
##   sigma:  10308.58
## 
##      AIC     AICc      BIC 
## 4462.955 4463.086 4472.665 
## 
## Error measures:
##                    ME    RMSE      MAE       MPE     MAPE      MASE       ACF1
## Training set 385.8721 10253.6 7961.383 -0.922652 9.274016 0.7966249 0.01282239
## 
## Forecasts:
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Sep 1995       98816.41 85605.43 112027.4 78611.97 119020.8
## Oct 1995       98816.41 85034.52 112598.3 77738.83 119894.0
## Nov 1995       98816.41 84486.34 113146.5 76900.46 120732.4
## Dec 1995       98816.41 83958.37 113674.4 76092.99 121539.8

Answer: The optimal value for α is 0.2971 and for ℓ_0 77260.0561.

b. Compute a 95% prediction interval for the first forecast using y±1.96 s where s is the standard deviation of the residuals. Compare your interval with the interval produced by R.

pigs_forecast
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Sep 1995       98816.41 85605.43 112027.4 78611.97 119020.8
## Oct 1995       98816.41 85034.52 112598.3 77738.83 119894.0
## Nov 1995       98816.41 84486.34 113146.5 76900.46 120732.4
## Dec 1995       98816.41 83958.37 113674.4 76092.99 121539.8
pigs_forecast$mean[1]+(1.96*sd(pigs_forecast$residuals))
## [1] 118952.8
pigs_forecast$mean[1]-(1.96*sd(pigs_forecast$residuals))
## [1] 78679.97

Answer : Manually calculated confidence interval is : 78679.97<->118952.8 vs R: 78611.97<->119020.8 , so there is a small difference.

7.5 Data set books contains the daily sales of paperback and hardcover books at the same store. The task is to forecast the next four days’ sales for paperback and hardcover books.

a. Plot the series and discuss the main features of the data.

autoplot(books)

## Answer : Both series are upward trending (30 observations points - days).

b. Use the ses() function to forecast each series, and plot the forecasts.

paperback_fcst <- ses(books[,"Paperback"])
hardcover_fcst <- ses(books[,"Hardcover"])
autoplot(books)+autolayer(paperback_fcst,PI=FALSE,series="Paperback")+autolayer(hardcover_fcst,PI=FALSE, series="Hardcover")

c. Compute the RMSE values for the training data in each case.

sqrt(paperback_fcst$model$mse)
## [1] 33.63769
sqrt(hardcover_fcst$model$mse)
## [1] 31.93101

Answer : Paperback = 33.63769, Hardcover = 31.93101

7.5 We will continue with the daily sales of paperback and hardcover books in data set books.

a. Apply Holt’s linear method to the paperback and hardback series and compute four-day forecasts in each case.

paperback_fcst_holt <- holt(books[,"Paperback"],h=4)
hardcover_fcst_holt <- holt(books[,"Hardcover"],h=4)
paperback_fcst_holt
##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 31       209.4668 166.6035 252.3301 143.9130 275.0205
## 32       210.7177 167.8544 253.5811 145.1640 276.2715
## 33       211.9687 169.1054 254.8320 146.4149 277.5225
## 34       213.2197 170.3564 256.0830 147.6659 278.7735
hardcover_fcst_holt
##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 31       250.1739 212.7390 287.6087 192.9222 307.4256
## 32       253.4765 216.0416 290.9113 196.2248 310.7282
## 33       256.7791 219.3442 294.2140 199.5274 314.0308
## 34       260.0817 222.6468 297.5166 202.8300 317.3334

b. Compare the RMSE measures of Holt’s method for the two series to those of simple exponential smoothing in the previous question. (Remember that Holt’s method is using one more parameter than SES.) Discuss the merits of the two forecasting methods for these data sets.

sqrt(paperback_fcst_holt$model$mse)
## [1] 31.13692
sqrt(hardcover_fcst_holt$model$mse)
## [1] 27.19358

Answer :Paperback = 31.13692, Hardcover = 31.13692 so lower than in the previous question. The simple method gives us a straight horizonatl line forecast but data shows a trend which Holt captures.

c. Compare the forecasts for the two series using both methods. Which do you think is best?

Answer: Holt is better as it is able to capture a trend, ses is not suitable for trended data.

autoplot(paperback_fcst,  series="ses")+
autolayer(paperback_fcst_holt,series="holt")

autoplot(hardcover_fcst, series="ses")+
  autolayer(hardcover_fcst_holt, series="holt")

d. Calculate a 95% prediction interval for the first forecast for each series, using the RMSE values and assuming normal errors.

paperback_fcst_holt$mean[1]-(1.96*sqrt(paperback_fcst_holt$model$mse))
## [1] 148.4384
paperback_fcst_holt$mean[1]+(1.96*sqrt(paperback_fcst_holt$model$mse))
## [1] 270.4951
hardcover_fcst_holt$mean[1]-(1.96*sqrt(hardcover_fcst_holt$model$mse))
## [1] 196.8745
hardcover_fcst_holt$mean[1]+(1.96*sqrt(hardcover_fcst_holt$model$mse))
## [1] 303.4733

Answer: Paperback holt: 148.4384 <-> 270.4951 vs Hardcover : 196.8745 <-> 303.4733

e. Compare your intervals with those produced using ses and holt.

paperback_fcst$mean[1]-(1.96*sqrt(paperback_fcst$model$mse))
## [1] 141.1798
paperback_fcst$mean[1]+(1.96*sqrt(paperback_fcst$model$mse))
## [1] 273.0395
paperback_fcst
##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 31       207.1097 162.4882 251.7311 138.8670 275.3523
## 32       207.1097 161.8589 252.3604 137.9046 276.3147
## 33       207.1097 161.2382 252.9811 136.9554 277.2639
## 34       207.1097 160.6259 253.5935 136.0188 278.2005
## 35       207.1097 160.0215 254.1979 135.0945 279.1249
## 36       207.1097 159.4247 254.7946 134.1818 280.0375
## 37       207.1097 158.8353 255.3840 133.2804 280.9389
## 38       207.1097 158.2531 255.9663 132.3899 281.8294
## 39       207.1097 157.6777 256.5417 131.5099 282.7094
## 40       207.1097 157.1089 257.1105 130.6400 283.5793
paperback_fcst_holt$mean[1]-(1.96*sqrt(paperback_fcst_holt$model$mse))
## [1] 148.4384
paperback_fcst_holt$mean[1]+(1.96*sqrt(paperback_fcst_holt$model$mse))
## [1] 270.4951
paperback_fcst_holt
##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 31       209.4668 166.6035 252.3301 143.9130 275.0205
## 32       210.7177 167.8544 253.5811 145.1640 276.2715
## 33       211.9687 169.1054 254.8320 146.4149 277.5225
## 34       213.2197 170.3564 256.0830 147.6659 278.7735
hardcover_fcst$mean[1]-(1.96*sqrt(hardcover_fcst$model$mse))
## [1] 176.9753
hardcover_fcst$mean[1]+(1.96*sqrt(hardcover_fcst$model$mse))
## [1] 302.1449
hardcover_fcst
##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 31       239.5601 197.2026 281.9176 174.7799 304.3403
## 32       239.5601 194.9788 284.1414 171.3788 307.7414
## 33       239.5601 192.8607 286.2595 168.1396 310.9806
## 34       239.5601 190.8347 288.2855 165.0410 314.0792
## 35       239.5601 188.8895 290.2306 162.0662 317.0540
## 36       239.5601 187.0164 292.1038 159.2014 319.9188
## 37       239.5601 185.2077 293.9124 156.4353 322.6848
## 38       239.5601 183.4574 295.6628 153.7584 325.3618
## 39       239.5601 181.7600 297.3602 151.1625 327.9577
## 40       239.5601 180.1111 299.0091 148.6406 330.4795
hardcover_fcst_holt$mean[1]-(1.96*sqrt(hardcover_fcst_holt$model$mse))
## [1] 196.8745
hardcover_fcst_holt$mean[1]+(1.96*sqrt(hardcover_fcst_holt$model$mse))
## [1] 303.4733
hardcover_fcst_holt
##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 31       250.1739 212.7390 287.6087 192.9222 307.4256
## 32       253.4765 216.0416 290.9113 196.2248 310.7282
## 33       256.7791 219.3442 294.2140 199.5274 314.0308
## 34       260.0817 222.6468 297.5166 202.8300 317.3334

Answer: Intervals from R are wider than the ones calculated manually using RMSE.


7.7 For this exercise use data set eggs, the price of a dozen eggs in the United States from 1900–1993. Experiment with the various options in the holt() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each argument is doing to the forecasts.

[Hint: use h=100 when calling holt() so you can clearly see the differences between the various options when plotting the forecasts.]

Which model gives the best RMSE?

eggs_forecast <- holt(eggs, h=100, lambda=0.1,damped = TRUE)
autoplot(eggs_forecast)

accuracy(eggs_forecast)
##                    ME     RMSE      MAE      MPE     MAPE      MASE       ACF1
## Training set -0.59707 26.48436 19.27263 -1.97799 9.912668 0.9506899 0.02833915

Answer: All things equal, setting damp trend to TRUE increases the error, setting it to FALSE decreases it. Lowering the lambda (from 0.9 down to 0.01) decreases the RMSE while damp trend is set to TRUE.

–

7.8 Recall your retail time series data (from Exercise 3 in Section 2.10).

a. Why is multiplicative seasonality necessary for this series?

retaildata <- readxl::read_excel("retail.xlsx", skip=1)
myts <- ts(retaildata[,"A3349335T"],
  frequency=12, start=c(1982,4))
autoplot(myts)

Answer: Multiplicative term is required as seasonality is increasing with time.

b. Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.

myts_mult_hw <- hw(myts, seasonal = "multiplicative", damped=TRUE)
myts_mult_hw_notd <- hw(myts, seasonal = "multiplicative", damped=FALSE)


autoplot(myts_mult_hw)

autoplot(myts_mult_hw_notd)

c. Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?

accuracy(myts_mult_hw)
##                  ME     RMSE      MAE       MPE     MAPE      MASE       ACF1
## Training set 2.9059 25.10059 18.74334 0.2366077 1.993423 0.3011601 -0.1394101
accuracy(myts_mult_hw_notd)
##                     ME     RMSE      MAE        MPE     MAPE      MASE
## Training set 0.9212824 25.20381 18.77683 0.06856226 1.979315 0.3016982
##                    ACF1
## Training set -0.1217931

Answer: The model with the damped set to TRUE has a slightly lower RMSE. On that basis this is the preferred model. Though the non damped version visually appears to follow the trend better and I would rather take this model despite higher RMSE.

d. Check that the residuals from the best method look like white noise.

checkresiduals(myts_mult_hw_notd)

## 
##  Ljung-Box test
## 
## data:  Residuals from Holt-Winters' multiplicative method
## Q* = 250.64, df = 8, p-value < 2.2e-16
## 
## Model df: 16.   Total lags used: 24

Answer: Autocorrelation at various lags is present, the residuals do not follow white noise.

e. Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 8 in Section 3.7?

retaildata <- readxl::read_excel("retail.xlsx", skip=1)
myts1 <- ts(retaildata[,"A3349335T"],
  frequency=12, start=c(1982,4), end=c(2010,12))
autoplot(myts1)

myts1.train <- window(myts, end=c(2010,12))
myts1.test <- window(myts, start=2011)

myts1_hw<-hw(myts1.train,h=40,seasonal = "multiplicative", damped=FALSE)
accuracy(myts1_hw,myts1.test)
##                      ME     RMSE      MAE        MPE     MAPE      MASE
## Training set   2.658058 25.00649 18.25149  0.2228273 1.965971 0.2958851
## Test set     -63.670300 77.04807 65.91346 -2.9161237 3.023249 1.0685599
##                      ACF1 Theil's U
## Training set -0.006737509        NA
## Test set      0.447374940 0.5550438

Answer: The test set RMSE for the seasonal naive approach was 109.62545 vs 77.04807 for holt-winters non-damped. HW wins.


7.9 For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?

retaildata <- readxl::read_excel("retail.xlsx", skip=1)
myts1 <- ts(retaildata[,"A3349335T"],
  frequency=12, start=c(1982,4), end=c(2010,12))
autoplot(myts1)

myts1.train <- window(myts, end=c(2010,12))
myts1.test <- window(myts, start=2011)

myts1_stl<-stlf(myts1.train,h=40,lambda=0)
accuracy(myts1_stl,myts1.test)
##                       ME      RMSE       MAE        MPE     MAPE      MASE
## Training set   -1.644558  21.70898  16.34821 -0.1753908 1.775947 0.2650299
## Test set     -116.234664 133.54400 116.70221 -5.3133332 5.335006 1.8919246
##                     ACF1 Theil's U
## Training set -0.00379309        NA
## Test set      0.75425633 0.9640122
autoplot(myts1_stl)

Answer: The train set RMSE is 21.70898 is better than HW but the test set RMSE of 133.54400 is worse than holt.