library(fma)
library(forecast)
library(kableExtra)
library(corrplot)
library(fpp2)
library(plotly)
library(gridExtra)
library(readxl)
library(seasonal)
library(ggplot2)

Question 7.1

Consider the pigs series — the number of pigs slaughtered in Victoria each month.

  1. Use the ses() function in R to find the optimal values of α and ℓ0, and generate forecasts for the next four months.
pigs_data <- ses(pigs,h=4)

pigs_data$model
## Simple exponential smoothing 
## 
## Call:
##  ses(y = pigs, h = 4) 
## 
##   Smoothing parameters:
##     alpha = 0.2971 
## 
##   Initial states:
##     l = 77260.0561 
## 
##   sigma:  10308.58
## 
##      AIC     AICc      BIC 
## 4462.955 4463.086 4472.665
summary(pigs_data)
## 
## Forecast method: Simple exponential smoothing
## 
## Model Information:
## Simple exponential smoothing 
## 
## Call:
##  ses(y = pigs, h = 4) 
## 
##   Smoothing parameters:
##     alpha = 0.2971 
## 
##   Initial states:
##     l = 77260.0561 
## 
##   sigma:  10308.58
## 
##      AIC     AICc      BIC 
## 4462.955 4463.086 4472.665 
## 
## Error measures:
##                    ME    RMSE      MAE       MPE     MAPE      MASE       ACF1
## Training set 385.8721 10253.6 7961.383 -0.922652 9.274016 0.7966249 0.01282239
## 
## Forecasts:
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Sep 1995       98816.41 85605.43 112027.4 78611.97 119020.8
## Oct 1995       98816.41 85034.52 112598.3 77738.83 119894.0
## Nov 1995       98816.41 84486.34 113146.5 76900.46 120732.4
## Dec 1995       98816.41 83958.37 113674.4 76092.99 121539.8
forecast(pigs_data)
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Sep 1995       98816.41 85605.43 112027.4 78611.97 119020.8
## Oct 1995       98816.41 85034.52 112598.3 77738.83 119894.0
## Nov 1995       98816.41 84486.34 113146.5 76900.46 120732.4
## Dec 1995       98816.41 83958.37 113674.4 76092.99 121539.8
  2. Compute a 95% prediction interval for the first forecast using ŷ ± 1.96s, where s is the standard deviation of the residuals.
s <- sd(pigs_data$residuals)
pigs_data_mean <- pigs_data$mean[1]
lci <- round(pigs_data_mean - (1.96*s), 2)
print(lci)
## [1] 78679.97
uci <- round(pigs_data_mean + (1.96*s), 2)
print(uci)
## [1] 118952.8

Lower bound of the 95% prediction interval: 78679.97; upper bound: 118952.84. This is slightly narrower than the interval ses() reported for the first forecast (78611.97 to 119020.8).
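As a cross-check (a sketch reusing the objects above), the hand-computed interval can be put side by side with the one ses() produced; they differ slightly because ses() uses the model's estimate of sigma (10308.58 above) rather than sd() of the residuals.

# Hand-computed interval vs. the model-based one from ses()
rbind(manual = c(lower = lci, upper = uci),
      model  = c(lower = pigs_data$lower[1, "95%"], upper = pigs_data$upper[1, "95%"]))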

Plot the data and fitted values together with the forecasts:

autoplot(pigs_data) + autolayer(fitted(pigs_data), series = "Fitted")

Question 7.5

Data set books contains the daily sales of paperback and hardcover books at the same store. The task is to forecast the next four days’ sales for paperback and hardcover books.

data("books")
summary(books)
##    Paperback       Hardcover    
##  Min.   :111.0   Min.   :128.0  
##  1st Qu.:167.2   1st Qu.:170.5  
##  Median :189.0   Median :200.5  
##  Mean   :186.4   Mean   :198.8  
##  3rd Qu.:207.2   3rd Qu.:222.0  
##  Max.   :247.0   Max.   :283.0
head(books)
## Time Series:
## Start = 1 
## End = 6 
## Frequency = 1 
##   Paperback Hardcover
## 1       199       139
## 2       172       128
## 3       111       172
## 4       209       139
## 5       161       191
## 6       119       168

Plot the series and discuss the main features of the data.

autoplot(books) + ggtitle("Daily Book Sales") + xlab("Day") + ylab("Book Sales")

books[,"Paperback"]
## Time Series:
## Start = 1 
## End = 30 
## Frequency = 1 
##  [1] 199 172 111 209 161 119 195 195 131 183 143 141 168 201 155 243 225 167 237
## [20] 202 186 176 232 195 190 182 222 217 188 247
autoplot(books[,"Paperback"]) + ggtitle("Plot of Paperback")

gglagplot(books[,"Paperback"])

Acf(books[,"Paperback"], lag.max = 29)  # only 30 observations, so at most 29 lags

books[,"Hardcover"]
## Time Series:
## Start = 1 
## End = 30 
## Frequency = 1 
##  [1] 139 128 172 139 191 168 170 145 184 135 218 198 230 222 206 240 189 222 158
## [20] 178 217 261 238 240 214 200 201 283 220 259
autoplot(books[,"Hardcover"]) + ggtitle("Hardcover Plot")

gglagplot(books[,"Hardcover"])

Acf(books[,"Hardcover"], lag.max = 29)

Both series trend upward over the 30 days, with large day-to-day fluctuations and no apparent seasonal or cyclic pattern.

Use the ses() function to forecast each series, and plot the forecasts.

ses.paperback <- ses(books[, "Paperback"])
ses.hardcover <- ses(books[, "Hardcover"])

forecast(ses.paperback)
##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 31       207.1097 162.4882 251.7311 138.8670 275.3523
## 32       207.1097 161.8589 252.3604 137.9046 276.3147
## 33       207.1097 161.2382 252.9811 136.9554 277.2639
## 34       207.1097 160.6259 253.5935 136.0188 278.2005
## 35       207.1097 160.0215 254.1979 135.0945 279.1249
## 36       207.1097 159.4247 254.7946 134.1818 280.0375
## 37       207.1097 158.8353 255.3840 133.2804 280.9389
## 38       207.1097 158.2531 255.9663 132.3899 281.8294
## 39       207.1097 157.6777 256.5417 131.5099 282.7094
## 40       207.1097 157.1089 257.1105 130.6400 283.5793
autoplot(books[, "Paperback"], series = "Paperback") +
  autolayer(ses.paperback, series = "Paperback") +
  autolayer(books[, "Hardcover"], series = "Hardcover") +
  autolayer(ses.hardcover, series = "Hardcover", PI = FALSE) +
  ylab("Sales Amount") +
  ggtitle("SES Books Sales")

Compute the RMSE values for the training data in each case.

# RMSE for paperback
ses.paperback.rmse <- sqrt(mean(ses.paperback$residuals^2))
ses.paperback.rmse
## [1] 33.63769
# RMSE for hardcover
ses.hardcover.rmse <- sqrt(mean(ses.hardcover$residuals^2))
ses.hardcover.rmse
## [1] 31.93101
accuracy(ses.paperback)
##                    ME     RMSE     MAE       MPE     MAPE      MASE       ACF1
## Training set 7.175981 33.63769 27.8431 0.4736071 15.57784 0.7021303 -0.2117522
accuracy(ses.hardcover)
##                    ME     RMSE      MAE      MPE     MAPE      MASE       ACF1
## Training set 9.166735 31.93101 26.77319 2.636189 13.39487 0.7987887 -0.1417763

RMSE (root mean squared error) is the square root of the mean of the squared residuals. It can be computed for any fitted model from which residuals can be extracted, and accuracy() reports it alongside the other error measures.
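As a quick sanity check (a sketch using the fits above), the hand-rolled RMSE should agree with the RMSE column that accuracy() reports:

# RMSE from the residuals should match accuracy()'s RMSE column
all.equal(unname(sqrt(mean(residuals(ses.paperback)^2))),
          unname(accuracy(ses.paperback)[, "RMSE"]))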

Question 7.6

We will continue with the daily sales of paperback and hardcover books in data set books.

A. Apply Holt’s linear method to the paperback and hardback series and compute four-day forecasts in each case.

# SES fits with h = 4, kept for comparison with Holt's method in part D
fc_paperback <- ses(books[,1], h=4)
fc_hardcover <- ses(books[,2], h=4)

# SES forecasts for paperback
forecast(fc_paperback)
##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 31       207.1097 162.4882 251.7311 138.8670 275.3523
## 32       207.1097 161.8589 252.3604 137.9046 276.3147
## 33       207.1097 161.2382 252.9811 136.9554 277.2639
## 34       207.1097 160.6259 253.5935 136.0188 278.2005
# Holt's linear trend method
holt_Paperback <- holt(books[, "Paperback"], h = 4)
holt_Hardcover <- holt(books[, "Hardcover"], h = 4)

#plot
a<- autoplot(holt_Paperback) 

b<- autoplot(holt_Hardcover)
grid.arrange(a, b, nrow = 2)

B. Compare the RMSE measures of Holt’s method for the two series to those of simple exponential smoothing in the previous question. (Remember that Holt’s method is using one more parameter than SES.) Discuss the merits of the two forecasting methods for these data sets.

Holt’s method, RMSE values Paperback

# Holt’s method, RMSE values Paperback
round(accuracy(holt_Paperback)[2], 2)
## [1] 31.14

Holt’s method, RMSE values Hardcover

# Holt’s method, RMSE values Hardcover
round(accuracy(holt_Hardcover)[2], 2) 
## [1] 27.19

C. Compare the forecasts for the two series using both methods. Which do you think is best?

Comparing the two methods, Holt's method looks like the better choice for these series: its training RMSE is lower for both paperback (31.14 vs 33.64) and hardcover (27.19 vs 31.93), and both series show a clear upward trend that SES cannot capture (see the overlay plot below).
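One way to compare the forecasts visually (a sketch reusing the SES fits from part A and the Holt fits above):

# Overlay SES and Holt point forecasts for each series
p1 <- autoplot(books[, "Paperback"]) +
  autolayer(fc_paperback, series = "SES", PI = FALSE) +
  autolayer(holt_Paperback, series = "Holt", PI = FALSE) +
  ggtitle("Paperback")
p2 <- autoplot(books[, "Hardcover"]) +
  autolayer(fc_hardcover, series = "SES", PI = FALSE) +
  autolayer(holt_Hardcover, series = "Holt", PI = FALSE) +
  ggtitle("Hardcover")
grid.arrange(p1, p2, nrow = 2)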

D. Calculate a 95% prediction interval for the first forecast for each series, using the RMSE values and assuming normal errors. Compare your intervals with those produced using ses and holt.

Paperback

s <- sd(fc_paperback$residuals)
mean_fc <- fc_paperback$mean[1]

# SES, Lower Confidence Interval
round(mean_fc - (1.96*s), 2)
## [1] 141.6
# SES, Upper Confidence Interval
round(mean_fc + (1.96*s), 2)
## [1] 272.62
#from model
#SES, Lower Confidence Interval from formula
round(forecast(fc_paperback)$lower[1, "95%"],2)
##    95% 
## 138.87
# SES, Upper Confidence Interval from formula
round(forecast(fc_paperback)$upper[1, "95%"], 2)
##    95% 
## 275.35
s <- sd(holt_Paperback$residuals)
mean_fc <- holt_Paperback$mean[1]

# Holt, Lower Confidence Interval
round(mean_fc - (1.96*s), 2)
## [1] 147.84
# Holt, Upper Confidence Interval
round(mean_fc + (1.96*s), 2)
## [1] 271.09
#from model
# Holt, Lower Confidence Interval from formula
round(forecast(holt_Paperback)$lower[1, "95%"],2)
##    95% 
## 143.91
# Holt, Upper Confidence Interval from formula
round(forecast(holt_Paperback)$upper[1, "95%"], 2)
##    95% 
## 275.02

Using both the ses() and holt() methods, the manually computed lower and upper bounds for the paperback series are close to the model-based intervals.

Hardcover

s <- sd(fc_hardcover$residuals)
mean_fc <- fc_hardcover$mean[1]

# SES, Lower Confidence Interval
round(mean_fc - (1.96*s), 2)
## [1] 178.58
# SES, Upper Confidence Interval
round(mean_fc + (1.96*s), 2)
## [1] 300.54
#from model
# SES, Lower Confidence Interval from formula
round(forecast(fc_hardcover)$lower[1, "95%"],2)
##    95% 
## 174.78
# SES, Upper Confidence Interval from formula
round(forecast(fc_hardcover)$upper[1, "95%"], 2)
##    95% 
## 304.34
s <- sd(holt_Hardcover$residuals)
mean_fc <- holt_Hardcover$mean[1]

# Holt, Lower Confidence Interval
round(mean_fc - (1.96*s), 2)
## [1] 195.96
# Holt, Upper Confidence Interval
round(mean_fc + (1.96*s), 2)
## [1] 304.39
#from model
# Holt, Lower Confidence Interval from formula
round(forecast(holt_Hardcover)$lower[1, "95%"],2)
##    95% 
## 192.92
# Holt, Upper Confidence Interval from formula
round(forecast(holt_Hardcover)$upper[1, "95%"], 2)
##    95% 
## 307.43

For the hardcover series the manually computed bounds differ from the model-based ones by a few units for both SES and Holt, rather than matching as closely as the paperback ones did.
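The gap can be traced to two different estimates of the residual spread: the model-based intervals use the fitted model's own sigma, while the manual intervals use sd() of the residuals. A sketch (in the forecast package the fitted ets model stores its sigma2 estimate):

# Model's sigma vs. sd() of the residuals; the manual 1.96*s interval
# inherits whichever estimate is used
c(model_sigma  = sqrt(holt_Hardcover$model$sigma2),
  sd_residuals = sd(residuals(holt_Hardcover)))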

Question 7.7

For this exercise use data set eggs, the price of a dozen eggs in the United States from 1900–1993. Experiment with the various options in the holt() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each argument is doing to the forecasts.

[Hint: use h=100 when calling holt() so you can clearly see the differences between the various options when plotting the forecasts.]

fc1 <- holt(eggs, h=100)                              # Holt's linear trend
fc2 <- holt(eggs, damped=TRUE, h=100)                 # damped trend
fc3 <- holt(eggs, lambda="auto", h=100)               # Box-Cox transformed
fc4 <- holt(eggs, damped=TRUE, lambda="auto", h=100)  # damped + Box-Cox

a<- autoplot(fc1)
b<- autoplot(fc2)
c<- autoplot(fc3)
d<- autoplot(fc4)

grid.arrange(a,b,c,d, nrow = 2)

Which model gives the best RMSE?

# RMSE - Holt
round(accuracy(fc1)[2], 2)
## [1] 26.58
# RMSE - Holt damped
round(accuracy(fc2)[2], 2)
## [1] 26.54
# RMSE - Holt box-cox
round(accuracy(fc3)[2], 2)
## [1] 26.39
# RMSE - Holt damped box-cox
round(accuracy(fc4)[2], 2)
## [1] 26.53

The RMSE values are very similar across the four models. Holt's method with a Box-Cox transformation achieves the lowest RMSE (26.39), so it looks best by this measure.
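To build intuition for what lambda = "auto" is doing, the selected Box-Cox parameter can be inspected directly (a sketch):

# Box-Cox parameter chosen for the eggs series
BoxCox.lambda(eggs)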

Question 7.8

Recall your retail time series data (from Exercise 3 in Section 2.10).

retaildata <- read_excel("data/retail.xlsx", skip = 1)

myts <- ts(retaildata[,"A3349335T"],
  frequency=12, start=c(1982,4))
autoplot(myts)

A. Why is multiplicative seasonality necessary for this series?

The seasonal variation grows as the level of the series increases, so multiplicative seasonality is necessary here; the seasonal sub-series plot below illustrates this.

  • The general definition of additive versus multiplicative seasonality is level + seasonal indices versus level × seasonal indices. Effectively, with multiplicative seasonality the width of the seasonal pattern is proportional to the level; with additive seasonality it is independent of the level. https://otexts.com/fpp2/holt-winters.html
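A quick visual check (a sketch): in the seasonal sub-series plot, the month-to-month swings widen as the overall level of the series rises, which is the signature of multiplicative seasonality.

ggsubseriesplot(myts) + ylab("Retail Sales")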

B. Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.

fc_myts <- hw(myts, seasonal="multiplicative", h=100)
fc_myts_d <- hw(myts, damped=TRUE, seasonal="multiplicative", h=100)

a<- autoplot(fc_myts)

b<- autoplot(fc_myts_d)

grid.arrange(a,b, ncol = 2)

C. Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?

# RMSE - Holt-Winters multiplicative
rh <- round(accuracy(fc_myts)[2], 2)
rh
## [1] 25.2
# RMSE - Holt-Winters multiplicative, damped
rhd <- round(accuracy(fc_myts_d)[2], 2)
rhd
## [1] 25.1

The RMSE is 25.2 for the Holt-Winters multiplicative method and 25.1 for the damped variant. The two are nearly identical, with a slight edge for the damped method.

D. Check that the residuals from the best method look like white noise.

checkresiduals(fc_myts_d)

## 
##  Ljung-Box test
## 
## data:  Residuals from Damped Holt-Winters' multiplicative method
## Q* = 285.62, df = 7, p-value < 2.2e-16
## 
## Model df: 17.   Total lags used: 24

The Ljung-Box test strongly rejects the null hypothesis (Q* = 285.62, p < 2.2e-16), so the residuals are significantly autocorrelated and do not look like white noise.

The residuals are the differences between the observed data and the fitted values. In a signal-plus-white-noise model, a good fit for the signal leaves residuals that are white noise. Also ref: https://otexts.com/fpp2/residuals.html
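The same Ljung-Box test can be run directly on the residuals (a sketch mirroring the lag and model-df values that checkresiduals() reported):

Box.test(residuals(fc_myts_d), lag = 24, fitdf = 17, type = "Ljung-Box")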

E. Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 8 in Section 3.7?

myts_train <- window(myts, end = c(2010, 12))
myts_test <- window(myts, start = 2011)

myts_train_d <- hw(myts_train, damped = TRUE, seasonal = "multiplicative")
# RMSE - Holt-Winters damped
accuracy(myts_train_d, myts_test)[,2]
## Training set     Test set 
##     25.68057     41.08034
myts_train_sn <- snaive(myts_train, h=100)
# RMSE - seasonal naïve approach
accuracy(myts_train_sn,myts_test)[,2]
## Training set     Test set 
##     72.20702    145.46662

Comparing test-set RMSEs (41.08 vs 145.47), the damped Holt-Winters multiplicative method clearly outperforms the seasonal naïve forecast.
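For completeness (a sketch), the undamped multiplicative fit can be scored the same way; here h is set to the length of the test set:

myts_train_hw <- hw(myts_train, seasonal = "multiplicative", h = length(myts_test))
accuracy(myts_train_hw, myts_test)["Test set", "RMSE"]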

Question 7.9

For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?

retaildata <- read_excel("data/retail.xlsx", skip = 1)
retail_ts <- ts(retaildata[, "A3349873A"], frequency = 12, start = c(1982, 4))
autoplot(retail_ts) + ylab("Retail Sales")

# Rebuild the training set as a plain univariate ts (dropping the
# one-column matrix structure that ts() of a tibble column produces)
retail_train <- ts(as.vector(retail_ts), start=c(1982,4), end=c(2010,12), frequency = 12)
# Get the optimal lambda
lambda <- BoxCox.lambda(retail_train)
# Perform a Box-Cox transformation
bc_retail_train <- BoxCox(retail_train, lambda = lambda)
# Perform the STL decomposition
stl_retail_train <- mstl(bc_retail_train)
# Create a seasonally adjusted series (data column minus seasonal column)
sa_stl_retail_train <- stl_retail_train[,1] - stl_retail_train[,3]
# Reverse the Box-Cox transformation
sa_retail_train <- InvBoxCox(sa_stl_retail_train, lambda = lambda)
# Check the output
autoplot(retail_train) + autolayer(sa_retail_train)
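The same seasonally adjusted series can be obtained in one step with seasadj(), assuming the installed forecast version provides an mstl method for it (a sketch; the difference should be essentially zero):

# seasadj() should reproduce data minus the seasonal component
max(abs(seasadj(stl_retail_train) - sa_stl_retail_train))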

# Helper: accuracy() returns a 2-row matrix (training, test);
# element [4] is row 2, column 2, i.e. the test-set RMSE
get_retail_test_rmse <- function(model){
  accuracy(model, retail_test)[4]
}

retail_train <- window(retail_ts, end = c(2010, 12))
retail_test <- window(retail_ts, start = 2011)

Model <- c("Seasonal Naïve (Baseline)", 
           "SES",
           "Holt's Method",
           "Damped Holt's Method",
           "Holt-Winters Additive",
           "Holt-Winters Multiplicative",
           "Damped Holt-Winters Additive",
           "Damped Holt-Winters Multiplicative")

RMSE <- c(get_retail_test_rmse(snaive(retail_train)), 
          get_retail_test_rmse(ses(retail_train)),
          get_retail_test_rmse(holt(retail_train)),
          get_retail_test_rmse(holt(retail_train, damped = TRUE)),
          get_retail_test_rmse(hw(retail_train, seasonal = "additive")),
          get_retail_test_rmse(hw(retail_train, seasonal = "multiplicative")),
          get_retail_test_rmse(hw(retail_train, seasonal = "additive", damped = TRUE)),
          get_retail_test_rmse(hw(retail_train, seasonal = "multiplicative", damped = TRUE)))

rmse_df <- data.frame(Model, RMSE)
# ETS on the seasonally adjusted series (forecast() on a monthly
# univariate ts fits an ETS model by default)
ets_retail_train <- forecast(sa_retail_train)

rmse_df %>%
  mutate(Model = as.character(Model)) %>%
  rbind(c("ETS on STL Seasonally-Adjusted data", get_retail_test_rmse(ets_retail_train))) %>%
  kable() %>%
  kable_styling()
Model                                  RMSE
Seasonal Naïve (Baseline)             71.44
SES                                   20.35
Holt's Method                         24.19
Damped Holt's Method                  19.86
Holt-Winters Additive                 77.42
Holt-Winters Multiplicative           70.12
Damped Holt-Winters Additive          78.15
Damped Holt-Winters Multiplicative    81.95
ETS on STL Seasonally-Adjusted data   90.72

From the table, the ETS model fit to the seasonally adjusted series did not beat the seasonal naïve baseline on the test set. Two caveats apply: the non-seasonal methods (ses, holt) forecast only their default 10-month horizon while the seasonal methods default to 24 months, so the test-set comparisons do not all cover the same period; and the ETS forecasts are of the seasonally adjusted series yet are scored against the raw, seasonal test data. Re-seasonalising the forecasts gives a fairer comparison, as sketched below.
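stlf() bundles exactly that pipeline (STL decomposition, ETS on the seasonally adjusted series, re-seasonalised forecasts); a sketch:

# STL + ETS with re-seasonalised forecasts, scored on the raw test set
stlf_fc <- stlf(retail_train,
                lambda = BoxCox.lambda(retail_train),
                h = length(retail_test))
accuracy(stlf_fc, retail_test)["Test set", "RMSE"]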