library(fpp3)
library(tidyverse)
library(ggrepel)
library(seasonal)
library(fabletools)
library(corrplot)
library(GGally)
library(patchwork)
library(caret)
theme_set(theme_bw())

8.1 Consider the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.

  1. Use the ETS() function to estimate the equivalent model for simple exponential smoothing. Find the optimal values of \(\alpha\) and l0 , and generate forecasts for the next four months.
data(aus_livestock)
pig_livestock = aus_livestock |> filter(Animal == "Pigs", State == "Victoria")

pig_fit = pig_livestock |>
  model(ETS(Count ~ error("A") + trend("N") + season("N")))
report(pig_fit)
## Series: Count 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.3221247 
## 
##   Initial states:
##      l[0]
##  100646.6
## 
##   sigma^2:  87480760
## 
##      AIC     AICc      BIC 
## 13737.10 13737.14 13750.07
pig_forecast = pig_fit |> forecast(h = 4)
pig_forecast |> hilo(level = 95) |> select(.mean, `95%`)

Based on the report above, the optimal values of \(\alpha\) is 0.32, and l0 is 100,646.6. The 4-month forecast has a point forecast of 95,186.56 pigs slaughtered each month for the next 4 months.

  1. Compute a 95% prediction interval for the first forecast using \(\hat{y}±1.96s\) where \(s\) is the standard deviation of the residuals. Compare your interval with the interval produced by R.
pig_res_std = sqrt(87480760) #Based on the report from the `pig_fit` above.
pig_low = pig_forecast$.mean[1] - 1.96*pig_res_std
pig_high = pig_forecast$.mean[1] + 1.96*pig_res_std

pig_low
## [1] 76854.45
pig_high
## [1] 113518.7

Looking at the distribution values from the pig_forecast tsibble in part a), the calculated 95% interval is (76,854.45, 113,518.70) compared to the values from the forecast function of (76854.79, 113518.3). Both values are within 0.4 pigs of each other.

8.5 Data set global_economy contains the annual Exports from many countries. Select one country to analyse.

  1. Plot the Exports series and discuss the main features of the data.
data(global_economy)
japan_data = global_economy |> filter(Country == "Japan")

japan_data |>
  autoplot(Exports) +
  labs(title = "Japan Exports since 1960")

japan_data |>
  ACF(Exports, lag_max = 40) |>
  autoplot() +
  labs(title = "ACF Plot of Japan Exports since 1960")

Looking at the first plot above that shows the Exports for Japan plotted over time, there appears to be an overall positive trend. There is no seasonality since the data is yearly data. There does appear to be cyclicity within the observations. For example, in the year 1973, there’s a local minimum, followed by a large spike in the year 1974. After peaks in 1974 and 1976, there is a drop-off in exports for another local peak in 1978. A similar pattern occurs over the year 1978-1998, as well as from 2007-2015. To provide further evidence of this cyclicity, the ACF plot above was generated, which shows cyclic behavior. For example, the correlation is positive for the first 9 lags, but in decreasing magnitude. The correlation then becomes negative with increasing magnitude for 5 lags, then decreasing magnitude for 8 lags. The correlation then becomes positive again, with a similar pattern as the previous lags.

  1. Use an ETS(A,N,N) model to forecast the series, and plot the forecasts.
japan_fit_ann = japan_data |>
  model(ETS(Exports ~ error("A") + trend("N") + season("N")))
report(japan_fit_ann)
## Series: Exports 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.9414732 
## 
##   Initial states:
##    l[0]
##  10.638
## 
##   sigma^2:  1.6245
## 
##      AIC     AICc      BIC 
## 267.6132 268.0577 273.7945
japan_forecast_ann = japan_fit_ann |>
  forecast(h = 10)

japan_forecast_ann |>
  autoplot(japan_data) +
  labs(title = "Japan Exports since 1960 with 10-Year Forecast\nUsing an ETS(A,N,N) Model")

  1. Compute the RMSE values for the training data.
japan_fit_ann |> accuracy() |> select(Country, .type, RMSE)
  1. Compare the results to those from an ETS(A,A,N) model. (Remember that the trended model is using one more parameter than the simpler model.) Discuss the merits of the two forecasting methods for this data set.
japan_fit_aan = japan_data |>
  model(ETS(Exports ~ error("A") + trend("A") + season("N")))
report(japan_fit_aan)
## Series: Exports 
## Model: ETS(A,A,N) 
##   Smoothing parameters:
##     alpha = 0.9094125 
##     beta  = 0.0001000003 
## 
##   Initial states:
##      l[0]      b[0]
##  10.50228 0.1047452
## 
##   sigma^2:  1.6731
## 
##      AIC     AICc      BIC 
## 271.2140 272.3679 281.5163
japan_forecast_aan = japan_fit_aan |>
  forecast(h = 10)

japan_forecast_aan |>
  autoplot(japan_data) +
  labs(title = "Japan Exports since 1960 with 10-Year Forecast\nUsing an ETS(A,A,N) Model")

japan_fit_aan |> accuracy() |> select(Country, .type, RMSE)

Looking at the two models above, the RMSE marginally improves from about 1.263 with the ETS(A,N,N) method to about 1.259 with the ETS(A,A,N) method, suggesting the trend component of the model does not add much value to the model as a whole. Although there’s an overall positive trend going back to 1960, there are extended periods of time with decreases in exports, such as in the 80’s and 90’s. As a result, over a short period of time, it is difficult to say in confidence whether exports will continue to increase overall, or if they’ll experience a decrease in exports.

  1. Compare the forecasts from both methods. Which do you think is best?

The ETS(A,N,N) and ETS(A,A,N) models appear to have similar forecasts, with the ETS(A,A,N) having a slightly positive trend compared to the ETS(A,N,N) method. The plots show that the distributions are similar in magnitude. Even though there’s an overall positive trend in the plot, suggesting that the ETS(A,A,N) would be better, there are multiple times in the plot where the trend decreases, including periods exceeding 10 years. As a result, it’s difficult to say whether the trend will increase or decrease over the next 10 years being forecast. Therefore, I think the ETS(A,N,N) method is best.

  1. Calculate a 95% prediction interval for the first forecast for each model, using the RMSE values and assuming normal errors. Compare your intervals with those produced using R.
japan_ann_rmse = japan_fit_ann |> accuracy() |> select(RMSE)
japan_ann_rmse = japan_ann_rmse$RMSE
japan_ann_error_sq = sqrt(japan_ann_rmse)
japan_ann_T = japan_data |> filter(!is.na(Exports)) |> nrow()
japan_ann_K = 3
japan_ann_M = 0
japan_ann_std_dev = sqrt((1/(japan_ann_T - japan_ann_K - japan_ann_M)) *
                           japan_ann_error_sq*japan_ann_T)
japan_ann_lower = japan_forecast_ann$.mean[1] - 1.96*japan_ann_std_dev
japan_ann_higher = japan_forecast_ann$.mean[1] + 1.96*japan_ann_std_dev

japan_ann_lower
## [1] 14.06977
japan_ann_higher
## [1] 18.33956
japan_forecast_ann |> hilo(level = 95) |> select(`95%`) |> head(1)

For the ETS(A,N,N) model, the calculated 95% interval based on the RMSE value was (14.07, 18.34), whereas the model produced a 95% interval of (13.71, 18.70). The calculated values are close to what the model calculated, but slightly narrower.

japan_aan_rmse = japan_fit_aan |> accuracy() |> select(RMSE)
japan_aan_rmse = japan_aan_rmse$RMSE
japan_aan_error_sq = sqrt(japan_aan_rmse)
japan_aan_T = japan_data |> filter(!is.na(Exports)) |> nrow()
japan_aan_K = 5
japan_aan_M = 0
japan_aan_std_dev = sqrt((1/(japan_aan_T - japan_aan_K - japan_aan_M)) *
                           japan_aan_error_sq*japan_aan_T)
japan_aan_lower = japan_forecast_aan$.mean[1] - 1.96*japan_aan_std_dev
japan_aan_higher = japan_forecast_aan$.mean[1] + 1.96*japan_aan_std_dev

japan_aan_lower
## [1] 14.2968
japan_aan_higher
## [1] 18.64419
japan_forecast_aan |> hilo(level = 95) |> select(`95%`) |> head(1)

For the ETS(A,A,N) model, the calculated 95% interval based on the RMSE value was (14.30, 18.64), whereas the model produced a 95% interval of (13.93, 19.01). The calculated values are close to what the model calculated, but, similar to the ETS(A,N,N) model, slightly narrower.

8.6 Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.

[Hint: use a relatively large value of h when forecasting, so you can clearly see the differences between the various options when plotting the forecasts.]

china_data = global_economy |> filter(Country == "China")
china_data_model = china_data |> 
  model(ETS(GDP ~ error("A") + trend("N") + season("N")))
report(china_data_model)
## Series: GDP 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.9999 
## 
##   Initial states:
##         l[0]
##  67537060018
## 
##   sigma^2:  1.79001e+23
## 
##      AIC     AICc      BIC 
## 3344.888 3345.332 3351.069
china_data_forecast = china_data_model |> forecast(h = 20)
china_data_forecast |> autoplot(china_data) +
  labs(title = "Chinese GDP with 20 Year Forecast\nUsing an ETS(A,N,N) Model")

The above ETS(A,N,N) model is a naive model in that it uses the last value from the data as the point estimate for all forecast values. The AIC value is 3,344.888 and the AICc value is 3,345.332.

china_data_model = china_data |> 
  model(ETS(GDP ~ error("A") + trend("A") + season("N")))
report(china_data_model)
## Series: GDP 
## Model: ETS(A,A,N) 
##   Smoothing parameters:
##     alpha = 0.9998964 
##     beta  = 0.5518569 
## 
##   Initial states:
##         l[0]       b[0]
##  50284778074 3288256684
## 
##   sigma^2:  3.87701e+22
## 
##      AIC     AICc      BIC 
## 3258.053 3259.207 3268.356
china_data_forecast = china_data_model |> forecast(h = 20)
china_data_forecast |> 
  autoplot(china_data) +
  labs(title = "Chinese GDP with 20 Year Forecast\nUsing an ETS(A,A,N) Model")

The above ETS(A,A,N) model generates a positively trending point estimate with an increase in the 95% prediction interval as the time-step increases. The AIC value is 3,258.053 and the AICc value is 3,259.207, which are both less than the ETS(A,N,N) model. This suggests the ETS(A,N,N) model results in a better forecast.

china_data_model = china_data |> 
  model(ETS(GDP ~ error("M") + trend("A") + season("N")))
report(china_data_model)
## Series: GDP 
## Model: ETS(M,A,N) 
##   Smoothing parameters:
##     alpha = 0.9998998 
##     beta  = 0.3119984 
## 
##   Initial states:
##         l[0]       b[0]
##  45713434615 3288256682
## 
##   sigma^2:  0.0108
## 
##      AIC     AICc      BIC 
## 3102.064 3103.218 3112.366
china_data_forecast = china_data_model |> forecast(h = 20)
china_data_forecast |> autoplot(china_data) +
  labs(title = "Chinese GDP with 20 Year Forecast\nUsing an ETS(M,A,N) Model")

The above ETS(M,A,N) model generates a positively trending point estimate with an increase in the 95% prediction interval as the time-step increases. This appears to have the same slope for the point estimate, but a much wider prediction interval when compared to the ETS(A,A,N) model. The AIC value is 3,102.064 and the AICc value is 3,103.218, which are both less than the ETS(A,N,N) and ETS(A,A,N) models. This suggests the ETS(M,A,N) model results in a better forecast.

china_data_model = china_data |> 
  model(ETS(GDP ~ error("M") + trend("Ad") + season("N")))
report(china_data_model)
## Series: GDP 
## Model: ETS(M,Ad,N) 
##   Smoothing parameters:
##     alpha = 0.9998952 
##     beta  = 0.3366859 
##     phi   = 0.9799998 
## 
##   Initial states:
##         l[0]       b[0]
##  45713434615 3288256683
## 
##   sigma^2:  0.0113
## 
##      AIC     AICc      BIC 
## 3105.060 3106.707 3117.423
china_data_forecast = china_data_model |> forecast(h = 20)
china_data_forecast |> autoplot(china_data) +
  labs(title = "Chinese GDP with 20 Year Forecast\nUsing an ETS(M,Ad,N) Model")

The above ETS(M,Ad,N) model generates a positively trending point estimate with an increase in the 95% prediction interval as the time-step increases. The slope of the point estimate decreases as the time-step increases. The AIC value is 3,105.060 and the AICc value is 3,106.707, which are both higher than the ETS(M,A,N) model. This suggests the ETS(M,A,N) model results in a better forecast.

BoxCoxTrans(china_data$GDP)
## Box-Cox Transformation
## 
## 58 data points used to estimate Lambda
## 
## Input data summary:
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 4.721e+10 1.455e+11 3.301e+11 1.970e+12 1.613e+12 1.224e+13 
## 
## Largest/Smallest: 259 
## Sample Skewness: 1.89 
## 
## Estimated Lambda: -0.2 
## With fudge factor, Lambda = 0 will be used for transformations

Looking at the output from the BoxCoxTrans function above, the estimated lambda for a Box-Cox transformation is -0.2. Since this is close to zero, a Box-Cox transformation was not applied for the analysis.

The seasonality component was not adjusted since the data is over yearly intervals, so it’s not possible to have a seasonal component.

Overall, the ETS(M,A,N) model resulted in the best model, based on the AIC and AICc values.

8.7 Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?

aus_production |>
  autoplot(Gas) +
  labs(title = "Quarterly Australian Gas Production since 1956")

Looking at the time plot above for Gas production in Australia, multiplicative seasonality is needed because as time goes on, the magnitude of the peaks and valleys within the data increases.

gas_model = aus_production |>
  model(ETS(Gas ~ error("A") + trend("N") + season("M")))
report(gas_model)
## Series: Gas 
## Model: ETS(A,N,M) 
##   Smoothing parameters:
##     alpha = 0.7136896 
##     gamma = 0.2183068 
## 
##   Initial states:
##      l[0]      s[0]    s[-1]    s[-2]     s[-3]
##  6.087823 0.9250565 1.181541 1.087955 0.8054479
## 
##   sigma^2:  19.5837
## 
##      AIC     AICc      BIC 
## 1830.219 1830.753 1853.911
gas_forecast = gas_model |> forecast(h = 40)

gas_forecast |>
  autoplot(aus_production |> select(Quarter, Gas)) +
  labs(title = "Quarterly Australian Gas Production with 10 Year Forecast\nusing an ETS(A,N,M) Model")

Looking at the ETS(A,N,M) model above, the AIC is 1830.219, and the AICc is 1830.753. The forecast has seasonal behavior, with an increase in the prediction distribution as time increases.

gas_model = aus_production |>
  model(ETS(Gas ~ error("A") + trend("A") + season("M")))
report(gas_model)
## Series: Gas 
## Model: ETS(A,A,M) 
##   Smoothing parameters:
##     alpha = 0.6132245 
##     beta  = 0.007863848 
##     gamma = 0.2244134 
## 
##   Initial states:
##      l[0]      b[0]      s[0]    s[-1]    s[-2]     s[-3]
##  3.620487 0.6098927 0.9796171 1.147619 1.075495 0.7972686
## 
##   sigma^2:  18.2237
## 
##      AIC     AICc      BIC 
## 1816.462 1817.328 1846.923
gas_forecast = gas_model |> forecast(h = 40)

gas_forecast |>
  autoplot(aus_production |> select(Quarter, Gas)) +
  labs(title = "Quarterly Australian Gas Production with 10 Year Forecast\nusing an ETS(A,A,M) Model")

Looking at the ETS(A,A,M) model above, the AIC is 1816.462, and the AICc is 1817.328, which are both lower than the ETS(A,N,M) model. This suggests this forecast is better. The forecast has seasonal behavior, with an increase in the prediction distribution as time increases. Unlike the ETS(A,N,M) model, however, the ETS(A,A,M) model shows a positive trend overall as time passes.

gas_model = aus_production |>
  model(ETS(Gas ~ error("M") + trend("A") + season("M")))
report(gas_model)
## Series: Gas 
## Model: ETS(M,A,M) 
##   Smoothing parameters:
##     alpha = 0.6528545 
##     beta  = 0.1441675 
##     gamma = 0.09784922 
## 
##   Initial states:
##      l[0]       b[0]      s[0]    s[-1]    s[-2]     s[-3]
##  5.945592 0.07062881 0.9309236 1.177883 1.074851 0.8163427
## 
##   sigma^2:  0.0032
## 
##      AIC     AICc      BIC 
## 1680.929 1681.794 1711.389
gas_forecast = gas_model |> forecast(h = 40)

gas_forecast |>
  autoplot(aus_production |> select(Quarter, Gas)) +
  labs(title = "Quarterly Australian Gas Production with 10 Year Forecast\nusing an ETS(M,A,M) Model")

Looking at the ETS(M,A,M) model above, the AIC is 1680.929, and the AICc is 1681.794, which are both lower than the ETS(A,A,M) model. This suggests this forecast is better. The forecast has seasonal behavior, with an increase in the prediction distribution as time increases. The overall magnitude of the prediction distribution is much larger than that of the ETS(A,A,M) model. Similar to the ETS(A,A,M) model, the ETS(M,A,M) model shows a positive trend overall as time passes.

gas_model = aus_production |>
  model(ETS(Gas ~ error("M") + trend("Ad") + season("M")))
report(gas_model)
## Series: Gas 
## Model: ETS(M,Ad,M) 
##   Smoothing parameters:
##     alpha = 0.6489044 
##     beta  = 0.1551275 
##     gamma = 0.09369372 
##     phi   = 0.98 
## 
##   Initial states:
##      l[0]       b[0]      s[0]    s[-1]   s[-2]     s[-3]
##  5.858941 0.09944006 0.9281912 1.177903 1.07678 0.8171255
## 
##   sigma^2:  0.0033
## 
##      AIC     AICc      BIC 
## 1684.028 1685.091 1717.873
gas_forecast = gas_model |> forecast(h = 40)

gas_forecast |>
  autoplot(aus_production |> select(Quarter, Gas)) +
  labs(title = "Quarterly Australian Gas Production with 10 Year Forecast\nusing an ETS(M,Ad,M) Model")

Looking at the ETS(M,Ad,M) model above, the AIC is 1684.028, and the AICc is 1685.091, which are both higher than the ETS(M,A,M) model. This suggests this forecast is worse than the ETS(M,A,M) model. The forecast has seasonal behavior, with an increase in the prediction distribution as time increases. Similar to the ETS(M,A,M) model, the ETS(M,Ad,M) model shows a positive trend overall as time passes. However, the overall trend decreases in magnitude as time passes, which suggests the forecast is approaching a maximum.

Based on the analysis above, the best model for forecasting is the ETS(M,A,M) model. Adding damping to the trend component did not improve the performance of the forecast.

8.8 Recall your retail time series data (from Exercise 7 in Section 2.10).

set.seed(40194)
retail_data <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))
  1. Why is multiplicative seasonality necessary for this series?
retail_data |>
  autoplot() +
  labs(title = "Monthly Turnover Data for Other Retailing since 1982")

Looking at the time plot above for turnover in “Other” retailing above, multiplicative seasonality is needed because as time goes on, the magnitude of the peaks and valleys within the data increases.

  1. Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.
retail_model_mam = retail_data |>
  model(ETS(Turnover ~ error("M") + trend("A") + season("M")))
report(retail_model_mam)
## Series: Turnover 
## Model: ETS(M,A,M) 
##   Smoothing parameters:
##     alpha = 0.5913244 
##     beta  = 0.004072921 
##     gamma = 0.1786385 
## 
##   Initial states:
##      l[0]      b[0]      s[0]     s[-1]     s[-2]    s[-3]    s[-4]    s[-5]
##  35.41983 0.3627867 0.9649705 0.9271432 0.9333664 1.533059 1.091166 1.019064
##     s[-6]     s[-7]     s[-8]     s[-9]    s[-10]    s[-11]
##  0.929845 0.9369524 0.9069247 0.8789473 0.9544877 0.9240734
## 
##   sigma^2:  0.003
## 
##      AIC     AICc      BIC 
## 4451.136 4452.582 4520.649
retail_forecast_mam = retail_model_mam |> forecast(h = 24)
retail_forecast_mam |> autoplot(retail_data) +
  labs(title = "Monthly Turnover Data for Other Retailing with 2 Year Forecast\nUsing an ETS(M,A,M) Model")

retail_model_madm = retail_data |>
  model(ETS(Turnover ~ error("M") + trend("Ad") + season("M")))
report(retail_model_madm)
## Series: Turnover 
## Model: ETS(M,Ad,M) 
##   Smoothing parameters:
##     alpha = 0.5936393 
##     beta  = 0.01262487 
##     gamma = 0.1780248 
##     phi   = 0.9799665 
## 
##   Initial states:
##      l[0]     b[0]      s[0]     s[-1]     s[-2]    s[-3]    s[-4]    s[-5]
##  35.79148 0.159832 0.9747194 0.9267532 0.9342582 1.525581 1.100495 1.017534
##      s[-6]     s[-7]     s[-8]     s[-9]   s[-10]    s[-11]
##  0.9333706 0.9368169 0.9049451 0.8772463 0.951118 0.9171624
## 
##   sigma^2:  0.0031
## 
##      AIC     AICc      BIC 
## 4458.854 4460.475 4532.457
retail_forecast_madm = retail_model_madm |> forecast(h = 24)
retail_forecast_madm |> autoplot(retail_data) +
  labs(title = "Monthly Turnover Data for Other Retailing with 2 Year Forecast\nUsing an ETS(M,Ad,M) Model")

The Holt-Winters multiplicative method without damping has an AIC of 4451.136 and AICc of 4452.582. The Holt-Winters’ multiplicative method with damping has an AIC of 4458.854 and AICc of 4460.475. This suggests that damping does not improve the forecast.

  1. Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?
retail_model_mam |> accuracy() |> select(.model, RMSE)
retail_model_madm |> accuracy() |> select(.model, RMSE)

The RMSE for the ETS(M,A,M) method of 12.72 is slightly lower than that of the ETS(M,Ad,M) method of 12.75. As a result, I would prefer the modeling without damping.

  1. Check that the residuals from the best method look like white noise.
residuals(retail_model_mam) |> autoplot() +
  labs(title = "Monthly Residuals for Other Retailing Data\nUsing an ETS(M,A,M) Model")

Looking at the plot above, there does not appear to be any discernible trend or pattern, suggesting that the residuals look like white noise.

  1. Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?
retail_train = retail_data |> filter(Month < yearmonth("2011 Jan"))
retail_train_model = retail_train |>
  model(ETS(Turnover ~ error("M") + trend("A") + season("M")))
report(retail_train_model)
## Series: Turnover 
## Model: ETS(M,A,M) 
##   Smoothing parameters:
##     alpha = 0.5839728 
##     beta  = 0.003621987 
##     gamma = 0.0001000053 
## 
##   Initial states:
##      l[0]      b[0]      s[0]     s[-1]     s[-2]    s[-3]    s[-4]    s[-5]
##  36.08362 0.3143226 0.9514241 0.9006706 0.9224164 1.586292 1.082625 1.028908
##      s[-6]     s[-7]     s[-8]     s[-9]   s[-10]  s[-11]
##  0.9405473 0.9414431 0.9163446 0.8799936 0.944175 0.90516
## 
##   sigma^2:  0.0028
## 
##      AIC     AICc      BIC 
## 3184.441 3186.313 3249.781
retail_train_forecast <- retail_train_model |> 
  forecast(new_data = anti_join(retail_data, retail_train)) 

retail_train_forecast |> autoplot(retail_data) +
  labs(title = "Monthly Turnover Data for Other Retailing, with Forecast Data since 2011\nUsing an ETS(M,A,M) Model")

retail_train_forecast |> accuracy(retail_data) |> select(.model, .type, RMSE)

The above plot shows a time plot with all data, along with a forecast starting in 2011 for comparison. Looking at the forecast data, it appears that the actual data mostly falls within the 95% prediction interval. Additionally, when looking at the accuracy of the model for the test data, the RMSE value is about 73.93, which is lower than the RMSE value of 106.57 produced from the seasonal naive model used in a previous exercise. This suggests that the Holt-Winters multiplicative method without damping is a better model for this data than the seasonal naive model.

8.9 For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?

#Check what the best lambda for Box-Cox Transformation is
BoxCoxTrans(retail_data$Turnover)
## Box-Cox Transformation
## 
## 441 data points used to estimate Lambda
## 
## Input data summary:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    33.2    58.7   136.2   179.7   262.7   661.3 
## 
## Largest/Smallest: 19.9 
## Sample Skewness: 0.902 
## 
## Estimated Lambda: 0 
## With fudge factor, Lambda = 0 will be used for transformations
#Apply log transformation since lambda = 0
retail_boxcox = retail_data |>
  mutate(Boxcox = log(Turnover))

#Apply STL model to the transformed data
retail_stl_model = retail_boxcox |> model(STL(Boxcox))

#Gather components of the STL model output
retail_season_adj = retail_stl_model |> components() |> select(-.model)

#Split data into training and test data
retail_ets_train = retail_season_adj |> filter(Month < yearmonth("2011 Jan"))
retail_ets_test = retail_season_adj |> filter(Month >= yearmonth("2011 Jan"))

#Apply ETS model to the seasonally adjusted training data
retail_ets_model = retail_ets_train |> model(ETS(season_adjust))
report(retail_ets_model)
## Series: season_adjust 
## Model: ETS(A,A,N) 
##   Smoothing parameters:
##     alpha = 0.6016089 
##     beta  = 0.0001000001 
## 
##   Initial states:
##      l[0]        b[0]
##  3.588573 0.006065729
## 
##   sigma^2:  0.002
## 
##        AIC       AICc        BIC 
## -115.72101 -115.54401  -96.50328
#Forecast the test data using the ETS Model
retail_ets_forecast <- retail_ets_model |> 
  forecast(new_data = retail_ets_test) 

#Transform forecast data back to original scale
retail_forecast_scale = retail_ets_forecast |>
  mutate(across(season_adjust:remainder, \(x) exp(x))) |>
  select(-Boxcox)

#Transform seasonally adjusted data back to original scale
retail_rescaled = retail_season_adj |>
  mutate(across(Boxcox:season_adjust, \(x) exp(x))) |>
  select(-season_adjust) |>
  rename("season_adjust" = Boxcox)

#Plot original data and the forecast
retail_forecast_scale |>
  autoplot(retail_rescaled) +
  labs(y = "Turnover",
       title = "Monthly Turnover Data for Other Retailing, with Forecast Data\nsince 2011 Using ETS(A,A,N) Model of Seasonally Adjusted Data")

#Check accuracy of the model on the test data
retail_forecast_scale |> accuracy(retail_rescaled) |> select(.model, .type, RMSE)

Above is the plot of the original data with the forecast of data starting in 2011 using an ETS model on the seasonally adjusted data. Based on the RMSE value above of 69.35, this is a better forecast than what was produced in the Holt-Winters multiplicative without damping method.