library(fpp3)
library(tidyverse)
library(forecast)
library(kableExtra)
library(reactable)
options(scipen = 999)
set.seed(123456)

1. Consider the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.

a. Use the ETS() function to estimate the equivalent model for simple exponential smoothing. Find the optimal values of \(\alpha\) and \(\ell_0\), and generate forecasts for the next four months.

data("aus_livestock")
victoria_pigs <- aus_livestock |>
  filter(State == "Victoria", Animal == "Pigs")
victoria_pigs |>
  autoplot(Count) + 
  labs(title = "Count of Pigs Slaughtered in Victoria") +
  theme(plot.title = element_text(hjust = 0.5))

ses_model <- victoria_pigs |>
  model(ETS(Count ~ error("A") + trend("N") + season("N")))
fc_ses <- ses_model |>
  forecast(h = 4)
report(ses_model)
## Series: Count 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.3221247 
## 
##   Initial states:
##      l[0]
##  100646.6
## 
##   sigma^2:  87480760
## 
##      AIC     AICc      BIC 
## 13737.10 13737.14 13750.07
fc_ses
## # A fable: 4 x 6 [1M]
## # Key:     Animal, State, .model [1]
##   Animal State    .model                        Month               Count  .mean
##   <fct>  <fct>    <chr>                         <mth>              <dist>  <dbl>
## 1 Pigs   Victoria "ETS(Count ~ error(\"A\")… 2019 Jan  N(95187, 87480760) 95187.
## 2 Pigs   Victoria "ETS(Count ~ error(\"A\")… 2019 Feb  N(95187, 96558142) 95187.
## 3 Pigs   Victoria "ETS(Count ~ error(\"A\")… 2019 Mar N(95187, 105635524) 95187.
## 4 Pigs   Victoria "ETS(Count ~ error(\"A\")… 2019 Apr N(95187, 114712906) 95187.
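
The forecast distributions above follow the ETS(A,N,N) result that the h-step forecast variance is \(\sigma^2\left[1 + \alpha^2(h-1)\right]\). As a quick sanity check (a sketch using the \(\hat{\sigma}^2\) and \(\hat{\alpha}\) reported above), the variances in the N(mean, variance) column can be reproduced directly:

# h-step forecast variances for h = 1,...,4; these match the
# variances printed in the fable above (up to rounding)
sigma2 <- 87480760
alpha <- 0.3221247
sigma2 * (1 + alpha^2 * (0:3))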

The optimal estimates are \(\alpha = 0.3221\) and \(\ell_0 = 100646.6\).
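
For reference, simple exponential smoothing updates the level as \(\ell_t = \alpha y_t + (1-\alpha)\ell_{t-1}\), and every h-step-ahead point forecast equals the final smoothed level, \(\hat{y}_{T+h|T} = \ell_T\), which is why all four point forecasts above equal 95187.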

fc_ses |>
  autoplot(victoria_pigs) +
  geom_line(aes(y = .fitted), col="blue",
            data = augment(ses_model)) +
  labs(y="Count", title="Four-Month Forecast of Pigs Slaughtered in Victoria Using SES") +
  guides(color="none")

b. Compute a 95% prediction interval for the first forecast using \(\hat{y}\pm1.96s\) where \(s\) is the standard deviation of the residuals. Compare your interval with the interval produced by R.

95% Prediction Interval produced by R:

pred_int_r <- unpack_hilo(hilo(fc_ses, 95), "95%")
head(pred_int_r[7:8], 1)
## # A tibble: 1 × 2
##   `95%_lower` `95%_upper`
##         <dbl>       <dbl>
## 1      76855.     113518.

95% Prediction Interval computed manually using the residual RMSE:

reactable(accuracy(ses_model))

The standard deviation of the residuals can be approximated by the root mean squared error (RMSE) of the training set. The accuracy() function reports an RMSE of 9336.338.

acc <- accuracy(ses_model)
# Use fc_mean rather than mean, to avoid masking base::mean
fc_mean <- fc_ses$.mean
# Manual 95% interval: point forecast +/- 1.96 * residual SD (approximated by RMSE)
lower_pi <- fc_mean - (1.96 * acc$RMSE)
upper_pi <- fc_mean + (1.96 * acc$RMSE)
lower_pi[1]
## [1] 76887.33
upper_pi[1]
## [1] 113485.8
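
The manual interval (76887, 113486) is nearly identical to R's (76855, 113518). The small difference is because R's interval uses the model's estimated standard deviation \(\hat{\sigma} = \sqrt{87480760} \approx 9353.1\), whereas the manual calculation uses the training RMSE of 9336.3; since \(\hat{\sigma}\) is slightly larger, R's interval is slightly wider.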

5. Data set global_economy contains the annual Exports from many countries. Select one country to analyse.

a. Plot the Exports series and discuss the main features of the data.

denmark_exports <- global_economy |>
  filter(Country == "Denmark")
denmark_exports |>
  autoplot(Exports) +
  labs(x="Year", y="Exports", title="Yearly Exports in Denmark") +
  theme(plot.title = element_text(hjust=0.5))

b. Use an ETS(A,N,N) model to forecast the series, and plot the forecasts.

denmark_fit <- denmark_exports |>
    model(ETS(Exports ~ error("A") + trend("N") + season("N")))
report(denmark_fit)
## Series: Exports 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.9998996 
## 
##   Initial states:
##      l[0]
##  32.31581
## 
##   sigma^2:  4.1743
## 
##      AIC     AICc      BIC 
## 322.3492 322.7936 328.5305
reactable(tidy(denmark_fit))
fc_denmark <- denmark_fit |>
  forecast(h = 10)
fc_denmark |>
  autoplot(denmark_exports) +
  geom_line(aes(y = .fitted), col="blue",
            data = augment(denmark_fit)) +
  labs(y="Exports", title="Four-Year Forecast of Exports in Denmark") +
  guides(color="none") +
  theme(plot.title = element_text(hjust=0.5))

c. Compute the RMSE values for the training data.

reactable(accuracy(denmark_fit))

The RMSE on the training set is about 2.008.

d. Compare the results to those from an ETS(A,A,N) model. (Remember that the trended model is using one more parameter than the simpler model.) Discuss the merits of the two forecasting methods for this data set.

denmark_fit2 <- denmark_exports |>
    model(ETS(Exports ~ error("A") + trend("A") + season("N")))
report(denmark_fit2)
## Series: Exports 
## Model: ETS(A,A,N) 
##   Smoothing parameters:
##     alpha = 0.9998998 
##     beta  = 0.0001000284 
## 
##   Initial states:
##      l[0]      b[0]
##  32.46829 0.4192062
## 
##   sigma^2:  4.1654
## 
##      AIC     AICc      BIC 
## 324.1161 325.2699 334.4183
tidy(denmark_fit2)
## # A tibble: 4 × 4
##   Country .model                                                  term  estimate
##   <fct>   <chr>                                                   <chr>    <dbl>
## 1 Denmark "ETS(Exports ~ error(\"A\") + trend(\"A\") + season(\"… alpha  1.00e+0
## 2 Denmark "ETS(Exports ~ error(\"A\") + trend(\"A\") + season(\"… beta   1.00e-4
## 3 Denmark "ETS(Exports ~ error(\"A\") + trend(\"A\") + season(\"… l[0]   3.25e+1
## 4 Denmark "ETS(Exports ~ error(\"A\") + trend(\"A\") + season(\"… b[0]   4.19e-1
fc_denmark2 <- denmark_fit2 |>
  forecast(h = 10)
fc_denmark2 |>
  autoplot(denmark_exports) +
  geom_line(aes(y = .fitted), col="blue",
            data = augment(denmark_fit2)) +
  labs(y="Exports", title="Four-Year Forecast of Exports in Denmark Using Holt's Linear Method") +
  guides(color="none") +
  theme(plot.title = element_text(hjust=0.5))
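
Although the ETS(A,A,N) model fits the training data slightly better (\(\hat{\sigma}^2 = 4.165\) versus 4.174), it estimates two additional parameters, and its AICc of 325.27 is higher than the ETS(A,N,N) model's 322.79. Once the extra parameters are penalised, the information criteria favour the simpler SES model; the trended model's main merit is that it projects a continuing trend rather than a flat level.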

e. Compare the forecasts from both methods. Which do you think is best?

denmark_compare <- denmark_exports |>
    model(
      `SES` = ETS(Exports ~ error("A") + trend("N") + season("N")),
      `Holt's Linear Method` = ETS(Exports ~ error("A") + trend("A") + season("N")))
reactable(accuracy(denmark_compare))

Comparing the two methods, Holt's Linear Method has a lower RMSE than SES but a slightly higher MAE. Based on the lower RMSE, I would pick Holt's Linear Method as the better forecasting method for this data set.

f. Calculate a 95% prediction interval for the first forecast for each model, using the RMSE values and assuming normal errors. Compare your intervals with those produced using R.

95% Prediction Interval produced by R for SES Method:

pred_int_r_denmark <- unpack_hilo(hilo(fc_denmark, 95), "95%")
head(pred_int_r_denmark[6:7], 1)
## # A tibble: 1 × 2
##   `95%_lower` `95%_upper`
##         <dbl>       <dbl>
## 1        51.2        59.2

95% Prediction Interval using RMSE for SES Method:

ses_denmark_acc <- accuracy(denmark_fit)
fc_mean_ses <- fc_denmark$.mean
lower_pi_den <- fc_mean_ses - (1.96 * ses_denmark_acc$RMSE)
upper_pi_den <- fc_mean_ses + (1.96 * ses_denmark_acc$RMSE)
lower_pi_den[1]
## [1] 51.27509
upper_pi_den[1]
## [1] 59.14477

95% Prediction Interval produced by R for Holt’s Linear Method:

pred_int_r_denmark2 <- unpack_hilo(hilo(fc_denmark2, 95), "95%")
head(pred_int_r_denmark2[6:7], 1)
## # A tibble: 1 × 2
##   `95%_lower` `95%_upper`
##         <dbl>       <dbl>
## 1        51.6        59.6

95% Prediction Interval using RMSE for Holt's Linear Method:

holt_denmark_acc <- accuracy(denmark_fit2)
fc_mean_holt <- fc_denmark2$.mean
lower_pi_den2 <- fc_mean_holt - (1.96 * holt_denmark_acc$RMSE)
upper_pi_den2 <- fc_mean_holt + (1.96 * holt_denmark_acc$RMSE)
lower_pi_den2[1]
## [1] 51.76921
upper_pi_den2[1]
## [1] 59.48885

For each model, the 95% prediction interval produced by R is almost identical to the 95% prediction interval calculated using the RMSE value.

6. Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.

[Hint: use a relatively large value of h when forecasting, so you can clearly see the differences between the various options when plotting the forecasts.]

china_gdp <- global_economy |>
  filter(Country == "China")
china_gdp |>
  autoplot(GDP) +
  labs(x="Year", y="GDP", title="GDP of China") +
  theme(plot.title = element_text(hjust = 0.5))

Using Holt’s Linear Method

china_fit <- china_gdp |>
    model(ETS(GDP ~ error("A") + trend("A") + season("N")))
report(china_fit)
## Series: GDP 
## Model: ETS(A,A,N) 
##   Smoothing parameters:
##     alpha = 0.9998964 
##     beta  = 0.5518569 
## 
##   Initial states:
##         l[0]       b[0]
##  50284778074 3288256684
## 
##   sigma^2:  38770101526925682933760
## 
##      AIC     AICc      BIC 
## 3258.053 3259.207 3268.356
reactable(tidy(china_fit))
fc_china_holt <- china_fit |>
  forecast(h = 20)
fc_china_holt |>
  autoplot(china_gdp) +
  geom_line(aes(y = .fitted), col="blue",
            data = augment(china_fit)) +
  labs(y="GDP", title="20-Year Forecast of China's GDP Using Holt's Linear Method") +
  guides(color="none") +
  theme(plot.title = element_text(hjust=0.5))

Using Box-Cox Transformation:

# Guerrero's method chooses the Box-Cox lambda that stabilises the variance
lambda <- china_gdp |>
  features(GDP, features = guerrero) |>
  pull(lambda_guerrero)
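
The Box-Cox transformation \(w_t = (y_t^\lambda - 1)/\lambda\) for \(\lambda \neq 0\) (and \(w_t = \ln y_t\) when \(\lambda = 0\)) compresses the rapid exponential growth in GDP, so the series is closer to linear on the transformed scale; the model forecasts on that scale and the forecasts are then back-transformed.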

Comparing Holt’s Linear Method, Box-Cox, and Damped Holt’s Linear Method:

china_fit_models <- china_gdp |>
  model(
    "Holt's Linear Method" = ETS(GDP ~ error('A') + trend('A') + season('N')),
    "Damped Holt's Linear Method" = ETS(GDP ~ error('A') + trend('Ad') + season('N')),
    "Box-Cox Transformation Method" = ETS(box_cox(GDP, lambda) ~ error('A') + trend('N') + season('N'))
  )
reactable(tidy(china_fit_models))
china_fit_models |>
  forecast(h = 20) |>
  autoplot(china_gdp, level = NULL) +
  labs(x="Year", y="GDP", title = "Comparison of Methods to Forecast 20 Years of China GDP") +
  guides(colour = guide_legend(title = "Forecasts"))

The forecasts from the Box-Cox model and Holt's Linear Method continue the upward trend, while Damped Holt's Method flattens out, growing more slowly over the forecast horizon.
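
This matches the point-forecast equations: Holt's method adds \(h b_T\) to the final level, so the trend continues indefinitely, while the damped method adds \((\phi + \phi^2 + \cdots + \phi^h) b_T\) with \(0 < \phi < 1\), so the increments decay geometrically and the forecasts approach the horizontal asymptote \(\ell_T + \frac{\phi}{1-\phi} b_T\). The Box-Cox model forecasts on the transformed scale and back-transforms the results, so even simple dynamics on the transformed scale can look very different on the original scale.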

7. Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?

data("aus_production")
aus_production |>
  autoplot(Gas) +
  labs(title = "Gas Production in Australia") +
  theme(plot.title = element_text(hjust = 0.5))

gas_fit <- aus_production |>
  model(
    "additive" = ETS(Gas ~ error("A") + trend("A") + season("A")),
    "multiplicative" = ETS(Gas ~ error("M") + trend("A") + season("M")),
    "Damped_Method" = ETS(Gas ~ error("M") + trend("Ad") + season("M"))
    )
fc_gas <- gas_fit |> 
  forecast(h = "5 years")

fc_gas |>
  autoplot(aus_production, level = NULL) +
  labs(title="10-Year Forecast of Quarterly Gas Production in Australia",
       y="Gas Production") +
  guides(colour = guide_legend(title = "Forecast"))

reactable(accuracy(gas_fit))

Multiplicative seasonality is necessary because the seasonal variations change in proportion to the level of the series. Additive seasonality would be appropriate if the seasonal variations were roughly constant in size; here the seasonal swings grow over time as the level rises.

Comparing the fitted models, the Damped Holt's Method has the lowest RMSE, slightly outperforming the multiplicative method. However, the multiplicative method's MAE is slightly lower, so damping yields at best a marginal improvement in the forecasts.
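
As a cross-check, the likelihood-based criteria can be compared as well (a small sketch; glance() is from fabletools, and since all three are ETS models fitted to the same data, their AICc values are directly comparable):

glance(gas_fit) |>
  select(.model, AICc)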

8. Recall your retail time series data (from Exercise 7 in Section 2.10).

a. Why is multiplicative seasonality necessary for this series?

data("aus_retail")
set.seed(123456)
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries |>
  autoplot(Turnover) +
  labs(title = "Australia Retail Trade Turnover") +
  theme(plot.title = element_text(hjust = 0.5))

fit_retail <- myseries |>
  model(
    "Additive" = ETS(Turnover ~ error('A') + trend('A') + season('A')),
    "Multiplicative" = ETS(Turnover ~ error('M') + trend('A') + season('M'))
  )

fc_retail<- fit_retail |>
  forecast(h = "5 years")

fc_retail |> 
  autoplot(myseries, level = NULL) +
  labs(title = "5-Year Forecast of Retail Trade Turnover in Australia") +
  theme(plot.title = element_text(hjust = 0.5)) +
  guides(colour = guide_legend(title = "Forecasts"))

Multiplicative seasonality is necessary because the seasonal variations are not constant over time; if they were, additive seasonality would be appropriate. The amplitude of the seasonal pattern grows with the level of the series, so multiplicative seasonality is the more appropriate choice here.
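
This is visible in the point-forecast equations: Holt-Winters' additive method forecasts \(\hat{y}_{t+h|t} = \ell_t + h b_t + s_{t+h-m(k+1)}\), where the seasonal term has a fixed size, while the multiplicative method forecasts \(\hat{y}_{t+h|t} = (\ell_t + h b_t)\,s_{t+h-m(k+1)}\), where the seasonal factor scales with the level (here \(m = 12\) for monthly data and \(k = \lfloor (h-1)/m \rfloor\)).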

b. Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.

fit_retail2 <- myseries |>
  model(
    "Holt-Winters' Multiplicative Method" = ETS(Turnover ~ error('M') + trend('A') + season('M')),
    "Damped Holt's Multiplicative Method" = ETS(Turnover ~ error('M') + trend('Ad') + season('M'))
  )

fc_retail2 <- fit_retail2 |>
  forecast(h = "10 years")

fc_retail2 |> 
  autoplot(myseries, level = NULL) +
  labs(title = "Australia Retail Trade Turnover ") +
  guides(colour = guide_legend(title = "Forecasts"))

c. Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?

reactable(accuracy(fit_retail2))
accuracy(fit_retail2) |>
  select(.model, RMSE)
## # A tibble: 2 × 2
##   .model                               RMSE
##   <chr>                               <dbl>
## 1 Holt-Winters' Multiplicative Method  13.8
## 2 Damped Holt's Multiplicative Method  13.6

The two RMSE values are close (13.8 versus 13.6), but the Damped Holt's Multiplicative Method would be the preferred method, with the slightly lower RMSE.

d. Check that the residuals from the best method look like white noise.

best_fit_method <- myseries |>
  model("Damped Holt's Multiplicative Method" = ETS(Turnover ~ error('M') + trend('Ad') + season('M')))

best_fit_method |> 
  gg_tsresiduals()

The histogram of the residuals appears roughly normally distributed. The innovation residuals plot shows spikes around January 1987 and January 2000, and the ACF plot has multiple spikes outside the blue dashed bounds. Taken together, the plots suggest the residuals do not fully resemble white noise.
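
A portmanteau test makes this concrete (a small sketch; lag = 24, two seasonal periods, is a common choice for monthly data):

augment(best_fit_method) |>
  features(.innov, ljung_box, lag = 24)

A small p-value here would confirm that the autocorrelation visible in the ACF plot is statistically significant, meaning the residuals are distinguishable from white noise.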

e. Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?

myseries_train_retail <- myseries |>
  filter(year(Month) < 2011)

fit_train_retail <- myseries_train_retail |>
  model("SNAIVE" = SNAIVE(Turnover),
        "Damped Holt's Multiplicative Method" = ETS(Turnover ~ error('M') + trend('Ad') + season('M')))

fc_train <- fit_train_retail |>
  forecast(h = "10 years")

fc_train |> 
  autoplot(myseries, level = NULL) +
  labs(title = "Australian Retail Trade Turnover Forecast") +
  theme(plot.title = element_text(hjust = 0.5)) +
  guides(colour = guide_legend(title = "Forecasts"))

reactable(accuracy(fc_train, myseries))

The Damped Holt’s Multiplicative Method performed better than the Seasonal Naive model, having lower RMSE and MAE values.

9. For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?

lambda <- myseries_train_retail |>
  features(Turnover, features = guerrero) |>
  pull(lambda_guerrero)
myseries_train_retail |>
  model("STL Box-Cox Model" = STL(box_cox(Turnover,lambda) ~ season(window = "periodic"), robust = TRUE)) |>
  components() |>
  autoplot() +
  ggtitle("STL Decomposition using Box-Cox Transformation of Australian Retail Trade Turnover") +
  theme(plot.title = element_text(hjust=0.5))

bc_turnover <- myseries_train_retail |>
  mutate(Turnover = box_cox(Turnover, lambda))
fit_retail3 <- bc_turnover |>
  model("STL Box-Cox Model" = STL(Turnover ~ season(window = "periodic"), robust=TRUE),
        "ETS Box-Cox Model" = ETS(Turnover ~ error("M") + trend("Ad") + season("M")))
fit_ets <- bc_turnover |>
  model("Damped Holt's Method" = ETS(Turnover ~ error('M') + trend('Ad') + season('M')))


fc_fit_bc <- fit_ets |>
  forecast(h = "5 years")

fc_fit_bc |> 
  autoplot(bc_turnover, level = NULL) +
  labs(title = "ETS Box-Cox Model of Australian Retail Trade Turnover") +
  theme(plot.title = element_text(hjust=0.5)) +
  guides(colour = guide_legend(title = "Forecasts"))

reactable(rbind(accuracy(fit_retail2),accuracy(fit_retail3)))

Comparing the RMSE and MAE values, the Box-Cox STL and ETS models show much lower errors than the Damped Holt's Multiplicative Method, and the STL Box-Cox model performed slightly better than the ETS Box-Cox model. These errors are measured on the Box-Cox transformed scale, however, so they are not directly comparable with the accuracy of models fitted to the untransformed series; a fair comparison with the earlier test-set results requires back-transforming the forecasts to the original scale.
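
For an apples-to-apples comparison on the original scale, one option (a sketch reusing the lambda computed above) is fabletools' decomposition_model(), which fits ETS to the seasonally adjusted component, re-seasonalises, and lets fable invert the Box-Cox transformation automatically, so accuracy can be evaluated against the untransformed test data:

fit_dcmp <- myseries_train_retail |>
  model(
    "STL + ETS" = decomposition_model(
      STL(box_cox(Turnover, lambda) ~ season(window = "periodic"), robust = TRUE),
      ETS(season_adjust ~ error("A") + trend("Ad") + season("N"))
    )
  )
fit_dcmp |>
  forecast(h = "10 years") |>
  accuracy(myseries)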