Hyndman: 8.1, 8.5, 8.6, 8.7, 8.8, 8.9

library(fpp3)

8.1

Consider the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.

Use the ETS() function to estimate the equivalent model for simple exponential smoothing. Find the optimal values of α and ℓ0, and generate forecasts for the next four months.

pigs <- aus_livestock %>% 
  filter(Animal=="Pigs", State=="Victoria")
fit <- pigs |>
  model(ETS(Count ~ error("A") + trend("N") + season("N")))
fc <- fit |>
  forecast(h = 4)
report(fit)
## Series: Count 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.3221247 
## 
##   Initial states:
##      l[0]
##  100646.6
## 
##   sigma^2:  87480760
## 
##      AIC     AICc      BIC 
## 13737.10 13737.14 13750.07

The optimal values are α = 0.3221247 and ℓ0 = 100646.6.
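
As a quick check, the flat SES forecast can be reproduced from these two estimates by iterating the level equation ℓt = αyt + (1 − α)ℓt−1 over the observed series (a minimal sketch using the reported values):

alpha <- 0.3221247
level <- 100646.6                           # l[0]
for (y in pigs$Count) {
  level <- alpha * y + (1 - alpha) * level  # SES level recursion
}
level                                       # about 95187, matching the forecast mean shown below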

Four-month forecasts:

fc
## # A fable: 4 x 6 [1M]
## # Key:     Animal, State, .model [1]
##   Animal State    .model                          Month             Count  .mean
##   <fct>  <fct>    <chr>                           <mth>            <dist>  <dbl>
## 1 Pigs   Victoria "ETS(Count ~ error(\"A\") +… 2019 Jan N(95187, 8.7e+07) 95187.
## 2 Pigs   Victoria "ETS(Count ~ error(\"A\") +… 2019 Feb N(95187, 9.7e+07) 95187.
## 3 Pigs   Victoria "ETS(Count ~ error(\"A\") +… 2019 Mar N(95187, 1.1e+08) 95187.
## 4 Pigs   Victoria "ETS(Count ~ error(\"A\") +… 2019 Apr N(95187, 1.1e+08) 95187.

R's 95% prediction intervals:

unpack_hilo(hilo(fc, 95), "95%")
## # A tsibble: 4 x 8 [1M]
## # Key:       Animal, State, .model [1]
##   Animal State  .model    Month             Count  .mean `95%_lower` `95%_upper`
##   <fct>  <fct>  <chr>     <mth>            <dist>  <dbl>       <dbl>       <dbl>
## 1 Pigs   Victo… "ETS(… 2019 Jan N(95187, 8.7e+07) 95187.      76855.     113518.
## 2 Pigs   Victo… "ETS(… 2019 Feb N(95187, 9.7e+07) 95187.      75927.     114446.
## 3 Pigs   Victo… "ETS(… 2019 Mar N(95187, 1.1e+08) 95187.      75042.     115331.
## 4 Pigs   Victo… "ETS(… 2019 Apr N(95187, 1.1e+08) 95187.      74195.     116179.

Computing a 95% interval for the first forecast by hand as ŷ ± 1.96σ, with σ² = 87480760 taken from the model report:

fc_mean <- 95186.56          # first forecast mean
sigma <- sqrt(87480760)      # residual standard deviation from report(fit)
lower <- fc_mean - 1.96 * sigma
upper <- fc_mean + 1.96 * sigma
paste0("[", lower, ", ", upper, "]")
## [1] "[76854.4546212935, 113518.665378707]"

This is essentially identical to the interval R produced for January 2019 (76855 to 113518); any tiny difference comes from using 1.96 rather than the exact normal quantile and from display rounding.

8.5

Data set global_economy contains annual Exports for many countries; the United States is analysed here. An ETS(A,N,N) model is fitted to the Exports series and compared with an ETS(A,A,N) model.

usa_economy <- global_economy %>% 
  filter(Country == "United States")

usa_economy %>% autoplot(Exports)
## Warning: Removed 1 row containing missing values (`geom_line()`).

# Drop the row with the missing Exports value noted in the warning above
usa_economy <- usa_economy[complete.cases(usa_economy), ]
fit <- usa_economy |>
  model(ETS(Exports ~ error("A") + trend("N") + season("N")))
fc <- fit |>
  forecast(h = 20)
report(fit)
## Series: Exports 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.9999 
## 
##   Initial states:
##     l[0]
##  4.89991
## 
##   sigma^2:  0.4142
## 
##      AIC     AICc      BIC 
## 180.0245 180.4861 186.1006
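
For completeness, the 20-year ETS(A,N,N) forecasts can be plotted straight from the fable (a minimal sketch; the plot is not reproduced here):

fc |>
  autoplot(usa_economy) +
  labs(title = "US Exports, ETS(A,N,N) forecasts",
       y = "Exports (% of GDP)")
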
usa_economy |>
  stretch_tsibble(.init = 10) |>
  model(
    SES = ETS(Exports ~ error("A") + trend("N") + season("N")),
  ) |>
  forecast(h = 5) |>
  accuracy(usa_economy)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 5 observations are missing between 2017 and 2021
## # A tibble: 1 × 11
##   .model Country       .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <fct>         <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SES    United States Test  0.475  1.36  1.10  4.29  11.5  2.31  2.13 0.739
usa_economy |>
  stretch_tsibble(.init = 10) |>
  model(
    SES = ETS(Exports ~ error("A") + trend("A") + season("N")),
  ) |>
  forecast(h = 5) |>
  accuracy(usa_economy)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 5 observations are missing between 2017 and 2021
## # A tibble: 1 × 11
##   .model Country       .type     ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <fct>         <chr>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SES    United States Test  0.0156  1.52  1.19 -0.605  12.9  2.49  2.39 0.790

The cross-validated RMSE for the ETS(A,N,N) model is 1.357, versus 1.523 for ETS(A,A,N). The simpler ANN model is preferred because its RMSE is lower.
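
As in 8.1, a 95% interval for the first ETS(A,N,N) forecast can be checked by hand from σ² = 0.4142 in the model report (a minimal sketch; fc is the 20-year ETS(A,N,N) fable created above, and the output is not shown):

fc$.mean[1] + c(-1, 1) * 1.96 * sqrt(0.4142)  # manual 95% interval
fc |> hilo(95) |> head(1)                     # interval produced by R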

8.6

Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.

[Hint: use a relatively large value of h when forecasting, so you can clearly see the differences between the various options when plotting the forecasts.]

china_ge <- global_economy %>% 
  filter(Country == "China")
train <- china_ge |>
  filter_index("2000" ~ "2017")
# Fit the models
china_fit <- train |>
  model(
    SES = ETS(GDP ~ error("A") + trend("N") + season("N")),
    Holt = ETS(GDP ~ error("A") + trend("A") + season("N")),
    Damped = ETS(GDP ~ error("A") + trend("Ad") +
                   season("N"))
  )

china_fc <- china_fit |> forecast(h = 20)

china_fc |>
  autoplot(train, level = NULL) +
  # Note: global_economy ends in 2017, so this overlay of post-2017 actuals is empty
  autolayer(
    filter_index(china_ge, "2018" ~ .),
    colour = "black"
  ) +
  labs(
    y = "Dollars",
    title = "Forecasts, China GDP"
  ) +
  guides(colour = guide_legend(title = "Forecast"))
## Plot variable not specified, automatically selected `.vars = GDP`

* SES produces a naïve-style forecast: every future value equals the last estimated level, which here is essentially the last observed value.
* Holt's linear method extrapolates the recent trend as a straight line.
* The damped model shows a slight curve: damping flattens the trend over time, so its forecasts grow more slowly and would eventually plateau.

A Box-Cox variant, which the exercise also asks about, is sketched below.
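
A minimal sketch of that Box-Cox option (lambda_china and the model name BoxCox_Holt are illustrative), choosing λ with the Guerrero method:

lambda_china <- train |>
  features(GDP, features = guerrero) |>
  pull(lambda_guerrero)

train |>
  model(
    BoxCox_Holt = ETS(box_cox(GDP, lambda_china) ~ error("A") + trend("A") + season("N"))
  ) |>
  forecast(h = 20) |>
  autoplot(train, level = NULL) +
  labs(y = "Dollars", title = "Forecasts, China GDP (Box-Cox transformed ETS)")

Depending on the chosen λ, back-transforming a linear trend on the transformed scale can produce forecasts that grow much faster than the untransformed Holt forecasts, which is the kind of intuition the exercise is after.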

8.7

Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?

aus_gas <- aus_production %>% select(c(Quarter, Gas))
fit <- aus_gas |>
  model(
    additive = ETS(Gas ~ error("A") + trend("A") + season("A")),
    multiplicative = ETS(Gas ~ error("M") + trend("A") + season("M")),
    Damped = ETS(Gas ~ error("A") + trend("Ad") + season("N"))
  )
fc <- fit |> forecast(h = "8 years")
fc |>
  autoplot(aus_gas, level = NULL) +
  labs(title="Australian Gas Production",
       y="Gas Production (petajoules)") +
  guides(colour = guide_legend(title = "Forecast"))

* Multiplicative seasonality is necessary because the seasonal swings grow roughly in proportion to the level of the series; additive seasonality assumes swings of constant size.
* The damped model as specified here drops the seasonal component entirely, so it cannot produce accurate forecasts (no seasonal zigzags).
* The additive and multiplicative forecasts look similar over this horizon; both capture the seasonal pattern, though the multiplicative model follows the growing seasonal amplitude more faithfully.

A damped trend can also be combined with multiplicative seasonality, as sketched below.
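
A minimal sketch of that model (fit_gas_damped is an illustrative name; output not shown), compared with the models above on in-sample criteria:

fit_gas_damped <- aus_gas |>
  model(damped_mult = ETS(Gas ~ error("M") + trend("Ad") + season("M")))
glance(fit)               # AICc for the three models fitted above
glance(fit_gas_damped)    # AICc for the damped trend with multiplicative seasonality
accuracy(fit_gas_damped)  # training accuracy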

8.8

Recall your retail time series data (from Exercise 7 in Section 2.10).

* Why is multiplicative seasonality necessary for this series?
* Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.
* Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?
* Check that the residuals from the best method look like white noise.
* Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?

set.seed(50)
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))
fit <- myseries |>
  model(
    additive = ETS(Turnover ~ error("A") + trend("A") + season("A")),
    multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M")),
    Damped = ETS(Turnover ~ error("A") + trend("Ad") + season("N"))
  )
fc <- fit |> forecast(h = "8 years")
fc |>
  autoplot(myseries, level = NULL) +
  labs(title="Australian Retail Turnover",
       y="Turnover ($Million AUD)") +
  guides(colour = guide_legend(title = "Forecast"))

The seasonal swings grow roughly in proportion to the level of the series, so multiplicative seasonality is needed. The additive model assumes seasonal swings of constant size, and the damped model as specified here has no seasonal component at all, so neither captures this pattern as well as the multiplicative model.

One-step forecasts for Holt-Winters’ multiplicative model and the damped model

fit <- myseries %>%
  model(
    multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M")),
    Damped = ETS(Turnover ~ error("A") + trend("Ad") + season("N"))
  )
fc <- fit |> forecast(h = "1 month")

fc |>
  autoplot(myseries, level = NULL) +
  labs(title="Australian Retail Turnover, One-Month Forecasts",
       y="Turnover ($Million AUD)") +
  guides(colour = guide_legend(title = "Forecast"))

accuracy(fit)
## # A tibble: 2 × 12
##   State  Industry .model .type     ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>    <chr>  <chr>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Queen… Other r… multi… Trai… 0.0520  5.09  3.46 -0.493  5.51 0.558 0.581 0.155
## 2 Queen… Other r… Damped Trai… 1.61   17.8   9.50 -0.653 12.6  1.53  2.03  0.272

The multiplicative model has the much lower one-step (training) RMSE (5.09 versus 17.8 for the damped model), so it is preferred.

Damped model

fit_damped <- myseries %>% 
  model(
    Damped = ETS(Turnover ~ error("A") + trend("Ad") + season("N"))
  )
fc_damped <- fit_damped |> forecast(h = "1 month")
fit_damped %>% gg_tsresiduals()

Multiplicative model

fit_mult <- myseries %>%
  model(
    multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M"))
  )
fc_mult <- fit_mult |> forecast(h = "1 month")
fit_mult %>% gg_tsresiduals()

The residuals from the multiplicative model look closer to white noise than those from the damped model, though some autocorrelation remains, so the fit is not perfect. A Ljung-Box test and a test-set comparison against the seasonal naïve benchmark are sketched below.
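
A minimal sketch of both checks (train_retail and the model names are illustrative; output not shown):

# Ljung-Box test on the innovation residuals of the multiplicative model
augment(fit_mult) |>
  features(.innov, ljung_box, lag = 24)

# Test-set RMSE: train to the end of 2010, forecast the held-out Jan 2011 - Dec 2018
# period, and compare Holt-Winters' multiplicative method with the seasonal naive benchmark
train_retail <- myseries |>
  filter_index(. ~ "2010 Dec")
train_retail |>
  model(
    hw_mult = ETS(Turnover ~ error("M") + trend("A") + season("M")),
    snaive = SNAIVE(Turnover)
  ) |>
  forecast(h = "8 years") |>
  accuracy(myseries)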

8.9

For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?

lambda <- myseries |>
  features(Turnover, features = guerrero) |>
  pull(lambda_guerrero)
myseries |>
  model(
    STL(box_cox(Turnover, lambda) ~ trend(window = 12) +
          season(window = "periodic"),
        robust = TRUE)
  ) |>
  components() |>
  autoplot()

Seasonally-adjusted Turnover Data

# STL on the untransformed Turnover series, used for the seasonally adjusted plot below
dcmp <- myseries |>
  model(stl = STL(Turnover))
components(dcmp)
## # A dable: 441 x 9 [1M]
## # Key:     State, Industry, .model [1]
## # :        Turnover = trend + season_year + remainder
##    State      Industry      .model    Month Turnover trend season_year remainder
##    <chr>      <chr>         <chr>     <mth>    <dbl> <dbl>       <dbl>     <dbl>
##  1 Queensland Other recrea… stl    1982 Apr     11.1  12.3     -1.30      0.0975
##  2 Queensland Other recrea… stl    1982 May     11.7  12.4     -0.278    -0.387 
##  3 Queensland Other recrea… stl    1982 Jun     11.5  12.4     -1.07      0.145 
##  4 Queensland Other recrea… stl    1982 Jul     13.1  12.5     -0.171     0.784 
##  5 Queensland Other recrea… stl    1982 Aug     13    12.6      0.0759    0.365 
##  6 Queensland Other recrea… stl    1982 Sep     13    12.6     -0.488     0.856 
##  7 Queensland Other recrea… stl    1982 Oct     12    12.7      0.264    -0.968 
##  8 Queensland Other recrea… stl    1982 Nov     13.2  12.8      0.639    -0.216 
##  9 Queensland Other recrea… stl    1982 Dec     16.2  12.8      5.53     -2.17  
## 10 Queensland Other recrea… stl    1983 Jan     12    12.9     -0.371    -0.551 
## # ℹ 431 more rows
## # ℹ 1 more variable: season_adjust <dbl>
components(dcmp) |>
  as_tsibble() |>
  autoplot(Turnover, colour = "gray") +
  geom_line(aes(y=season_adjust), colour = "#0072B2") +
  labs(y = "Turnover",
       title = "Turnover in Australian Retail")

adj <- components(dcmp) %>% select(c(Month, Turnover, season_adjust))
fit <- adj |>
  model(
    additive = ETS(Turnover ~ error("A") + trend("A") + season("A")),
    multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M")),
    Damped = ETS(Turnover ~ error("A") + trend("Ad") + season("N"))
  )
fc <- fit |> forecast(h = "8 years")
fc |>
  autoplot(adj, level = NULL) +
  labs(title="Australian Retail Turnover, ETL Forecasts",
       y="Turnover") +
  guides(colour = guide_legend(title = "Forecast"))
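
Note that the ETS models above are still fitted to the original Turnover series rather than to the seasonally adjusted component. To follow the exercise's recipe directly (STL on the Box-Cox transformed series, then ETS on the seasonally adjusted data), fabletools' decomposition_model() can be used; it forecasts the leftover seasonal component with SNAIVE and recombines the pieces automatically. A minimal sketch (fit_stl_ets is an illustrative name; this assumes fable back-transforms the Box-Cox response as it does for other transformed models, and the output is not shown):

fit_stl_ets <- myseries |>
  model(
    stl_ets = decomposition_model(
      STL(box_cox(Turnover, lambda) ~ season(window = "periodic"), robust = TRUE),
      ETS(season_adjust ~ error("A") + trend("Ad") + season("N"))
    )
  )
fit_stl_ets |>
  forecast(h = "2 years") |>
  autoplot(myseries) +
  labs(title = "Australian Retail Turnover, STL + ETS forecasts",
       y = "Turnover ($Million AUD)")

To answer the comparison question, refit this model on the data up to December 2010 (as in the 8.8 sketch) and compare its test-set accuracy() with the Holt-Winters' and seasonal naïve results.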