HW 5

Exercises 8.1, 8.5, 8.6, 8.7, 8.8, 8.9 in Hyndman

library(fpp3)
## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tibble      3.2.1     ✔ tsibble     1.1.6
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.1     ✔ feasts      0.4.1
## ✔ lubridate   1.9.3     ✔ fable       0.4.1
## ✔ ggplot2     3.5.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
  1. Consider the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.
  a. Use the ETS() function to estimate the equivalent model for simple exponential smoothing. Find the optimal values of α and ℓ0, and generate forecasts for the next four months.
pigs_victoria <- aus_livestock |>
  filter(Animal == "Pigs", State == "Victoria")

fit <- pigs_victoria |>
  model(ETS(Count ~ error("A") + trend("N") + season("N")))
report(fit)
## Series: Count 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.3221247 
## 
##   Initial states:
##      l[0]
##  100646.6
## 
##   sigma^2:  87480760
## 
##      AIC     AICc      BIC 
## 13737.10 13737.14 13750.07
components(fit) |>
  left_join(fitted(fit))
## Joining with `by = join_by(Animal, State, .model, Month)`
## # A dable: 559 x 8 [1M]
## # Key:     Animal, State, .model [1]
## # :        Count = lag(level, 1) + remainder
##    Animal State    .model                  Month  Count  level remainder .fitted
##    <fct>  <fct>    <chr>                   <mth>  <dbl>  <dbl>     <dbl>   <dbl>
##  1 Pigs   Victoria "ETS(Count ~ error(… 1972 Jun     NA 1.01e5       NA      NA 
##  2 Pigs   Victoria "ETS(Count ~ error(… 1972 Jul  94200 9.86e4    -6447. 100647.
##  3 Pigs   Victoria "ETS(Count ~ error(… 1972 Aug 105700 1.01e5     7130.  98570.
##  4 Pigs   Victoria "ETS(Count ~ error(… 1972 Sep  96500 9.95e4    -4367. 100867.
##  5 Pigs   Victoria "ETS(Count ~ error(… 1972 Oct 117100 1.05e5    17640.  99460.
##  6 Pigs   Victoria "ETS(Count ~ error(… 1972 Nov 104600 1.05e5     -542. 105142.
##  7 Pigs   Victoria "ETS(Count ~ error(… 1972 Dec 100500 1.04e5    -4468. 104968.
##  8 Pigs   Victoria "ETS(Count ~ error(… 1973 Jan  94700 1.01e5    -8829. 103529.
##  9 Pigs   Victoria "ETS(Count ~ error(… 1973 Feb  93900 9.85e4    -6785. 100685.
## 10 Pigs   Victoria "ETS(Count ~ error(… 1973 Mar  93200 9.68e4    -5299.  98499.
## # ℹ 549 more rows
fc <- fit |>
  forecast(h = 1)
fc
## # A fable: 1 x 6 [1M]
## # Key:     Animal, State, .model [1]
##   Animal State    .model                                                   Month
##   <fct>  <fct>    <chr>                                                    <mth>
## 1 Pigs   Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + season(\"… 2019 Jan
## # ℹ 2 more variables: Count <dist>, .mean <dbl>
components(fit) |> autoplot()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

fc <- fit |>
  forecast(h = 4)
print(fc)
## # A fable: 4 x 6 [1M]
## # Key:     Animal, State, .model [1]
##   Animal State    .model                                                   Month
##   <fct>  <fct>    <chr>                                                    <mth>
## 1 Pigs   Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + season(\"… 2019 Jan
## 2 Pigs   Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + season(\"… 2019 Feb
## 3 Pigs   Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + season(\"… 2019 Mar
## 4 Pigs   Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + season(\"… 2019 Apr
## # ℹ 2 more variables: Count <dist>, .mean <dbl>
  b. Compute a 95% prediction interval for the first forecast using ŷ ± 1.96s, where s is the standard deviation of the residuals. Compare your interval with the interval produced by R.
# First forecast mean (from fc) and residual variance sigma^2 (from the report)
yhat <- 95186.56
sigma2 <- 87480760
lower_pi <- yhat - 1.96 * sqrt(sigma2)  # s = sqrt(sigma^2)
print(lower_pi)
## [1] 76854.45
upper_pi <- yhat + 1.96 * sqrt(sigma2)
print(upper_pi)
## [1] 113518.7

For comparison, here is what R plots (the shaded bands are the 80% and 95% prediction intervals):

fc |>
  autoplot(pigs_victoria) +
  geom_line(aes(y = .fitted), col="#D55E00",
            data = augment(fit)) +
  guides(colour = "none")

The 95% band in the plot matches the manual prediction interval computed above.
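
The exact interval R computes can also be pulled out of the fable with hilo() for a direct numeric comparison (a sketch; the first row is the first forecast):

# Extract R's 95% prediction intervals from the fable
fc |> hilo(level = 95)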

  2. Data set global_economy contains the annual Exports from many countries. Select one country to analyse.
norway_economy <- global_economy |>
  filter(Country == "Norway")
  a. Plot the Exports series and discuss the main features of the data.
norway_economy |>
  autoplot(Exports) +
  geom_point() +
  labs(y = "% of GDP", title = "Total Norwegian exports")

There does not seem to be any general trend or seasonality here.

  b. Use an ETS(A,N,N) model to forecast the series, and plot the forecasts.
fit <- norway_economy |>
  model(ANN = ETS(Exports ~ error("A") + trend("N") + season("N")))
report(fit)
## Series: Exports 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.9998998 
## 
##   Initial states:
##      l[0]
##  36.26082
## 
##   sigma^2:  6.3961
## 
##      AIC     AICc      BIC 
## 347.1007 347.5452 353.2821
fc <- fit |>
  forecast(h = 5)
print(fc)
## # A fable: 5 x 5 [1Y]
## # Key:     Country, .model [1]
##   Country .model  Year
##   <fct>   <chr>  <dbl>
## 1 Norway  ANN     2018
## 2 Norway  ANN     2019
## 3 Norway  ANN     2020
## 4 Norway  ANN     2021
## 5 Norway  ANN     2022
## # ℹ 2 more variables: Exports <dist>, .mean <dbl>
fit |>
  forecast(h = 5) |>
  autoplot(norway_economy) +
  labs(y = "% of GDP", title = "Exports: Norway")

  c. Compute the RMSE values for the training data.
fit |> accuracy()
## # A tibble: 1 × 11
##   Country .model .type         ME  RMSE   MAE    MPE  MAPE  MASE RMSSE   ACF1
##   <fct>   <chr>  <chr>      <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 Norway  ANN    Training -0.0136  2.49  1.75 -0.244  4.56 0.983 0.991 0.0849
  d. Compare the results to those from an ETS(A,A,N) model. (Remember that the trended model is using one more parameter than the simpler model.) Discuss the merits of the two forecasting methods for this data set.
fit2 <- norway_economy |>
  model(AAN = ETS(Exports ~ error("A") + trend("A") + season("N")))
report(fit2)
## Series: Exports 
## Model: ETS(A,A,N) 
##   Smoothing parameters:
##     alpha = 0.9998999 
##     beta  = 0.0001000105 
## 
##   Initial states:
##      l[0]        b[0]
##  35.26623 -0.07375855
## 
##   sigma^2:  6.6583
## 
##      AIC     AICc      BIC 
## 351.3208 352.4746 361.6230
fc2 <- fit2 |>
  forecast(h = 5)
print(fc2)
## # A fable: 5 x 5 [1Y]
## # Key:     Country, .model [1]
##   Country .model  Year
##   <fct>   <chr>  <dbl>
## 1 Norway  AAN     2018
## 2 Norway  AAN     2019
## 3 Norway  AAN     2020
## 4 Norway  AAN     2021
## 5 Norway  AAN     2022
## # ℹ 2 more variables: Exports <dist>, .mean <dbl>
fit2 |>
  forecast(h = 5) |>
  autoplot(norway_economy) +
  labs(y = "% of GDP", title = "Exports: Norway (AAN)")

fit2 |> accuracy()
## # A tibble: 1 × 11
##   Country .model .type        ME  RMSE   MAE      MPE  MAPE  MASE RMSSE   ACF1
##   <fct>   <chr>  <chr>     <dbl> <dbl> <dbl>    <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 Norway  AAN    Training 0.0768  2.49  1.77 -0.00463  4.62 0.995 0.993 0.0812
  e. Compare the forecasts from both methods. Which do you think is best?

The ETS(A,A,N) model does no better on training RMSE (both round to 2.49) while using two extra parameters, and its AICc is higher (352.5 vs 347.5), so ETS(A,N,N) is the better choice here.
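
Since both training RMSEs round to 2.49, the information criteria give a cleaner comparison; a minimal sketch fitting the two models side by side and comparing with glance():

# Compare information criteria for the two candidate models
norway_economy |>
  model(
    ANN = ETS(Exports ~ error("A") + trend("N") + season("N")),
    AAN = ETS(Exports ~ error("A") + trend("A") + season("N"))
  ) |>
  glance() |>
  select(.model, sigma2, AIC, AICc, BIC)

This should reproduce the AICc values from the two reports above (347.5 for ANN vs 352.5 for AAN).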

  f. Calculate a 95% prediction interval for the first forecast for each model, using the RMSE values and assuming normal errors. Compare your intervals with those produced using R.
# ETS(A,N,N) model: first forecast mean and residual variance from its report
yhat <- 35.47229
sigma2 <- 6.3961
lower_pi <- yhat - 1.96 * sqrt(sigma2)
print(lower_pi)
## [1] 30.51535
upper_pi <- yhat + 1.96 * sqrt(sigma2)
print(upper_pi)
## [1] 40.42923

ETS(A,A,N) model:

# ETS(A,A,N) model: sigma^2 = 6.6583 from this model's own report
yhat <- 35.10572
sigma2 <- 6.6583
lower_pi <- yhat - 1.96 * sqrt(sigma2)
print(lower_pi)
## [1] 30.0482
upper_pi <- yhat + 1.96 * sqrt(sigma2)
print(upper_pi)
## [1] 40.16324

These prediction intervals are very close to each other, which matches what the two forecast plots show.
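
The intervals R produces can again be checked directly (a sketch using hilo()):

# R's 95% intervals; compare the first row of each with the manual intervals
fc |> hilo(level = 95)   # ETS(A,N,N)
fc2 |> hilo(level = 95)  # ETS(A,A,N)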

  3. Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.

[Hint: use a relatively large value of h when forecasting, so you can clearly see the differences between the various options when plotting the forecasts.]

To account for population growth, I adjusted the series to GDP per capita:

global_economy_china <- global_economy |>
  filter(Country == "China") |>
  mutate(GDPPerCapita = GDP / Population)
autoplot(global_economy_china, GDPPerCapita) +
  labs(title = "GDP per capita", y = "$US")

The original data show a clear positive trend and no seasonality, so let's try several non-seasonal models:

global_economy_china |>
  model(
    ses = ETS(GDPPerCapita ~ error("A") + trend("N") + season("N")),
    holt = ETS(GDPPerCapita ~ error("A") + trend("A") + season("N")),
    damped = ETS(GDPPerCapita ~ error("A") + trend("Ad") + season("N")),
    multiplicative = ETS(GDPPerCapita ~ error("M") + trend("A") + season("N"))
  ) |>
  forecast(h = 25) |>
  autoplot(global_economy_china, level = NULL) +
  labs(title = "GDP Per Capita - China") +
  guides(colour = guide_legend(title = "Forecast"))

The holt and multiplicative models produce nearly identical point forecasts and follow the trend best. Damped looks reasonable too, though it may be too soon to start slowing the growth rate. SES clearly does not fit this trending series.
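
The exercise also suggests trying a Box-Cox transformation. A minimal sketch of that experiment, with lambda chosen by the Guerrero method (lambda_china is just an illustrative name):

# Transform inside the model formula; fable back-transforms the forecasts
lambda_china <- global_economy_china |>
  features(GDPPerCapita, features = guerrero) |>
  pull(lambda_guerrero)

global_economy_china |>
  model(box_cox = ETS(box_cox(GDPPerCapita, lambda_china))) |>
  forecast(h = 25) |>
  autoplot(global_economy_china, level = NULL) +
  labs(title = "GDP per capita, China: ETS on Box-Cox scale")

Depending on the lambda chosen, the back-transformed forecasts can grow much faster or slower than the untransformed ones, which is the intuition the exercise is after.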

Let's look at cross-validated RMSE values to see which model fits best:

global_economy_china |>
  stretch_tsibble(.init = 5) |>
  model(
    ses = ETS(GDPPerCapita ~ error("A") + trend("N") + season("N")),
    holt = ETS(GDPPerCapita ~ error("A") + trend("A") + season("N")),
    damped = ETS(GDPPerCapita ~ error("A") + trend("Ad") + season("N")),
    multiplicative = ETS(GDPPerCapita ~ error("M") + trend("A") + season("N"))
  ) |>
  forecast(h = 25) |>
  accuracy(global_economy_china)
## Warning: 1 error encountered for holt
## [1] Not enough data to estimate this ETS model.
## Warning: 2 errors (1 unique) encountered for damped
## [2] Not enough data to estimate this ETS model.
## Warning: 1 error encountered for multiplicative
## [1] Not enough data to estimate this ETS model.
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 25 observations are missing between 2018 and 2042
## # A tibble: 4 × 11
##   .model         Country .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>          <fct>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 damped         China   Test  1172. 2264. 1224.  43.4  45.1  7.78  7.52 0.886
## 2 holt           China   Test  1071. 2146. 1139.  37.2  42.5  7.24  7.13 0.886
## 3 multiplicative China   Test  1207. 2286. 1231.  44.1  44.8  7.82  7.59 0.889
## 4 ses            China   Test  1449. 2591. 1449.  55.0  55.3  9.21  8.61 0.893

According to this cross-validation, holt is the winner, with the lowest RMSE (2146).

Let's see what automatic model selection, which minimises the AICc, chooses:

fit <- global_economy_china |>
  model(
    ets = ETS(GDPPerCapita)
  )
report(fit)
## Series: GDPPerCapita 
## Model: ETS(M,A,N) 
##   Smoothing parameters:
##     alpha = 0.9998999 
##     beta  = 0.3198561 
## 
##   Initial states:
##      l[0]     b[0]
##  86.62461 2.771429
## 
##   sigma^2:  0.0093
## 
##      AIC     AICc      BIC 
## 684.3176 685.4715 694.6198
fit |>
  forecast(h = 25)
## # A fable: 25 x 5 [1Y]
## # Key:     Country, .model [1]
##    Country .model  Year
##    <fct>   <chr>  <dbl>
##  1 China   ets     2018
##  2 China   ets     2019
##  3 China   ets     2020
##  4 China   ets     2021
##  5 China   ets     2022
##  6 China   ets     2023
##  7 China   ets     2024
##  8 China   ets     2025
##  9 China   ets     2026
## 10 China   ets     2027
## # ℹ 15 more rows
## # ℹ 2 more variables: GDPPerCapita <dist>, .mean <dbl>

The selected model is ETS(M,A,N). Since AICc selection is based on the (penalised) likelihood, ETS(M,A,N) is arguably the best in-sample choice.
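
To see the selected model's forecasts together with their prediction intervals:

fit |>
  forecast(h = 25) |>
  autoplot(global_economy_china) +
  labs(title = "GDP per capita, China: ETS(M,A,N) forecasts", y = "$US")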

  4. Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?

Original data:

aus_production |>
  autoplot(Gas)

As you can see, the seasonal variation grows with the level of the series (and there is a strong upward trend), so we should focus on multiplicative seasonality:

aus_production |>
  model(
    aam = ETS(Gas ~ error("A") + trend("A") + season("M")),
    aadm = ETS(Gas ~ error("A") + trend("Ad") + season("M")),
    mam = ETS(Gas ~ error("M") + trend("A") + season("M")),
    madm = ETS(Gas ~ error("M") + trend("Ad") + season("M"))
  ) |>
  forecast(h = 15) |>
  autoplot(aus_production, level = NULL) +
  labs(title = "Gas production: Australia") +
  guides(colour = guide_legend(title = "Forecast"))

All four forecasts are very close to each other. Let's see what automatic ETS() selection chooses:

fit <- aus_production |>
  model(
    ets = ETS(Gas)
  )
report(fit)
## Series: Gas 
## Model: ETS(M,A,M) 
##   Smoothing parameters:
##     alpha = 0.6528545 
##     beta  = 0.1441675 
##     gamma = 0.09784922 
## 
##   Initial states:
##      l[0]       b[0]      s[0]    s[-1]    s[-2]     s[-3]
##  5.945592 0.07062881 0.9309236 1.177883 1.074851 0.8163427
## 
##   sigma^2:  0.0032
## 
##      AIC     AICc      BIC 
## 1680.929 1681.794 1711.389
fit |>
  forecast(h = 15)
## # A fable: 15 x 4 [1Q]
## # Key:     .model [1]
##    .model Quarter
##    <chr>    <qtr>
##  1 ets    2010 Q3
##  2 ets    2010 Q4
##  3 ets    2011 Q1
##  4 ets    2011 Q2
##  5 ets    2011 Q3
##  6 ets    2011 Q4
##  7 ets    2012 Q1
##  8 ets    2012 Q2
##  9 ets    2012 Q3
## 10 ets    2012 Q4
## 11 ets    2013 Q1
## 12 ets    2013 Q2
## 13 ets    2013 Q3
## 14 ets    2013 Q4
## 15 ets    2014 Q1
## # ℹ 2 more variables: Gas <dist>, .mean <dbl>

Now let’s look at RMSE:

aus_production |>
  stretch_tsibble(.init = 5) |>
  model(
    mam = ETS(Gas ~ error("M") + trend("A") + season("M")),
    madm = ETS(Gas ~ error("M") + trend("Ad") + season("M"))
  ) |>
  forecast(h = 15) |>
  accuracy(aus_production)
## Warning: 5 errors (1 unique) encountered for mam
## [5] Not enough data to estimate this ETS model.
## Warning: 6 errors (1 unique) encountered for madm
## [6] Not enough data to estimate this ETS model.
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 15 observations are missing between 2010 Q3 and 2014 Q1
## # A tibble: 2 × 10
##   .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 madm   Test  1.47   13.8  9.24  3.66  10.5  1.66  1.82 0.858
## 2 mam    Test  0.278  14.4  9.44  2.01  10.7  1.69  1.90 0.861

Interestingly, the cross-validated RMSE favours the damped version (13.8 vs 14.4), so damping does appear to improve the forecasts here.
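
The in-sample information criteria are worth checking as well; a sketch comparing the two candidates with glance():

# AICc comparison for the damped and non-damped multiplicative models
aus_production |>
  model(
    mam = ETS(Gas ~ error("M") + trend("A") + season("M")),
    madm = ETS(Gas ~ error("M") + trend("Ad") + season("M"))
  ) |>
  glance() |>
  select(.model, sigma2, AIC, AICc, BIC)

For reference, the automatic selection above reported AICc = 1681.79 for the non-damped ETS(M,A,M).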

  5. Recall your retail time series data (from Exercise 7 in Section 2.10).
  a. Why is multiplicative seasonality necessary for this series?

Original data:

set.seed(12345678)
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

myseries |>
  autoplot(Turnover)

Multiplicative seasonality is necessary because the seasonal variation grows with the level of the series rather than staying constant.
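
One quick way to see this: on a log scale, multiplicative seasonality becomes roughly additive, so the seasonal swings should look constant. A sketch:

# If the seasonal swings look stable on the log scale, the raw-scale
# seasonality is multiplicative
myseries |>
  autoplot(log(Turnover)) +
  labs(y = "log(Turnover)", title = "Retail turnover on a log scale")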

  b. Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.

Holt-Winters’ multiplicative method:

# Holt-Winters’ multiplicative method
fit <- myseries |>
  model(
    multiplicative = ETS(Turnover ~ error("M") + trend("A") +
                                                season("M"))
  )
fc <- fit |> forecast(h = "3 years")
fc |>
  autoplot(myseries, level = NULL) +
  labs(title = "Holt Winters multiplicative method") +
  guides(colour = guide_legend(title = "Forecast"))

Holt-Winters’ multiplicative method with trend damped:

# Holt-Winters’ multiplicative method -- trend damped
fit <- myseries |>
  model(
    multiplicative_damp = ETS(Turnover ~ error("M") + trend("Ad") +
                                                season("M"))
  )
fc <- fit |> forecast(h = "3 years")
fc |>
  autoplot(myseries, level = NULL) +
  labs(title = "Holt Winters multiplicative method - Damped") +
  guides(colour = guide_legend(title = "Forecast"))

Based on the plots, the non-damped method looks like the better choice: the series has a persistent upward trend, which damping would flatten too quickly.

  c. Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?
myseries |>
  stretch_tsibble(.init = 10) |>
  model(
    multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M")),
    multiplicative_damp = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
  ) |>
  forecast(h = 3) |>
  accuracy(myseries)
## Warning: 8 errors (2 unique) encountered for multiplicative
## [3] A seasonal ETS model cannot be used for this data.
## [5] Not enough data to estimate this ETS model.
## Warning: 9 errors (2 unique) encountered for multiplicative_damp
## [3] A seasonal ETS model cannot be used for this data.
## [6] Not enough data to estimate this ETS model.
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 3 observations are missing between 2019 Jan and 2019 Mar
## # A tibble: 2 × 12
##   .model     State Industry .type      ME  RMSE   MAE      MPE  MAPE  MASE RMSSE
##   <chr>      <chr> <chr>    <chr>   <dbl> <dbl> <dbl>    <dbl> <dbl> <dbl> <dbl>
## 1 multiplic… Nort… Clothin… Test  -0.0204 0.760 0.557 -0.618    6.27 0.636 0.656
## 2 multiplic… Nort… Clothin… Test   0.0349 0.771 0.568  0.00382  6.33 0.648 0.665
## # ℹ 1 more variable: ACF1 <dbl>

The non-damped multiplicative method has a lower RMSE (0.760 vs 0.771), which makes it the better method here, if only marginally.
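
Since the exercise asks about one-step forecasts specifically, the same cross-validation can be rerun with h = 1 to target that directly (a sketch):

myseries |>
  stretch_tsibble(.init = 10) |>
  model(
    multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M")),
    multiplicative_damp = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
  ) |>
  forecast(h = 1) |>   # one-step forecasts only
  accuracy(myseries)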

  d. Check that the residuals from the best method look like white noise.
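
The best method was the non-damped multiplicative one, while fit currently holds the damped model, so refit it first:

fit <- myseries |>
  model(
    multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M"))
  )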
components(fit) |> autoplot() + labs(title = "ETS(M,A,M) components")
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).

Now let's check the residuals for white noise. Since the data are seasonal, we use ℓ = 2m lags; the seasonal period is m = 12 months, so ℓ = 2 × 12 = 24.

# Box-Pierce test
augment(fit) |> features(.innov, box_pierce, lag = 24)
## # A tibble: 1 × 5
##   State              Industry                           .model bp_stat bp_pvalue
##   <chr>              <chr>                              <chr>    <dbl>     <dbl>
## 1 Northern Territory Clothing, footwear and personal a… multi…    22.9     0.526
# Ljung-Box test
augment(fit) |> features(.innov, ljung_box, lag = 24)
## # A tibble: 1 × 5
##   State              Industry                           .model lb_stat lb_pvalue
##   <chr>              <chr>                              <chr>    <dbl>     <dbl>
## 1 Northern Territory Clothing, footwear and personal a… multi…    23.8     0.472

For both tests, the p-value is well above 0.05, so autocorrelations of this size are plausible under white noise. The residuals are not easily distinguished from white noise, and we fail to reject the white-noise hypothesis.
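
A visual check complements the portmanteau tests; gg_tsresiduals() shows the innovation residuals, their ACF, and a histogram in one figure:

fit |> gg_tsresiduals()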

  e. Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?
myseries_train <- myseries |>
  filter(year(Month) < 2011)

fit <- myseries_train |>
  model(
    multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M"))
  )
fit
## # A mable: 1 x 3
## # Key:     State, Industry [1]
##   State              Industry                                     multiplicative
##   <chr>              <chr>                                               <model>
## 1 Northern Territory Clothing, footwear and personal accessory r…   <ETS(M,A,M)>
fc <- fit |>
  forecast(new_data = anti_join(myseries, myseries_train)) # test data
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`
fc |> autoplot(myseries)

fit |> accuracy() # myseries_train
## # A tibble: 1 × 12
##   State       Industry .model .type     ME  RMSE   MAE     MPE  MAPE  MASE RMSSE
##   <chr>       <chr>    <chr>  <chr>  <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1 Northern T… Clothin… multi… Trai… 0.0398 0.616 0.444 -0.0723  5.06 0.507 0.531
## # ℹ 1 more variable: ACF1 <dbl>
fc |> accuracy(myseries) # test data
## # A tibble: 1 × 12
##   .model   State Industry .type     ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>    <chr> <chr>    <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 multipl… Nort… Clothin… Test  -0.998  1.45  1.22 -8.33  9.84  1.33  1.19 0.785

The forecasts of the test data look far more accurate than the seasonal naïve forecasts did, and the test RMSE (1.45) is substantially smaller as well.
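
For a like-for-like benchmark, the seasonal naïve method can be refit on the identical training split (a sketch):

# Seasonal naive benchmark on the same train/test split
snaive_fit <- myseries_train |>
  model(snaive = SNAIVE(Turnover))
snaive_fit |>
  forecast(new_data = anti_join(myseries, myseries_train)) |>
  accuracy(myseries)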

  6. For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?

Let’s first Box-Cox transform myseries:

# Box-Cox transformation: choose lambda with the Guerrero method
lambda_myseries <- myseries |>
  features(Turnover, features = guerrero) |>
  pull(lambda_guerrero)
lambda_myseries
## [1] 0.08303631
myseries |>
  autoplot(box_cox(Turnover, lambda_myseries))

Now let’s use the STL decomposition on this new series:

dcmp <- myseries |>
  model(stl = STL(box_cox(Turnover, lambda_myseries)))
components(dcmp)
## # A dable: 369 x 9 [1M]
## # Key:     State, Industry, .model [1]
## # :        box_cox(Turnover, lambda_myseries) = trend + season_year + remainder
##    State       Industry .model    Month box_cox(Turnover, la…¹ trend season_year
##    <chr>       <chr>    <chr>     <mth>                  <dbl> <dbl>       <dbl>
##  1 Northern T… Clothin… stl    1988 Apr                  0.862 0.989    -0.148  
##  2 Northern T… Clothin… stl    1988 May                  1.11  1.00      0.00543
##  3 Northern T… Clothin… stl    1988 Jun                  0.994 1.02      0.0504 
##  4 Northern T… Clothin… stl    1988 Jul                  1.07  1.04      0.203  
##  5 Northern T… Clothin… stl    1988 Aug                  1.11  1.05      0.117  
##  6 Northern T… Clothin… stl    1988 Sep                  1.15  1.07      0.0667 
##  7 Northern T… Clothin… stl    1988 Oct                  1.19  1.08      0.0583 
##  8 Northern T… Clothin… stl    1988 Nov                  1.15  1.09      0.00542
##  9 Northern T… Clothin… stl    1988 Dec                  1.52  1.11      0.370  
## 10 Northern T… Clothin… stl    1989 Jan                  1.04  1.12     -0.199  
## # ℹ 359 more rows
## # ℹ abbreviated name: ¹​`box_cox(Turnover, lambda_myseries)`
## # ℹ 2 more variables: remainder <dbl>, season_adjust <dbl>
components(dcmp) |>
  as_tsibble() |>
  autoplot(`box_cox(Turnover, lambda_myseries)`, colour = "gray") +
  geom_line(aes(y=season_adjust), colour = "#0072B2") +
  labs(title = "Australian retail trade turnover")

Now let's create the training and test sets and fit the same ETS(M,A,M) method to the seasonally adjusted data:

dcmp_tsibble <- components(dcmp) |>
  as_tsibble() |>
  select(-.model)  # drop the .model column
dcmp_tsibble
## # A tsibble: 369 x 8 [1M]
## # Key:       State, Industry [1]
##    State    Industry    Month box_cox(Turnover, la…¹ trend season_year remainder
##    <chr>    <chr>       <mth>                  <dbl> <dbl>       <dbl>     <dbl>
##  1 Norther… Clothin… 1988 Apr                  0.862 0.989    -0.148      0.0212
##  2 Norther… Clothin… 1988 May                  1.11  1.00      0.00543    0.103 
##  3 Norther… Clothin… 1988 Jun                  0.994 1.02      0.0504    -0.0769
##  4 Norther… Clothin… 1988 Jul                  1.07  1.04      0.203     -0.165 
##  5 Norther… Clothin… 1988 Aug                  1.11  1.05      0.117     -0.0547
##  6 Norther… Clothin… 1988 Sep                  1.15  1.07      0.0667     0.0179
##  7 Norther… Clothin… 1988 Oct                  1.19  1.08      0.0583     0.0478
##  8 Norther… Clothin… 1988 Nov                  1.15  1.09      0.00542    0.0507
##  9 Norther… Clothin… 1988 Dec                  1.52  1.11      0.370      0.0461
## 10 Norther… Clothin… 1989 Jan                  1.04  1.12     -0.199      0.112 
## # ℹ 359 more rows
## # ℹ abbreviated name: ¹​`box_cox(Turnover, lambda_myseries)`
## # ℹ 1 more variable: season_adjust <dbl>
dcmp_tsibble_train <- dcmp_tsibble |>
  filter(year(Month) < 2011)

fit3 <- dcmp_tsibble_train |>
  model(
    multiplicative = ETS(season_adjust ~ error("M") + trend("A") + season("M"))
  )
  
fc3 <- fit3 |>
  forecast(new_data = anti_join(dcmp_tsibble, dcmp_tsibble_train)) # test data
## Joining with `by = join_by(State, Industry, Month, `box_cox(Turnover,
## lambda_myseries)`, trend, season_year, remainder, season_adjust)`
fc3 |> autoplot(dcmp_tsibble)

fit3 |> accuracy() # train
## # A tibble: 1 × 12
##   State    Industry .model .type       ME   RMSE    MAE    MPE  MAPE  MASE RMSSE
##   <chr>    <chr>    <chr>  <chr>    <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 Norther… Clothin… multi… Trai… -0.00902 0.0800 0.0599 -0.580  3.11 0.395 0.393
## # ℹ 1 more variable: ACF1 <dbl>
fc3 |> accuracy(dcmp_tsibble) # test data
## # A tibble: 1 × 12
##   .model   State Industry .type     ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>    <chr> <chr>    <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 multipl… Nort… Clothin… Test  -0.140 0.159 0.143 -4.90  5.00 0.941 0.779 0.619

The RMSE is now 0.159, which looks far lower than both previous RMSEs. Note, however, that this RMSE is measured on the Box-Cox transformed scale, so it is not directly comparable with the Turnover-scale RMSEs above; a fair comparison requires reseasonalising and back-transforming the forecasts.
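
To get a test RMSE on the original Turnover scale, decomposition_model() can combine the STL decomposition with an ETS fit to the seasonally adjusted component, reseasonalising and back-transforming automatically. A sketch, assuming the default SNAIVE model for the seasonal component:

fit_dcmp <- myseries_train |>
  model(stl_ets = decomposition_model(
    STL(box_cox(Turnover, lambda_myseries)),
    ETS(season_adjust ~ error("A") + trend("A") + season("N"))
  ))
fit_dcmp |>
  forecast(new_data = anti_join(myseries, myseries_train)) |>
  accuracy(myseries)  # RMSE now in Turnover units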

Interestingly, if you let ETS() choose the model automatically it picks ETS(A,A,N), yet its test RMSE is higher than that of the (M,A,M) specification above:

fit2 <- dcmp_tsibble_train |>
  model(
    ets = ETS(season_adjust)
  )
report(fit2)
## Series: season_adjust 
## Model: ETS(A,A,N) 
##   Smoothing parameters:
##     alpha = 0.6352821 
##     beta  = 0.0001000504 
## 
##   Initial states:
##     l[0]        b[0]
##  1.01283 0.006490657
## 
##   sigma^2:  0.005
## 
##       AIC      AICc       BIC 
##  88.69163  88.91635 106.73899
fit2 |>
  forecast(h = 84) |>
  accuracy(dcmp_tsibble)
## # A tibble: 1 × 12
##   .model State   Industry .type     ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>   <chr>    <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ets    Northe… Clothin… Test  -0.217 0.242 0.218 -7.61  7.63  1.43  1.19 0.820