Exercises 8.1, 8.5, 8.6, 8.7, 8.8, 8.9 in Hyndman
library(fpp3)
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.6
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ feasts 0.4.1
## ✔ lubridate 1.9.3 ✔ fable 0.4.1
## ✔ ggplot2 3.5.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
Consider the number of pigs slaughtered in Victoria, available in the aus_livestock dataset. Use the ETS() function to estimate the equivalent model for simple exponential smoothing.
pigs_victoria <- aus_livestock |>
  filter(Animal == "Pigs", State == "Victoria")
fit <- pigs_victoria |>
model(ETS(Count ~ error("A") + trend("N") + season("N")))
report(fit)
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
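Reading off the report, the optimal values are alpha ≈ 0.322 and l[0] ≈ 100646.6. As an aside, these estimates can also be pulled out programmatically; a minimal sketch using tidy() from fabletools (not run above):
fit |> tidy() # one row per estimated parameter (alpha and l[0])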
components(fit) |>
left_join(fitted(fit))
## Joining with `by = join_by(Animal, State, .model, Month)`
## # A dable: 559 x 8 [1M]
## # Key: Animal, State, .model [1]
## # : Count = lag(level, 1) + remainder
## Animal State .model Month Count level remainder .fitted
## <fct> <fct> <chr> <mth> <dbl> <dbl> <dbl> <dbl>
## 1 Pigs Victoria "ETS(Count ~ error(… 1972 Jun NA 1.01e5 NA NA
## 2 Pigs Victoria "ETS(Count ~ error(… 1972 Jul 94200 9.86e4 -6447. 100647.
## 3 Pigs Victoria "ETS(Count ~ error(… 1972 Aug 105700 1.01e5 7130. 98570.
## 4 Pigs Victoria "ETS(Count ~ error(… 1972 Sep 96500 9.95e4 -4367. 100867.
## 5 Pigs Victoria "ETS(Count ~ error(… 1972 Oct 117100 1.05e5 17640. 99460.
## 6 Pigs Victoria "ETS(Count ~ error(… 1972 Nov 104600 1.05e5 -542. 105142.
## 7 Pigs Victoria "ETS(Count ~ error(… 1972 Dec 100500 1.04e5 -4468. 104968.
## 8 Pigs Victoria "ETS(Count ~ error(… 1973 Jan 94700 1.01e5 -8829. 103529.
## 9 Pigs Victoria "ETS(Count ~ error(… 1973 Feb 93900 9.85e4 -6785. 100685.
## 10 Pigs Victoria "ETS(Count ~ error(… 1973 Mar 93200 9.68e4 -5299. 98499.
## # ℹ 549 more rows
fc <- fit |>
forecast(h = 1)
fc
## # A fable: 1 x 6 [1M]
## # Key: Animal, State, .model [1]
## Animal State .model Month
## <fct> <fct> <chr> <mth>
## 1 Pigs Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + season(\"… 2019 Jan
## # ℹ 2 more variables: Count <dist>, .mean <dbl>
components(fit) |> autoplot()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
fc <- fit |>
forecast(h = 4)
print(fc)
## # A fable: 4 x 6 [1M]
## # Key: Animal, State, .model [1]
## Animal State .model Month
## <fct> <fct> <chr> <mth>
## 1 Pigs Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + season(\"… 2019 Jan
## 2 Pigs Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + season(\"… 2019 Feb
## 3 Pigs Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + season(\"… 2019 Mar
## 4 Pigs Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + season(\"… 2019 Apr
## # ℹ 2 more variables: Count <dist>, .mean <dbl>
# 95% PI for the first forecast: yhat +/- 1.96 * s, where s is the residual
# standard deviation (sigma^2 comes from report(fit))
yhat = 95186.56 # .mean of the first forecast
s2 = 87480760 # sigma^2 from the model report
lower_pi = yhat - (1.96 * sqrt(s2))
print(lower_pi)
## [1] 76854.45
upper_pi = yhat + (1.96 * sqrt(s2))
print(upper_pi)
## [1] 113518.7
For comparison, here is what R plots:
fc |>
autoplot(pigs_victoria) +
geom_line(aes(y = .fitted), col="#D55E00",
data = augment(fit)) +
guides(colour = "none")
The 95% prediction interval shown in the plot closely matches the manual computation above.
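To compare numerically rather than reading intervals off the plot, the interval R computes can be unpacked from the forecast distribution; a minimal sketch using fabletools’ hilo() (not run above):
fc |> hilo(95) # 95% prediction interval from the fable's distribution column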
Data set global_economy contains the annual Exports from many countries. Select one country to analyse.
norway_economy <- global_economy |>
  filter(Country == "Norway")
norway_economy |>
autoplot(Exports) +
geom_point() +
labs(y = "% of GDP", title = "Total Norwegian exports")
There does not seem to be any general trend here, and since the data are annual there is no seasonality.
fit <- norway_economy |>
model(ANN = ETS(Exports ~ error("A") + trend("N") + season("N")))
report(fit)
## Series: Exports
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.9998998
##
## Initial states:
## l[0]
## 36.26082
##
## sigma^2: 6.3961
##
## AIC AICc BIC
## 347.1007 347.5452 353.2821
fc <- fit |>
forecast(h = 5)
print(fc)
## # A fable: 5 x 5 [1Y]
## # Key: Country, .model [1]
## Country .model Year
## <fct> <chr> <dbl>
## 1 Norway ANN 2018
## 2 Norway ANN 2019
## 3 Norway ANN 2020
## 4 Norway ANN 2021
## 5 Norway ANN 2022
## # ℹ 2 more variables: Exports <dist>, .mean <dbl>
fit |>
forecast(h = 5) |>
autoplot(norway_economy) +
labs(y = "% of GDP", title = "Exports: Norway")
fit |> accuracy()
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Norway ANN Training -0.0136 2.49 1.75 -0.244 4.56 0.983 0.991 0.0849
fit2 <- norway_economy |>
model(AAN = ETS(Exports ~ error("A") + trend("A") + season("N")))
report(fit2)
## Series: Exports
## Model: ETS(A,A,N)
## Smoothing parameters:
## alpha = 0.9998999
## beta = 0.0001000105
##
## Initial states:
## l[0] b[0]
## 35.26623 -0.07375855
##
## sigma^2: 6.6583
##
## AIC AICc BIC
## 351.3208 352.4746 361.6230
fc2 <- fit2 |>
forecast(h = 5)
print(fc2)
## # A fable: 5 x 5 [1Y]
## # Key: Country, .model [1]
## Country .model Year
## <fct> <chr> <dbl>
## 1 Norway AAN 2018
## 2 Norway AAN 2019
## 3 Norway AAN 2020
## 4 Norway AAN 2021
## 5 Norway AAN 2022
## # ℹ 2 more variables: Exports <dist>, .mean <dbl>
fit2 |>
forecast(h = 5) |>
autoplot(norway_economy) +
labs(y = "% of GDP", title = "Exports: Norway (AAN)")
fit2 |> accuracy()
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Norway AAN Training 0.0768 2.49 1.77 -0.00463 4.62 0.995 0.993 0.0812
The ETS(A,A,N) model has a slightly higher training RMSE than the ETS(A,N,N) model (the rounding shown hides it, but compare RMSSE: 0.993 vs 0.991), so the simpler ETS(A,N,N) is preferred.
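As an aside, both specifications can be fitted in a single model() call so that accuracy() returns a side-by-side table; a minimal sketch (not run above):
norway_economy |>
  model(
    ANN = ETS(Exports ~ error("A") + trend("N") + season("N")),
    AAN = ETS(Exports ~ error("A") + trend("A") + season("N"))
  ) |>
  accuracy() |> # training accuracy for both models in one tibble
  select(.model, RMSE, MAE, MASE)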
# ETS(A,N,N) model: 95% PI for the first forecast, yhat +/- 1.96 * s
yhat = 35.47229 # .mean of the first forecast
s2 = 6.3961 # sigma^2 from report(fit)
lower_pi = yhat - (1.96 * sqrt(s2))
print(lower_pi)
## [1] 30.51535
upper_pi = yhat + (1.96 * sqrt(s2))
print(upper_pi)
## [1] 40.42923
ETS(A,A,N) model:
# ETS(A,A,N) model: 95% PI for the first forecast
yhat = 35.10572 # .mean of the first forecast
s2 = 6.6583 # sigma^2 from report(fit2)
lower_pi = yhat - (1.96 * sqrt(s2))
print(lower_pi)
## [1] 30.0482
upper_pi = yhat + (1.96 * sqrt(s2))
print(upper_pi)
## [1] 40.16324
These prediction intervals are very close to each other, which is reflected in the plots.
Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts. [Hint: use a relatively large value of h when forecasting, so you can clearly see the differences between the various options when plotting the forecasts.]
To account for population growth, I chose to adjust GDP to give per-capita data:
global_economy_china <- global_economy |>
filter(Country == "China") |>
mutate(GDPPerCapita = GDP/Population)
autoplot(global_economy_china, GDPPerCapita) +
labs(title= "GDP per capita", y = "$US")
The original data show a strong positive trend and no seasonality (the data are annual). Let’s try different non-seasonal models:
global_economy_china |>
model(
ses = ETS(GDPPerCapita ~ error("A") + trend("N") + season("N")),
holt = ETS(GDPPerCapita ~ error("A") + trend("A") + season("N")),
damped = ETS(GDPPerCapita ~ error("A") + trend("Ad") + season("N")),
multiplicative = ETS(GDPPerCapita ~ error("M") + trend("A") + season("N"))
) |>
forecast(h = 25) |>
autoplot(global_economy_china, level = NULL) +
labs(title = "GDP Per Capita - China") +
guides(colour = guide_legend(title = "Forecast"))
The multiplicative and Holt models produce nearly identical forecasts, and both fit well. The damped forecast also looks reasonable, but it may be too soon for the growth rate to start slowing. SES clearly does not fit this series.
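The exercise also mentions a Box-Cox transformation, which was not tried above. A minimal sketch (not run here), assuming the Guerrero method for choosing lambda; fable applies the transformation inside the model formula and back-transforms the forecasts automatically:
# choose lambda, then fit Holt's linear method on the Box-Cox scale
lambda <- global_economy_china |>
  features(GDPPerCapita, features = guerrero) |>
  pull(lambda_guerrero)
global_economy_china |>
  model(boxcox_holt = ETS(box_cox(GDPPerCapita, lambda) ~ error("A") + trend("A") + season("N"))) |>
  forecast(h = 25) |>
  autoplot(global_economy_china, level = NULL) +
  labs(title = "GDP per capita, China: Holt on the Box-Cox scale")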
Let’s look at cross-validated RMSE values to see which model fits best:
global_economy_china |>
  stretch_tsibble(.init = 5) |> # time-series cross-validation: growing training windows
  model(
    ses = ETS(GDPPerCapita ~ error("A") + trend("N") + season("N")),
    holt = ETS(GDPPerCapita ~ error("A") + trend("A") + season("N")),
    damped = ETS(GDPPerCapita ~ error("A") + trend("Ad") + season("N")),
    multiplicative = ETS(GDPPerCapita ~ error("M") + trend("A") + season("N"))
  ) |>
  forecast(h = 25) |>
  accuracy(global_economy_china) # accuracy averaged across all folds and horizons
## Warning: 1 error encountered for holt
## [1] Not enough data to estimate this ETS model.
## Warning: 2 errors (1 unique) encountered for damped
## [2] Not enough data to estimate this ETS model.
## Warning: 1 error encountered for multiplicative
## [1] Not enough data to estimate this ETS model.
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 25 observations are missing between 2018 and 2042
## # A tibble: 4 × 11
## .model Country .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 damped China Test 1172. 2264. 1224. 43.4 45.1 7.78 7.52 0.886
## 2 holt China Test 1071. 2146. 1139. 37.2 42.5 7.24 7.13 0.886
## 3 multiplicative China Test 1207. 2286. 1231. 44.1 44.8 7.82 7.59 0.889
## 4 ses China Test 1449. 2591. 1449. 55.0 55.3 9.21 8.61 0.893
According to this method, holt is the winner with the lowest RMSE.
Let’s take a look at automatic model selection, which minimises the AICc:
fit <- global_economy_china |>
model(
ets = ETS(GDPPerCapita)
)
report(fit)
## Series: GDPPerCapita
## Model: ETS(M,A,N)
## Smoothing parameters:
## alpha = 0.9998999
## beta = 0.3198561
##
## Initial states:
## l[0] b[0]
## 86.62461 2.771429
##
## sigma^2: 0.0093
##
## AIC AICc BIC
## 684.3176 685.4715 694.6198
fit |>
  forecast(h = 25)
## # A fable: 25 x 5 [1Y]
## # Key: Country, .model [1]
## Country .model Year
## <fct> <chr> <dbl>
## 1 China ets 2018
## 2 China ets 2019
## 3 China ets 2020
## 4 China ets 2021
## 5 China ets 2022
## 6 China ets 2023
## 7 China ets 2024
## 8 China ets 2025
## 9 China ets 2026
## 10 China ets 2027
## # ℹ 15 more rows
## # ℹ 2 more variables: GDPPerCapita <dist>, .mean <dbl>
The model selected is ETS(M,A,N). Since the AICc accounts for the likelihood while penalising model complexity, ETS(M,A,N) is arguably the best of these models.
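As a cross-check, glance() on a mable reports the information criteria for every fitted model, so the four hand-specified models can be compared on AICc directly; a minimal sketch (not run above):
global_economy_china |>
  model(
    ses = ETS(GDPPerCapita ~ error("A") + trend("N") + season("N")),
    holt = ETS(GDPPerCapita ~ error("A") + trend("A") + season("N")),
    damped = ETS(GDPPerCapita ~ error("A") + trend("Ad") + season("N")),
    multiplicative = ETS(GDPPerCapita ~ error("M") + trend("A") + season("N"))
  ) |>
  glance() |> # one row of fit statistics per model
  select(.model, AICc)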
Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?
Original data:
aus_production |>
  autoplot(Gas)
As you can see, the seasonal variation grows with the level of the series (and there is an upward trend), so we should focus on multiplicative seasonality:
aus_production |>
model(
aam = ETS(Gas ~ error("A") + trend("A") + season("M")),
aadm = ETS(Gas ~ error("A") + trend("Ad") + season("M")),
mam = ETS(Gas ~ error("M") + trend("A") + season("M")),
madm = ETS(Gas ~ error("M") + trend("Ad") + season("M"))
) |>
forecast(h = 15) |>
autoplot(aus_production, level = NULL) +
labs(title = "Gas Austrailia") +
guides(colour = guide_legend(title = "Forecast"))
All these forecasts are very close to each other. Let’s see what ETS() selects automatically:
fit <- aus_production |>
model(
ets = ETS(Gas)
)
report(fit)
## Series: Gas
## Model: ETS(M,A,M)
## Smoothing parameters:
## alpha = 0.6528545
## beta = 0.1441675
## gamma = 0.09784922
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 5.945592 0.07062881 0.9309236 1.177883 1.074851 0.8163427
##
## sigma^2: 0.0032
##
## AIC AICc BIC
## 1680.929 1681.794 1711.389
fit |>
  forecast(h = 15)
## # A fable: 15 x 4 [1Q]
## # Key: .model [1]
## .model Quarter
## <chr> <qtr>
## 1 ets 2010 Q3
## 2 ets 2010 Q4
## 3 ets 2011 Q1
## 4 ets 2011 Q2
## 5 ets 2011 Q3
## 6 ets 2011 Q4
## 7 ets 2012 Q1
## 8 ets 2012 Q2
## 9 ets 2012 Q3
## 10 ets 2012 Q4
## 11 ets 2013 Q1
## 12 ets 2013 Q2
## 13 ets 2013 Q3
## 14 ets 2013 Q4
## 15 ets 2014 Q1
## # ℹ 2 more variables: Gas <dist>, .mean <dbl>
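Since the exercise asks for forecasts of the next few years, it is worth plotting the automatically selected model’s forecasts with their prediction intervals; a minimal sketch (not run above):
fit |>
  forecast(h = 15) |>
  autoplot(aus_production) + # forecasts with 80%/95% intervals over the history
  labs(title = "Australian gas production: ETS(M,A,M) forecasts")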
Now let’s compare cross-validated RMSE for the damped and non-damped versions:
aus_production |>
stretch_tsibble(.init = 5) |>
model(
mam = ETS(Gas ~ error("M") + trend("A") + season("M")),
madm = ETS(Gas ~ error("M") + trend("Ad") + season("M"))
) |>
forecast(h = 15) |>
accuracy(aus_production)
## Warning: 5 errors (1 unique) encountered for mam
## [5] Not enough data to estimate this ETS model.
## Warning: 6 errors (1 unique) encountered for madm
## [6] Not enough data to estimate this ETS model.
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 15 observations are missing between 2010 Q3 and 2014 Q1
## # A tibble: 2 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 madm Test 1.47 13.8 9.24 3.66 10.5 1.66 1.82 0.858
## 2 mam Test 0.278 14.4 9.44 2.01 10.7 1.69 1.90 0.861
Interestingly, the cross-validated RMSE shows that the damped version is the better method (13.8 vs 14.4), so damping does improve the forecasts here.
Recall the retail time series data from the earlier exercises. Original data:
set.seed(12345678)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries |>
autoplot(Turnover)
Multiplicative seasonality is necessary for this series because the seasonal variation is changing with the level of the series (not constant).
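A subseries plot from feasts makes the widening seasonal swings easy to see; a minimal sketch (not run above):
myseries |> gg_subseries(Turnover) # each month's panel rises, and the spread between months grows with the level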
Holt-Winters’ multiplicative method:
# Holt-Winters’ multiplicative method
fit <- myseries |>
model(
multiplicative = ETS(Turnover ~ error("M") + trend("A") +
season("M"))
)
fc <- fit |> forecast(h = "3 years")
fc |>
autoplot(myseries, level = NULL) +
labs(title = "Holt Winters multiplicative method") +
guides(colour = guide_legend(title = "Forecast"))
Holt-Winters’ multiplicative method with trend damped:
# Holt-Winters’ multiplicative method -- trend damped
fit <- myseries |>
model(
multiplicative_damp = ETS(Turnover ~ error("M") + trend("Ad") +
season("M"))
)
fc <- fit |> forecast(h = "3 years")
fc |>
autoplot(myseries, level = NULL) +
labs(title = "Holt Winters multiplicative method - Damped") +
guides(colour = guide_legend(title = "Forecast"))
Since the series has a persistent positive trend, the graphs suggest the non-damped method is the better choice, as it carries the trend forward.
myseries |>
stretch_tsibble(.init = 10) |>
model(
multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M")),
multiplicative_damp = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
) |>
forecast(h = 3) |>
accuracy(myseries)
## Warning: 8 errors (2 unique) encountered for multiplicative
## [3] A seasonal ETS model cannot be used for this data.
## [5] Not enough data to estimate this ETS model.
## Warning: 9 errors (2 unique) encountered for multiplicative_damp
## [3] A seasonal ETS model cannot be used for this data.
## [6] Not enough data to estimate this ETS model.
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 3 observations are missing between 2019 Jan and 2019 Mar
## # A tibble: 2 × 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 multiplic… Nort… Clothin… Test -0.0204 0.760 0.557 -0.618 6.27 0.636 0.656
## 2 multiplic… Nort… Clothin… Test 0.0349 0.771 0.568 0.00382 6.33 0.648 0.665
## # ℹ 1 more variable: ACF1 <dbl>
The non-damped multiplicative model has a lower cross-validated RMSE of 0.7600786 (vs 0.771 for the damped version), which makes it the better method.
components(fit) |> autoplot() + labs(title = "ETS(M,Ad,M) components") # fit currently holds the damped model fitted above
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).
Now let’s check the residuals for white noise. For seasonal data we pick lag l = 2m; the seasonal period is m = 12 months (1 year), so l = 2 * 12 = 24.
# Box-Pierce test
augment(fit) |> features(.innov, box_pierce, lag = 24)
## # A tibble: 1 × 5
## State Industry .model bp_stat bp_pvalue
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Northern Territory Clothing, footwear and personal a… multi… 22.9 0.526
# Ljung-Box test
augment(fit) |> features(.innov, ljung_box, lag = 24)
## # A tibble: 1 × 5
## State Industry .model lb_stat lb_pvalue
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Northern Territory Clothing, footwear and personal a… multi… 23.8 0.472
For both tests, the p-value is well above 0.05, so autocorrelations of this size could easily occur by chance. The residuals are not distinguishable from white noise; we fail to reject the white-noise hypothesis.
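A visual check complements the portmanteau tests: gg_tsresiduals() from feasts plots the innovation residuals, their ACF, and a histogram in one figure. A minimal sketch (not run above):
fit |> gg_tsresiduals() # residual time plot, ACF, and histogram for the damped model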
myseries_train <- myseries |>
  filter(year(Month) < 2011)
fit <- myseries_train |>
  model(
    multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M"))
  )
fit
## # A mable: 1 x 3
## # Key: State, Industry [1]
## State Industry multiplicative
## <chr> <chr> <model>
## 1 Northern Territory Clothing, footwear and personal accessory r… <ETS(M,A,M)>
fc <- fit |>
forecast(new_data = anti_join(myseries, myseries_train)) # test data
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`
fc |> autoplot(myseries)
fit |> accuracy() # myseries_train
## # A tibble: 1 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Northern T… Clothin… multi… Trai… 0.0398 0.616 0.444 -0.0723 5.06 0.507 0.531
## # ℹ 1 more variable: ACF1 <dbl>
fc |> accuracy(myseries) # test data
## # A tibble: 1 × 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 multipl… Nort… Clothin… Test -0.998 1.45 1.22 -8.33 9.84 1.33 1.19 0.785
The forecasts on the test data look far more accurate here than those from the seasonal naive method in the earlier exercise, and the test RMSE is substantially smaller as well.
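For reference, the seasonal naive benchmark mentioned above could be refit on the same training split; a minimal sketch using fable's SNAIVE() (not run here):
myseries_train |>
  model(snaive = SNAIVE(Turnover)) |>
  forecast(new_data = anti_join(myseries, myseries_train)) |>
  accuracy(myseries) # test accuracy of the benchmark on the same split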
For the same retail data, let’s now try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. First, Box-Cox transform myseries:
# Box-Cox transformation (lambda chosen by the Guerrero method)
lambda_myseries <- myseries |>
features(Turnover, features = guerrero) |>
pull(lambda_guerrero)
lambda_myseries
## [1] 0.08303631
myseries |>
autoplot(box_cox(Turnover, lambda_myseries))
Since lambda ≈ 0.08 is close to zero, the transformation is close to a log. Now let’s use an STL decomposition on this transformed series:
dcmp <- myseries |>
model(stl = STL(box_cox(Turnover, lambda_myseries)))
components(dcmp)
## # A dable: 369 x 9 [1M]
## # Key: State, Industry, .model [1]
## # : box_cox(Turnover, lambda_myseries) = trend + season_year + remainder
## State Industry .model Month box_cox(Turnover, la…¹ trend season_year
## <chr> <chr> <chr> <mth> <dbl> <dbl> <dbl>
## 1 Northern T… Clothin… stl 1988 Apr 0.862 0.989 -0.148
## 2 Northern T… Clothin… stl 1988 May 1.11 1.00 0.00543
## 3 Northern T… Clothin… stl 1988 Jun 0.994 1.02 0.0504
## 4 Northern T… Clothin… stl 1988 Jul 1.07 1.04 0.203
## 5 Northern T… Clothin… stl 1988 Aug 1.11 1.05 0.117
## 6 Northern T… Clothin… stl 1988 Sep 1.15 1.07 0.0667
## 7 Northern T… Clothin… stl 1988 Oct 1.19 1.08 0.0583
## 8 Northern T… Clothin… stl 1988 Nov 1.15 1.09 0.00542
## 9 Northern T… Clothin… stl 1988 Dec 1.52 1.11 0.370
## 10 Northern T… Clothin… stl 1989 Jan 1.04 1.12 -0.199
## # ℹ 359 more rows
## # ℹ abbreviated name: ¹`box_cox(Turnover, lambda_myseries)`
## # ℹ 2 more variables: remainder <dbl>, season_adjust <dbl>
components(dcmp) |>
as_tsibble() |>
autoplot(`box_cox(Turnover, lambda_myseries)`, colour = "gray") +
geom_line(aes(y=season_adjust), colour = "#0072B2") +
labs(title = "Australian retail trade turnover")
Now let’s create the training and test data, and fit an ETS model (the same M,A,M specification) to the seasonally adjusted series:
dcmp_tsibble <- components(dcmp) |>
as_tsibble()
dcmp_tsibble <- dcmp_tsibble[,-3] # drop the .model column (column 3)
dcmp_tsibble
## # A tsibble: 369 x 8 [1M]
## # Key: State, Industry [1]
## State Industry Month box_cox(Turnover, la…¹ trend season_year remainder
## <chr> <chr> <mth> <dbl> <dbl> <dbl> <dbl>
## 1 Norther… Clothin… 1988 Apr 0.862 0.989 -0.148 0.0212
## 2 Norther… Clothin… 1988 May 1.11 1.00 0.00543 0.103
## 3 Norther… Clothin… 1988 Jun 0.994 1.02 0.0504 -0.0769
## 4 Norther… Clothin… 1988 Jul 1.07 1.04 0.203 -0.165
## 5 Norther… Clothin… 1988 Aug 1.11 1.05 0.117 -0.0547
## 6 Norther… Clothin… 1988 Sep 1.15 1.07 0.0667 0.0179
## 7 Norther… Clothin… 1988 Oct 1.19 1.08 0.0583 0.0478
## 8 Norther… Clothin… 1988 Nov 1.15 1.09 0.00542 0.0507
## 9 Norther… Clothin… 1988 Dec 1.52 1.11 0.370 0.0461
## 10 Norther… Clothin… 1989 Jan 1.04 1.12 -0.199 0.112
## # ℹ 359 more rows
## # ℹ abbreviated name: ¹`box_cox(Turnover, lambda_myseries)`
## # ℹ 1 more variable: season_adjust <dbl>
dcmp_tsibble_train <- dcmp_tsibble |>
filter(year(Month) < 2011) |>
as_tsibble()
fit3 <- dcmp_tsibble_train |>
model(
multiplicative = ETS(season_adjust ~ error("M") + trend("A") + season("M"))
)
fc3 <- fit3 |>
forecast(new_data = anti_join(dcmp_tsibble, dcmp_tsibble_train)) # test data
## Joining with `by = join_by(State, Industry, Month, `box_cox(Turnover,
## lambda_myseries)`, trend, season_year, remainder, season_adjust)`
fc3 |> autoplot(dcmp_tsibble)
fit3 |> accuracy() # train
## # A tibble: 1 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Norther… Clothin… multi… Trai… -0.00902 0.0800 0.0599 -0.580 3.11 0.395 0.393
## # ℹ 1 more variable: ACF1 <dbl>
fc3 |> accuracy(dcmp_tsibble) # test data
## # A tibble: 1 × 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 multipl… Nort… Clothin… Test -0.140 0.159 0.143 -4.90 5.00 0.941 0.779 0.619
The test RMSE is now 0.159, lower than both previous RMSEs; note, however, that this RMSE is measured on the seasonally adjusted, Box-Cox transformed scale, so it is not directly comparable with the earlier RMSEs computed on the original Turnover scale.
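Note that fabletools also offers decomposition_model(), which chains the whole pipeline (STL on the Box-Cox scale, a chosen model for the seasonally adjusted series, and by default SNAIVE for the seasonal component) and should return forecasts back on the original Turnover scale, making the RMSEs directly comparable. A minimal sketch under those assumptions (not run here):
myseries_train |>
  model(stl_ets = decomposition_model(
    STL(box_cox(Turnover, lambda_myseries)),
    ETS(season_adjust ~ error("A") + trend("A") + season("N"))
  )) |>
  forecast(new_data = anti_join(myseries, myseries_train)) |>
  accuracy(myseries) # test accuracy on the original Turnover scale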
What’s interesting is that if we let ETS() choose automatically on the seasonally adjusted series, it picks ETS(A,A,N), but its test RMSE is higher than that of the (M,A,M) specification:
fit2 <- dcmp_tsibble_train |>
model(
ets = ETS(season_adjust)
)
report(fit2)
## Series: season_adjust
## Model: ETS(A,A,N)
## Smoothing parameters:
## alpha = 0.6352821
## beta = 0.0001000504
##
## Initial states:
## l[0] b[0]
## 1.01283 0.006490657
##
## sigma^2: 0.005
##
## AIC AICc BIC
## 88.69163 88.91635 106.73899
fit2 |>
  forecast(h = 84) |>
accuracy(dcmp_tsibble)
## # A tibble: 1 × 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ets Northe… Clothin… Test -0.217 0.242 0.218 -7.61 7.63 1.43 1.19 0.820