library(fpp3)
Consider the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.
Use the ETS() function to estimate the equivalent model for simple exponential smoothing. Find the optimal values of α and ℓ₀, and generate forecasts for the next four months.
head(aus_livestock)
## # A tsibble: 6 x 4 [1M]
## # Key: Animal, State [1]
## Month Animal State Count
## <mth> <fct> <fct> <dbl>
## 1 1976 Jul Bulls, bullocks and steers Australian Capital Territory 2300
## 2 1976 Aug Bulls, bullocks and steers Australian Capital Territory 2100
## 3 1976 Sep Bulls, bullocks and steers Australian Capital Territory 2100
## 4 1976 Oct Bulls, bullocks and steers Australian Capital Territory 1900
## 5 1976 Nov Bulls, bullocks and steers Australian Capital Territory 2100
## 6 1976 Dec Bulls, bullocks and steers Australian Capital Territory 1800
pig_data <- aus_livestock %>%
filter(Animal == "Pigs", State == "Victoria")
pig_fit <- pig_data %>%
model(ETS(Count ~ error("A") + trend("N") + season("N")))
pig_fc <- pig_fit %>%
forecast(h = 4)
pig_fit %>% report()
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
pig_fc
## # A fable: 4 x 6 [1M]
## # Key: Animal, State, .model [1]
## Animal State .model Month
## <fct> <fct> <chr> <mth>
## 1 Pigs Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + season(\"… 2019 Jan
## 2 Pigs Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + season(\"… 2019 Feb
## 3 Pigs Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + season(\"… 2019 Mar
## 4 Pigs Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + season(\"… 2019 Apr
## # ℹ 2 more variables: Count <dist>, .mean <dbl>
From the output above we can see that the optimal value of α was estimated to be about 0.322 and the optimal initial level ℓ₀ is about 100,646.6.
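These estimates can also be extracted programmatically rather than read off the report; a small sketch using fabletools' tidy() (output omitted):
# Tidy table of the estimated parameters: alpha and the initial level l[0]
tidy(pig_fit)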
pig_data %>%
autoplot(Count) +
autolayer(pig_fc) +
ggtitle("Forecast of Pigs Slaughtered in Victoria") +
ylab("Number of Pigs") +
xlab("Year Month")
Compute a 95% prediction interval for the first forecast using ŷ ± 1.96s, where s is the standard deviation of the residuals. Compare your interval with the interval produced by R.
# Compute standard deviation of residuals
resid_vals <- augment(pig_fit)$.resid
s <- sd(resid_vals, na.rm = TRUE)
# Compute first forecast and its manual 95% prediction interval
first_forecast <- pig_fc$.mean[1]
manual_lower <- first_forecast - 1.96 * s
manual_upper <- first_forecast + 1.96 * s
# Print manual interval
print(paste("Manual 95% Prediction Interval:", manual_lower, "to", manual_upper))
## [1] "Manual 95% Prediction Interval: 76871.0124775157 to 113502.102384467"
# Extract first forecast's mean and R's computed 95% prediction interval
r_forecast <- pig_fc %>% slice(1) # Get first forecasted value
# Compute 95% prediction interval (2.5% and 97.5% quantiles)
r_lower <- quantile(r_forecast$Count, p = 0.025)
r_upper <- quantile(r_forecast$Count, p = 0.975)
# Print R's computed prediction interval
print(paste("R Computed 95% Prediction Interval:", r_lower, "to", r_upper))
## [1] "R Computed 95% Prediction Interval: 76854.7888896402 to 113518.325972343"
From the output above we can see that the interval computed by R is slightly wider, but overall the two intervals are very similar. The small difference is expected: R bases its interval on the model's estimated variance (the sigma^2 in the report) rather than on the sample standard deviation of the residuals.
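As a quick check, R's interval can be reproduced from the estimated variance shown in the report() output above; a small sketch reusing first_forecast from the previous chunk:
# R uses the ML estimate of the residual variance (sigma^2 = 87480760 above)
# and the exact normal quantile, rather than the sample SD of the residuals
sigma2 <- glance(pig_fit)$sigma2
first_forecast + c(-1, 1) * qnorm(0.975) * sqrt(sigma2)
This gives approximately 76855 to 113518, matching R's interval.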
Data set global_economy contains the annual Exports from many countries. Select one country to analyse.
Plot the Exports series and discuss the main features of the data.
head(global_economy)
## # A tsibble: 6 x 9 [1Y]
## # Key: Country [1]
## Country Code Year GDP Growth CPI Imports Exports Population
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Afghanistan AFG 1960 537777811. NA NA 7.02 4.13 8996351
## 2 Afghanistan AFG 1961 548888896. NA NA 8.10 4.45 9166764
## 3 Afghanistan AFG 1962 546666678. NA NA 9.35 4.88 9345868
## 4 Afghanistan AFG 1963 751111191. NA NA 16.9 9.17 9533954
## 5 Afghanistan AFG 1964 800000044. NA NA 18.1 8.89 9731361
## 6 Afghanistan AFG 1965 1006666638. NA NA 21.4 11.3 9938414
us_econ <- global_economy %>%
filter(Country == "United States")
head(us_econ)
## # A tsibble: 6 x 9 [1Y]
## # Key: Country [1]
## Country Code Year GDP Growth CPI Imports Exports Population
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 United States USA 1960 543300000000 NA 13.6 4.20 4.97 180671000
## 2 United States USA 1961 563300000000 2.30 13.7 4.03 4.90 183691000
## 3 United States USA 1962 605100000000 6.10 13.9 4.13 4.81 186538000
## 4 United States USA 1963 638600000000 4.40 14.0 4.09 4.87 189242000
## 5 United States USA 1964 685800000000 5.80 14.2 4.10 5.10 191889000
## 6 United States USA 1965 743700000000 6.40 14.4 4.24 4.99 194303000
us_econ %>%
autoplot(Exports) +
labs(title = "US Exports", x="Year", y="Exports of goods and services (% of GDP)")
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
The plot above shows United States exports of goods and services as a percentage of GDP. There is a clear overall rise in exports as a percent of GDP over the years, with some cyclic dips: a large drop through the mid-1980s, another in the early 2000s, and one more in the mid-2010s at the end of the series (global_economy ends in 2017, so this last dip predates COVID).
Use an ETS(A,N,N) model to forecast the series, and plot the forecasts.
us_econ <- us_econ %>% filter(!is.na(Exports))
us_fit <- us_econ %>%
model(ANN = ETS(Exports ~ error("A") + trend("N") + season("N")))
us_forecast <- us_fit %>%
forecast(h=6)
us_forecast %>%
autoplot(us_econ) +
labs(title="US Exports Forecast", x="Year", y="Exports of goods and services (% of GDP)")
Compute the RMSE values for the training data.
accuracy(us_fit)
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 United States ANN Training 0.125 0.627 0.465 1.37 5.10 0.990 0.992 0.239
From the output above we can see the training RMSE is about 0.63.
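Since RMSE is just the square root of the mean squared training residual, it can be verified by hand; a small sketch (output omitted):
# Root mean squared error of the training residuals
augment(us_fit) %>%
as_tibble() %>%
summarise(RMSE = sqrt(mean(.resid^2, na.rm = TRUE)))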
Compare the results to those from an ETS(A,A,N) model. (Remember that the trended model is using one more parameter than the simpler model.) Discuss the merits of the two forecasting methods for this data set.
us_fit_comp <- us_econ %>%
model(
ANN = ETS(Exports ~ error("A") + trend("N") + season("N")),
AAN = ETS(Exports ~ error("A") + trend("A") + season("N"))
)
us_forecast_comp <- us_fit_comp %>%
forecast(h=6)
us_forecast_comp %>%
autoplot(us_econ) +
labs(title="US Exports Forecast", x="Year", y="Exports of goods and services (% of GDP)")
accuracy(us_fit_comp)
## # A tibble: 2 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 United States ANN Train… 0.125 0.627 0.465 1.37 5.10 0.990 0.992 0.239
## 2 United States AAN Train… 0.0108 0.615 0.466 -0.0570 5.19 0.992 0.973 0.238
From the output above we can see that the AAN model has a slightly lower RMSE, which suggests it fits the data slightly better. Also, since there is an obvious overall positive trend in the data, the AAN model has more merit for this data set because it includes a trend component.
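Because the trended model spends extra parameters (the trend smoothing parameter and the initial slope), it is worth comparing information criteria as well as RMSE; a quick sketch (output not shown):
# AICc/BIC penalise the AAN model's additional parameters
glance(us_fit_comp) %>%
select(.model, AIC, AICc, BIC)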
Compare the forecasts from both methods. Which do you think is best?
us_forecast_comp %>%
autoplot(us_econ, level = NULL) +
labs(title="US Exports Forecast", x="Year", y="Exports of goods and services (% of GDP)")
Comparing the forecasts from both methods above, I think the AAN model is better. The ANN model's forecasts are flat, simply carrying the last estimated level forward, while the AAN model accounts for the overall positive trend in the data and gives a more meaningful forecast.
Calculate a 95% prediction interval for the first forecast for each model, using the RMSE values and assuming normal errors. Compare your intervals with those produced using R.
acc_fit_comp <- accuracy(us_fit_comp)
ann_rmse <- acc_fit_comp$RMSE[1]
aan_rmse <- acc_fit_comp$RMSE[2]
ann_first_forecast <- us_forecast_comp %>%
filter(.model == "ANN") %>%
slice(1)
aan_first_forecast <- us_forecast_comp %>%
filter(.model == "AAN") %>%
slice(1)
ann_mean <- ann_first_forecast$.mean
aan_mean <- aan_first_forecast$.mean
lower_ann <- ann_mean - 1.96 * ann_rmse
upper_ann <- ann_mean + 1.96 * ann_rmse
lower_aan <- aan_mean - 1.96 * aan_rmse
upper_aan <- aan_mean + 1.96 * aan_rmse
# Print results
print(paste("Manual 95% PI for ETS(A,N,N):", lower_ann, "to", upper_ann))
## [1] "Manual 95% PI for ETS(A,N,N): 10.6616317815007 to 13.1197353879122"
print(paste("Manual 95% PI for ETS(A,A,N):", lower_aan, "to", upper_aan))
## [1] "Manual 95% PI for ETS(A,A,N): 10.8012985927343 to 13.2119283075914"
ann_r_lower <- quantile(ann_first_forecast$Exports, p = 0.025)
ann_r_upper <- quantile(ann_first_forecast$Exports, p = 0.975)
aan_r_lower <- quantile(aan_first_forecast$Exports, p = 0.025)
aan_r_upper <- quantile(aan_first_forecast$Exports, p = 0.975)
print(paste("R 95% PI for ETS(A,N,N):", ann_r_lower, "to", ann_r_upper))
## [1] "R 95% PI for ETS(A,N,N): 10.6395079134527 to 13.1418592559602"
print(paste("R 95% PI for ETS(A,A,N):", aan_r_lower, "to", aan_r_upper))
## [1] "R 95% PI for ETS(A,A,N): 10.7566652294566 to 13.2565616708691"
We can see from the output above that the manually computed prediction intervals are slightly tighter than the ones R produced. For the ANN model the manual interval is 10.66 to 13.12 versus R's 10.64 to 13.14, and for the AAN model the manual interval is 10.80 to 13.21 versus R's 10.76 to 13.26. As before, R's intervals are a bit wider because they are based on the model's estimated variance rather than the training RMSE.
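For reference, fable can also report intervals directly via hilo(), which avoids computing the quantiles by hand; a small sketch (output not shown):
# 95% prediction intervals attached as a hilo column
us_forecast_comp %>%
hilo(level = 95)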
Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.
chinese_gdp <- global_economy %>%
filter(Country == "China") %>%
select(Year, GDP)
chinese_gdp %>%
autoplot(GDP) +
labs(title = "Chinese GDP", x="Year", y="Gross domestic product (in $USD February 2019)")
chinese_lambda <- chinese_gdp %>%
features(GDP, features = guerrero) %>%
pull(lambda_guerrero)
china_fit_comp <- chinese_gdp %>%
model(
auto = ETS(GDP),
ETS_Box = ETS(box_cox(GDP, chinese_lambda)),
AAN = ETS(GDP ~ error("A") + trend("A") + season("N")),
AADN = ETS(GDP ~ error("A") + trend("Ad") + season("N"))
)
# fable back-transforms the Box-Cox model's forecasts automatically, so no
# manual inv_box_cox() adjustment of .mean is needed (applying it again
# would double back-transform the forecasts).
china_forecast_comp <- china_fit_comp %>%
forecast(h=30)
china_forecast_comp %>%
autoplot(chinese_gdp, level = NULL) +
labs(title="Chinese GDP Forecast", x="Year", y="Gross domestic product (in $USD February 2019)")
accuracy(china_fit_comp)
## # A tibble: 4 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 auto Training 4.13e10 2.00e11 9.66e10 2.11 7.72 0.446 0.477 0.268
## 2 ETS_Box Training -2.75e10 2.88e11 1.25e11 0.607 7.17 0.578 0.688 0.665
## 3 AAN Training 2.36e10 1.90e11 9.59e10 1.41 7.62 0.442 0.453 0.00905
## 4 AADN Training 2.95e10 1.90e11 9.49e10 1.62 7.62 0.438 0.454 -0.00187
From the output above we can see that the Box-Cox transformed model produces an exponential-looking forecast: a linear trend on the transformed scale becomes exponential growth once it is back-transformed. The damped model levels off the growth over the horizon, while the other models give more conservative, roughly linear forecasts.
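To sharpen the intuition for damping, the damping parameter can also be fixed rather than estimated; a sketch, where phi = 0.9 is an arbitrary illustrative value (output not shown):
# Smaller phi flattens the trend sooner over the 30-year horizon
chinese_gdp %>%
model(AADN_fixed = ETS(GDP ~ error("A") + trend("Ad", phi = 0.9) + season("N"))) %>%
forecast(h=30) %>%
autoplot(chinese_gdp)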
Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?
aus_gas <- aus_production %>%
select(Quarter, Gas)
head(aus_gas)
## # A tsibble: 6 x 2 [1Q]
## Quarter Gas
## <qtr> <dbl>
## 1 1956 Q1 5
## 2 1956 Q2 6
## 3 1956 Q3 7
## 4 1956 Q4 6
## 5 1957 Q1 5
## 6 1957 Q2 7
aus_gas %>%
autoplot(Gas) +
labs(title = "Australian Gas production in petajoules", x="Year Quarter", y="Gas Production in petajoules")
aus_gas_fit_comp <- aus_gas %>%
model(
auto = ETS(Gas),
additive = ETS(Gas ~ error("A") + trend("A") + season("A")),
multiplicative = ETS(Gas ~ error("M") + trend("A") + season("M")),
aDamped = ETS(Gas ~ error("A") + trend("Ad") + season("A")),
mDamped = ETS(Gas ~ error("M") + trend("Ad") + season("M"))
)
aus_gas_forecast_comp <- aus_gas_fit_comp %>%
forecast(h=20)
aus_gas_forecast_comp %>%
autoplot(aus_gas, level = NULL) +
labs(title="Australian Gas production in petajoules", x="Year Quarter", y="Gas Production in petajoules")
accuracy(aus_gas_fit_comp)
## # A tibble: 5 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 auto Training -0.115 4.60 3.02 0.199 4.08 0.542 0.606 -0.0131
## 2 additive Training 0.00525 4.76 3.35 -4.69 10.9 0.600 0.628 0.0772
## 3 multiplicative Training -0.115 4.60 3.02 0.199 4.08 0.542 0.606 -0.0131
## 4 aDamped Training 0.600 4.58 3.16 0.460 7.39 0.566 0.604 0.0660
## 5 mDamped Training -0.00439 4.59 3.03 0.326 4.10 0.544 0.606 -0.0217
report(aus_gas_fit_comp)
## Warning in report.mdl_df(aus_gas_fit_comp): Model reporting is only supported
## for individual models, so a glance will be shown. To see the report for a
## specific model, use `select()` and `filter()` to identify a single model.
## # A tibble: 5 × 9
## .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 auto 0.00324 -831. 1681. 1682. 1711. 21.1 32.2 0.0413
## 2 additive 23.6 -927. 1872. 1873. 1903. 22.7 29.7 3.35
## 3 multiplicative 0.00324 -831. 1681. 1682. 1711. 21.1 32.2 0.0413
## 4 aDamped 21.9 -919. 1857. 1858. 1891. 21.0 29.6 3.16
## 5 mDamped 0.00329 -832. 1684. 1685. 1718. 21.1 32.0 0.0417
I decided to fit five different models to the Australian gas data: an automatically selected ETS model, plus additive, multiplicative, additive damped, and multiplicative damped models. The RMSE values are all similar, with the additive damped model giving the lowest. Looking at the AIC/AICc/BIC, the auto and multiplicative models give the lowest values; in fact they produce identical statistics, which indicates the automatic search selected the same specification. So by AIC/BIC the multiplicative model wins, and by training RMSE the additive damped model wins. Multiplicative seasonality is needed here because the plot shows the seasonal variation growing as the level of the series increases.
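The identical statistics for the auto and multiplicative models can be confirmed by printing the automatically selected specification; a small sketch (output not shown, but it should report ETS(M,A,M)):
# report() on a single model prints the chosen specification
aus_gas_fit_comp %>%
select(auto) %>%
report()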
Recall your retail time series data (from Exercise 7 in Section 2.10).
Why is multiplicative seasonality necessary for this series?
set.seed(12345678)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries %>%
autoplot(Turnover)
Multiplicative seasonality is necessary for this series because the seasonal variation grows with the level of the series: at the start the seasonal swings are small, and they become larger as the series increases over time.
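One way to check this is to plot the series on a log scale, where multiplicative seasonality becomes roughly constant in size; a quick sketch (output not shown):
# If seasonality is multiplicative, the seasonal swings of log(Turnover)
# should be roughly constant over time
myseries %>%
autoplot(log(Turnover))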
Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.
holts_models <- myseries %>%
model(
hw = ETS(Turnover ~ error("M") + trend("A") + season("M")),
hw_damped = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
)
holts_forecasts <- holts_models %>%
forecast(h=20)
holts_forecasts %>%
autoplot(myseries, level = NULL) +
labs(title="Australian Retail Turnover Forecasts", x="Year Month", y="Retail turnover in $Million AUD")
accuracy(holts_models)
## # A tibble: 2 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Northern … Clothin… hw Trai… -0.0128 0.613 0.450 -0.469 5.15 0.513 0.529
## 2 Northern … Clothin… hw_da… Trai… 0.0398 0.616 0.444 -0.0723 5.06 0.507 0.531
## # ℹ 1 more variable: ACF1 <dbl>
report(holts_models)
## Warning in report.mdl_df(holts_models): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 2 × 11
## State Industry .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Northern… Clothin… hw 0.00447 -870. 1775. 1776. 1841. 0.376 0.473 0.0511
## 2 Northern… Clothin… hw_da… 0.00457 -872. 1781. 1783. 1851. 0.379 0.479 0.0505
The regular Holt-Winters forecasts show a little more variation, with higher peaks and deeper troughs, than the damped model's forecasts.
Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?
accuracy(holts_models) %>%
select(.model, RMSE)
## # A tibble: 2 × 2
## .model RMSE
## <chr> <dbl>
## 1 hw 0.613
## 2 hw_damped 0.616
From the output above we can see that the RMSE is slightly lower for the non-damped model, so I would prefer it over the damped model in this case.
Check that the residuals from the best method look like white noise.
resids_best <- myseries %>%
model(hw = ETS(Turnover ~ error("M") + trend("A") + season("M")))
resids_best %>%
augment() %>%
autoplot(.resid) +
labs(title = "Residuals from the Best Model",
y = "Residuals",
x = "Time")
resids_best %>% augment() %>%
features(.innov, ljung_box, lag = 10)
## # A tibble: 1 × 5
## State Industry .model lb_stat lb_pvalue
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Northern Territory Clothing, footwear and personal a… hw 11.0 0.359
Looking at the plot and the Ljung-Box test results above, the residuals do appear to be white noise. The plot shows them centred around zero with no apparent trend or seasonality, and the Ljung-Box p-value of 0.359 is well above 0.05, so we fail to reject the null hypothesis of no autocorrelation.
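As a complement to the Ljung-Box test, feasts' gg_tsresiduals() shows the residual time plot, ACF, and histogram in a single figure; a small sketch:
# Time plot, ACF and histogram of the innovation residuals
resids_best %>%
gg_tsresiduals()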
Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?
myseries_train <- myseries %>%
filter(year(Month) < 2011)
fit_train <- myseries_train %>%
model(
best_model = ETS(Turnover ~ error("M") + trend("A") + season("M")),
snaive = SNAIVE(Turnover)
)
forecasts <- fit_train %>%
forecast(new_data = anti_join(myseries, myseries_train))
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`
forecasts %>% autoplot(myseries_train, level = NULL)
forecasts %>% accuracy(myseries) %>%
select(.type, .model, RMSE)
## # A tibble: 2 × 3
## .type .model RMSE
## <chr> <chr> <dbl>
## 1 Test best_model 1.78
## 2 Test snaive 1.55
Based on the output above, the seasonal naïve method fit the test data better, with an RMSE of 1.55 compared to 1.78 for the ETS model. So I was not able to beat the seasonal naïve approach; in this case it is the better model.
For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?
train_data <- myseries %>%
filter(Month < yearmonth("2011 Jan"))
test_data <- myseries %>%
filter(Month >= yearmonth("2011 Jan"))
lambda <- train_data %>%
features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
train_data %>%
model(STL(box_cox(Turnover,lambda) ~ season(window = "periodic"), robust = TRUE)) %>%
components() %>%
autoplot() +
ggtitle("STL with Box-Cox Transformation")
train_decomp <- train_data %>%
model(STL_box = STL(box_cox(Turnover,lambda) ~ season(window = "periodic"), robust = TRUE)) %>%
components()
# Seasonally adjusted: note this overwrites Turnover with the season_adjust
# component, which is on the Box-Cox scale rather than the original dollar
# scale, and discards the seasonal component entirely
train_data$Turnover <- train_decomp$season_adjust
fit <- train_data %>%
model(ETS(Turnover ~ error("M") + trend("A") + season("M")))
fit %>% gg_tsresiduals() +
ggtitle("Australian Retail Turnover Residual")
test_forecast <- fit %>%
forecast(new_data = anti_join(myseries, train_data))
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`
test_forecast %>% accuracy(myseries) %>%
select(.type, .model, RMSE)
## # A tibble: 1 × 3
## .type .model RMSE
## <chr> <chr> <dbl>
## 1 Test "ETS(Turnover ~ error(\"M\") + trend(\"A\") + season(\"M\"))" 5.21
From the output above we can see that this approach performed much worse than the earlier models, giving a test RMSE of 5.21 compared with 1.78 and 1.55 in the previous question. The main cause is a scale mismatch: the ETS model was fitted to the Box-Cox-transformed, seasonally adjusted series, but its forecasts were scored against the raw Turnover values, and the seasonal component was never added back. A sketch of a fairer version of this approach follows.
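A fairer version keeps the decomposition, the ETS model, and the back-transformation inside a single model object, so the seasonal component is forecast and added back and the results return to the original Turnover scale. This is a sketch using fabletools' decomposition_model(); it has not been run here, so no results are claimed:
# Re-create the training set, since Turnover was overwritten above
train_fresh <- myseries %>%
filter(Month < yearmonth("2011 Jan"))
# STL on the Box-Cox scale; ETS(A,A,N) on the seasonally adjusted component.
# The seasonal component is forecast with SNAIVE by default and added back,
# and the Box-Cox transformation is inverted automatically.
stl_ets_fit <- train_fresh %>%
model(stl_ets = decomposition_model(
STL(box_cox(Turnover, lambda) ~ season(window = "periodic"), robust = TRUE),
ETS(season_adjust ~ error("A") + trend("A") + season("N"))
))
stl_ets_fit %>%
forecast(new_data = anti_join(myseries, train_fresh)) %>%
accuracy(myseries) %>%
select(.type, .model, RMSE)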