library(tidyverse)
library(fpp3)
library(seasonal)
library(fable)
Forecasts produced using exponential smoothing methods are weighted averages of past observations, with the weights decaying exponentially as the observations get older.
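For simple exponential smoothing, for example, the weight attached to the observation \(j\) periods in the past is \(\alpha(1-\alpha)^j\). A minimal sketch with a hypothetical \(\alpha = 0.3\) (not a fitted value) makes the decay explicit:
# Illustrative only: exponentially decaying weights for a hypothetical alpha
alpha <- 0.3
weights <- alpha * (1 - alpha)^(0:9)  # weights on y_T, y_(T-1), ..., y_(T-9)
round(weights, 4)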
head(aus_livestock %>% filter(Animal == "Pigs" & State == "Victoria"))
## # A tsibble: 6 x 4 [1M]
## # Key: Animal, State [1]
## Month Animal State Count
## <mth> <fct> <fct> <dbl>
## 1 1972 Jul Pigs Victoria 94200
## 2 1972 Aug Pigs Victoria 105700
## 3 1972 Sep Pigs Victoria 96500
## 4 1972 Oct Pigs Victoria 117100
## 5 1972 Nov Pigs Victoria 104600
## 6 1972 Dec Pigs Victoria 100500
pigs_victoria <- aus_livestock %>% filter(Animal == "Pigs" & State == "Victoria")
pigs_victoria %>% autoplot(Count) +
labs(title = "Number of Pigs Slaughtered in Victoria")
fit <- pigs_victoria %>% model(ets = ETS(Count))
report(fit)
## Series: Count
## Model: ETS(A,N,A)
## Smoothing parameters:
## alpha = 0.3579401
## gamma = 0.0001000139
##
## Initial states:
## l[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5] s[-6]
## 95487.5 2066.235 6717.311 -2809.562 -1341.299 -7750.173 -8483.418 5610.89
## s[-7] s[-8] s[-9] s[-10] s[-11]
## -579.8107 1215.464 -2827.091 1739.465 6441.989
##
## sigma^2: 60742898
##
## AIC AICc BIC
## 13545.38 13546.26 13610.24
fit %>% forecast(h = 4) %>% autoplot(pigs_victoria) + labs(title = "Forecast of the Number of Pigs Slaughtered: A,N,A")
The 95% prediction interval, calculated manually as \(\hat{y} \pm 1.96\hat{\sigma}\), is as follows:
# Manually calculate the 95% prediction interval using the formula above
variance <- 60742898
upper_bound <- 95487.5 + 1.96 * sqrt(variance)
lower_bound <- 95487.5 - 1.96 * sqrt(variance)
print(paste("The 95% prediction interval is", round(lower_bound, 2), " - ", round(upper_bound, 2)))
## [1] "The 95% prediction interval is 80211.7 - 110763.3"
In R, the same interval can be obtained by piping the fable (forecast) object into the hilo() function:
# Forecast the next 4 periods
forecast_pigs <- fit %>% forecast(h = 4)
# Pull CI from the forecast using the hilo function
pigs_hilo <- forecast_pigs %>% hilo(level = 95)
pigs_hilo %>% slice(1) %>% select("95%")
## # A tsibble: 1 x 2 [1M]
## `95%` Month
## <hilo> <mth>
## 1 [69149.19, 99700.22]95 2019 Jan
Comparing the intervals for the first forecast (January 2019), the manually calculated interval \(\hat{y} \pm 1.96\hat{\sigma}\) sits slightly higher because it is centred on the initial level \(\ell_0 = 95487.5\) from the model report rather than on the January 2019 forecast mean; even so, there is substantial overlap between the two intervals.
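A minimal sketch (using the .mean column returned by forecast() and the \(\hat{\sigma}^2\) value from the report) of the same manual calculation centred on the forecast mean instead:
# Recompute the manual interval around the January 2019 forecast mean
first_fc <- forecast_pigs %>% slice(1)          # first forecast period (2019 Jan)
mu <- first_fc$.mean                            # point forecast
sigma <- sqrt(60742898)                         # sigma^2 reported by the fitted model
c(lower = mu - 1.96 * sigma, upper = mu + 1.96 * sigma)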
# Create a tibble with the intervals
lb <- c(69149.19, 80211.7)
ub <- c(99700.22, 110763.3)
intervals <- tibble(calculation = c("R", "Manual"), lower = lb,
upper = ub)
intervals
## # A tibble: 2 × 3
## calculation lower upper
## <chr> <dbl> <dbl>
## 1 R 69149. 99700.
## 2 Manual 80212. 110763.
# Create a line segment plot to visualize the two intervals
intervals %>% ggplot() + geom_segment(aes(x = lower, y = calculation, xend = upper, yend = calculation, color = calculation)) +
geom_point(aes(x = c(lower[2],upper[2]), y = 1, color = calculation[2])) +
geom_point(aes(x = c(lower[1],upper[1]), y = 2, color = calculation[1])) +
labs(title = "95% Interval Prediction")
canada_econ <- global_economy %>% filter(Country == "Canada")
head(canada_econ)
## # A tsibble: 6 x 9 [1Y]
## # Key: Country [1]
## Country Code Year GDP Growth CPI Imports Exports Population
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Canada CAN 1960 41093453545. NA 13.3 18.1 17.0 17909009
## 2 Canada CAN 1961 40767969454. 3.16 13.5 18.1 17.8 18271000
## 3 Canada CAN 1962 41978852041. 7.12 13.6 17.8 17.8 18614000
## 4 Canada CAN 1963 44657169109. 5.18 13.8 17.4 18.3 18964000
## 5 Canada CAN 1964 48882938810. 6.70 14.1 18.2 19.2 19325000
## 6 Canada CAN 1965 53909570342. 6.64 14.4 18.7 18.5 19678000
canada_econ %>% autoplot(Exports) + labs(title = "Canadian Exports", x = "Year", y = "Exports (% GDP)")
In this time series of Canadian exports there is a general upward trend, with some peaks and valleys, up until the early 1990s. At that point exports rise sharply (roughly a 20 percentage-point increase) through about 2000, followed by a steep decline over the next eight or nine years, after which the trend switches direction again.
# Fit an ANN model with the Canadian export data
fit_ann <- canada_econ %>% model(ANN = ETS(Exports ~ error("A") + trend("N") + season("N")))
# Print out model report
report(fit_ann)
## Series: Exports
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.9998999
##
## Initial states:
## l[0]
## 16.96593
##
## sigma^2: 2.7105
##
## AIC AICc BIC
## 297.3032 297.7477 303.4846
#components(fit_ann)
augment(fit_ann)
## # A tsibble: 58 x 7 [1Y]
## # Key: Country, .model [1]
## Country .model Year Exports .fitted .resid .innov
## <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Canada ANN 1960 17.0 17.0 0.000124 0.000124
## 2 Canada ANN 1961 17.8 17.0 0.787 0.787
## 3 Canada ANN 1962 17.8 17.8 0.0297 0.0297
## 4 Canada ANN 1963 18.3 17.8 0.471 0.471
## 5 Canada ANN 1964 19.2 18.3 0.935 0.935
## 6 Canada ANN 1965 18.5 19.2 -0.651 -0.651
## 7 Canada ANN 1966 19.3 18.5 0.801 0.801
## 8 Canada ANN 1967 20.3 19.3 0.932 0.932
## 9 Canada ANN 1968 21.2 20.3 0.929 0.929
## 10 Canada ANN 1969 21.2 21.2 0.0483 0.0483
## # ℹ 48 more rows
# Forecast the series for the next 5 years
fit_ann_fc <- fit_ann %>% forecast(h = 5)
# Visualize forecast with prediction intervals
fit_ann_fc %>% autoplot(canada_econ) + labs(title = "Forecasted Canadian Exports: ANN Model", x = "Year", y = "Exports (% GDP)")
# Use the accuracy function to compute various diagnostic values including RMSE
accuracy(fit_ann) %>% select(RMSE)
## # A tibble: 1 × 1
## RMSE
## <dbl>
## 1 1.62
fit_models <- canada_econ %>% model(ANN = ETS(Exports ~ error("A") + trend("N") + season("N")),
AAN = ETS(Exports ~ error("A") + trend("A") + season("N")))
report(fit_models)
## Warning in report.mdl_df(fit_models): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 2 × 10
## Country .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Canada ANN 2.71 -146. 297. 298. 303. 2.62 7.32 1.19
## 2 Canada AAN 2.84 -146. 302. 303. 312. 2.64 8.53 1.19
accuracy(fit_models)
## # A tibble: 2 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Canada ANN Training 0.240 1.62 1.19 0.885 4.15 0.983 0.991 0.310
## 2 Canada AAN Training -0.0135 1.63 1.19 0.0538 4.22 0.981 0.996 0.160
The two models perform similarly, with the simple exponential smoothing (SES) model without a trend component having a slightly lower AIC. The two methods used here are simple exponential smoothing and Holt's linear trend method. Both can work for this data set for several reasons, the first being that neither model adds a seasonal component, which matches the data. When visualizing the time series we see a general trend, but there is also considerable variation within it, including a pretty significant spike that breaks from the earlier pattern. It can be argued that because the large spike interrupts any clear trend, the SES method works, since it is a suitable technique for forecasting data with no clear trend. On the other hand, Holt's linear trend method (AAN) can capture any effect the upward trajectory of exports as a percentage of GDP has on the model.
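For reference, the point-forecast equations of the two candidate methods (stated here from the standard component form, not extracted from the fitted objects) make the difference explicit:
\[
\text{SES (A,N,N):}\quad \hat{y}_{t+h|t} = \ell_t, \qquad \ell_t = \alpha y_t + (1-\alpha)\,\ell_{t-1},
\]
\[
\text{Holt (A,A,N):}\quad \hat{y}_{t+h|t} = \ell_t + h\,b_t, \qquad \ell_t = \alpha y_t + (1-\alpha)(\ell_{t-1} + b_{t-1}), \qquad b_t = \beta^*(\ell_t - \ell_{t-1}) + (1-\beta^*)\,b_{t-1}.
\]
SES therefore produces flat forecasts at the last estimated level, while Holt's method extrapolates the estimated trend linearly.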
fit_models %>%
forecast(h = 5) |>
autoplot(canada_econ, level = NULL) +
labs(title = "Canadian Exports", y = "% GDP") +
guides(colour = guide_legend(title = "Forecast"))
I think the ANN model is preferable for this data set, as the method essentially uses the last observed level as the forecast (the estimated \(\alpha \approx 1\)), which can be particularly useful with economic series. The AIC is also lower for this model, an indication that it finds a better balance between fit and generalizability.
# Use the hilo() function to get the 95% prediction intervals
hilo_tibble <- fit_models %>%
forecast(h = 5) %>% hilo(level = 95) %>% filter(Year == 2018)
hilo_tibble
## # A tsibble: 2 x 6 [1Y]
## # Key: Country, .model [2]
## Country .model Year
## <fct> <chr> <dbl>
## 1 Canada ANN 2018
## 2 Canada AAN 2018
## # ℹ 3 more variables: Exports <dist>, .mean <dbl>, `95%` <hilo>
One way to approximate the 95% prediction intervals from the RMSE is to add and subtract roughly two (≈ 1.96) times the RMSE from the point forecast. The RMSE values come from the accuracy measures of the training models. The manually calculated intervals are quite similar to the ones generated by R, as the table below shows.
# Calculate the 95% predictive intervals using the RMSE for the ANN model
RMSE_ann <- 1.617713
mean_ann <- 30.89403
lower_ann <- mean_ann - (RMSE_ann * 2)
upper_ann <- mean_ann + (RMSE_ann * 2)
# Calculate the 95% predictive intervals using the RMSE for the AAN model
RMSE_aan <- 1.625568
mean_aan <- 30.77976
lower_aan <- mean_aan - (RMSE_aan * 2)
upper_aan <- mean_aan + (RMSE_aan * 2)
# Create a tibble with all predictive intervals
tibble(method = c("R", "Manual", "R", "Manual"),
model = c("ANN", "ANN", "AAN", "AAN"),
lower = c(27.66725, lower_ann, 27.47781, lower_aan),
upper = c(34.12082, 34.08170, upper_ann, upper_aan))
## # A tibble: 4 × 4
## method model lower upper
## <chr> <chr> <dbl> <dbl>
## 1 R ANN 27.7 34.1
## 2 Manual ANN 27.7 34.1
## 3 R AAN 27.5 34.1
## 4 Manual AAN 27.5 34.0
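Rather than hardcoding the forecast means and RMSE values, the same intervals can be assembled programmatically. A minimal sketch (output not shown), assuming the .model, .mean, and RMSE columns returned by forecast() and accuracy():
# Build the approximate 95% intervals (point forecast +/- 2 * RMSE) for both models
rmse_tbl <- accuracy(fit_models) %>% select(.model, RMSE)
mean_tbl <- fit_models %>% forecast(h = 5) %>%
  filter(Year == 2018) %>% as_tibble() %>% select(.model, .mean)
left_join(mean_tbl, rmse_tbl, by = ".model") %>%
  mutate(lower = .mean - 2 * RMSE, upper = .mean + 2 * RMSE)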
[Hint: use a relatively large value of h when forecasting, so you can clearly see the differences between the various options when plotting the forecasts.]
china_gdp <- global_economy %>% filter(Country == "China") %>% select(GDP)
head(china_gdp)
## # A tsibble: 6 x 2 [1Y]
## GDP Year
## <dbl> <dbl>
## 1 59716467625. 1960
## 2 50056868958. 1961
## 3 47209359006. 1962
## 4 50706799903. 1963
## 5 59708343489. 1964
## 6 70436266147. 1965
# Plot the time series
china_gdp %>% autoplot(GDP) + labs(title = "China GDP")
# Fit a model with the ETS() function without setting any parameters
fit <- china_gdp %>% model(ets = ETS(GDP))
report(fit)
## Series: GDP
## Model: ETS(M,A,N)
## Smoothing parameters:
## alpha = 0.9998998
## beta = 0.3119984
##
## Initial states:
## l[0] b[0]
## 45713434615 3288256682
##
## sigma^2: 0.0108
##
## AIC AICc BIC
## 3102.064 3103.218 3112.366
# Forecast over the next 15 years
fit %>% forecast(h = 15) %>% autoplot(china_gdp) + labs(title = "China GDP: MAN Model")
The first model, selected automatically, is Holt's linear method with multiplicative errors (ETS(M,A,N)).
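For reference, with an additive damped trend the \(h\)-step point forecast (standard formulation, not something produced by the code below) is
\[
\hat{y}_{t+h|t} = \ell_t + (\phi + \phi^2 + \cdots + \phi^h)\,b_t,
\]
so as \(h\) grows the forecasts flatten out towards \(\ell_t + \frac{\phi}{1-\phi}\,b_t\) rather than increasing without bound.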
# Fit a model with multiplicative errors and an additive damped trend (phi estimated by default)
china_gdp %>% model(`Multiplicative error_Damped method` = ETS(GDP ~ error("M") + trend("Ad") + season("N"))) %>%
forecast(h = 15) %>% autoplot(china_gdp) + labs(title = "China GDP: MAdN with phi = 0.98")
# Fit a model with additive errors and an additive damped trend (phi estimated by default)
china_gdp %>% model(`Additive error_Damped method` = ETS(GDP ~ error("A") + trend("Ad") + season("N"))) %>%
forecast(h = 15) %>% autoplot(china_gdp) + labs(title = "China GDP: AAdN with phi = 0.98")
# Fit a model with additive errors and an additive damped trend, phi = 0.8
china_gdp %>% model(`Additive error_Damped method` = ETS(GDP ~ error("A") + trend("Ad", phi = 0.8) + season("N"))) %>%
forecast(h = 15) %>% autoplot(china_gdp) + labs(title = "China GDP: AAdN with phi = 0.8")
# Transform the data using a Box-Cox lambda chosen by the Guerrero method, then forecast
lambda <- china_gdp |>
features(GDP, features = guerrero) |>
pull(lambda_guerrero)
china_gdp %>% autoplot(box_cox(GDP, lambda)) +
labs(y = "",
title = paste0(
"Transformed China GDP with lambda = ", round(lambda, 4)))
# Save the transformed series in a new column
china_gdp$GDP_transformed <- box_cox(china_gdp$GDP, lambda)
# Fit an additive error model with an additive damped trend to the transformed GDP
china_gdp %>% model(`Additive error_Damped method` = ETS(GDP_transformed ~ error("A") + trend("Ad", phi = 0.9) + season("N"))) %>%
forecast(h = 15) %>%
autoplot(china_gdp) + labs(title = "China GDP Transformed: AAdN")
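An alternative worth noting (a sketch only, output not shown) is to specify the Box-Cox transformation inside the model formula; fable then back-transforms automatically, so the forecasts are reported on the original GDP scale rather than the transformed one:
# Put the transformation in the formula so forecasts are back-transformed to the GDP scale
china_gdp %>%
  model(AAdN_bc = ETS(box_cox(GDP, lambda) ~ error("A") + trend("Ad", phi = 0.9) + season("N"))) %>%
  forecast(h = 15) %>%
  autoplot(china_gdp) + labs(title = "China GDP, Box-Cox inside the model: AAdN")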
# Keep only the Gas series from aus_production
aus_gas_production <- aus_production %>% select(Gas)
# Visualize the time series
aus_gas_production %>% autoplot(Gas) + labs(title = "Gas production in Australia")
aus_gas_fit <- aus_gas_production %>% model(ets = ETS(Gas))
report(aus_gas_fit)
## Series: Gas
## Model: ETS(M,A,M)
## Smoothing parameters:
## alpha = 0.6528545
## beta = 0.1441675
## gamma = 0.09784922
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 5.945592 0.07062881 0.9309236 1.177883 1.074851 0.8163427
##
## sigma^2: 0.0032
##
## AIC AICc BIC
## 1680.929 1681.794 1711.389
# Forecast the next 12 periods which would equal the next three years
aus_gas_fc <- aus_gas_fit %>% forecast(h = 12)
aus_gas_fc %>% autoplot(aus_gas_production) + labs(title = "Australian Gas Production Forecast: MAM")
According to the book, the multiplicative method is preferred when the seasonal variations change proportionally to the level of the series. We can see from the plot that the variation increases consistently as the level rises over time.
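For reference, the point forecasts of the multiplicative Holt-Winters method (standard formulation) multiply the level-plus-trend term by a seasonal index,
\[
\hat{y}_{t+h|t} = (\ell_t + h\,b_t)\,s_{t+h-m(k+1)},
\]
where \(m = 4\) is the seasonal period and \(k\) is the integer part of \((h-1)/m\), so the seasonal swings scale with the level of the series.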
aus_gas_production %>% model(MAdM = ETS(Gas ~ error("M") + trend("Ad") + season("M")))%>%
forecast(h = 12) %>%
autoplot(aus_gas_production) + labs(title = "Australian Gas Production Forecast: MAdM")
aus_gas_production %>% model(MAdM = ETS(Gas ~ error("M") + trend("Ad") + season("M")),
MAM = ETS(Gas ~ error("M") + trend("A") + season("M"))) %>% report()
## Warning in report.mdl_df(.): Model reporting is only supported for individual
## models, so a glance will be shown. To see the report for a specific model, use
## `select()` and `filter()` to identify a single model.
## # A tibble: 2 × 9
## .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MAdM 0.00329 -832. 1684. 1685. 1718. 21.1 32.0 0.0417
## 2 MAM 0.00324 -831. 1681. 1682. 1711. 21.1 32.2 0.0413
aus_gas_production %>% model(MAdM = ETS(Gas ~ error("M") + trend("Ad") + season("M")),
MAM = ETS(Gas ~ error("M") + trend("A") + season("M"))) %>% accuracy()
## # A tibble: 2 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MAdM Training -0.00439 4.59 3.03 0.326 4.10 0.544 0.606 -0.0217
## 2 MAM Training -0.115 4.60 3.02 0.199 4.08 0.542 0.606 -0.0131
Visually, damping the trend does not appear to improve the forecasts or the prediction intervals. Looking at the accuracy measures, most are nearly identical, with the main differences in ME, MPE, and ACF1. The AIC and AICc of the undamped (MAM) model are actually slightly lower than those of the damped model.
aus_houshold_goods <- aus_retail %>% filter(State == "Victoria" & Industry == "Household goods retailing")
head(aus_houshold_goods)
## # A tsibble: 6 x 5 [1M]
## # Key: State, Industry [1]
## State Industry `Series ID` Month Turnover
## <chr> <chr> <chr> <mth> <dbl>
## 1 Victoria Household goods retailing A3349643V 1982 Apr 173.
## 2 Victoria Household goods retailing A3349643V 1982 May 180.
## 3 Victoria Household goods retailing A3349643V 1982 Jun 167.
## 4 Victoria Household goods retailing A3349643V 1982 Jul 174.
## 5 Victoria Household goods retailing A3349643V 1982 Aug 178.
## 6 Victoria Household goods retailing A3349643V 1982 Sep 180.
aus_houshold_goods %>% autoplot(Turnover) + labs(title = "Household Goods Retailing Turnover in Victoria, AUS")
Multiplicative seasonality is necessary for this data set because the size of the seasonal fluctuations grows with the level of the series.
# Holt-Winters multiplicative method
fit_hwm <- aus_houshold_goods %>% model(hw_multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M")))
tidy(fit_hwm)
## # A tibble: 17 × 5
## State Industry .model term estimate
## <chr> <chr> <chr> <chr> <dbl>
## 1 Victoria Household goods retailing hw_multiplicative alpha 0.241
## 2 Victoria Household goods retailing hw_multiplicative beta 0.000836
## 3 Victoria Household goods retailing hw_multiplicative gamma 0.257
## 4 Victoria Household goods retailing hw_multiplicative l[0] 179.
## 5 Victoria Household goods retailing hw_multiplicative b[0] 1.56
## 6 Victoria Household goods retailing hw_multiplicative s[0] 1.01
## 7 Victoria Household goods retailing hw_multiplicative s[-1] 0.980
## 8 Victoria Household goods retailing hw_multiplicative s[-2] 0.982
## 9 Victoria Household goods retailing hw_multiplicative s[-3] 1.43
## 10 Victoria Household goods retailing hw_multiplicative s[-4] 1.05
## 11 Victoria Household goods retailing hw_multiplicative s[-5] 0.969
## 12 Victoria Household goods retailing hw_multiplicative s[-6] 0.944
## 13 Victoria Household goods retailing hw_multiplicative s[-7] 0.936
## 14 Victoria Household goods retailing hw_multiplicative s[-8] 0.903
## 15 Victoria Household goods retailing hw_multiplicative s[-9] 0.878
## 16 Victoria Household goods retailing hw_multiplicative s[-10] 1.01
## 17 Victoria Household goods retailing hw_multiplicative s[-11] 0.919
# Damp the trend
fit_hwm_damped <- aus_houshold_goods %>% model(hw_multiplicative_damped = ETS(Turnover ~ error("M") + trend("Ad") + season("M")))
tidy(fit_hwm_damped)
## # A tibble: 18 × 5
## State Industry .model term estimate
## <chr> <chr> <chr> <chr> <dbl>
## 1 Victoria Household goods retailing hw_multiplicative_damped alpha 0.506
## 2 Victoria Household goods retailing hw_multiplicative_damped beta 0.00752
## 3 Victoria Household goods retailing hw_multiplicative_damped gamma 0.192
## 4 Victoria Household goods retailing hw_multiplicative_damped phi 0.980
## 5 Victoria Household goods retailing hw_multiplicative_damped l[0] 180.
## 6 Victoria Household goods retailing hw_multiplicative_damped b[0] 1.56
## 7 Victoria Household goods retailing hw_multiplicative_damped s[0] 0.977
## 8 Victoria Household goods retailing hw_multiplicative_damped s[-1] 0.894
## 9 Victoria Household goods retailing hw_multiplicative_damped s[-2] 0.918
## 10 Victoria Household goods retailing hw_multiplicative_damped s[-3] 1.50
## 11 Victoria Household goods retailing hw_multiplicative_damped s[-4] 1.05
## 12 Victoria Household goods retailing hw_multiplicative_damped s[-5] 0.975
## 13 Victoria Household goods retailing hw_multiplicative_damped s[-6] 0.922
## 14 Victoria Household goods retailing hw_multiplicative_damped s[-7] 0.940
## 15 Victoria Household goods retailing hw_multiplicative_damped s[-8] 0.935
## 16 Victoria Household goods retailing hw_multiplicative_damped s[-9] 0.921
## 17 Victoria Household goods retailing hw_multiplicative_damped s[-10] 1.04
## 18 Victoria Household goods retailing hw_multiplicative_damped s[-11] 0.936
rbind(accuracy(fit_hwm), accuracy(fit_hwm_damped)) %>% select(State, Industry, `.model`, `.type`, RMSE)
## # A tibble: 2 × 5
## State Industry .model .type RMSE
## <chr> <chr> <chr> <chr> <dbl>
## 1 Victoria Household goods retailing hw_multiplicative Training 24.3
## 2 Victoria Household goods retailing hw_multiplicative_damped Training 23.7
fit_hwm_damped %>% gg_tsresiduals() + labs(title = "Residual Plot")
Evaluating the residuals, the histogram appears roughly normal with a mean close to 0. There appears to be more variability around 0 early in the series than later. Several of the autocorrelations exceed the significance bounds in the ACF plot, although the correlations themselves are small. These findings indicate that there may still be information in the data that the model has not captured and that could improve the forecast.
# Using lag = 2m, where m = 12 is the seasonal period
augment(fit_hwm_damped) %>% features(.innov, ljung_box, lag=24)
## # A tibble: 1 × 5
## State Industry .model lb_stat lb_pvalue
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Victoria Household goods retailing hw_multiplicative_damped 101. 1.86e-11
The Ljung-Box test also provides useful information about the autocorrelations across lags. Given the large test statistic and the very small p-value, the autocorrelations are unlikely to have come from white noise, suggesting there is information left in the residuals.
train <- aus_houshold_goods %>% filter(year(Month) <= 2010)
# Check to see that the data was split correctly
autoplot(aus_houshold_goods, Turnover, colour = "red") +
autolayer(train, Turnover)
# Fit the training set with a Holt-Winters multiplicative model with a damped trend
fit <- train %>% model(hw_multiplicative_damped = ETS(Turnover ~ error("M") + trend("Ad") + season("M")))
report(fit)
## Series: Turnover
## Model: ETS(M,Ad,M)
## Smoothing parameters:
## alpha = 0.4624894
## beta = 0.008759179
## gamma = 0.2129117
## phi = 0.9799998
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5]
## 179.0674 2.041315 0.9944021 0.9008521 0.9146229 1.493028 1.059505 0.9779083
## s[-6] s[-7] s[-8] s[-9] s[-10] s[-11]
## 0.9127158 0.9469247 0.9302421 0.9089104 1.027267 0.9336205
##
## sigma^2: 0.0021
##
## AIC AICc BIC
## 4113.500 4115.598 4182.684
fit_fc <- fit |> forecast(h = "8 years")
fit_fc %>%
autoplot(train, level = NULL, color = "blue") +
autolayer(
filter_index(aus_houshold_goods, "2011 Jan" ~ .),
colour = "red"
)
## Plot variable not specified, automatically selected `.vars = Turnover`
fit_fc %>% accuracy(aus_houshold_goods)
## # A tibble: 1 × 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 hw_multi… Vict… Househo… Test 75.1 134. 106. 5.82 9.30 3.00 2.94 0.935
In Exercise 5.7 the RMSE was 213.41 when we fit a naive benchmark model and forecast the next 8 years. Using a damped Holt-Winters multiplicative model, our test RMSE is 133.88.
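A minimal sketch (output not shown) of refitting simple benchmarks on the same training split, assuming the Exercise 5.7 benchmark was a naive or seasonal naive model:
# Refit naive benchmarks on the same training set and compare test RMSE
train %>%
  model(naive = NAIVE(Turnover), snaive = SNAIVE(Turnover)) %>%
  forecast(h = "8 years") %>%
  accuracy(aus_houshold_goods) %>%
  select(.model, RMSE)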
# Transform the data
lambda <- aus_houshold_goods %>%
features(Turnover, features = guerrero) |>
pull(lambda_guerrero)
aus_houshold_goods <- aus_houshold_goods %>% mutate(Turnover_transformed = box_cox(Turnover, lambda))
aus_houshold_goods %>%
autoplot(Turnover_transformed) +
labs(y = "",
title = paste0(
"Transformed Passengers with lambda = ", round(lambda, 4)))
# Decompose time series - STL
aus_houshold_goods_slt <- aus_houshold_goods |>
model(
STL(Turnover_transformed ~ trend() +
season(),
robust = TRUE)) |>
components()
# Visualize the decomposition
aus_houshold_goods_slt %>% autoplot()
Use ETS on the seasonally adjusted data
# Split the data
train <- aus_houshold_goods_slt %>% select(season_adjust) %>% filter(year(Month) <= 2010)
# Fit a model
fit <- train %>% model(ETS = ETS(season_adjust))
report(fit)
## Series: season_adjust
## Model: ETS(A,A,N)
## Smoothing parameters:
## alpha = 0.4827131
## beta = 0.000100001
##
## Initial states:
## l[0] b[0]
## 9.612983 0.01764095
##
## sigma^2: 0.0198
##
## AIC AICc BIC
## 668.4457 668.6227 687.6635
# Forecast and evaluate
fit_fc <- fit |> forecast(h = "8 years")
train %>%
autoplot(season_adjust, level = NULL, color = "black") +
autolayer(fit_fc)
## Warning in geom_line(...): Ignoring unknown parameters: `level`
# Get model evaluation
fit_fc %>% accuracy(aus_houshold_goods_slt)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 96 observations are missing between 2011 Jan and 2018 Dec
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS Test -0.269 0.306 0.269 -1.68 1.68 NaN NaN 0.756
How does that compare with your best previous forecasts on the test set?
After transforming the series with a Box-Cox transformation, decomposing it, and then training a model with the ETS function on the seasonally adjusted data, the reported test RMSE drops dramatically: the RMSE for the chosen (A,A,N) model is 0.306. Note, however, that this RMSE is measured on the Box-Cox transformed, seasonally adjusted scale, so it is not directly comparable to the earlier RMSEs on the original Turnover scale. This model also had a much lower AIC of 668.45 compared with 4113.5 for the previous best model (M,Ad,M) on the original data.
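One way to make an apples-to-apples comparison (a sketch only, output not shown; train_raw is a hypothetical name used to avoid overwriting the seasonally adjusted training set) is to place the Box-Cox transformation inside the model formula, so fable back-transforms the forecasts and the test RMSE is computed on the original Turnover scale:
# Fit ETS to the Box-Cox transformed Turnover; forecasts are back-transformed automatically
train_raw <- aus_houshold_goods %>% filter(year(Month) <= 2010)
fit_bc <- train_raw %>% model(ets_bc = ETS(box_cox(Turnover, lambda)))
fit_bc %>% forecast(h = "8 years") %>%
  accuracy(aus_houshold_goods) %>%
  select(.model, RMSE)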