library(fpp3)
library(tidyverse)
library(forecast)
library(kableExtra)
library(reactable)
options(scipen = 999)
set.seed(123456)
data("aus_livestock")
victoria_pigs <- aus_livestock |>
filter(State == "Victoria", Animal == "Pigs")
victoria_pigs |>
autoplot(Count) +
labs(title = "Count of Pigs Slaughtered in Victoria") +
theme(plot.title = element_text(hjust = 0.5))
ses_model <- victoria_pigs |>
model(ETS(Count ~ error("A") + trend("N") + season("N")))
fc_ses <- ses_model |>
forecast(h = 4)
report(ses_model)
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
ses_model |>
forecast(h=4)
## # A fable: 4 x 6 [1M]
## # Key: Animal, State, .model [1]
## Animal State .model Month Count .mean
## <fct> <fct> <chr> <mth> <dist> <dbl>
## 1 Pigs Victoria "ETS(Count ~ error(\"A\")… 2019 Jan N(95187, 87480760) 95187.
## 2 Pigs Victoria "ETS(Count ~ error(\"A\")… 2019 Feb N(95187, 96558142) 95187.
## 3 Pigs Victoria "ETS(Count ~ error(\"A\")… 2019 Mar N(95187, 105635524) 95187.
## 4 Pigs Victoria "ETS(Count ~ error(\"A\")… 2019 Apr N(95187, 114712906) 95187.
report(ses_model)
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
\(\alpha\): 0.3221247 \(\ell_0\): 100646.6
fc_ses |>
autoplot(victoria_pigs) +
geom_line(aes(y = .fitted), col="blue",
data = augment(ses_model)) +
labs(y="Count", title="Four-Month Forecast of Pigs Slaughtered in Victoria Using SES") +
guides(color="none")
95% Prediction Interval produced by R:
pred_int_r = unpack_hilo(hilo(fc_ses, 95) , "95%" )
head(pred_int_r[7:8], 1)
## # A tibble: 1 × 2
## `95%_lower` `95%_upper`
## <dbl> <dbl>
## 1 76855. 113518.
Computing 95% Prediction Interval
reactable(accuracy(ses_model))
The standard deviation of the residuals is represented by the Root Mean Squared Error (RMSE). When using the accuracy() function, the RMSE value is 9336.338.
acc <- accuracy(ses_model)
mean <- fc_ses$.mean
lower_pi <- mean - (1.96 * acc$RMSE)
upper_pi <- mean + (1.96 * acc$RMSE)
lower_pi[1]
## [1] 76887.33
upper_pi[1]
## [1] 113485.8
global_economy contains the annual Exports
from many countries. Select one country to analyse.denmark_exports <- global_economy |>
filter(Country == "Denmark")
denmark_exports |>
autoplot(Exports) +
labs(x="Year", y="Exports", title="Yearly Exports in Denmark") +
theme(plot.title = element_text(hjust=0.5))
denmark_fit <- denmark_exports |>
model(ETS(Exports ~ error("A") + trend("N") + season("N")))
report(denmark_fit)
## Series: Exports
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.9998996
##
## Initial states:
## l[0]
## 32.31581
##
## sigma^2: 4.1743
##
## AIC AICc BIC
## 322.3492 322.7936 328.5305
reactable(tidy(denmark_fit))
fc_denmark <- denmark_fit |>
forecast(h = 10)
fc_denmark |>
autoplot(denmark_exports) +
geom_line(aes(y = .fitted), col="blue",
data = augment(denmark_fit)) +
labs(y="Exports", title="Four-Year Forecast of Exports in Denmark") +
guides(color="none") +
theme(plot.title = element_text(hjust=0.5))
reactable(accuracy(denmark_fit))
The RMSE value of the training set is 2.00757267292722.
denmark_fit2 <- denmark_exports |>
model(ETS(Exports ~ error("A") + trend("A") + season("N")))
report(denmark_fit2)
## Series: Exports
## Model: ETS(A,A,N)
## Smoothing parameters:
## alpha = 0.9998998
## beta = 0.0001000284
##
## Initial states:
## l[0] b[0]
## 32.46829 0.4192062
##
## sigma^2: 4.1654
##
## AIC AICc BIC
## 324.1161 325.2699 334.4183
tidy(denmark_fit2)
## # A tibble: 4 × 4
## Country .model term estimate
## <fct> <chr> <chr> <dbl>
## 1 Denmark "ETS(Exports ~ error(\"A\") + trend(\"A\") + season(\"… alpha 1.00e+0
## 2 Denmark "ETS(Exports ~ error(\"A\") + trend(\"A\") + season(\"… beta 1.00e-4
## 3 Denmark "ETS(Exports ~ error(\"A\") + trend(\"A\") + season(\"… l[0] 3.25e+1
## 4 Denmark "ETS(Exports ~ error(\"A\") + trend(\"A\") + season(\"… b[0] 4.19e-1
fc_denmark2 <- denmark_fit2 |>
forecast(h = 10)
fc_denmark2 |>
autoplot(denmark_exports) +
geom_line(aes(y = .fitted), col="blue",
data = augment(denmark_fit2)) +
labs(y="Exports", title="Four-Year Forecast of Exports in Denmark Using Holt's Linear Method") +
guides(color="none") +
theme(plot.title = element_text(hjust=0.5))
denmark_compare <- denmark_exports |>
model(
`SES` = ETS(Exports ~ error("A") + trend("N") + season("N")),
`Holt's Linear Method` = ETS(Exports ~ error("A") + trend("A") + season("N")))
reactable(accuracy(denmark_compare))
When comparing the MAE and RMSE values for each method, Holt’s Linear Method has a lower RMSE than the SES method, but has a slightly higher MAE than the SES method. Based on the lower RMSE, I would pick the Holt’s Linear Method as the best forecast method for this dataset.
95% Prediction Interval produced by R for SES Method:
pred_int_r_denmark = unpack_hilo(hilo(fc_denmark, 95) , "95%" )
head(pred_int_r_denmark[6:7], 1)
## # A tibble: 1 × 2
## `95%_lower` `95%_upper`
## <dbl> <dbl>
## 1 51.2 59.2
95% Prediction Interval using RMSE for SES Method:
ses_denmark_acc <- accuracy(denmark_fit)
fc_mean_ses <- fc_denmark$.mean
lower_pi_den <- fc_mean_ses - (1.96 * ses_denmark_acc$RMSE)
upper_pi_den <- fc_mean_ses + (1.96 * ses_denmark_acc$RMSE)
lower_pi_den[1]
## [1] 51.27509
upper_pi_den[1]
## [1] 59.14477
95% Prediction Interval produced by R for Holt’s Linear Method:
pred_int_r_denmark2 = unpack_hilo(hilo(fc_denmark2, 95) , "95%" )
head(pred_int_r_denmark2[6:7], 1)
## # A tibble: 1 × 2
## `95%_lower` `95%_upper`
## <dbl> <dbl>
## 1 51.6 59.6
95% Prediction Interval using RMSE for SES Method:
ses_denmark_acc2 <- accuracy(denmark_fit2)
fc_mean_ses2 <- fc_denmark2$.mean
lower_pi_den2 <- fc_mean_ses2 - (1.96 * ses_denmark_acc2$RMSE)
upper_pi_den2 <- fc_mean_ses2 + (1.96 * ses_denmark_acc2$RMSE)
lower_pi_den2[1]
## [1] 51.76921
upper_pi_den2[1]
## [1] 59.48885
For each model, the 95% prediction interval produced by R is almost identical to the 95% prediction interval calculated using the RMSE value.
global_economy
data set using an ETS model. Experiment with the various options in the
ETS() function to see how much the forecasts change with
damped trend, or with a Box-Cox transformation. Try to develop an
intuition of what each is doing to the forecasts.[Hint: use a relatively large value of h when
forecasting, so you can clearly see the differences between the various
options when plotting the forecasts.]
china_gdp <- global_economy |>
filter(Country == "China")
china_gdp |>
autoplot(GDP) +
labs(x="Year", y="GDP", title="GDP of China") +
theme(plot.title = element_text(hjust = 0.5))
Using Holt’s Linear Method
china_fit <- china_gdp |>
model(ETS(GDP ~ error("A") + trend("A") + season("N")))
report(china_fit)
## Series: GDP
## Model: ETS(A,A,N)
## Smoothing parameters:
## alpha = 0.9998964
## beta = 0.5518569
##
## Initial states:
## l[0] b[0]
## 50284778074 3288256684
##
## sigma^2: 38770101526925682933760
##
## AIC AICc BIC
## 3258.053 3259.207 3268.356
reactable(tidy(china_fit))
fc_china_ses <- china_fit |>
forecast(h=20)
fc_china_ses |>
autoplot(china_gdp) +
geom_line(aes(y = .fitted), col="blue",
data = augment(china_fit)) +
labs(y="Exports", title="20-Year Forecast of China's GDP Using Holt's Linear Method") +
guides(color="none") +
theme(plot.title = element_text(hjust=0.5))
Using Box-Cox Transformation:
# Finding Lambda
lambda <- china_gdp |>
features(GDP, features = guerrero) |>
pull(lambda_guerrero)
Comparing Holt’s Linear Method, Box-Cox, and Damped Holt’s Linear Method:
china_fit_models <- china_gdp |>
model(
"Holt's Linear Method" = ETS(GDP ~ error('A') + trend('A') + season('N')),
"Damped Holt's Linear Method" = ETS(GDP ~ error('A') + trend('Ad') + season('N')),
"Box-Cox Transformation Method" = ETS(box_cox(GDP, lambda) ~ error('A') + trend('N') + season('N'))
)
reactable(tidy(china_fit_models))
china_fit_models |>
forecast(h = 20) |>
autoplot(china_gdp, level = NULL) +
labs(x="Year", y="GDP", title = "Comparison of Methods to Forecast 20 Years of China GDP") +
guides(colour = guide_legend(title = "Forecasts"))
The forecasts produced by the Box-Cox Transformation and Holt’s Linear Method show more of an upward constant trend, while the Damped Holt’s Method has less of an upward trend.
aus_production and forecast the next few years. Why is
multiplicative seasonality necessary here? Experiment with making the
trend damped. Does it improve the forecasts?data("aus_production")
aus_production %>%
autoplot(Gas) +
labs(title = "Gas Production in Australia") +
theme(plot.title = element_text(hjust = 0.5))
gas_fit <- aus_production |>
model(
"additive" = ETS(Gas ~ error("A") + trend("A") + season("A")),
"multiplicative" = ETS(Gas ~ error("M") + trend("A") + season("M")),
"Damped_Method" = ETS(Gas ~ error("M") + trend("Ad") + season("M"))
)
fc_gas <- gas_fit |>
forecast(h = "5 years")
fc_gas |>
autoplot(aus_production, level = NULL) +
labs(title="10-Year Forecast of Quarterly Gas Production in Australia",
y="Gas Production") +
guides(colour = guide_legend(title = "Forecast"))
reactable(accuracy(gas_fit))
Multiplicative seasonality is necessary because the seasonal variations are changing proportional to the level of the series. Additive seasonality would be necessary if the trend was constant through the series. However, the seasonal variations shown here does not have a constant trend, increasing in size over time.
When analyzing each of the forecasts, the Damped Holt’s Method has the lowest RMSE value, slightly outperforming the RMSE of the multiplicative method. However, the MAE value of the multiplicative method is slightly lower than the Damped Holt’s method, which indicates that the forecast model did not fully improve when applying the Damped Holt’s method.
data("aus_retail")
set.seed(123456)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries |>
autoplot(Turnover) +
labs(title = "Australia Retail Trade Turnover") +
theme(plot.title = element_text(hjust = 0.5))
fit_retail <- myseries |>
model(
"Additive" = ETS(Turnover ~ error('A') + trend('A') + season('A')),
"Multiplicative" = ETS(Turnover ~ error('M') + trend('A') + season('M'))
)
fc_retail<- fit_retail |>
forecast(h = "5 years")
fc_retail |>
autoplot(myseries, level = NULL) +
labs(title = "5-Year Forecast of Retail Trade Turnover in Australia") +
theme(plot.title = element_text(hjust = 0.5)) +
guides(colour = guide_legend(title = "Forecasts"))
Multiplicative seasonality is necessary because the seasonal variations are not constant over time. If they were, additive seasonality would be the more appropriate method. There are changes to width and height of seasonal periods over time, and therefore multiplicative seasonality would be more appropriate and necessary for this series.
fit_retail2 <- myseries |>
model(
"Holt-Winters' Multiplicative Method" = ETS(Turnover ~ error('M') + trend('A') + season('M')),
"Damped Holt's Multiplicative Method" = ETS(Turnover ~ error('M') + trend('Ad') + season('M'))
)
fc_retail2 <- fit_retail2 |>
forecast(h = "10 years")
fc_retail2 |>
autoplot(myseries, level = NULL) +
labs(title = "Australia Retail Trade Turnover ") +
guides(colour = guide_legend(title = "Forecasts"))
reactable(accuracy(fit_retail2))
accuracy(fit_retail2) |>
select(.model, RMSE)
## # A tibble: 2 × 2
## .model RMSE
## <chr> <dbl>
## 1 Holt-Winters' Multiplicative Method 13.8
## 2 Damped Holt's Multiplicative Method 13.6
The RMSE for each method is relatively high, but the Damped Holt’s Multiplicative Method would be the preferred method with the lower RMSE value.
best_fit_method <- myseries |>
model("Damped Holt's Multiplicative Method" = ETS(Turnover ~ error('M') + trend('Ad') + season('M')))
best_fc_method <- best_fit_method |>
forecast(h = 20)
best_fit_method |>
gg_tsresiduals()
The histogram plot appears to show a normal distribution of residuals. The innovation residuals plot shows some spikes around January 2000 and around January 1987. The ACF plot shows multiple spikes outside of the blue dash line. Taken altogether, the plots displayed may indicate that the residuals does not fully look like white noise.
myseries_train_retail <- myseries |>
filter(year(Month) < 2011)
fit_train_retail <- myseries_train_retail |>
model("SNAIVE" = SNAIVE(Turnover),
"Damped Holt's Multiplicative Method" = ETS(Turnover ~ error('M') + trend('Ad') + season('M')))
fc_train <- fit_train_retail |>
forecast(h = "10 years")
fc_train |>
autoplot(myseries_train_retail, level = NULL) +
labs(title = "Australian Retail Trade Turnover Forecast") +
theme(plot.title = element_text(hjust = 0.5)) +
guides(colour = guide_legend(title = "Forecasts"))
reactable(accuracy(fit_train_retail))
The Damped Holt’s Multiplicative Method performed better than the Seasonal Naive model, having lower RMSE and MAE values.
lambda <- myseries_train_retail |>
features(Turnover, features = guerrero) |>
pull(lambda_guerrero)
myseries_train_retail |>
model("STL Box-Cox Model" = STL(box_cox(Turnover,lambda) ~ season(window = "periodic"), robust = TRUE)) |>
components() |>
autoplot() +
ggtitle("STL Decomposition using Box-Cox Transformation of Australian Retail Trade Turnover") +
theme(plot.title = element_text(hjust=0.5))
bc_turnover <- myseries_train_retail |>
mutate(Turnover = box_cox(Turnover, lambda))
fit_retail3 <- bc_turnover |>
model("STL Box-Cox Model" = STL(Turnover ~ season(window = "periodic"), robust=TRUE),
"ETS Box-Cox Model" = ETS(Turnover ~ error("M") + trend("Ad") + season("M")))
fit_ets <- bc_turnover |>
model("Damped Holt's Method" = ETS(Turnover ~ error('M') + trend('Ad') + season('M')))
fc_fit_bc <- fit_ets |>
forecast(h = "5 years")
fc_fit_bc |>
autoplot(bc_turnover, level = NULL) +
labs(title = "ETS Box-Cox Model of Australian Retail Trade Turnover") +
theme(plot.title = element_text(hjust=0.5)) +
guides(colour = guide_legend(title = "Forecasts"))
reactable(rbind(accuracy(fit_retail2),accuracy(fit_retail3)))
When comparing the RMSE and MAE values of the transformed Box-Cox STL and ETS models with the Damped Holt’s Multiplicative Method, it’s clear that the Box-Cox Transformation has greatly improved the model, with much lower RMSE and MAE values for the STL and ETS models. The STL Box-Cox model performed slightly better than ther ETS Box-Cox model, having lower RMSE and MAE values.