library(fpp3)
library(dplyr)
library(ggplot2)
pigs_victoria <- aus_livestock %>%
filter(Animal == "Pigs", State == "Victoria")
autoplot(pigs_victoria, Count) +
labs(title = "Pigs Slaughtered in Victoria")
fit <- pigs_victoria %>% model(ets = ETS(Count))
report(fit)
## Series: Count
## Model: ETS(A,N,A)
## Smoothing parameters:
## alpha = 0.3579401
## gamma = 0.0001000139
##
## Initial states:
## l[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5] s[-6]
## 95487.5 2066.235 6717.311 -2809.562 -1341.299 -7750.173 -8483.418 5610.89
## s[-7] s[-8] s[-9] s[-10] s[-11]
## -579.8107 1215.464 -2827.091 1739.465 6441.989
##
## sigma^2: 60742898
##
## AIC AICc BIC
## 13545.38 13546.26 13610.24
The ETS(A,N,N) model is the same as simple exponential smoothing.
#b
fit %>% forecast(h = 4) %>% autoplot(pigs_victoria) + labs(title = "Forcast of the Number of Pigs Slaughtered: A,N,A")
The output above gives the estimated values of alpha and l[0], and the forecasts for the next four months.
variance = 60742898
upper_bound = 95487.5 + 1.96 * sqrt(variance)
lower_bound = 95487.5 - 1.96 * sqrt(variance)
print(paste("The 95% prediction interval is", round(lower_bound, 2), " - ", round(upper_bound, 2)))
## [1] "The 95% prediction interval is 80211.7 - 110763.3"
“The 95% prediction interval is 80211.7 - 110763.3”
#Using R by piping in the fable or forecast object into the hilo() function
# Forecast the next 4 periods
forecast_pigs <- fit %>% forecast(h = 4)
# Pull CI from the forecast using the hilo function
pigs_hilo <- forecast_pigs %>% hilo(level = 95)
pigs_hilo %>% slice(1) %>% select("95%")
Comparing the intervals for the first forecast (January 2019), the interval calculated using y±1.96s is slightly higher. There is significant overlap between each interval.
# Create a tibble with the intervals
lb <- c(69149.19, 80211.7)
ub <- c(99700.22, 110763.3)
intervals <- tibble(calculation = c("R", "Manual"), lower = lb,
upper = ub)
intervals
# Create line segment plot to visualize the
intervals %>% ggplot() + geom_segment(aes(x = lower, y = calculation, xend = upper, yend = calculation, color = calculation)) +
geom_point(aes(x = c(lower[2],upper[2]), y = 1, color = calculation[2])) +
geom_point(aes(x = c(lower[1],upper[1]), y = 2, color = calculation[1])) +
labs(title = "95% Interval Prediction")
canada_econ <- global_economy %>%
filter(Country == "Canada")
autoplot(canada_econ, Exports) +
labs(title = "Canadian Exports", y = "Exports (% GDP)")
The series has no seasonality because it is annual data. It moves up and down over time, so both a simple model and a trend model can be compared.
fit_ann <- canada_econ %>%
model(ANN = ETS(Exports ~ error("A") + trend("N") + season("N")))
report(fit_ann)
## Series: Exports
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.9998999
##
## Initial states:
## l[0]
## 16.96593
##
## sigma^2: 2.7105
##
## AIC AICc BIC
## 297.3032 297.7477 303.4846
fc_ann <- fit_ann %>% forecast(h = 5)
autoplot(fc_ann, canada_econ) +
labs(title = "Canadian Exports Forecast: ANN")
accuracy(fit_ann) %>% select(RMSE)
fit_models <- canada_econ %>%
model(
ANN = ETS(Exports ~ error("A") + trend("N") + season("N")),
AAN = ETS(Exports ~ error("A") + trend("A") + season("N"))
)
accuracy(fit_models) %>% select(.model, RMSE)
fit_models %>%
forecast(h = 5) %>%
autoplot(canada_econ, level = NULL) +
labs(title = "Canadian Exports: ANN vs AAN", y = "% GDP")
The ANN and AAN models are similar, but ANN may be preferred if it has slightly lower RMSE and is simpler.
fc_exports <- fit_models %>% forecast(h = 5)
rmse_table <- accuracy(fit_models) %>% select(.model, RMSE)
fc_first <- fc_exports %>% as_tibble() %>% group_by(.model) %>% slice(1)
left_join(fc_first, rmse_table, by = ".model") %>%
mutate(
lower = .mean - 1.96 * RMSE,
upper = .mean + 1.96 * RMSE
) %>%
select(.model, .mean, lower, upper)
fc_exports %>% hilo(level = 95)
The manual intervals are close to the intervals from R.
china_gdp <- global_economy %>%
filter(Country == "China")
autoplot(china_gdp, GDP) +
labs(title = "China GDP")
fit_china <- china_gdp %>%
model(
ETS_auto = ETS(GDP),
ETS_damped = ETS(GDP ~ trend("Ad")),
ETS_boxcox = ETS(box_cox(GDP, 0.3))
)
fc_china <- fit_china %>% forecast(h = 15)
autoplot(fc_china, china_gdp) +
labs(title = "China GDP Forecasts")
The standard ETS model keeps strong growth, the damped model slows the growth, and the Box-Cox version smooths the series.
gas_data <- aus_production %>%
select(Quarter, Gas)
autoplot(gas_data, Gas) +
labs(title = "Australian Gas Production")
Multiplicative seasonality is needed because the seasonal pattern gets bigger when the level gets bigger.
fit_gas <- gas_data %>%
model(
MAM = ETS(Gas ~ error("M") + trend("A") + season("M")),
MAdM = ETS(Gas ~ error("M") + trend("Ad") + season("M"))
)
accuracy(fit_gas) %>% select(.model, RMSE)
fit_gas %>%
forecast(h = 12) %>%
autoplot(gas_data) +
labs(title = "Gas Forecasts")
If the damped model does not lower RMSE, then it does not improve the forecasts.
aus_household_goods <- aus_retail %>%
filter(State == "Victoria", Industry == "Household goods retailing")
autoplot(aus_household_goods, Turnover) +
labs(title = "Victoria Household Goods Retailing")
Multiplicative seasonality is needed because the seasonal changes become larger as turnover increases.
fit_hw <- aus_household_goods %>%
model(
HW = ETS(Turnover ~ error("M") + trend("A") + season("M")),
HW_damped = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
)
accuracy(fit_hw) %>% select(.model, RMSE)
The model with the lower RMSE is preferred.
fit_hw %>%
select(HW_damped) %>%
gg_tsresiduals()
## Warning: `gg_tsresiduals()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_tsresiduals()` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
If the residuals show no strong pattern, then they are close to white noise.
train <- aus_household_goods %>%
filter(year(Month) <= 2010)
test <- aus_household_goods %>%
filter(year(Month) > 2010)
fit_train <- train %>%
model(HW_damped = ETS(Turnover ~ error("M") + trend("Ad") + season("M")))
fc_train <- fit_train %>% forecast(h = "8 years")
accuracy(fc_train, aus_household_goods)
fit_snaive <- train %>% model(SNAIVE(Turnover))
fc_snaive <- fit_snaive %>% forecast(h = "8 years")
accuracy(fc_snaive, aus_household_goods)
If the Holt-Winters RMSE is smaller than the seasonal naive RMSE, then Holt-Winters is better.
##(e)
# Train set up to end of 2010
train <- aus_household_goods %>%
filter(year(Month) <= 2010)
# Fit damped Holt-Winters multiplicative model
fit_train <- train %>%
model(
HW_damped = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
)
# Forecast next 8 years
fc_train <- fit_train %>%
forecast(h = "8 years")
# Test set RMSE
accuracy(fc_train, aus_household_goods)
# Seasonal naive comparison
fit_snaive <- train %>%
model(SNAIVE(Turnover))
fc_snaive <- fit_snaive %>%
forecast(h = "8 years")
accuracy(fc_snaive, aus_household_goods)
The Holt-Winters multiplicative damped model has a lower test RMSE than the seasonal naïve model, so it gives better forecasts.
fit_stl <- train %>%
model(
STL_ETS = decomposition_model(
STL(box_cox(Turnover, 0.3)),
ETS(season_adjust)
)
)
fc_stl <- fit_stl %>% forecast(h = nrow(test))
accuracy(fc_stl, test)
If STL + ETS gives a smaller test RMSE than the best Holt-Winters model, then STL + ETS is better.