Hyndman, Forecasting: Principles and Practice (3rd ed.), Chapter 8 exercises
library(fpp3)
data(aus_livestock)
Consider the number of pigs slaughtered in Victoria, available in the aus_livestock data set. Use the ETS() function to estimate the equivalent model for simple exponential smoothing. Find the optimal values of α and ℓ0, and generate forecasts for the next four months.
Compute a 95% prediction interval for the first forecast using ŷ ± 1.96s, where s is the standard deviation of the residuals. Compare your interval with the interval produced by R.
# pigs slaughtered in Victoria
pigs_vic <- aus_livestock %>%
filter(State == "Victoria", Animal == "Pigs")
# ETS model for simple exponential smoothing (ETS(A,N,N))
ets_model <- pigs_vic %>%
model(ETS(Count ~ error("A") + trend("N") + season("N")))
# alpha (smoothing parameter) from the model
alpha_value <- tidy(ets_model) %>%
filter(term == "alpha") %>%
pull(estimate)
# initial level (ℓ0) from the estimated model parameters
initial_level <- tidy(ets_model) %>%
filter(term == "l[0]") %>%
pull(estimate)
# forecasts for next 4 months
forecasts <- forecast(ets_model, h = 4)
# residuals
residuals_data <- augment(ets_model)
residuals_data <- residuals_data %>%
filter(!is.na(.resid))
# standard deviation of residuals
residuals_sd <- sd(residuals_data$.resid)
# first forecasted value
first_forecast <- forecasts %>% slice(1) %>% pull(.mean)
# manual 95% prediction interval
lower_manual <- first_forecast - 1.96 * residuals_sd
upper_manual <- first_forecast + 1.96 * residuals_sd
# results
list(
Alpha = alpha_value,
Initial_Level = initial_level,
First_Forecast = first_forecast,
Residual_SD = residuals_sd,
Manual_95PI = c(lower_manual, upper_manual),
Forecast_Intervals = forecasts)
## $Alpha
## [1] 0.3221247
##
## $Initial_Level
## [1] 100646.6
##
## $First_Forecast
## [1] 95186.56
##
## $Residual_SD
## [1] 9344.666
##
## $Manual_95PI
## [1] 76871.01 113502.10
##
## $Forecast_Intervals
## # A fable: 4 x 6 [1M]
## # Key: Animal, State, .model [1]
## Animal State .model Month
## <fct> <fct> <chr> <mth>
## 1 Pigs Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + season(\"… 2019 Jan
## 2 Pigs Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + season(\"… 2019 Feb
## 3 Pigs Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + season(\"… 2019 Mar
## 4 Pigs Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + season(\"… 2019 Apr
## # ℹ 2 more variables: Count <dist>, .mean <dbl>
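The exercise also asks for a comparison with the interval R produces; it can be read off the fable with fabletools' hilo() (a minimal sketch, not part of the original output):
# 95% interval produced by R for the first forecast, for comparison with the manual interval
forecasts %>%
  hilo(level = 95) %>%
  slice(1) %>%
  pull(`95%`)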
Data set global_economy contains annual Exports for many countries. Select one country, forecast its Exports with ETS(A,N,N) and ETS(A,A,N) models, compare their training RMSE values, and compute 95% prediction intervals for the first forecast from each. Australia is used here.
data(global_economy)
# country chosen for analysis
country_name <- "Australia"
exports_data <- global_economy %>%
filter(Country == country_name) %>%
select(Year, Exports)
exports_data %>%
autoplot(Exports) +
labs(title = paste("Exports of", country_name),
y = "Exports (% of GDP)",
x = "Year")
# an ETS(A,N,N) model (simple exponential smoothing)
ets_ann_model <- exports_data %>%
model(ETS(Exports ~ error("A") + trend("N") + season("N")))
# Generate forecasts using ETS(A,N,N)
ets_ann_forecasts <- forecast(ets_ann_model, h = 10)
# Plot forecasts for ETS(A,N,N)
autoplot(exports_data, Exports) +
autolayer(ets_ann_forecasts, series="ETS(A,N,N) Forecasts") +
labs(title = paste("Exports Forecasts for", country_name),
y = "Exports (% of GDP)",
x = "Year")
# Compute RMSE for ETS(A,N,N) model
ets_ann_rmse <- accuracy(ets_ann_model)[1, "RMSE"]
ets_ann_rmse
## # A tibble: 1 × 1
## RMSE
## <dbl>
## 1 1.15
# an ETS(A,A,N) model (Holt’s Linear Trend Method)
ets_aan_model <- exports_data %>%
model(ETS(Exports ~ error("A") + trend("A") + season("N")))
# RMSE for ETS(A,A,N) model
ets_aan_rmse <- accuracy(ets_aan_model)[1, "RMSE"]
# RMSE values
list(
RMSE_ETS_A_N_N = ets_ann_rmse,
RMSE_ETS_A_A_N = ets_aan_rmse)
## $RMSE_ETS_A_N_N
## # A tibble: 1 × 1
## RMSE
## <dbl>
## 1 1.15
##
## $RMSE_ETS_A_A_N
## # A tibble: 1 × 1
## RMSE
## <dbl>
## 1 1.12
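As an aside, the two candidate specifications can also be estimated in one model() call, so that accuracy() reports both training RMSEs in a single table (a small sketch, equivalent to the separate calls above):
# fit ETS(A,N,N) and ETS(A,A,N) side by side in one mable
exports_data %>%
  model(
    ann = ETS(Exports ~ error("A") + trend("N") + season("N")),
    aan = ETS(Exports ~ error("A") + trend("A") + season("N"))
  ) %>%
  accuracy() %>%
  select(.model, RMSE)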
# forecasts using ETS(A,A,N)
ets_aan_forecasts <- forecast(ets_aan_model, h = 10)
# forecasts from both models
autoplot(exports_data, Exports) +
autolayer(ets_ann_forecasts, series="ETS(A,N,N) Forecasts", color="blue") +
autolayer(ets_aan_forecasts, series="ETS(A,A,N) Forecasts", color="red") +
labs(title = paste("Comparison of ETS(A,N,N) vs ETS(A,A,N) Forecasts for", country_name),
y = "Exports (% of GDP)",
x = "Year")
Looking at the plot, the ETS(A,N,N) forecasts are flat at the final estimated level, while the ETS(A,A,N) forecasts extend the estimated (slight) upward trend, so the two sets of point forecasts gradually diverge over the 10-year horizon.
# first forecasted values
first_forecast_ann <- ets_ann_forecasts %>% slice(1) %>% pull(.mean)
first_forecast_aan <- ets_aan_forecasts %>% slice(1) %>% pull(.mean)
# manual 95% prediction intervals
lower_manual_ann <- first_forecast_ann - 1.96 * ets_ann_rmse
upper_manual_ann <- first_forecast_ann + 1.96 * ets_ann_rmse
lower_manual_aan <- first_forecast_aan - 1.96 * ets_aan_rmse
upper_manual_aan <- first_forecast_aan + 1.96 * ets_aan_rmse
# results
list(
Manual_95PI_ETS_A_N_N = c(lower_manual_ann, upper_manual_ann),
Manual_95PI_ETS_A_A_N = c(lower_manual_aan, upper_manual_aan))
## $Manual_95PI_ETS_A_N_N
## $Manual_95PI_ETS_A_N_N$RMSE
## [1] 18.35944
##
## $Manual_95PI_ETS_A_N_N$RMSE
## [1] 22.85488
##
##
## $Manual_95PI_ETS_A_A_N
## $Manual_95PI_ETS_A_A_N$RMSE
## [1] 18.64986
##
## $Manual_95PI_ETS_A_A_N$RMSE
## [1] 23.02743
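The nested $RMSE entries in the output above arise because accuracy(...)[1, "RMSE"] returns a 1×1 tibble rather than a plain number, so the arithmetic carries the column name along. Pulling the RMSE as a numeric avoids this; a small sketch:
# extract RMSE as plain numerics so the manual intervals print as simple vectors
ets_ann_rmse_num <- accuracy(ets_ann_model) %>% pull(RMSE)
ets_aan_rmse_num <- accuracy(ets_aan_model) %>% pull(RMSE)
c(lower = first_forecast_ann - 1.96 * ets_ann_rmse_num,
  upper = first_forecast_ann + 1.96 * ets_ann_rmse_num)
c(lower = first_forecast_aan - 1.96 * ets_aan_rmse_num,
  upper = first_forecast_aan + 1.96 * ets_aan_rmse_num)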
Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.
data(global_economy)
# China's GDP
china_gdp <- global_economy %>%
filter(Country == "China") %>%
select(Year, GDP)
# GDP series
china_gdp %>%
autoplot(GDP) +
labs(title = "China's GDP Over Time",
y = "GDP (US Dollars)",
x = "Year")
# standard ETS model
ets_auto <- china_gdp %>%
model(ETS(GDP))
# forecasts for a long horizon (30 years)
ets_auto_forecast <- forecast(ets_auto, h = 30)
# forecasts
autoplot(china_gdp, GDP) +
autolayer(ets_auto_forecast, series="Automatic ETS Forecast") +
labs(title = "China's GDP Forecast Using Automatic ETS",
y = "GDP (US Dollars)",
x = "Year")
With no components specified, ETS() automatically selects the error, trend and seasonal components by minimising the AICc.
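To see which specification was chosen and the estimated parameters, report() prints the fitted model:
# show the automatically selected ETS specification and its parameters
report(ets_auto)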
# ETS model with additive damped trend
ets_damped <- china_gdp %>%
model(ETS(GDP ~ error("A") + trend("Ad") + season("N")))
# Forecast with a long horizon
ets_damped_forecast <- forecast(ets_damped, h = 30)
# forecasts
autoplot(china_gdp, GDP) +
autolayer(ets_damped_forecast, series="ETS with Damped Trend") +
labs(title = "China's GDP Forecast with Damped Trend",
y = "GDP (US Dollars)",
x = "Year")
This model assumes GDP will continue growing but at a slower rate. It incorporates a trend but damps it, meaning it will not grow indefinitely.
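The strength of the damping is controlled by the parameter phi (phi = 1 corresponds to no damping); it can be read off the estimated coefficients:
# damping parameter; values below 1 flatten the trend over the forecast horizon
tidy(ets_damped) %>% filter(term == "phi")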
# an ETS model with Box-Cox transformation (lambda = 0.3)
ets_boxcox <- china_gdp %>%
model(ETS(box_cox(GDP, 0.3) ~ error("A") + trend("A") + season("N")))
# Forecast with a long horizon; fable back-transforms the forecasts to the
# original GDP scale automatically, so no manual inv_box_cox() step is needed
ets_boxcox_forecast <- forecast(ets_boxcox, h = 30)
# forecasts
autoplot(china_gdp, GDP) +
autolayer(ets_boxcox_forecast, series="ETS with Box-Cox Transformation") +
labs(title = "China's GDP Forecast with Box-Cox Transformation",
y = "GDP (US Dollars)",
x = "Year")
The Box-Cox transformation is applied to stabilize variance. This model assumes continued rapid growth, leading to an exponential forecast.
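Here lambda = 0.3 is fixed by hand; as an alternative, the guerrero feature can suggest a data-driven value (the same idiom used later for the retail series):
# data-driven choice of the Box-Cox parameter
china_gdp %>% features(GDP, guerrero)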
autoplot(china_gdp, GDP) +
autolayer(ets_auto_forecast, series="Auto ETS", color="blue", alpha=0.5, linetype="solid") +
autolayer(ets_damped_forecast, series="Damped ETS", color="red", alpha=0.5, linetype="dashed") +
autolayer(ets_boxcox_forecast, series="Box-Cox ETS", color="green", alpha=0.5, linetype="dotdash") +
labs(title = "Comparison of Different ETS Models for China's GDP",
y = "GDP (US Dollars)",
x = "Year") +
theme_minimal() +
guides(color = guide_legend(title="ETS Models"))
Looking at the different plots, especially the combined one: the automatic ETS model extrapolates the recent strong growth, the damped-trend model gradually levels off, and the Box-Cox model grows the fastest once the forecasts are back-transformed to the original scale. Damping is the most conservative choice for a long horizon, while the transformation amplifies the trend on the original scale.
Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?
data(aus_production)
# Gas production
gas_data <- aus_production %>%
select(Quarter, Gas)
# Gas series
gas_data %>%
autoplot(Gas) +
labs(title = "Quarterly Gas Production in Australia",
y = "Gas Production (Petajoules)",
x = "Year")
Gas production has an upward trend. Seasonality is clearly increasing over time, meaning multiplicative seasonality is necessary. Using an additive seasonality model would underestimate seasonal variations in later years.
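To check that additive seasonality would understate the variation, an additive-seasonality model can be fitted alongside the multiplicative one for comparison (a quick sketch, not part of the original output):
# additive seasonality keeps the seasonal swings at a constant size, so it cannot
# track the growing amplitude in the later, larger part of the series
gas_data %>%
  model(
    additive       = ETS(Gas ~ error("A") + trend("A") + season("A")),
    multiplicative = ETS(Gas ~ error("A") + trend("A") + season("M"))
  ) %>%
  accuracy()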
# ETS model with multiplicative seasonality
ets_mult_seasonal <- gas_data %>%
model(ETS(Gas ~ error("A") + trend("A") + season("M")))
# Forecast for next few years
ets_mult_forecast <- forecast(ets_mult_seasonal, h = "3 years")
# forecasts
autoplot(gas_data, Gas) +
autolayer(ets_mult_forecast, series="ETS with Multiplicative Seasonality") +
labs(title = "Forecast for Gas Production Using ETS with Multiplicative Seasonality",
y = "Gas Production (Petajoules)",
x = "Year")
This model captures the seasonal pattern and the upward trend. The forecast shows continued growth with seasonal fluctuations, and the prediction intervals remain reasonable rather than growing excessively wide.
# ETS model with a damped trend
ets_damped <- gas_data %>%
model(ETS(Gas ~ error("A") + trend("Ad") + season("M")))
# Forecast with a damped trend
ets_damped_forecast <- forecast(ets_damped, h = "3 years")
# forecasts
autoplot(gas_data, Gas) +
autolayer(ets_mult_forecast, series="ETS with Multiplicative Seasonality") +
autolayer(ets_damped_forecast, series="ETS with Damped Trend", color="red") +
labs(title = "Comparison of ETS with Multiplicative Seasonality and Damped Trend",
y = "Gas Production (Petajoules)",
x = "Year")
Damping the trend slows future growth, reflecting an assumption that production will stabilise. The forecasts still respect the multiplicative seasonality, so the fluctuations remain realistic, and the prediction intervals are slightly narrower, suggesting less uncertainty in the long-term projections.
# RMSE values
rmse_mult <- accuracy(ets_mult_seasonal)[1, "RMSE"]
rmse_damped <- accuracy(ets_damped)[1, "RMSE"]
list(RMSE_ETS_Multiplicative = rmse_mult,
RMSE_ETS_Damped = rmse_damped)
## $RMSE_ETS_Multiplicative
## # A tibble: 1 × 1
## RMSE
## <dbl>
## 1 4.19
##
## $RMSE_ETS_Damped
## # A tibble: 1 × 1
## RMSE
## <dbl>
## 1 4.22
The difference is very small, so both models fit the historical data almost equally well; the damped-trend version has a slightly higher RMSE, so damping does not improve the in-sample fit. If long-term growth is expected to continue, the undamped ETS with multiplicative seasonality is sufficient; if growth is expected to slow, damping may still give more realistic long-horizon forecasts.
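In-sample RMSE is not the only yardstick; the information criteria reported by glance() can also be compared (a supplementary check, not part of the original output):
# AICc/BIC comparison of the two seasonal models
bind_rows(glance(ets_mult_seasonal), glance(ets_damped)) %>%
  select(.model, AICc, BIC)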
Apply Holt-Winters' multiplicative method (with and without a damped trend) to a retail series from aus_retail, check the residuals of the better model, and compare its test-set RMSE with a seasonal naive benchmark.
data(aus_retail)
# a single retail category: "Department stores" in New South Wales
retail_data <- aus_retail %>%
filter(Industry == "Department stores", State == "New South Wales") %>%
select(Month, Turnover)
# retail turnover time series
retail_data %>%
autoplot(Turnover) +
labs(title = "Monthly Retail Turnover (Department Stores, NSW)",
y = "Turnover (Million AUD)",
x = "Year")
# Holt-Winters' method (ETS with multiplicative seasonality)
hw_model <- retail_data %>%
model(ETS(Turnover ~ error("A") + trend("A") + season("M")))
# Holt-Winters' method with a damped trend
hw_damped_model <- retail_data %>%
model(ETS(Turnover ~ error("A") + trend("Ad") + season("M")))
# forecasts for both models
hw_forecast <- forecast(hw_model, h = "2 years")
hw_damped_forecast <- forecast(hw_damped_model, h = "2 years")
# forecasts for comparison
autoplot(retail_data, Turnover) +
autolayer(hw_forecast, series="Holt-Winters Multiplicative", color="blue") +
autolayer(hw_damped_forecast, series="Holt-Winters Damped", color="red") +
labs(title = "Comparison of Holt-Winters' Multiplicative & Damped Trend",
y = "Turnover (Million AUD)",
x = "Year")
# RMSE for both models
rmse_hw <- accuracy(hw_model)[1, "RMSE"]
rmse_hw_damped <- accuracy(hw_damped_model)[1, "RMSE"]
# RMSE values
list(RMSE_Holt_Winters = rmse_hw,
RMSE_Holt_Winters_Damped = rmse_hw_damped)
## $RMSE_Holt_Winters
## # A tibble: 1 × 1
## RMSE
## <dbl>
## 1 18.5
##
## $RMSE_Holt_Winters_Damped
## # A tibble: 1 × 1
## RMSE
## <dbl>
## 1 18.4
# keep whichever model has the lower in-sample RMSE
if (rmse_hw < rmse_hw_damped) {
  best_model <- hw_model
} else {
  best_model <- hw_damped_model
}
# residuals
residuals_data <- best_model %>%
augment()
residuals_data %>%
autoplot(.resid) +
labs(title = "Residuals of the Best Model",
y = "Residual",
x = "Year")
# autocorrelation function (ACF) for residuals
residuals_data %>%
ACF(.resid) %>%
autoplot() +
labs(title = "ACF of Residuals")
# Ljung-Box test for white noise
ljung_box_test <- residuals_data %>%
features(.resid, ljung_box, lag = 20)
ljung_box_test
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 "ETS(Turnover ~ error(\"A\") + trend(\"Ad\") + season(\"M\"… 51.0 0.000157
# splitting into training (until end of 2010) and test sets (after 2010)
train_data <- retail_data %>% filter(Month <= yearmonth("2010 Dec"))
test_data <- retail_data %>% filter(Month > yearmonth("2010 Dec"))
# refit a Holt-Winters model on the training data (note: the undamped trend("A")
# specification is used here, not the damped model selected above)
best_model_train <- train_data %>%
model(ETS(Turnover ~ error("A") + trend("A") + season("M")))
# Forecast on the test set period
test_forecast <- forecast(best_model_train, new_data = test_data)
# RMSE on test data
test_rmse <- accuracy(test_forecast, test_data)[1, "RMSE"]
# Comparing with Seasonal Naïve (SNAIVE) model
snaive_model <- train_data %>%
model(SNAIVE(Turnover ~ lag("year")))
# Forecast with SNAIVE
snaive_forecast <- forecast(snaive_model, new_data = test_data)
# Computing RMSE for SNAIVE
snaive_rmse <- accuracy(snaive_forecast, test_data)[1, "RMSE"]
# Comparing RMSE values
list(RMSE_Test_Best_Model = test_rmse,
RMSE_Test_SNAIVE = snaive_rmse)
## $RMSE_Test_Best_Model
## # A tibble: 1 × 1
## RMSE
## <dbl>
## 1 54.2
##
## $RMSE_Test_SNAIVE
## # A tibble: 1 × 1
## RMSE
## <dbl>
## 1 25.5
For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?
retail_data <- aus_retail %>%
filter(Industry == "Department stores", State == "New South Wales") %>%
select(Month, Turnover)
# Split data
train_data <- retail_data %>% filter(Month <= yearmonth("2010 Dec"))
test_data <- retail_data %>% filter(Month > yearmonth("2010 Dec"))
# Box-Cox lambda
lambda <- train_data %>%
features(Turnover, guerrero) %>%
pull(lambda_guerrero)
### STL Decomposition on Box-Cox Transformed Series
stl_model <- train_data %>%
mutate(Turnover_transformed = box_cox(Turnover, lambda)) %>%
model(STL(Turnover_transformed ~ season(window = "periodic")))
stl_components <- components(stl_model)
season_adjusted <- stl_components %>% select(Month, season_adjust)
# Plot the STL decomposition
autoplot(stl_components) +
labs(title = "STL Decomposition of Box-Cox Transformed Retail Turnover (Train Data)",
y = "Transformed Turnover", x = "Year")
### ETS on the Seasonally Adjusted Data
ets_model <- season_adjusted %>%
model(ETS(season_adjust ~ error("A") + trend("A") + season("N")))
ets_forecast_adj <- forecast(ets_model, new_data = test_data)
### Reintroduce the Seasonal Component
# average seasonal effect by month from the STL components (training data)
seasonality <- stl_components %>%
mutate(month = month(Month)) %>%
group_by(month) %>%
summarise(avg_season = mean(season_year)) %>%
ungroup()
# attach the average seasonal effect to the test data by calendar month
test_with_season <- test_data %>%
as_tibble() %>%
mutate(month = month(Month)) %>%
left_join(seasonality, by = "month")
# re-seasonalise the ETS forecasts and back-transform to the original scale
ets_forecast_with_season <- ets_forecast_adj %>%
as_tibble() %>%
mutate(month = month(Month)) %>%
left_join(test_with_season %>% select(month, avg_season), by = "month") %>%
mutate(final_mean = inv_box_cox(.mean + avg_season, lambda))
# the join by calendar month duplicates each forecast row (one per test-set year),
# so keep a single row per Month
ets_forecast_with_season_unique <- ets_forecast_with_season %>%
distinct(Month, .keep_all = TRUE)
# resulting tibble back to a tsibble
ets_forecast_with_season_ts <- as_tsibble(ets_forecast_with_season_unique, index = Month)
# Plot final forecast against actual data
autoplot(retail_data, Turnover) +
autolayer(ets_forecast_with_season_ts, final_mean, series = "STL + ETS Forecast", color = "blue") +
labs(title = "Forecast Using STL Decomposition + ETS",
y = "Turnover (Million AUD)", x = "Year")
### Compute Test RMSE for STL+ETS Forecasts
test_data_tbl <- as_tibble(test_data)
forecast_tbl <- as_tibble(ets_forecast_with_season_ts)
test_comparison <- test_data_tbl %>%
left_join(forecast_tbl, by = "Month")
# forecast errors and RMSE
test_comparison <- test_comparison %>%
mutate(error = Turnover.x - final_mean)
# RMSE manually:
rmse_stl_ets <- sqrt(mean(test_comparison$error^2, na.rm = TRUE))
rmse_stl_ets
## [1] 27.01179
The STL+ETS approach produced a test RMSE of about 27.01, which is much lower than the best ETS (Holt-Winters) model's test RMSE of roughly 54.12. However, it is still slightly higher than the Seasonal Naïve method's RMSE of approximately 25.53.
In other words, while the STL+ETS method greatly improves upon the Holt-Winters forecasts by isolating and modeling the seasonally adjusted components separately, the simple Seasonal Naïve approach remains the strongest performer on the test set for this data.
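For reference, fabletools provides decomposition_model(), which automates the re-seasonalising and back-transformation done by hand above; a minimal sketch under the same train/test split (the unspecified seasonal component defaults to a seasonal naive forecast, and the resulting RMSE may differ slightly from the manual calculation):
# STL on the Box-Cox transformed series, ETS(A,A,N) on the seasonally adjusted part;
# the seasonal component is forecast with SNAIVE by default, and forecast()
# re-seasonalises and back-transforms to the original Turnover scale
stl_ets_model <- train_data %>%
  model(stl_ets = decomposition_model(
    STL(box_cox(Turnover, lambda) ~ season(window = "periodic")),
    ETS(season_adjust ~ error("A") + trend("A") + season("N"))
  ))
stl_ets_fc <- forecast(stl_ets_model, new_data = test_data)
accuracy(stl_ets_fc, test_data)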