Consider the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.
library(fpp3)

data <- aus_livestock %>%
  filter(State == "Victoria", Animal == "Pigs")
fit <- data %>%
  model(ETS(Count ~ error("A") + trend("N") + season("N")))
report(fit)
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
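Since ETS(A,N,N) has neither trend nor seasonality, every point forecast is simply the final smoothed level. A quick sketch to check this against the states stored in the fitted model (the last level should match the point forecasts computed below):
components(fit) %>%
  tail(1)   # the final estimated level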
fc <- fit %>% forecast(h = 4)
autoplot(data, Count) +
autolayer(fc, level = 95) +
labs(title = "Simple Exponential Smoothing Forecast",
y = "Number of pigs slaughtered",
x = "Year")
residuals <- augment(fit) %>% pull(.resid)
s <- sd(residuals, na.rm = TRUE)
first_forecast <- fc %>% slice_head(n = 1) %>% pull(.mean)
lower_bound <- first_forecast - 1.96 * s
upper_bound <- first_forecast + 1.96 * s
c("Lower Bound" = lower_bound, "Forecast" = first_forecast, "Upper Bound" = upper_bound)
## Lower Bound Forecast Upper Bound
## 76871.01 95186.56 113502.10
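The same interval can also be constructed from the residual variance reported by report(fit) (sigma^2 = 87480760) rather than the sample standard deviation of the residuals; a minimal sketch, which should give almost identical bounds:
sigma <- sqrt(87480760)                    # sigma^2 from report(fit)
first_forecast + c(-1, 1) * 1.96 * sigma   # lower and upper 95% bounds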
fc_intervals <- fc %>%
mutate(interval = hilo(Count, 95)) %>%
select(Count, interval) %>%
unpack_hilo(interval)
fc_intervals %>% slice_head(n = 1)
## # A fable: 1 x 4 [1M]
## Count
## <dist>
## 1 N(95187, 8.7e+07)
## # ℹ 3 more variables: interval_lower <dbl>, interval_upper <dbl>, Month <mth>
colnames(fc_intervals)
## [1] "Count" "interval_lower" "interval_upper" "Month"
Data set global_economy contains the annual Exports from many countries. Select one country to analyse.
Australia's exports as a percentage of GDP show a long-term increase since 1960, with clear declines after the peaks around 1980 and 2000.
aus_exports <- global_economy %>%
filter(Country == "Australia") %>%
select(Year, Exports)
autoplot(aus_exports, Exports) +
labs(title = "Exports as Percentage of GDP for Australia",
x = "Year", y = "Exports (% of GDP)")
fit_aus <- aus_exports %>%
model(ETS(Exports ~ error("A") + trend("N") + season("N")))
fc_aus <- fit_aus %>%
forecast(h = 10)
fc_aus %>%
autoplot(aus_exports) +
labs(title = "Forecasts of Exports for Australia using ETS(A,N,N)",
x = "Year", y = "Exports (% of GDP)")
fitted_values <- fitted(fit_aus)
training_data <- aus_exports %>%
  left_join(fitted_values, by = "Year") %>%
  rename(observed = Exports, fitted = .fitted)
rmse <- training_data %>%
  as_tibble() %>%   # drop the tsibble index so summarise() collapses to a single value
  summarise(rmse = sqrt(mean((observed - fitted)^2, na.rm = TRUE))) %>%
  pull(rmse)
print(rmse)
## [1] 1.146794
accuracy(fit_aus) %>%
select(RMSE)
## # A tibble: 1 × 1
## RMSE
## <dbl>
## 1 1.15
I think ETS(A,A,N) is more suitable for this dataset because it captures the historical trend, giving a more realistic forecast that is consistent with past growth. This matches the earlier observation that exports as a percentage of GDP show a long-term increase, whereas ETS(A,N,N) produces a flat forecast.
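To make the contrast visible, both models can be fitted in a single mable and their forecasts overlaid; a minimal sketch (the model names ANN and AAN are illustrative):
fit_both <- aus_exports %>%
  model(
    ANN = ETS(Exports ~ error("A") + trend("N") + season("N")),
    AAN = ETS(Exports ~ error("A") + trend("A") + season("N"))
  )
fit_both %>%
  forecast(h = 10) %>%
  autoplot(aus_exports, level = NULL) +   # point forecasts only, one colour per model
  labs(title = "ETS(A,N,N) vs ETS(A,A,N) forecasts of Australian exports",
       x = "Year", y = "Exports (% of GDP)")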
fit_aus_trended <- aus_exports %>%
model(ETS(Exports ~ error("A") + trend("A") + season("N")))
rmse_trended <- accuracy(fit_aus_trended) %>%
select(RMSE) %>%
pull(RMSE)
print(rmse_trended)
## [1] 1.116727
fc_aus_trended <- fit_aus_trended %>% forecast(h = 10)
autoplot(aus_exports, Exports) +
  autolayer(fc_aus_trended, level = 95) +
  labs(title = "Forecasts of Exports for Australia using ETS(A,A,N)",
       x = "Year", y = "Exports (% of GDP)")
ETS(A,A,N) is the better choice for forecasting: it follows the trend and has slightly better training accuracy (RMSE = 1.1167 vs. 1.1468).
accuracy(fit_aus)$RMSE
## [1] 1.146794
gg_tsresiduals(fit_aus)
accuracy(fit_aus_trended)$RMSE
## [1] 1.116727
gg_tsresiduals(fit_aus_trended)
fc_aus <- fit_aus %>% forecast(h = 1)
rmse_aus <- accuracy(fit_aus) %>% pull(RMSE)
first_forecast_aus <- fc_aus %>% pull(.mean)
manual_lower_aus <- first_forecast_aus - 1.96 * rmse_aus
manual_upper_aus <- first_forecast_aus + 1.96 * rmse_aus
r_intervals_aus <- fc_aus %>% hilo(level = 95) %>% unpack_hilo("95%")
fc_aus_trended <- fit_aus_trended %>% forecast(h = 1)
rmse_trended <- accuracy(fit_aus_trended) %>% pull(RMSE)
first_forecast_trended <- fc_aus_trended %>% pull(.mean)
manual_lower_trended <- first_forecast_trended - 1.96 * rmse_trended
manual_upper_trended <- first_forecast_trended + 1.96 * rmse_trended
r_intervals_trended <- fc_aus_trended %>% hilo(level = 95) %>% unpack_hilo("95%")
comparison_intervals <- tibble(
Model = c("ETS(A,N,N) Manual", "ETS(A,N,N) R", "ETS(A,A,N) Manual", "ETS(A,A,N) R"),
Lower = c(manual_lower_aus, r_intervals_aus$`95%_lower`, manual_lower_trended, r_intervals_trended$`95%_lower`),
Upper = c(manual_upper_aus, r_intervals_aus$`95%_upper`, manual_upper_trended, r_intervals_trended$`95%_upper`)
)
print(comparison_intervals)
## # A tibble: 4 × 3
## Model Lower Upper
## <chr> <dbl> <dbl>
## 1 ETS(A,N,N) Manual 18.4 22.9
## 2 ETS(A,N,N) R 18.3 22.9
## 3 ETS(A,A,N) Manual 18.6 23.0
## 4 ETS(A,A,N) R 18.6 23.1
Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.
[Hint: use a relatively large value of h when forecasting, so you can clearly see the differences between the various options when plotting the forecasts.]
china_gdp <- global_economy %>%
  filter(Country == "China") %>%
  select(Year, GDP) %>%
  mutate(GDP = as.numeric(GDP) / 1e9) %>%   # scale to billions of US dollars
  as_tsibble(index = Year)
ets_model <- china_gdp %>%
model(ETS(GDP ~ error("A") + trend("A") + season("N")))
ets_damped <- china_gdp %>%
model(ETS(GDP ~ error("A") + trend("Ad") + season("N")))
ets_boxcox <- china_gdp %>%
model(ETS(box_cox(GDP, lambda = 0.3) ~ error("A") + trend("A") + season("N")))
fc_ets_model <- ets_model %>%
forecast(h = 20)
fc_ets_model %>%
  autoplot(china_gdp) +
  labs(title = "Chinese GDP forecasts: ETS(A,A,N)",
       x = "Year", y = "GDP (billion US$)")
fc_ets_damped <- ets_damped %>%
forecast(h = 20)
fc_ets_damped %>%
  autoplot(china_gdp) +
  labs(title = "Chinese GDP forecasts: ETS(A,Ad,N) with damped trend",
       x = "Year", y = "GDP (billion US$)")
fc_ets_boxcox <- ets_boxcox %>%
forecast(h = 20)
fc_ets_boxcox %>%
  autoplot(china_gdp) +
  labs(title = "Chinese GDP forecasts: ETS(A,A,N) on Box-Cox transformed data (lambda = 0.3)",
       x = "Year", y = "GDP (billion US$)")
Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?
Multiplicative seasonality is necessary because the seasonal swings in gas production grow with the level of the series. On the held-out test set, the damped ETS(A,Ad,M) model (RMSE = 10.21547) outperforms the non-damped ETS(A,A,M) model (RMSE = 15.82491), so damping the trend does improve the forecasts here.
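A plot of the raw series shows why: the seasonal swings grow roughly in proportion to the level of the series, which an additive seasonal term cannot track. A minimal sketch:
aus_production %>%
  autoplot(Gas) +
  labs(title = "Australian quarterly gas production", y = "Petajoules")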
gas_data <- aus_production %>%
  select(Quarter, Gas)   # aus_production is already a quarterly tsibble with no gaps
train_data <- gas_data %>% filter(Quarter < yearquarter("2002 Q1"))
test_data <- gas_data %>% filter(Quarter >= yearquarter("2002 Q1"))
ets_gas <- train_data %>% model(ETS(Gas ~ error("A") + trend("A") + season("M")))
ets_gas_damped <- train_data %>% model(ETS(Gas ~ error("A") + trend("Ad") + season("M")))
fc_ets_gas <- ets_gas %>% forecast(h = 40)
fc_ets_gas_damped <- ets_gas_damped %>% forecast(h = 40)
autoplot(train_data, Gas) +
autolayer(fc_ets_gas, level = 95, color = "blue", alpha = 0.3) +
autolayer(fc_ets_gas_damped, level = 95, color = "red", alpha = 0.3) +
autolayer(test_data, Gas, color = "green") +
labs(title = "ETS vs. Damped ETS Forecasts")
## Scale for fill_ramp is already present.
## Adding another scale for fill_ramp, which will replace the existing scale.
accuracy(fc_ets_gas, test_data)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 6 observations are missing between 2010 Q3 and 2011 Q4
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(Gas ~ error(\"A\")… Test -14.3 15.8 14.3 -7.06 7.09 NaN NaN 0.492
accuracy(fc_ets_gas_damped, test_data)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 6 observations are missing between 2010 Q3 and 2011 Q4
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(Gas ~ error(\"A\")… Test -7.62 10.2 8.05 -3.97 4.15 NaN NaN 0.463
Recall your retail time series data (from Exercise 7 in Section 2.10).
Why is multiplicative seasonality necessary for this series?
Definition: A time series exhibits multiplicative seasonality when seasonal fluctuations increase or decrease proportionally to the level of the series. This differs from additive seasonality, where seasonal patterns remain constant in magnitude, regardless of the overall trend.
Retail sales usually fluctuate more when turnover is high and less when turnover is low. Therefore, an ETS model with multiplicative seasonality will better capture this behavior, leading to more accurate forecasts compared to an additive seasonality model.
set.seed(123)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))|>
select(Month, Turnover) |>
as_tsibble(index = Month)
autoplot(myseries, Turnover)
gg_season(myseries, Turnover)
gg_subseries(myseries, Turnover)
gg_lag(myseries, Turnover)
ACF(myseries, Turnover) |> autoplot()
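One way to check the claim above on this particular series is to fit additive and multiplicative seasonal ETS models and compare their training accuracy; a sketch (the model names additive/multiplicative are illustrative):
myseries %>%
  model(
    additive       = ETS(Turnover ~ error("A") + trend("A") + season("A")),
    multiplicative = ETS(Turnover ~ error("A") + trend("A") + season("M"))
  ) %>%
  accuracy() %>%
  select(.model, RMSE, MAE, MASE)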
Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.
hw_model <- myseries |>
model(ETS(Turnover ~ error("A") + trend("A") + season("M")))
hw_fc <- hw_model |>
forecast(h=12)
autoplot(hw_fc) +
labs(title="Holt-Winters' Multiplicative Method Forecast",
y="Turnover ($)")
hw_damped_model <- myseries |>
model(ETS(Turnover ~ error("A") + trend("Ad") + season("M")))
hw_damped_fc <- hw_damped_model |>
forecast(h=12)
autoplot(hw_damped_fc) +
labs(title="Holt-Winters' Multiplicative Method with Damped Trend",
y="Turnover ($)")
hw_fc %>%
  autoplot(myseries) +
  autolayer(hw_damped_fc, level = NULL, colour = "red") +
  labs(title = "Comparison of Holt-Winters' Multiplicative vs. Damped Trend",
       y = "Turnover ($)")
accuracy(hw_model)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(Turnover ~ error(… Trai… 1.09 23.6 17.6 0.0253 3.28 0.452 0.473 0.163
accuracy(hw_damped_model)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(Turnover ~ error(… Trai… 2.35 23.6 16.9 0.258 3.16 0.436 0.472 0.0250
Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?
Check that the residuals from the best method look like white noise.
I prefer the damped model, ETS(A,Ad,M): its one-step RMSE is essentially the same as the non-damped model's (both about 23.6), but its MAE and MASE are slightly lower. Its residuals do not look perfectly like white noise, but they are mostly random with minimal autocorrelation, and the Ljung-Box test below does not reject the white-noise hypothesis at the 5% level (p = 0.085).
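Putting the two training-accuracy tables side by side makes the comparison easier to read; a small sketch reusing the fitted models above:
bind_rows(accuracy(hw_model), accuracy(hw_damped_model)) %>%
  select(.model, RMSE, MAE, MASE)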
best_model_residuals <- augment(hw_damped_model)
autoplot(best_model_residuals, .resid) +
labs(title="Residuals from Holt-Winters Damped Model",
y="Residuals")
best_model_residuals |>
ACF(.resid) |>
autoplot() +
labs(title="ACF of Residuals from Damped Model")
ljung_box_test <- best_model_residuals |>
features(.resid, ljung_box, lag = 10)
print(ljung_box_test)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 "ETS(Turnover ~ error(\"A\") + trend(\"Ad\") + season(\"M\"… 16.6 0.0847
best_model_residuals |>
ggplot(aes(x=.resid)) +
geom_histogram(bins=30, fill="lightblue", color="black") +
labs(title="Histogram of Residuals", x="Residuals", y="Frequency")
best_model_residuals |>
ggplot(aes(sample=.resid)) +
geom_qq() +
geom_qq_line() +
labs(title="QQ Plot of Residuals")
Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?
No. For the seasonal naïve benchmark trained to the end of 2010, the test-set RMSE (213) is far higher than the training-set RMSE (45.6).
myseries_train <- myseries |>
filter(year(Month) < 2011)
autoplot(myseries, Turnover) +
autolayer(myseries_train, Turnover, colour = "red")+
labs(title = "Training", y = "Turnover")
fit <- myseries_train |>
  model(SNAIVE(Turnover))
fit |> gg_tsresiduals()
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 12 rows containing non-finite outside the scale range
## (`stat_bin()`).
fc <- fit |>
forecast(new_data = anti_join(myseries, myseries_train))
## Joining with `by = join_by(Month, Turnover)`
fc |> autoplot(myseries)+
labs(title = "Testing", y = "Turnover")
fit |> accuracy()
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE() Training 25.1 45.6 35.4 5.05 7.29 1 1 0.695
fc |> accuracy(myseries)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE() Test 172. 213. 174. 15.1 15.2 4.90 4.68 0.947
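To answer the question directly, the Holt-Winters damped model can be fitted to the same training period and compared with the seasonal naïve benchmark on the test set; a sketch (the model names are illustrative, and the numbers would need to be run to confirm which method wins):
fit_compare <- myseries_train |>
  model(
    snaive    = SNAIVE(Turnover),
    hw_damped = ETS(Turnover ~ error("A") + trend("Ad") + season("M"))
  )
fit_compare |>
  forecast(new_data = anti_join(myseries, myseries_train, by = "Month")) |>
  accuracy(myseries) |>
  select(.model, RMSE, MAE, MASE)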
For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?
The Box-Cox + STL + ETS approach gives a slightly lower test-set RMSE (209) than the seasonal naïve forecasts (test-set RMSE = 213), so it is a modest improvement; the training-set RMSE remains much lower than either.
fit_snaive <- myseries_train |>
  model(SNAIVE(Turnover))
fit_snaive |> gg_tsresiduals()
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 12 rows containing non-finite outside the scale range
## (`stat_bin()`).
myseries_test <- anti_join(myseries, myseries_train)
## Joining with `by = join_by(Month, Turnover)`
fc_snaive <- fit_snaive |>
forecast(new_data = myseries_test)
fc_snaive |> autoplot(myseries)+
labs(title = "Snaive", y = "Turnover")
lambda <- BoxCox.lambda(myseries_train$Turnover)   # BoxCox.lambda() comes from the forecast package
myseries_train_transformed <- myseries_train %>%
mutate(Turnover_bc = box_cox(Turnover, lambda))
stl_fit <- myseries_train_transformed %>%
model(STL(Turnover_bc ~ season(window = "periodic"), robust = TRUE))
components <- components(stl_fit)
myseries_train_transformed <- myseries_train_transformed %>%
mutate(Turnover_adj = Turnover_bc - components$season_year)
ets_fit <- myseries_train_transformed %>%
model(ETS(Turnover_adj))
ets_fc <- ets_fit %>%
forecast(h = nrow(myseries_test))
# Re-attach the seasonal pattern from the final training year, then back-transform
seasonal_indices <- tail(components$season_year, 12)
seasonal_fc <- rep(seasonal_indices, length.out = nrow(myseries_test))
fc_with_season <- ets_fc$.mean + seasonal_fc
final_forecasts <- inv_box_cox(fc_with_season, lambda)
forecast_df <- tibble(
Month = myseries_test$Month,
Turnover = myseries_test$Turnover,
forecast = final_forecasts
)
accuracy_new <- forecast_df %>%
summarise(
RMSE = sqrt(mean((Turnover - forecast)^2, na.rm = TRUE)),
MAE = mean(abs(Turnover - forecast), na.rm = TRUE),
MASE = mean(abs(Turnover - forecast), na.rm = TRUE) /
mean(abs(diff(myseries_train$Turnover)), na.rm = TRUE)
)
myseries_test_with_fc <- myseries_test %>%
mutate(BoxCox_STL_ETS_Forecast = final_forecasts)
autoplot(myseries, Turnover) +
autolayer(myseries_test_with_fc, BoxCox_STL_ETS_Forecast, colour = "blue", linetype = "dashed") +
labs(title = "Box-Cox + STL + ETS Forecasts vs Actuals", y = "Turnover")
print(accuracy_new)
## # A tibble: 1 × 3
## RMSE MAE MASE
## <dbl> <dbl> <dbl>
## 1 209. 187. 3.68
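The same pipeline can be expressed more compactly with fabletools' decomposition_model(), which fits the STL decomposition and a model for the seasonally adjusted component in one step; a sketch, assuming the box_cox() specified inside STL() is back-transformed automatically (as fabletools normally does for response transformations):
fit_dcmp <- myseries_train %>%
  model(
    stl_ets = decomposition_model(
      STL(box_cox(Turnover, lambda) ~ season(window = "periodic"), robust = TRUE),
      ETS(season_adjust)
    )
  )
fit_dcmp %>%
  forecast(new_data = myseries_test) %>%
  accuracy(myseries) %>%
  select(.model, RMSE, MAE, MASE)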