# Filter data for pigs in Victoria
vic_pigs <- aus_livestock |>
  filter(Animal == "Pigs", State == "Victoria")
fit <- vic_pigs |>
  model(ETS(Count ~ error("A") + trend("N") + season("N")))
report(fit)
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
# Forecast next 4 months
fc <- fit |> forecast(h = "4 months")
# Plot forecasts
fc |>
  autoplot(vic_pigs |> filter(Month >= yearmonth("2010 Jan"))) +
  labs(y = "Count", title = "Victoria Pigs Slaughtered: Forecasts (next 4 months)")
The estimated smoothing parameter is alpha = 0.3221247 and the initial level is l[0] = 100646.6.
# manual PI from residual sd
resid_sd <- augment(fit)$.resid |> sd(na.rm = TRUE)
first_fc <- fc |> as_tibble() |> slice(1)
yhat <- first_fc$.mean
lower <- yhat - 1.96 * resid_sd
upper <- yhat + 1.96 * resid_sd
# R's PI for the first forecast
r_int <- fc |> hilo(95) |> as_tibble() |> slice(1) |> pull(`95%`)
r_lower <- r_int$lower
r_upper <- r_int$upper
cat("Manual 95% PI: [", lower, ", ", upper, "]\n", sep = "")
## Manual 95% PI: [76871.01, 113502.1]
cat("R 95% PI: [", r_lower, ", ", r_upper, "]\n", sep = "")
## R 95% PI: [76854.79, 113518.3]
The manually computed 95% prediction interval, based on ŷ ± 1.96s, was [76871.01, 113502.1], which closely matches R’s interval of [76854.79, 113518.3] for the first forecast. The small difference arises mainly because the manual interval uses the sample standard deviation of the residuals, whereas fable’s interval is based on the model’s estimated sigma^2.
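That gap can be closed directly. As a minimal sketch (assuming the `fit` mable above, and that `glance()` exposes the model's estimated variance as a `sigma2` column, as it does for fabletools ETS models), the first interval can be rebuilt from the model's own sigma instead of the residual standard deviation:
# Sketch: rebuild the first 95% PI from the model's estimated sigma^2
model_sd <- sqrt(glance(fit)$sigma2)
cat("Model-sigma 95% PI: [", yhat - 1.96 * model_sd, ", ",
    yhat + 1.96 * model_sd, "]\n", sep = "")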
aus_exports <- global_economy %>%
  filter(Country == "Australia")
autoplot(aus_exports, Exports) +
  labs(title = "Australian Exports")
fit_ses <- aus_exports %>%
  model(ETS(Exports ~ error("A") + trend("N") + season("N")))
report(fit_ses)
## Series: Exports
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.5659948
##
## Initial states:
## l[0]
## 12.98943
##
## sigma^2: 1.3621
##
## AIC AICc BIC
## 257.3943 257.8387 263.5756
fc_ses <- fit_ses %>% forecast(h = 10)
autoplot(fc_ses, aus_exports) +
labs(title = "SES (ETS(A,N,N)) Forecasts: Australian Exports")
### 8.5c Compute the RMSE values for the training data.
accuracy(fit_ses)
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Australia "ETS(Exports… Trai… 0.232 1.15 0.914 1.09 5.41 0.928 0.928 0.0125
The training-set RMSE for the SES model is 1.146794.
fit_holt <- aus_exports %>%
  model(ETS(Exports ~ error("A") + trend("A") + season("N")))
report(fit_holt)
## Series: Exports
## Model: ETS(A,A,N)
## Smoothing parameters:
## alpha = 0.4407571
## beta = 1e-04
##
## Initial states:
## l[0] b[0]
## 12.74993 0.1371607
##
## sigma^2: 1.3395
##
## AIC AICc BIC
## 258.3124 259.4662 268.6146
fc_holt <- fit_holt %>% forecast(h = 10)
autoplot(fc_holt, aus_exports) +
labs(title = "Holt’s Linear Trend Forecasts: Australian Exports")
accuracy(fit_ses)
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Australia "ETS(Exports… Trai… 0.232 1.15 0.914 1.09 5.41 0.928 0.928 0.0125
accuracy(fit_holt)
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Australia "ETS(Expo… Trai… -7.46e-7 1.12 0.893 -0.387 5.39 0.907 0.904 0.109
Overall, Holt’s linear trend model aligns better with the clear upward trend in Australian exports, while SES assumes a constant level and produces flat forecasts that understate future growth. With its added trend component, Holt’s method therefore gives more realistic (and, by training RMSE, slightly more accurate) projections for this series.
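As a cross-check (a small sketch re-using the same two specifications; the labels `SES` and `Holt` are names I am adding here), both models can be fitted in a single `model()` call so that `accuracy()` returns one directly comparable table:
aus_exports %>%
  model(
    SES = ETS(Exports ~ error("A") + trend("N") + season("N")),
    Holt = ETS(Exports ~ error("A") + trend("A") + season("N"))
  ) %>%
  accuracy() %>%
  dplyr::select(.model, RMSE, MAE, MAPE)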
rmse_ses <- accuracy(fit_ses)$RMSE
rmse_holt <- accuracy(fit_holt)$RMSE
yhat_ses <- fc_ses %>% as_tibble() %>% slice(1) %>% pull(.mean)
yhat_holt <- fc_holt %>% as_tibble() %>% slice(1) %>% pull(.mean)
ses_lower <- yhat_ses - 1.96*rmse_ses
ses_upper <- yhat_ses + 1.96*rmse_ses
holt_lower <- yhat_holt - 1.96*rmse_holt
holt_upper <- yhat_holt + 1.96*rmse_holt
cbind(Model = c("SES", "Holt"),
      Mean = c(yhat_ses, yhat_holt),
      Lower_Manual = c(ses_lower, holt_lower),
      Upper_Manual = c(ses_upper, holt_upper))
## Model Mean Lower_Manual Upper_Manual
## [1,] "SES" "20.6071590526505" "18.359442541401" "22.8548755639001"
## [2,] "Holt" "20.8386411205804" "18.649855791726" "23.0274264494348"
fc_ses %>% as_tibble() %>% slice(1)
## # A tibble: 1 × 5
## Country .model Year
## <fct> <chr> <dbl>
## 1 Australia "ETS(Exports ~ error(\"A\") + trend(\"N\") + season(\"N\"))" 2018
## # ℹ 2 more variables: Exports <dist>, .mean <dbl>
fc_holt %>% as_tibble() %>% slice(1)
## # A tibble: 1 × 5
## Country .model Year
## <fct> <chr> <dbl>
## 1 Australia "ETS(Exports ~ error(\"A\") + trend(\"A\") + season(\"N\"))" 2018
## # ℹ 2 more variables: Exports <dist>, .mean <dbl>
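For completeness, R’s own 95% intervals at the first horizon can be pulled with `hilo()` so they can be set beside the manual intervals (a short sketch using the `fc_ses` and `fc_holt` fables from above):
bind_rows(
  fc_ses %>% hilo(95) %>% as_tibble() %>% slice(1),
  fc_holt %>% hilo(95) %>% as_tibble() %>% slice(1)
) %>%
  dplyr::select(.model, .mean, `95%`)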
Both the manual and R-generated 95% prediction intervals are nearly identical, confirming that computing ŷ ± 1.96 × RMSE by hand is a good approximation for the first-step forecast.
Holt’s linear trend model produces slightly wider intervals than the Simple Exponential Smoothing (SES) model. This is expected, since Holt’s method estimates an additional smoothing parameter (beta) to capture the trend.
Overall, both models produce similar point forecasts, but Holt’s intervals better reflect the potential upward growth seen in the data, while SES assumes a constant mean level. This suggests Holt’s model may be more appropriate for data with a clear upward trend, like Australia’s exports.
china_gdp <- global_economy %>%
  filter(Country == "China")
autoplot(china_gdp, GDP) +
  labs(title = "Chinese GDP (US$ billions)", y = "Billions of US$")
fits <- china_gdp %>%
  model(
    ETS_auto = ETS(GDP),
    ETS_damped = ETS(GDP ~ error("A") + trend("Ad") + season("N")),
    ETS_boxcox = ETS(box_cox(GDP, 0.3) ~ error("A") + trend("A") + season("N"))
  )
report(fits)
## Warning in report.mdl_df(fits): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 3 × 10
## Country .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 China ETS_auto 1.08e- 2 -1546. 3102. 3103. 3112. 4.00e22 1.61e23 7.93e- 2
## 2 China ETS_damped 3.96e+22 -1624. 3260. 3262. 3273. 3.62e22 1.30e23 9.49e+10
## 3 China ETS_boxcox 9.76e+ 4 -449. 908. 909. 918. 9.09e 4 3.33e 5 2.48e+ 2
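As the warning notes, `report()` only works on a single model. A quick sketch (using the `fits` mable above) that selects just the automatically chosen specification:
fits %>%
  dplyr::select(Country, ETS_auto) %>%
  report()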
fc <- fits %>% forecast(h = "30 years")
autoplot(fc, china_gdp) +
labs(title = "Chinese GDP Forecasts — ETS Variants", y = "Billions of US Dollars")
- ETS_auto: Keeps projecting the strong growth seen in the past, so the forecasts rise quickly but can be too optimistic.
- ETS_damped: Slows down the growth over time, giving a more realistic forecast for the long run, since economies usually don’t keep growing at the same fast rate forever.
- ETS_boxcox: Reduces the effect of very large values, making the forecasts smoother and more stable.
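To see how far apart the three variants end up, their final-year point forecasts can be compared directly (a small sketch using the `fc` fable above):
fc %>%
  as_tibble() %>%
  dplyr::filter(Year == max(Year)) %>%
  dplyr::select(.model, Year, .mean)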
# 1. Plot data
autoplot(aus_production, Gas) +
  labs(title = "Australian Gas Production", y = "Petajoules")
# 2. Fit models
fit_auto <- aus_production %>%
  model(Auto = ETS(Gas))
fit_damped <- aus_production %>%
  model(Damped = ETS(Gas ~ error("M") + trend("Ad") + season("M")))
# 3. Compare forecasts
fc_all <- bind_cols(fit_auto, fit_damped) %>%
  forecast(h = 12)
autoplot(fc_all, aus_production) +
  facet_wrap(~.model) +
  labs(title = "Gas Production Forecasts: Auto vs Damped", y = "Petajoules")
# 4. Compare accuracy
accuracy(fit_auto)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Auto Training -0.115 4.60 3.02 0.199 4.08 0.542 0.606 -0.0131
accuracy(fit_damped)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Damped Training -0.00439 4.59 3.03 0.326 4.10 0.544 0.606 -0.0217
Multiplicative seasonality was necessary because the seasonal variation increases as production rises: the peaks get taller over time, so a multiplicative seasonal component scales the seasonal effect with the level of the series.
The damped model slightly reduces the long-term growth, flattening the trend over time. This is more realistic because production can’t rise indefinitely; factors such as natural resources and market demand eventually slow the growth. Damping assumes growth continues but at a decreasing rate.
Both models have nearly identical training accuracy (RMSE, MAE, etc.), but the damped ETS provides a more stable long-term forecast, making it a reasonable choice.
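To make the damping concrete, the damping parameter can be read from the coefficient table (a small sketch using `fit_damped` from above; a phi value close to 1 means very mild damping):
tidy(fit_damped) %>%
  dplyr::filter(term == "phi")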
Multiplicative seasonality is necessary for this series because the magnitude of the seasonal fluctuations increases over time.
set.seed(2000)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
fit <- myseries %>%
  model(
    `Holt Winters Multiplicative Method` = ETS(Turnover ~ error("M") + trend("A") + season("M")),
    `Holt Winters Damped Method` = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
  )
HoltWinters <- fit %>% forecast(h = 60)
HoltWinters %>%
  autoplot(myseries) +
  facet_grid(vars(.model)) +
  labs(title = "Holt–Winters Multiplicative vs Damped",
       y = "Turnover")
When the trend is damped, the forecasts rise more slowly and level off,
suggesting a more conservative long-term outlook. This makes sense
because retail turnover is unlikely to keep accelerating at the same
pace indefinitely.
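The flattening can also be seen numerically by comparing the two methods' point forecasts at the end of the 60-month horizon (a quick sketch using the `HoltWinters` fable above):
HoltWinters %>%
  as_tibble() %>%
  dplyr::filter(Month == max(Month)) %>%
  dplyr::select(.model, Month, .mean)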
dplyr::select(accuracy(fit), .model, RMSE)
## # A tibble: 2 × 2
## .model RMSE
## <chr> <dbl>
## 1 Holt Winters Multiplicative Method 12.0
## 2 Holt Winters Damped Method 12.1
I prefer the Holt-Winters Multiplicative Method because it has a slightly lower training RMSE (12.0) than the Damped Method (12.1), meaning it fits the training data slightly more accurately.
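Training RMSE is only one criterion; since both models sit in the same mable, their information criteria can be compared as well (a small sketch using the `fit` mable above):
glance(fit) %>%
  dplyr::select(.model, AICc, BIC)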
fit_hw <- myseries %>%
  model(HW = ETS(Turnover ~ error("M") + trend("A") + season("M")))
# Residual diagnostics
fit_hw %>% gg_tsresiduals()
## Warning: `gg_tsresiduals()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_tsresiduals()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Yes, the residuals look like white noise:
- The residuals hover randomly around zero.
- The ACF spikes mostly stay inside the blue significance bounds, suggesting no strong autocorrelation.
- The histogram is centered and roughly symmetric.
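As a formal complement to the visual check, a Ljung-Box test can be run on the innovation residuals (a small sketch using `fit_hw` from above; lag = 24 follows the usual rule of thumb of two seasonal periods for monthly data, and a large p-value is consistent with white noise):
augment(fit_hw) %>%
  features(.innov, ljung_box, lag = 24)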
set.seed(2000)
series_id <- sample(aus_retail$`Series ID`, 1)
myseries <- aus_retail |> filter(`Series ID` == series_id)
myseries_train <- myseries |> filter(year(Month) < 2011)
fits <- myseries_train |>
  model(
    HW = ETS(Turnover ~ error("M") + trend("A") + season("M")),
    HW_damped = ETS(Turnover ~ error("M") + trend("Ad") + season("M")),
    SNAIVE = SNAIVE(Turnover)
  )
test <- myseries |> filter(year(Month) >= 2011)
fc <- forecast(fits, new_data = test)
accuracy(fc, myseries)[, c(".model","RMSE")]
## # A tibble: 3 × 2
## .model RMSE
## <chr> <dbl>
## 1 HW 42.0
## 2 HW_damped 73.4
## 3 SNAIVE 99.9
The table above shows that both Holt-Winters methods beat the seasonal naïve (SNAIVE) benchmark with lower RMSE, and that the Holt-Winters Multiplicative method is the most accurate, with the lowest test RMSE overall.
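The same comparison can be checked visually by plotting the three sets of test-period forecasts against the held-out data (a quick sketch using the `fc` fable and `myseries` from above; intervals are suppressed with `level = NULL` to keep the plot readable):
fc %>%
  autoplot(myseries %>% filter(year(Month) >= 2008), level = NULL) +
  labs(title = "Test-period forecasts: HW, HW damped, SNAIVE", y = "Turnover")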
# train/test split
set.seed(2000)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
train <- myseries %>% filter(year(Month) <= 2010)
test <- myseries %>% filter(year(Month) >= 2011)
# Box–Cox
lambda <- train %>%
  features(Turnover, guerrero) %>%
  pull(lambda_guerrero)
fits <- train %>%
  model(
    HW = ETS(Turnover ~ error("M") + trend("A") + season("M")),
    HW_damped = ETS(Turnover ~ error("M") + trend("Ad") + season("M")),
    SNAIVE = SNAIVE(Turnover),
    STL_ETS_BoxCox = decomposition_model(
      STL(box_cox(Turnover, lambda) ~ season(window = "periodic")),
      ETS(season_adjust ~ error("A") + trend("A"))
    )
  )
fc <- fits %>% forecast(new_data = test)
acc <- accuracy(fc, myseries) %>%
  dplyr::filter(.type == "Test") %>%
  dplyr::select(.model, RMSE, MAE, MAPE)
acc
## # A tibble: 4 × 4
## .model RMSE MAE MAPE
## <chr> <dbl> <dbl> <dbl>
## 1 HW 42.0 35.6 8.32
## 2 HW_damped 73.4 61.2 12.6
## 3 SNAIVE 99.9 80.1 15.9
## 4 STL_ETS_BoxCox 112. 106. 22.9
The STL + ETS (Box-Cox) model performed worse than all of the previous forecasts: its test-set RMSE (~111.5) was higher than Holt-Winters Damped (~73.4), Holt-Winters Multiplicative (~42), and even the seasonal naïve benchmark (~99.9). In other words, the STL + ETS (Box-Cox) combination produced the least accurate predictions on the test data.
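As a follow-up experiment (a hypothetical variant, not part of the original exercise), refitting the STL + ETS combination without the Box-Cox transformation would show whether the transformation or the decomposition itself drives the poor test RMSE:
# Sketch: same STL + ETS decomposition model, but on the untransformed Turnover
train %>%
  model(
    STL_ETS = decomposition_model(
      STL(Turnover ~ season(window = "periodic")),
      ETS(season_adjust ~ error("A") + trend("A"))
    )
  ) %>%
  forecast(new_data = test) %>%
  accuracy(myseries) %>%
  dplyr::select(.model, RMSE, MAE, MAPE)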