knitr::opts_chunk$set(
echo = TRUE,
warning = FALSE,
message = FALSE
)
library(fpp3)
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.2 ──
## ✔ tibble 3.3.0 ✔ tsibble 1.1.6
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ feasts 0.4.2
## ✔ lubridate 1.9.4 ✔ fable 0.4.1
## ✔ ggplot2 4.0.0
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(tsibble)
library(dplyr)
library(fable)
library(ggplot2)
library(feasts)
# Load the data
data(aus_livestock)
head(aus_livestock)
For this question a simple exponential component model is required. The simple exponential smoothing model by definition has: - an additive error component - no trend component - no seasonal component
This translates to error(“A”) + trend(“N”) + season(“N”).
Filter the data for the pigs slaughtered.
# Filter for pigs in Victoria
pigs_vic <- aus_livestock %>%
filter(Animal == "Pigs", State == "Victoria")
# Fit simple exponential smoothing model using ETS
fit <- pigs_vic %>%
model(ETS(Count ~ error("A") + trend("N") + season("N")))
# View the model parameters
report(fit)
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
# Generate forecasts for next 4 months
fc <- fit %>%
forecast(h = 4)
# View forecasts
fc
# Plot the forecasts with 80% and 95% prediction intervals
fc %>%
autoplot(pigs_vic, level = c(80, 95)) +
scale_fill_manual(
values = c("orange", "darkblue"),
breaks = c("80%", "95%")
) +
labs(
title = "Pigs Slaughtered in Victoria - 4 Month Forecast",
y = "Count",
x = "Month"
)
# Extract the point forecast for the first forecast
y_hat <- fc$.mean[1]
# Get residuals from the fitted model
residuals <- augment(fit) %>%
pull(.resid)
# Calculate standard deviation of residuals (s)
s <- sd(residuals, na.rm = TRUE)
# Compute manual 95% prediction interval
lower_manual <- y_hat - 1.96 * s # YOUR LOWER BOUND
upper_manual <- y_hat + 1.96 * s # YOUR UPPER BOUND
# Extract R's 95% prediction interval for the first forecast
r_interval <- fc %>%
hilo(level = 95) %>%
unpack_hilo("95%")
lower_r <- r_interval$`95%_lower`[1] # R'S LOWER BOUND
upper_r <- r_interval$`95%_upper`[1] # R'S UPPER BOUND
# Create comparison table
comparison <- data.frame(
Calculation = c("Manual (y + or - 1.96s)", "R's hilo()"),
Lower_Bound = c(lower_manual, lower_r),
Upper_Bound = c(upper_manual, upper_r),
Width = c(upper_manual - lower_manual, upper_r - lower_r)
)
print(comparison)
## Calculation Lower_Bound Upper_Bound Width
## 1 Manual (y + or - 1.96s) 76871.01 113502.1 36631.09
## 2 R's hilo() 76854.79 113518.3 36663.54
print("Key Values:")
## [1] "Key Values:"
print(paste("Point Forecast (y):", y_hat))
## [1] "Point Forecast (y): 95186.5574309915"
print(paste("Standard Deviation (s):", s))
## [1] "Standard Deviation (s): 9344.66579258967"
Botswana’s exports, measured as a percentage of the country’s total economic output (GDP) and illustrated in the graph below, have grown significantly over the past six decades. The overall trend shows exports increasing from lower levels in the 1960s to higher levels in recent years, though this growth hasn’t been smooth or steady.
The most dramatic expansion occurred in the early 2000s, when exports surged rapidly before experiencing some pullbacks and subsequent recoveries. These ups and downs reflect Botswana’s economy responding to global market conditions, commodity price changes, and international demand for their key exports like diamonds and minerals.
Unlike retail sales that peak during holiday seasons, Botswana’s export patterns don’t follow predictable annual cycles. Instead, the year-to-year variations we observe are likely driven by broader economic forces—such as global recessions, shifts in commodity prices, or changes in trade relationships—making exports somewhat unpredictable in the short term but generally positive over the long haul.
# See all countries
#unique(global_economy$Country)
# Load data
data(global_economy)
# Select one country - you can change this to any country
country_data <- global_economy %>%
filter(Country == "Botswana")
# Plot the exports series
country_data %>%
autoplot(Exports) +
labs(
title = "Annual Exports - Botswana",
y = "Exports (% of GDP)",
x = "Year"
) +
theme_minimal()
# Summary statistics
print("Summary Statistics:")
## [1] "Summary Statistics:"
summary(country_data$Exports)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 22.77 39.08 49.73 47.17 54.05 75.13
# Check for trends and patterns
print("Data Range:")
## [1] "Data Range:"
print(paste("Years:", min(country_data$Year), "to", max(country_data$Year)))
## [1] "Years: 1960 to 2017"
print(paste("Number of observations:", nrow(country_data)))
## [1] "Number of observations: 58"
# Fit ETS(A,N,N) model - Simple Exponential Smoothing
fit_botswana_ann <- country_data %>%
model(ETS_ANN = ETS(Exports ~ error("A") + trend("N") + season("N")))
# Generate forecasts for 10 years
fc_botswana_ann <- fit_botswana_ann %>%
forecast(h = 10)
# Plot forecasts
fc_botswana_ann %>%
autoplot(country_data, level = c(80, 95)) +
scale_fill_manual(values = c("lightblue", "darkblue")) +
labs(
title = "Simple Exponential Smoothing - Botswana's Exports",
y = "Exports (% of GDP)",
x = "Year"
) +
theme_minimal()
# Show model details
print("Exponential Smoothing Analysis:")
## [1] "Exponential Smoothing Analysis:"
report(fit_botswana_ann)
## Series: Exports
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.9998999
##
## Initial states:
## l[0]
## 26.26254
##
## sigma^2: 27.0263
##
## AIC AICc BIC
## 430.6855 431.1299 436.8668
# Calculate RMSE for ETS(A,N,N) on training data
accuracy_botswana_ann<- accuracy(fit_botswana_ann)
print("ETS(A,N,N) Accuracy Metrics:")
## [1] "ETS(A,N,N) Accuracy Metrics:"
print(accuracy_botswana_ann)
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Botswana ETS_ANN Training 0.234 5.11 3.93 0.118 8.40 0.983 0.991 0.0871
# Extract RMSE specifically
rmse_botswana_data_ann <- accuracy_botswana_ann$RMSE
print(paste("Root Mean Squared Error for ETS(A,N,N):", round(rmse_botswana_data_ann, 4)))
## [1] "Root Mean Squared Error for ETS(A,N,N): 5.1083"
ETS(A,N,N) assumes our data fluctuates randomly around a relatively stable average with no consistent upward or downward direction.
ETS(A,A,N) recognizes that our data may be steadily rising or falling over time and adjusts for that trend.
Comparing these two models helps us determine whether there’s a meaningful trend in our data. If the trend model performs better, it tells us we should expect values to continue moving in that direction — which is critical information for planning and decision-making.
# Fit ETS(A,A,N) model - Holt's Linear Trend
fit_botswana_aan <- country_data %>%
model(ETS_AAN = ETS(Exports ~ error("A") + trend("A") + season("N")))
# Generate forecasts
fc_botswana_aan <- fit_botswana_aan %>%
forecast(h = 10)
# Plot ETS(A,A,N) forecasts
fc_botswana_aan %>%
autoplot(country_data, level = c(80, 95)) +
scale_fill_manual(values = c("lightgreen", "darkgreen")) +
labs(
title = "ETS(A,A,N) Forecast - Canada Exports",
y = "Exports (% of GDP)",
x = "Year"
) +
theme_minimal()
# Show model details
print("ETS(A,A,N) Model:")
## [1] "ETS(A,A,N) Model:"
report(fit_botswana_aan)
## Series: Exports
## Model: ETS(A,A,N)
## Smoothing parameters:
## alpha = 0.9998999
## beta = 0.05509657
##
## Initial states:
## l[0] b[0]
## 26.74895 2.396638
##
## sigma^2: 29.464
##
## AIC AICc BIC
## 437.5848 438.7387 447.8870
# Calculate RMSE for ETS(A,A,N)
accuracy_botswana_aan <- accuracy(fit_botswana_aan)
rmse_botswana_data_aan <- accuracy_botswana_aan$RMSE
print("ETS(A,A,N) Accuracy Metrics:")
## [1] "ETS(A,A,N) Accuracy Metrics:"
print(accuracy_botswana_aan)
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Botswana ETS_AAN Training -0.900 5.24 4.02 -2.68 8.86 1.01 1.02 0.0652
print(paste("RMSE for ETS(A,A,N):", round(rmse_botswana_data_aan, 4)))
## [1] "RMSE for ETS(A,A,N): 5.2376"
# Comparison table
comparison_table <- data.frame(
Model = c("ETS(A,N,N)", "ETS(A,A,N)"),
Parameters = c("2 (level + alpha)", "4 (level + trend + alpha + beta)"),
RMSE = c(rmse_botswana_data_ann, rmse_botswana_data_aan),
AIC = c(glance(fit_botswana_ann)$AIC, glance(fit_botswana_aan)$AIC),
BIC = c(glance(fit_botswana_ann)$BIC, glance(fit_botswana_aan)$BIC)
)
print("Model Comparison:")
## [1] "Model Comparison:"
print(comparison_table)
## Model Parameters RMSE AIC BIC
## 1 ETS(A,N,N) 2 (level + alpha) 5.108268 430.6855 436.8668
## 2 ETS(A,A,N) 4 (level + trend + alpha + beta) 5.237553 437.5848 447.8870
The graph below compares two forecasting approaches for Botswana’s exports. The blue line shows forecasts assuming exports will stay relatively flat, while the red line accounts for any upward or downward trends in the data.
By visualizing both models together, we can see which approach better captures the direction our exports are heading. This comparison helps us understand whether we should expect stability or continued growth/decline, which is essential for strategic planning.
# Plot both forecasts together
country_data %>%
autoplot(Exports) +
autolayer(fc_botswana_ann, level = NULL, color = "blue", size = 1) +
autolayer(fc_botswana_aan, level = NULL, color = "red", size = 1) +
labs(
title = "Forecast Comparison: ETS(A,N,N) vs ETS(A,A,N)",
y = "Exports (% of GDP)",
x = "Year"
) +
theme_minimal() +
scale_color_manual(
name = "Model",
values = c("blue", "red"),
labels = c("ETS(A,N,N)", "ETS(A,A,N)")
)
# Compare first 5 forecast values
forecast_comparison <- data.frame(
Year = fc_botswana_ann$Year[1:5],
ETS_ANN = fc_botswana_ann$.mean[1:5],
ETS_AAN = fc_botswana_aan$.mean[1:5],
Difference = fc_botswana_aan$.mean[1:5] - fc_botswana_ann$.mean[1:5]
)
print("Forecast Value Comparison (First 5 Years):")
## [1] "Forecast Value Comparison (First 5 Years):"
print(forecast_comparison)
## Year ETS_ANN ETS_AAN Difference
## 1 2018 39.8287 39.34957 -0.4791221
## 2 2019 39.8287 38.87044 -0.9582512
## 3 2020 39.8287 38.39132 -1.4373803
## 4 2021 39.8287 37.91219 -1.9165094
## 5 2022 39.8287 37.43306 -2.3956385
# ============================================================
# Manual calculation using RMSE for ETS(A,N,N)
# ============================================================
y_hat_botswana_ann <- fc_botswana_ann$.mean[1]
lower_manual_botswana_ann <- y_hat_botswana_ann - 1.96 * rmse_botswana_data_ann
upper_manual_botswana_ann <- y_hat_botswana_ann + 1.96 * rmse_botswana_data_ann
print(" Step by step calculation of Botswana's export %GPP predictions - ETS(A,N,N)")
## [1] " Step by step calculation of Botswana's export %GPP predictions - ETS(A,N,N)"
print(paste("Our best estimate for next year's exports:", round(y_hat_botswana_ann, 2), "% of GDP"))
## [1] "Our best estimate for next year's exports: 39.83 % of GDP"
print(paste("We're 95% confident exports will be above:", round(lower_manual_botswana_ann, 2), "% of GDP"))
## [1] "We're 95% confident exports will be above: 29.82 % of GDP"
print(paste("We're 95% confident exports will be below:", round(upper_manual_botswana_ann, 2), "% of GDP"))
## [1] "We're 95% confident exports will be below: 49.84 % of GDP"
# ============================================================
# R's advanced calculation for ETS(A,N,N)
# ============================================================
r_interval_botswana_ann <- fc_botswana_ann %>%
hilo(level = 95) %>%
unpack_hilo("95%")
lower_r_botswana_ann <- r_interval_botswana_ann$`95%_lower`[1]
upper_r_botswana_ann <- r_interval_botswana_ann$`95%_upper`[1]
print("R'S Advanced Calculation of Botswana's export %GPP predictions - ETS(A,N,N)")
## [1] "R'S Advanced Calculation of Botswana's export %GPP predictions - ETS(A,N,N)"
print(paste("R's best estimate for next year's exports:", round(y_hat_botswana_ann, 2), "% of GDP"))
## [1] "R's best estimate for next year's exports: 39.83 % of GDP"
print(paste("R says we're 95% confident exports will be above:", round(lower_r_botswana_ann, 2), "% of GDP"))
## [1] "R says we're 95% confident exports will be above: 29.64 % of GDP"
print(paste("R says we're 95% confident exports will be below:", round(upper_r_botswana_ann, 2), "% of GDP"))
## [1] "R says we're 95% confident exports will be below: 50.02 % of GDP"
# Manual 95% Prediction Interval for ETS(A,A,N) Model
y_hat_botswana_aan <- fc_botswana_aan$.mean[1]
lower_manual_botswana_aan <- y_hat_botswana_aan - 1.96 * rmse_botswana_data_aan
upper_manual_botswana_aan <- y_hat_botswana_aan + 1.96 * rmse_botswana_data_aan
print("Based on the reslts of the manual calculation of Botswana's export %GPP predictions using the ETS(A,A,N) model, we can make the following statements:")
## [1] "Based on the reslts of the manual calculation of Botswana's export %GPP predictions using the ETS(A,A,N) model, we can make the following statements:"
print(paste("Our best estimate for next year's exports:", round(y_hat_botswana_aan, 2), "% of GDP"))
## [1] "Our best estimate for next year's exports: 39.35 % of GDP"
print(paste("We're 95% confident exports will be above:", round(lower_manual_botswana_aan, 2), "% of GDP"))
## [1] "We're 95% confident exports will be above: 29.08 % of GDP"
print(paste("We're 95% confident exports will be below:", round(upper_manual_botswana_aan, 2), "% of GDP"))
## [1] "We're 95% confident exports will be below: 49.62 % of GDP"
print("============================================")
## [1] "============================================"
# R's built-in 95% prediction interval for ETS(A,A,N)
r_interval_botswana_aan <- fc_botswana_aan %>%
hilo(level = 95) %>%
unpack_hilo("95%")
lower_r_botswana_aan <- r_interval_botswana_aan$`95%_lower`[1]
upper_r_botswana_aan <- r_interval_botswana_aan$`95%_upper`[1]
print("Based on the results of R's built-in 95% prediction interval for ETS(A,A,N) model, we can make the following statements")
## [1] "Based on the results of R's built-in 95% prediction interval for ETS(A,A,N) model, we can make the following statements"
print(paste("R's best estimate for next year's exports:", round(y_hat_botswana_aan, 2), "% of GDP"))
## [1] "R's best estimate for next year's exports: 39.35 % of GDP"
print(paste("R says we're 95% confident exports will be above:", round(lower_r_botswana_aan, 2), "% of GDP"))
## [1] "R says we're 95% confident exports will be above: 28.71 % of GDP"
print(paste("R says we're 95% confident exports will be below:", round(upper_r_botswana_aan, 2), "% of GDP"))
## [1] "R says we're 95% confident exports will be below: 49.99 % of GDP"
# Comparison table for ETS(A,N,N)
print("=== ETS(A,N,N) - 95% Prediction Interval Comparison ===")
## [1] "=== ETS(A,N,N) - 95% Prediction Interval Comparison ==="
comparison_botswana_ann <- data.frame(
Method = c("Manual RMSE)", "R's hilo()"),
Lower_Bound = c(lower_manual_botswana_ann, lower_r_botswana_ann),
Point_Forecast = c(y_hat_botswana_ann, y_hat_botswana_ann),
Upper_Bound = c(upper_manual_botswana_ann, upper_r_botswana_ann),
Width = c(upper_manual_botswana_ann - lower_manual_botswana_ann,
upper_r_botswana_ann - lower_r_botswana_ann)
)
print(comparison_botswana_ann)
## Method Lower_Bound Point_Forecast Upper_Bound Width
## 1 Manual RMSE) 29.81649 39.8287 49.84090 20.02441
## 2 R's hilo() 29.63946 39.8287 50.01793 20.37848
print("")
## [1] ""
print(paste("RMSE for ETS(A,N,N):", round(rmse_botswana_data_ann, 4)))
## [1] "RMSE for ETS(A,N,N): 5.1083"
print(paste("Difference in Lower Bound:", round(lower_r_botswana_ann - lower_manual_botswana_ann, 4)))
## [1] "Difference in Lower Bound: -0.177"
print(paste("Difference in Upper Bound:", round(upper_r_botswana_ann - upper_manual_botswana_ann, 4)))
## [1] "Difference in Upper Bound: 0.177"
print("")
## [1] ""
print("=== ETS(A,A,N) - 95% Prediction Interval Comparison ===")
## [1] "=== ETS(A,A,N) - 95% Prediction Interval Comparison ==="
comparison_botswana_aan <- data.frame(
Method = c("Manual (y_hat +/- 1.96*RMSE)", "R's hilo()"),
Lower_Bound = c(lower_manual_botswana_aan, lower_r_botswana_aan),
Point_Forecast = c(y_hat_botswana_aan, y_hat_botswana_aan),
Upper_Bound = c(upper_manual_botswana_aan, upper_r_botswana_aan),
Width = c(upper_manual_botswana_aan - lower_manual_botswana_aan,
upper_r_botswana_aan - lower_r_botswana_aan)
)
print(comparison_botswana_aan)
## Method Lower_Bound Point_Forecast Upper_Bound Width
## 1 Manual (y_hat +/- 1.96*RMSE) 29.08397 39.34957 49.61518 20.53121
## 2 R's hilo() 28.71075 39.34957 49.98840 21.27765
print("")
## [1] ""
print(paste("RMSE for ETS(A,A,N):", round(rmse_botswana_data_aan, 4)))
## [1] "RMSE for ETS(A,A,N): 5.2376"
print(paste("Difference in Lower Bound:", round(lower_r_botswana_aan - lower_manual_botswana_aan, 4)))
## [1] "Difference in Lower Bound: -0.3732"
print(paste("Difference in Upper Bound:", round(upper_r_botswana_aan - upper_manual_botswana_aan, 4)))
## [1] "Difference in Upper Bound: 0.3732"
In Botswana’s case, a 20-21 percentage point range in the value of exports as a % of GDP is quite wide, indicating that while exports around 39% of GDP, there’s significant uncertainty and that needs to be prepared for.
A stakeholder such as a minister of Finance or economic planner could use the lower bound (28.7-29.1%) to stress-test whether budgets, cash reserves, and operations can withstand a weaker export environment, while the upper bound (49.6-49.9%) should inform capacity planning and investment decisions to ensure the organization can capture opportunities if exports surge beyond expectations.
# Load data and filter for China
data(global_economy)
china_gdp <- global_economy %>%
filter(Country == "China")
# Plot the GDP series
china_gdp %>%
autoplot(GDP) +
labs(
title = "China GDP (1960-2017)",
y = "GDP (billions USD)",
x = "Year"
) +
theme_minimal()
# Summary statistics
print("Summary of China GDP:")
## [1] "Summary of China GDP:"
print(summary(china_gdp$GDP))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.721e+10 1.455e+11 3.301e+11 1.970e+12 1.613e+12 1.224e+13
names(china_gdp)
## [1] "Country" "Code" "Year" "GDP" "Growth"
## [6] "CPI" "Imports" "Exports" "Population"
# Fit simple exponential smoothing model using ETS
fit_china_ann <- china_gdp %>%
model(ETS(GDP ~ error("A") + trend("N") + season("N")))
# View the model parameters (α and ℓ0)
report(fit_china_ann)
## Series: GDP
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.9999
##
## Initial states:
## l[0]
## 67537060018
##
## sigma^2: 1.79001e+23
##
## AIC AICc BIC
## 3344.888 3345.332 3351.069
# Generate forecasts for next 10 years
fc_china_ann <- fit_china_ann %>%
forecast(h = 18)
# View forecasts
fc_china_ann
# Plot the forecasts with 80% and 95% prediction intervals
fc_china_ann %>%
autoplot(china_gdp, level = c(80, 95)) +
scale_fill_manual(
values = c("orange", "darkblue"),
breaks = c("80%", "95%")
) +
labs(
title = "China GDP Forecast - ETS(A,N,N) Simple Exponential Smoothing",
y = "GDP (billions USD)",
x = "Year"
)
##### 8.6ii Linear Trend ETS(A,A,N)
# Fit linear trend model using ETS
fit_china_aan <- china_gdp %>%
model(ETS(GDP ~ error("A") + trend("A") + season("N")))
# View the model parameters
report(fit_china_aan)
## Series: GDP
## Model: ETS(A,A,N)
## Smoothing parameters:
## alpha = 0.9998964
## beta = 0.5518569
##
## Initial states:
## l[0] b[0]
## 50284778074 3288256684
##
## sigma^2: 3.87701e+22
##
## AIC AICc BIC
## 3258.053 3259.207 3268.356
# Generate forecasts for next 10 years
fc_china_aan <- fit_china_aan %>%
forecast(h = 24)
# View forecasts
fc_china_aan
# Plot the forecasts with 80% and 95% prediction intervals
fc_china_aan %>%
autoplot(china_gdp, level = c(80, 95)) +
scale_fill_manual(
values = c("lightgreen", "darkgreen"),
breaks = c("80%", "95%")
) +
labs(
title = "China GDP Forecast - ETS(A,A,N) Linear Trend",
y = "GDP (billions USD)",
x = "Year"
)
# Fit damped trend model using ETS
fit_china_aadn <- china_gdp %>%
model(ETS(GDP ~ error("A") + trend("Ad") + season("N")))
# View the model parameters
report(fit_china_aadn)
## Series: GDP
## Model: ETS(A,Ad,N)
## Smoothing parameters:
## alpha = 0.9998747
## beta = 0.5634078
## phi = 0.9799999
##
## Initial states:
## l[0] b[0]
## 50284778075 3288256683
##
## sigma^2: 3.959258e+22
##
## AIC AICc BIC
## 3260.187 3261.834 3272.549
# Generate forecasts for next 10 years
fc_china_aadn <- fit_china_aadn %>%
forecast(h = 12)
# View forecasts
fc_china_aadn
# Plot the forecasts with 80% and 95% prediction intervals
fc_china_aadn %>%
autoplot(china_gdp, level = c(80, 95)) +
scale_fill_manual(
values = c("lightblue", "navy"),
breaks = c("80%", "95%")
) +
labs(
title = "China GDP Forecast - ETS(A,Ad,N) Damped Trend",
y = "GDP (billions USD)",
x = "Year"
)
##### 8.6iv Comparison of the three models As the graphs move from
simple exponential smoothing (ETS(A,N,N)) to linear trend (ETS(A,A,N))
to damped trend (ETS(A,Ad,N)), I observe that the height of the
exponential portion of the curve decreases.
# Load data
data(aus_production)
# Filter for Gas data (remove missing values)
gas_data <- aus_production %>%
filter(!is.na(Gas))
# View the data
head(gas_data)
tail(gas_data)
# Plot the Gas series
gas_data %>%
autoplot(Gas) +
labs(
title = "Australian Quarterly Gas Production",
y = "Gas Production",
x = "Quarter"
) +
theme_minimal()
# Summary statistics
print("Summary of Gas Production:")
## [1] "Summary of Gas Production:"
summary(gas_data$Gas)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.00 11.25 105.00 99.21 163.75 252.00
# Fit additive seasonality model
fit_gas_aaa <- gas_data %>%
model(ETS(Gas ~ error("A") + trend("A") + season("A")))
# View the model parameters
print("ETS(A,A,A) Model - Additive Seasonality:")
## [1] "ETS(A,A,A) Model - Additive Seasonality:"
report(fit_gas_aaa)
## Series: Gas
## Model: ETS(A,A,A)
## Smoothing parameters:
## alpha = 0.5209159
## beta = 0.0001000024
## gamma = 0.4180453
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 10.06526 0.9870514 -5.890283 15.61932 -2.811314 -6.917728
##
## sigma^2: 23.5601
##
## AIC AICc BIC
## 1872.452 1873.318 1902.913
# Generate forecasts for next 8 quarters (2 years)
fc_gas_aaa <- fit_gas_aaa %>%
forecast(h = 8)
# View forecasts
fc_gas_aaa
# Plot the forecasts
fc_gas_aaa %>%
autoplot(gas_data, level = c(80, 95)) +
scale_fill_manual(
values = c("lightblue", "darkblue"),
breaks = c("80%", "95%")
) +
labs(
title = "Gas Production Forecast - ETS(A,A,A) Additive Seasonality",
y = "Gas Production",
x = "Quarter"
) +
theme_minimal()
# Calculate accuracy
accuracy_gas_aaa <- accuracy(fit_gas_aaa)
print("ETS(A,A,A) Accuracy:")
## [1] "ETS(A,A,A) Accuracy:"
print(accuracy_gas_aaa)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(Gas ~ error(\"A… Trai… 0.00525 4.76 3.35 -4.69 10.9 0.600 0.628 0.0772
# Fit multiplicative seasonality model
fit_gas_mam <- gas_data %>%
model(ETS(Gas ~ error("M") + trend("A") + season("M")))
# View the model parameters
print("ETS(M,A,M) Model - Multiplicative Seasonality:")
## [1] "ETS(M,A,M) Model - Multiplicative Seasonality:"
report(fit_gas_mam)
## Series: Gas
## Model: ETS(M,A,M)
## Smoothing parameters:
## alpha = 0.6528545
## beta = 0.1441675
## gamma = 0.09784922
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 5.945592 0.07062881 0.9309236 1.177883 1.074851 0.8163427
##
## sigma^2: 0.0032
##
## AIC AICc BIC
## 1680.929 1681.794 1711.389
# Generate forecasts for next 8 quarters
fc_gas_mam <- fit_gas_mam %>%
forecast(h = 8)
# View forecasts
fc_gas_mam
# Plot the forecasts
fc_gas_mam %>%
autoplot(gas_data, level = c(80, 95)) +
scale_fill_manual(
values = c("lightgreen", "darkgreen"),
breaks = c("80%", "95%")
) +
labs(
title = "Gas Production Forecast - ETS(M,A,M) Multiplicative Seasonality",
y = "Gas Production",
x = "Quarter"
) +
theme_minimal()
# Calculate accuracy
accuracy_gas_mam <- accuracy(fit_gas_mam)
print("ETS(M,A,M) Accuracy:")
## [1] "ETS(M,A,M) Accuracy:"
print(accuracy_gas_mam)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(Gas ~ error(\"M… Trai… -0.115 4.60 3.02 0.199 4.08 0.542 0.606 -0.0131
# Plot both forecasts together
gas_data %>%
model(
Additive = ETS(Gas ~ error("A") + trend("A") + season("A")),
Multiplicative = ETS(Gas ~ error("M") + trend("A") + season("M"))
) %>%
forecast(h = 8) %>%
autoplot(gas_data, level = NULL) +
labs(
title = "Additive vs Multiplicative Seasonality",
y = "Gas Production",
x = "Quarter"
) +
theme_minimal()
# Compare accuracy metrics
comparison_seasonality <- data.frame(
Model = c("Additive (A,A,A)", "Multiplicative (M,A,M)"),
RMSE = c(accuracy_gas_aaa$RMSE, accuracy_gas_mam$RMSE),
MAE = c(accuracy_gas_aaa$MAE, accuracy_gas_mam$MAE),
AIC = c(glance(fit_gas_aaa)$AIC, glance(fit_gas_mam)$AIC),
BIC = c(glance(fit_gas_aaa)$BIC, glance(fit_gas_mam)$BIC)
)
print("Additive vs Multiplicative Comparison:")
## [1] "Additive vs Multiplicative Comparison:"
print(comparison_seasonality)
## Model RMSE MAE AIC BIC
## 1 Additive (A,A,A) 4.763979 3.345044 1872.452 1902.913
## 2 Multiplicative (M,A,M) 4.595113 3.021727 1680.929 1711.389
# Fit multiplicative seasonality with damped trend
fit_gas_madm <- gas_data %>%
model(ETS(Gas ~ error("M") + trend("Ad") + season("M")))
# View the model parameters
print("ETS(M,Ad,M) Model - Damped Trend:")
## [1] "ETS(M,Ad,M) Model - Damped Trend:"
report(fit_gas_madm)
## Series: Gas
## Model: ETS(M,Ad,M)
## Smoothing parameters:
## alpha = 0.6489044
## beta = 0.1551275
## gamma = 0.09369372
## phi = 0.98
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 5.858941 0.09944006 0.9281912 1.177903 1.07678 0.8171255
##
## sigma^2: 0.0033
##
## AIC AICc BIC
## 1684.028 1685.091 1717.873
# Generate forecasts for next 8 quarters
fc_gas_madm <- fit_gas_madm %>%
forecast(h = 8)
# View forecasts
fc_gas_madm
# Plot the forecasts
fc_gas_madm %>%
autoplot(gas_data, level = c(80, 95)) +
scale_fill_manual(
values = c("pink", "purple"),
breaks = c("80%", "95%")
) +
labs(
title = "Gas Production Forecast - ETS(M,Ad,M) Damped Trend",
y = "Gas Production",
x = "Quarter"
) +
theme_minimal()
# Calculate accuracy
accuracy_gas_madm <- accuracy(fit_gas_madm)
print("ETS(M,Ad,M) Accuracy:")
## [1] "ETS(M,Ad,M) Accuracy:"
print(accuracy_gas_madm)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(Gas ~ error(\… Trai… -0.00439 4.59 3.03 0.326 4.10 0.544 0.606 -0.0217
Based on the outputs fro the code the damped trend does not improve statistical fit since there is a higher AIC and BIC value. and lesser values would have indicated a better fit. Damped may still be preferred for conservative long-term forecasting to avoid unrealistic exponential growth projections, despite slightly worse statistical fit. Multiplicative gives the best AIC value indicating thta it is the best predictive model.
Multiplicative seasonality is necessary for this series since there is a clear seasonal pattern that increases in magnitude as the overall level of production increases.
# Loading the data
set.seed(12345678)
# Load data and select a series
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
# View the selected series
print(paste("Selected Series ID:", unique(myseries$`Series ID`)))
## [1] "Selected Series ID: A3349767W"
head(myseries)
# Plot the series
myseries %>%
autoplot(Turnover) +
labs(
title = "Australian Retail Turnover - Monthly Data",
y = "Turnover ($ Million)",
x = "Month"
) +
theme_minimal()
##### 8.8i Explore the retail data
# Seasonal plot
myseries %>%
gg_season(Turnover) +
labs(
title = "Seasonal Pattern by Year",
y = "Turnover"
) +
theme_minimal()
# Subseries plot
myseries %>%
gg_subseries(Turnover) +
labs(
title = "Seasonal Subseries Plot",
y = "Turnover"
) +
theme_minimal()
# Lag plot
myseries %>%
gg_lag(Turnover, geom = "point") +
labs(title = "Lag Plots") +
theme_minimal()
# ACF plot
myseries %>%
ACF(Turnover) %>%
autoplot() +
labs(title = "Autocorrelation Function") +
theme_minimal()
##### 8.8ii Why is multiplicative seasonality necessary for this
series?
# Split data into training and test sets
myseries_train <- myseries %>%
filter(year(Month) <= 2010)
myseries_test <- myseries %>%
filter(year(Month) > 2010)
# Fit both additive and multiplicative models
fit_comparison <- myseries_train %>%
model(
Additive = ETS(Turnover ~ error("A") + trend("A") + season("A")),
Multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M"))
)
# Compare models
comparison_add_mult <- glance(fit_comparison) %>%
select(.model, AIC, AICc, BIC)
print("Additive vs Multiplicative Seasonality:")
## [1] "Additive vs Multiplicative Seasonality:"
print(comparison_add_mult)
## # A tibble: 2 × 4
## .model AIC AICc BIC
## <chr> <dbl> <dbl> <dbl>
## 1 Additive 1288. 1291. 1350.
## 2 Multiplicative 1164. 1166. 1225.
# Accuracy comparison
accuracy_comparison <- accuracy(fit_comparison)
print("Accuracy Metrics:")
## [1] "Accuracy Metrics:"
print(accuracy_comparison)
## # A tibble: 2 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Northern … Clothin… Addit… Trai… 0.00335 0.602 0.451 -0.487 6.37 0.493 0.496
## 2 Northern … Clothin… Multi… Trai… -0.0119 0.518 0.384 -0.446 5.21 0.420 0.427
## # ℹ 1 more variable: ACF1 <dbl>
# Visualize the difference
myseries_train %>%
model(
Additive = ETS(Turnover ~ error("A") + trend("A") + season("A")),
Multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M"))
) %>%
forecast(h = 24) %>%
autoplot(myseries_train, level = NULL) +
labs(
title = "Additive vs Multiplicative Seasonality",
y = "Turnover",
x = "Month"
) +
theme_minimal()
- Seasonal variation increases proportionally with the level of turnover
- Early years show small seasonal swings, later years show much larger
swings - AIC/BIC values are significantly lower for multiplicative
(better fit)
# Fit Holt-Winters multiplicative (linear trend)
fit_hw_linear <- myseries_train %>%
model(HW_Linear = ETS(Turnover ~ error("M") + trend("A") + season("M")))
# View model
print("Holt-Winters Multiplicative (Linear Trend):")
## [1] "Holt-Winters Multiplicative (Linear Trend):"
report(fit_hw_linear)
## Series: Turnover
## Model: ETS(M,A,M)
## Smoothing parameters:
## alpha = 0.5406521
## beta = 0.0001338935
## gamma = 0.0001008974
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5]
## 2.659274 0.04289703 0.8431579 0.7716643 0.8407577 1.342791 1.016251 1.04332
## s[-6] s[-7] s[-8] s[-9] s[-10] s[-11]
## 1.043807 1.106141 1.129276 1.024721 0.9733536 0.864759
##
## sigma^2: 0.0047
##
## AIC AICc BIC
## 1163.602 1166.002 1224.963
# Forecast
fc_hw_linear <- fit_hw_linear %>%
forecast(h = 24)
# Plot
fc_hw_linear %>%
autoplot(myseries_train, level = c(80, 95)) +
scale_fill_manual(values = c("lightblue", "darkblue")) +
labs(
title = "Holt-Winters Multiplicative - Linear Trend",
y = "Turnover",
x = "Month"
) +
theme_minimal()
# Fit Holt-Winters multiplicative with damped trend
fit_hw_damped <- myseries_train %>%
model(HW_Damped = ETS(Turnover ~ error("M") + trend("Ad") + season("M")))
# View model
print("Holt-Winters Multiplicative (Damped Trend):")
## [1] "Holt-Winters Multiplicative (Damped Trend):"
report(fit_hw_damped)
## Series: Turnover
## Model: ETS(M,Ad,M)
## Smoothing parameters:
## alpha = 0.4429807
## beta = 0.005351177
## gamma = 0.0001057534
## phi = 0.9799998
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5]
## 2.402317 0.07313785 0.8381105 0.7537881 0.8350622 1.331381 1.011018 1.047677
## s[-6] s[-7] s[-8] s[-9] s[-10] s[-11]
## 1.055305 1.123139 1.149219 1.020584 0.9755371 0.8591794
##
## sigma^2: 0.005
##
## AIC AICc BIC
## 1177.433 1180.126 1242.404
# Forecast
fc_hw_damped <- fit_hw_damped %>%
forecast(h = 24)
# Plot
fc_hw_damped %>%
autoplot(myseries_train, level = c(80, 95)) +
scale_fill_manual(values = c("lightgreen", "darkgreen")) +
labs(
title = "Holt-Winters Multiplicative - Damped Trend",
y = "Turnover",
x = "Month"
) +
theme_minimal()
# Compare both visually
myseries_train %>%
model(
Linear = ETS(Turnover ~ error("M") + trend("A") + season("M")),
Damped = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
) %>%
forecast(h = 24) %>%
autoplot(myseries_train, level = NULL) +
labs(
title = "Linear vs Damped Trend",
y = "Turnover",
x = "Month"
) +
theme_minimal()
# Fit both models
fit_both <- myseries_train %>%
model(
Linear_Trend = ETS(Turnover ~ error("M") + trend("A") + season("M")),
Damped_Trend = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
)
# Get accuracy metrics (includes one-step forecast RMSE)
accuracy_both <- accuracy(fit_both)
print("One-Step Forecast Accuracy Comparison:")
## [1] "One-Step Forecast Accuracy Comparison:"
print(accuracy_both %>% select(.model, RMSE, MAE, MAPE))
## # A tibble: 2 × 4
## .model RMSE MAE MAPE
## <chr> <dbl> <dbl> <dbl>
## 1 Linear_Trend 0.518 0.384 5.21
## 2 Damped_Trend 0.519 0.392 5.34
# Compare AIC/BIC
comparison_models <- glance(fit_both) %>%
select(.model, AIC, AICc, BIC)
print("Information Criteria Comparison:")
## [1] "Information Criteria Comparison:"
print(comparison_models)
## # A tibble: 2 × 4
## .model AIC AICc BIC
## <chr> <dbl> <dbl> <dbl>
## 1 Linear_Trend 1164. 1166. 1225.
## 2 Damped_Trend 1177. 1180. 1242.
# Determine preferred model
if (accuracy_both$RMSE[1] < accuracy_both$RMSE[2]) {
preferred <- "Linear Trend (M,A,M)"
} else {
preferred <- "Damped Trend (M,Ad,M)"
}
print(paste("PREFERRED MODEL:", preferred))
## [1] "PREFERRED MODEL: Linear Trend (M,A,M)"
print("Based on lowest RMSE and information criteria")
## [1] "Based on lowest RMSE and information criteria"
# Use the better model from Question 3
# Assuming linear trend is better (adjust if needed)
best_fit <- myseries_train %>%
model(Best = ETS(Turnover ~ error("M") + trend("A") + season("M")))
# Check residuals
augment(best_fit) %>%
autoplot(.innov) +
labs(
title = "Residual Plot",
y = "Residuals",
x = "Month"
) +
theme_minimal()
# ACF of residuals
augment(best_fit) %>%
ACF(.innov) %>%
autoplot() +
labs(title = "ACF of Residuals") +
theme_minimal()
# Ljung-Box test
ljung_box_test <- augment(best_fit) %>%
features(.innov, ljung_box, lag = 24)
print("Ljung-Box Test for White Noise:")
## [1] "Ljung-Box Test for White Noise:"
print(ljung_box_test)
## # A tibble: 1 × 5
## State Industry .model lb_stat lb_pvalue
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Northern Territory Clothing, footwear and personal a… Best 21.3 0.619
if (ljung_box_test$lb_pvalue > 0.05) {
print("RESULT: Residuals appear to be white noise (p > 0.05)")
} else {
print("WARNING: Residuals may not be white noise (p < 0.05)")
}
## [1] "RESULT: Residuals appear to be white noise (p > 0.05)"
# Histogram of residuals
augment(best_fit) %>%
ggplot(aes(x = .innov)) +
geom_histogram(bins = 30, fill = "steelblue", color = "black") +
labs(
title = "Distribution of Residuals",
x = "Residuals",
y = "Frequency"
) +
theme_minimal()
##### 8.8vi Seasonal Naive Forecast
# Fit models on training data (up to end of 2010)
fit_all_methods <- myseries_train %>%
model(
SNAIVE = SNAIVE(Turnover),
HW_Linear = ETS(Turnover ~ error("M") + trend("A") + season("M")),
HW_Damped = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
)
# Forecast on test set
fc_all <- fit_all_methods %>%
forecast(h = nrow(myseries_test))
# Calculate test set accuracy
test_accuracy <- accuracy(fc_all, myseries_test)
print("Test Set RMSE Comparison:")
## [1] "Test Set RMSE Comparison:"
print(test_accuracy %>%
select(.model, .type, RMSE, MAE, MAPE) %>%
arrange(RMSE))
## # A tibble: 3 × 5
## .model .type RMSE MAE MAPE
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 HW_Damped Test 1.15 0.878 6.45
## 2 SNAIVE Test 1.55 1.24 9.06
## 3 HW_Linear Test 1.78 1.60 12.6
# Visualize all forecasts against actual test data
fc_all %>%
autoplot(myseries, level = NULL) +
autolayer(myseries_test, Turnover, color = "black", size = 1) +
labs(
title = "Forecast Comparison: SNAIVE vs Holt-Winters Methods",
subtitle = "Black line shows actual test set values",
y = "Turnover",
x = "Month"
) +
theme_minimal()
# Determine if we beat SNAIVE
snaive_rmse <- test_accuracy %>%
filter(.model == "SNAIVE") %>%
pull(RMSE)
hw_linear_rmse <- test_accuracy %>%
filter(.model == "HW_Linear") %>%
pull(RMSE)
hw_damped_rmse <- test_accuracy %>%
filter(.model == "HW_Damped") %>%
pull(RMSE)
print("")
## [1] ""
print("=== RESULTS ===")
## [1] "=== RESULTS ==="
print(paste("SNAIVE RMSE:", round(snaive_rmse, 2)))
## [1] "SNAIVE RMSE: 1.55"
print(paste("HW Linear RMSE:", round(hw_linear_rmse, 2)))
## [1] "HW Linear RMSE: 1.78"
print(paste("HW Damped RMSE:", round(hw_damped_rmse, 2)))
## [1] "HW Damped RMSE: 1.15"
if (hw_linear_rmse < snaive_rmse) {
print("Holt-Winters Linear Trend beats SNAIVE")
} else {
print("Holt-Winters Linear Trend does NOT beat SNAIVE")
}
## [1] "Holt-Winters Linear Trend does NOT beat SNAIVE"
# Using the same retail data from previous analysis
# Make sure myseries_train and myseries_test are already loaded
# Step 1: Determine optimal Box-Cox lambda
lambda <- myseries_train %>%
features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
print(paste("Optimal Box-Cox lambda:", round(lambda, 4)))
## [1] "Optimal Box-Cox lambda: 0.1971"
# Step 2: Apply STL decomposition to Box-Cox transformed series
fit_stl_ets <- myseries_train %>%
model(
STL_ETS = decomposition_model(
STL(box_cox(Turnover, lambda) ~ season(window = "periodic")),
ETS(season_adjust ~ error("A") + trend("A") + season("N"))
)
)
# View the model
print("STL + ETS Model:")
## [1] "STL + ETS Model:"
report(fit_stl_ets)
## Series: Turnover
## Model: STL decomposition model
## Transformation: box_cox(Turnover, lambda)
## Combination: season_adjust + season_year
##
## ========================================
##
## Series: season_adjust
## Model: ETS(A,A,N)
## Smoothing parameters:
## alpha = 0.5691359
## beta = 0.0001000037
##
## Initial states:
## l[0] b[0]
## 1.087742 0.007973452
##
## sigma^2: 0.0097
##
## AIC AICc BIC
## 271.8798 272.1045 289.9272
##
## Series: season_year
## Model: SNAIVE
##
## sigma^2: 0
# Step 4: Generate forecasts
fc_stl_ets <- fit_stl_ets %>%
forecast(h = nrow(myseries_test))
# Plot forecasts
fc_stl_ets %>%
autoplot(myseries_train, level = c(80, 95)) +
autolayer(myseries_test, Turnover, color = "black") +
scale_fill_manual(values = c("lightcoral", "darkred")) +
labs(
title = "STL + ETS Forecast (Box-Cox Transformed)",
subtitle = "Black line shows actual test set values",
y = "Turnover",
x = "Month"
) +
theme_minimal()
# Step 5: Compare with previous best methods
fit_all_methods <- myseries_train %>%
model(
SNAIVE = SNAIVE(Turnover),
HW_Linear = ETS(Turnover ~ error("M") + trend("A") + season("M")),
HW_Damped = ETS(Turnover ~ error("M") + trend("Ad") + season("M")),
STL_ETS = decomposition_model(
STL(box_cox(Turnover, lambda) ~ season(window = "periodic")),
ETS(season_adjust ~ error("A") + trend("A") + season("N"))
)
)
# Forecast all methods
fc_all_methods <- fit_all_methods %>%
forecast(h = nrow(myseries_test))
# Calculate test set accuracy
test_accuracy_all <- accuracy(fc_all_methods, myseries_test)
print("=== TEST SET ACCURACY COMPARISON ===")
## [1] "=== TEST SET ACCURACY COMPARISON ==="
print(test_accuracy_all %>%
select(.model, .type, RMSE, MAE, MAPE) %>%
arrange(RMSE))
## # A tibble: 4 × 5
## .model .type RMSE MAE MAPE
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 HW_Damped Test 1.15 0.878 6.45
## 2 SNAIVE Test 1.55 1.24 9.06
## 3 HW_Linear Test 1.78 1.60 12.6
## 4 STL_ETS Test 4.02 3.62 27.9
# Visual comparison
fc_all_methods %>%
autoplot(myseries, level = NULL) +
autolayer(myseries_test, Turnover, color = "black", size = 1.2) +
labs(
title = "All Methods Comparison on Test Set",
subtitle = "Black line = Actual values",
y = "Turnover",
x = "Month"
) +
theme_minimal()
# Create summary table
summary_table <- test_accuracy_all %>%
filter(.type == "Test") %>%
select(.model, RMSE, MAE, MAPE) %>%
arrange(RMSE) %>%
mutate(
Rank = row_number(),
RMSE_Improvement = round(100 * (RMSE[.model == "SNAIVE"] - RMSE) / RMSE[.model == "SNAIVE"], 2)
)
print("=== RANKED PERFORMANCE (Best to Worst) ===")
## [1] "=== RANKED PERFORMANCE (Best to Worst) ==="
print(summary_table)
## # A tibble: 4 × 6
## .model RMSE MAE MAPE Rank RMSE_Improvement
## <chr> <dbl> <dbl> <dbl> <int> <dbl>
## 1 HW_Damped 1.15 0.878 6.45 1 25.8
## 2 SNAIVE 1.55 1.24 9.06 2 0
## 3 HW_Linear 1.78 1.60 12.6 3 -14.9
## 4 STL_ETS 4.02 3.62 27.9 4 -159.
# Determine best method
best_method <- summary_table %>% slice(1) %>% pull(.model)
best_rmse <- summary_table %>% slice(1) %>% pull(RMSE)
snaive_rmse <- summary_table %>% filter(.model == "SNAIVE") %>% pull(RMSE)
print("")
## [1] ""
print("=== RESULTS ===")
## [1] "=== RESULTS ==="
print(paste("Best Method:", best_method))
## [1] "Best Method: HW_Damped"
print(paste("Best RMSE:", round(best_rmse, 2)))
## [1] "Best RMSE: 1.15"
print(paste("SNAIVE RMSE:", round(snaive_rmse, 2)))
## [1] "SNAIVE RMSE: 1.55"
print(paste("Improvement over SNAIVE:",
round(100 * (snaive_rmse - best_rmse) / snaive_rmse, 2), "%"))
## [1] "Improvement over SNAIVE: 25.82 %"
# Check if STL+ETS is best
if (best_method == "STL_ETS") {
print("")
print("STL + ETS is the BEST METHOD!")
print("Box-Cox transformation + STL decomposition improves forecast accuracy")
} else {
stl_rank <- summary_table %>% filter(.model == "STL_ETS") %>% pull(Rank)
print("")
print(paste("STL + ETS ranked #", stl_rank, "out of", nrow(summary_table)))
print("Previous Holt-Winters methods perform better on this data")
}
## [1] ""
## [1] "STL + ETS ranked # 4 out of 4"
## [1] "Previous Holt-Winters methods perform better on this data"
# Check residuals of STL+ETS
print("")
## [1] ""
print("=== RESIDUAL DIAGNOSTICS FOR STL+ETS ===")
## [1] "=== RESIDUAL DIAGNOSTICS FOR STL+ETS ==="
augment(fit_stl_ets) %>%
autoplot(.innov) +
labs(
title = "STL+ETS Residuals Over Time",
y = "Residuals"
) +
theme_minimal()
augment(fit_stl_ets) %>%
ACF(.innov) %>%
autoplot() +
labs(title = "ACF of STL+ETS Residuals") +
theme_minimal()
ljung_box_stl <- augment(fit_stl_ets) %>%
features(.innov, ljung_box, lag = 24)
print("Ljung-Box Test:")
## [1] "Ljung-Box Test:"
print(ljung_box_stl)
## # A tibble: 1 × 5
## State Industry .model lb_stat lb_pvalue
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Northern Territory Clothing, footwear and personal a… STL_E… 28.9 0.224
if (ljung_box_stl$lb_pvalue > 0.05) {
print("Residuals appear to be white noise (p > 0.05)")
} else {
print("WARNING: Residuals may show autocorrelation (p < 0.05)")
}
## [1] "Residuals appear to be white noise (p > 0.05)"
# Training set accuracy comparison
train_accuracy <- accuracy(fit_all_methods)
print("")
## [1] ""
print("=== TRAINING SET ACCURACY (for reference) ===")
## [1] "=== TRAINING SET ACCURACY (for reference) ==="
print(train_accuracy %>%
select(.model, .type, RMSE, MAE, MAPE) %>%
arrange(RMSE))
## # A tibble: 4 × 5
## .model .type RMSE MAE MAPE
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 HW_Linear Training 0.518 0.384 5.21
## 2 HW_Damped Training 0.519 0.392 5.34
## 3 STL_ETS Training 0.536 0.392 5.08
## 4 SNAIVE Training 1.21 0.915 12.4