DATA 621 Exponential Smoothing

knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
message = FALSE
)
library(fpp3)

## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr

## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.2 ──

## ✔ tibble      3.3.0     ✔ tsibble     1.1.6
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.1     ✔ feasts      0.4.2
## ✔ lubridate   1.9.4     ✔ fable       0.4.1
## ✔ ggplot2     4.0.0

## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()

library(tsibble)
library(dplyr)
library(fable)
library(ggplot2)
library(feasts)

# Load the data
data(aus_livestock)
head(aus_livestock)

8.1a. Use the ETS Function to estimate the equivalent model for a simple exponential smoothing

For this question a simple exponential component model is required. The simple exponential smoothing model by definition has: - an additive error component - no trend component - no seasonal component

This translates to error(“A”) + trend(“N”) + season(“N”).

Filter the data for the pigs slaughtered.

# Filter for pigs in Victoria
pigs_vic <- aus_livestock %>%
  filter(Animal == "Pigs", State == "Victoria")

# Fit simple exponential smoothing model using ETS
fit <- pigs_vic %>%
  model(ETS(Count ~ error("A") + trend("N") + season("N")))

# View the model parameters 
report(fit)

## Series: Count 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.3221247 
## 
##   Initial states:
##      l[0]
##  100646.6
## 
##   sigma^2:  87480760
## 
##      AIC     AICc      BIC 
## 13737.10 13737.14 13750.07

# Generate forecasts for next 4 months
fc <- fit %>%
  forecast(h = 4)

# View forecasts
fc

# Plot the forecasts with 80% and 95% prediction intervals

fc %>%
  autoplot(pigs_vic, level = c(80, 95)) +
  scale_fill_manual(
    values = c("orange", "darkblue"),
    breaks = c("80%", "95%")
  ) +
  labs(
    title = "Pigs Slaughtered in Victoria - 4 Month Forecast",
    y = "Count",
    x = "Month"
  )

8.1b. A 95% prediction interval for the first forecast step by step

# Extract the point forecast for the first forecast 
y_hat <- fc$.mean[1]

# Get residuals from the fitted model
residuals <- augment(fit) %>%
  pull(.resid)

# Calculate standard deviation of residuals (s)
s <- sd(residuals, na.rm = TRUE)

# Compute manual 95% prediction interval
lower_manual <- y_hat - 1.96 * s  # YOUR LOWER BOUND
upper_manual <- y_hat + 1.96 * s  # YOUR UPPER BOUND

8.1b A 95% prediction interval using R formulas hilo() and unpack_hilo() in 1 step

# Extract R's 95% prediction interval for the first forecast
r_interval <- fc %>%
  hilo(level = 95) %>%
  unpack_hilo("95%")

lower_r <- r_interval$`95%_lower`[1]  # R'S LOWER BOUND
upper_r <- r_interval$`95%_upper`[1]  # R'S UPPER BOUND

8.1b Compare the step by step calculalated prediction interval to Rformulas predited value

# Create comparison table

comparison <- data.frame(
  Calculation = c("Manual (y + or - 1.96s)", "R's hilo()"),
  Lower_Bound = c(lower_manual, lower_r),
  Upper_Bound = c(upper_manual, upper_r),
  Width = c(upper_manual - lower_manual, upper_r - lower_r)
)

print(comparison)

##               Calculation Lower_Bound Upper_Bound    Width
## 1 Manual (y + or - 1.96s)    76871.01    113502.1 36631.09
## 2              R's hilo()    76854.79    113518.3 36663.54

print("Key Values:")

## [1] "Key Values:"

print(paste("Point Forecast (y):", y_hat))

## [1] "Point Forecast (y): 95186.5574309915"

print(paste("Standard Deviation (s):", s))

## [1] "Standard Deviation (s): 9344.66579258967"

8.5.a Analysis of the annual exports (% of GDP) of Botswana

Botswana’s exports, measured as a percentage of the country’s total economic output (GDP) and illustrated in the graph below, have grown significantly over the past six decades. The overall trend shows exports increasing from lower levels in the 1960s to higher levels in recent years, though this growth hasn’t been smooth or steady.

The most dramatic expansion occurred in the early 2000s, when exports surged rapidly before experiencing some pullbacks and subsequent recoveries. These ups and downs reflect Botswana’s economy responding to global market conditions, commodity price changes, and international demand for their key exports like diamonds and minerals.

Unlike retail sales that peak during holiday seasons, Botswana’s export patterns don’t follow predictable annual cycles. Instead, the year-to-year variations we observe are likely driven by broader economic forces—such as global recessions, shifts in commodity prices, or changes in trade relationships—making exports somewhat unpredictable in the short term but generally positive over the long haul.

# See all countries
#unique(global_economy$Country)

# Load data 
data(global_economy)

# Select one country - you can change this to any country
country_data <- global_economy %>%
  filter(Country == "Botswana")

# Plot the exports series
country_data %>%
  autoplot(Exports) +
  labs(
    title = "Annual Exports - Botswana",
    y = "Exports (% of GDP)",
    x = "Year"
  ) +
  theme_minimal()

# Summary statistics
print("Summary Statistics:")

## [1] "Summary Statistics:"

summary(country_data$Exports)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   22.77   39.08   49.73   47.17   54.05   75.13

# Check for trends and patterns
print("Data Range:")

## [1] "Data Range:"

print(paste("Years:", min(country_data$Year), "to", max(country_data$Year)))

## [1] "Years: 1960 to 2017"

print(paste("Number of observations:", nrow(country_data)))

## [1] "Number of observations: 58"

8.5.b Using the ETS() model to forecat the series and plot forecasets of Botswana’s annual exports.

# Fit ETS(A,N,N) model - Simple Exponential Smoothing
fit_botswana_ann <- country_data %>%
  model(ETS_ANN = ETS(Exports ~ error("A") + trend("N") + season("N")))

# Generate forecasts for 10 years
fc_botswana_ann <- fit_botswana_ann %>%
  forecast(h = 10)

# Plot forecasts
fc_botswana_ann %>%
  autoplot(country_data, level = c(80, 95)) +
  scale_fill_manual(values = c("lightblue", "darkblue")) +
  labs(
    title = "Simple Exponential Smoothing - Botswana's Exports",
    y = "Exports (% of GDP)",
    x = "Year"
  ) +
  theme_minimal()

# Show model details
print("Exponential Smoothing Analysis:")

## [1] "Exponential Smoothing Analysis:"

report(fit_botswana_ann)

## Series: Exports 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.9998999 
## 
##   Initial states:
##      l[0]
##  26.26254
## 
##   sigma^2:  27.0263
## 
##      AIC     AICc      BIC 
## 430.6855 431.1299 436.8668

8.5.c The Root Mean Squared Error for the training data

# Calculate RMSE for ETS(A,N,N) on training data
accuracy_botswana_ann<- accuracy(fit_botswana_ann)

print("ETS(A,N,N) Accuracy Metrics:")

## [1] "ETS(A,N,N) Accuracy Metrics:"

print(accuracy_botswana_ann)

## # A tibble: 1 × 11
##   Country  .model  .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <fct>    <chr>   <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 Botswana ETS_ANN Training 0.234  5.11  3.93 0.118  8.40 0.983 0.991 0.0871

# Extract RMSE specifically
rmse_botswana_data_ann <- accuracy_botswana_ann$RMSE
print(paste("Root Mean Squared Error for ETS(A,N,N):", round(rmse_botswana_data_ann, 4)))

## [1] "Root Mean Squared Error for ETS(A,N,N): 5.1083"

8.5.d Comparison of the ETS(A,N,N) model to the ETS(A,A,N) model

ETS(A,N,N) assumes our data fluctuates randomly around a relatively stable average with no consistent upward or downward direction.

ETS(A,A,N) recognizes that our data may be steadily rising or falling over time and adjusts for that trend.

Comparing these two models helps us determine whether there’s a meaningful trend in our data. If the trend model performs better, it tells us we should expect values to continue moving in that direction — which is critical information for planning and decision-making.

# Fit ETS(A,A,N) model - Holt's Linear Trend
fit_botswana_aan <- country_data %>%
  model(ETS_AAN = ETS(Exports ~ error("A") + trend("A") + season("N")))

# Generate forecasts
fc_botswana_aan <- fit_botswana_aan %>%
  forecast(h = 10)

# Plot ETS(A,A,N) forecasts
fc_botswana_aan %>%
  autoplot(country_data, level = c(80, 95)) +
  scale_fill_manual(values = c("lightgreen", "darkgreen")) +
  labs(
    title = "ETS(A,A,N) Forecast - Canada Exports",
    y = "Exports (% of GDP)",
    x = "Year"
  ) +
  theme_minimal()

# Show model details
print("ETS(A,A,N) Model:")

## [1] "ETS(A,A,N) Model:"

report(fit_botswana_aan)

## Series: Exports 
## Model: ETS(A,A,N) 
##   Smoothing parameters:
##     alpha = 0.9998999 
##     beta  = 0.05509657 
## 
##   Initial states:
##      l[0]     b[0]
##  26.74895 2.396638
## 
##   sigma^2:  29.464
## 
##      AIC     AICc      BIC 
## 437.5848 438.7387 447.8870

# Calculate RMSE for ETS(A,A,N)
accuracy_botswana_aan <- accuracy(fit_botswana_aan)
rmse_botswana_data_aan <- accuracy_botswana_aan$RMSE

print("ETS(A,A,N) Accuracy Metrics:")

## [1] "ETS(A,A,N) Accuracy Metrics:"

print(accuracy_botswana_aan)

## # A tibble: 1 × 11
##   Country  .model  .type        ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <fct>    <chr>   <chr>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 Botswana ETS_AAN Training -0.900  5.24  4.02 -2.68  8.86  1.01  1.02 0.0652

print(paste("RMSE for ETS(A,A,N):", round(rmse_botswana_data_aan, 4)))

## [1] "RMSE for ETS(A,A,N): 5.2376"

# Comparison table
comparison_table <- data.frame(
  Model = c("ETS(A,N,N)", "ETS(A,A,N)"),
  Parameters = c("2 (level + alpha)", "4 (level + trend + alpha + beta)"),
  RMSE = c(rmse_botswana_data_ann, rmse_botswana_data_aan),
  AIC = c(glance(fit_botswana_ann)$AIC, glance(fit_botswana_aan)$AIC),
  BIC = c(glance(fit_botswana_ann)$BIC, glance(fit_botswana_aan)$BIC)
)

print("Model Comparison:")

## [1] "Model Comparison:"

print(comparison_table)

##        Model                       Parameters     RMSE      AIC      BIC
## 1 ETS(A,N,N)                2 (level + alpha) 5.108268 430.6855 436.8668
## 2 ETS(A,A,N) 4 (level + trend + alpha + beta) 5.237553 437.5848 447.8870

8.5.e Compare forecasts from both methods

The graph below compares two forecasting approaches for Botswana’s exports. The blue line shows forecasts assuming exports will stay relatively flat, while the red line accounts for any upward or downward trends in the data.

By visualizing both models together, we can see which approach better captures the direction our exports are heading. This comparison helps us understand whether we should expect stability or continued growth/decline, which is essential for strategic planning.

# Plot both forecasts together
country_data %>%
  autoplot(Exports) +
  autolayer(fc_botswana_ann, level = NULL, color = "blue", size = 1) +
  autolayer(fc_botswana_aan, level = NULL, color = "red", size = 1) +
  labs(
    title = "Forecast Comparison: ETS(A,N,N) vs ETS(A,A,N)",
    y = "Exports (% of GDP)",
    x = "Year"
  ) +
  theme_minimal() +
  scale_color_manual(
    name = "Model",
    values = c("blue", "red"),
    labels = c("ETS(A,N,N)", "ETS(A,A,N)")
  )

# Compare first 5 forecast values
forecast_comparison <- data.frame(
  Year = fc_botswana_ann$Year[1:5],
  ETS_ANN = fc_botswana_ann$.mean[1:5],
  ETS_AAN = fc_botswana_aan$.mean[1:5],
  Difference = fc_botswana_aan$.mean[1:5] - fc_botswana_ann$.mean[1:5]
)
print("Forecast Value Comparison (First 5 Years):")

## [1] "Forecast Value Comparison (First 5 Years):"

print(forecast_comparison)

##   Year ETS_ANN  ETS_AAN Difference
## 1 2018 39.8287 39.34957 -0.4791221
## 2 2019 39.8287 38.87044 -0.9582512
## 3 2020 39.8287 38.39132 -1.4373803
## 4 2021 39.8287 37.91219 -1.9165094
## 5 2022 39.8287 37.43306 -2.3956385

8.5.f Calculate the 95% prediction interval for the first forecast step by step

Manual calculation using RMSE for ETS(A,N,N)

# ============================================================
# Manual calculation using RMSE for ETS(A,N,N)
# ============================================================
y_hat_botswana_ann <- fc_botswana_ann$.mean[1]
lower_manual_botswana_ann <- y_hat_botswana_ann - 1.96 * rmse_botswana_data_ann
upper_manual_botswana_ann <- y_hat_botswana_ann + 1.96 * rmse_botswana_data_ann

print(" Step by step calculation of Botswana's export %GPP predictions - ETS(A,N,N)")

## [1] " Step by step calculation of Botswana's export %GPP predictions - ETS(A,N,N)"

print(paste("Our best estimate for next year's exports:", round(y_hat_botswana_ann, 2), "% of GDP"))

## [1] "Our best estimate for next year's exports: 39.83 % of GDP"

print(paste("We're 95% confident exports will be above:", round(lower_manual_botswana_ann, 2), "% of GDP"))

## [1] "We're 95% confident exports will be above: 29.82 % of GDP"

print(paste("We're 95% confident exports will be below:", round(upper_manual_botswana_ann, 2), "% of GDP"))

## [1] "We're 95% confident exports will be below: 49.84 % of GDP"

# ============================================================
# R's advanced calculation for ETS(A,N,N)
# ============================================================
r_interval_botswana_ann <- fc_botswana_ann %>%
  hilo(level = 95) %>%
  unpack_hilo("95%")

lower_r_botswana_ann <- r_interval_botswana_ann$`95%_lower`[1]
upper_r_botswana_ann <- r_interval_botswana_ann$`95%_upper`[1]


print("R'S Advanced Calculation of Botswana's export %GPP predictions  - ETS(A,N,N)")

## [1] "R'S Advanced Calculation of Botswana's export %GPP predictions  - ETS(A,N,N)"

print(paste("R's best estimate for next year's exports:", round(y_hat_botswana_ann, 2), "% of GDP"))

## [1] "R's best estimate for next year's exports: 39.83 % of GDP"

print(paste("R says we're 95% confident exports will be above:", round(lower_r_botswana_ann, 2), "% of GDP"))

## [1] "R says we're 95% confident exports will be above: 29.64 % of GDP"

print(paste("R says we're 95% confident exports will be below:", round(upper_r_botswana_ann, 2), "% of GDP"))

## [1] "R says we're 95% confident exports will be below: 50.02 % of GDP"

Calculation based on the ETS(A,A,N) Model

# Manual 95% Prediction Interval for ETS(A,A,N) Model
y_hat_botswana_aan <- fc_botswana_aan$.mean[1]
lower_manual_botswana_aan <- y_hat_botswana_aan - 1.96 * rmse_botswana_data_aan
upper_manual_botswana_aan <- y_hat_botswana_aan + 1.96 * rmse_botswana_data_aan

print("Based on the reslts of the manual calculation of Botswana's export %GPP predictions using the ETS(A,A,N) model, we can make the following statements:")

## [1] "Based on the reslts of the manual calculation of Botswana's export %GPP predictions using the ETS(A,A,N) model, we can make the following statements:"

print(paste("Our best estimate for next year's exports:", round(y_hat_botswana_aan, 2), "% of GDP"))

## [1] "Our best estimate for next year's exports: 39.35 % of GDP"

print(paste("We're 95% confident exports will be above:", round(lower_manual_botswana_aan, 2), "% of GDP"))

## [1] "We're 95% confident exports will be above: 29.08 % of GDP"

print(paste("We're 95% confident exports will be below:", round(upper_manual_botswana_aan, 2), "% of GDP"))

## [1] "We're 95% confident exports will be below: 49.62 % of GDP"

print("============================================")

## [1] "============================================"

# R's built-in 95% prediction interval for ETS(A,A,N)
r_interval_botswana_aan <- fc_botswana_aan %>%
  hilo(level = 95) %>%
  unpack_hilo("95%")

lower_r_botswana_aan <- r_interval_botswana_aan$`95%_lower`[1]
upper_r_botswana_aan <- r_interval_botswana_aan$`95%_upper`[1]

print("Based on the results of R's built-in 95% prediction interval for ETS(A,A,N) model, we can make the following statements")

## [1] "Based on the results of R's built-in 95% prediction interval for ETS(A,A,N) model, we can make the following statements"

print(paste("R's best estimate for next year's exports:", round(y_hat_botswana_aan, 2), "% of GDP"))

## [1] "R's best estimate for next year's exports: 39.35 % of GDP"

print(paste("R says we're 95% confident exports will be above:", round(lower_r_botswana_aan, 2), "% of GDP"))

## [1] "R says we're 95% confident exports will be above: 28.71 % of GDP"

print(paste("R says we're 95% confident exports will be below:", round(upper_r_botswana_aan, 2), "% of GDP"))

## [1] "R says we're 95% confident exports will be below: 49.99 % of GDP"

Comparison tables

# Comparison table for ETS(A,N,N)
print("=== ETS(A,N,N) - 95% Prediction Interval Comparison ===")

## [1] "=== ETS(A,N,N) - 95% Prediction Interval Comparison ==="

comparison_botswana_ann <- data.frame(
  Method = c("Manual RMSE)", "R's hilo()"),
  Lower_Bound = c(lower_manual_botswana_ann, lower_r_botswana_ann),
  Point_Forecast = c(y_hat_botswana_ann, y_hat_botswana_ann),
  Upper_Bound = c(upper_manual_botswana_ann, upper_r_botswana_ann),
  Width = c(upper_manual_botswana_ann - lower_manual_botswana_ann, 
            upper_r_botswana_ann - lower_r_botswana_ann)
)
print(comparison_botswana_ann)

##         Method Lower_Bound Point_Forecast Upper_Bound    Width
## 1 Manual RMSE)    29.81649        39.8287    49.84090 20.02441
## 2   R's hilo()    29.63946        39.8287    50.01793 20.37848

print("")

## [1] ""

print(paste("RMSE for ETS(A,N,N):", round(rmse_botswana_data_ann, 4)))

## [1] "RMSE for ETS(A,N,N): 5.1083"

print(paste("Difference in Lower Bound:", round(lower_r_botswana_ann - lower_manual_botswana_ann, 4)))

## [1] "Difference in Lower Bound: -0.177"

print(paste("Difference in Upper Bound:", round(upper_r_botswana_ann - upper_manual_botswana_ann, 4)))

## [1] "Difference in Upper Bound: 0.177"

print("")

## [1] ""

print("=== ETS(A,A,N) - 95% Prediction Interval Comparison ===")

## [1] "=== ETS(A,A,N) - 95% Prediction Interval Comparison ==="

comparison_botswana_aan <- data.frame(
  Method = c("Manual (y_hat +/- 1.96*RMSE)", "R's hilo()"),
  Lower_Bound = c(lower_manual_botswana_aan, lower_r_botswana_aan),
  Point_Forecast = c(y_hat_botswana_aan, y_hat_botswana_aan),
  Upper_Bound = c(upper_manual_botswana_aan, upper_r_botswana_aan),
  Width = c(upper_manual_botswana_aan - lower_manual_botswana_aan, 
            upper_r_botswana_aan - lower_r_botswana_aan)
)
print(comparison_botswana_aan)

##                         Method Lower_Bound Point_Forecast Upper_Bound    Width
## 1 Manual (y_hat +/- 1.96*RMSE)    29.08397       39.34957    49.61518 20.53121
## 2                   R's hilo()    28.71075       39.34957    49.98840 21.27765

print("")

## [1] ""

print(paste("RMSE for ETS(A,A,N):", round(rmse_botswana_data_aan, 4)))

## [1] "RMSE for ETS(A,A,N): 5.2376"

print(paste("Difference in Lower Bound:", round(lower_r_botswana_aan - lower_manual_botswana_aan, 4)))

## [1] "Difference in Lower Bound: -0.3732"

print(paste("Difference in Upper Bound:", round(upper_r_botswana_aan - upper_manual_botswana_aan, 4)))

## [1] "Difference in Upper Bound: 0.3732"

In Botswana’s case, a 20-21 percentage point range in the value of exports as a % of GDP is quite wide, indicating that while exports around 39% of GDP, there’s significant uncertainty and that needs to be prepared for.

A stakeholder such as a minister of Finance or economic planner could use the lower bound (28.7-29.1%) to stress-test whether budgets, cash reserves, and operations can withstand a weaker export environment, while the upper bound (49.6-49.9%) should inform capacity planning and investment decisions to ensure the organization can capture opportunities if exports surge beyond expectations.

8.6 Experiment with different forms of the ETS() model

# Load data and filter for China
data(global_economy)

china_gdp <- global_economy %>%
  filter(Country == "China")

# Plot the GDP series
china_gdp %>%
  autoplot(GDP) +
  labs(
    title = "China GDP (1960-2017)",
    y = "GDP (billions USD)",
    x = "Year"
  ) +
  theme_minimal()

# Summary statistics
print("Summary of China GDP:")

## [1] "Summary of China GDP:"

print(summary(china_gdp$GDP))

##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 4.721e+10 1.455e+11 3.301e+11 1.970e+12 1.613e+12 1.224e+13

names(china_gdp)

## [1] "Country"    "Code"       "Year"       "GDP"        "Growth"    
## [6] "CPI"        "Imports"    "Exports"    "Population"

8.6i SimpleExponential Smoothing ETS(A,N,N)

# Fit simple exponential smoothing model using ETS
fit_china_ann <- china_gdp %>%
  model(ETS(GDP ~ error("A") + trend("N") + season("N")))

# View the model parameters (α and ℓ0)
report(fit_china_ann)

## Series: GDP 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.9999 
## 
##   Initial states:
##         l[0]
##  67537060018
## 
##   sigma^2:  1.79001e+23
## 
##      AIC     AICc      BIC 
## 3344.888 3345.332 3351.069

# Generate forecasts for next 10 years
fc_china_ann <- fit_china_ann %>%
  forecast(h = 18)

# View forecasts
fc_china_ann

# Plot the forecasts with 80% and 95% prediction intervals
fc_china_ann %>%
  autoplot(china_gdp, level = c(80, 95)) +
  scale_fill_manual(
    values = c("orange", "darkblue"),
    breaks = c("80%", "95%")
  ) +
  labs(
    title = "China GDP Forecast - ETS(A,N,N) Simple Exponential Smoothing",
    y = "GDP (billions USD)",
    x = "Year"
  )

##### 8.6ii Linear Trend ETS(A,A,N)

# Fit linear trend model using ETS
fit_china_aan <- china_gdp %>%
  model(ETS(GDP ~ error("A") + trend("A") + season("N")))

# View the model parameters
report(fit_china_aan)

## Series: GDP 
## Model: ETS(A,A,N) 
##   Smoothing parameters:
##     alpha = 0.9998964 
##     beta  = 0.5518569 
## 
##   Initial states:
##         l[0]       b[0]
##  50284778074 3288256684
## 
##   sigma^2:  3.87701e+22
## 
##      AIC     AICc      BIC 
## 3258.053 3259.207 3268.356

# Generate forecasts for next 10 years
fc_china_aan <- fit_china_aan %>%
  forecast(h = 24)

# View forecasts
fc_china_aan

# Plot the forecasts with 80% and 95% prediction intervals
fc_china_aan %>%
  autoplot(china_gdp, level = c(80, 95)) +
  scale_fill_manual(
    values = c("lightgreen", "darkgreen"),
    breaks = c("80%", "95%")
  ) +
  labs(
    title = "China GDP Forecast - ETS(A,A,N) Linear Trend",
    y = "GDP (billions USD)",
    x = "Year"
  )

8.6iii Damped Trend ETS(A,Ad,N)

# Fit damped trend model using ETS
fit_china_aadn <- china_gdp %>%
  model(ETS(GDP ~ error("A") + trend("Ad") + season("N")))

# View the model parameters
report(fit_china_aadn)

## Series: GDP 
## Model: ETS(A,Ad,N) 
##   Smoothing parameters:
##     alpha = 0.9998747 
##     beta  = 0.5634078 
##     phi   = 0.9799999 
## 
##   Initial states:
##         l[0]       b[0]
##  50284778075 3288256683
## 
##   sigma^2:  3.959258e+22
## 
##      AIC     AICc      BIC 
## 3260.187 3261.834 3272.549

# Generate forecasts for next 10 years
fc_china_aadn <- fit_china_aadn %>%
  forecast(h = 12)

# View forecasts
fc_china_aadn

# Plot the forecasts with 80% and 95% prediction intervals
fc_china_aadn %>%
  autoplot(china_gdp, level = c(80, 95)) +
  scale_fill_manual(
    values = c("lightblue", "navy"),
    breaks = c("80%", "95%")
  ) +
  labs(
    title = "China GDP Forecast - ETS(A,Ad,N) Damped Trend",
    y = "GDP (billions USD)",
    x = "Year"
  )

##### 8.6iv Comparison of the three models As the graphs move from simple exponential smoothing (ETS(A,N,N)) to linear trend (ETS(A,A,N)) to damped trend (ETS(A,Ad,N)), I observe that the height of the exponential portion of the curve decreases.

8.7 Experiment with different forms of the ETS() model on the quarterly gas production in Australia

# Load data
data(aus_production)

# Filter for Gas data (remove missing values)
gas_data <- aus_production %>%
  filter(!is.na(Gas))

# View the data
head(gas_data)

tail(gas_data)

# Plot the Gas series
gas_data %>%
  autoplot(Gas) +
  labs(
    title = "Australian Quarterly Gas Production",
    y = "Gas Production",
    x = "Quarter"
  ) +
  theme_minimal()

# Summary statistics
print("Summary of Gas Production:")

## [1] "Summary of Gas Production:"

summary(gas_data$Gas)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5.00   11.25  105.00   99.21  163.75  252.00

8.7i Fit the ETS Model with Additive Seasonlaity - ETS (A,A,A)

# Fit additive seasonality model
fit_gas_aaa <- gas_data %>%
  model(ETS(Gas ~ error("A") + trend("A") + season("A")))

# View the model parameters
print("ETS(A,A,A) Model - Additive Seasonality:")

## [1] "ETS(A,A,A) Model - Additive Seasonality:"

report(fit_gas_aaa)

## Series: Gas 
## Model: ETS(A,A,A) 
##   Smoothing parameters:
##     alpha = 0.5209159 
##     beta  = 0.0001000024 
##     gamma = 0.4180453 
## 
##   Initial states:
##      l[0]      b[0]      s[0]    s[-1]     s[-2]     s[-3]
##  10.06526 0.9870514 -5.890283 15.61932 -2.811314 -6.917728
## 
##   sigma^2:  23.5601
## 
##      AIC     AICc      BIC 
## 1872.452 1873.318 1902.913

# Generate forecasts for next 8 quarters (2 years)
fc_gas_aaa <- fit_gas_aaa %>%
  forecast(h = 8)

# View forecasts
fc_gas_aaa

# Plot the forecasts
fc_gas_aaa %>%
  autoplot(gas_data, level = c(80, 95)) +
  scale_fill_manual(
    values = c("lightblue", "darkblue"),
    breaks = c("80%", "95%")
  ) +
  labs(
    title = "Gas Production Forecast - ETS(A,A,A) Additive Seasonality",
    y = "Gas Production",
    x = "Quarter"
  ) +
  theme_minimal()

# Calculate accuracy
accuracy_gas_aaa <- accuracy(fit_gas_aaa)
print("ETS(A,A,A) Accuracy:")

## [1] "ETS(A,A,A) Accuracy:"

print(accuracy_gas_aaa)

## # A tibble: 1 × 10
##   .model                .type      ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>                 <chr>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 "ETS(Gas ~ error(\"A… Trai… 0.00525  4.76  3.35 -4.69  10.9 0.600 0.628 0.0772

8.7ii Fit the ETS Model with Multiplicative Seasonlaity - ETS (A,A,M)

# Fit multiplicative seasonality model
fit_gas_mam <- gas_data %>%
  model(ETS(Gas ~ error("M") + trend("A") + season("M")))

# View the model parameters
print("ETS(M,A,M) Model - Multiplicative Seasonality:")

## [1] "ETS(M,A,M) Model - Multiplicative Seasonality:"

report(fit_gas_mam)

## Series: Gas 
## Model: ETS(M,A,M) 
##   Smoothing parameters:
##     alpha = 0.6528545 
##     beta  = 0.1441675 
##     gamma = 0.09784922 
## 
##   Initial states:
##      l[0]       b[0]      s[0]    s[-1]    s[-2]     s[-3]
##  5.945592 0.07062881 0.9309236 1.177883 1.074851 0.8163427
## 
##   sigma^2:  0.0032
## 
##      AIC     AICc      BIC 
## 1680.929 1681.794 1711.389

# Generate forecasts for next 8 quarters
fc_gas_mam <- fit_gas_mam %>%
  forecast(h = 8)

# View forecasts
fc_gas_mam

# Plot the forecasts
fc_gas_mam %>%
  autoplot(gas_data, level = c(80, 95)) +
  scale_fill_manual(
    values = c("lightgreen", "darkgreen"),
    breaks = c("80%", "95%")
  ) +
  labs(
    title = "Gas Production Forecast - ETS(M,A,M) Multiplicative Seasonality",
    y = "Gas Production",
    x = "Quarter"
  ) +
  theme_minimal()

# Calculate accuracy
accuracy_gas_mam <- accuracy(fit_gas_mam)
print("ETS(M,A,M) Accuracy:")

## [1] "ETS(M,A,M) Accuracy:"

print(accuracy_gas_mam)

## # A tibble: 1 × 10
##   .model                .type     ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>                 <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 "ETS(Gas ~ error(\"M… Trai… -0.115  4.60  3.02 0.199  4.08 0.542 0.606 -0.0131

8.7iii Comparison of the additive and multiplicative seasonality models

# Plot both forecasts together
gas_data %>%
  model(
    Additive = ETS(Gas ~ error("A") + trend("A") + season("A")),
    Multiplicative = ETS(Gas ~ error("M") + trend("A") + season("M"))
  ) %>%
  forecast(h = 8) %>%
  autoplot(gas_data, level = NULL) +
  labs(
    title = "Additive vs Multiplicative Seasonality",
    y = "Gas Production",
    x = "Quarter"
  ) +
  theme_minimal()

# Compare accuracy metrics
comparison_seasonality <- data.frame(
  Model = c("Additive (A,A,A)", "Multiplicative (M,A,M)"),
  RMSE = c(accuracy_gas_aaa$RMSE, accuracy_gas_mam$RMSE),
  MAE = c(accuracy_gas_aaa$MAE, accuracy_gas_mam$MAE),
  AIC = c(glance(fit_gas_aaa)$AIC, glance(fit_gas_mam)$AIC),
  BIC = c(glance(fit_gas_aaa)$BIC, glance(fit_gas_mam)$BIC)
)

print("Additive vs Multiplicative Comparison:")

## [1] "Additive vs Multiplicative Comparison:"

print(comparison_seasonality)

##                    Model     RMSE      MAE      AIC      BIC
## 1       Additive (A,A,A) 4.763979 3.345044 1872.452 1902.913
## 2 Multiplicative (M,A,M) 4.595113 3.021727 1680.929 1711.389

Experimemnt with making the trend dampened

# Fit multiplicative seasonality with damped trend
fit_gas_madm <- gas_data %>%
  model(ETS(Gas ~ error("M") + trend("Ad") + season("M")))

# View the model parameters
print("ETS(M,Ad,M) Model - Damped Trend:")

## [1] "ETS(M,Ad,M) Model - Damped Trend:"

report(fit_gas_madm)

## Series: Gas 
## Model: ETS(M,Ad,M) 
##   Smoothing parameters:
##     alpha = 0.6489044 
##     beta  = 0.1551275 
##     gamma = 0.09369372 
##     phi   = 0.98 
## 
##   Initial states:
##      l[0]       b[0]      s[0]    s[-1]   s[-2]     s[-3]
##  5.858941 0.09944006 0.9281912 1.177903 1.07678 0.8171255
## 
##   sigma^2:  0.0033
## 
##      AIC     AICc      BIC 
## 1684.028 1685.091 1717.873

# Generate forecasts for next 8 quarters
fc_gas_madm <- fit_gas_madm %>%
  forecast(h = 8)

# View forecasts
fc_gas_madm

# Plot the forecasts
fc_gas_madm %>%
  autoplot(gas_data, level = c(80, 95)) +
  scale_fill_manual(
    values = c("pink", "purple"),
    breaks = c("80%", "95%")
  ) +
  labs(
    title = "Gas Production Forecast - ETS(M,Ad,M) Damped Trend",
    y = "Gas Production",
    x = "Quarter"
  ) +
  theme_minimal()

# Calculate accuracy
accuracy_gas_madm <- accuracy(fit_gas_madm)
print("ETS(M,Ad,M) Accuracy:")

## [1] "ETS(M,Ad,M) Accuracy:"

print(accuracy_gas_madm)

## # A tibble: 1 × 10
##   .model              .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>               <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 "ETS(Gas ~ error(\… Trai… -0.00439  4.59  3.03 0.326  4.10 0.544 0.606 -0.0217

Comparison of all three models

Based on the outputs fro the code the damped trend does not improve statistical fit since there is a higher AIC and BIC value. and lesser values would have indicated a better fit. Damped may still be preferred for conservative long-term forecasting to avoid unrealistic exponential growth projections, despite slightly worse statistical fit. Multiplicative gives the best AIC value indicating thta it is the best predictive model.

8.8 Experiment with different forms of the ETS() model on the retail Australian data.

Multiplicative seasonality is necessary for this series since there is a clear seasonal pattern that increases in magnitude as the overall level of production increases.

# Loading the data
set.seed(12345678)

# Load data and select a series
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))

# View the selected series
print(paste("Selected Series ID:", unique(myseries$`Series ID`)))

## [1] "Selected Series ID: A3349767W"

head(myseries)

# Plot the series
myseries %>%
  autoplot(Turnover) +
  labs(
    title = "Australian Retail Turnover - Monthly Data",
    y = "Turnover ($ Million)",
    x = "Month"
  ) +
  theme_minimal()

##### 8.8i Explore the retail data

# Seasonal plot
myseries %>%
  gg_season(Turnover) +
  labs(
    title = "Seasonal Pattern by Year",
    y = "Turnover"
  ) +
  theme_minimal()

# Subseries plot
myseries %>%
  gg_subseries(Turnover) +
  labs(
    title = "Seasonal Subseries Plot",
    y = "Turnover"
  ) +
  theme_minimal()

# Lag plot
myseries %>%
  gg_lag(Turnover, geom = "point") +
  labs(title = "Lag Plots") +
  theme_minimal()

# ACF plot
myseries %>%
  ACF(Turnover) %>%
  autoplot() +
  labs(title = "Autocorrelation Function") +
  theme_minimal()

##### 8.8ii Why is multiplicative seasonality necessary for this series?

# Split data into training and test sets
myseries_train <- myseries %>%
  filter(year(Month) <= 2010)

myseries_test <- myseries %>%
  filter(year(Month) > 2010)

# Fit both additive and multiplicative models
fit_comparison <- myseries_train %>%
  model(
    Additive = ETS(Turnover ~ error("A") + trend("A") + season("A")),
    Multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M"))
  )

# Compare models
comparison_add_mult <- glance(fit_comparison) %>%
  select(.model, AIC, AICc, BIC)

print("Additive vs Multiplicative Seasonality:")

## [1] "Additive vs Multiplicative Seasonality:"

print(comparison_add_mult)

## # A tibble: 2 × 4
##   .model           AIC  AICc   BIC
##   <chr>          <dbl> <dbl> <dbl>
## 1 Additive       1288. 1291. 1350.
## 2 Multiplicative 1164. 1166. 1225.

# Accuracy comparison
accuracy_comparison <- accuracy(fit_comparison)
print("Accuracy Metrics:")

## [1] "Accuracy Metrics:"

print(accuracy_comparison)

## # A tibble: 2 × 12
##   State      Industry .model .type       ME  RMSE   MAE    MPE  MAPE  MASE RMSSE
##   <chr>      <chr>    <chr>  <chr>    <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 Northern … Clothin… Addit… Trai…  0.00335 0.602 0.451 -0.487  6.37 0.493 0.496
## 2 Northern … Clothin… Multi… Trai… -0.0119  0.518 0.384 -0.446  5.21 0.420 0.427
## # ℹ 1 more variable: ACF1 <dbl>

# Visualize the difference
myseries_train %>%
  model(
    Additive = ETS(Turnover ~ error("A") + trend("A") + season("A")),
    Multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M"))
  ) %>%
  forecast(h = 24) %>%
  autoplot(myseries_train, level = NULL) +
  labs(
    title = "Additive vs Multiplicative Seasonality",
    y = "Turnover",
    x = "Month"
  ) +
  theme_minimal()

- Seasonal variation increases proportionally with the level of turnover - Early years show small seasonal swings, later years show much larger swings - AIC/BIC values are significantly lower for multiplicative (better fit)

8.8iii Using the Holt-Winters Method to forecast the retail data

# Fit Holt-Winters multiplicative (linear trend)
fit_hw_linear <- myseries_train %>%
  model(HW_Linear = ETS(Turnover ~ error("M") + trend("A") + season("M")))

# View model
print("Holt-Winters Multiplicative (Linear Trend):")

## [1] "Holt-Winters Multiplicative (Linear Trend):"

report(fit_hw_linear)

## Series: Turnover 
## Model: ETS(M,A,M) 
##   Smoothing parameters:
##     alpha = 0.5406521 
##     beta  = 0.0001338935 
##     gamma = 0.0001008974 
## 
##   Initial states:
##      l[0]       b[0]      s[0]     s[-1]     s[-2]    s[-3]    s[-4]   s[-5]
##  2.659274 0.04289703 0.8431579 0.7716643 0.8407577 1.342791 1.016251 1.04332
##     s[-6]    s[-7]    s[-8]    s[-9]    s[-10]   s[-11]
##  1.043807 1.106141 1.129276 1.024721 0.9733536 0.864759
## 
##   sigma^2:  0.0047
## 
##      AIC     AICc      BIC 
## 1163.602 1166.002 1224.963

# Forecast
fc_hw_linear <- fit_hw_linear %>%
  forecast(h = 24)

# Plot
fc_hw_linear %>%
  autoplot(myseries_train, level = c(80, 95)) +
  scale_fill_manual(values = c("lightblue", "darkblue")) +
  labs(
    title = "Holt-Winters Multiplicative - Linear Trend",
    y = "Turnover",
    x = "Month"
  ) +
  theme_minimal()

# Fit Holt-Winters multiplicative with damped trend
fit_hw_damped <- myseries_train %>%
  model(HW_Damped = ETS(Turnover ~ error("M") + trend("Ad") + season("M")))

# View model
print("Holt-Winters Multiplicative (Damped Trend):")

## [1] "Holt-Winters Multiplicative (Damped Trend):"

report(fit_hw_damped)

## Series: Turnover 
## Model: ETS(M,Ad,M) 
##   Smoothing parameters:
##     alpha = 0.4429807 
##     beta  = 0.005351177 
##     gamma = 0.0001057534 
##     phi   = 0.9799998 
## 
##   Initial states:
##      l[0]       b[0]      s[0]     s[-1]     s[-2]    s[-3]    s[-4]    s[-5]
##  2.402317 0.07313785 0.8381105 0.7537881 0.8350622 1.331381 1.011018 1.047677
##     s[-6]    s[-7]    s[-8]    s[-9]    s[-10]    s[-11]
##  1.055305 1.123139 1.149219 1.020584 0.9755371 0.8591794
## 
##   sigma^2:  0.005
## 
##      AIC     AICc      BIC 
## 1177.433 1180.126 1242.404

# Forecast
fc_hw_damped <- fit_hw_damped %>%
  forecast(h = 24)

# Plot
fc_hw_damped %>%
  autoplot(myseries_train, level = c(80, 95)) +
  scale_fill_manual(values = c("lightgreen", "darkgreen")) +
  labs(
    title = "Holt-Winters Multiplicative - Damped Trend",
    y = "Turnover",
    x = "Month"
  ) +
  theme_minimal()

# Compare both visually
myseries_train %>%
  model(
    Linear = ETS(Turnover ~ error("M") + trend("A") + season("M")),
    Damped = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
  ) %>%
  forecast(h = 24) %>%
  autoplot(myseries_train, level = NULL) +
  labs(
    title = "Linear vs Damped Trend",
    y = "Turnover",
    x = "Month"
  ) +
  theme_minimal()

8.8iv Comparing the RMSE of One-Step Forecasts

# Fit both models
fit_both <- myseries_train %>%
  model(
    Linear_Trend = ETS(Turnover ~ error("M") + trend("A") + season("M")),
    Damped_Trend = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
  )

# Get accuracy metrics (includes one-step forecast RMSE)
accuracy_both <- accuracy(fit_both)

print("One-Step Forecast Accuracy Comparison:")

## [1] "One-Step Forecast Accuracy Comparison:"

print(accuracy_both %>% select(.model, RMSE, MAE, MAPE))

## # A tibble: 2 × 4
##   .model        RMSE   MAE  MAPE
##   <chr>        <dbl> <dbl> <dbl>
## 1 Linear_Trend 0.518 0.384  5.21
## 2 Damped_Trend 0.519 0.392  5.34

# Compare AIC/BIC
comparison_models <- glance(fit_both) %>%
  select(.model, AIC, AICc, BIC)

print("Information Criteria Comparison:")

## [1] "Information Criteria Comparison:"

print(comparison_models)

## # A tibble: 2 × 4
##   .model         AIC  AICc   BIC
##   <chr>        <dbl> <dbl> <dbl>
## 1 Linear_Trend 1164. 1166. 1225.
## 2 Damped_Trend 1177. 1180. 1242.

# Determine preferred model
if (accuracy_both$RMSE[1] < accuracy_both$RMSE[2]) {
  preferred <- "Linear Trend (M,A,M)"
} else {
  preferred <- "Damped Trend (M,Ad,M)"
}

print(paste("PREFERRED MODEL:", preferred))

## [1] "PREFERRED MODEL: Linear Trend (M,A,M)"

print("Based on lowest RMSE and information criteria")

## [1] "Based on lowest RMSE and information criteria"

I prefer the Linear trend since it is has the lowest AIC, AICc, BIC values. Lower values indicate a model with a better fit.

8.8v Check residuals from the linera trend model look like white noise.

# Use the better model from Question 3
# Assuming linear trend is better (adjust if needed)
best_fit <- myseries_train %>%
  model(Best = ETS(Turnover ~ error("M") + trend("A") + season("M")))

# Check residuals
augment(best_fit) %>%
  autoplot(.innov) +
  labs(
    title = "Residual Plot",
    y = "Residuals",
    x = "Month"
  ) +
  theme_minimal()

# ACF of residuals
augment(best_fit) %>%
  ACF(.innov) %>%
  autoplot() +
  labs(title = "ACF of Residuals") +
  theme_minimal()

# Ljung-Box test
ljung_box_test <- augment(best_fit) %>%
  features(.innov, ljung_box, lag = 24)

print("Ljung-Box Test for White Noise:")

## [1] "Ljung-Box Test for White Noise:"

print(ljung_box_test)

## # A tibble: 1 × 5
##   State              Industry                           .model lb_stat lb_pvalue
##   <chr>              <chr>                              <chr>    <dbl>     <dbl>
## 1 Northern Territory Clothing, footwear and personal a… Best      21.3     0.619

if (ljung_box_test$lb_pvalue > 0.05) {
  print("RESULT: Residuals appear to be white noise (p > 0.05)")
} else {
  print("WARNING: Residuals may not be white noise (p < 0.05)")
}

## [1] "RESULT: Residuals appear to be white noise (p > 0.05)"

# Histogram of residuals
augment(best_fit) %>%
  ggplot(aes(x = .innov)) +
  geom_histogram(bins = 30, fill = "steelblue", color = "black") +
  labs(
    title = "Distribution of Residuals",
    x = "Residuals",
    y = "Frequency"
  ) +
  theme_minimal()

##### 8.8vi Seasonal Naive Forecast

# Fit models on training data (up to end of 2010)
fit_all_methods <- myseries_train %>%
  model(
    SNAIVE = SNAIVE(Turnover),
    HW_Linear = ETS(Turnover ~ error("M") + trend("A") + season("M")),
    HW_Damped = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
  )

# Forecast on test set
fc_all <- fit_all_methods %>%
  forecast(h = nrow(myseries_test))

# Calculate test set accuracy
test_accuracy <- accuracy(fc_all, myseries_test)

print("Test Set RMSE Comparison:")

## [1] "Test Set RMSE Comparison:"

print(test_accuracy %>% 
      select(.model, .type, RMSE, MAE, MAPE) %>%
      arrange(RMSE))

## # A tibble: 3 × 5
##   .model    .type  RMSE   MAE  MAPE
##   <chr>     <chr> <dbl> <dbl> <dbl>
## 1 HW_Damped Test   1.15 0.878  6.45
## 2 SNAIVE    Test   1.55 1.24   9.06
## 3 HW_Linear Test   1.78 1.60  12.6

# Visualize all forecasts against actual test data
fc_all %>%
  autoplot(myseries, level = NULL) +
  autolayer(myseries_test, Turnover, color = "black", size = 1) +
  labs(
    title = "Forecast Comparison: SNAIVE vs Holt-Winters Methods",
    subtitle = "Black line shows actual test set values",
    y = "Turnover",
    x = "Month"
  ) +
  theme_minimal()

# Determine if we beat SNAIVE
snaive_rmse <- test_accuracy %>% 
  filter(.model == "SNAIVE") %>% 
  pull(RMSE)

hw_linear_rmse <- test_accuracy %>% 
  filter(.model == "HW_Linear") %>% 
  pull(RMSE)

hw_damped_rmse <- test_accuracy %>% 
  filter(.model == "HW_Damped") %>% 
  pull(RMSE)

print("")

## [1] ""

print("=== RESULTS ===")

## [1] "=== RESULTS ==="

print(paste("SNAIVE RMSE:", round(snaive_rmse, 2)))

## [1] "SNAIVE RMSE: 1.55"

print(paste("HW Linear RMSE:", round(hw_linear_rmse, 2)))

## [1] "HW Linear RMSE: 1.78"

print(paste("HW Damped RMSE:", round(hw_damped_rmse, 2)))

## [1] "HW Damped RMSE: 1.15"

if (hw_linear_rmse < snaive_rmse) {
  print("Holt-Winters Linear Trend beats SNAIVE")
} else {
  print("Holt-Winters Linear Trend does NOT beat SNAIVE")
}

## [1] "Holt-Winters Linear Trend does NOT beat SNAIVE"

For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?

# Using the same retail data from previous analysis
# Make sure myseries_train and myseries_test are already loaded

# Step 1: Determine optimal Box-Cox lambda
lambda <- myseries_train %>%
  features(Turnover, features = guerrero) %>%
  pull(lambda_guerrero)

print(paste("Optimal Box-Cox lambda:", round(lambda, 4)))

## [1] "Optimal Box-Cox lambda: 0.1971"

# Step 2: Apply STL decomposition to Box-Cox transformed series
fit_stl_ets <- myseries_train %>%
  model(
    STL_ETS = decomposition_model(
      STL(box_cox(Turnover, lambda) ~ season(window = "periodic")),
      ETS(season_adjust ~ error("A") + trend("A") + season("N"))
    )
  )

# View the model
print("STL + ETS Model:")

## [1] "STL + ETS Model:"

report(fit_stl_ets)

## Series: Turnover 
## Model: STL decomposition model 
## Transformation: box_cox(Turnover, lambda) 
## Combination: season_adjust + season_year
## 
## ========================================
## 
## Series: season_adjust 
## Model: ETS(A,A,N) 
##   Smoothing parameters:
##     alpha = 0.5691359 
##     beta  = 0.0001000037 
## 
##   Initial states:
##      l[0]        b[0]
##  1.087742 0.007973452
## 
##   sigma^2:  0.0097
## 
##      AIC     AICc      BIC 
## 271.8798 272.1045 289.9272 
## 
## Series: season_year 
## Model: SNAIVE 
## 
## sigma^2: 0

# Step 4: Generate forecasts
fc_stl_ets <- fit_stl_ets %>%
  forecast(h = nrow(myseries_test))

# Plot forecasts
fc_stl_ets %>%
  autoplot(myseries_train, level = c(80, 95)) +
  autolayer(myseries_test, Turnover, color = "black") +
  scale_fill_manual(values = c("lightcoral", "darkred")) +
  labs(
    title = "STL + ETS Forecast (Box-Cox Transformed)",
    subtitle = "Black line shows actual test set values",
    y = "Turnover",
    x = "Month"
  ) +
  theme_minimal()

# Step 5: Compare with previous best methods
fit_all_methods <- myseries_train %>%
  model(
    SNAIVE = SNAIVE(Turnover),
    HW_Linear = ETS(Turnover ~ error("M") + trend("A") + season("M")),
    HW_Damped = ETS(Turnover ~ error("M") + trend("Ad") + season("M")),
    STL_ETS = decomposition_model(
      STL(box_cox(Turnover, lambda) ~ season(window = "periodic")),
      ETS(season_adjust ~ error("A") + trend("A") + season("N"))
    )
  )

# Forecast all methods
fc_all_methods <- fit_all_methods %>%
  forecast(h = nrow(myseries_test))

# Calculate test set accuracy
test_accuracy_all <- accuracy(fc_all_methods, myseries_test)

print("=== TEST SET ACCURACY COMPARISON ===")

## [1] "=== TEST SET ACCURACY COMPARISON ==="

print(test_accuracy_all %>% 
      select(.model, .type, RMSE, MAE, MAPE) %>%
      arrange(RMSE))

## # A tibble: 4 × 5
##   .model    .type  RMSE   MAE  MAPE
##   <chr>     <chr> <dbl> <dbl> <dbl>
## 1 HW_Damped Test   1.15 0.878  6.45
## 2 SNAIVE    Test   1.55 1.24   9.06
## 3 HW_Linear Test   1.78 1.60  12.6 
## 4 STL_ETS   Test   4.02 3.62  27.9

# Visual comparison
fc_all_methods %>%
  autoplot(myseries, level = NULL) +
  autolayer(myseries_test, Turnover, color = "black", size = 1.2) +
  labs(
    title = "All Methods Comparison on Test Set",
    subtitle = "Black line = Actual values",
    y = "Turnover",
    x = "Month"
  ) +
  theme_minimal()

# Create summary table
summary_table <- test_accuracy_all %>%
  filter(.type == "Test") %>%
  select(.model, RMSE, MAE, MAPE) %>%
  arrange(RMSE) %>%
  mutate(
    Rank = row_number(),
    RMSE_Improvement = round(100 * (RMSE[.model == "SNAIVE"] - RMSE) / RMSE[.model == "SNAIVE"], 2)
  )

print("=== RANKED PERFORMANCE (Best to Worst) ===")

## [1] "=== RANKED PERFORMANCE (Best to Worst) ==="

print(summary_table)

## # A tibble: 4 × 6
##   .model     RMSE   MAE  MAPE  Rank RMSE_Improvement
##   <chr>     <dbl> <dbl> <dbl> <int>            <dbl>
## 1 HW_Damped  1.15 0.878  6.45     1             25.8
## 2 SNAIVE     1.55 1.24   9.06     2              0  
## 3 HW_Linear  1.78 1.60  12.6      3            -14.9
## 4 STL_ETS    4.02 3.62  27.9      4           -159.

# Determine best method
best_method <- summary_table %>% slice(1) %>% pull(.model)
best_rmse <- summary_table %>% slice(1) %>% pull(RMSE)
snaive_rmse <- summary_table %>% filter(.model == "SNAIVE") %>% pull(RMSE)

print("")

## [1] ""

print("=== RESULTS ===")

## [1] "=== RESULTS ==="

print(paste("Best Method:", best_method))

## [1] "Best Method: HW_Damped"

print(paste("Best RMSE:", round(best_rmse, 2)))

## [1] "Best RMSE: 1.15"

print(paste("SNAIVE RMSE:", round(snaive_rmse, 2)))

## [1] "SNAIVE RMSE: 1.55"

print(paste("Improvement over SNAIVE:", 
            round(100 * (snaive_rmse - best_rmse) / snaive_rmse, 2), "%"))

## [1] "Improvement over SNAIVE: 25.82 %"

# Check if STL+ETS is best
if (best_method == "STL_ETS") {
  print("")
  print("STL + ETS is the BEST METHOD!")
  print("Box-Cox transformation + STL decomposition improves forecast accuracy")
} else {
  stl_rank <- summary_table %>% filter(.model == "STL_ETS") %>% pull(Rank)
  print("")
  print(paste("STL + ETS ranked #", stl_rank, "out of", nrow(summary_table)))
  print("Previous Holt-Winters methods perform better on this data")
}

## [1] ""
## [1] "STL + ETS ranked # 4 out of 4"
## [1] "Previous Holt-Winters methods perform better on this data"

# Check residuals of STL+ETS
print("")

## [1] ""

print("=== RESIDUAL DIAGNOSTICS FOR STL+ETS ===")

## [1] "=== RESIDUAL DIAGNOSTICS FOR STL+ETS ==="

augment(fit_stl_ets) %>%
  autoplot(.innov) +
  labs(
    title = "STL+ETS Residuals Over Time",
    y = "Residuals"
  ) +
  theme_minimal()

augment(fit_stl_ets) %>%
  ACF(.innov) %>%
  autoplot() +
  labs(title = "ACF of STL+ETS Residuals") +
  theme_minimal()

ljung_box_stl <- augment(fit_stl_ets) %>%
  features(.innov, ljung_box, lag = 24)

print("Ljung-Box Test:")

## [1] "Ljung-Box Test:"

print(ljung_box_stl)

## # A tibble: 1 × 5
##   State              Industry                           .model lb_stat lb_pvalue
##   <chr>              <chr>                              <chr>    <dbl>     <dbl>
## 1 Northern Territory Clothing, footwear and personal a… STL_E…    28.9     0.224

if (ljung_box_stl$lb_pvalue > 0.05) {
  print("Residuals appear to be white noise (p > 0.05)")
} else {
  print("WARNING: Residuals may show autocorrelation (p < 0.05)")
}

## [1] "Residuals appear to be white noise (p > 0.05)"

# Training set accuracy comparison
train_accuracy <- accuracy(fit_all_methods)

print("")

## [1] ""

print("=== TRAINING SET ACCURACY (for reference) ===")

## [1] "=== TRAINING SET ACCURACY (for reference) ==="

print(train_accuracy %>% 
      select(.model, .type, RMSE, MAE, MAPE) %>%
      arrange(RMSE))

## # A tibble: 4 × 5
##   .model    .type     RMSE   MAE  MAPE
##   <chr>     <chr>    <dbl> <dbl> <dbl>
## 1 HW_Linear Training 0.518 0.384  5.21
## 2 HW_Damped Training 0.519 0.392  5.34
## 3 STL_ETS   Training 0.536 0.392  5.08
## 4 SNAIVE    Training 1.21  0.915 12.4