ARIMA vs ETS Model Comparison & SARIMA Model Building

Load Required Libraries

library(fpp3)  # Forecasting and time series analysis

## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr

## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──

## ✔ tibble      3.2.1     ✔ tsibble     1.1.6
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.1     ✔ feasts      0.4.1
## ✔ lubridate   1.9.4     ✔ fable       0.4.1
## ✔ ggplot2     3.5.1

## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()

library(ggplot2)  # For custom visualization

Load and Prepare Data

# Filter dataset for Leisure and Hospitality employment
leisure <- us_employment |>
  filter(Title == "Leisure and Hospitality", year(Month) > 2000) |>
  mutate(Employed = Employed / 1000) |>  # Convert to millions
  select(Month, Employed)

# Plot raw time series data
autoplot(leisure, Employed) +
  labs(title = "US Employment in Leisure & Hospitality", 
       y = "Number of People (Millions)")

Fit ARIMA and ETS Models

# Fit ARIMA model
fit_arima <- leisure |> model(ARIMA(Employed))

# Fit ETS model
fit_ets <- leisure |> model(ETS(Employed))

Build a Seasonal ARIMA (SARIMA) Model

# Load necessary package for differencing tests
library(urca)

# Determine optimal differencing
ndiffs_val <- leisure |> features(Employed, unitroot_ndiffs)
nsdiffs_val <- leisure |> features(Employed, unitroot_nsdiffs)

# Display differencing needs
ndiffs_val

## # A tibble: 1 × 1
##   ndiffs
##    <int>
## 1      1

nsdiffs_val

## # A tibble: 1 × 1
##   nsdiffs
##     <int>
## 1       1

# Fit SARIMA model
fit_sarima <- leisure |> model(ARIMA(Employed ~ pdq(1,1,1) + PDQ(1,1,1, period = 12)))

# Model Summary
report(fit_sarima)

## Series: Employed 
## Model: ARIMA(1,1,1)(1,1,1)[12] 
## 
## Coefficients:
##          ar1      ma1    sar1     sma1
##       0.8380  -0.6742  0.2904  -0.7272
## s.e.  0.1369   0.1897  0.1308   0.0954
## 
## sigma^2 estimated as 0.001421:  log likelihood=394.59
## AIC=-779.19   AICc=-778.9   BIC=-762.41

Compare Model Selection Criteria (AIC, BIC)

# Extract AIC and BIC values
model_comparison <- bind_rows(
  glance(fit_arima) |> mutate(Model = "ARIMA"),
  glance(fit_ets) |> mutate(Model = "ETS"),
  glance(fit_sarima) |> mutate(Model = "SARIMA")
) |> select(Model, AIC, BIC)

# Print model comparison table
model_comparison

## # A tibble: 3 × 3
##   Model    AIC   BIC
##   <chr>  <dbl> <dbl>
## 1 ARIMA  -777. -757.
## 2 ETS    -166. -108.
## 3 SARIMA -779. -762.

# Plot AIC comparison
ggplot(model_comparison, aes(x = Model, y = AIC, fill = Model)) +
  geom_bar(stat = "identity", width = 0.5) +
  labs(title = "AIC Comparison: ARIMA vs ETS vs SARIMA", 
       y = "AIC Value", x = "Model") +
  theme_minimal()

Check Residuals for All Models

# Extract residuals for ARIMA model
res_arima <- augment(fit_arima)
# Residual diagnostics for ARIMA
gg_tsresiduals(fit_arima)

# Extract residuals for ETS model
res_ets <- augment(fit_ets)
# Residual diagnostics for ETS
gg_tsresiduals(fit_ets)

# Extract residuals for SARIMA model
res_sarima <- augment(fit_sarima)
# Residual diagnostics for SARIMA
gg_tsresiduals(fit_sarima)

# Ljung-Box test for all models
lb_arima <- res_arima |> features(.innov, ljung_box, lag = 10)
lb_ets <- res_ets |> features(.innov, ljung_box, lag = 10)
lb_sarima <- res_sarima |> features(.innov, ljung_box, lag = 10)

# Print Ljung-Box test results
lb_arima

## # A tibble: 1 × 3
##   .model          lb_stat lb_pvalue
##   <chr>             <dbl>     <dbl>
## 1 ARIMA(Employed)    7.22     0.705

lb_ets

## # A tibble: 1 × 3
##   .model        lb_stat     lb_pvalue
##   <chr>           <dbl>         <dbl>
## 1 ETS(Employed)    62.8 0.00000000108

lb_sarima

## # A tibble: 1 × 3
##   .model                                                     lb_stat lb_pvalue
##   <chr>                                                        <dbl>     <dbl>
## 1 ARIMA(Employed ~ pdq(1, 1, 1) + PDQ(1, 1, 1, period = 12))    7.18     0.709

Forecast with All Models

# Generate forecasts for next 12 months
fc_arima <- fit_arima |> forecast(h = 12)
fc_ets <- fit_ets |> forecast(h = 12)
fc_sarima <- fit_sarima |> forecast(h = 12)

# Plot forecasts
autoplot(leisure, Employed) +
  autolayer(fc_arima, color = "blue") +
  autolayer(fc_ets, color = "red") +
  autolayer(fc_sarima, color = "green") +
  labs(title = "12-Month Forecast: ARIMA vs ETS vs SARIMA", 
       y = "Number of People (Millions)") +
  theme_minimal()

## Scale for fill_ramp is already present.
## Adding another scale for fill_ramp, which will replace the existing scale.
## Scale for fill_ramp is already present.
## Adding another scale for fill_ramp, which will replace the existing scale.

Conclusion

# Compare results
if (model_comparison$AIC[model_comparison$Model == "ARIMA"] <
    model_comparison$AIC[model_comparison$Model == "ETS"] &
    model_comparison$AIC[model_comparison$Model == "SARIMA"]) {
  print("ARIMA has the best fit based on AIC")
} else if (model_comparison$AIC[model_comparison$Model == "SARIMA"] <
           model_comparison$AIC[model_comparison$Model == "ETS"]) {
  print("SARIMA has the best fit based on AIC")
} else {
  print("ETS has the best fit based on AIC")
}

## [1] "ARIMA has the best fit based on AIC"

if (lb_arima$lb_pvalue > 0.05 & lb_ets$lb_pvalue > 0.05 & lb_sarima$lb_pvalue > 0.05) {
  print("All models have good residuals (white noise).")
} else if (lb_sarima$lb_pvalue > 0.05) {
  print("SARIMA has better residuals.")
} else {
  print("Other models have better residuals.")
}

## [1] "SARIMA has better residuals."

Summary

AIC Comparison: Lower AIC suggests the better-fitting model.
Residual Analysis: Ljung-Box test ensures residuals behave like white noise.
ACF Plots: Confirm if residuals are uncorrelated (white noise).
Forecasting: All models provide future predictions, but the best choice depends on balancing fit and residual behavior.

This document helps compare ARIMA, ETS, and SARIMA models to determine the best approach for forecasting US employment trends in the Leisure & Hospitality sector.

ARIMA vs ETS Model Comparison & SARIMA Model Building

Sheriann McLarty

2025-03-11

Load Required Libraries

Load and Prepare Data

Fit ARIMA and ETS Models

Build a Seasonal ARIMA (SARIMA) Model

Compare Model Selection Criteria (AIC, BIC)

Check Residuals for All Models

Forecast with All Models

Conclusion

Summary