Load Required Libraries

library(fpp3)  # Forecasting and time series analysis
## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tibble      3.2.1     ✔ tsibble     1.1.6
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.1     ✔ feasts      0.4.1
## ✔ lubridate   1.9.4     ✔ fable       0.4.1
## ✔ ggplot2     3.5.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
library(ggplot2)  # For custom visualization

Load and Prepare Data

# Filter dataset for Leisure and Hospitality employment
leisure <- us_employment |>
  filter(Title == "Leisure and Hospitality", year(Month) > 2000) |>
  mutate(Employed = Employed / 1000) |>  # Convert to millions
  select(Month, Employed)

# Plot raw time series data
autoplot(leisure, Employed) +
  labs(title = "US Employment in Leisure & Hospitality", 
       y = "Number of People (Millions)")

Fit ARIMA and ETS Models

# Fit ARIMA model
fit_arima <- leisure |> model(ARIMA(Employed))

# Fit ETS model
fit_ets <- leisure |> model(ETS(Employed))

Build a Seasonal ARIMA (SARIMA) Model

# Load necessary package for differencing tests
library(urca)

# Determine optimal differencing
ndiffs_val <- leisure |> features(Employed, unitroot_ndiffs)
nsdiffs_val <- leisure |> features(Employed, unitroot_nsdiffs)

# Display differencing needs
ndiffs_val
## # A tibble: 1 × 1
##   ndiffs
##    <int>
## 1      1
nsdiffs_val
## # A tibble: 1 × 1
##   nsdiffs
##     <int>
## 1       1
# Fit SARIMA model
fit_sarima <- leisure |> model(ARIMA(Employed ~ pdq(1,1,1) + PDQ(1,1,1, period = 12)))

# Model Summary
report(fit_sarima)
## Series: Employed 
## Model: ARIMA(1,1,1)(1,1,1)[12] 
## 
## Coefficients:
##          ar1      ma1    sar1     sma1
##       0.8380  -0.6742  0.2904  -0.7272
## s.e.  0.1369   0.1897  0.1308   0.0954
## 
## sigma^2 estimated as 0.001421:  log likelihood=394.59
## AIC=-779.19   AICc=-778.9   BIC=-762.41

Compare Model Selection Criteria (AIC, BIC)

# Extract AIC and BIC values
model_comparison <- bind_rows(
  glance(fit_arima) |> mutate(Model = "ARIMA"),
  glance(fit_ets) |> mutate(Model = "ETS"),
  glance(fit_sarima) |> mutate(Model = "SARIMA")
) |> select(Model, AIC, BIC)

# Print model comparison table
model_comparison
## # A tibble: 3 × 3
##   Model    AIC   BIC
##   <chr>  <dbl> <dbl>
## 1 ARIMA  -777. -757.
## 2 ETS    -166. -108.
## 3 SARIMA -779. -762.
# Plot AIC comparison
ggplot(model_comparison, aes(x = Model, y = AIC, fill = Model)) +
  geom_bar(stat = "identity", width = 0.5) +
  labs(title = "AIC Comparison: ARIMA vs ETS vs SARIMA", 
       y = "AIC Value", x = "Model") +
  theme_minimal()

Check Residuals for All Models

# Extract residuals for ARIMA model
res_arima <- augment(fit_arima)
# Residual diagnostics for ARIMA
gg_tsresiduals(fit_arima)

# Extract residuals for ETS model
res_ets <- augment(fit_ets)
# Residual diagnostics for ETS
gg_tsresiduals(fit_ets)

# Extract residuals for SARIMA model
res_sarima <- augment(fit_sarima)
# Residual diagnostics for SARIMA
gg_tsresiduals(fit_sarima)

# Ljung-Box test for all models
lb_arima <- res_arima |> features(.innov, ljung_box, lag = 10)
lb_ets <- res_ets |> features(.innov, ljung_box, lag = 10)
lb_sarima <- res_sarima |> features(.innov, ljung_box, lag = 10)

# Print Ljung-Box test results
lb_arima
## # A tibble: 1 × 3
##   .model          lb_stat lb_pvalue
##   <chr>             <dbl>     <dbl>
## 1 ARIMA(Employed)    7.22     0.705
lb_ets
## # A tibble: 1 × 3
##   .model        lb_stat     lb_pvalue
##   <chr>           <dbl>         <dbl>
## 1 ETS(Employed)    62.8 0.00000000108
lb_sarima
## # A tibble: 1 × 3
##   .model                                                     lb_stat lb_pvalue
##   <chr>                                                        <dbl>     <dbl>
## 1 ARIMA(Employed ~ pdq(1, 1, 1) + PDQ(1, 1, 1, period = 12))    7.18     0.709

Forecast with All Models

# Generate forecasts for next 12 months
fc_arima <- fit_arima |> forecast(h = 12)
fc_ets <- fit_ets |> forecast(h = 12)
fc_sarima <- fit_sarima |> forecast(h = 12)

# Plot forecasts
autoplot(leisure, Employed) +
  autolayer(fc_arima, color = "blue") +
  autolayer(fc_ets, color = "red") +
  autolayer(fc_sarima, color = "green") +
  labs(title = "12-Month Forecast: ARIMA vs ETS vs SARIMA", 
       y = "Number of People (Millions)") +
  theme_minimal()
## Scale for fill_ramp is already present.
## Adding another scale for fill_ramp, which will replace the existing scale.
## Scale for fill_ramp is already present.
## Adding another scale for fill_ramp, which will replace the existing scale.

Conclusion

# Compare results
if (model_comparison$AIC[model_comparison$Model == "ARIMA"] <
    model_comparison$AIC[model_comparison$Model == "ETS"] &
    model_comparison$AIC[model_comparison$Model == "SARIMA"]) {
  print("ARIMA has the best fit based on AIC")
} else if (model_comparison$AIC[model_comparison$Model == "SARIMA"] <
           model_comparison$AIC[model_comparison$Model == "ETS"]) {
  print("SARIMA has the best fit based on AIC")
} else {
  print("ETS has the best fit based on AIC")
}
## [1] "ARIMA has the best fit based on AIC"
if (lb_arima$lb_pvalue > 0.05 & lb_ets$lb_pvalue > 0.05 & lb_sarima$lb_pvalue > 0.05) {
  print("All models have good residuals (white noise).")
} else if (lb_sarima$lb_pvalue > 0.05) {
  print("SARIMA has better residuals.")
} else {
  print("Other models have better residuals.")
}
## [1] "SARIMA has better residuals."

Summary

This document helps compare ARIMA, ETS, and SARIMA models to determine the best approach for forecasting US employment trends in the Leisure & Hospitality sector.