library(fpp3) # Forecasting and time series analysis
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.6
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ feasts 0.4.1
## ✔ lubridate 1.9.4 ✔ fable 0.4.1
## ✔ ggplot2 3.5.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(ggplot2) # For custom visualization
# Filter dataset for Leisure and Hospitality employment
leisure <- us_employment |>
filter(Title == "Leisure and Hospitality", year(Month) > 2000) |>
mutate(Employed = Employed / 1000) |> # Convert to millions
select(Month, Employed)
# Plot raw time series data
autoplot(leisure, Employed) +
labs(title = "US Employment in Leisure & Hospitality",
y = "Number of People (Millions)")
# Fit ARIMA model
fit_arima <- leisure |> model(ARIMA(Employed))
# Fit ETS model
fit_ets <- leisure |> model(ETS(Employed))
# Load necessary package for differencing tests
library(urca)
# Determine optimal differencing
ndiffs_val <- leisure |> features(Employed, unitroot_ndiffs)
nsdiffs_val <- leisure |> features(Employed, unitroot_nsdiffs)
# Display differencing needs
ndiffs_val
## # A tibble: 1 × 1
## ndiffs
## <int>
## 1 1
nsdiffs_val
## # A tibble: 1 × 1
## nsdiffs
## <int>
## 1 1
# Fit SARIMA model
fit_sarima <- leisure |> model(ARIMA(Employed ~ pdq(1,1,1) + PDQ(1,1,1, period = 12)))
# Model Summary
report(fit_sarima)
## Series: Employed
## Model: ARIMA(1,1,1)(1,1,1)[12]
##
## Coefficients:
## ar1 ma1 sar1 sma1
## 0.8380 -0.6742 0.2904 -0.7272
## s.e. 0.1369 0.1897 0.1308 0.0954
##
## sigma^2 estimated as 0.001421: log likelihood=394.59
## AIC=-779.19 AICc=-778.9 BIC=-762.41
# Extract AIC and BIC values
model_comparison <- bind_rows(
glance(fit_arima) |> mutate(Model = "ARIMA"),
glance(fit_ets) |> mutate(Model = "ETS"),
glance(fit_sarima) |> mutate(Model = "SARIMA")
) |> select(Model, AIC, BIC)
# Print model comparison table
model_comparison
## # A tibble: 3 × 3
## Model AIC BIC
## <chr> <dbl> <dbl>
## 1 ARIMA -777. -757.
## 2 ETS -166. -108.
## 3 SARIMA -779. -762.
# Plot AIC comparison
ggplot(model_comparison, aes(x = Model, y = AIC, fill = Model)) +
geom_bar(stat = "identity", width = 0.5) +
labs(title = "AIC Comparison: ARIMA vs ETS vs SARIMA",
y = "AIC Value", x = "Model") +
theme_minimal()
# Extract residuals for ARIMA model
res_arima <- augment(fit_arima)
# Residual diagnostics for ARIMA
gg_tsresiduals(fit_arima)
# Extract residuals for ETS model
res_ets <- augment(fit_ets)
# Residual diagnostics for ETS
gg_tsresiduals(fit_ets)
# Extract residuals for SARIMA model
res_sarima <- augment(fit_sarima)
# Residual diagnostics for SARIMA
gg_tsresiduals(fit_sarima)
# Ljung-Box test for all models
lb_arima <- res_arima |> features(.innov, ljung_box, lag = 10)
lb_ets <- res_ets |> features(.innov, ljung_box, lag = 10)
lb_sarima <- res_sarima |> features(.innov, ljung_box, lag = 10)
# Print Ljung-Box test results
lb_arima
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 ARIMA(Employed) 7.22 0.705
lb_ets
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 ETS(Employed) 62.8 0.00000000108
lb_sarima
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 ARIMA(Employed ~ pdq(1, 1, 1) + PDQ(1, 1, 1, period = 12)) 7.18 0.709
# Generate forecasts for next 12 months
fc_arima <- fit_arima |> forecast(h = 12)
fc_ets <- fit_ets |> forecast(h = 12)
fc_sarima <- fit_sarima |> forecast(h = 12)
# Plot forecasts
autoplot(leisure, Employed) +
autolayer(fc_arima, color = "blue") +
autolayer(fc_ets, color = "red") +
autolayer(fc_sarima, color = "green") +
labs(title = "12-Month Forecast: ARIMA vs ETS vs SARIMA",
y = "Number of People (Millions)") +
theme_minimal()
## Scale for fill_ramp is already present.
## Adding another scale for fill_ramp, which will replace the existing scale.
## Scale for fill_ramp is already present.
## Adding another scale for fill_ramp, which will replace the existing scale.
# Compare results
if (model_comparison$AIC[model_comparison$Model == "ARIMA"] <
model_comparison$AIC[model_comparison$Model == "ETS"] &
model_comparison$AIC[model_comparison$Model == "SARIMA"]) {
print("ARIMA has the best fit based on AIC")
} else if (model_comparison$AIC[model_comparison$Model == "SARIMA"] <
model_comparison$AIC[model_comparison$Model == "ETS"]) {
print("SARIMA has the best fit based on AIC")
} else {
print("ETS has the best fit based on AIC")
}
## [1] "ARIMA has the best fit based on AIC"
if (lb_arima$lb_pvalue > 0.05 & lb_ets$lb_pvalue > 0.05 & lb_sarima$lb_pvalue > 0.05) {
print("All models have good residuals (white noise).")
} else if (lb_sarima$lb_pvalue > 0.05) {
print("SARIMA has better residuals.")
} else {
print("Other models have better residuals.")
}
## [1] "SARIMA has better residuals."
This document helps compare ARIMA, ETS, and SARIMA models to determine the best approach for forecasting US employment trends in the Leisure & Hospitality sector.