# Load libraries
library(fpp3)
library(dplyr)
library(imputeTS)
library(stringr)
library(fable)
library(tibble)
library(ggplot2)
Do exercises 8.1, 8.5, 8.6, 8.7, 8.8, 8.9 in Hyndman. Please submit both the link to your Rpubs and the .pdf file with your run code
Consider the the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.
Use the ETS() function to estimate the equivalent model for simple exponential smoothing. Find the optimal values of α and ℓ 0 and generate forecasts for the next four months.
Compute a 95% prediction interval for the first forecast using ^y±1.96 s where s is the standard deviation of the residuals. Compare your interval with the interval produced by R.
# Load pig values from data
pigs_victoria <- aus_livestock |>
filter(Animal == "Pigs", State == "Victoria") |>
as_tsibble(index = Month) |>
select(Month, Count)
# Plot
autoplot(pigs_victoria, Count) +
labs(title = "Number of Pigs Slaughtered in Victoria",
y = "Count",
x = "Month") +
theme_minimal()
## Series: Count
## Model: ETS(A,N,A)
## Smoothing parameters:
## alpha = 0.3579401
## gamma = 0.0001000139
##
## Initial states:
## l[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5] s[-6]
## 95487.5 2066.235 6717.311 -2809.562 -1341.299 -7750.173 -8483.418 5610.89
## s[-7] s[-8] s[-9] s[-10] s[-11]
## -579.8107 1215.464 -2827.091 1739.465 6441.989
##
## sigma^2: 60742898
##
## AIC AICc BIC
## 13545.38 13546.26 13610.24
# Forecast for the next 4 months
pig_forecast <- pig_model |>
forecast(h = '4 months')
# Calculate lower & upper bounds of first forecast
# Extract the first forecast row
first_forecast <- pig_forecast |>
filter(row_number() == 1)
# Extract the mean value from the first forecast row
mean_value <- first_forecast |>
pull(.mean)
# Extract the variance from the 'Count' distribution in the first forecast row
count_distribution <- first_forecast |>
pull(Count)
# Parse the distribution to extract the variance
variance <- as.numeric(str_extract(as.character(count_distribution), "(?<=, )[0-9\\.e\\+]+"))
# Calculate the standard deviation from the variance
std_dev <- sqrt(variance)
# Compute the 95% prediction interval manually for the first forecast
lower_bound <- mean_value - 1.96 * std_dev
upper_bound <- mean_value + 1.96 * std_dev
# Create a data frame for the manual prediction interval for the first forecast
manual_interval_df <- tibble(
Month = first_forecast$Month,
lower = lower_bound,
upper = upper_bound)
# Display the upper and lower interval value
manual_interval_df
## # A tibble: 1 × 3
## Month lower upper
## <mth> <dbl> <dbl>
## 1 2019 Jan 69117. 99733.
# Filter pigs_victoria to show only the previous 24 months
pigs_victoria_last_24 <- pigs_victoria |>
filter(Month >= yearmonth(max(Month)) - 23)
# Plot the forecast along with the last 24 months with lower & upper interval
autoplot(pig_forecast, pigs_victoria_last_24) +
geom_ribbon(
data = pig_forecast |> filter(row_number() == 1),
aes(x = Month, ymin = lower_bound, ymax = upper_bound),
fill = "red",
alpha = 0.2
) +
geom_errorbar(
data = manual_interval_df,
aes(x = Month, ymin = lower, ymax = upper),
width = 10,
color = "red"
) +
labs(
title = "Forecast of Pigs Slaughtered in Victoria: Next 4 Months\n(with (Upper & Lower Bounds of First Forecast)",
y = "Count",
x = "Month"
) +
theme_minimal()
Data set global_economy contains the annual Exports from many countries. Select one country to analyse.
Plot the Exports series and discuss the main features of the data.
Use an ETS(A,N,N) model to forecast the series, and plot the forecasts. Compute the RMSE values for the training data. Compare the results to those from an ETS(A,A,N) model. (Remember that the trended model is using one more parameter than the simpler model.)
While both the ETS(ANN) & ETS(AAN) models appear to model error/variance and have similar prediction intervals, the AAN model also models the upward direction of the trend of the data. The addition of the Additive trend improved (lowered) RSME from ANN RMSE = 0.627 to AAN RSMRE = 0.615. However, the ANN model had a slightly better AICc value indicating an overall better fit of the data.
The Auto ETS function identified an ETS(M,N,N) model which has similar RMSE as ETS(A,N,N) but lower AICc values than both ANN and AAN models – indicating MNN may be a better fit model. Based on this, I also modeled ETS(M,A,N). This resulted in a model that had a lower RSME similar to the ETS(A,A,N) model and lower AICc than both AAN and ANN. Given the ETS(M,A,N) combination of better AICc: 177.1747 and lower RSME: 0.6173608, it would be the best model for forecasting.
Discuss the merits of the two forecasting methods for this data set.
Compare the forecasts from both methods. Which do you think is best?
Calculate a 95% prediction interval for the first forecast for each model, using the RMSE values and assuming normal errors. Compare your intervals with those produced using R.
# Filter USA Data
USA_data <- global_economy |>
filter(Code == "USA") |>
filter(!is.na(Exports)) |>
as_tsibble(index = Year)
# Plot the Exports series
USA_data |>
autoplot(Exports) +
ggtitle("Exports of USA") +
ylab("Exports") +
xlab("Year") +
theme_minimal()
# Fit an ETS(A,N,N) model
ets_ann <- USA_data |>
model(ETS(Exports ~ error("A") + trend("N") + season("N")))
# Forecast using the fitted ETS(A,N,N) model
ets_ann_forecasts <- ets_ann |>
forecast(h = "10 years")
# Extract the first forecast row
first_forecast <- ets_ann_forecasts |>
filter(row_number() == 1)
# Extract the mean value from the first forecast row
mean_value <- first_forecast |>
pull(.mean)
# Extract the distribution
exports_distribution <- first_forecast |>
pull(Exports)
# Extract the variance
variance <- as.numeric(str_extract(as.character(exports_distribution), "(?<=, )[0-9\\.e\\+]+"))
# Calculate the standard deviation from the variance
std_dev <- sqrt(variance)
# Compute the 95% prediction interval
lower_bound <- mean_value - 1.96 * std_dev
upper_bound <- mean_value + 1.96 * std_dev
# Create a data frame for the manual prediction interval for the first forecast
manual_interval_df <- tibble(
Year = first_forecast$Year,
lower = lower_bound,
upper = upper_bound)
# Plot the forecasts with manual interval included
ets_ann_forecasts |>
autoplot(USA_data) +
geom_ribbon(
data = first_forecast,
aes(x = Year, ymin = lower_bound, ymax = upper_bound),
fill = "red",
alpha = 0.2
) +
geom_errorbar(
data = manual_interval_df,
aes(x = Year, ymin = lower, ymax = upper),
width = 0.1,
color = "red"
) +
ggtitle("ETS(A,N,N) Forecast of USA Exports\n(With Manual Prediction Interval for First Forecast)") +
ylab("Exports") +
xlab("Year") +
theme_minimal()
# Plot the fitted ETS(A,N,N) model
ets_ann |>
gg_tsresiduals() +
ggtitle("ETS(A,N,N) Model of USA Exports") +
ylab("Residuals") +
xlab("Year")
## # A tibble: 1 × 2
## .model RMSE
## <chr> <dbl>
## 1 "ETS(Exports ~ error(\"A\") + trend(\"N\") + season(\"N\"))" 0.627
# Fit an ETS(A,A,N) model to the Exports series
ets_aan <- USA_data |>
model(ETS(Exports ~ error("A") + trend("A") + season("N")))
# Forecast the series using the fitted ETS(A,A,N) model
ets_aan_forecasts <- ets_aan |>
forecast(h = "10 years")
# Extract the first forecast row
first_forecast_aan <- ets_aan_forecasts |>
filter(row_number() == 1)
# Extract the mean value from the first forecast row
mean_value_aan <- first_forecast_aan |>
pull(.mean)
# Extract the distribution
exports_distribution_aan <- first_forecast_aan |>
pull(Exports)
# Extract the variance
variance_aan <- as.numeric(str_extract(as.character(exports_distribution_aan), "(?<=, )[0-9\\.e\\+]+"))
# Calculate the standard deviation from the variance
std_dev_aan <- sqrt(variance_aan)
# Compute the 95% prediction interval
lower_bound_aan <- mean_value_aan - 1.96 * std_dev_aan
upper_bound_aan <- mean_value_aan + 1.96 * std_dev_aan
# Create a data frame for the manual prediction interval for the first forecast
manual_interval_df_aan <- tibble(
Year = first_forecast_aan$Year,
lower = lower_bound_aan,
upper = upper_bound_aan)
# Plot the forecasts with manual interval included
ets_aan_forecasts |>
autoplot(USA_data) +
geom_ribbon(
data = first_forecast_aan,
aes(x = Year, ymin = lower_bound_aan, ymax = upper_bound_aan),
fill = "red",
alpha = 0.2
) +
geom_errorbar(
data = manual_interval_df_aan,
aes(x = Year, ymin = lower, ymax = upper),
width = 0.1,
color = "red"
) +
ggtitle("ETS(A,A,N) Forecast of USA Exports\n(With Manual Prediction Interval for First Forecast)") +
ylab("Exports") +
xlab("Year") +
theme_minimal()
# Plot the fitted ETS(A,A,N) model
ets_aan |>
gg_tsresiduals() +
ggtitle("ETS(A,A,N) Model of USA Exports") +
ylab("Residuals") +
xlab("Year")
## # A tibble: 1 × 2
## .model RMSE
## <chr> <dbl>
## 1 "ETS(Exports ~ error(\"A\") + trend(\"A\") + season(\"N\"))" 0.615
# Fit an ETS(M,N,N) model to the Exports series
ets_mnn <- USA_data |>
model(ETS(Exports ~ error("M") + trend("N") + season("N")))
# Forecast the series using the fitted ETS(MNN) model
ets_mnnforecasts <- ets_mnn |> forecast(h = "10 years")
# Plot the forecasts
ets_mnnforecasts |>
autoplot(USA_data) +
ggtitle("ETS(MNN) Forecast of USA Exports") +
ylab("Exports") +
xlab("Year")
# Plot the fitted ETS(M,N,N) model
ets_mnn |>
gg_tsresiduals() +
ggtitle("ETS(MNN) Model of USA Exports") +
ylab("Exports") +
xlab("Year")
## # A tibble: 1 × 2
## .model RMSE
## <chr> <dbl>
## 1 "ETS(Exports ~ error(\"M\") + trend(\"N\") + season(\"N\"))" 0.626
# Fit an ETS(M,A,N) model to the Exports series
ets_man <- USA_data |>
model(ETS(Exports ~ error("M") + trend("A") + season("N")))
# Forecast the series using the fitted ETS(MNN) model
ets_manforecasts <- ets_man |> forecast(h = "10 years")
# Plot the forecasts
ets_manforecasts |>
autoplot(USA_data) +
ggtitle("ETS(MAN) Forecast of USA Exports") +
ylab("Exports") +
xlab("Year")
# Plot the fitted ETS(M,A,N) model
ets_man |>
gg_tsresiduals() +
ggtitle("ETS(MAN) Model of USA Exports") +
ylab("Exports") +
xlab("Year")
## # A tibble: 1 × 2
## .model RMSE
## <chr> <dbl>
## 1 "ETS(Exports ~ error(\"M\") + trend(\"A\") + season(\"N\"))" 0.617
# Fit different ETS models
usa_exports_model <- USA_data |>
model(
auto_ets = ETS(Exports),
ann = ETS(Exports ~ error("A") + trend("N") + season("N")),
aan = ETS(Exports ~ error("A") + trend("A") + season("N")),
man = ETS(Exports ~ error("M") + trend("A") + season("N")))
# Compare AICc and BIC
model_comparison <- glance(usa_exports_model) |> select(.model, AICc, BIC)
print(model_comparison)
## # A tibble: 4 × 3
## .model AICc BIC
## <chr> <dbl> <dbl>
## 1 auto_ets 176. 182.
## 2 ann 184. 189.
## 3 aan 186. 195.
## 4 man 177. 186.
Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.
# Extract the Chinese GDP data from global_economy dataset
data("global_economy")
china_gdp <- global_economy |>
filter(Country == "China") |>
select(Year, GDP) |>
drop_na()
# Fit ETS models with various configurations
# Basic ETS model without damped trend
ets_basic <- china_gdp |>
model(ETS(GDP))
# ETS model with damped trend
ets_damped <- china_gdp |>
model(ETS(GDP ~ error("A") + trend("Ad") + season("N")))
# ETS model with multiplicative trend
ets_mult_trend <- china_gdp |>
model(ETS(GDP ~ error("A") + trend("M") + season("N")))
# ETS model with additive trend and no seasonality
ets_add_trend <- china_gdp |>
model(ETS(GDP ~ error("A") + trend("A") + season("N")))
# Forecast for a relatively large horizon (h = 30)
h <- 30
# Generate forecasts
forecast_basic <- ets_basic |> forecast(h = h)
forecast_damped <- ets_damped |> forecast(h = h)
forecast_mult_trend <- ets_mult_trend |> forecast(h = h)
forecast_add_trend <- ets_add_trend |> forecast(h = h)
# Plot the forecasts
# Basic ETS forecast (ETS(A,N,N))
autoplot(forecast_basic, china_gdp) +
ggtitle("Forecast of Chinese GDP - ETS(A,N,N)") +
xlab("Year") +
ylab("GDP (in trillions)") +
scale_y_continuous(labels = scales::label_number(scale = 1e-12))
# ETS forecast with damped trend (ETS(A,Ad,N))
autoplot(forecast_damped, china_gdp) +
ggtitle("Forecast of Chinese GDP - ETS(A,Ad,N)") +
xlab("Year") +
ylab("GDP (in trillions)") +
scale_y_continuous(labels = scales::label_number(scale = 1e-12))
# ETS forecast with multiplicative trend (ETS(A,M,N))
autoplot(forecast_mult_trend, china_gdp) +
ggtitle("Forecast of Chinese GDP - ETS(A,M,N)") +
xlab("Year") +
ylab("GDP (in trillions)") +
scale_y_continuous(labels = scales::label_number(scale = 1e-12))
# ETS forecast with additive trend (ETS(A,A,N))
autoplot(forecast_add_trend, china_gdp) +
ggtitle("Forecast of Chinese GDP - ETS(A,A,N)") +
xlab("Year") +
ylab("GDP (in trillions)") +
scale_y_continuous(labels = scales::label_number(scale = 1e-12))
Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?
# Filter the Gas data and drop missing values
Gas <- aus_production |>
filter(!is.na(Gas)) |>
drop_na()
# Extend the time series to ensure no gaps in future periods
Gas_full <- Gas |>
fill_gaps()
# Fit ETS models to the Gas data
Gas_models <- Gas_full |>
model(
MAM = ETS(Gas), # ETS(M, A, M)
MAA = ETS(Gas ~ error("M") + trend("A") + season("A")),
MAdM = ETS(Gas ~ error("M") + trend("Ad") + season("M")))
# Forecasts
Gas_forecasts <- Gas_models |>
forecast(h = 48)
# Plot ETS(M, A, M) model
Gas_forecasts |>
filter(.model == "MAM") |>
autoplot(Gas, level = NULL) +
ggtitle("ETS(M,A,M) Model Forecast for Gas Production") +
xlab("Year") +
ylab("Gas Production")
# Plot ETS(M, A, A) model
Gas_forecasts |>
filter(.model == "MAA") |>
autoplot(Gas, level = NULL) +
ggtitle("ETS(M,A,A) Model Forecast for Gas Production") +
xlab("Year") +
ylab("Gas Production")
# Plot ETS(M, Ad, M) model
Gas_forecasts |>
filter(.model == "MAdM") |>
autoplot(Gas, level = NULL) +
ggtitle("ETS(M, Ad, M) Model with Damped Trend for Gas Production") +
xlab("Year") +
ylab("Gas Production")
## Series: Gas
## Model: ETS(M,A,M)
## Smoothing parameters:
## alpha = 0.6684818
## beta = 0.1455337
## gamma = 0.08239294
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 5.89476 0.0122951 0.936404 1.180338 1.064744 0.8185143
##
## sigma^2: 0.0035
##
## AIC AICc BIC
## 1425.183 1426.162 1454.594
## Series: Gas
## Model: ETS(M,A,A)
## Smoothing parameters:
## alpha = 0.03488527
## beta = 0.03488346
## gamma = 0.9651147
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 7.519692 0.1119759 -3.810601 13.2409 4.596753 -14.02705
##
## sigma^2: 0.0344
##
## AIC AICc BIC
## 1865.076 1866.055 1894.487
## Series: Gas
## Model: ETS(M,Ad,M)
## Smoothing parameters:
## alpha = 0.6517919
## beta = 0.1543681
## gamma = 0.06972918
## phi = 0.9799999
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 5.823328 0.07058923 0.9333524 1.182856 1.067906 0.8158849
##
## sigma^2: 0.0036
##
## AIC AICc BIC
## 1427.801 1429.003 1460.480
# Compare models
model_comparison <- glance(Gas_models) |> select(.model, AICc, BIC)
print(model_comparison)
## # A tibble: 3 × 3
## .model AICc BIC
## <chr> <dbl> <dbl>
## 1 MAM 1426. 1455.
## 2 MAA 1866. 1894.
## 3 MAdM 1429. 1460.
Recall your retail time series data (from Exercise 7 in Section 2.10).
Why is multiplicative seasonality necessary for this series?
Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped. Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?
Check that the residuals from the best method look like white noise.
Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?
# Set seed
set.seed(123456789)
# Select a random time series
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
# Plot time series
autoplot(myseries, Turnover) +
labs(
title = "Time Series of Turnover",
y = "Turnover",
x = "Time"
) +
theme_minimal()
# Apply Holt-Winters' multiplicative method with and without damped trend
models <- myseries |>
model(
ETS_MAM = ETS(Turnover ~ error("M") + trend("A") + season("M")),
ETS_MAdM = ETS(Turnover ~ error("M") + trend("Ad") + season("M")))
# Report of the non-damped model
models |>
select(ETS_MAM) |>
report()
## Series: Turnover
## Model: ETS(M,A,M)
## Smoothing parameters:
## alpha = 0.6866315
## beta = 0.0001001499
## gamma = 0.1747648
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5]
## 2.283613 0.1052538 0.9416145 0.8915875 0.9671535 1.728275 1.028746 0.9616338
## s[-6] s[-7] s[-8] s[-9] s[-10] s[-11]
## 0.9154267 0.942537 0.8551027 0.8178865 1.025802 0.9242344
##
## sigma^2: 0.0086
##
## AIC AICc BIC
## 2898.597 2900.044 2968.111
## Series: Turnover
## Model: ETS(M,Ad,M)
## Smoothing parameters:
## alpha = 0.6899822
## beta = 0.0001428611
## gamma = 0.1784481
## phi = 0.9799353
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5]
## 2.170932 0.07914981 0.946957 0.8602352 0.9549359 1.918604 1.06894 0.9966251
## s[-6] s[-7] s[-8] s[-9] s[-10] s[-11]
## 0.8953939 0.8932047 0.8183433 0.7806524 0.9742733 0.8918354
##
## sigma^2: 0.009
##
## AIC AICc BIC
## 2911.433 2913.054 2985.036
# Calculate RMSE for both models
accuracy_results <- models |>
accuracy() |>
select(.model, RMSE)
# Plot residuals for ETS_MAM
models |>
select(ETS_MAM) |>
gg_tsresiduals(lag_max = 24) +
labs(title = "Residual for ETS(M,A,M)")
# Plot residuals for ETS_MAdM models
models |>
select(ETS_MAdM) |>
gg_tsresiduals(lag_max = 24) +
labs(title = "Residual for ETS(M,Ad,M)")
# Perform Ljung-Box test on ETS(MAM)
ljung_box_mam <- models |>
select(ETS_MAM) |>
augment() |>
features(.resid, ljung_box, lag = 24, dof = 4) |>
select(lb_stat, lb_pvalue)
# Perform Ljung-Box test on ETS(MAdM)
ljung_box_madm <- models |>
select(ETS_MAdM) |>
augment() |>
features(.resid, ljung_box, lag = 24, dof = 5) |>
select(lb_stat, lb_pvalue)
# Combine Ljung-Box results
combined_results <- bind_rows(
accuracy_results |> filter(.model == "ETS_MAM") |> bind_cols(ljung_box_mam),
accuracy_results |> filter(.model == "ETS_MAdM") |> bind_cols(ljung_box_madm))
# Print the combined table
print(combined_results)
## # A tibble: 2 × 4
## .model RMSE lb_stat lb_pvalue
## <chr> <dbl> <dbl> <dbl>
## 1 ETS_MAM 1.36 66.9 0.000000571
## 2 ETS_MAdM 1.35 60.5 0.00000320
# Create training and test sets
train_data <- myseries |>
filter(year(Month) <= 2010)
test_data <- myseries |>
filter(year(Month) > 2010)
# Train both ETS_MAM and ETS_MAdM models on the training set
hw_models <- train_data |>
model(
ETS_MAM = ETS(Turnover ~ error("M") + trend("A") + season("M")),
ETS_MAdM = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
)
# Generate forecasts for test set
test_horizon <- nrow(test_data)
hw_forecasts <- hw_models |>
forecast(h = test_horizon)
# Plot Training + Test + ETS_MAM Forecast
plot_ets_mam <- ggplot() +
geom_line(data = train_data, aes(x = Month, y = Turnover, color = "Training Data")) +
geom_line(data = test_data, aes(x = Month, y = Turnover, color = "Test Data")) +
geom_line(data = hw_forecasts |> filter(.model == "ETS_MAM"), aes(x = Month, y = .mean, color = "ETS MAM Forecast")) +
labs(
title = "Turnover with ETS(M,A,M) Forecast",
y = "Turnover",
x = "Time",
color = "Legend"
) +
theme_minimal() +
scale_color_manual(
values = c("Training Data" = "blue", "Test Data" = "green", "ETS MAM Forecast" = "red"))
# Plot Training + Test + ETS_MAdM Forecast
plot_ets_madm <- ggplot() +
geom_line(data = train_data, aes(x = Month, y = Turnover, color = "Training Data")) +
geom_line(data = test_data, aes(x = Month, y = Turnover, color = "Test Data")) +
geom_line(data = hw_forecasts |> filter(.model == "ETS_MAdM"), aes(x = Month, y = .mean, color = "ETS MAdM Forecast")) +
labs(
title = "Turnover with ETS(M,Ad,M) Forecast",
y = "Turnover",
x = "Time",
color = "Legend"
) +
theme_minimal() +
scale_color_manual(
values = c("Training Data" = "blue", "Test Data" = "green", "ETS MAdM Forecast" = "purple"))
# Display plots
print(plot_ets_mam)
# Calculate accuracy metrics for both ETS models
accuracy_metrics <- hw_forecasts |>
accuracy(test_data)
# Seasonal Naive Model
# Fit the seasonal naive model on the training set
fit_snaive <- train_data |>
model(snaive = SNAIVE(Turnover))
# Generate forecasts for the test set
fc_snaive <- fit_snaive |>
forecast(h = test_horizon)
# Plot Training + Test + Seasonal Naive Forecast
plot_snaive <- ggplot() +
geom_line(data = train_data, aes(x = Month, y = Turnover, color = "Training Data")) +
geom_line(data = test_data, aes(x = Month, y = Turnover, color = "Test Data")) +
geom_line(data = fc_snaive, aes(x = Month, y = .mean, color = "SNaive Forecast")) +
labs(
title = "Turnover with Seasonal Naive Forecast",
y = "Turnover",
x = "Time",
color = "Legend"
) +
theme_minimal() +
scale_color_manual(
values = c("Training Data" = "blue", "Test Data" = "green", "SNaive Forecast" = "orange"))
# Display the Seasonal Naive plot
print(plot_snaive)
# Calculate accuracy metrics for the seasonal naive model
snaive_accuracy <- fc_snaive |>
accuracy(test_data)
# Combine accuracy metrics for all models
combined_accuracy <- bind_rows(accuracy_metrics, snaive_accuracy)
# Extract and print RMSE for all models
combined_accuracy |>
select(.model, RMSE) |>
print()
## # A tibble: 3 × 2
## .model RMSE
## <chr> <dbl>
## 1 ETS_MAM 4.28
## 2 ETS_MAdM 5.54
## 3 snaive 5.47
For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?
# Perform STL decomposition on the Box-Cox series
lambda <- myseries |>
features(Turnover, features = guerrero) |>
pull(lambda_guerrero)
stl_decomp <- myseries |>
model(
STL(box_cox(Turnover, lambda) ~ season(window = "periodic")))
# Extract the components from the STL decomposition
stl_components <- stl_decomp |>
components()
# Calculate the seasonally adjusted series
myseries_sa <- stl_components |>
as_tsibble() |>
mutate(season_adjusted = remainder + trend)
# Fit an ETS model to the seasonally adjusted data
ets_model <- myseries_sa |>
model(
ets_fit = ETS(season_adjusted))
# Remove duplicate `.model` column
ets_model <- ets_model |>
select(-.model)
# Forecast using the ETS model
forecast_ets <- ets_model |>
forecast(h = "12 months")
# Transform the original series using Box-Cox transformation
myseries_transformed <- myseries |>
mutate(Turnover_transformed = box_cox(Turnover, lambda))
# Plot the STL decomposition components
autoplot(stl_components) +
labs(title = "STL Decomposition of Box-Cox Transformed Series") +
theme_minimal()
# Plot the forecast on the Box-Cox transformed scale
autoplot(forecast_ets) +
autolayer(myseries_transformed, Turnover_transformed, color = "blue", alpha = 0.7) +
labs(title = "ETS Forecast on Box-Cox Transformed Series",
y = "Transformed Turnover") +
guides(color = guide_legend(title = "Series")) +
theme_minimal()
## Series: season_adjusted
## Model: ETS(A,N,A)
## Smoothing parameters:
## alpha = 0.7613169
## gamma = 0.1176666
##
## Initial states:
## l[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5]
## 1.239864 -0.08441698 0.04140901 -0.07472382 0.1006675 -0.238579 -0.1876896
## s[-6] s[-7] s[-8] s[-9] s[-10] s[-11]
## -0.04245136 0.07099196 0.1373728 0.0864524 0.1807392 0.01022783
##
## sigma^2: 0.0605
##
## AIC AICc BIC
## 1464.287 1465.416 1525.623
# Calculate the accuracy of the ETS model
ets_accuracy <- ets_model |>
accuracy()
# Display RMSE
ets_accuracy |>
select(.model, RMSE)
## # A tibble: 1 × 2
## .model RMSE
## <chr> <dbl>
## 1 ets_fit 0.242