Training the Model
# I am first fitting my model with corss-validation to the entire data set to
# see the accuracy of the residuals.
data %>%
stretch_tsibble(.init = 10) %>%
model(
SES = ETS(Price ~ error("A") + trend("N") + season("N")),
Holt = ETS(Price ~ error("A") + trend("A") + season("N")),
Damped = ETS(Price ~ error("A") + trend("Ad") + season("N")),
) %>%
forecast(h = 4) %>%
accuracy(data)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 4 observations are missing between 2022 Oct and 2023 Jan
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Damped Test 0.0344 0.239 0.166 0.619 3.46 0.482 0.481 0.755
## 2 Holt Test 0.00617 0.232 0.167 -0.0486 3.53 0.484 0.468 0.759
## 3 SES Test 0.0417 0.230 0.161 0.747 3.36 0.468 0.463 0.748
# According to this measure, since the RMSE and MAE are the lowest
# for the SES method, it is most adequate.
# I am now creating a training and test set to conduct accuracy
# performances across each method.
train <- data %>%
filter(year(Month) <= 2015)
test <- data %>%
filter(year(Month) > 2015)
recent <- data %>%
filter(year(Month) >= 2000)
# A fit over my training set data using cross-validation and check
# the accuracy to choose which model is best
fit_train <- train %>%
stretch_tsibble(.init = 12) %>%
model(
SES = ETS(Price ~ error("A") + trend("N") + season("N")),
Holt = ETS(Price ~ error("A") + trend("A") + season("N")),
Damped = ETS(Price ~ error("A") + trend("Ad") + season("N")),
)
# Now forecasting the training set
fc_train <- fit_train %>%
forecast(h = 81)
# Now checking the accuracy according to the residuals
accuracy(fit_train)
## # A tibble: 543 × 11
## .id .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <int> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 SES Training 0.0232 0.0708 0.0533 0.775 1.76 NaN NaN
## 2 1 Holt Training -0.0219 0.0627 0.0455 -0.712 1.49 NaN NaN
## 3 1 Damped Training -0.00554 0.0557 0.0356 -0.182 1.17 NaN NaN
## 4 2 SES Training 0.0188 0.0686 0.0519 0.628 1.71 0.213 0.281
## 5 2 Holt Training -0.0193 0.0602 0.0426 -0.626 1.39 0.174 0.247
## 6 2 Damped Training -0.0123 0.0536 0.0325 -0.404 1.07 0.133 0.220
## 7 3 SES Training 0.0225 0.0688 0.0532 0.749 1.76 0.243 0.312
## 8 3 Holt Training -0.0114 0.0641 0.0459 -0.365 1.50 0.210 0.291
## 9 3 Damped Training -0.00517 0.0555 0.0387 -0.177 1.27 0.177 0.252
## 10 4 SES Training 0.0273 0.0708 0.0559 0.897 1.84 0.251 0.316
## # … with 533 more rows, and 1 more variable: ACF1 <dbl>
## # ℹ Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names
# Now checking the accuracy of the prediction errors according
# to the test set.
accuracy(fc_train, recent)
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Damped Test 0.522 0.837 0.620 10.5 12.5 2.19 2.04 0.955
## 2 Holt Test 0.00667 0.864 0.591 -0.817 12.6 2.09 2.11 0.975
## 3 SES Test 0.535 0.833 0.611 10.7 12.4 2.16 2.03 0.952
# The lowest RMSE, which matters most, remains to be for the SES
# method. So it's seeming as if this method remains to be best so far.
# PLOTTING TRAINING SET FORECAST
# Now plotting the forecast of the training sets with the test
# set to graphically visualize all three methods.
# The SES method:
# The fit
fit02 <- train %>%
model(SES = ETS(Price ~ error("A") + trend("N") + season("N")))
# The forecast
fc02 <- fit02 %>%
forecast(h = 81)
# I am now graphing the fit on the training set and the forecast
# to the original data.
fc02 %>%
autoplot(recent) +
geom_line(aes(y = .fitted), color = "red", data = augment(fit02))

# Even though the fit looks nice, since there is an increasing trend,
# it doesn't seem appropriate. However, I am going to rely on the RMSE.
# Holt's linear method:
fit03 <- train %>%
model(Holt = ETS(Price ~ error("A") + trend("A") + season("N")))
fc03 <- fit03 %>%
forecast(h = 81)
fc03 %>%
autoplot(recent) +
geom_line(aes(y = .fitted), color = "red", data = augment(fit03))

# The training set seems to fit the data and test set pretty nicely! However,
# it had a higher RMSE than the SES method.
# Holt's Damped:
fit04 <- train %>%
model(Damped = ETS(Price ~ error("A") + trend("Ad") + season("N")))
fc04 <- fit04 %>%
forecast(h = 81)
fc04 %>%
autoplot(recent) +
geom_line(aes(y = .fitted), color = "red", data = augment(fit04))

# This is very similar to the SES method, but is damped at the very beginning
# of the forecast.
# ADDITIVE AND MULTIPLICATIVE
# I am going to create models for Holt-Winter's additive and
# multiplicative methods. I feel as if these methods will be accurate
# since the data seems to have a seasonal pattern. The additive method
# is preferred when the seasonal variations are roughly constant through
# the series, while the multiplicative method is preferred when the
# seasonal variations are changing proportional to the level of the series.
# Thus, I think multiplicative is going to be a good model.
# Fitting the methods to the data using cross validation and checking the overall
# accuracy of the time series.
data %>%
stretch_tsibble(.init = 10) %>%
model(additive = ETS(Price ~ error("A") + trend("A") + season("A")),
multiplicative = ETS(Price ~ error("M") + trend("A") + season("M"))) %>%
forecast(h = 4) %>%
accuracy(data)
## Warning: 8 errors (2 unique) encountered for additive
## [3] A seasonal ETS model cannot be used for this data.
## [5] Not enough data to estimate this ETS model.
## Warning: 8 errors (2 unique) encountered for multiplicative
## [3] A seasonal ETS model cannot be used for this data.
## [5] Not enough data to estimate this ETS model.
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 4 observations are missing between 2022 Oct and 2023 Jan
## # A tibble: 2 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 additive Test 0.00473 0.229 0.167 -0.0757 3.53 0.485 0.462 0.758
## 2 multiplicative Test 0.00488 0.235 0.175 -0.0677 3.71 0.508 0.474 0.763
# Now, according to the entire data set, the method with the lowest RMSE overall
# is Holt-Winters' additive method. This is surprising. I guess the data has
# more of a constant seasonality than I thought.
# Now, I am fitting each method to the training set with cross-validation.
fit1 <- train %>%
stretch_tsibble(.init = 10) %>%
model(
additive = ETS(Price ~ error("A") + trend("A") + season("A")),
multiplicative = ETS(Price ~ error("M") + trend("A") + season("M"))
)
## Warning: 8 errors (2 unique) encountered for additive
## [3] A seasonal ETS model cannot be used for this data.
## [5] Not enough data to estimate this ETS model.
## Warning: 8 errors (2 unique) encountered for multiplicative
## [3] A seasonal ETS model cannot be used for this data.
## [5] Not enough data to estimate this ETS model.
# Now forecasting the training set
fc1 <- fit1 %>%
forecast(h = 81)
# Now, checking the accuracy of the forecast
accuracy(fc1, recent)
## # A tibble: 2 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 additive Test -0.00320 1.16 0.692 -0.302 14.2 2.45 2.83 0.979
## 2 multiplicative Test 0.0718 1.30 0.770 1.17 15.7 2.72 3.17 0.978
# The RMSE is not lower for the training and test set compared to the
# SES method. To help make my decision, I am going to compare the graphs
# of the forecast.
fit13 <- train %>%
model(additive = ETS(Price ~ error("A") + trend("A") + season("A")))
fc13 <- fit13 %>%
forecast(h = 81) %>%
autoplot(data, level = NULL)
fc13

# The fit of the forecast seems like it could be more accurate than the
# SES method! Maybe this will be the chosen one.
# Even though Holt-Winters' additive method could be the best method chosen,
# there still exists the auto ETS method to compare:
data %>%
model(auto = ETS(Price)) %>%
accuracy()
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 auto Training 0.000399 0.125 0.0914 -0.0627 1.99 0.265 0.252 0.240
# Expected, the RMSE is the lowest for the auto ETS method.
# Now, I am fitting the auto ETS method to my training and
# test set.
fit15 <- train %>%
model(auto = ETS(Price))
fit15
## # A mable: 1 x 1
## auto
## <model>
## 1 <ETS(M,A,A)>
# Now, the forecast:
fc15 <- fit15 %>%
forecast(h = 81)
# And so that accuracy of the prediction errors are:
accuracy(fc15, recent)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 auto Test -0.651 0.804 0.677 -11.7 12.1 2.40 1.96 0.929
# The RMSE is the lowest for the test set out of all methods!
# I am going to graph the fitted values and forecast now.
fc15 %>%
autoplot(recent) +
geom_line(aes(y = .fitted), color = "red", data = augment(fit15))

# The point forecast seems to not have enough seasonality, but the
# overall forecast seems to fit the test data set pretty nicely. The
# prediction interval looks very nice but slightly skewed. I honestly
# think that Holt-Winters' additive method looks best, but I will trust
# the RMSE. Therefore, the chosen method for my forecast is the auto
# ETS method.