Sección 8 - Forecasting strategies
Training with single training and testing partitions
library (TSstudio)
## Warning: package 'TSstudio' was built under R version 4.3.3
# Load the USgas series (made available by the TSstudio package attached
# above) and print its basic attributes.
data("USgas")
ts_info(USgas)
## The USgas series is a ts object with 1 variable and 238 observations
## Frequency: 12
## Start time: 2000 1
## End time: 2019 10
# Manual split: the last 12 observations form the testing partition and
# everything before them the training partition. An omitted `start`/`end`
# defaults to the corresponding boundary of the series.
train <- window(
  USgas,
  end = time(USgas)[length(USgas) - 12]
)
test <- window(
  USgas,
  start = time(USgas)[length(USgas) - 11]
)
ts_info(train)
## The train series is a ts object with 1 variable and 226 observations
## Frequency: 12
## Start time: 2000 1
## End time: 2018 10
# Print the attributes of the testing partition created by the manual split.
ts_info(test)
## The test series is a ts object with 1 variable and 12 observations
## Frequency: 12
## Start time: 2018 11
## End time: 2019 10
# Same split using the TSstudio helper: the last 12 observations are held
# out as the testing partition, the rest is kept for training.
USgas_partitions <- ts_split(USgas, sample.out = 12)
train <- USgas_partitions[["train"]]
test <- USgas_partitions[["test"]]
ts_info(train)
## The train series is a ts object with 1 variable and 226 observations
## Frequency: 12
## Start time: 2000 1
## End time: 2018 10
# Print the attributes of the testing partition returned by ts_split().
ts_info(test)
## The test series is a ts object with 1 variable and 12 observations
## Frequency: 12
## Start time: 2018 11
## End time: 2019 10
Residual analysis
library (forecast)
## Warning: package 'forecast' was built under R version 4.3.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
# Fit an automatically selected ARIMA model on the training partition, then
# run the residual diagnostics (the Ljung-Box test result is printed below).
md <- auto.arima(train)
checkresiduals(md)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,1)(2,1,1)[12]
## Q* = 24.95, df = 18, p-value = 0.1263
##
## Model df: 6. Total lags used: 24
Scoring the forecast
# Forecast 12 periods ahead (the length of the testing partition) and
# report the error metrics on both the training and the testing sets.
fc <- forecast(md, h = 12)
accuracy(fc, test)
## ME RMSE MAE MPE MAPE MASE
## Training set 5.844136 97.81626 73.42657 0.1170672 3.522348 0.6376860
## Test set 37.847885 103.22848 81.46603 1.3107987 3.261643 0.7075062
## ACF1 Theil's U
## Training set -0.004183172 NA
## Test set -0.046708926 0.3404092
# Plot the ARIMA forecast object against the actual series, passing the
# testing partition for comparison.
test_forecast(
  actual = USgas,
  forecast.obj = fc,
  test = test
)
Forecast benchmark
library (forecast)
# Benchmark 1: naive forecast over the same 12-period horizon, plotted and
# scored against the testing partition.
naive_model <- naive(train, h = 12)
test_forecast(
  actual = USgas,
  forecast.obj = naive_model,
  test = test
)
accuracy(naive_model, test)
## ME RMSE MAE MPE MAPE MASE
## Training set -1.028444 285.6607 228.5084 -0.9218463 10.97123 1.984522
## Test set 301.891667 499.6914 379.1417 9.6798015 13.28187 3.292723
## ACF1 Theil's U
## Training set 0.3761105 NA
## Test set 0.7002486 1.499679
# Benchmark 2: seasonal naive forecast over the same 12-period horizon,
# plotted and scored against the testing partition.
snaive_model <- snaive(train, h = 12)
test_forecast(
  actual = USgas,
  forecast.obj = snaive_model,
  test = test
)
accuracy(snaive_model, test)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 33.99953 148.7049 115.1453 1.379869 5.494048 1.000000 0.4859501
## Test set 96.45000 164.6967 135.8833 3.612060 5.220458 1.180103 -0.2120929
## Theil's U
## Training set NA
## Test set 0.4289964
Finalizing the forecast
# Finalize the forecast: refit the model on the full series (training and
# testing partitions together) and forecast the next 12 periods.
md_final <- auto.arima(USgas)
fc_final <- forecast(md_final, h = 12)
# Axis title capitalized ("Year") to match the other plots in this script.
plot_forecast(
  fc_final,
  title = "The US Natural Gas Consumption Forecast",
  Xtitle = "Year",
  Ytitle = "Billion Cubic Feet"
)
Confidence interval
# Forecast 60 periods ahead with 80% and 90% prediction intervals instead
# of the default levels.
fc_final2 <- forecast(
  md_final,
  h = 60,
  level = c(80, 90)
)
# Title typo fixed ("Counsumption" -> "Consumption").
plot_forecast(
  fc_final2,
  title = "The US Natural Gas Consumption Forecast",
  Xtitle = "Year",
  Ytitle = "Billion Cubic Feet"
)
Simulation
# The simulation draws random paths, so fix the RNG seed first to make the
# simulated forecast reproducible between runs.
set.seed(1234)
# Simulate 500 forecast paths of 60 periods each from the final model.
fc_final3 <- forecast_sim(
  model = md_final,
  h = 60,
  n = 500
)
library(plotly)
## Warning: package 'plotly' was built under R version 4.3.3
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Add a title and axis labels to the plot stored in the simulation output.
fc_final3$plot %>%
  layout(
    title = "US Natural Gas Consumption - Forecasting Simulation",
    yaxis = list(title = "Billion Cubic Feet"),
    xaxis = list(title = "Year")
  )
Horse race approach
set.seed(1234)

# Candidate models for the horse race. Each entry names the modelling
# method, the extra arguments handed to it, and a free-text note.
methods <- list(
  # Two ETS variants that differ only in the optimization criterion.
  ets1 = list(
    method = "ets",
    method_arg = list(opt.crit = "lik"),
    notes = "ETS model with opt.crit = lik"
  ),
  ets2 = list(
    method = "ets",
    method_arg = list(opt.crit = "amse"),
    notes = "ETS model with opt.crit = amse"
  ),
  # Non-seasonal and seasonal ARIMA specifications.
  arima1 = list(
    method = "arima",
    method_arg = list(order = c(2, 1, 0)),
    notes = "ARIMA(2,1,0)"
  ),
  arima2 = list(
    method = "arima",
    method_arg = list(
      order = c(2, 1, 2),
      seasonal = list(order = c(1, 1, 1))
    ),
    notes = "SARIMA(2,1,2)(1,1,1)"
  ),
  # Holt-Winters with no extra arguments.
  hw = list(
    method = "HoltWinters",
    method_arg = NULL,
    notes = "HoltWinters Model"
  ),
  # Linear regression on trend and seasonal components.
  tslm = list(
    method = "tslm",
    method_arg = list(formula = input ~ trend + season),
    notes = "tslm model with trend and seasonal components"
  )
)
# Train every candidate model with backtesting: 6 partitions, each with a
# 12-observation testing window, spaced 3 observations apart. Models are
# compared by MAPE over a 12-period horizon; the leaderboard is printed below.
md <- train_model(
  input = USgas,
  methods = methods,
  train_method = list(
    partitions = 6,
    sample.out = 12,
    space = 3
  ),
  horizon = 12,
  error = "MAPE"
)
## # A tibble: 6 × 7
## model_id model notes avg_mape avg_rmse `avg_coverage_80%` `avg_coverage_95%`
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 hw HoltWi… Holt… 0.0482 144. 0.792 0.931
## 2 ets1 ets ETS … 0.0526 156. 0.833 0.972
## 3 arima2 arima SARI… 0.0546 163. 0.583 0.819
## 4 ets2 ets ETS … 0.0650 185. 0.5 0.792
## 5 tslm tslm tslm… 0.0854 242. 0.319 0.611
## 6 arima1 arima ARIM… 0.163 539. 0.861 0.958
library(forecast)
# Visualize the error metric of the models trained with train_model().
plot_error(md)