Proyecto #3 - Análisis estadístico

Sección 8 - Forecasting strategies

Training with single training and testing partitions

library (TSstudio)

## Warning: package 'TSstudio' was built under R version 4.3.3

# Load the USgas series into the session and print a summary of its
# time-series structure (class, frequency, start and end).
data(USgas)
ts_info(USgas)

##  The USgas series is a ts object with 1 variable and 238 observations
##  Frequency: 12 
##  Start time: 2000 1 
##  End time: 2019 10

# Hold out the last 12 observations as the test partition; every observation
# before them forms the training partition.
# window() falls back to the series' own start/end when `start`/`end` is
# omitted, so only the cut point has to be supplied for each partition.
train <- window(USgas, end = time(USgas)[length(USgas) - 12])
test <- window(USgas, start = time(USgas)[length(USgas) - 11])

# Print a summary of the training partition built with window().
ts_info(train)

##  The train series is a ts object with 1 variable and 226 observations
##  Frequency: 12 
##  Start time: 2000 1 
##  End time: 2018 10

# Print a summary of the testing partition built with window().
ts_info(test)

##  The test series is a ts object with 1 variable and 12 observations
##  Frequency: 12 
##  Start time: 2018 11 
##  End time: 2019 10

# Build the same 12-observation hold-out in a single call: ts_split() returns
# a list holding the `train` and `test` partitions.
USgas_partitions <- ts_split(USgas, sample.out = 12)
train <- USgas_partitions[["train"]]
test <- USgas_partitions[["test"]]

# Print a summary of the training partition returned by ts_split().
ts_info(train)

##  The train series is a ts object with 1 variable and 226 observations
##  Frequency: 12 
##  Start time: 2000 1 
##  End time: 2018 10

# Print a summary of the testing partition returned by ts_split().
ts_info(test)

##  The test series is a ts object with 1 variable and 12 observations
##  Frequency: 12 
##  Start time: 2018 11 
##  End time: 2019 10

Residual analysis

library (forecast)

## Warning: package 'forecast' was built under R version 4.3.3

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

# Fit an automatically selected ARIMA model on the training partition, then
# run the residual diagnostics on the fitted model (the Ljung-Box test result
# is printed to the console).
md <- auto.arima(train)
checkresiduals(md)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,1,1)(2,1,1)[12]
## Q* = 24.95, df = 18, p-value = 0.1263
## 
## Model df: 6.   Total lags used: 24

Scoring the forecast

# Forecast 12 periods ahead from the model fitted on the training partition
# (the horizon matches the 12-observation test partition).
fc <- forecast(md, h = 12)

# Report the error metrics of the forecast on the training and test sets.
accuracy(fc, test)

##                     ME      RMSE      MAE       MPE     MAPE      MASE
## Training set  5.844136  97.81626 73.42657 0.1170672 3.522348 0.6376860
## Test set     37.847885 103.22848 81.46603 1.3107987 3.261643 0.7075062
##                      ACF1 Theil's U
## Training set -0.004183172        NA
## Test set     -0.046708926 0.3404092

# Visual check of the ARIMA forecast against the full actual series and the
# held-out test partition (see TSstudio::test_forecast).
test_forecast(
  actual = USgas,
  forecast.obj = fc,
  test = test
)

Forecast benchmark

library (forecast)

# Benchmark 1: naive forecast over the same 12-period horizon, checked against
# the same test partition as the ARIMA model.
naive_model <- naive(train, h = 12)

test_forecast(
  actual = USgas,
  forecast.obj = naive_model,
  test = test
)

# Error metrics of the naive benchmark on the training and test sets.
accuracy(naive_model, test)

##                      ME     RMSE      MAE        MPE     MAPE     MASE
## Training set  -1.028444 285.6607 228.5084 -0.9218463 10.97123 1.984522
## Test set     301.891667 499.6914 379.1417  9.6798015 13.28187 3.292723
##                   ACF1 Theil's U
## Training set 0.3761105        NA
## Test set     0.7002486  1.499679

# Benchmark 2: seasonal naive forecast over the same 12-period horizon,
# checked against the same test partition.
snaive_model <- snaive(train, h = 12)

test_forecast(
  actual = USgas,
  forecast.obj = snaive_model,
  test = test
)

# Error metrics of the seasonal naive benchmark on the training and test sets.
accuracy(snaive_model, test)

##                    ME     RMSE      MAE      MPE     MAPE     MASE       ACF1
## Training set 33.99953 148.7049 115.1453 1.379869 5.494048 1.000000  0.4859501
## Test set     96.45000 164.6967 135.8833 3.612060 5.220458 1.180103 -0.2120929
##              Theil's U
## Training set        NA
## Test set     0.4289964

Finalizing the forecast

# Refit the automatically selected ARIMA model on the full series (training
# and test observations together) and forecast the next 12 periods.
md_final <- auto.arima(USgas)
fc_final <- forecast(md_final, h = 12)

# Plot the final forecast.
plot_forecast(
  fc_final,
  title = "The US Natural Gas Consumption Forecast",
  Xtitle = "year",
  Ytitle = "Billion Cubic Feet"
)

Confidence interval

# Forecast 60 periods ahead from the final model, requesting 80% and 90%
# prediction intervals instead of the defaults.
fc_final2 <- forecast(md_final,
                      h = 60,
                      level = c(80, 90))

# Plot the long-horizon forecast with its intervals.
# Title typo fixed: "Counsumption" -> "Consumption".
plot_forecast(fc_final2,
              title = "The US Natural Gas Consumption Forecast",
              Xtitle = "Year",
              Ytitle = "Billion Cubic Feet")

Simulation

# Fix the RNG seed so the simulated forecast paths (and the plot built from
# them) are the same on every run of the report; 1234 matches the seed used
# for the horse-race section of this file.
set.seed(1234)

# Simulate 500 forecast paths of 60 periods each from the final model.
fc_final3 <- forecast_sim(model = md_final,
                          h = 60,
                          n = 500)

library(plotly)

## Warning: package 'plotly' was built under R version 4.3.3

## Loading required package: ggplot2

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

# Add a title and axis labels to the simulation plot stored in the
# forecast_sim() result. `layout` here is plotly's layout(), which masks
# graphics::layout once plotly is attached.
layout(
  fc_final3$plot,
  title = "US Natural Gas Consumption - Forecasting Simulation",
  yaxis = list(title = "Billion Cubic Feet"),
  xaxis = list(title = "Year")
)

Horse race approach

# Fix the RNG seed for the steps that follow.
set.seed(1234)

# Candidate models for the horse race. Each entry names the model function
# (`method`), the extra arguments to pass to it (`method_arg`, NULL for none)
# and a free-text label (`notes`).
methods <- list(
  # Two ETS variants that differ only in the optimisation criterion.
  ets1 = list(
    method = "ets",
    method_arg = list(opt.crit = "lik"),
    notes = "ETS model with opt.crit = lik"
  ),
  ets2 = list(
    method = "ets",
    method_arg = list(opt.crit = "amse"),
    notes = "ETS model with opt.crit = amse"
  ),
  # Non-seasonal and seasonal ARIMA specifications.
  arima1 = list(
    method = "arima",
    method_arg = list(order = c(2, 1, 0)),
    notes = "ARIMA(2,1,0)"
  ),
  arima2 = list(
    method = "arima",
    method_arg = list(
      order = c(2, 1, 2),
      seasonal = list(order = c(1, 1, 1))
    ),
    notes = "SARIMA(2,1,2)(1,1,1)"
  ),
  # Holt-Winters with no extra arguments.
  hw = list(
    method = "HoltWinters",
    method_arg = NULL,
    notes = "HoltWinters Model"
  ),
  # Linear regression on trend and seasonal terms.
  tslm = list(
    method = "tslm",
    method_arg = list(formula = input ~ trend + season),
    notes = "tslm model with trend and seasonal components"
  )
)
# Train every candidate in `methods` with backtesting: 6 partitions, each with
# a 12-observation testing sample, spaced 3 observations apart, forecasting 12
# periods ahead and using MAPE as the error metric.
# NOTE(review): this reuses the name `md`, replacing the auto.arima() model
# assigned to `md` earlier in the report; plot_error(md) below relies on the
# new value.
md <- train_model(
  input = USgas,
  methods = methods,
  train_method = list(
    partitions = 6,
    sample.out = 12,
    space = 3
  ),
  horizon = 12,
  error = "MAPE"
)

## # A tibble: 6 × 7
##   model_id model   notes avg_mape avg_rmse `avg_coverage_80%` `avg_coverage_95%`
##   <chr>    <chr>   <chr>    <dbl>    <dbl>              <dbl>              <dbl>
## 1 hw       HoltWi… Holt…   0.0482     144.              0.792              0.931
## 2 ets1     ets     ETS …   0.0526     156.              0.833              0.972
## 3 arima2   arima   SARI…   0.0546     163.              0.583              0.819
## 4 ets2     ets     ETS …   0.0650     185.              0.5                0.792
## 5 tslm     tslm    tslm…   0.0854     242.              0.319              0.611
## 6 arima1   arima   ARIM…   0.163      539.              0.861              0.958

library(forecast)
# Plot the error of the models trained by train_model(); `md` here is the
# train_model() result, not the earlier auto.arima() fit.
plot_error(md)

PROYECTO 3

Alejandra Castillo

2024-04-22

Proyecto #3 - Análisis estadístico

Sección 8 - Forecasting strategies

Training with single training and testing partitions

Residual analysis

Scoring the forecast

Forecast benchmark

Finalizing the forecast

Confidence interval

Simulation

Horse race approach