CH8

R Markdown

library(TSstudio)
data(USgas)
ts_info(USgas)

##  The USgas series is a ts object with 1 variable and 235 observations
##  Frequency: 12 
##  Start time: 2000 1 
##  End time: 2019 7

# Using the series time index to set the start and end point of each partiton
train <- window(USgas,start = time(USgas)[1],end = time(USgas)[length(USgas) - 12])
test <- window(USgas,start = time(USgas)[length(USgas) - 12 + 1], end = time(USgas)[length(USgas)])

ts_info(train)

##  The train series is a ts object with 1 variable and 223 observations
##  Frequency: 12 
##  Start time: 2000 1 
##  End time: 2018 7

ts_info(test)

##  The test series is a ts object with 1 variable and 12 observations
##  Frequency: 12 
##  Start time: 2018 8 
##  End time: 2019 7

library(forecast)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

md <- auto.arima(train)
checkresiduals(md)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,0,2)(1,1,1)[12] with drift
## Q* = 32.18, df = 17, p-value = 0.01429
## 
## Model df: 7.   Total lags used: 24

fc <- forecast(md, h = 12)
# score the model's performance with respect to the actual values in the testing partition:
accuracy(fc, test)

##                      ME     RMSE      MAE        MPE     MAPE      MASE
## Training set  -4.776519 100.1783  74.7367 -0.4713371 3.592793 0.6594341
## Test set     124.548652 155.4699 131.1530  4.7905509 5.093099 1.1572192
##                     ACF1 Theil's U
## Training set -0.01466532        NA
## Test set     -0.14371060 0.5321727

# An alternative approach to evaluating the fit of the model on both the training and testing
library(TSstudio)
test_forecast(actual = USgas, forecast.obj = fc, test = test)

library(forecast) 
naive_model <- naive(train, h  = 12)

#“continue with the last value to infinity”
# there is no training process
#library(TSstudio)
#test_forecast(actual = USgas, forecast.obj = naive_model, test = test)
accuracy(naive_model, test)

##                       ME     RMSE      MAE        MPE     MAPE     MASE
## Training set  -0.7391892 286.8864 229.7464 -0.9185855 11.04124 2.027151
## Test set     190.1416667 451.1656 347.4083  5.2236269 12.54919 3.065333
##                   ACF1 Theil's U
## Training set 0.3777297        NA
## Test set     0.6792975   1.34002

# Since USgas has a strong seasonal pattern, it would make sense to use a seasonal naive model 
# “uses the last seasonal point as a forecast of all of the corresponding seasonal observations.
snaive_model <- snaive(train, h = 12)
library(TSstudio)
test_forecast(actual = USgas, forecast.obj = snaive_model, test = test)

accuracy(snaive_model, test)

##                     ME     RMSE      MAE      MPE     MAPE     MASE      ACF1
## Training set  30.83697 146.9012 113.3346 1.241319 5.421821 1.000000 0.4660561
## Test set     116.10833 184.8008 159.5250 4.622040 6.396391 1.407558 0.1180151
##              Theil's U
## Training set        NA
## Test set     0.5977902

md_final <- auto.arima(USgas)

fc_final <- forecast(md_final, h = 12)
plot_forecast(fc_final, title = "The US Natural Gas Consumption Forecast", 
              Xtitle = "Year", Ytitle = "Billion Cubic Feet")

By default, the forecast function generates a prediction interval with a level of confidence of 80% and 95%, but you can modify it using the level argument.

fc_final2 <- forecast(md_final,   h = 60,  level = c(80, 90))
plot_forecast(fc_final2,
              title = "The US Natural Gas Consumption Forecast",
              Xtitle = "Year", Ytitle = "Billion Cubic Feet")

fc_final3 <- forecast_sim(model = md_final, h = 60, n = 500)  
library(plotly)

## Loading required package: ggplot2

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

fc_final3$plot

#%>% 
 # layout(title = "US Natural Gas Consumption - Forecasting Simulation",
  #       yaxis = list(title = "Billion Cubic Feet"), xaxis = list(title = "Year"))

The horse race approach is based on training, testing, and evaluating multiple forecasting models and selecting the model that performs the best on the testing partitions.

The ts_backtesting function from the TSstudio package conducts the full process of training, testing, evaluating, and then forecasting, using the model that performed the best on the backtesting testing partitions. By default, the model will test the following models:

auto.arima: Automated ARIMA model bsts: Bayesian structural time series model ets: Exponential smoothing state space model hybrid: An ensemble of multiple models nnetar: Neural network time series model tbats: Exponential smoothing state space model, along with Box-Cox transformation, trend, ARMA errors, and seasonal components HoltWinters: Holt-Winters filtering

#install.packages("bsts")
#remove.packages("TSstudio")
#remove.packages("bsts")
#utils::install.packages("https://cran.r-project.org/src/contrib/Archive/TSstudio/TSstudio_0.1.5.tar.gz", repos=NULL, type = "source")
#utils::install.packages("TSstudio")

library(TSstudio)
set.seed(1234)
USgas_forecast <- ts_backtesting(ts.obj = USgas,
                                 periods = 6,
                                 models = "abehntw", error = "MAPE",
                                 window_size = 12, h = 60, plot = FALSE)

## Warning: The 'ts_backtesting' function is deprecated, please use 'train_model'
## instead

## Fitting the auto.arima model

## Fitting the ets model

## Fitting the thetam model

## Fitting the nnetar model

## Fitting the stlm model

## Fitting the tbats model

## Fitting the auto.arima model

## Fitting the ets model

## Fitting the thetam model

## Fitting the nnetar model

## Fitting the stlm model

## Fitting the tbats model

## Fitting the auto.arima model

## Fitting the ets model

## Fitting the thetam model

## Fitting the nnetar model

## Fitting the stlm model

## Fitting the tbats model

## Fitting the auto.arima model

## Fitting the ets model

## Fitting the thetam model

## Fitting the nnetar model

## Fitting the stlm model

## Fitting the tbats model

## Fitting the auto.arima model

## Fitting the ets model

## Fitting the thetam model

## Fitting the nnetar model

## Fitting the stlm model

## Fitting the tbats model

## Fitting the auto.arima model

## Fitting the ets model

## Fitting the thetam model

## Fitting the nnetar model

## Fitting the stlm model

## Fitting the tbats model

## Fitting the auto.arima model

## Fitting the ets model

## Fitting the thetam model

## Fitting the nnetar model

## Fitting the stlm model

## Fitting the tbats model

##    Model_Name  avgMAPE    sdMAPE  avgRMSE   sdRMSE
## 1 HoltWinters 3.970000 0.3812086 122.5367 12.70447
## 2        bsts 4.041667 0.6532202 122.7783 19.40691
## 3         ets 5.308333 2.3387725 153.5767 56.16444
## 4      hybrid 5.406667 1.6948235 154.0683 38.22836
## 5  auto.arima 5.838333 0.6636088 172.8567 11.43843
## 6       tbats 6.528333 1.4542409 186.5267 30.79871
## 7      nnetar 8.073333 2.3695963 232.5483 63.14979

USgas_forecast$summary_plot