This is a reproduction of the forecasting metalearning example outlined here: https://github.com/robjhyndman/M4metalearning/blob/master/docs/metalearning_example.md
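Neither package is on CRAN; assuming the GitHub repository locations below are still current, they can be installed with devtools (M4comp2018 is a large download):
#install the metalearning package and the M4 competition data from GitHub
#(repository paths assumed current at the time of writing)
devtools::install_github("robjhyndman/M4metalearning")
devtools::install_github("carlanetto/M4comp2018")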
library(M4metalearning)
library(M4comp2018)
set.seed(31-05-2018) #note: this arithmetic expression evaluates to the seed -1992
#we start by creating small training and test subsets (just 25 series, to keep the example fast)
indices <- sample(length(M4))
M4_train <- M4[indices[1:15]]
M4_test <- M4[indices[16:25]]
#we create the temporal holdout version of the training and test sets
M4_train <- temp_holdout(M4_train)
M4_test <- temp_holdout(M4_test)
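temp_holdout truncates each series: the last h observations of x are moved into xx and kept as the ground truth for evaluation. A conceptual sketch for a single series (the real function processes the whole list and keeps the metadata consistent):
#conceptual sketch of temporal holdout for one M4 series (not the package code)
manual_holdout <- function(series) {
  n <- length(series$x)
  h <- series$h
  series$xx <- window(series$x, start = time(series$x)[n - h + 1]) #held-out "future"
  series$x  <- window(series$x, end = time(series$x)[n - h])       #truncated history
  series
}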
#this will take time
M4_train <- calc_forecasts(M4_train, forec_methods(), n.cores=3)
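forec_methods() returns the names of the nine candidate forecasting methods; each appears to be a function taking a series x and a horizon h and returning a length-h forecast. Assuming that convention, an extra method of your own could be added like this:
#hypothetical extra method following the assumed function(x, h) convention (sketch)
ses_forec <- function(x, h) {
  as.numeric(forecast::ses(x, h = h)$mean) #simple exponential smoothing
}
my_methods <- c(forec_methods(), "ses_forec")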
#once we have the forecasts, we can calculate the errors
M4_train <- calc_errors(M4_train)
#extract the time series features that the meta-model will learn from
M4_train <- THA_features(M4_train, n.cores=3)
#pose metalearning as a classification-style problem: features vs per-method errors
train_data <- create_feat_classif_problem(M4_train)
head(train_data$data, 3)
## x_acf1 x_acf10 diff1_acf1 diff1_acf10 diff2_acf1 diff2_acf10
## [1,] 0.75 1.08 0.15 0.35 -0.28 0.23
## [2,] 0.95 4.63 0.05 0.09 -0.49 0.45
## [3,] 0.86 5.27 -0.56 0.40 -0.72 0.66
## seas_acf1 ARCH.LM crossing_points entropy flat_spots arch_acf
## [1,] 0.76 0.59 56 0.78 5 0.10
## [2,] 0.77 0.84 2 0.62 20 0.18
## [3,] 0.44 0.45 21 0.67 9 0.07
## garch_acf arch_r2 garch_r2 alpha beta hurst lumpiness nonlinearity
## [1,] 0.05 0.09 0.05 1.00 0 0.99 0.08 0.04
## [2,] 0.18 0.24 0.24 1.00 0 0.99 0.02 1.33
## [3,] 0.05 0.07 0.05 0.46 0 1.00 0.04 1.25
## x_pacf5 diff1x_pacf5 diff2x_pacf5 seas_pacf nperiods seasonal_period
## [1,] 0.67 0.16 0.40 0.19 1 12
## [2,] 0.93 0.02 0.57 -0.10 1 4
## [3,] 1.04 0.41 1.12 -0.14 1 12
## trend spike linearity curvature e_acf1 e_acf10 seasonal_strength peak
## [1,] 0.86 0 -6.05 5.99 0.10 0.09 0.88 2
## [2,] 0.99 0 5.59 1.58 -0.19 0.31 0.10 3
## [3,] 0.89 0 10.37 -3.43 -0.13 0.09 0.08 5
## trough stability hw_alpha hw_beta hw_gamma unitroot_kpss unitroot_pp
## [1,] 7 0.40 0.45 0 0 1.67 -62.79
## [2,] 4 1.00 0.74 0 0 0.97 -0.77
## [3,] 12 0.81 0.44 0 0 2.48 -16.59
## series_length
## [1,] 277
## [2,] 43
## [3,] 200
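The features above come from the tsfeatures package, which THA_features wraps. For a single series you can compute comparable values directly (a sketch; the exact feature configuration THA_features uses may differ):
#a few of the feature groups shown above, computed directly with tsfeatures (sketch)
library(tsfeatures)
tsfeatures(M4_train[[1]]$x, features = c("acf_features", "entropy", "stl_features"))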
#and the errors of each method on each series
head(train_data$errors, 3)
## auto_arima_forec ets_forec nnetar_forec tbats_forec stlm_ar_forec
## [1,] 0.57 0.56 0.60 0.52 0.60
## [2,] 1.52 1.52 1.75 1.52 0.99
## [3,] 0.39 0.39 0.58 0.45 0.69
## rw_drift_forec thetaf_forec naive_forec snaive_forec
## [1,] 1.36 0.54 1.40 0.49
## [2,] 1.87 1.73 1.52 1.55
## [3,] 0.28 0.34 0.34 0.57
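Each row of train_data$errors is one series and each column one method; the per-series winner is what the selection model is ultimately trying to identify:
#index and name of the lowest-error method for each training series
best_method <- apply(train_data$errors, 1, which.min)
table(colnames(train_data$errors)[best_method])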
set.seed(1345) #set the seed because xgboost is random!
meta_model <- train_selection_ensemble(train_data$data, train_data$errors)
#process the test set the same way: forecasts, then features
M4_test <- calc_forecasts(M4_test, forec_methods(), n.cores=1)
M4_test <- THA_features(M4_test, n.cores=1)
test_data <- create_feat_classif_problem(M4_test)
preds <- predict_selection_ensemble(meta_model, test_data$data)
head(preds)
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 0.03604758 0.017487158 0.013555620 0.09334739 0.008989784 0.010645926
## [2,] 0.03360812 0.016303741 0.012638264 0.08151126 0.008381414 0.009925479
## [3,] 0.09706341 0.047086738 0.036500496 0.14623304 0.024206312 0.028665718
## [4,] 0.01677949 0.008139953 0.006309894 0.05574440 0.004184580 0.004955484
## [5,] 0.01409681 0.006838546 0.005301075 0.03054505 0.003515554 0.004163207
## [6,] 0.08850374 0.042934329 0.033281649 0.24544565 0.022071646 0.026137793
## [,7] [,8] [,9]
## [1,] 0.7891775 0.013994350 0.016754739
## [2,] 0.8089635 0.013047304 0.015620888
## [3,] 0.5374479 0.037681841 0.045114594
## [4,] 0.8895731 0.006514115 0.007799025
## [5,] 0.9235150 0.005472645 0.006552126
## [6,] 0.4661303 0.034358817 0.041136101
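Each row of preds is a weight vector over the nine methods (presumably in the same column order as the error matrix above; note how column 7 dominates here). As a quick sanity check, the softmax output should sum to roughly one per series:
rowSums(head(preds)) #each row should sum to (approximately) 1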
#compute the weighted ensemble forecasts and attach them to the test set
M4_test <- ensemble_forecast(preds, M4_test)
#the y_hat field of each series now holds its combined forecast
M4_test[[1]]$y_hat
## [1] 5528.282 4826.940 4884.705 4389.789 4360.049 4701.561 3803.859
## [8] 3820.855 4364.242 4130.156 4990.280 6148.181 5303.313 4635.553
## [15] 4697.576 4230.293 4193.981 4526.168
## [1] "Classification error: 0.7"
## [1] "Selected OWI : 0.6632"
## [1] "Weighted OWI : 0.7501"
## [1] "Naive Weighted OWI : 0.8573"
## [1] "Oracle OWI: 0.5551"
## [1] "Single method OWI: 0.666"
## [1] "Average OWI: 1"
#finally, inspect which features drive the method selection
mat <- xgboost::xgb.importance(feature_names = colnames(test_data$data),
                               model = meta_model)
xgboost::xgb.plot.importance(importance_matrix = mat[1:20], cex = 1.0)