This is a reproduction of the forecasting metalearning example outlined here: https://github.com/robjhyndman/M4metalearning/blob/master/docs/metalearning_example.md
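Neither package is on CRAN; assuming the GitHub repository locations below are still current, they can be installed with devtools (M4comp2018 is a large download):
#install the metalearning package and the M4 competition data from GitHub
#(repository paths assumed current at the time of writing)
devtools::install_github("robjhyndman/M4metalearning")
devtools::install_github("carlanetto/M4comp2018")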
library(M4metalearning)
library(M4comp2018)
set.seed(31-05-2018) #note: this arithmetic expression evaluates to the seed -1992
#we start by creating small training and test subsets (just 25 series, to keep the example fast)
indices <- sample(length(M4))
M4_train <- M4[indices[1:15]]
M4_test <- M4[indices[16:25]]
#we create the temporal holdout version of the training and test sets
M4_train <- temp_holdout(M4_train)
M4_test <- temp_holdout(M4_test)
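temp_holdout truncates each series: the last h observations of x are moved into xx and kept as the ground truth for evaluation. A conceptual sketch for a single series (the real function processes the whole list and keeps the metadata consistent):
#conceptual sketch of temporal holdout for one M4 series (not the package code)
manual_holdout <- function(series) {
  n <- length(series$x)
  h <- series$h
  series$xx <- window(series$x, start = time(series$x)[n - h + 1]) #held-out "future"
  series$x  <- window(series$x, end = time(series$x)[n - h])       #truncated history
  series
}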
#this will take time
M4_train <- calc_forecasts(M4_train, forec_methods(), n.cores=3)
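forec_methods() returns the names of the nine candidate forecasting methods; each appears to be a function taking a series x and a horizon h and returning a length-h forecast. Assuming that convention, an extra method of your own could be added like this:
#hypothetical extra method following the assumed function(x, h) convention (sketch)
ses_forec <- function(x, h) {
  as.numeric(forecast::ses(x, h = h)$mean) #simple exponential smoothing
}
my_methods <- c(forec_methods(), "ses_forec")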
#once we have the forecasts, we can calculate the errors
M4_train <- calc_errors(M4_train)
#extract the time series features that the meta-model will learn from
M4_train <- THA_features(M4_train, n.cores=3)
#pose metalearning as a classification-style problem: features vs per-method errors
train_data <- create_feat_classif_problem(M4_train)
head(train_data$data, 3)
## x_acf1 x_acf10 diff1_acf1 diff1_acf10 diff2_acf1 diff2_acf10
## [1,] 0.75 1.08 0.15 0.35 -0.28 0.23
## [2,] 0.95 4.63 0.05 0.09 -0.49 0.45
## [3,] 0.86 5.27 -0.56 0.40 -0.72 0.66
## seas_acf1 ARCH.LM crossing_points entropy flat_spots arch_acf
## [1,] 0.76 0.59 56 0.78 5 0.10
## [2,] 0.77 0.84 2 0.62 20 0.18
## [3,] 0.44 0.45 21 0.67 9 0.07
## garch_acf arch_r2 garch_r2 alpha beta hurst lumpiness nonlinearity
## [1,] 0.05 0.09 0.05 1.00 0 0.99 0.08 0.04
## [2,] 0.18 0.24 0.24 1.00 0 0.99 0.02 1.33
## [3,] 0.05 0.07 0.05 0.46 0 1.00 0.04 1.25
## x_pacf5 diff1x_pacf5 diff2x_pacf5 seas_pacf nperiods seasonal_period
## [1,] 0.67 0.16 0.40 0.19 1 12
## [2,] 0.93 0.02 0.57 -0.10 1 4
## [3,] 1.04 0.41 1.12 -0.14 1 12
## trend spike linearity curvature e_acf1 e_acf10 seasonal_strength peak
## [1,] 0.86 0 -6.05 5.99 0.10 0.09 0.88 2
## [2,] 0.99 0 5.59 1.58 -0.19 0.31 0.10 3
## [3,] 0.89 0 10.37 -3.43 -0.13 0.09 0.08 5
## trough stability hw_alpha hw_beta hw_gamma unitroot_kpss unitroot_pp
## [1,] 7 0.40 0.45 0 0 1.67 -62.79
## [2,] 4 1.00 0.74 0 0 0.97 -0.77
## [3,] 12 0.81 0.44 0 0 2.48 -16.59
## series_length
## [1,] 277
## [2,] 43
## [3,] 200
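The features above come from the tsfeatures package, which THA_features wraps. For a single series you can compute comparable values directly (a sketch; the exact feature configuration THA_features uses may differ):
#a few of the feature groups shown above, computed directly with tsfeatures (sketch)
library(tsfeatures)
tsfeatures(M4_train[[1]]$x, features = c("acf_features", "entropy", "stl_features"))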
#and the errors of each method on each series
head(train_data$errors, 3)
## auto_arima_forec ets_forec nnetar_forec tbats_forec stlm_ar_forec
## [1,] 0.57 0.56 0.60 0.52 0.60
## [2,] 1.52 1.52 1.75 1.52 0.99
## [3,] 0.39 0.39 0.58 0.45 0.69
## rw_drift_forec thetaf_forec naive_forec snaive_forec
## [1,] 1.36 0.54 1.40 0.49
## [2,] 1.87 1.73 1.52 1.55
## [3,] 0.28 0.34 0.34 0.57
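Each row of train_data$errors is one series and each column one method; the per-series winner is what the selection model is ultimately trying to identify:
#index and name of the lowest-error method for each training series
best_method <- apply(train_data$errors, 1, which.min)
table(colnames(train_data$errors)[best_method])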
set.seed(1345) #set the seed because xgboost is random!
meta_model <- train_selection_ensemble(train_data$data, train_data$errors)
#process the test set the same way: forecasts, then features
M4_test <- calc_forecasts(M4_test, forec_methods(), n.cores=1)
M4_test <- THA_features(M4_test, n.cores=1)
test_data <- create_feat_classif_problem(M4_test)
preds <- predict_selection_ensemble(meta_model, test_data$data)
head(preds)
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 0.03604758 0.017487158 0.013555620 0.09334739 0.008989784 0.010645926
## [2,] 0.03360812 0.016303741 0.012638264 0.08151126 0.008381414 0.009925479
## [3,] 0.09706341 0.047086738 0.036500496 0.14623304 0.024206312 0.028665718
## [4,] 0.01677949 0.008139953 0.006309894 0.05574440 0.004184580 0.004955484
## [5,] 0.01409681 0.006838546 0.005301075 0.03054505 0.003515554 0.004163207
## [6,] 0.08850374 0.042934329 0.033281649 0.24544565 0.022071646 0.026137793
## [,7] [,8] [,9]
## [1,] 0.7891775 0.013994350 0.016754739
## [2,] 0.8089635 0.013047304 0.015620888
## [3,] 0.5374479 0.037681841 0.045114594
## [4,] 0.8895731 0.006514115 0.007799025
## [5,] 0.9235150 0.005472645 0.006552126
## [6,] 0.4661303 0.034358817 0.041136101
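Each row of preds is a weight vector over the nine methods (presumably in the same column order as the error matrix above; note how column 7 dominates here). As a quick sanity check, the softmax output should sum to roughly one per series:
rowSums(head(preds)) #each row should sum to (approximately) 1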
#compute the weighted ensemble forecasts and attach them to the test set
M4_test <- ensemble_forecast(preds, M4_test)
#the y_hat field of each series now holds its combined forecast
M4_test[[1]]$y_hat
## [1] 5528.282 4826.940 4884.705 4389.789 4360.049 4701.561 3803.859
## [8] 3820.855 4364.242 4130.156 4990.280 6148.181 5303.313 4635.553
## [15] 4697.576 4230.293 4193.981 4526.168
## [1] "Classification error: 0.7"
## [1] "Selected OWI : 0.6632"
## [1] "Weighted OWI : 0.7501"
## [1] "Naive Weighted OWI : 0.8573"
## [1] "Oracle OWI: 0.5551"
## [1] "Single method OWI: 0.666"
## [1] "Average OWI: 1"
#finally, inspect which features drive the method selection
mat <- xgboost::xgb.importance(feature_names = colnames(test_data$data),
                               model = meta_model)
xgboost::xgb.plot.importance(importance_matrix = mat[1:20], cex = 1.0)