Step 06: Models - Accuracy Stats

Setup

Load the relevant libraries.

# rm(list = ls())
# .rs.restartR()


# data manipulation
library("plyr")
library("tidyverse")
library("magrittr")
library("data.table")
library("lubridate")
library("sqldf")


# time series specific packages
library("timetk")
library("zoo")
library("tibbletime")


# modeling
library("fpp2")
library("prophet")
library("caret")
library("randomForest")
library("xgboost")
library("h2o")
library("keras")
# use_session_with_seed(123456789) # setting the seed to obtain reproducible results
# see https://keras.rstudio.com/articles/faq.html#how-can-i-obtain-reproducible-results-using-keras-during-development and https://cran.r-project.org/web/packages/keras/vignettes/faq.html
# can also re-enable gpu and parallel processing by using:  use_session_with_seed(42, disable_gpu = FALSE, disable_parallel_cpu = FALSE)



# other
library("geosphere")          # specific for distance calculations from lat-lon pairs
library("naniar")             # inspecting missing data
library("rlang")              # building functions
library("recipes")            # used in Keras modeling to design matrices
library("rsample")            # rolling samples for validation stats
library("tfruns")             # used in Keras modeling for trainin runs
library("stringr")            # string manipulation
library("ggplot2")            # viz
library("sweep")              # more easily pull out model statistics
library("yardstick")          # easily calculate accuracy stats
library("doParallel")         # parallel processing

Session Info.

sessionInfo()

## R version 3.5.1 (2018-07-02)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] doParallel_1.0.14   iterators_1.0.10    foreach_1.4.4      
##  [4] yardstick_0.0.2     sweep_0.2.1.1       tfruns_1.4         
##  [7] rsample_0.0.3       recipes_0.1.4       rlang_0.3.0.1      
## [10] naniar_0.4.1        geosphere_1.5-7     keras_2.2.4        
## [13] h2o_3.20.0.8        xgboost_0.71.2      randomForest_4.6-14
## [16] caret_6.0-81        lattice_0.20-38     prophet_0.3.0.1    
## [19] Rcpp_1.0.0          fpp2_2.3            expsmooth_2.3      
## [22] fma_2.3             forecast_8.4        tibbletime_0.1.1   
## [25] zoo_1.8-4           timetk_0.1.1.1      sqldf_0.4-11       
## [28] RSQLite_2.1.1       gsubfn_0.7          proto_1.0.0        
## [31] lubridate_1.7.4     data.table_1.11.8   magrittr_1.5       
## [34] forcats_0.3.0       stringr_1.3.1       dplyr_0.7.8        
## [37] purrr_0.2.5         readr_1.2.1         tidyr_0.8.2        
## [40] tibble_1.4.2        ggplot2_3.1.0       tidyverse_1.2.1    
## [43] plyr_1.8.4         
## 
## loaded via a namespace (and not attached):
##  [1] colorspace_1.3-2   class_7.3-14       visdat_0.5.1      
##  [4] rprojroot_1.3-2    base64enc_0.1-3    rstudioapi_0.8    
##  [7] rstan_2.18.2       bit64_0.9-7        prodlim_2018.04.18
## [10] xml2_1.2.0         codetools_0.2-15   splines_3.5.1     
## [13] knitr_1.20         zeallot_0.1.0      jsonlite_1.5      
## [16] pROC_1.13.0        broom_0.5.0        compiler_3.5.1    
## [19] httr_1.3.1         backports_1.1.2    assertthat_0.2.0  
## [22] Matrix_1.2-15      lazyeval_0.2.1     cli_1.0.1         
## [25] htmltools_0.3.6    prettyunits_1.0.2  tools_3.5.1       
## [28] bindrcpp_0.2.2     gtable_0.2.0       glue_1.3.0        
## [31] reshape2_1.4.3     cellranger_1.1.0   fracdiff_1.4-2    
## [34] urca_1.3-0         debugme_1.1.0      nlme_3.1-137      
## [37] lmtest_0.9-36      timeDate_3043.102  gower_0.1.2       
## [40] ps_1.2.1           rvest_0.3.2        MASS_7.3-51.1     
## [43] scales_1.0.0       ipred_0.9-8        hms_0.4.2         
## [46] inline_0.3.15      yaml_2.2.0         quantmod_0.4-13   
## [49] curl_3.2           reticulate_1.10    memoise_1.1.0     
## [52] gridExtra_2.3      loo_2.0.0          StanHeaders_2.18.0
## [55] uroot_2.0-9        rpart_4.1-13       stringi_1.2.4     
## [58] tensorflow_1.10    tseries_0.10-46    TTR_0.23-4        
## [61] pkgbuild_1.0.2     lava_1.6.4         chron_2.3-53      
## [64] bitops_1.0-6       pkgconfig_2.0.2    matrixStats_0.54.0
## [67] evaluate_0.12      bindr_0.1.1        bit_1.1-14        
## [70] processx_3.2.0     tidyselect_0.2.5   R6_2.3.0          
## [73] generics_0.0.2     DBI_1.0.0          whisker_0.3-2     
## [76] pillar_1.3.0       haven_2.0.0        withr_2.1.2       
## [79] xts_0.11-2         sp_1.3-1           RCurl_1.95-4.11   
## [82] survival_2.43-3    nnet_7.3-12        modelr_0.1.2      
## [85] crayon_1.3.4       rmarkdown_1.10     grid_3.5.1        
## [88] readxl_1.1.0       blob_1.1.1         callr_3.0.0       
## [91] ModelMetrics_1.2.2 digest_0.6.18      stats4_3.5.1      
## [94] munsell_0.5.0      tcltk_3.5.1        quadprog_1.5-5

Setup the root directory.

Setting wd as the working directory.

wd <- getwd()

wd

## [1] "/Users/mdturse/Desktop/Analytics/Chicago_El_Divvy"

Modeling

NOTE: h2o.limvars_accuracy_stats, DV_Fit.Rf.corr_no, DV_Fit.Xgbtree.corr_yes, func_one_hot_vars, run_times, h2o.time, and h2o.limvars.time are the outputs produced in Step 02 - Step 05

h2o.limvars_accuracy_stats <-
  readRDS(paste0(wd,
                 "/Models/",
                 "h2o.limvars_accuracy_stats.Rds"
                 )
          )

DV_Fit.Rf.corr_no <-
  readRDS(paste0(wd,
                 "/Models/",
                 "DV_Fit.Rf.corr_no.Rds"
                 )
          )

DV_Fit.Xgbtree.corr_yes <-
  readRDS(paste0(wd,
                 "/Models/",
                 "DV_Fit.Xgbtree.corr_yes.Rds"
                 )
          )

func_one_hot_vars <-
  readRDS(paste0(wd,
                 "/Data/Interim/",
                 "func_one_hot_vars.Rds"
                 )
          )

run_times <-
  readRDS(paste0(wd,
                 "/Models/",
                 "run_times.Rds"
                 )
          )

h2o.time <-
  readRDS(paste0(wd,
                 "/Models/",
                 "h2o.time.Rds"
                 )
          )

h2o.limvars.time <-
  readRDS(paste0(wd,
                 "/Models/",
                 "h2o.limvars.time.Rds"
                 )
          )

Accuracy Measurements

Now we can check some summary (RMSE) statistics. NOTE: these are summary statistics based on the data used to build the models.

Interpolation - ARIMA Model

accuracy_interp_arima <-
  pmap(.l = list(a = h2o.limvars_accuracy_stats),
       .f = function(a) {
           
         interp_a =
           map_dbl(a$arima,
                   function(x) 
                     sw_glance(x)[["RMSE"]]
                   )
         
         interp_a_xreg =
           map_dbl(a$arima_xreg,
                   function(x) {
                     y = x$best_fit %>% 
                       sw_glance()
                     
                     y[["RMSE"]]
                     }
                   )
         
         a$arima.interpolation = interp_a
         a$arima_xreg.interpolation = interp_a_xreg
         
         return(a)
         }
       )

## Warning: `list_len()` is soft-deprecated as of rlang 0.2.0.
## Please use `new_list()` instead
## This warning is displayed once per session.

names(accuracy_interp_arima$`40600`)

##  [1] "splits"                      "id"                         
##  [3] "start_date"                  "arima"                      
##  [5] "arima_xreg"                  "prophet"                    
##  [7] "prophet_hol"                 "prophet.future"             
##  [9] "prophet_hol.future"          "prophet.forecast"           
## [11] "prophet_hol.forecast"        "prophet.plots"              
## [13] "prophet_hol.plots"           "interp_data"                
## [15] "extrap_data"                 "h2o.interp.forecast"        
## [17] "h2o.extrap.forecast"         "h2o.interpolation"          
## [19] "h2o.extrapolation"           "h2o.limvars.interp.forecast"
## [21] "h2o.limvars.extrap.forecast" "h2o.limvars.interpolation"  
## [23] "h2o.limvars.extrapolation"   "arima.interpolation"        
## [25] "arima_xreg.interpolation"

accuracy_interp_arima$`40600`$arima.interpolation

##  [1] 61.17978 59.01982 59.12690 61.16683 57.53232 59.93085 59.72882
##  [8] 59.90099 59.88639 57.12052 58.46335 58.94525 59.29253

accuracy_interp_arima$`40600`$arima_xreg.interpolation

##  [1] 45.92921 48.51186 48.68320 47.76805 46.71281 46.98059 47.57170
##  [8] 45.84552 47.30054 45.78911 46.39437 46.35905 46.54615

message("arima.interpolation")

## arima.interpolation

accuracy_interp_arima %>% 
  map(~ summary(.x$arima.interpolation)
      )

## $`40600`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   57.12   58.95   59.29   59.33   59.90   61.18 
## 
## $`41140`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   69.45   71.19   72.89   73.74   76.86   77.75 
## 
## $`40120`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   351.0   364.2   368.0   368.5   376.9   378.4 
## 
## $`40910`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   323.8   335.1   352.6   349.2   355.9   385.7 
## 
## $`40380`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2369    2429    2649    2563    2656    2755 
## 
## $`41660`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2166    2213    2240    2249    2292    2345

message("arima_xreg.interpolation")

## arima_xreg.interpolation

accuracy_interp_arima %>% 
  map(~ summary(.x$arima_xreg.interpolation)
      )

## $`40600`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   45.79   46.36   46.71   46.95   47.57   48.68 
## 
## $`41140`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   58.26   59.07   59.18   60.57   63.12   64.99 
## 
## $`40120`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   280.5   282.2   285.2   285.3   286.7   292.5 
## 
## $`40910`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   272.1   274.2   281.9   281.6   289.2   291.4 
## 
## $`40380`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1657    1721    1750    1739    1760    1793 
## 
## $`41660`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1748    1757    1827    1806    1832    1856

rm(h2o.limvars_accuracy_stats)

Interpolation - Prophet Model

get_interp_prophet <- function(split, pf) {
  # Get analysis data
  pred_dat =
    analysis(split) %>% 
    select(el_date,
           el_rides
         ) %>% 
    left_join(select(pf,
                     ds,
                     yhat,
                     yhat_zero_floor
                     ) %>% 
                mutate_at(vars(ds), as.Date),
              by = c("el_date" = "ds")
            ) %>% 
    mutate(sqrd_error = (el_rides - yhat_zero_floor)^2
           ) %>% 
    arrange(el_date)

  sqrt(mean(pred_dat$sqrd_error,
            na.rm = TRUE
            )
       )
  }    
  

accuracy_interp_prophet <-
  pmap(.l = list(a = accuracy_interp_arima),
       .f = function(a) {
         interp =
           map2_dbl(a$splits,
                    a$prophet.forecast,
                    get_interp_prophet
                    )
         
         interp_hol =
           map2_dbl(a$splits,
                    a$prophet_hol.forecast,
                    get_interp_prophet
                    )
         
         a$prophet.interpolation = interp
         
         a$prophet_hol.interpolation = interp_hol
         
         return(a)
         }
       )


names(accuracy_interp_prophet$`40600`)

##  [1] "splits"                      "id"                         
##  [3] "start_date"                  "arima"                      
##  [5] "arima_xreg"                  "prophet"                    
##  [7] "prophet_hol"                 "prophet.future"             
##  [9] "prophet_hol.future"          "prophet.forecast"           
## [11] "prophet_hol.forecast"        "prophet.plots"              
## [13] "prophet_hol.plots"           "interp_data"                
## [15] "extrap_data"                 "h2o.interp.forecast"        
## [17] "h2o.extrap.forecast"         "h2o.interpolation"          
## [19] "h2o.extrapolation"           "h2o.limvars.interp.forecast"
## [21] "h2o.limvars.extrap.forecast" "h2o.limvars.interpolation"  
## [23] "h2o.limvars.extrapolation"   "arima.interpolation"        
## [25] "arima_xreg.interpolation"    "prophet.interpolation"      
## [27] "prophet_hol.interpolation"

accuracy_interp_prophet$`40600`$prophet.interpolation

##  [1] 68.96201 68.62795 69.35761 69.70989 69.45280 68.88785 70.01384
##  [8] 70.17346 69.27674 68.85172 70.03093 69.98618 69.26562

accuracy_interp_prophet$`40600`$prophet_hol.interpolation

##  [1] 53.98834 53.39593 53.67258 53.43274 54.14267 54.02384 54.51328
##  [8] 53.80563 53.74292 54.19677 53.64952 53.80492 53.18090

message("prophet.interpolation")

## prophet.interpolation

accuracy_interp_prophet %>% 
  map(~ summary(.x$prophet.interpolation)
      )

## $`40600`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   68.63   68.96   69.36   69.43   69.99   70.17 
## 
## $`41140`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   72.59   75.07   77.02   79.08   84.65   87.29 
## 
## $`40120`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   409.8   412.5   418.3   417.6   421.1   424.0 
## 
## $`40910`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   385.7   387.5   491.8   462.0   514.1   532.5 
## 
## $`40380`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2633    2651    2678    2677    2700    2725 
## 
## $`41660`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2430    2493    2530    2519    2552    2575

message("prophet_hol.interpolation")

## prophet_hol.interpolation

accuracy_interp_prophet %>% 
  map(~ summary(.x$prophet_hol.interpolation)
      )

## $`40600`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   53.18   53.65   53.80   53.81   54.02   54.51 
## 
## $`41140`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   58.02   58.35   63.38   63.82   68.55   70.99 
## 
## $`40120`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   274.9   281.0   282.3   282.8   283.6   290.7 
## 
## $`40910`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   308.9   312.4   433.7   396.1   456.5   478.4 
## 
## $`40380`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1436    1470    1478    1481    1503    1511 
## 
## $`41660`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1685    1748    1775    1765    1784    1820

# saving is done to avoid having to run the models again
saveRDS(accuracy_interp_prophet,
        paste0(wd,
               "/Models/",
               "accuracy_interp_prophet.Rds"
               )
        )

# accuracy_interp_prophet <-
#   readRDS(paste0(wd,
#                  "/Models/",
#                  "accuracy_interp_prophet.Rds"
#                  )
#           )


rm(accuracy_interp_arima)

Forecast & Interpolation - Random Forest Model

Get the forecasted data for the random forest and the xgboost tree.

First, I create a function to get the error percentages.

get_pct_error <-
  function(base, mod) {
    data = bind_cols(base,
                     mod
                     ) %>% 
      select(el_date,
             el_rides,
             yhat
             ) %>%
      rename(y = el_rides
             ) %>%
      # if_else is needed to prevent negative predictions
      mutate(yhat_zero_floor = if_else(yhat < 0,
                                       0,
                                       yhat
                                       ),
             sqrd_error = (y - yhat_zero_floor)^2
             ) %>%
      arrange(el_date)
    
    return(data)
    }

Now I get the forecasts of all ML models as one list.

rf_xgb_forecasts_combined <-
  pmap(.l = list(a = accuracy_interp_prophet,
                 b = DV_Fit.Rf.corr_no,
                 c = DV_Fit.Xgbtree.corr_yes
                 ),
       .f = function(a, b, c) {
         splits = a$splits
         rf = b
         xgb = c
         
         fcast =
           pmap(.l = list(d = splits),
                .f = function(d) {
                  # prep the data
                  analysis_assessment =
                    analysis(d) %>% 
                    mutate(type = "interpolation") %>% 
                    bind_rows(assessment(d) %>% 
                                mutate(type = "extrapolation")
                              )
                  
                  one_hot =
                    analysis_assessment %>% 
                    func_one_hot_vars()
                  
                  one_hot_interp =
                    one_hot %>% 
                    filter(typeinterpolation == 1) %>% 
                    select(-typeinterpolation,
                           -typeextrapolation
                           )
                  
                  one_hot_extrap = 
                    one_hot %>% 
                    filter(typeextrapolation == 1) %>% 
                    select(-typeinterpolation,
                           -typeextrapolation
                           )
                  
                  complete_interp =
                    one_hot_interp[complete.cases(one_hot_interp), ]
                  
                  complete_extrap =
                    one_hot_extrap[complete.cases(one_hot_extrap), ]
                  
                  
                  # predictions
                  pred_rf.interp =
                    predict(rf,
                            newdata = complete_interp
                            ) %>% 
                    as.data.frame()
                  
                  names(pred_rf.interp) <- "yhat"
                  
                  pred_rf.extrap =
                    predict(rf,
                            newdata = complete_extrap
                            ) %>% 
                    as.data.frame()
                  
                  names(pred_rf.extrap) <- "yhat"
                  
                  
                  pred_xgb.interp =
                    predict(xgb,
                            newdata = complete_interp
                            ) %>% 
                    as.data.frame()
                  
                  names(pred_xgb.interp) <- "yhat"
                  
                  pred_xgb.extrap =
                    predict(xgb,
                            newdata = complete_extrap
                            ) %>% 
                    as.data.frame()
                  
                  names(pred_xgb.extrap) <- "yhat"
                  
                  
                  # add percent errors
                  full_pred.rf.interp =
                    get_pct_error(base = complete_interp,
                                  mod = pred_rf.interp
                                  )
                  
                  full_pred.rf.extrap =
                    get_pct_error(base = complete_extrap,
                                  mod = pred_rf.extrap
                                  )
                  
                  
                  full_pred.xgb.interp =
                    get_pct_error(base = complete_interp,
                                  mod = pred_xgb.interp
                                  )
                  
                  full_pred.xgb.extrap =
                    get_pct_error(base = complete_extrap,
                                  mod = pred_xgb.extrap
                                  )
                  
                  
                  # result as a list
                  full_pred_list =
                    list(pred.rf.interp = full_pred.rf.interp,
                         pred.rf.extrap = full_pred.rf.extrap,
                         pred.xgb.interp = full_pred.xgb.interp,
                         pred.xgb.extrap = full_pred.xgb.extrap
                         )
                  
                  # return value
                  return(full_pred_list)
                  }
                )
         
         a$ml_forecasts_all = fcast
         
         return(a)
         }
       )

names(rf_xgb_forecasts_combined$`40600`)

##  [1] "splits"                      "id"                         
##  [3] "start_date"                  "arima"                      
##  [5] "arima_xreg"                  "prophet"                    
##  [7] "prophet_hol"                 "prophet.future"             
##  [9] "prophet_hol.future"          "prophet.forecast"           
## [11] "prophet_hol.forecast"        "prophet.plots"              
## [13] "prophet_hol.plots"           "interp_data"                
## [15] "extrap_data"                 "h2o.interp.forecast"        
## [17] "h2o.extrap.forecast"         "h2o.interpolation"          
## [19] "h2o.extrapolation"           "h2o.limvars.interp.forecast"
## [21] "h2o.limvars.extrap.forecast" "h2o.limvars.interpolation"  
## [23] "h2o.limvars.extrapolation"   "arima.interpolation"        
## [25] "arima_xreg.interpolation"    "prophet.interpolation"      
## [27] "prophet_hol.interpolation"   "ml_forecasts_all"

Now I separate the forecats into their own dataframes.

rf_xgb_forecasts_individualized <-
  pmap(.l = list(a = rf_xgb_forecasts_combined),
       .f = function(a) {
         fc = a$ml_forecasts_all

         # limit results to the relevant model
         result.rf.interp =
           pmap(.l = list(b = fc),
              .f = function(b) {
                b$pred.rf.interp
                }
              )
         
         result.rf.extrap =
           pmap(.l = list(b = fc),
              .f = function(b) {
                b$pred.rf.extrap
                }
              )
         
         result.xgb.interp =
           pmap(.l = list(b = fc),
              .f = function(b) {
                b$pred.xgb.interp
                }
              )
         
         result.xgb.extrap =
           pmap(.l = list(b = fc),
              .f = function(b) {
                b$pred.xgb.extrap
                }
              )
         
         a$rf.forecast_interp <- result.rf.interp
         a$rf.forecast_extrap <- result.rf.extrap
         a$xgb.forecast_interp <- result.xgb.interp
         a$xgb.forecast_extrap <- result.xgb.extrap
         
         # remove no longer needed list of all forecasts
         a$ml_forecasts_all <- NULL
         
         return(a)
         }
       )


names(rf_xgb_forecasts_individualized$`40600`)

##  [1] "splits"                      "id"                         
##  [3] "start_date"                  "arima"                      
##  [5] "arima_xreg"                  "prophet"                    
##  [7] "prophet_hol"                 "prophet.future"             
##  [9] "prophet_hol.future"          "prophet.forecast"           
## [11] "prophet_hol.forecast"        "prophet.plots"              
## [13] "prophet_hol.plots"           "interp_data"                
## [15] "extrap_data"                 "h2o.interp.forecast"        
## [17] "h2o.extrap.forecast"         "h2o.interpolation"          
## [19] "h2o.extrapolation"           "h2o.limvars.interp.forecast"
## [21] "h2o.limvars.extrap.forecast" "h2o.limvars.interpolation"  
## [23] "h2o.limvars.extrapolation"   "arima.interpolation"        
## [25] "arima_xreg.interpolation"    "prophet.interpolation"      
## [27] "prophet_hol.interpolation"   "rf.forecast_interp"         
## [29] "rf.forecast_extrap"          "xgb.forecast_interp"        
## [31] "xgb.forecast_extrap"

rm(accuracy_interp_prophet, rf_xgb_forecasts_combined)

Now I calculate the interpolation and extrapolation accuracy stats.

# function to get the RMSE value
get_interp_ML <- function(ML.fcast) {
  pred_dat = ML.fcast
  
  sqrt(mean(pred_dat$sqrd_error,
            na.rm = TRUE
            )
       )
  }


accuracy_interp_ML <-
  pmap(.l = list(a = rf_xgb_forecasts_individualized),
       .f = function(a) {
         interp.rf =
           map_dbl(a$rf.forecast_interp,
                   get_interp_ML
                   )
         
         extrap.rf =
           map_dbl(a$rf.forecast_extrap,
                   get_interp_ML
                   )
         
         interp.xgb =
           map_dbl(a$xgb.forecast_interp,
                   get_interp_ML
                   )
         
         extrap.xgb =
           map_dbl(a$xgb.forecast_extrap,
                   get_interp_ML
                   )
         
         a$rf.interpolation <- interp.rf
         a$rf.extrapolation <- extrap.rf
         a$xgb.interpolation <- interp.xgb
         a$xgb.extrapolation <- extrap.xgb
         
         return(a)
         }
       )


names(accuracy_interp_ML$`40600`)

##  [1] "splits"                      "id"                         
##  [3] "start_date"                  "arima"                      
##  [5] "arima_xreg"                  "prophet"                    
##  [7] "prophet_hol"                 "prophet.future"             
##  [9] "prophet_hol.future"          "prophet.forecast"           
## [11] "prophet_hol.forecast"        "prophet.plots"              
## [13] "prophet_hol.plots"           "interp_data"                
## [15] "extrap_data"                 "h2o.interp.forecast"        
## [17] "h2o.extrap.forecast"         "h2o.interpolation"          
## [19] "h2o.extrapolation"           "h2o.limvars.interp.forecast"
## [21] "h2o.limvars.extrap.forecast" "h2o.limvars.interpolation"  
## [23] "h2o.limvars.extrapolation"   "arima.interpolation"        
## [25] "arima_xreg.interpolation"    "prophet.interpolation"      
## [27] "prophet_hol.interpolation"   "rf.forecast_interp"         
## [29] "rf.forecast_extrap"          "xgb.forecast_interp"        
## [31] "xgb.forecast_extrap"         "rf.interpolation"           
## [33] "rf.extrapolation"            "xgb.interpolation"          
## [35] "xgb.extrapolation"

accuracy_interp_ML$`40600`$rf.interpolation

##  [1] 20.78572 20.73984 20.74743 20.74830 20.21899 20.15955 20.13680
##  [8] 20.14938 20.15622 20.04888 20.50682 20.56212 20.59679

accuracy_interp_ML$`40600`$rf.extrapolation

##  [1] 17.98158 18.03256 18.17887 18.19466 18.19726 18.22872 18.40603
##  [8] 19.99322 21.52102 21.41720 20.67071 21.21724 21.38974

message("rf.interpolation")

## rf.interpolation

accuracy_interp_ML %>% 
  map(~ summary(.x$rf.interpolation)
      )

## $`40600`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   20.05   20.16   20.51   20.43   20.74   20.79 
## 
## $`41140`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   27.03   27.66   28.25   28.76   30.36   31.15 
## 
## $`40120`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   124.3   124.9   126.6   126.4   128.2   128.8 
## 
## $`40910`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   128.0   128.2   128.8   128.7   129.0   129.4 
## 
## $`40380`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   767.8   771.4   777.1   782.3   791.6   805.0 
## 
## $`41660`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   876.9   888.9   892.2   895.3   905.5   912.0

message("rf.extrapolation")

## rf.extrapolation

accuracy_interp_ML %>% 
  map(~ summary(.x$rf.extrapolation)
      )

## $`40600`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   17.98   18.19   18.41   19.49   21.22   21.52 
## 
## $`41140`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   22.80   23.00   23.24   23.31   23.65   24.29 
## 
## $`40120`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   101.9   102.3   102.9   114.1   128.3   133.0 
## 
## $`40910`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   82.42  115.37  122.23  117.04  123.37  126.12 
## 
## $`40380`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   833.9   849.7   862.1   918.8  1013.5  1020.5 
## 
## $`41660`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   828.0   842.0   900.7   906.1   966.8   990.6

message("xgb.interpolation")

## xgb.interpolation

accuracy_interp_ML %>% 
  map(~ summary(.x$xgb.interpolation)
      )

## $`40600`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   58.39   58.74   59.31   59.19   59.36   60.06 
## 
## $`41140`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   50.09   50.59   50.89   50.87   51.21   51.51 
## 
## $`40120`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   146.7   147.6   147.8   147.9   148.3   149.1 
## 
## $`40910`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   209.5   212.9   214.2   214.4   217.2   218.4 
## 
## $`40380`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2317    2320    2362    2349    2376    2384 
## 
## $`41660`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2207    2214    2221    2231    2246    2264

message("xgb.extrapolation")

## xgb.extrapolation

accuracy_interp_ML %>% 
  map(~ summary(.x$xgb.extrapolation)
      )

## $`40600`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   47.37   47.71   47.80   51.13   55.36   55.67 
## 
## $`41140`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   41.05   43.40   44.18   43.65   44.56   45.10 
## 
## $`40120`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   133.0   133.6   134.9   137.0   140.7   142.6 
## 
## $`40910`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   183.4   189.4   189.9   191.1   194.3   198.1 
## 
## $`40380`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1836    1857    1909    2120    2393    2489 
## 
## $`41660`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1811    1866    1914    1998    2118    2227

# saving is done to avoid having to run the models again
saveRDS(accuracy_interp_ML,
        paste0(wd,
               "/Models/",
               "accuracy_interp_ML.Rds"
               )
        )

# accuracy_interp_ML <-
#   readRDS(paste0(wd,
#                  "/Models/",
#                  "accuracy_interp_ML.Rds"
#                  )
#           )


rm(rf_xgb_forecasts_individualized)

Now we can check some summary (RMSE) statistics. NOTE: these are summary statistics based on the data NOT used to build the models.

Extrapolation - ARIMA Model

Function to get the ARIMA predictions.

get_pred_arima <- function(split, mod) {
  n <- nrow(assessment(split)
            )
  # Get assessment data
  pred_dat <-
    assessment(split) %>%
    mutate(pred = as.vector(forecast(mod, h = n)$mean),
           # if_else is used to put a floor of 0 (zero) for the prediction
           pred_zero_floor = if_else(pred < 0,
                                     0,
                                     pred
                                     ),
           # pct_error = (el_rides - pred_zero_floor) / el_rides * 100,
           sqrd_error = (el_rides - pred_zero_floor)^2
           )
  
  return(pred_dat)
  }

Function to calculate the extrapolation stats

get_extrap_arima <- function(arima.fcast) {
  pred_dat = arima.fcast
  
  sqrt(mean(pred_dat$sqrd_error,
            na.rm = TRUE
            )
       )
  }

Run the the forecast and extrapolation functions.

accuracy_extrap_arima <-
  pmap(.l = list(a = accuracy_interp_ML),
       .f = function(a) {
         pred =
           map2(a$splits,
                a$arima,
                get_pred_arima
               )
         
         a$arima.forecast <- pred
         
         return(a)
         }
       )


accuracy_extrap_arima <-
  pmap(.l = list(a = accuracy_extrap_arima),
       .f = function(a) {
         extrap <-
           map_dbl(a$arima.forecast,
                   get_extrap_arima
                   )
         
         a$arima.extrapolation <- extrap
         
         return(a)
         }
       )

names(accuracy_extrap_arima$`40600`)

##  [1] "splits"                      "id"                         
##  [3] "start_date"                  "arima"                      
##  [5] "arima_xreg"                  "prophet"                    
##  [7] "prophet_hol"                 "prophet.future"             
##  [9] "prophet_hol.future"          "prophet.forecast"           
## [11] "prophet_hol.forecast"        "prophet.plots"              
## [13] "prophet_hol.plots"           "interp_data"                
## [15] "extrap_data"                 "h2o.interp.forecast"        
## [17] "h2o.extrap.forecast"         "h2o.interpolation"          
## [19] "h2o.extrapolation"           "h2o.limvars.interp.forecast"
## [21] "h2o.limvars.extrap.forecast" "h2o.limvars.interpolation"  
## [23] "h2o.limvars.extrapolation"   "arima.interpolation"        
## [25] "arima_xreg.interpolation"    "prophet.interpolation"      
## [27] "prophet_hol.interpolation"   "rf.forecast_interp"         
## [29] "rf.forecast_extrap"          "xgb.forecast_interp"        
## [31] "xgb.forecast_extrap"         "rf.interpolation"           
## [33] "rf.extrapolation"            "xgb.interpolation"          
## [35] "xgb.extrapolation"           "arima.forecast"             
## [37] "arima.extrapolation"

accuracy_extrap_arima$`40600`$arima.extrapolation

##  [1] 69.78843 71.21568 61.58047 55.39423 56.59482 54.82201 53.50198
##  [8] 62.57218 63.72728 61.69955 63.98101 75.47126 73.22924

accuracy_extrap_arima %>% 
  map(~ summary(.x$arima.extrapolation)
      )

## $`40600`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   53.50   56.59   62.57   63.35   69.79   75.47 
## 
## $`41140`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   69.84   92.69   98.45  114.41  134.24  193.21 
## 
## $`40120`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   386.6   435.3   446.1   447.8   470.5   516.3 
## 
## $`40910`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   313.2   358.3   379.3   379.6   401.1   434.0 
## 
## $`40380`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2032    2544    2595    2831    3249    3762 
## 
## $`41660`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    3004    3397    3479    3581    3774    4219

rm(accuracy_interp_ML)

Extrapolation - ARIMA Model with XREG

Function to get the forecasts from the ARIMA XREG models.

get_pred_arima_xreg <-
  function(split, mod) {
    n = nrow(assessment(split)
             )
    
    data_og =
      split %>% 
      assessment %>% 
      mutate(holiday_binary = if_else(holiday == FALSE,
                                      0,
                                      1
                                      )
             )
    
    data_pred =
      data_og %>% 
      mutate(pred = as.vector(forecast(mod$best_fit,
                                       xreg = cbind(mod$best_ts_365_fourier_future,
                                                    # variables below identified based on "importance" from Random Forest and XGBTree models
                                                    data_og$holiday_binary,
                                                    data_og$year,
                                                    data_og$half,
                                                    data_og$quarter,
                                                    data_og$month,
                                                    data_og$mweek,
                                                    data_og$wday.lbl,
                                                    data_og$el_rides_l07,
                                                    data_og$el_rides_l14,
                                                    data_og$el_rides_l21,
                                                    data_og$el_rides_l28,
                                                    data_og$el_rides_ma07,
                                                    data_og$el_rides_ma14,
                                                    data_og$el_rides_ma21,
                                                    data_og$el_rides_ma28
                                    ),
                                       h = n
                                       )$mean
                              ),
             # if_else is used to put a floor of 0 (zero) for the prediction
             pred_zero_floor = if_else(pred < 0,
                                       0,
                                       pred
                                       ),
             # pct_error = (el_rides - pred_zero_floor) / el_rides * 100
             sqrd_error = (el_rides - pred_zero_floor)^2
             )
    }

Function to calculate the extrapolation stats.

get_extrap_arima_xreg <-
  function(arima.fcast) {
    pred_dat = arima.fcast

    sqrt(mean(pred_dat$sqrd_error,
              na.rm = TRUE
              )
         )
    }

Run the forecast and extrapolation functions.

accuracy_extrap_arima_xreg <-
  pmap(.l = list(a = accuracy_extrap_arima),
       .f = function(a) {
         pred <-
           map2(a$splits,
                a$arima_xreg,
                get_pred_arima_xreg
               )
         
         a$arima_xreg.forecast <- pred
         
         return(a)
         }
       )


accuracy_extrap_arima_xreg <-
  pmap(.l = list(a = accuracy_extrap_arima_xreg),
       .f = function(a) {
         extrap <-
           map_dbl(a$arima_xreg.forecast,
                   get_extrap_arima
                   )
         
         a$arima_xreg.extrapolation <- extrap
         
         return(a)
         }
       )

names(accuracy_extrap_arima_xreg$`40600`)

##  [1] "splits"                      "id"                         
##  [3] "start_date"                  "arima"                      
##  [5] "arima_xreg"                  "prophet"                    
##  [7] "prophet_hol"                 "prophet.future"             
##  [9] "prophet_hol.future"          "prophet.forecast"           
## [11] "prophet_hol.forecast"        "prophet.plots"              
## [13] "prophet_hol.plots"           "interp_data"                
## [15] "extrap_data"                 "h2o.interp.forecast"        
## [17] "h2o.extrap.forecast"         "h2o.interpolation"          
## [19] "h2o.extrapolation"           "h2o.limvars.interp.forecast"
## [21] "h2o.limvars.extrap.forecast" "h2o.limvars.interpolation"  
## [23] "h2o.limvars.extrapolation"   "arima.interpolation"        
## [25] "arima_xreg.interpolation"    "prophet.interpolation"      
## [27] "prophet_hol.interpolation"   "rf.forecast_interp"         
## [29] "rf.forecast_extrap"          "xgb.forecast_interp"        
## [31] "xgb.forecast_extrap"         "rf.interpolation"           
## [33] "rf.extrapolation"            "xgb.interpolation"          
## [35] "xgb.extrapolation"           "arima.forecast"             
## [37] "arima.extrapolation"         "arima_xreg.forecast"        
## [39] "arima_xreg.extrapolation"

accuracy_extrap_arima_xreg$`40600`$arima_xreg.extrapolation

##  [1] 43.08196 41.36051 40.86218 41.19748 42.25116 40.87467 40.34515
##  [8] 43.72419 46.16725 48.26305 46.41810 47.71259 49.56721

accuracy_extrap_arima_xreg %>% 
  map(~ summary(.x$arima_xreg.extrapolation)
      )

## $`40600`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   40.35   41.20   43.08   43.99   46.42   49.57 
## 
## $`41140`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   50.61   52.02   53.54   53.36   53.80   56.40 
## 
## $`40120`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   231.7   233.1   238.2   253.8   276.3   294.0 
## 
## $`40910`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   224.3   277.2   292.8   284.6   297.2   307.4 
## 
## $`40380`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1469    1486    1569    1662    1848    1931 
## 
## $`41660`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1805    1869    1880    1963    2134    2179

rm(accuracy_extrap_arima)

Extrapolation - Prophet Model

Function to get the extrapolation stats.

get_extrap_prophet <- function(split, pf) {
  # Get assessment data
  pred_dat <-
    assessment(split) %>% 
    select(el_date,
           el_rides
         ) %>% 
    left_join(select(pf,
                     ds,
                     yhat,
                     yhat_zero_floor
                     ) %>% 
                mutate_at(vars(ds), as.Date),
              by = c("el_date" = "ds")
            ) %>% 
    mutate(sqrd_error = (el_rides - yhat_zero_floor)^2 
           ) %>% 
    arrange(el_date)

  sqrt(mean(pred_dat$sqrd_error,
            na.rm = TRUE
            )
       )
  }

Run the extrapolation stats function.

accuracy_extrap_prophet <-
  pmap(.l = list(a = accuracy_extrap_arima_xreg),
       .f = function(a) {
         extrap <-
           map2_dbl(a$splits,
                    a$prophet.forecast,
                    get_extrap_prophet
                    )
         
         extrap_hol <-
           map2_dbl(a$splits,
                    a$prophet_hol.forecast,
                    get_extrap_prophet
                    )
         
         a$prophet.extrapolation <- extrap
         
         a$prophet_hol.extrapolation <- extrap_hol
         
         return(a)
         }
       )


# saving is done to avoid having to run the models again
saveRDS(accuracy_extrap_prophet,
        paste0(wd,
               "/Models/",
               "accuracy_extrap_prophet.Rds"
               )
        )

# accuracy_extrap_prophet <-
#   readRDS(paste0(wd,
#                  "/Models/",
#                  "accuracy_extrap_prophet.Rds"
#                  )
#           )

names(accuracy_extrap_prophet$`40600`)

##  [1] "splits"                      "id"                         
##  [3] "start_date"                  "arima"                      
##  [5] "arima_xreg"                  "prophet"                    
##  [7] "prophet_hol"                 "prophet.future"             
##  [9] "prophet_hol.future"          "prophet.forecast"           
## [11] "prophet_hol.forecast"        "prophet.plots"              
## [13] "prophet_hol.plots"           "interp_data"                
## [15] "extrap_data"                 "h2o.interp.forecast"        
## [17] "h2o.extrap.forecast"         "h2o.interpolation"          
## [19] "h2o.extrapolation"           "h2o.limvars.interp.forecast"
## [21] "h2o.limvars.extrap.forecast" "h2o.limvars.interpolation"  
## [23] "h2o.limvars.extrapolation"   "arima.interpolation"        
## [25] "arima_xreg.interpolation"    "prophet.interpolation"      
## [27] "prophet_hol.interpolation"   "rf.forecast_interp"         
## [29] "rf.forecast_extrap"          "xgb.forecast_interp"        
## [31] "xgb.forecast_extrap"         "rf.interpolation"           
## [33] "rf.extrapolation"            "xgb.interpolation"          
## [35] "xgb.extrapolation"           "arima.forecast"             
## [37] "arima.extrapolation"         "arima_xreg.forecast"        
## [39] "arima_xreg.extrapolation"    "prophet.extrapolation"      
## [41] "prophet_hol.extrapolation"

accuracy_extrap_prophet$`40600`$prophet.extrapolation

##  [1] 124.98899 123.04937 106.03899  92.42239  77.78675  85.07669  72.08575
##  [8]  68.87321  58.21356  59.61095  62.60217  74.32767  83.69122

accuracy_extrap_prophet$`40600`$prophet_hol.extrapolation

##  [1] 127.10234 131.31208 105.91172  99.80493  85.14470  86.08135  60.29477
##  [8]  43.93964  39.87920  39.81755  54.94782  65.78041  83.07772

message("prophet")

## prophet

accuracy_extrap_prophet %>% 
  map(~ summary(.x$prophet.extrapolation)
      )

## $`40600`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   58.21   68.87   77.79   83.75   92.42  124.99 
## 
## $`41140`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   73.33   98.78  120.44  154.14  227.03  258.34 
## 
## $`40120`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   425.4   464.1   528.6   526.5   568.7   660.7 
## 
## $`40910`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   592.3   637.7   670.3   667.0   699.6   746.7 
## 
## $`40380`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2786    3065    3326    3427    3648    4664 
## 
## $`41660`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2646    3538    4636    4805    5761    7173

message("prophet_hol")

## prophet_hol

accuracy_extrap_prophet %>% 
  map(~ summary(.x$prophet_hol.extrapolation)
      )

## $`40600`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   39.82   54.95   83.08   78.70   99.80  131.31 
## 
## $`41140`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   58.12   89.02  119.24  148.03  223.80  252.26 
## 
## $`40120`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   215.6   334.5   517.0   460.6   589.0   689.4 
## 
## $`40910`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   462.7   616.9   690.3   661.7   722.9   765.2 
## 
## $`40380`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1403    1896    2926    2790    3224    4513 
## 
## $`41660`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2028    2847    4439    4574    5860    7561

rm(accuracy_extrap_arima_xreg)

Add an identifier for the relevant el_stop_id value.

add_el_stop_id <-
  pmap(.l = list(a = accuracy_extrap_prophet,
                 c = names(accuracy_extrap_prophet)
                 ),
       .f = function(a, c) {
         a$el_stop_id <- c
         
         return(a)
       }
       )

# saving is done to avoid having to run the forecasts again
saveRDS(add_el_stop_id,
        paste0(wd,
               "/Models/",
               "add_el_stop_id.Rds"
               )
        )

# add_el_stop_id <-
#   readRDS(paste0(wd,
#                  "/Models/",
#                  "add_el_stop_id.Rds"
#                  )
#           )


names(add_el_stop_id$`40600`)

##  [1] "splits"                      "id"                         
##  [3] "start_date"                  "arima"                      
##  [5] "arima_xreg"                  "prophet"                    
##  [7] "prophet_hol"                 "prophet.future"             
##  [9] "prophet_hol.future"          "prophet.forecast"           
## [11] "prophet_hol.forecast"        "prophet.plots"              
## [13] "prophet_hol.plots"           "interp_data"                
## [15] "extrap_data"                 "h2o.interp.forecast"        
## [17] "h2o.extrap.forecast"         "h2o.interpolation"          
## [19] "h2o.extrapolation"           "h2o.limvars.interp.forecast"
## [21] "h2o.limvars.extrap.forecast" "h2o.limvars.interpolation"  
## [23] "h2o.limvars.extrapolation"   "arima.interpolation"        
## [25] "arima_xreg.interpolation"    "prophet.interpolation"      
## [27] "prophet_hol.interpolation"   "rf.forecast_interp"         
## [29] "rf.forecast_extrap"          "xgb.forecast_interp"        
## [31] "xgb.forecast_extrap"         "rf.interpolation"           
## [33] "rf.extrapolation"            "xgb.interpolation"          
## [35] "xgb.extrapolation"           "arima.forecast"             
## [37] "arima.extrapolation"         "arima_xreg.forecast"        
## [39] "arima_xreg.extrapolation"    "prophet.extrapolation"      
## [41] "prophet_hol.extrapolation"   "el_stop_id"

names(add_el_stop_id$`40910`)

##  [1] "splits"                      "id"                         
##  [3] "start_date"                  "arima"                      
##  [5] "arima_xreg"                  "prophet"                    
##  [7] "prophet_hol"                 "prophet.future"             
##  [9] "prophet_hol.future"          "prophet.forecast"           
## [11] "prophet_hol.forecast"        "prophet.plots"              
## [13] "prophet_hol.plots"           "interp_data"                
## [15] "extrap_data"                 "h2o.interp.forecast"        
## [17] "h2o.extrap.forecast"         "h2o.interpolation"          
## [19] "h2o.extrapolation"           "h2o.limvars.interp.forecast"
## [21] "h2o.limvars.extrap.forecast" "h2o.limvars.interpolation"  
## [23] "h2o.limvars.extrapolation"   "arima.interpolation"        
## [25] "arima_xreg.interpolation"    "prophet.interpolation"      
## [27] "prophet_hol.interpolation"   "rf.forecast_interp"         
## [29] "rf.forecast_extrap"          "xgb.forecast_interp"        
## [31] "xgb.forecast_extrap"         "rf.interpolation"           
## [33] "rf.extrapolation"            "xgb.interpolation"          
## [35] "xgb.extrapolation"           "arima.forecast"             
## [37] "arima.extrapolation"         "arima_xreg.forecast"        
## [39] "arima_xreg.extrapolation"    "prophet.extrapolation"      
## [41] "prophet_hol.extrapolation"   "el_stop_id"

rm(accuracy_extrap_prophet)

Now I simply update the run_times dataset with the relevant h2o info.

run_times[9:10] <-
  list(h2o = as.list(h2o.time),
       h2o.limvars = as.list(h2o.limvars.time)
       )

names(run_times)[9:10] <- c("h2o", "h2o.limvars")


str(run_times)

## List of 10
##  $ arima               :List of 5
##   ..$ user.self : num 995
##   ..$ sys.self  : num 14.3
##   ..$ elapsed   : num 1145
##   ..$ user.child: num 0
##   ..$ sys.child : num 0
##  $ arima_xreg          :List of 5
##   ..$ user.self : num 653
##   ..$ sys.self  : num 25.5
##   ..$ elapsed   : num 719
##   ..$ user.child: num 0
##   ..$ sys.child : num 0
##  $ prophet             :List of 5
##   ..$ user.self : num 10.4
##   ..$ sys.self  : num 0.777
##   ..$ elapsed   : num 11.4
##   ..$ user.child: num 0
##   ..$ sys.child : num 0
##  $ prophet_hol         :List of 5
##   ..$ user.self : num 23.7
##   ..$ sys.self  : num 1.6
##   ..$ elapsed   : num 25.4
##   ..$ user.child: num 0
##   ..$ sys.child : num 0
##  $ prophet.forecast    :List of 5
##   ..$ user.self : num 243
##   ..$ sys.self  : num 21.5
##   ..$ elapsed   : num 282
##   ..$ user.child: num 0
##   ..$ sys.child : num 0
##  $ prophet_hol.forecast:List of 5
##   ..$ user.self : num 833
##   ..$ sys.self  : num 79.9
##   ..$ elapsed   : num 960
##   ..$ user.child: num 0
##   ..$ sys.child : num 0
##  $ rf_corr_no          :List of 5
##   ..$ user.self : num 56.2
##   ..$ sys.self  : num 2.88
##   ..$ elapsed   : num 471
##   ..$ user.child: num 0
##   ..$ sys.child : num 0
##  $ xgbtree_corr_yes    :List of 5
##   ..$ user.self : num 10.6
##   ..$ sys.self  : num 2.53
##   ..$ elapsed   : num 215
##   ..$ user.child: num 0
##   ..$ sys.child : num 0
##  $ h2o                 :List of 5
##   ..$ user.self : num 6.03
##   ..$ sys.self  : num 2.14
##   ..$ elapsed   : num 202
##   ..$ user.child: num 0
##   ..$ sys.child : num 0
##  $ h2o.limvars         :List of 5
##   ..$ user.self : num 5.99
##   ..$ sys.self  : num 2.22
##   ..$ elapsed   : num 203
##   ..$ user.child: num 0
##   ..$ sys.child : num 0

# saving is done to avoid having to run the forecasts again
saveRDS(run_times,
        paste0(wd,
               "/Models/",
               "run_times.Rds"
               )
        )

# run_times <-
#   readRDS(paste0(wd,
#                  "/Models/",
#                  "run_times.Rds"
#                  )
#           )