Load the relevant libraries.
# rm(list = ls())
# .rs.restartR()
# data manipulation
library("plyr")
library("tidyverse")
library("magrittr")
library("data.table")
library("lubridate")
library("sqldf")
# time series specific packages
library("timetk")
library("zoo")
library("tibbletime")
# modeling
library("fpp2")
library("prophet")
library("caret")
library("randomForest")
library("xgboost")
library("h2o")
library("keras")
# use_session_with_seed(123456789) # setting the seed to obtain reproducible results
# see https://keras.rstudio.com/articles/faq.html#how-can-i-obtain-reproducible-results-using-keras-during-development and https://cran.r-project.org/web/packages/keras/vignettes/faq.html
# can also re-enable gpu and parallel processing by using: use_session_with_seed(42, disable_gpu = FALSE, disable_parallel_cpu = FALSE)
# other
library("geosphere") # specific for distance calculations from lat-lon pairs
library("naniar") # inspecting missing data
library("rlang") # building functions
library("recipes") # used in Keras modeling to design matrices
library("rsample") # rolling samples for validation stats
library("tfruns") # used in Keras modeling for trainin runs
library("stringr") # string manipulation
library("ggplot2") # viz
library("sweep") # more easily pull out model statistics
library("yardstick") # easily calculate accuracy stats
library("doParallel") # parallel processing
Session Info.
sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] doParallel_1.0.14 iterators_1.0.10 foreach_1.4.4
## [4] yardstick_0.0.2 sweep_0.2.1.1 tfruns_1.4
## [7] rsample_0.0.3 recipes_0.1.4 rlang_0.3.0.1
## [10] naniar_0.4.1 geosphere_1.5-7 keras_2.2.4
## [13] h2o_3.20.0.8 xgboost_0.71.2 randomForest_4.6-14
## [16] caret_6.0-81 lattice_0.20-38 prophet_0.3.0.1
## [19] Rcpp_1.0.0 fpp2_2.3 expsmooth_2.3
## [22] fma_2.3 forecast_8.4 tibbletime_0.1.1
## [25] zoo_1.8-4 timetk_0.1.1.1 sqldf_0.4-11
## [28] RSQLite_2.1.1 gsubfn_0.7 proto_1.0.0
## [31] lubridate_1.7.4 data.table_1.11.8 magrittr_1.5
## [34] forcats_0.3.0 stringr_1.3.1 dplyr_0.7.8
## [37] purrr_0.2.5 readr_1.2.1 tidyr_0.8.2
## [40] tibble_1.4.2 ggplot2_3.1.0 tidyverse_1.2.1
## [43] plyr_1.8.4
##
## loaded via a namespace (and not attached):
## [1] colorspace_1.3-2 class_7.3-14 visdat_0.5.1
## [4] rprojroot_1.3-2 base64enc_0.1-3 rstudioapi_0.8
## [7] rstan_2.18.2 bit64_0.9-7 prodlim_2018.04.18
## [10] xml2_1.2.0 codetools_0.2-15 splines_3.5.1
## [13] knitr_1.20 zeallot_0.1.0 jsonlite_1.5
## [16] pROC_1.13.0 broom_0.5.0 compiler_3.5.1
## [19] httr_1.3.1 backports_1.1.2 assertthat_0.2.0
## [22] Matrix_1.2-15 lazyeval_0.2.1 cli_1.0.1
## [25] htmltools_0.3.6 prettyunits_1.0.2 tools_3.5.1
## [28] bindrcpp_0.2.2 gtable_0.2.0 glue_1.3.0
## [31] reshape2_1.4.3 cellranger_1.1.0 fracdiff_1.4-2
## [34] urca_1.3-0 debugme_1.1.0 nlme_3.1-137
## [37] lmtest_0.9-36 timeDate_3043.102 gower_0.1.2
## [40] ps_1.2.1 rvest_0.3.2 MASS_7.3-51.1
## [43] scales_1.0.0 ipred_0.9-8 hms_0.4.2
## [46] inline_0.3.15 yaml_2.2.0 quantmod_0.4-13
## [49] curl_3.2 reticulate_1.10 memoise_1.1.0
## [52] gridExtra_2.3 loo_2.0.0 StanHeaders_2.18.0
## [55] uroot_2.0-9 rpart_4.1-13 stringi_1.2.4
## [58] tensorflow_1.10 tseries_0.10-46 TTR_0.23-4
## [61] pkgbuild_1.0.2 lava_1.6.4 chron_2.3-53
## [64] bitops_1.0-6 pkgconfig_2.0.2 matrixStats_0.54.0
## [67] evaluate_0.12 bindr_0.1.1 bit_1.1-14
## [70] processx_3.2.0 tidyselect_0.2.5 R6_2.3.0
## [73] generics_0.0.2 DBI_1.0.0 whisker_0.3-2
## [76] pillar_1.3.0 haven_2.0.0 withr_2.1.2
## [79] xts_0.11-2 sp_1.3-1 RCurl_1.95-4.11
## [82] survival_2.43-3 nnet_7.3-12 modelr_0.1.2
## [85] crayon_1.3.4 rmarkdown_1.10 grid_3.5.1
## [88] readxl_1.1.0 blob_1.1.1 callr_3.0.0
## [91] ModelMetrics_1.2.2 digest_0.6.18 stats4_3.5.1
## [94] munsell_0.5.0 tcltk_3.5.1 quadprog_1.5-5
Setup the root directory.
Setting wd as the working directory.
# Capture the current working directory; the file paths used below are built
# by pasting sub-paths onto this value.
wd <- getwd()
# Echo the path so it is recorded in the rendered output.
wd
## [1] "/Users/mdturse/Desktop/Analytics/Chicago_El_Divvy"
NOTE: DV_corr_predict, DV_nzv_predict, func_custom_accuracy_metrics, period_train, period_test, and skip_span are the outputs produced in Step 02
# Restore each Step 02 object from its .Rds file under <wd>/Data/Interim/.
# Every path is assembled as wd + "/Data/Interim/" + file name.

# Modeling data sets (lists that are mapped over when fitting below).
DV_corr_predict <- readRDS(
  paste0(wd, "/Data/Interim/", "DV_corr_predict.Rds")
)
DV_nzv_predict <- readRDS(
  paste0(wd, "/Data/Interim/", "DV_nzv_predict.Rds")
)

# Summary function handed to caret::trainControl(summaryFunction = ...).
func_custom_accuracy_metrics <- readRDS(
  paste0(wd, "/Data/Interim/", "func_custom_accuracy_metrics.Rds")
)

# Time-slice settings: initialWindow, horizon and skip, respectively.
period_train <- readRDS(
  paste0(wd, "/Data/Interim/", "period_train.Rds")
)
period_test <- readRDS(
  paste0(wd, "/Data/Interim/", "period_test.Rds")
)
skip_span <- readRDS(
  paste0(wd, "/Data/Interim/", "skip_span.Rds")
)
Random forest: fit one model on the data from which highly correlated variables were removed (DV_corr_predict), and one model on the data without that filter (DV_nzv_predict).
# Start a parallel backend for caret::train(). One core is left free, but the
# worker count is clamped to at least 1: detectCores() returns 1 on a
# single-core host (and NA when the count is unknown), and makeCluster()
# rejects a worker count that is NA or below 1.
tot_cores <- detectCores()
cl <- makeCluster(max(1L, tot_cores - 1L, na.rm = TRUE))
registerDoParallel(cl)

# Wall-clock timer for this run.
start <- proc.time()

# Fit one random forest per element of DV_corr_predict; the result is a list
# of caret `train` objects carrying the same names as the input list.
DV_Fit.Rf.corr_yes <-
  DV_corr_predict %>%
  map(.f = function(a) {
    # Rolling-origin ("timeslice") resampling with a fixed-width training
    # window; the custom summary function supplies the reported metrics.
    fitControl <-
      trainControl(method = "timeslice",
                   initialWindow = period_train,
                   horizon = period_test,
                   fixedWindow = TRUE,
                   skip = skip_span,
                   summaryFunction = func_custom_accuracy_metrics
                   )

    # Same seed for every fit so the runs are reproducible.
    set.seed(123456789)

    # "nzv" and "corr" are deliberately not part of preProcess here.
    # na.action = na.pass keeps rows with missing values so that the
    # "medianImpute" step can fill them in.
    train(el_rides ~ .,
          data = a %>%
            select(#-el_stop_id,
                   -data_use_el_stop_id
                   ),
          preProcess = c(#"nzv"
                         #"corr"
                         "center",
                         "scale",
                         "medianImpute"
                         ),
          na.action = na.pass,
          method = "rf",
          metric = "RMSE",
          maximize = FALSE,
          importance = TRUE,
          trControl = fitControl,
          verbose = TRUE
          )
  }
  )

# Elapsed time for the whole corr_yes random-forest run.
time.Rf.corr_yes <- proc.time() - start
message("DV_Fit.Rf.corr_yes")
## DV_Fit.Rf.corr_yes
DV_Fit.Rf.corr_yes
## $`40600`
## Random Forest
##
## 905 samples
## 56 predictor
##
## Pre-processing: centered (56), scaled (56), median imputation (56)
## Resampling: Rolling Forecasting Origin Resampling (212 held-out with a fixed window)
## Summary of sample sizes: 578, 578, 578, 578, 578, 578, ...
## Resampling results across tuning parameters:
##
## mtry MAE MAPE RMSE R2
## 2 53.36399 12.661148 66.12081 0.7628671
## 29 31.38680 7.566142 47.24439 0.8788246
## 56 31.99531 7.629171 47.73797 0.8761612
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 29.
##
## $`41140`
## Random Forest
##
## 905 samples
## 56 predictor
##
## Pre-processing: centered (56), scaled (56), median imputation (56)
## Resampling: Rolling Forecasting Origin Resampling (212 held-out with a fixed window)
## Summary of sample sizes: 578, 578, 578, 578, 578, 578, ...
## Resampling results across tuning parameters:
##
## mtry MAE MAPE RMSE R2
## 2 69.12999 12.333349 83.30069 0.6295243
## 29 49.05100 8.337495 64.50003 0.7768664
## 56 45.72820 7.883337 61.33798 0.7986168
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 56.
##
## $`40120`
## Random Forest
##
## 905 samples
## 55 predictor
##
## Pre-processing: centered (55), scaled (55), median imputation (55)
## Resampling: Rolling Forecasting Origin Resampling (212 held-out with a fixed window)
## Summary of sample sizes: 578, 578, 578, 578, 578, 578, ...
## Resampling results across tuning parameters:
##
## mtry MAE MAPE RMSE R2
## 2 360.3300 14.979545 437.4819 0.7308128
## 28 204.1010 8.621304 304.2583 0.8694514
## 55 203.0345 8.558621 303.7084 0.8699530
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 55.
##
## $`40910`
## Random Forest
##
## 875 samples
## 57 predictor
##
## Pre-processing: centered (57), scaled (57), median imputation (57)
## Resampling: Rolling Forecasting Origin Resampling (212 held-out with a fixed window)
## Summary of sample sizes: 578, 578, 578, 578, 578, 578, ...
## Resampling results across tuning parameters:
##
## mtry MAE MAPE RMSE R2
## 2 274.7844 229.3202 371.2302 0.6457933
## 29 188.3720 217.8330 294.7416 0.7763492
## 57 195.3182 218.8777 304.3883 0.7615656
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 29.
##
## $`40380`
## Random Forest
##
## 905 samples
## 57 predictor
##
## Pre-processing: centered (57), scaled (57), median imputation (57)
## Resampling: Rolling Forecasting Origin Resampling (212 held-out with a fixed window)
## Summary of sample sizes: 578, 578, 578, 578, 578, 578, ...
## Resampling results across tuning parameters:
##
## mtry MAE MAPE RMSE R2
## 2 3178.530 24.35459 3602.505 0.7246555
## 29 1911.570 15.97969 2553.785 0.8605411
## 57 1969.073 17.63183 2558.202 0.8594512
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 29.
##
## $`41660`
## Random Forest
##
## 905 samples
## 58 predictor
##
## Pre-processing: centered (58), scaled (58), median imputation (58)
## Resampling: Rolling Forecasting Origin Resampling (212 held-out with a fixed window)
## Summary of sample sizes: 578, 578, 578, 578, 578, 578, ...
## Resampling results across tuning parameters:
##
## mtry MAE MAPE RMSE R2
## 2 2800.403 14.825966 3315.517 0.5100332
## 30 1784.122 10.106161 2465.759 0.7295317
## 58 1736.895 9.920884 2451.605 0.7322425
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 58.
# Restart the wall-clock timer for this run.
start <- proc.time()

# Fit one random forest per element of DV_nzv_predict; the result is a list
# of caret `train` objects carrying the same names as the input list.
DV_Fit.Rf.corr_no <- map(
  DV_nzv_predict,
  .f = function(dat) {
    # Rolling-origin ("timeslice") resampling with a fixed-width window.
    ts_control <- trainControl(
      method = "timeslice",
      initialWindow = period_train,
      horizon = period_test,
      fixedWindow = TRUE,
      skip = skip_span,
      summaryFunction = func_custom_accuracy_metrics
    )

    # Same seed for every fit so the runs are reproducible.
    set.seed(123456789)

    # preProcess intentionally omits "nzv" and "corr"; the identifier column
    # data_use_el_stop_id is dropped before modeling (el_stop_id is kept).
    train(
      el_rides ~ .,
      data = dat %>% select(-data_use_el_stop_id),
      preProcess = c("center", "scale", "medianImpute"),
      na.action = na.pass,
      method = "rf",
      metric = "RMSE",
      maximize = FALSE,
      importance = TRUE,
      trControl = ts_control,
      verbose = TRUE
    )
  }
)

# Elapsed time for the whole corr_no random-forest run.
time.Rf.corr_no <- proc.time() - start
message("DV_Fit.Rf.corr_no")
## DV_Fit.Rf.corr_no
DV_Fit.Rf.corr_no
## $`40600`
## Random Forest
##
## 905 samples
## 67 predictor
##
## Pre-processing: centered (67), scaled (67), median imputation (67)
## Resampling: Rolling Forecasting Origin Resampling (212 held-out with a fixed window)
## Summary of sample sizes: 578, 578, 578, 578, 578, 578, ...
## Resampling results across tuning parameters:
##
## mtry MAE MAPE RMSE R2
## 2 54.67882 13.346597 67.24735 0.7549971
## 34 31.66811 7.623250 47.57247 0.8770904
## 67 32.14927 7.684513 48.02464 0.8746478
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 34.
##
## $`41140`
## Random Forest
##
## 905 samples
## 68 predictor
##
## Pre-processing: centered (68), scaled (68), median imputation (68)
## Resampling: Rolling Forecasting Origin Resampling (212 held-out with a fixed window)
## Summary of sample sizes: 578, 578, 578, 578, 578, 578, ...
## Resampling results across tuning parameters:
##
## mtry MAE MAPE RMSE R2
## 2 71.72215 12.912940 85.38045 0.6104662
## 35 46.26375 7.993646 62.05579 0.7938126
## 68 45.72194 7.892781 61.70367 0.7961505
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 68.
##
## $`40120`
## Random Forest
##
## 905 samples
## 70 predictor
##
## Pre-processing: centered (70), scaled (70), median imputation (70)
## Resampling: Rolling Forecasting Origin Resampling (212 held-out with a fixed window)
## Summary of sample sizes: 578, 578, 578, 578, 578, 578, ...
## Resampling results across tuning parameters:
##
## mtry MAE MAPE RMSE R2
## 2 396.4919 17.159898 470.7127 0.6884465
## 36 205.8296 8.834800 306.5976 0.8674667
## 70 205.6014 8.680993 307.5658 0.8666173
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 36.
##
## $`40910`
## Random Forest
##
## 875 samples
## 69 predictor
##
## Pre-processing: centered (69), scaled (69), median imputation (69)
## Resampling: Rolling Forecasting Origin Resampling (212 held-out with a fixed window)
## Summary of sample sizes: 578, 578, 578, 578, 578, 578, ...
## Resampling results across tuning parameters:
##
## mtry MAE MAPE RMSE R2
## 2 285.8836 233.3822 380.8792 0.6271777
## 35 193.0397 220.7091 299.6599 0.7688630
## 69 195.3964 221.1577 303.9636 0.7622537
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 35.
##
## $`40380`
## Random Forest
##
## 905 samples
## 68 predictor
##
## Pre-processing: centered (68), scaled (68), median imputation (68)
## Resampling: Rolling Forecasting Origin Resampling (212 held-out with a fixed window)
## Summary of sample sizes: 578, 578, 578, 578, 578, 578, ...
## Resampling results across tuning parameters:
##
## mtry MAE MAPE RMSE R2
## 2 3437.994 27.17536 3805.897 0.6926114
## 35 1912.006 15.87499 2555.334 0.8604150
## 68 1981.270 17.60880 2575.045 0.8573869
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 35.
##
## $`41660`
## Random Forest
##
## 905 samples
## 70 predictor
##
## Pre-processing: centered (70), scaled (70), median imputation (70)
## Resampling: Rolling Forecasting Origin Resampling (212 held-out with a fixed window)
## Summary of sample sizes: 578, 578, 578, 578, 578, 578, ...
## Resampling results across tuning parameters:
##
## mtry MAE MAPE RMSE R2
## 2 2844.166 15.35689 3340.362 0.5033022
## 36 1835.743 10.35124 2490.086 0.7240566
## 70 1803.379 10.17824 2535.297 0.7131576
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 36.
# Shut the workers down, then re-register the sequential foreach backend.
# Without registerDoSEQ() the doParallel registration keeps pointing at the
# stopped cluster, and a later %dopar% call (e.g. inside caret::train) would
# fail on the dead connections.
stopCluster(cl)
registerDoSEQ()
# Remove the helper objects that are no longer needed.
rm(start, tot_cores, cl)
Compare the results.
# user system elapsed
# 61.039 5.166 527.512
# ~ 9 min
message("time.Rf.corr_yes")
## time.Rf.corr_yes
time.Rf.corr_yes
## user system elapsed
## 57.047 2.769 416.736
# user system elapsed
# 58.048 3.563 486.738
# ~ 8 min
message("time.Rf.corr_no")
## time.Rf.corr_no
time.Rf.corr_no
## user system elapsed
## 56.182 2.885 470.992
# Create a list of models: for every list element, pair the two random-forest
# fits so they can be compared with caret::resamples().
# The labels follow the object names: Corr_Yes is the fit from
# DV_Fit.Rf.corr_yes and Corr_No is the fit from DV_Fit.Rf.corr_no. (They were
# previously crossed, which mislabeled every comparison table and plot.)
Models.Rf <-
  pmap(.l = list(a = DV_Fit.Rf.corr_yes,
                 b = DV_Fit.Rf.corr_no
                 ),
       .f = function(a, b) {
         list(Corr_No = b,
              Corr_Yes = a
              )
       }
       )
# Collect the resampling results of each model pair into a caret `resamples`
# object (one per list element).
Resample_Results.Rf <- map(Models.Rf, function(.x) resamples(.x))

# Print the summary of the resampled metrics for every element.
map(Resample_Results.Rf, function(.x) summary(.x))
## $`40600`
##
## Call:
## summary.resamples(object = .x)
##
## Models: Corr_No, Corr_Yes
## Number of resamples: 13
##
## MAE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 29.49156 30.29677 31.27463 31.38680 32.49142 33.53614 0
## Corr_Yes 29.10133 30.54453 31.41446 31.66811 32.68464 34.82144 0
##
## MAPE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 6.849371 7.016811 7.340792 7.566142 8.013743 8.623327 0
## Corr_Yes 6.846320 7.010051 7.509362 7.623250 8.027933 8.788167 0
##
## R2
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 0.8568696 0.8638475 0.8893933 0.8788246 0.8923240 0.8942618 0
## Corr_Yes 0.8515189 0.8629410 0.8865375 0.8770904 0.8920949 0.8933530 0
##
## RMSE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 43.85291 44.41439 44.82802 47.24439 50.77109 51.77616 0
## Corr_Yes 43.87476 44.68568 45.54589 47.57247 51.33672 52.95681 0
##
##
## $`41140`
##
## Call:
## summary.resamples(object = .x)
##
## Models: Corr_No, Corr_Yes
## Number of resamples: 13
##
## MAE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 41.83843 44.57632 45.70248 45.72820 46.93409 49.13770 0
## Corr_Yes 41.05446 44.69314 45.96692 45.72194 46.31655 51.37393 0
##
## MAPE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 7.612844 7.754832 7.923190 7.883337 7.966813 8.306049 0
## Corr_Yes 7.509003 7.763323 7.834721 7.892781 7.982655 8.622716 0
##
## R2
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 0.7796583 0.7875419 0.7925303 0.7986168 0.8001393 0.8409655 0
## Corr_Yes 0.7645538 0.7841397 0.7926616 0.7961505 0.7985534 0.8416195 0
##
## RMSE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 57.48317 60.28750 61.17986 61.33798 62.44682 64.81715 0
## Corr_Yes 57.36486 60.41439 61.31194 61.70367 62.55303 67.00195 0
##
##
## $`40120`
##
## Call:
## summary.resamples(object = .x)
##
## Models: Corr_No, Corr_Yes
## Number of resamples: 13
##
## MAE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 176.4748 186.2569 192.8253 203.0345 224.498 237.3381 0
## Corr_Yes 175.1231 185.2949 195.1465 205.8296 221.869 259.9216 0
##
## MAPE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 7.406341 7.886926 8.051402 8.558621 9.418846 10.45619 0
## Corr_Yes 7.485117 7.992040 8.195726 8.834800 9.377135 11.43588 0
##
## R2
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 0.8347500 0.8489616 0.8803475 0.8699530 0.8850913 0.8966807 0
## Corr_Yes 0.8283214 0.8467642 0.8807202 0.8674667 0.8833842 0.8973781 0
##
## RMSE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 269.0210 278.6145 283.1965 303.7084 335.4447 350.4054 0
## Corr_Yes 268.1115 283.0184 284.7382 306.5976 337.4272 367.6232 0
##
##
## $`40910`
##
## Call:
## summary.resamples(object = .x)
##
## Models: Corr_No, Corr_Yes
## Number of resamples: 10
##
## MAE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 150.9585 179.6485 190.6999 188.3720 202.0979 215.3847 0
## Corr_Yes 155.3571 186.0088 194.3332 193.0397 204.7638 217.8496 0
##
## MAPE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 5.498962 237.7162 240.9771 217.8330 244.4084 245.5900 0
## Corr_Yes 5.616745 243.4091 243.5759 220.7091 244.3935 248.8788 0
##
## R2
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 0.7504095 0.7573360 0.7627025 0.7763492 0.7802347 0.8681174 0
## Corr_Yes 0.7442099 0.7523486 0.7552171 0.7688630 0.7706389 0.8624315 0
##
## RMSE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 207.6478 287.9799 306.4173 294.7416 313.9330 323.5713 0
## Corr_Yes 212.0767 294.7318 311.0596 299.6599 315.3143 327.5652 0
##
##
## $`40380`
##
## Call:
## summary.resamples(object = .x)
##
## Models: Corr_No, Corr_Yes
## Number of resamples: 13
##
## MAE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 1377.077 1804.829 1879.122 1911.570 2050.309 2738.133 0
## Corr_Yes 1339.499 1785.645 1885.284 1912.006 2117.726 2617.534 0
##
## MAPE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 10.25874 15.56809 16.13926 15.97969 16.70598 22.34793 0
## Corr_Yes 10.02759 15.23272 15.99324 15.87499 16.46497 21.06954 0
##
## R2
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 0.7748433 0.8527297 0.8600544 0.8605411 0.8705118 0.9065783 0
## Corr_Yes 0.7849040 0.8462781 0.8567119 0.8604150 0.8723439 0.9093752 0
##
## RMSE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 2052.713 2423.448 2595.687 2553.785 2660.415 3314.556 0
## Corr_Yes 2021.752 2406.242 2597.468 2555.334 2652.086 3239.657 0
##
##
## $`41660`
##
## Call:
## summary.resamples(object = .x)
##
## Models: Corr_No, Corr_Yes
## Number of resamples: 13
##
## MAE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 1578.035 1688.407 1737.096 1736.895 1788.231 1929.228 0
## Corr_Yes 1667.649 1751.958 1800.107 1835.743 1843.362 2254.031 0
##
## MAPE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 8.986980 9.415665 9.817172 9.920884 10.21256 11.25040 0
## Corr_Yes 9.343517 9.631941 9.959996 10.351242 10.71342 12.76414 0
##
## R2
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 0.7102085 0.7208719 0.7222698 0.7322425 0.7336277 0.7932970 0
## Corr_Yes 0.6592414 0.7168635 0.7293069 0.7240566 0.7376489 0.7790171 0
##
## RMSE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 2354.167 2396.771 2420.436 2451.605 2479.233 2643.995 0
## Corr_Yes 2317.457 2376.487 2450.175 2490.086 2555.142 2867.090 0
# Box-and-whisker plot of the resampled metrics, one plot per list element.
map(Resample_Results.Rf, function(.x) bwplot(.x))
## $`40600`
##
## $`41140`
##
## $`40120`
##
## $`40910`
##
## $`40380`
##
## $`41660`
After inspecting the results, we choose to keep the model that does NOT include the correlation filter in the preprocessing stage (the corr_no fits, as the code below does) - the results and runtimes were similar.
# Drop every workspace object whose name matches "corr_yes".
rm(list = ls(pattern = "corr_yes"))

# Persist the retained fits and their run time under <wd>/Models/.
saveRDS(
  DV_Fit.Rf.corr_no,
  paste0(wd, "/Models/", "DV_Fit.Rf.corr_no.Rds")
)
saveRDS(
  time.Rf.corr_no,
  paste0(wd, "/Models/", "time.Rf.corr_no.Rds")
)

# Uncomment to restore the saved objects instead of refitting:
# DV_Fit.Rf.corr_no <- readRDS(
#   paste0(wd, "/Models/", "DV_Fit.Rf.corr_no.Rds")
# )
# time.Rf.corr_no <- readRDS(
#   paste0(wd, "/Models/", "time.Rf.corr_no.Rds")
# )
Inspect variable importance.
# Permutation importance (type = 1) is used for the variable importance,
# rescaled by caret (scale = TRUE).
# Based on the discussion here: http://parrt.cs.usfca.edu/doc/rf-importance/index.html
VI <- map(
  DV_Fit.Rf.corr_no,
  function(.x) varImp(.x, type = 1, scale = TRUE)
)

# Print the importance tables.
VI
## $`40600`
## rf variable importance
##
## only 20 most important variables shown (out of 67)
##
## Overall
## el_rides_ma07 100.00
## el_rides_l28 89.15
## el_rides_l07 81.22
## el_rides_l21 58.40
## wday.lbl.Saturday 48.94
## el_rides_l14 43.28
## el_rides_ma14 40.43
## divvy_all_trip_cnt_cus_l7 37.61
## el_rides_ma28 36.25
## divvy_all_triptime_med_sub_l7 36.23
## wday.lbl.Sunday 34.11
## divvy_all_triptime_med_cus_l7 31.24
## el_date 31.18
## month.11 31.11
## divvy_all_trip_cnt_sub_l7 29.00
## el_rides_ma21 28.76
## mweek.1 24.61
## tmax_bands_l7.05_75to100 22.24
## wday.lbl.Monday 20.86
## divvy_all_triptime_mean_cus_l7 19.26
##
## $`41140`
## rf variable importance
##
## only 20 most important variables shown (out of 68)
##
## Overall
## el_rides_ma07 100.00
## el_rides_l07 80.94
## el_rides_l14 51.36
## el_rides_l21 49.01
## wday.lbl.Sunday 41.29
## el_date 40.78
## el_rides_ma14 34.91
## divvy_all_trip_cnt_cus_l7 34.11
## wday.lbl.Saturday 33.69
## el_rides_ma28 33.00
## divvy_all_triptime_med_sub_l7 31.76
## el_rides_l28 31.47
## divvy_all_trip_cnt_sub_l7 30.40
## el_rides_ma21 25.28
## wday.lbl.Friday 17.47
## divvy_mindist_triptime_mean_cus_l7 16.76
## tmax_bands.05_75to100 16.68
## month.9 16.66
## divvy_mindist_miles 16.63
## divvy_all_triptime_med_cus_l7 16.57
##
## $`40120`
## rf variable importance
##
## only 20 most important variables shown (out of 70)
##
## Overall
## el_rides_l07 100.00
## el_rides_ma07 91.88
## el_rides_l28 81.99
## el_rides_l14 53.57
## el_rides_l21 47.78
## divvy_all_trip_cnt_cus_l7 38.85
## wday.lbl.Saturday 37.85
## el_rides_ma14 37.19
## month.11 35.48
## el_date 34.36
## el_rides_ma28 33.92
## wday.lbl.Sunday 33.52
## divvy_all_triptime_med_sub_l7 32.89
## month.9 31.02
## wday.lbl.Monday 27.55
## el_rides_ma21 25.24
## divvy_all_triptime_mean_sub_l7 24.31
## divvy_all_trip_cnt_sub_l7 23.06
## divvy_all_triptime_med_cus_l7 22.31
## tmax_bands.05_75to100 18.93
##
## $`40910`
## rf variable importance
##
## only 20 most important variables shown (out of 69)
##
## Overall
## el_rides_l07 100.00
## el_rides_ma07 82.10
## wday.lbl.Saturday 77.04
## el_rides_l28 72.09
## wday.lbl.Sunday 64.27
## el_rides_l14 63.47
## el_rides_l21 53.70
## el_rides_ma14 47.24
## el_rides_ma28 46.21
## el_rides_ma21 41.10
## el_date 40.20
## divvy_all_trip_cnt_cus_l7 30.83
## divvy_all_triptime_med_sub_l7 29.18
## divvy_mindist_miles 24.32
## divvy_all_triptime_mean_sub_l7 22.89
## month.12 22.26
## quarter.4 22.18
## divvy_mindist_trip_cnt_sub_l7 22.16
## divvy_mindist_triptime_mean_sub_l7 21.99
## divvy_all_triptime_mean_cus_l7 20.11
##
## $`40380`
## rf variable importance
##
## only 20 most important variables shown (out of 68)
##
## Overall
## el_rides_ma07 100.00
## el_rides_l28 96.19
## el_rides_l07 88.16
## wday.lbl.Saturday 54.25
## el_rides_l14 50.78
## el_rides_l21 48.19
## month.11 43.00
## el_rides_ma14 41.21
## el_rides_ma28 36.70
## mweek.4 36.07
## wday.lbl.Monday 35.32
## el_rides_ma21 32.28
## divvy_all_trip_cnt_cus_l7 31.72
## divvy_all_triptime_med_cus_l7 28.99
## divvy_pt5mi_trip_cnt_sub_l7 27.12
## divvy_all_triptime_med_sub_l7 27.01
## el_date 26.74
## divvy_pt5mi_trip_cnt_cus_l7 24.49
## divvy_pt5mi_triptime_mean_sub_l7 23.15
## divvy_mindist_triptime_mean_cus_l7 22.57
##
## $`41660`
## rf variable importance
##
## only 20 most important variables shown (out of 70)
##
## Overall
## el_rides_l28 100.00
## el_rides_ma07 91.46
## el_rides_l07 85.83
## el_rides_l21 74.06
## el_rides_l14 73.02
## wday.lbl.Saturday 62.20
## el_rides_ma21 56.02
## el_rides_ma14 48.39
## el_date 47.23
## el_rides_ma28 45.74
## divvy_all_trip_cnt_cus_l7 41.55
## divvy_all_trip_cnt_sub_l7 37.04
## divvy_pt5mi_trip_cnt_sub_l7 34.58
## wday.lbl.Monday 31.42
## divvy_all_triptime_med_cus_l7 31.38
## divvy_mindist_trip_cnt_cus_l7 31.28
## divvy_all_triptime_med_sub_l7 31.21
## wday.lbl.Sunday 28.94
## month.12 27.64
## month.11 25.87
# Plot the 20 most important variables for every list element.
map(VI, function(.x) plot(.x, top = 20))
## $`40600`
##
## $`41140`
##
## $`40120`
##
## $`40910`
##
## $`40380`
##
## $`41660`
rm(VI)
XGBoost (xgbTree): fit one model on the data from which highly correlated variables were removed (DV_corr_predict), and one model on the data without that filter (DV_nzv_predict).
# Start a parallel backend for caret::train(). One core is left free, but the
# worker count is clamped to at least 1: detectCores() returns 1 on a
# single-core host (and NA when the count is unknown), and makeCluster()
# rejects a worker count that is NA or below 1.
tot_cores <- detectCores()
cl <- makeCluster(max(1L, tot_cores - 1L, na.rm = TRUE))
registerDoParallel(cl)

# Wall-clock timer for this run.
start <- proc.time()

# Fit one xgbTree model per element of DV_corr_predict; the result is a list
# of caret `train` objects carrying the same names as the input list.
DV_Fit.Xgbtree.corr_yes <-
  DV_corr_predict %>%
  map(.f = function(a) {
    # Rolling-origin ("timeslice") resampling with a fixed-width training
    # window; the custom summary function supplies the reported metrics.
    fitControl <-
      trainControl(method = "timeslice",
                   initialWindow = period_train,
                   horizon = period_test,
                   fixedWindow = TRUE,
                   skip = skip_span,
                   summaryFunction = func_custom_accuracy_metrics
                   )

    # Same seed for every fit so the runs are reproducible.
    set.seed(123456789)

    # "nzv" and "corr" are deliberately not part of preProcess here.
    # `importance = TRUE` is a randomForest argument; xgboost has no such
    # parameter, so it is not forwarded to the xgbTree fit.
    train(el_rides ~ .,
          data = a %>%
            select(#-el_stop_id,
                   -data_use_el_stop_id
                   ),
          preProcess = c(#"nzv"
                         #"corr"
                         "center",
                         "scale",
                         "medianImpute"
                         ),
          na.action = na.pass,
          method = "xgbTree",
          metric = "RMSE",
          maximize = FALSE,
          trControl = fitControl,
          verbose = TRUE
          )
  }
  )

# Elapsed time for the whole corr_yes xgbTree run.
time.Xgbtree.corr_yes <- proc.time() - start
# message("DV_Fit.Xgbtree.corr_yes")
# DV_Fit.Xgbtree.corr_yes
# Restart the wall-clock timer for this run.
start <- proc.time()

# Fit one xgbTree model per element of DV_nzv_predict; the result is a list
# of caret `train` objects carrying the same names as the input list.
DV_Fit.Xgbtree.corr_no <-
  DV_nzv_predict %>%
  map(.f = function(a) {
    # Rolling-origin ("timeslice") resampling with a fixed-width training
    # window; the custom summary function supplies the reported metrics.
    fitControl <-
      trainControl(method = "timeslice",
                   initialWindow = period_train,
                   horizon = period_test,
                   fixedWindow = TRUE,
                   skip = skip_span,
                   summaryFunction = func_custom_accuracy_metrics
                   )

    # Same seed for every fit so the runs are reproducible.
    set.seed(123456789)

    # "nzv" and "corr" are deliberately not part of preProcess here.
    # `importance = TRUE` is a randomForest argument; xgboost has no such
    # parameter, so it is not forwarded to the xgbTree fit.
    train(el_rides ~ .,
          data = a %>%
            select(#-el_stop_id,
                   -data_use_el_stop_id
                   ),
          preProcess = c(#"nzv"
                         #"corr"
                         "center",
                         "scale",
                         "medianImpute"
                         ),
          na.action = na.pass,
          method = "xgbTree",
          metric = "RMSE",
          maximize = FALSE,
          trControl = fitControl,
          verbose = TRUE
          )
  }
  )

# Elapsed time for the whole corr_no xgbTree run.
time.Xgbtree.corr_no <- proc.time() - start
# message("DV_Fit.Xgbtree.corr_no")
# DV_Fit.Xgbtree.corr_no
# Shut the workers down, then re-register the sequential foreach backend.
# Without registerDoSEQ() the doParallel registration keeps pointing at the
# stopped cluster, and a later %dopar% call (e.g. inside caret::train) would
# fail on the dead connections.
stopCluster(cl)
registerDoSEQ()
# Remove the helper objects that are no longer needed.
rm(start, tot_cores, cl)
Compare the results.
# user system elapsed
# 10.888 2.333 179.411
# ~ 3 min
message("time.Xgbtree.corr_yes")
## time.Xgbtree.corr_yes
time.Xgbtree.corr_yes
## user system elapsed
## 10.596 2.534 214.636
# user system elapsed
# 10.377 2.360 201.333
# ~ 3 min
message("time.Xgbtree.corr_no")
## time.Xgbtree.corr_no
time.Xgbtree.corr_no
## user system elapsed
## 10.379 2.605 238.173
# Create a list of models: for every list element, pair the two xgbTree fits
# so they can be compared with caret::resamples().
# The labels follow the object names: Corr_Yes is the fit from
# DV_Fit.Xgbtree.corr_yes and Corr_No is the fit from DV_Fit.Xgbtree.corr_no.
# (They were previously crossed, which mislabeled every comparison table and
# plot.)
Models.Xgbtree <-
  pmap(.l = list(a = DV_Fit.Xgbtree.corr_yes,
                 b = DV_Fit.Xgbtree.corr_no
                 ),
       .f = function(a, b) {
         list(Corr_No = b,
              Corr_Yes = a
              )
       }
       )
# Collect the resampling results of each model pair into a caret `resamples`
# object (one per list element).
Resample_Results.Xgbtree <- map(Models.Xgbtree, function(.x) resamples(.x))

# Print the summary of the resampled metrics for every element.
map(Resample_Results.Xgbtree, function(.x) summary(.x))
## $`40600`
##
## Call:
## summary.resamples(object = .x)
##
## Models: Corr_No, Corr_Yes
## Number of resamples: 13
##
## MAE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 32.19303 35.55575 37.49635 39.07401 40.58886 57.36103 0
## Corr_Yes 33.27039 34.71990 36.09276 37.70206 40.52589 45.22524 0
##
## MAPE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 7.969871 8.645468 9.526889 9.679793 10.533630 12.01006 0
## Corr_Yes 7.446816 8.064649 8.922381 9.062954 9.074494 12.00199 0
##
## R2
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 0.7476391 0.7828786 0.8228306 0.8183967 0.8581394 0.8753876 0
## Corr_Yes 0.7329532 0.8212722 0.8402651 0.8280687 0.8649636 0.8769419 0
##
## RMSE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 47.93771 50.79815 57.11459 57.56327 63.26301 69.03943 0
## Corr_Yes 47.30833 49.90248 54.26858 55.97031 57.68864 71.65853 0
##
##
## $`41140`
##
## Call:
## summary.resamples(object = .x)
##
## Models: Corr_No, Corr_Yes
## Number of resamples: 13
##
## MAE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 44.25420 53.49472 59.14318 61.65002 65.98962 88.99007 0
## Corr_Yes 47.04107 53.15157 60.99780 61.28252 69.37796 80.08106 0
##
## MAPE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 7.889643 8.990736 10.38354 10.39353 11.16079 14.11260 0
## Corr_Yes 8.282878 9.668258 10.63027 10.50186 11.44832 13.67824 0
##
## R2
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 0.4088250 0.6208651 0.6657441 0.6513493 0.7324304 0.7945801 0
## Corr_Yes 0.4807594 0.6239036 0.6791110 0.6686902 0.7177186 0.7735332 0
##
## RMSE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 63.05710 69.90978 80.84457 79.89923 82.59053 103.68635 0
## Corr_Yes 66.20869 73.18454 77.04980 78.21456 85.16411 95.90091 0
##
##
## $`40120`
##
## Call:
## summary.resamples(object = .x)
##
## Models: Corr_No, Corr_Yes
## Number of resamples: 13
##
## MAE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 193.4535 221.8841 260.6418 277.4664 313.4230 438.6116 0
## Corr_Yes 207.1631 230.2994 244.5939 273.2633 293.6921 403.2901 0
##
## MAPE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 8.253393 9.708229 11.397093 11.68554 13.81434 16.91666 0
## Corr_Yes 7.900832 8.928526 9.569002 10.98873 13.65961 15.28340 0
##
## R2
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 0.6536373 0.7481348 0.8024843 0.7913396 0.8326028 0.8685105 0
## Corr_Yes 0.6900278 0.7599979 0.8316271 0.8015114 0.8447884 0.8775462 0
##
## RMSE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 300.5154 350.4285 378.9381 381.7610 426.4795 491.6738 0
## Corr_Yes 292.6610 318.6677 352.5948 372.2966 436.5678 464.2249 0
##
##
## $`40910`
##
## Call:
## summary.resamples(object = .x)
##
## Models: Corr_No, Corr_Yes
## Number of resamples: 10
##
## MAE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 182.5477 198.9882 215.4499 218.0884 242.3456 250.2286 0
## Corr_Yes 195.9871 204.0963 217.1693 222.7655 236.2191 276.7918 0
##
## MAPE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 6.831365 231.9860 237.0799 216.7938 244.9702 255.6546 0
## Corr_Yes 7.189624 237.2212 243.3488 220.8403 247.4934 258.6670 0
##
## R2
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 0.6269560 0.6862052 0.7230603 0.7171828 0.7377268 0.8024803 0
## Corr_Yes 0.6414896 0.6880251 0.7105813 0.7116975 0.7439550 0.7860860 0
##
## RMSE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 254.1199 319.2051 327.0842 331.2105 356.0990 393.3169 0
## Corr_Yes 264.4558 311.1674 338.3910 334.6585 354.3217 387.7991 0
##
##
## $`40380`
##
## Call:
## summary.resamples(object = .x)
##
## Models: Corr_No, Corr_Yes
## Number of resamples: 13
##
## MAE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 1290.088 1674.217 1795.734 1808.918 1909.690 2217.246 0
## Corr_Yes 1330.699 1660.514 1702.089 1725.475 1867.537 2043.224 0
##
## MAPE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 10.70217 12.30182 14.16940 14.82862 14.71599 21.66701 0
## Corr_Yes 10.49416 11.54394 12.82834 14.15156 14.30425 20.82751 0
##
## R2
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 0.8105004 0.8375893 0.8504863 0.8540175 0.8682326 0.9090692 0
## Corr_Yes 0.8173254 0.8561397 0.8640984 0.8619001 0.8828332 0.9082951 0
##
## RMSE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 2025.838 2462.633 2629.306 2615.267 2706.523 3089.312 0
## Corr_Yes 2034.443 2305.265 2537.967 2544.655 2639.140 3033.171 0
##
##
## $`41660`
##
## Call:
## summary.resamples(object = .x)
##
## Models: Corr_No, Corr_Yes
## Number of resamples: 13
##
## MAE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 1461.259 1762.976 2123.482 2380.587 2846.651 3324.812 0
## Corr_Yes 1442.951 1507.182 2004.050 2180.979 2302.439 3799.306 0
##
## MAPE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 9.317611 9.901048 13.96259 13.17253 14.31943 18.61874 0
## Corr_Yes 8.296165 9.322468 11.69481 12.24441 13.71458 19.21908 0
##
## R2
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 0.3343301 0.4746273 0.6426096 0.5737701 0.7358578 0.7649216 0
## Corr_Yes 0.1613235 0.5885666 0.6736276 0.6114771 0.7597928 0.7944904 0
##
## RMSE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## Corr_No 2320.089 2464.427 3095.532 3045.560 3310.782 3856.815 0
## Corr_Yes 2056.723 2362.283 2816.765 2876.031 3150.414 4194.559 0
# Box-and-whisker plot of the resampled metrics, one plot per list element.
map(Resample_Results.Xgbtree, function(.x) bwplot(.x))
## $`40600`
##
## $`41140`
##
## $`40120`
##
## $`40910`
##
## $`40380`
##
## $`41660`
After inspecting the results, we choose to keep the model that includes the correlation filter in the preprocessing stage (the corr_yes fits, as the code below does) - the results were similar, and the run time was slightly shorter (about 215 s vs. 238 s elapsed).
# Drop every workspace object whose name matches "Xgbtree.corr_no".
rm(list = ls(pattern = "Xgbtree.corr_no"))

# Persist the retained fits and their run time under <wd>/Models/.
saveRDS(
  DV_Fit.Xgbtree.corr_yes,
  paste0(wd, "/Models/", "DV_Fit.Xgbtree.corr_yes.Rds")
)
saveRDS(
  time.Xgbtree.corr_yes,
  paste0(wd, "/Models/", "time.Xgbtree.corr_yes.Rds")
)

# Uncomment to restore the saved objects instead of refitting:
# DV_Fit.Xgbtree.corr_yes <- readRDS(
#   paste0(wd, "/Models/", "DV_Fit.Xgbtree.corr_yes.Rds")
# )
# time.Xgbtree.corr_yes <- readRDS(
#   paste0(wd, "/Models/", "time.Xgbtree.corr_yes.Rds")
# )
Inspect variable importance.
# Variable importance for the retained xgbTree fits, rescaled by caret
# (scale = TRUE).
# NOTE: for xgbTree models caret derives importance from
# xgboost::xgb.importance() (gain-based), not from permutation importance.
# The randomForest-specific `type = 1` argument has no effect for this model
# type and is therefore omitted. The permutation-importance discussion at
# http://parrt.cs.usfca.edu/doc/rf-importance/index.html applies to the
# random-forest fits, not to these.
VI <- DV_Fit.Xgbtree.corr_yes %>%
  map(~ varImp(.x,
               scale = TRUE
               )
      )
VI
## $`40600`
## xgbTree variable importance
##
## only 20 most important variables shown (out of 56)
##
## Overall
## wday.lbl.Sunday 100.00000
## wday.lbl.Saturday 54.21841
## el_date 10.13680
## month.1 4.19157
## month.10 1.99296
## month.9 1.20995
## month.12 0.83474
## wday.lbl.Monday 0.51056
## month.3 0.28139
## wday.lbl.Tuesday 0.22407
## wday.lbl.Wednesday 0.19511
## mweek.2 0.14645
## mweek.4 0.09541
## month.2 0.09030
## month.7 0.05361
## mweek.1 0.03273
## divvy_all_trip_cnt_sub_l7 0.00000
## month.8 0.00000
## wday.lbl.Thursday 0.00000
## month.6 0.00000
##
## $`41140`
## xgbTree variable importance
##
## only 20 most important variables shown (out of 56)
##
## Overall
## el_date 100.0000
## wday.lbl.Sunday 54.9947
## wday.lbl.Saturday 27.9666
## quarter.1 3.9540
## quarter.3 2.9215
## month.1 1.9928
## month.12 1.0470
## wday.lbl.Friday 0.8736
## mweek.2 0.5172
## month.10 0.4596
## wday.lbl.Monday 0.3824
## mweek.1 0.3015
## month.9 0.2852
## month.11 0.2143
## wday.lbl.Thursday 0.1743
## month.8 0.1549
## mweek.4 0.1425
## wday.lbl.Tuesday 0.1368
## month.4 0.1275
## month.6 0.1143
##
## $`40120`
## xgbTree variable importance
##
## only 20 most important variables shown (out of 55)
##
## Overall
## wday.lbl.Sunday 100.0000
## wday.lbl.Saturday 68.7862
## el_date 26.0470
## wday.lbl.Thursday 2.4572
## mweek.5 2.2036
## month.10 2.1857
## month.12 1.9718
## month.1 1.9460
## month.9 1.5397
## mweek.4 1.2234
## month.11 0.7489
## wday.lbl.Tuesday 0.7077
## wday.lbl.Wednesday 0.6385
## wday.lbl.Friday 0.6051
## quarter.4 0.5559
## quarter.1 0.4561
## wday.lbl.Monday 0.4460
## month.5 0.3966
## quarter.3 0.3237
## mweek.1 0.3165
##
## $`40910`
## xgbTree variable importance
##
## only 20 most important variables shown (out of 57)
##
## Overall
## el_date 100.0000
## wday.lbl.Sunday 27.5656
## wday.lbl.Saturday 14.5501
## year.2014 2.2619
## month.10 1.5704
## month.9 1.5558
## divvy_all_triptime_med_cus_l7 1.5419
## divvy_all_triptime_mean_cus_l7 1.3195
## divvy_all_trip_cnt_cus_l7 1.2040
## tmax_bands_l7.02_00to25 0.6750
## wday.lbl.Monday 0.4040
## tmax_bands_l7.04_50to75 0.3767
## mweek.5 0.3318
## tmin_bands_l7.03_00to25 0.3060
## tmin_bands_l7.04_25to50 0.2792
## quarter.4 0.2610
## month.11 0.2323
## mweek.4 0.2162
## month.1 0.1861
## wday.lbl.Friday 0.1359
##
## $`40380`
## xgbTree variable importance
##
## only 20 most important variables shown (out of 57)
##
## Overall
## wday.lbl.Sunday 100.00000
## wday.lbl.Saturday 84.92598
## el_date 8.96964
## month.12 1.57564
## month.10 0.84088
## quarter.3 0.77306
## month.1 0.73530
## wday.lbl.Tuesday 0.63052
## wday.lbl.Wednesday 0.45522
## mweek.5 0.41161
## mweek.3 0.37522
## mweek.2 0.35298
## month.6 0.29304
## wday.lbl.Thursday 0.23105
## tmax_bands.04_50to75 0.20358
## month.11 0.19781
## month.2 0.08966
## month.8 0.07502
## mweek.4 0.06973
## wday.lbl.Monday 0.06943
##
## $`41660`
## xgbTree variable importance
##
## only 20 most important variables shown (out of 58)
##
## Overall
## wday.lbl.Sunday 100.0000
## wday.lbl.Saturday 43.1934
## el_date 37.7939
## month.1 5.0212
## quarter.1 2.1457
## wday.lbl.Monday 1.7308
## month.12 1.3121
## wday.lbl.Friday 1.1027
## mweek.3 0.7702
## quarter.3 0.6994
## month.2 0.6061
## mweek.5 0.4890
## mweek.2 0.4380
## month.10 0.3641
## wday.lbl.Thursday 0.3233
## year.2014 0.2937
## mweek.4 0.2693
## wday.lbl.Wednesday 0.2294
## month.11 0.2073
## month.5 0.1991
# Plot the 20 highest-ranked variables for each element of VI.
map(VI, plot, top = 20)
## $`40600`
##
## $`41140`
##
## $`40120`
##
## $`40910`
##
## $`40380`
##
## $`41660`
# VI has been printed and plotted; remove it from the workspace.
rm(list = "VI")