Title: Multiple Caret Run: Naive Bayes, KNN, C5.0 with AA Test-harness dataset
References:
Libraries:
# Packages this analysis attaches before any modelling code runs.
Libraries <- c("doMC", "caret", "klaR", "e1071",
               "ggplot2", "lattice", "C50", "plyr",
               "caretEnsemble")
# Install any package that is not available yet, then attach it.
# requireNamespace() is used purely as an availability check; the attach is
# done by library(), which stops with an error if the package still cannot
# be loaded (e.g. because the installation failed).
for (p in Libraries) {
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}
Import Data and Data Handling
# Read the test-harness dataset from the current working directory.
test_harness_paa <- read.csv("test_harness_paa.csv")
# Drop the 2nd and 3rd columns of the imported table.
test_harness_paa <- test_harness_paa[, -c(2, 3)]
# Convert the outcome to a factor *inside* the data frame. The partitioning
# step and the `Class ~ .` model formula both read the `Class` column of the
# data frame, so converting only a detached copy would leave them working on
# the unconverted column.
test_harness_paa$Class <- as.factor(test_harness_paa$Class)
# Keep the standalone `Class` vector as well, since the script has always
# defined it.
Class <- test_harness_paa$Class
Partition Data
# Seed the RNG so the split below can be reproduced.
set.seed(1000)
# Row positions of the training set: createDataPartition() is asked for 80%
# of the rows, based on the Class column, returned as a matrix (list = FALSE).
index <- createDataPartition(
  y = test_harness_paa$Class,
  p = 0.8,
  list = FALSE
)
# Rows picked by `index` become the training set; all other rows are held
# out as the test set.
training_data <- test_harness_paa[index, ]
test_data <- test_harness_paa[-index, ]
Multiple Model Run
# Fit the three models on the training partition with a single caretList()
# call, and record the wall-clock start time so the run can be timed.
# Seed the RNG before any model fitting starts.
set.seed(1000)
# Register the doMC parallel backend with 3 cores.
registerDoMC(cores = 3)
start_time <- Sys.time() # Wall-clock start of the training run
# Resampling control shared by every model. Only allowParallel is set here;
# NOTE(review): per the warnings recorded after this chunk, caretList() then
# sets savePredictions to 'final' and generates the resampling indexes itself.
econtrol <- trainControl(allowParallel = TRUE)
# caret method names of the models to fit.
algos_3_test <- c("knn", "C5.0", "nb")
# Train every method in algos_3_test on training_data, with Class as the
# outcome and all remaining columns as predictors.
model_list <- caretList(Class ~ .,
data = training_data,
methodList = algos_3_test,
trControl = econtrol)
## Warning in trControlCheck(x = trControl, y = target): trControl
## $savePredictions not 'all' or 'final'. Setting to 'final' so we can
## ensemble the models.
## Warning in trControlCheck(x = trControl, y = target): indexes not defined
## in trControl. Attempting to set them ourselves, so each model in the
## ensemble will have the same resampling indexes.
end_time <- Sys.time() # Wall-clock end of the training run
end_time - start_time # Elapsed training time (auto-printed as a difftime)
## Time difference of 3.551673 mins
# Collect the resampling results of all fitted models into one object.
results <- resamples(model_list)
# Correlation matrix between the models, computed from those resampling
# results, printed for inspection.
mcr <- modelCor(results)
print(mcr)
## knn C5.0 nb
## knn 1.0000000 0.5758163 0.4399854
## C5.0 0.5758163 1.0000000 0.3043800
## nb 0.4399854 0.3043800 1.0000000
Machine Settings:
# Report the operating system name, release, version and hardware type.
Sys.info()[c("sysname", "release", "version", "machine")]
## sysname
## "Linux"
## release
## "4.15.0-50-generic"
## version
## "#54~16.04.1-Ubuntu SMP Wed May 8 15:55:19 UTC 2019"
## machine
## "x86_64"
# Print the R version, platform, locale and attached/loaded package versions
# so the run can be reproduced.
sessionInfo()
## R version 3.4.4 (2018-03-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 18.3
##
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.6.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.6.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] caretEnsemble_2.0.0 plyr_1.8.4 C50_0.1.2
## [4] e1071_1.7-1 klaR_0.6-14 MASS_7.3-51.4
## [7] caret_6.0-84 ggplot2_3.1.1 lattice_0.20-38
## [10] doMC_1.3.5 iterators_1.0.10 foreach_1.4.4
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.1 mvtnorm_1.0-8 lubridate_1.7.4
## [4] class_7.3-15 assertthat_0.2.1 digest_0.6.19
## [7] ipred_0.9-9 mime_0.6 R6_2.4.0
## [10] stats4_3.4.4 evaluate_0.13 highr_0.8
## [13] pillar_1.4.0 rlang_0.3.4 lazyeval_0.2.2
## [16] rstudioapi_0.10 data.table_1.12.2 miniUI_0.1.1.1
## [19] partykit_1.2-4 rpart_4.1-15 Matrix_1.2-17
## [22] combinat_0.0-8 rmarkdown_1.12 splines_3.4.4
## [25] gower_0.2.1 stringr_1.4.0 questionr_0.7.0
## [28] munsell_0.5.0 Cubist_0.2.2 shiny_1.3.2
## [31] compiler_3.4.4 httpuv_1.5.1 xfun_0.7
## [34] pkgconfig_2.0.2 libcoin_1.0-4 htmltools_0.3.6
## [37] nnet_7.3-12 tidyselect_0.2.5 gridExtra_2.3
## [40] tibble_2.1.1 prodlim_2018.04.18 codetools_0.2-16
## [43] crayon_1.3.4 dplyr_0.8.1 withr_2.1.2
## [46] later_0.8.0 recipes_0.1.5 ModelMetrics_1.2.2
## [49] grid_3.4.4 nlme_3.1-140 xtable_1.8-4
## [52] gtable_0.3.0 magrittr_1.5 scales_1.0.0
## [55] pbapply_1.4-0 stringi_1.4.3 reshape2_1.4.3
## [58] promises_1.0.1 timeDate_3043.102 generics_0.0.2
## [61] Formula_1.2-3 lava_1.6.5 tools_3.4.4
## [64] glue_1.3.1 purrr_0.3.2 survival_2.44-1.1
## [67] yaml_2.2.0 colorspace_1.4-1 inum_1.0-1
## [70] knitr_1.23
EOF