Title: Multiple Caret Run: Naive Bayes, KNN, C5.0 with AA Test-harness dataset

References:

Libraries:

# Packages this analysis attaches, in the order they are loaded.
Libraries <- c("doMC", "caret", "klaR", "e1071",
               "ggplot2", "lattice", "C50", "plyr",
               "caretEnsemble")

# Install any package that is not available yet, then attach it.
# requireNamespace() only checks availability (it neither attaches the
# package nor warns when it is missing), so the single library() call below
# is what attaches it -- and library() stops with an error if the
# installation did not succeed, instead of silently carrying on.
for (p in Libraries) {
    if (!requireNamespace(p, quietly = TRUE)) {
        install.packages(p)
    }
    library(p, character.only = TRUE)
}

Import Data and Data Handling

# Read the test-harness data set from the current working directory.
test_harness_paa <- read.csv("test_harness_paa.csv")

# Drop the 2nd and 3rd columns before modelling.
# NOTE(review): columns are removed by position -- confirm which variables
# these are and that the CSV layout is stable.
test_harness_paa <- test_harness_paa[, -c(2, 3)]

# Store the outcome as a factor in the data frame itself. The partitioning
# and model-fitting code reads `Class` from the data frame, so converting only
# a detached copy would leave the modelling data untouched (and a text label
# column stays character under R >= 4.0, where read.csv() no longer converts
# strings to factors by default).
test_harness_paa$Class <- as.factor(test_harness_paa$Class)

# Standalone copy of the outcome factor, kept so any code that refers to the
# global `Class` continues to work.
Class <- test_harness_paa$Class

Partition Data

# Fix the RNG state so the random split below can be reproduced.
set.seed(1000)

# Select 80% of the rows, sampled on the outcome column. With list = FALSE
# the selected row numbers come back as a matrix rather than a list.
index <- createDataPartition(
    y = test_harness_paa$Class,
    p = 0.8,
    list = FALSE
)

# Rows not selected by the partition are held out for testing; the selected
# rows form the training set.
test_data     <- test_harness_paa[-index, ]
training_data <- test_harness_paa[index, ]

Multiple Model Run

# Seed the RNG and register a doMC parallel backend with 3 cores.
set.seed(1000)
registerDoMC(cores = 3)

# Wall-clock start of the training run.
start_time <- Sys.time()

# The three caret methods to fit: k-nearest neighbours, C5.0, naive Bayes.
algos_3_test <- c("knn", "C5.0", "nb")

# Resampling control is left at caret's defaults apart from allowing parallel
# execution. caretList() therefore fills in `savePredictions` and the
# resampling indexes itself and warns about it (see the recorded output that
# follows this call).
econtrol <- trainControl(allowParallel = TRUE)

# Fit every method in `algos_3_test` on the training set with the shared
# control object; the result is a list of fitted caret models.
model_list <- caretList(
    Class ~ .,
    data = training_data,
    methodList = algos_3_test,
    trControl = econtrol
)
## Warning in trControlCheck(x = trControl, y = target): trControl
## $savePredictions not 'all' or 'final'. Setting to 'final' so we can
## ensemble the models.
## Warning in trControlCheck(x = trControl, y = target): indexes not defined
## in trControl. Attempting to set them ourselves, so each model in the
## ensemble will have the same resampling indexes.
# Wall-clock end of the training run.
end_time <- Sys.time()

# Elapsed training time; auto-printed as a "Time difference of ..." line.
difftime(end_time, start_time)
## Time difference of 3.551673 mins
# Collect the resampling results of all fitted models into one object.
results <- resamples(model_list)

# Pairwise correlation between the models' resampled performance values.
mcr <- modelCor(results)
print(mcr)
##            knn      C5.0        nb
## knn  1.0000000 0.5758163 0.4399854
## C5.0 0.5758163 1.0000000 0.3043800
## nb   0.4399854 0.3043800 1.0000000

Machine Settings:

# Report the operating system name, release, version, and hardware type
# (elements 1-3 and 5 of Sys.info(), selected here by name).
Sys.info()[c("sysname", "release", "version", "machine")]
##                                              sysname 
##                                              "Linux" 
##                                              release 
##                                  "4.15.0-50-generic" 
##                                              version 
## "#54~16.04.1-Ubuntu SMP Wed May 8 15:55:19 UTC 2019" 
##                                              machine 
##                                             "x86_64"
# Print the R version, platform, locale, and the versions of all attached and
# loaded packages, so the run recorded in this report can be reproduced.
sessionInfo()
## R version 3.4.4 (2018-03-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 18.3
## 
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.6.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.6.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] caretEnsemble_2.0.0 plyr_1.8.4          C50_0.1.2          
##  [4] e1071_1.7-1         klaR_0.6-14         MASS_7.3-51.4      
##  [7] caret_6.0-84        ggplot2_3.1.1       lattice_0.20-38    
## [10] doMC_1.3.5          iterators_1.0.10    foreach_1.4.4      
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.1         mvtnorm_1.0-8      lubridate_1.7.4   
##  [4] class_7.3-15       assertthat_0.2.1   digest_0.6.19     
##  [7] ipred_0.9-9        mime_0.6           R6_2.4.0          
## [10] stats4_3.4.4       evaluate_0.13      highr_0.8         
## [13] pillar_1.4.0       rlang_0.3.4        lazyeval_0.2.2    
## [16] rstudioapi_0.10    data.table_1.12.2  miniUI_0.1.1.1    
## [19] partykit_1.2-4     rpart_4.1-15       Matrix_1.2-17     
## [22] combinat_0.0-8     rmarkdown_1.12     splines_3.4.4     
## [25] gower_0.2.1        stringr_1.4.0      questionr_0.7.0   
## [28] munsell_0.5.0      Cubist_0.2.2       shiny_1.3.2       
## [31] compiler_3.4.4     httpuv_1.5.1       xfun_0.7          
## [34] pkgconfig_2.0.2    libcoin_1.0-4      htmltools_0.3.6   
## [37] nnet_7.3-12        tidyselect_0.2.5   gridExtra_2.3     
## [40] tibble_2.1.1       prodlim_2018.04.18 codetools_0.2-16  
## [43] crayon_1.3.4       dplyr_0.8.1        withr_2.1.2       
## [46] later_0.8.0        recipes_0.1.5      ModelMetrics_1.2.2
## [49] grid_3.4.4         nlme_3.1-140       xtable_1.8-4      
## [52] gtable_0.3.0       magrittr_1.5       scales_1.0.0      
## [55] pbapply_1.4-0      stringi_1.4.3      reshape2_1.4.3    
## [58] promises_1.0.1     timeDate_3043.102  generics_0.0.2    
## [61] Formula_1.2-3      lava_1.6.5         tools_3.4.4       
## [64] glue_1.3.1         purrr_0.3.2        survival_2.44-1.1 
## [67] yaml_2.2.0         colorspace_1.4-1   inum_1.0-1        
## [70] knitr_1.23

EOF