This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

START TIME

# Record the notebook start time; the last statement of the notebook
# (`proc.time()-t`) subtracts it to report the total run time.
t <- proc.time()

LOAD THE NECESSARY LIBRARIES

# For manipulating the datasets
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr)
library(readxl)

# For plotting correlation matrix
library(ggcorrplot)
## Loading required package: ggplot2
# Machine Learning library
library(caret)
## Loading required package: lattice
library(catboost)

# For Multi-core processing support
library(parallel)
library(doParallel)
## Loading required package: foreach
## Loading required package: iterators

OPEN THE CLUSTER

# Start a two-worker PSOCK cluster and register it as the parallel backend.
# The trainControl() calls below set allowParallel, so model resampling is
# presumably what gets distributed over these workers.
cl <- parallel::makePSOCKcluster(2)
doParallel::registerDoParallel(cl)

GET THE DATA

Load the datasets

# Numerical dataset (Excel workbook, read with readxl)
dataset_num <- read_excel(path = "rice.xlsx")

# Categorical dataset (CSV, read with base R)
dataset_cat <- read.csv(file = "mushrooms.csv")

# Mixed dataset (Excel workbook, read with readxl)
dataset_mix <- read_excel(path = "bank.xlsx")

CLEAN, PREPARE & MANIPULATE THE DATA

# Count the rows for each VEIL.TYPE value
dataset_cat %>% group_by(VEIL.TYPE) %>% summarise(total=n())
## `summarise()` ungrouping output (override with `.groups` argument)
# Eliminate VEIL.TYPE since it only has one value
dataset_cat <- dataset_cat %>% select(-VEIL.TYPE)

# Count the rows for each STALK.ROOT value
dataset_cat %>% group_by(STALK.ROOT) %>% summarise(total=n())
## `summarise()` ungrouping output (override with `.groups` argument)
# Eliminate STALK.ROOT since it has missing values
dataset_cat <- dataset_cat %>% select(-STALK.ROOT)

Convert all character columns to factors

# Encode the target column of the numerical dataset as a factor
dataset_num <- mutate(dataset_num, CLASS = as.factor(CLASS))

# Turn every character column of the categorical dataset into a factor
dataset_cat <- mutate(dataset_cat, across(where(is.character), as.factor))

# Same conversion for the mixed dataset
dataset_mix <- mutate(dataset_mix, across(where(is.character), as.factor))

#CATBOOST

# Train a CatBoost classifier through caret and evaluate it on a hold-out split.
#
# Args:
#   dataset: data frame (or tibble) holding the predictors plus a target
#            column named CLASS.
#
# Returns:
#   The confusionMatrix() result for the predictions made on the rows that
#   were held out from training (20% of the data).
catboost_function <- function(dataset) {
  # Use a base data.frame: indexing a tibble with the matrix returned by
  # createDataPartition() and setting row names on a tibble both raise
  # deprecation warnings.
  dataset <- as.data.frame(dataset)

  # Split train and test. `[, 1]` turns the one-column index matrix returned
  # with list = FALSE into a plain vector of row numbers.
  partition <- createDataPartition(dataset$CLASS, p = 0.80, list = FALSE)
  train_rows <- partition[, 1]
  data_train <- dataset[train_rows, ]
  data_test <- dataset[-train_rows, ]

  # Train the model with 5-fold cross-validation repeated twice
  fitControl <- trainControl(
    method = "repeatedcv",
    repeats = 2,
    number = 5,
    returnResamp = "final",
    savePredictions = "final",
    verboseIter = TRUE,
    allowParallel = TRUE
  )

  # drop = FALSE keeps the predictors a data.frame even if only one remains
  catboost_model <- train(
    x = data_train[, names(data_train) != "CLASS", drop = FALSE],
    y = data_train$CLASS,
    method = catboost.caret,
    trControl = fitControl
  )

  # Values are not auto-printed inside a function, so print explicitly
  print(catboost_model)

  # Predict on the hold-out rows and compare with their true classes
  catboost_predictions <- predict(catboost_model, data_test)
  confusionMatrix(catboost_predictions, as.factor(data_test$CLASS))
}

#RANDOM FOREST

# Train a random forest classifier through caret and evaluate it on a
# hold-out split.
#
# Args:
#   dataset: data frame (or tibble) holding the predictors plus a target
#            column named CLASS.
#
# Returns:
#   The confusionMatrix() result for the predictions made on the rows that
#   were held out from training (20% of the data).
rf_function <- function(dataset) {
  # Use a base data.frame: indexing a tibble with the matrix returned by
  # createDataPartition() raises a deprecation warning.
  dataset <- as.data.frame(dataset)

  # Split train and test. `[, 1]` turns the one-column index matrix returned
  # with list = FALSE into a plain vector of row numbers.
  partition <- createDataPartition(dataset$CLASS, p = 0.80, list = FALSE)
  train_rows <- partition[, 1]
  data_train <- dataset[train_rows, ]
  data_test <- dataset[-train_rows, ]

  # Train the model with 5-fold cross-validation repeated twice
  fitControl <- trainControl(
    method = "repeatedcv",
    repeats = 2,
    number = 5,
    returnResamp = "final",
    savePredictions = "final",
    verboseIter = TRUE,
    allowParallel = TRUE
  )

  # CLASS is the response; every other column is a predictor
  rf_model <- train(
    CLASS ~ .,
    data = data_train,
    method = "rf",
    trControl = fitControl
  )

  # Values are not auto-printed inside a function, so print explicitly
  print(rf_model)

  # Predict on the hold-out rows and compare with their true classes
  rf_predictions <- predict(rf_model, data_test)
  confusionMatrix(rf_predictions, as.factor(data_test$CLASS))
}

#EXECUTE CATBOOST AND RANDOMFOREST IN EACH DATASET

##Numerical Dataset

# Start the timer for CatBoost on the numerical dataset
t1 <- proc.time()

catboost_function(dataset_num)
## Warning: The `i` argument of ``[`()` can't be a matrix as of tibble 3.0.0.
## Convert to a vector.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Aggregating results
## Selecting tuning parameters
## Fitting depth = 6, learning_rate = 0.0498, iterations = 100, l2_leaf_reg = 1e-06, rsm = 0.9, border_count = 255 on full training set
## Warning: Setting row names on a tibble is deprecated.
## 0:   learn: 0.6565769    total: 149ms    remaining: 14.7s
## 1:   learn: 0.6233721    total: 153ms    remaining: 7.49s
## 2:   learn: 0.5926802    total: 156ms    remaining: 5.06s
## 3:   learn: 0.5651742    total: 160ms    remaining: 3.83s
## 4:   learn: 0.5393602    total: 164ms    remaining: 3.11s
## 5:   learn: 0.5160615    total: 167ms    remaining: 2.62s
## 6:   learn: 0.4946913    total: 170ms    remaining: 2.26s
## 7:   learn: 0.4751261    total: 173ms    remaining: 1.99s
## 8:   learn: 0.4565163    total: 176ms    remaining: 1.78s
## 9:   learn: 0.4391522    total: 179ms    remaining: 1.61s
## 10:  learn: 0.4231026    total: 182ms    remaining: 1.48s
## 11:  learn: 0.4082204    total: 186ms    remaining: 1.36s
## 12:  learn: 0.3948492    total: 189ms    remaining: 1.26s
## 13:  learn: 0.3821495    total: 192ms    remaining: 1.18s
## 14:  learn: 0.3704653    total: 195ms    remaining: 1.1s
## 15:  learn: 0.3590292    total: 198ms    remaining: 1.04s
## 16:  learn: 0.3484852    total: 201ms    remaining: 982ms
## 17:  learn: 0.3387822    total: 204ms    remaining: 930ms
## 18:  learn: 0.3296860    total: 207ms    remaining: 884ms
## 19:  learn: 0.3211705    total: 210ms    remaining: 842ms
## 20:  learn: 0.3131427    total: 213ms    remaining: 802ms
## 21:  learn: 0.3053956    total: 216ms    remaining: 767ms
## 22:  learn: 0.2980851    total: 219ms    remaining: 735ms
## 23:  learn: 0.2917024    total: 222ms    remaining: 704ms
## 24:  learn: 0.2854329    total: 225ms    remaining: 676ms
## 25:  learn: 0.2795508    total: 230ms    remaining: 655ms
## 26:  learn: 0.2739456    total: 233ms    remaining: 631ms
## 27:  learn: 0.2690711    total: 237ms    remaining: 609ms
## 28:  learn: 0.2639438    total: 240ms    remaining: 587ms
## 29:  learn: 0.2591860    total: 243ms    remaining: 566ms
## 30:  learn: 0.2546290    total: 246ms    remaining: 547ms
## 31:  learn: 0.2505347    total: 249ms    remaining: 529ms
## 32:  learn: 0.2467858    total: 252ms    remaining: 512ms
## 33:  learn: 0.2430415    total: 255ms    remaining: 495ms
## 34:  learn: 0.2394067    total: 258ms    remaining: 480ms
## 35:  learn: 0.2360497    total: 261ms    remaining: 464ms
## 36:  learn: 0.2330366    total: 264ms    remaining: 450ms
## 37:  learn: 0.2297277    total: 268ms    remaining: 437ms
## 38:  learn: 0.2266661    total: 271ms    remaining: 423ms
## 39:  learn: 0.2237936    total: 274ms    remaining: 410ms
## 40:  learn: 0.2211592    total: 277ms    remaining: 398ms
## 41:  learn: 0.2185949    total: 279ms    remaining: 386ms
## 42:  learn: 0.2160258    total: 283ms    remaining: 375ms
## 43:  learn: 0.2136687    total: 286ms    remaining: 364ms
## 44:  learn: 0.2114214    total: 289ms    remaining: 353ms
## 45:  learn: 0.2092412    total: 292ms    remaining: 343ms
## 46:  learn: 0.2072914    total: 295ms    remaining: 333ms
## 47:  learn: 0.2055936    total: 299ms    remaining: 324ms
## 48:  learn: 0.2038581    total: 303ms    remaining: 315ms
## 49:  learn: 0.2021939    total: 305ms    remaining: 305ms
## 50:  learn: 0.2006988    total: 309ms    remaining: 297ms
## 51:  learn: 0.1991199    total: 312ms    remaining: 288ms
## 52:  learn: 0.1978599    total: 315ms    remaining: 279ms
## 53:  learn: 0.1965361    total: 318ms    remaining: 271ms
## 54:  learn: 0.1952260    total: 321ms    remaining: 263ms
## 55:  learn: 0.1940856    total: 325ms    remaining: 255ms
## 56:  learn: 0.1925847    total: 328ms    remaining: 247ms
## 57:  learn: 0.1915455    total: 331ms    remaining: 240ms
## 58:  learn: 0.1904831    total: 334ms    remaining: 232ms
## 59:  learn: 0.1893805    total: 337ms    remaining: 225ms
## 60:  learn: 0.1884513    total: 340ms    remaining: 217ms
## 61:  learn: 0.1873141    total: 343ms    remaining: 210ms
## 62:  learn: 0.1865668    total: 346ms    remaining: 203ms
## 63:  learn: 0.1857137    total: 349ms    remaining: 196ms
## 64:  learn: 0.1847696    total: 352ms    remaining: 189ms
## 65:  learn: 0.1839835    total: 355ms    remaining: 183ms
## 66:  learn: 0.1832257    total: 358ms    remaining: 176ms
## 67:  learn: 0.1825024    total: 361ms    remaining: 170ms
## 68:  learn: 0.1815570    total: 364ms    remaining: 164ms
## 69:  learn: 0.1809153    total: 367ms    remaining: 157ms
## 70:  learn: 0.1799917    total: 370ms    remaining: 151ms
## 71:  learn: 0.1791766    total: 389ms    remaining: 151ms
## 72:  learn: 0.1784891    total: 392ms    remaining: 145ms
## 73:  learn: 0.1778771    total: 395ms    remaining: 139ms
## 74:  learn: 0.1772184    total: 398ms    remaining: 133ms
## 75:  learn: 0.1764401    total: 401ms    remaining: 127ms
## 76:  learn: 0.1760319    total: 404ms    remaining: 121ms
## 77:  learn: 0.1755020    total: 407ms    remaining: 115ms
## 78:  learn: 0.1749951    total: 410ms    remaining: 109ms
## 79:  learn: 0.1742342    total: 413ms    remaining: 103ms
## 80:  learn: 0.1737224    total: 417ms    remaining: 97.7ms
## 81:  learn: 0.1730289    total: 420ms    remaining: 92.2ms
## 82:  learn: 0.1724515    total: 423ms    remaining: 86.6ms
## 83:  learn: 0.1719595    total: 426ms    remaining: 81.1ms
## 84:  learn: 0.1715425    total: 429ms    remaining: 75.7ms
## 85:  learn: 0.1712157    total: 432ms    remaining: 70.3ms
## 86:  learn: 0.1706916    total: 435ms    remaining: 65ms
## 87:  learn: 0.1701373    total: 438ms    remaining: 59.8ms
## 88:  learn: 0.1697693    total: 441ms    remaining: 54.5ms
## 89:  learn: 0.1692333    total: 445ms    remaining: 49.5ms
## 90:  learn: 0.1686698    total: 448ms    remaining: 44.3ms
## 91:  learn: 0.1680853    total: 451ms    remaining: 39.3ms
## 92:  learn: 0.1676725    total: 455ms    remaining: 34.2ms
## 93:  learn: 0.1672796    total: 458ms    remaining: 29.2ms
## 94:  learn: 0.1664915    total: 461ms    remaining: 24.2ms
## 95:  learn: 0.1661010    total: 464ms    remaining: 19.3ms
## 96:  learn: 0.1653447    total: 467ms    remaining: 14.4ms
## 97:  learn: 0.1650469    total: 470ms    remaining: 9.59ms
## 98:  learn: 0.1645316    total: 473ms    remaining: 4.78ms
## 99:  learn: 0.1640056    total: 476ms    remaining: 0us
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Cammeo Osmancik
##   Cammeo      303       26
##   Osmancik     23      410
##                                           
##                Accuracy : 0.9357          
##                  95% CI : (0.9159, 0.9521)
##     No Information Rate : 0.5722          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.8688          
##                                           
##  Mcnemar's Test P-Value : 0.7751          
##                                           
##             Sensitivity : 0.9294          
##             Specificity : 0.9404          
##          Pos Pred Value : 0.9210          
##          Neg Pred Value : 0.9469          
##              Prevalence : 0.4278          
##          Detection Rate : 0.3976          
##    Detection Prevalence : 0.4318          
##       Balanced Accuracy : 0.9349          
##                                           
##        'Positive' Class : Cammeo          
## 
# Stop time: elapsed time for CatBoost on the numerical dataset
proc.time()-t1
##    user  system elapsed 
##    2.02    0.18   56.50
# Restart the timer for random forest on the numerical dataset
t1 <- proc.time()

rf_function(dataset_num)
## Aggregating results
## Selecting tuning parameters
## Fitting mtry = 2 on full training set
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Cammeo Osmancik
##   Cammeo      300       29
##   Osmancik     26      407
##                                           
##                Accuracy : 0.9278          
##                  95% CI : (0.9071, 0.9452)
##     No Information Rate : 0.5722          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.8527          
##                                           
##  Mcnemar's Test P-Value : 0.7874          
##                                           
##             Sensitivity : 0.9202          
##             Specificity : 0.9335          
##          Pos Pred Value : 0.9119          
##          Neg Pred Value : 0.9400          
##              Prevalence : 0.4278          
##          Detection Rate : 0.3937          
##    Detection Prevalence : 0.4318          
##       Balanced Accuracy : 0.9269          
##                                           
##        'Positive' Class : Cammeo          
## 
# Stop time: elapsed time for random forest on the numerical dataset
proc.time()-t1
##    user  system elapsed 
##    2.10    0.05   21.52

##Categorical dataset

# Start the timer for CatBoost on the categorical dataset
t1 <- proc.time()

catboost_function(dataset_cat)
## Aggregating results
## Selecting tuning parameters
## Fitting depth = 4, learning_rate = 0.135, iterations = 100, l2_leaf_reg = 1e-06, rsm = 0.9, border_count = 255 on full training set
## 0:   learn: 0.2739241    total: 14ms remaining: 1.39s
## 1:   learn: 0.1471262    total: 26.4ms   remaining: 1.29s
## 2:   learn: 0.1005981    total: 34.3ms   remaining: 1.11s
## 3:   learn: 0.0821546    total: 47.6ms   remaining: 1.14s
## 4:   learn: 0.0540418    total: 54ms remaining: 1.02s
## 5:   learn: 0.0353591    total: 62ms remaining: 972ms
## 6:   learn: 0.0192205    total: 68.5ms   remaining: 910ms
## 7:   learn: 0.0159398    total: 75.6ms   remaining: 870ms
## 8:   learn: 0.0133189    total: 82.7ms   remaining: 836ms
## 9:   learn: 0.0117904    total: 89.3ms   remaining: 804ms
## 10:  learn: 0.0106359    total: 97.3ms   remaining: 788ms
## 11:  learn: 0.0087630    total: 103ms    remaining: 756ms
## 12:  learn: 0.0063074    total: 111ms    remaining: 743ms
## 13:  learn: 0.0053043    total: 117ms    remaining: 718ms
## 14:  learn: 0.0046528    total: 122ms    remaining: 694ms
## 15:  learn: 0.0037141    total: 130ms    remaining: 685ms
## 16:  learn: 0.0031600    total: 136ms    remaining: 666ms
## 17:  learn: 0.0025516    total: 144ms    remaining: 654ms
## 18:  learn: 0.0021193    total: 149ms    remaining: 635ms
## 19:  learn: 0.0017337    total: 155ms    remaining: 619ms
## 20:  learn: 0.0013476    total: 162ms    remaining: 610ms
## 21:  learn: 0.0011413    total: 168ms    remaining: 595ms
## 22:  learn: 0.0010335    total: 174ms    remaining: 584ms
## 23:  learn: 0.0009143    total: 182ms    remaining: 576ms
## 24:  learn: 0.0007805    total: 188ms    remaining: 564ms
## 25:  learn: 0.0006741    total: 196ms    remaining: 557ms
## 26:  learn: 0.0006166    total: 201ms    remaining: 544ms
## 27:  learn: 0.0005257    total: 209ms    remaining: 536ms
## 28:  learn: 0.0004841    total: 214ms    remaining: 524ms
## 29:  learn: 0.0004323    total: 220ms    remaining: 513ms
## 30:  learn: 0.0003392    total: 227ms    remaining: 505ms
## 31:  learn: 0.0003180    total: 233ms    remaining: 494ms
## 32:  learn: 0.0002726    total: 240ms    remaining: 486ms
## 33:  learn: 0.0002602    total: 245ms    remaining: 476ms
## 34:  learn: 0.0002085    total: 251ms    remaining: 466ms
## 35:  learn: 0.0001800    total: 257ms    remaining: 458ms
## 36:  learn: 0.0001695    total: 263ms    remaining: 449ms
## 37:  learn: 0.0001695    total: 269ms    remaining: 439ms
## 38:  learn: 0.0001695    total: 275ms    remaining: 430ms
## 39:  learn: 0.0001695    total: 281ms    remaining: 421ms
## 40:  learn: 0.0001695    total: 286ms    remaining: 411ms
## 41:  learn: 0.0001695    total: 292ms    remaining: 403ms
## 42:  learn: 0.0001695    total: 297ms    remaining: 394ms
## 43:  learn: 0.0001695    total: 303ms    remaining: 385ms
## 44:  learn: 0.0001695    total: 309ms    remaining: 377ms
## 45:  learn: 0.0001605    total: 315ms    remaining: 370ms
## 46:  learn: 0.0001474    total: 320ms    remaining: 361ms
## 47:  learn: 0.0001474    total: 326ms    remaining: 353ms
## 48:  learn: 0.0001271    total: 333ms    remaining: 346ms
## 49:  learn: 0.0001150    total: 338ms    remaining: 338ms
## 50:  learn: 0.0001057    total: 344ms    remaining: 331ms
## 51:  learn: 0.0001057    total: 350ms    remaining: 323ms
## 52:  learn: 0.0000815    total: 355ms    remaining: 315ms
## 53:  learn: 0.0000815    total: 360ms    remaining: 307ms
## 54:  learn: 0.0000783    total: 366ms    remaining: 299ms
## 55:  learn: 0.0000783    total: 371ms    remaining: 292ms
## 56:  learn: 0.0000783    total: 377ms    remaining: 284ms
## 57:  learn: 0.0000751    total: 382ms    remaining: 277ms
## 58:  learn: 0.0000751    total: 387ms    remaining: 269ms
## 59:  learn: 0.0000674    total: 393ms    remaining: 262ms
## 60:  learn: 0.0000663    total: 399ms    remaining: 255ms
## 61:  learn: 0.0000663    total: 405ms    remaining: 248ms
## 62:  learn: 0.0000630    total: 427ms    remaining: 251ms
## 63:  learn: 0.0000630    total: 433ms    remaining: 243ms
## 64:  learn: 0.0000630    total: 440ms    remaining: 237ms
## 65:  learn: 0.0000597    total: 445ms    remaining: 229ms
## 66:  learn: 0.0000597    total: 451ms    remaining: 222ms
## 67:  learn: 0.0000597    total: 458ms    remaining: 216ms
## 68:  learn: 0.0000550    total: 464ms    remaining: 209ms
## 69:  learn: 0.0000550    total: 471ms    remaining: 202ms
## 70:  learn: 0.0000550    total: 477ms    remaining: 195ms
## 71:  learn: 0.0000550    total: 483ms    remaining: 188ms
## 72:  learn: 0.0000550    total: 504ms    remaining: 187ms
## 73:  learn: 0.0000550    total: 510ms    remaining: 179ms
## 74:  learn: 0.0000550    total: 516ms    remaining: 172ms
## 75:  learn: 0.0000550    total: 523ms    remaining: 165ms
## 76:  learn: 0.0000550    total: 528ms    remaining: 158ms
## 77:  learn: 0.0000550    total: 533ms    remaining: 150ms
## 78:  learn: 0.0000550    total: 540ms    remaining: 144ms
## 79:  learn: 0.0000550    total: 546ms    remaining: 137ms
## 80:  learn: 0.0000550    total: 554ms    remaining: 130ms
## 81:  learn: 0.0000550    total: 560ms    remaining: 123ms
## 82:  learn: 0.0000509    total: 566ms    remaining: 116ms
## 83:  learn: 0.0000509    total: 575ms    remaining: 109ms
## 84:  learn: 0.0000509    total: 582ms    remaining: 103ms
## 85:  learn: 0.0000480    total: 590ms    remaining: 96ms
## 86:  learn: 0.0000471    total: 596ms    remaining: 89ms
## 87:  learn: 0.0000471    total: 602ms    remaining: 82.1ms
## 88:  learn: 0.0000471    total: 607ms    remaining: 75ms
## 89:  learn: 0.0000471    total: 612ms    remaining: 68ms
## 90:  learn: 0.0000471    total: 620ms    remaining: 61.3ms
## 91:  learn: 0.0000471    total: 626ms    remaining: 54.4ms
## 92:  learn: 0.0000471    total: 633ms    remaining: 47.6ms
## 93:  learn: 0.0000471    total: 639ms    remaining: 40.8ms
## 94:  learn: 0.0000434    total: 644ms    remaining: 33.9ms
## 95:  learn: 0.0000434    total: 651ms    remaining: 27.1ms
## 96:  learn: 0.0000434    total: 656ms    remaining: 20.3ms
## 97:  learn: 0.0000434    total: 662ms    remaining: 13.5ms
## 98:  learn: 0.0000434    total: 669ms    remaining: 6.76ms
## 99:  learn: 0.0000434    total: 675ms    remaining: 0us
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   e   p
##          e 841   0
##          p   0 783
##                                      
##                Accuracy : 1          
##                  95% CI : (0.9977, 1)
##     No Information Rate : 0.5179     
##     P-Value [Acc > NIR] : < 2.2e-16  
##                                      
##                   Kappa : 1          
##                                      
##  Mcnemar's Test P-Value : NA         
##                                      
##             Sensitivity : 1.0000     
##             Specificity : 1.0000     
##          Pos Pred Value : 1.0000     
##          Neg Pred Value : 1.0000     
##              Prevalence : 0.5179     
##          Detection Rate : 0.5179     
##    Detection Prevalence : 0.5179     
##       Balanced Accuracy : 1.0000     
##                                      
##        'Positive' Class : e          
## 
# Stop time: elapsed time for CatBoost on the categorical dataset
proc.time()-t1
##    user  system elapsed 
##    3.44    0.29   89.06
# Restart the timer for random forest on the categorical dataset
t1 <- proc.time()

rf_function(dataset_cat)
## Aggregating results
## Selecting tuning parameters
## Fitting mtry = 46 on full training set
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   e   p
##          e 841   0
##          p   0 783
##                                      
##                Accuracy : 1          
##                  95% CI : (0.9977, 1)
##     No Information Rate : 0.5179     
##     P-Value [Acc > NIR] : < 2.2e-16  
##                                      
##                   Kappa : 1          
##                                      
##  Mcnemar's Test P-Value : NA         
##                                      
##             Sensitivity : 1.0000     
##             Specificity : 1.0000     
##          Pos Pred Value : 1.0000     
##          Neg Pred Value : 1.0000     
##              Prevalence : 0.5179     
##          Detection Rate : 0.5179     
##    Detection Prevalence : 0.5179     
##       Balanced Accuracy : 1.0000     
##                                      
##        'Positive' Class : e          
## 
# Stop time: elapsed time for random forest on the categorical dataset
proc.time()-t1
##    user  system elapsed 
##   16.79    0.15  268.00

##Mix dataset

# Start the timer for CatBoost on the mixed dataset
t1 <- proc.time()

catboost_function(dataset_mix)
## Aggregating results
## Selecting tuning parameters
## Fitting depth = 4, learning_rate = 0.135, iterations = 100, l2_leaf_reg = 1e-06, rsm = 0.9, border_count = 255 on full training set
## Warning: Setting row names on a tibble is deprecated.
## 0:   learn: 0.6124366    total: 8.55ms   remaining: 847ms
## 1:   learn: 0.5497581    total: 17.2ms   remaining: 841ms
## 2:   learn: 0.5001998    total: 19.9ms   remaining: 645ms
## 3:   learn: 0.4612502    total: 24.5ms   remaining: 587ms
## 4:   learn: 0.4270973    total: 26.9ms   remaining: 511ms
## 5:   learn: 0.4010661    total: 29.8ms   remaining: 467ms
## 6:   learn: 0.3790895    total: 36ms remaining: 478ms
## 7:   learn: 0.3600855    total: 40.6ms   remaining: 467ms
## 8:   learn: 0.3441227    total: 43ms remaining: 435ms
## 9:   learn: 0.3300231    total: 46.3ms   remaining: 417ms
## 10:  learn: 0.3164177    total: 49.6ms   remaining: 401ms
## 11:  learn: 0.3051466    total: 52.2ms   remaining: 383ms
## 12:  learn: 0.2963441    total: 54.9ms   remaining: 368ms
## 13:  learn: 0.2879582    total: 57.2ms   remaining: 351ms
## 14:  learn: 0.2814924    total: 59.6ms   remaining: 338ms
## 15:  learn: 0.2770192    total: 62.8ms   remaining: 329ms
## 16:  learn: 0.2726809    total: 67.8ms   remaining: 331ms
## 17:  learn: 0.2677600    total: 70.2ms   remaining: 320ms
## 18:  learn: 0.2625060    total: 72.7ms   remaining: 310ms
## 19:  learn: 0.2589642    total: 75.2ms   remaining: 301ms
## 20:  learn: 0.2553530    total: 78.9ms   remaining: 297ms
## 21:  learn: 0.2516355    total: 81.2ms   remaining: 288ms
## 22:  learn: 0.2484904    total: 83.8ms   remaining: 281ms
## 23:  learn: 0.2458426    total: 86.9ms   remaining: 275ms
## 24:  learn: 0.2427339    total: 89.6ms   remaining: 269ms
## 25:  learn: 0.2399795    total: 91.8ms   remaining: 261ms
## 26:  learn: 0.2381049    total: 95.2ms   remaining: 257ms
## 27:  learn: 0.2356850    total: 98ms remaining: 252ms
## 28:  learn: 0.2337605    total: 101ms    remaining: 248ms
## 29:  learn: 0.2317468    total: 104ms    remaining: 242ms
## 30:  learn: 0.2303652    total: 107ms    remaining: 237ms
## 31:  learn: 0.2291056    total: 112ms    remaining: 237ms
## 32:  learn: 0.2282545    total: 117ms    remaining: 237ms
## 33:  learn: 0.2268279    total: 121ms    remaining: 234ms
## 34:  learn: 0.2260235    total: 124ms    remaining: 230ms
## 35:  learn: 0.2247115    total: 128ms    remaining: 227ms
## 36:  learn: 0.2234852    total: 131ms    remaining: 222ms
## 37:  learn: 0.2223861    total: 133ms    remaining: 217ms
## 38:  learn: 0.2213697    total: 137ms    remaining: 214ms
## 39:  learn: 0.2209944    total: 139ms    remaining: 208ms
## 40:  learn: 0.2197758    total: 142ms    remaining: 205ms
## 41:  learn: 0.2182849    total: 146ms    remaining: 201ms
## 42:  learn: 0.2174575    total: 148ms    remaining: 197ms
## 43:  learn: 0.2167092    total: 151ms    remaining: 193ms
## 44:  learn: 0.2158777    total: 154ms    remaining: 188ms
## 45:  learn: 0.2151080    total: 157ms    remaining: 184ms
## 46:  learn: 0.2143868    total: 161ms    remaining: 181ms
## 47:  learn: 0.2135538    total: 163ms    remaining: 177ms
## 48:  learn: 0.2129472    total: 166ms    remaining: 172ms
## 49:  learn: 0.2125243    total: 168ms    remaining: 168ms
## 50:  learn: 0.2115262    total: 170ms    remaining: 164ms
## 51:  learn: 0.2108375    total: 173ms    remaining: 159ms
## 52:  learn: 0.2094871    total: 177ms    remaining: 157ms
## 53:  learn: 0.2085015    total: 180ms    remaining: 153ms
## 54:  learn: 0.2075360    total: 183ms    remaining: 150ms
## 55:  learn: 0.2068616    total: 186ms    remaining: 146ms
## 56:  learn: 0.2057092    total: 189ms    remaining: 142ms
## 57:  learn: 0.2051327    total: 192ms    remaining: 139ms
## 58:  learn: 0.2049704    total: 196ms    remaining: 136ms
## 59:  learn: 0.2037741    total: 199ms    remaining: 133ms
## 60:  learn: 0.2031868    total: 201ms    remaining: 129ms
## 61:  learn: 0.2029190    total: 203ms    remaining: 125ms
## 62:  learn: 0.2024161    total: 208ms    remaining: 122ms
## 63:  learn: 0.2022875    total: 210ms    remaining: 118ms
## 64:  learn: 0.2016485    total: 212ms    remaining: 114ms
## 65:  learn: 0.2012732    total: 214ms    remaining: 110ms
## 66:  learn: 0.1995096    total: 217ms    remaining: 107ms
## 67:  learn: 0.1992574    total: 219ms    remaining: 103ms
## 68:  learn: 0.1979950    total: 223ms    remaining: 100ms
## 69:  learn: 0.1972770    total: 227ms    remaining: 97.3ms
## 70:  learn: 0.1963131    total: 232ms    remaining: 94.7ms
## 71:  learn: 0.1953201    total: 236ms    remaining: 91.6ms
## 72:  learn: 0.1943661    total: 241ms    remaining: 89.2ms
## 73:  learn: 0.1938614    total: 246ms    remaining: 86.5ms
## 74:  learn: 0.1932402    total: 251ms    remaining: 83.5ms
## 75:  learn: 0.1931342    total: 253ms    remaining: 80ms
## 76:  learn: 0.1924828    total: 257ms    remaining: 76.8ms
## 77:  learn: 0.1920000    total: 259ms    remaining: 73.1ms
## 78:  learn: 0.1912209    total: 262ms    remaining: 69.6ms
## 79:  learn: 0.1907802    total: 264ms    remaining: 66ms
## 80:  learn: 0.1904008    total: 266ms    remaining: 62.4ms
## 81:  learn: 0.1896263    total: 268ms    remaining: 58.9ms
## 82:  learn: 0.1889419    total: 273ms    remaining: 55.9ms
## 83:  learn: 0.1886411    total: 278ms    remaining: 53ms
## 84:  learn: 0.1879600    total: 281ms    remaining: 49.5ms
## 85:  learn: 0.1874500    total: 283ms    remaining: 46ms
## 86:  learn: 0.1866907    total: 286ms    remaining: 42.8ms
## 87:  learn: 0.1865893    total: 291ms    remaining: 39.7ms
## 88:  learn: 0.1859817    total: 295ms    remaining: 36.4ms
## 89:  learn: 0.1854022    total: 297ms    remaining: 33ms
## 90:  learn: 0.1850236    total: 299ms    remaining: 29.6ms
## 91:  learn: 0.1847806    total: 303ms    remaining: 26.3ms
## 92:  learn: 0.1844543    total: 305ms    remaining: 23ms
## 93:  learn: 0.1838846    total: 308ms    remaining: 19.7ms
## 94:  learn: 0.1835589    total: 311ms    remaining: 16.4ms
## 95:  learn: 0.1828398    total: 313ms    remaining: 13ms
## 96:  learn: 0.1824852    total: 315ms    remaining: 9.74ms
## 97:  learn: 0.1820398    total: 318ms    remaining: 6.5ms
## 98:  learn: 0.1817094    total: 322ms    remaining: 3.26ms
## 99:  learn: 0.1809423    total: 326ms    remaining: 0us
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  no yes
##        no  777  73
##        yes  23  31
##                                           
##                Accuracy : 0.8938          
##                  95% CI : (0.8719, 0.9131)
##     No Information Rate : 0.885           
##     P-Value [Acc > NIR] : 0.2187          
##                                           
##                   Kappa : 0.3405          
##                                           
##  Mcnemar's Test P-Value : 5.702e-07       
##                                           
##             Sensitivity : 0.9712          
##             Specificity : 0.2981          
##          Pos Pred Value : 0.9141          
##          Neg Pred Value : 0.5741          
##              Prevalence : 0.8850          
##          Detection Rate : 0.8595          
##    Detection Prevalence : 0.9403          
##       Balanced Accuracy : 0.6347          
##                                           
##        'Positive' Class : no              
## 
# Stop time: elapsed time for CatBoost on the mixed dataset
proc.time()-t1
##    user  system elapsed 
##    2.14    0.20   55.35
# Restart the timer for random forest on the mixed dataset
t1 <- proc.time()

rf_function(dataset_mix)
## Aggregating results
## Selecting tuning parameters
## Fitting mtry = 42 on full training set
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  no yes
##        no  763  61
##        yes  37  43
##                                           
##                Accuracy : 0.8916          
##                  95% CI : (0.8695, 0.9111)
##     No Information Rate : 0.885           
##     P-Value [Acc > NIR] : 0.28629         
##                                           
##                   Kappa : 0.4082          
##                                           
##  Mcnemar's Test P-Value : 0.02016         
##                                           
##             Sensitivity : 0.9537          
##             Specificity : 0.4135          
##          Pos Pred Value : 0.9260          
##          Neg Pred Value : 0.5375          
##              Prevalence : 0.8850          
##          Detection Rate : 0.8440          
##    Detection Prevalence : 0.9115          
##       Balanced Accuracy : 0.6836          
##                                           
##        'Positive' Class : no              
## 
# Stop time: elapsed time for random forest on the mixed dataset
proc.time()-t1
##    user  system elapsed 
##   10.01    0.11  131.03

CLOSE THE CLUSTER

# Shut down the cluster `cl` created with makePSOCKcluster() earlier
stopCluster(cl)

STOP TIME

# Total time since `t` was recorded at the start of the notebook
proc.time()-t
##    user  system elapsed 
##   39.14    1.42  624.94