library(caret)
library(pROC)
library(tidyverse)
Pick any two classifiers of (SVM, Logistic, DecisionTree, NaiveBayes). Pick the heart or ecoli dataset. Heart is simpler; ecoli compounds the problem as it is NOT a balanced dataset. From a grading perspective, both carry the same weight.
heart <- read.csv('https://raw.githubusercontent.com/mkivenson/Machine-Learning-Big-Data/master/heart.csv', header = TRUE, sep = ',', stringsAsFactors = FALSE)
heart$target <- as.factor(heart$target)
heart$sex <- as.factor(heart$sex)
heart$cp <- as.factor(heart$cp)
heart$fbs <- as.factor(heart$fbs)
heart$restecg <- as.factor(heart$restecg)
heart$exang <- as.factor(heart$exang)
heart$slope <- as.factor(heart$slope)
heart$ca <- as.factor(heart$ca)
heart$thal <- as.factor(heart$thal)
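Since tidyverse is already loaded, the same nine conversions could be written more compactly; a rough equivalent (assuming dplyr 1.0+ for across(), and not meant to be run in addition to the lines above):
# equivalent one-step conversion of the categorical columns to factors
heart <- heart %>%
  mutate(across(c(target, sex, cp, fbs, restecg, exang, slope, ca, thal), as.factor))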
For each classifier, set a seed (43).
Do an 80/20 split and determine the Accuracy, AUC, and as many metrics as are returned by the caret package (confusionMatrix). Call this the base_metric. Note down, as best as you can, the development (engineering) cost as well as the computing cost (elapsed time).
# do an 80/20 split
set.seed(43)
smp_size <- floor(0.8 * nrow(heart))
train_ind <- sample(seq_len(nrow(heart)), size = smp_size)
train_heart <- heart[ train_ind,]
test_heart <- heart[-train_ind,]
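Because the prompt calls out class balance, a quick check that the 80/20 split preserves the target distribution is cheap to add; a small sketch (the heart data is roughly balanced, as the prevalence values in the outputs below also show):
# quick check of the class distribution in each split
prop.table(table(train_heart$target))
prop.table(table(test_heart$target))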
The following function will train and score models as needed for base models, bootstrapped models, cross-validated models, and random forests. With minor changes (the heart data frames are referenced globally), it can be reused for other datasets as well.
eval_model <- function(train_method, tr, model_name){
  # begin timer
  ptm <- proc.time()
  # set seed
  set.seed(43)
  # FOR BASE MODEL
  if (grepl("Base", model_name, fixed = TRUE)) {
    # train model on the 80% training split
    dt_model <- train(
      form = target ~ .,
      data = train_heart,
      trControl = tr,
      method = train_method
    )
    print(dt_model)
    # evaluate model on the held-out 20% test split
    model_cm <- confusionMatrix(predict(dt_model, subset(test_heart, select = -c(target))), test_heart$target)
    print(paste(model_name, 'results'))
    print(model_cm)
    # end timer
    elapsed_time <- (proc.time() - ptm)[[3]]
    # determine the Accuracy, AUC and as many metrics as returned by the caret package (confusionMatrix).
    # store results (AUC is computed from the predicted class labels, not class probabilities)
    accuracy <- model_cm$overall[[1]]
    auc_val <- as.numeric(auc(roc(test_heart$target, factor(predict(dt_model, test_heart), ordered = TRUE))))
    sensitivity <- model_cm$byClass[[1]]
    specificity <- model_cm$byClass[[2]]
    precision <- model_cm$byClass[[5]]
    recall <- model_cm$byClass[[6]]
    f1 <- model_cm$byClass[[7]]
  # FOR BOOTSTRAP MODEL
  } else if (grepl("Boot", model_name, fixed = TRUE)){
    # train on the full dataset; caret handles the bootstrap resampling
    dt_model <- train(
      form = target ~ .,
      data = heart,
      trControl = tr,
      method = train_method
    )
    # end timer
    elapsed_time <- (proc.time() - ptm)[[3]]
    accuracy <- c()
    auc_val <- c()
    sensitivity <- c()
    specificity <- c()
    precision <- c()
    recall <- c()
    f1 <- c()
    i <- 1
    pred_df <- dt_model$pred
    # compute the metrics for each bootstrap resample, then average across resamples
    for (resample in unique(pred_df$Resample)){
      temp <- filter(pred_df, Resample == resample)
      model_cm <- confusionMatrix(temp$pred, temp$obs)
      accuracy[i] <- model_cm$overall[[1]]
      # roc(response, predictor): observed classes first, then predicted
      auc_val[[i]] <- auc(roc(as.numeric(temp$obs), as.numeric(temp$pred)))
      sensitivity[[i]] <- model_cm$byClass[[1]]
      specificity[[i]] <- model_cm$byClass[[2]]
      precision[[i]] <- model_cm$byClass[[5]]
      recall[[i]] <- model_cm$byClass[[6]]
      f1[[i]] <- model_cm$byClass[[7]]
      i <- i + 1
    }
    accuracy <- mean(accuracy)
    auc_val <- mean(auc_val)
    sensitivity <- mean(sensitivity)
    specificity <- mean(specificity)
    precision <- mean(precision)
    recall <- mean(recall)
    f1 <- mean(f1)
  # FOR RANDOM FOREST
  } else if (grepl("RF", model_name, fixed = TRUE)){
    # train model; the number of trees is taken from the last two characters of the model name
    dt_model <- train(
      form = target ~ .,
      data = train_heart,
      trControl = tr,
      ntree = as.numeric(str_sub(model_name, start = -2)),
      method = train_method
    )
    print(dt_model)
    # evaluate model on the held-out 20% test split
    model_cm <- confusionMatrix(predict(dt_model, subset(test_heart, select = -c(target))), test_heart$target)
    print(paste(model_name, 'results'))
    print(model_cm)
    # end timer
    elapsed_time <- (proc.time() - ptm)[[3]]
    # store results (AUC is computed from the predicted class labels, not class probabilities)
    accuracy <- model_cm$overall[[1]]
    auc_val <- as.numeric(auc(roc(test_heart$target, factor(predict(dt_model, test_heart), ordered = TRUE))))
    sensitivity <- model_cm$byClass[[1]]
    specificity <- model_cm$byClass[[2]]
    precision <- model_cm$byClass[[5]]
    recall <- model_cm$byClass[[6]]
    f1 <- model_cm$byClass[[7]]
  # FOR CROSS VALIDATION
  } else {
    # train on the full dataset; caret handles the k-fold resampling
    dt_model <- train(
      form = target ~ .,
      data = heart,
      trControl = tr,
      method = train_method
    )
    print(dt_model)
    # confusion matrix is built from the held-out fold predictions
    model_cm <- confusionMatrix(dt_model$pred[order(dt_model$pred$rowIndex),]$pred, heart$target)
    print(paste(model_name, 'results'))
    print(model_cm)
    # end timer
    elapsed_time <- (proc.time() - ptm)[[3]]
    # store results (AUC is scored on the 20% split; note the CV model was trained on the full dataset,
    # so those rows overlap its training data)
    accuracy <- model_cm$overall[[1]]
    auc_val <- as.numeric(auc(roc(test_heart$target, factor(predict(dt_model, test_heart), ordered = TRUE))))
    sensitivity <- model_cm$byClass[[1]]
    specificity <- model_cm$byClass[[2]]
    precision <- model_cm$byClass[[5]]
    recall <- model_cm$byClass[[6]]
    f1 <- model_cm$byClass[[7]]
  }
  # assemble all metrics into a single named column for the comparison tables
  full_results <- rbind(accuracy,
                        auc_val,
                        sensitivity,
                        specificity,
                        precision,
                        recall,
                        f1,
                        elapsed_time)
  colnames(full_results) <- c(model_name)
  return(full_results)
}
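Note that eval_model scores AUC from the predicted class labels, which collapses the ROC curve to a single operating point. An alternative is to score AUC from class probabilities; a hypothetical sketch, not used for any of the reported numbers (caret's classProbs = TRUE requires factor levels that are valid R names, so the levels are relabeled, and heart_prob / prob_model are illustrative names):
# sketch: AUC from predicted probabilities rather than hard labels
heart_prob <- heart
levels(heart_prob$target) <- c("no", "yes")   # classProbs needs valid R names
set.seed(43)
prob_model <- train(target ~ ., data = heart_prob[train_ind, ],
                    method = "svmLinear",
                    trControl = trainControl(method = "none", classProbs = TRUE))
probs <- predict(prob_model, heart_prob[-train_ind, ], type = "prob")[, "yes"]
auc(roc(heart_prob$target[-train_ind], probs))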
# DECISION TREE MODEL - BASE METRIC
dt_base <- eval_model("rpart", trainControl(method="none"), "DT Base")
## CART
##
## 242 samples
## 13 predictor
## 2 classes: '0', '1'
##
## No pre-processing
## Resampling: None
## [1] "DT Base results"
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 0 0
## 1 27 34
##
## Accuracy : 0.5574
## 95% CI : (0.4245, 0.6845)
## No Information Rate : 0.5574
## P-Value [Acc > NIR] : 0.5531
##
## Kappa : 0
##
## Mcnemar's Test P-Value : 5.624e-07
##
## Sensitivity : 0.0000
## Specificity : 1.0000
## Pos Pred Value : NaN
## Neg Pred Value : 0.5574
## Prevalence : 0.4426
## Detection Rate : 0.0000
## Detection Prevalence : 0.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : 0
##
# SUPPORT VECTOR MACHINE MODEL - BASE METRIC
svm_base <- eval_model("svmLinear", trainControl(method="none"), "SVM Base")
## Support Vector Machines with Linear Kernel
##
## 242 samples
## 13 predictor
## 2 classes: '0', '1'
##
## No pre-processing
## Resampling: None
## [1] "SVM Base results"
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 20 2
## 1 7 32
##
## Accuracy : 0.8525
## 95% CI : (0.7383, 0.9302)
## No Information Rate : 0.5574
## P-Value [Acc > NIR] : 8.993e-07
##
## Kappa : 0.6952
##
## Mcnemar's Test P-Value : 0.1824
##
## Sensitivity : 0.7407
## Specificity : 0.9412
## Pos Pred Value : 0.9091
## Neg Pred Value : 0.8205
## Prevalence : 0.4426
## Detection Rate : 0.3279
## Detection Prevalence : 0.3607
## Balanced Accuracy : 0.8410
##
## 'Positive' Class : 0
##
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
Start with the original dataset and set a seed (43). Then run a cross-validation of 5 and 10 folds of the model on the training set. Determine the same set of metrics and compare the cv_metrics with the base_metric. Note down, as best as you can, the development (engineering) cost as well as the computing cost (elapsed time).
# DECISION TREE MODEL - 5 Cross Validation Folds
dt_5cv <- eval_model("rpart", tr = trainControl(method = "cv", number = 5, savePredictions = 'final'), "DT 5 cv")
## CART
##
## 303 samples
## 13 predictor
## 2 classes: '0', '1'
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 243, 242, 242, 243, 242
## Resampling results across tuning parameters:
##
## cp Accuracy Kappa
## 0.03623188 0.7621311 0.5186339
## 0.03985507 0.7391803 0.4718201
## 0.48550725 0.6400000 0.2408988
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was cp = 0.03623188.
## [1] "DT 5 cv results"
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 98 32
## 1 40 133
##
## Accuracy : 0.7624
## 95% CI : (0.7104, 0.8092)
## No Information Rate : 0.5446
## P-Value [Acc > NIR] : 3.29e-15
##
## Kappa : 0.5187
##
## Mcnemar's Test P-Value : 0.4094
##
## Sensitivity : 0.7101
## Specificity : 0.8061
## Pos Pred Value : 0.7538
## Neg Pred Value : 0.7688
## Prevalence : 0.4554
## Detection Rate : 0.3234
## Detection Prevalence : 0.4290
## Balanced Accuracy : 0.7581
##
## 'Positive' Class : 0
##
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
# SUPPORT VECTOR MACHINE MODEL - 5 Cross Validation Folds
svm_5cv <- eval_model("svmLinear", tr = trainControl(method = "cv", number = 5, savePredictions = 'final'), "SVM 5 cv")
## Support Vector Machines with Linear Kernel
##
## 303 samples
## 13 predictor
## 2 classes: '0', '1'
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 243, 242, 242, 243, 242
## Resampling results:
##
## Accuracy Kappa
## 0.835082 0.6664592
##
## Tuning parameter 'C' was held constant at a value of 1
## [1] "SVM 5 cv results"
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 110 22
## 1 28 143
##
## Accuracy : 0.835
## 95% CI : (0.7883, 0.875)
## No Information Rate : 0.5446
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.6661
##
## Mcnemar's Test P-Value : 0.4795
##
## Sensitivity : 0.7971
## Specificity : 0.8667
## Pos Pred Value : 0.8333
## Neg Pred Value : 0.8363
## Prevalence : 0.4554
## Detection Rate : 0.3630
## Detection Prevalence : 0.4356
## Balanced Accuracy : 0.8319
##
## 'Positive' Class : 0
##
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
# DECISION TREE MODEL - 10 Cross Validation Folds
dt_10cv <- eval_model("rpart", trainControl(method = "cv", number = 10, savePredictions = 'final'), "DT 10 cv")
## CART
##
## 303 samples
## 13 predictor
## 2 classes: '0', '1'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 273, 272, 273, 273, 273, 273, ...
## Resampling results across tuning parameters:
##
## cp Accuracy Kappa
## 0.03623188 0.7493548 0.4924633
## 0.03985507 0.7493548 0.4917245
## 0.48550725 0.6560215 0.2761508
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was cp = 0.03985507.
## [1] "DT 10 cv results"
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 96 34
## 1 42 131
##
## Accuracy : 0.7492
## 95% CI : (0.6964, 0.797)
## No Information Rate : 0.5446
## P-Value [Acc > NIR] : 1.515e-13
##
## Kappa : 0.4919
##
## Mcnemar's Test P-Value : 0.422
##
## Sensitivity : 0.6957
## Specificity : 0.7939
## Pos Pred Value : 0.7385
## Neg Pred Value : 0.7572
## Prevalence : 0.4554
## Detection Rate : 0.3168
## Detection Prevalence : 0.4290
## Balanced Accuracy : 0.7448
##
## 'Positive' Class : 0
##
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
# SUPPORT VECTOR MACHINE MODEL - 10 Cross Validation Folds
svm_10cv <- eval_model("svmLinear", trainControl(method = "cv", number = 10, savePredictions = 'final'), "SVM 10 cv")
## Support Vector Machines with Linear Kernel
##
## 303 samples
## 13 predictor
## 2 classes: '0', '1'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 273, 272, 273, 273, 273, 273, ...
## Resampling results:
##
## Accuracy Kappa
## 0.8383871 0.671804
##
## Tuning parameter 'C' was held constant at a value of 1
## [1] "SVM 10 cv results"
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 107 18
## 1 31 147
##
## Accuracy : 0.8383
## 95% CI : (0.7919, 0.8779)
## No Information Rate : 0.5446
## P-Value [Acc > NIR] : < 2e-16
##
## Kappa : 0.6714
##
## Mcnemar's Test P-Value : 0.08648
##
## Sensitivity : 0.7754
## Specificity : 0.8909
## Pos Pred Value : 0.8560
## Neg Pred Value : 0.8258
## Prevalence : 0.4554
## Detection Rate : 0.3531
## Detection Prevalence : 0.4125
## Balanced Accuracy : 0.8331
##
## 'Positive' Class : 0
##
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
Start with the original dataset and set a seed (43). Then run a bootstrap of 200 resamples.
# DECISION TREE MODEL - BOOTSTRAP
dt_bt <- eval_model("rpart", trainControl(method="boot", number=200, savePredictions = 'final', returnResamp = 'final'), "DT Bootstrap")
print(dt_bt)
## DT Bootstrap
## accuracy 0.7420239
## auc_val 0.7458831
## sensitivity 0.7039119
## specificity 0.7768658
## precision 0.7329613
## recall 0.7039119
## f1 0.7115944
## elapsed_time 4.6400000
set.seed(43)
# SUPPORT VECTOR MACHINE MODEL - BOOTSTRAP
svm_bt <- eval_model("svmLinear", trainControl(method="boot", number=200, savePredictions = 'final', returnResamp = 'final'), "SVM Bootstrap")
print(svm_bt)
## SVM Bootstrap
## accuracy 0.8233320
## auc_val 0.8254587
## sensitivity 0.7807475
## specificity 0.8612693
## precision 0.8290195
## recall 0.7807475
## f1 0.8016091
## elapsed_time 13.0000000
Compute the same set of metrics for each of the two classifiers and build a three-column table for each experiment (base, bootstrap, cross-validated). Note down, as best as you can, the development (engineering) cost as well as the computing cost (elapsed time).
data.frame(cbind(dt_base, dt_5cv, dt_10cv, dt_bt, svm_base, svm_5cv, svm_10cv, svm_bt))
## DT.Base DT.5.cv DT.10.cv DT.Bootstrap SVM.Base SVM.5.cv
## accuracy 0.557377 0.7623762 0.7491749 0.7420239 0.8524590 0.8349835
## auc_val 0.500000 0.7854031 0.7412854 0.7458831 0.8409586 0.8300654
## sensitivity 0.000000 0.7101449 0.6956522 0.7039119 0.7407407 0.7971014
## specificity 1.000000 0.8060606 0.7939394 0.7768658 0.9411765 0.8666667
## precision NA 0.7538462 0.7384615 0.7329613 0.9090909 0.8333333
## recall 0.000000 0.7101449 0.6956522 0.7039119 0.7407407 0.7971014
## f1 NA 0.7313433 0.7164179 0.7115944 0.8163265 0.8148148
## elapsed_time 0.610000 0.7100000 0.8300000 4.6400000 1.0400000 1.1700000
## SVM.10.cv SVM.Bootstrap
## accuracy 0.8382838 0.8233320
## auc_val 0.8300654 0.8254587
## sensitivity 0.7753623 0.7807475
## specificity 0.8909091 0.8612693
## precision 0.8560000 0.8290195
## recall 0.7753623 0.7807475
## f1 0.8136882 0.8016091
## elapsed_time 0.7600000 13.0000000
For the same dataset, set a seed (43) and split 80/20.
# do an 80/20 split
set.seed(43)
smp_size <- floor(0.8 * nrow(heart))
train_ind <- sample(seq_len(nrow(heart)), size = smp_size)
train_heart <- heart[ train_ind,]
test_heart <- heart[-train_ind,]
Using randomForest, grow three different forests, varying the number of trees at least three times. Start with seeding and a fresh split for each forest. Note down, as best as you can, the development (engineering) cost as well as the computing cost (elapsed time) for each run, and compare these results with the experiment in Part A. Submit a pdf and an executable script in Python or R.
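For reference, the prompt names the randomForest package directly; outside of caret, one of these forests could be grown roughly as in the sketch below (assuming the randomForest package is installed; ntree = 50 is just one of the three sizes). The actual runs that follow go through eval_model so the metrics stay comparable with Part A.
# hypothetical direct call, equivalent in spirit to the caret wrapper used below
library(randomForest)
set.seed(43)
rf_direct <- randomForest(target ~ ., data = train_heart, ntree = 50)
confusionMatrix(predict(rf_direct, test_heart), test_heart$target)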
rf_10 <- eval_model("rf", trainControl(), "RF 10")
## Random Forest
##
## 242 samples
## 13 predictor
## 2 classes: '0', '1'
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 242, 242, 242, 242, 242, 242, ...
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.7792646 0.5549714
## 12 0.7551120 0.5039891
## 22 0.7556300 0.5050744
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
## [1] "RF 10 results"
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 18 1
## 1 9 33
##
## Accuracy : 0.8361
## 95% CI : (0.7191, 0.9185)
## No Information Rate : 0.5574
## P-Value [Acc > NIR] : 3.844e-06
##
## Kappa : 0.6573
##
## Mcnemar's Test P-Value : 0.02686
##
## Sensitivity : 0.6667
## Specificity : 0.9706
## Pos Pred Value : 0.9474
## Neg Pred Value : 0.7857
## Prevalence : 0.4426
## Detection Rate : 0.2951
## Detection Prevalence : 0.3115
## Balanced Accuracy : 0.8186
##
## 'Positive' Class : 0
##
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
rf_50 <- eval_model("rf", trainControl(), "RF 50")
## Random Forest
##
## 242 samples
## 13 predictor
## 2 classes: '0', '1'
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 242, 242, 242, 242, 242, 242, ...
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.7972460 0.5896982
## 12 0.7651698 0.5239571
## 22 0.7512177 0.4950400
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
## [1] "RF 50 results"
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 19 2
## 1 8 32
##
## Accuracy : 0.8361
## 95% CI : (0.7191, 0.9185)
## No Information Rate : 0.5574
## P-Value [Acc > NIR] : 3.844e-06
##
## Kappa : 0.66
##
## Mcnemar's Test P-Value : 0.1138
##
## Sensitivity : 0.7037
## Specificity : 0.9412
## Pos Pred Value : 0.9048
## Neg Pred Value : 0.8000
## Prevalence : 0.4426
## Detection Rate : 0.3115
## Detection Prevalence : 0.3443
## Balanced Accuracy : 0.8224
##
## 'Positive' Class : 0
##
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
rf_99 <- eval_model("rf", trainControl(), "RF 99")
## Random Forest
##
## 242 samples
## 13 predictor
## 2 classes: '0', '1'
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 242, 242, 242, 242, 242, 242, ...
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.7968746 0.5887862
## 12 0.7688465 0.5307189
## 22 0.7515108 0.4957248
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
## [1] "RF 99 results"
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 20 2
## 1 7 32
##
## Accuracy : 0.8525
## 95% CI : (0.7383, 0.9302)
## No Information Rate : 0.5574
## P-Value [Acc > NIR] : 8.993e-07
##
## Kappa : 0.6952
##
## Mcnemar's Test P-Value : 0.1824
##
## Sensitivity : 0.7407
## Specificity : 0.9412
## Pos Pred Value : 0.9091
## Neg Pred Value : 0.8205
## Prevalence : 0.4426
## Detection Rate : 0.3279
## Detection Prevalence : 0.3607
## Balanced Accuracy : 0.8410
##
## 'Positive' Class : 0
##
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
All three random forest models performed well - better than the decision tree models from Part A and comparable with the SVM models. However, computation time for the random forest models is approximately double that of the SVM models.
data.frame(cbind(rf_10, rf_50, rf_99))
## RF.10 RF.50 RF.99
## accuracy 0.8360656 0.8360656 0.8524590
## auc_val 0.8186275 0.8077342 0.8409586
## sensitivity 0.6666667 0.7037037 0.7407407
## specificity 0.9705882 0.9411765 0.9411765
## precision 0.9473684 0.9047619 0.9090909
## recall 0.6666667 0.7037037 0.7407407
## f1 0.7826087 0.7916667 0.8163265
## elapsed_time 1.2500000 1.7900000 2.9200000
The following dataframe shows a comprehensive summary of all models created in this exercise.
I would recommend cross-validation over bootstrapping because it was less computationally expensive and yielded comparable results. Bootstrapping also had a higher development cost, as it required manually aggregating performance metrics across 200 resamples.
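Part of that development cost could be avoided: caret already stores per-resample Accuracy and Kappa on the fitted object, so at least those two metrics can be averaged without rebuilding confusion matrices. A sketch, assuming a fitted train object named bt_model (not part of the runs above):
# caret's own per-resample summary (Accuracy and Kappa only)
set.seed(43)
bt_model <- train(target ~ ., data = heart, method = "rpart",
                  trControl = trainControl(method = "boot", number = 200, returnResamp = "final"))
head(bt_model$resample)                                   # Accuracy, Kappa, Resample
colMeans(bt_model$resample[, c("Accuracy", "Kappa")])     # averages across the 200 resamples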
All four SVM models yielded high accuracy, AUC, and F1 metrics. As suggested by the Pareto principle, the base SVM model seems to do the majority of the work. However, the 5-fold cross-validated model has comparable results and a slightly higher (yet still low) elapsed time. Since cross-validation stabilizes the results of a model, the cross-validated model is preferable. The SVM 10-fold CV model does not add much accuracy or stability, so Occam's razor suggests that the simpler model (the 5-fold CV) should be used.
The base decision tree model did not perform well; however, applying cross-validation helped with parameter selection. 5-fold cross-validation yields better results than 10-fold cross-validation, and in less elapsed time. As per Occam's razor, the simpler solution (5-fold CV) should be used; there is no added benefit in using 10-fold CV.
The RF model with 99 trees had the same performance as the base SVM model, but required more computational time. Therefore, the simpler model (SVM) has the advantage according to Occam’s razor.
data.frame(cbind(dt_base, dt_5cv, dt_10cv, dt_bt, svm_base, svm_5cv, svm_10cv, svm_bt, rf_10, rf_50, rf_99))
## DT.Base DT.5.cv DT.10.cv DT.Bootstrap SVM.Base SVM.5.cv
## accuracy 0.557377 0.7623762 0.7491749 0.7420239 0.8524590 0.8349835
## auc_val 0.500000 0.7854031 0.7412854 0.7458831 0.8409586 0.8300654
## sensitivity 0.000000 0.7101449 0.6956522 0.7039119 0.7407407 0.7971014
## specificity 1.000000 0.8060606 0.7939394 0.7768658 0.9411765 0.8666667
## precision NA 0.7538462 0.7384615 0.7329613 0.9090909 0.8333333
## recall 0.000000 0.7101449 0.6956522 0.7039119 0.7407407 0.7971014
## f1 NA 0.7313433 0.7164179 0.7115944 0.8163265 0.8148148
## elapsed_time 0.610000 0.7100000 0.8300000 4.6400000 1.0400000 1.1700000
## SVM.10.cv SVM.Bootstrap RF.10 RF.50 RF.99
## accuracy 0.8382838 0.8233320 0.8360656 0.8360656 0.8524590
## auc_val 0.8300654 0.8254587 0.8186275 0.8077342 0.8409586
## sensitivity 0.7753623 0.7807475 0.6666667 0.7037037 0.7407407
## specificity 0.8909091 0.8612693 0.9705882 0.9411765 0.9411765
## precision 0.8560000 0.8290195 0.9473684 0.9047619 0.9090909
## recall 0.7753623 0.7807475 0.6666667 0.7037037 0.7407407
## f1 0.8136882 0.8016091 0.7826087 0.7916667 0.8163265
## elapsed_time 0.7600000 13.0000000 1.2500000 1.7900000 2.9200000
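To make the accuracy/compute trade-off discussed above easier to scan, the summary could also be plotted; a minimal ggplot2 sketch (assuming the combined table is saved as all_results, an illustrative name):
# plot accuracy against elapsed time for all models
all_results <- data.frame(cbind(dt_base, dt_5cv, dt_10cv, dt_bt, svm_base, svm_5cv, svm_10cv, svm_bt, rf_10, rf_50, rf_99))
plot_df <- data.frame(model = colnames(all_results),
                      accuracy = unlist(all_results["accuracy", ]),
                      elapsed = unlist(all_results["elapsed_time", ]))
ggplot(plot_df, aes(x = elapsed, y = accuracy, label = model)) +
  geom_point() +
  geom_text(vjust = -0.5, size = 3) +
  labs(x = "elapsed time (s)", y = "accuracy")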