1 FRESA.CAD Classifiers

## Sonar, Mines vs. Rocks Data Set

1.1 Benchmarking

1.2 Cross-Validation of all the different ML-Methods in FRESA.CAD

FRESA.CAD::randomCV will be used to run several machine learning methods. But first, let us run the
cross-validation on the BSWiMS method.

We will reuse the same training and testing partitions for the other ML methods.



# Reference run: random cross-validation of the BSWiMS model.
# `theData`, `theOutcome`, `fraction` and `reps` are defined outside this
# chunk; `fraction` is passed as the training fraction and `reps` as the
# number of repetitions.
BSWiMScv <- randomCV(
  theData,
  theOutcome,
  fittingFunction = BSWiMS.model,
  trainFraction = fraction,
  repetitions = reps
)
#> .[++++-].[+++-].[+++++++-].[++++++-].[++++-].[+++++++-].[++++-].[++++-].[+++++-].[+++++-].[+++++++-].[+++-].[++++++-].[++++-].[++++++-].[+++-].[++++-].[+++++-].[++++-].[+++++++-].[+++++++-].[++++-].[++++-].[++++-].[++-].[++++-].[++++++-].[+++++-].[++++++-].[+++++++-].[++++-].[++++++-].[++++-].[++++-].[+++++-].[+++++-].[++++-].[+++++-].[++++-].[+++++-].[++++++-].[+++++++-].[++++++-].[+++-].[++++++-].[++++-].[+++-].[+++-].[++++++-].[+++++-].[++++-].[++++++-].[++++++++-].[+++-].[++++++-].[++++-].[++++-].[++++-].[+++++++-].[++++++-].[++++-].[+++-].[+++-].[++++++-].[+++-].[+++++-].[+++++++-].[++++-].[+++++-].[+++++-].[+++++-].[++++-].[++++-].[+++++++-].[++++-].[+++-].[++++++-].[++++-].[++++-].[++++++-].[++++++-].[+++++++-].[++++-].[++++-].[++++-].[++++-].[+++-].[++++++-].[++++-].[++++-].[++++++-].[+++++-].[+++-].[++++-].[++++++-].[+++-].[+++++++-].[++++++-].[++++-].[+++++-].[++++-].[++++++-].[++++++++-].[+++++++-].[+++-].[++++-].[+++++++-].[+++++-].[+++-].[++++++-].[++++++-].[+++-].[++++-].[+++++++-].[++++++-].[++++-].[++++++-].[++++++-].[++++-].[+++++-].[+++++++-].[++++-].[+++++++-].[++++++-].[+++++-].[++++++-].[+++++-].[+++++-].[+++++-].[+++++-].[++++-].[+++++-].[++++-].[+++-].[+++++-].[+++-].[+-].[+++++-].[+++++-].[++++-].[+++-].[++++-].[+++++-].[++++++-].[+++++-].[++++-].[++++++-].[+++++-].[+++-].[+++-].[++++-].[+-].[+++++-].[+++++-].[++++++++-].[+++++-].[++++++-].[++++++-].[+++++-].[+++++-].[+++++++-].[++++-].[++++++-].[+++-].[+++-].[+++-].[++++-].[+++-].[+++++++-].[+-].[+++++-].[++++-].[++++-].[+++++-].[+++++-].[++++-].[+++-].[+++++-].[+++-].[++++++-].[+++++-].[++++-].[++++++-].[++++++-].[++++-].[++++-].[++++++-].[++-].[++++-].[+++++++-].[+++++-].[++++-].[+++++-].[++++-].[++++++-].[+++++-].[+++++++-].[++-].[+++++++-].[++++-]

# Binary prediction statistics for the median test predictions of the
# BSWiMS cross-validation, labelled "BSWiMS".
bs <- predictionStats_binary(
  BSWiMScv$medianTest,
  "BSWiMS"
)
#> BSWiMS

Let us do the same for another set of common ML methods:

# Second BSWiMS run on exactly the training partitions stored in BSWiMScv.
# NOTE(review): NumberofRepeats = -1 is presumably forwarded to BSWiMS.model
# to request the ensemble (eBSWiMS) variant - confirm in the FRESA.CAD docs.
# NOTE(review): data and outcome are not passed here; presumably randomCV
# reuses the ones supplied to the earlier call - confirm.
eBSWiMScv <- randomCV(
  fittingFunction = BSWiMS.model,
  trainSampleSets = BSWiMScv$trainSamplesSets,
  NumberofRepeats = -1
)



# Naive Bayes on the BSWiMS training partitions, restricted to the feature
# sets stored in BSWiMScv$selectedFeaturesSet, with `pca` and `usekernel`
# switched on.
NAIVEBAYEScv <- randomCV(
  fittingFunction = NAIVE_BAYES,
  trainSampleSets = BSWiMScv$trainSamplesSets,
  featureSelectionFunction = BSWiMScv$selectedFeaturesSet,
  pca = TRUE,
  usekernel = TRUE
)

# Same partitions and feature sets, but with `pca` switched off and no
# `usekernel` argument.
RawNAIVEBAYEScv <- randomCV(
  fittingFunction = NAIVE_BAYES,
  trainSampleSets = BSWiMScv$trainSamplesSets,
  featureSelectionFunction = BSWiMScv$selectedFeaturesSet,
  pca = FALSE
)


# BESS with its default settings on the BSWiMS training partitions.
BESScv <- randomCV(
  fittingFunction = BESS,
  trainSampleSets = BSWiMScv$trainSamplesSets
)

# BESS again, this time with method = "gsection".
BESSGoldencv <- randomCV(
  fittingFunction = BESS,
  trainSampleSets = BSWiMScv$trainSamplesSets,
  method = "gsection"
)


# CVsignature classifiers. The first run ("RSS") performs its own feature
# selection with univariate_Wilcoxon (control: thr = 0.9); the remaining runs
# reuse the feature sets stored in RSSsignaturecv$selectedFeaturesSet and
# differ only in the `method` argument.
RSSsignaturecv <- randomCV(
  fittingFunction = CVsignature,
  trainSampleSets = BSWiMScv$trainSamplesSets,
  featureSelectionFunction = univariate_Wilcoxon,
  featureSelection.control = list(thr = 0.9),
  method = "RSS"
)

# method = "spearman"
SpearmanSignaturecv <- randomCV(
  fittingFunction = CVsignature,
  trainSampleSets = BSWiMScv$trainSamplesSets,
  featureSelectionFunction = RSSsignaturecv$selectedFeaturesSet,
  method = "spearman"
)

# method = "MAN"
ManhatanSignaturecv <- randomCV(
  fittingFunction = CVsignature,
  trainSampleSets = BSWiMScv$trainSamplesSets,
  featureSelectionFunction = RSSsignaturecv$selectedFeaturesSet,
  method = "MAN"
)

# method = "pearson"
pearsonSignaturecv <- randomCV(
  fittingFunction = CVsignature,
  trainSampleSets = BSWiMScv$trainSamplesSets,
  featureSelectionFunction = RSSsignaturecv$selectedFeaturesSet,
  method = "pearson"
)

# method = "kendall"
kendallSignaturecv <- randomCV(
  fittingFunction = CVsignature,
  trainSampleSets = BSWiMScv$trainSamplesSets,
  featureSelectionFunction = RSSsignaturecv$selectedFeaturesSet,
  method = "kendall"
)



# Penalised regression fits (LASSO 1-SE, ridge, elastic net) on the BSWiMS
# training partitions. All three are called with family = "binomial".
LASSO1SEcv <- randomCV(
  fittingFunction = LASSO_1SE,
  trainSampleSets = BSWiMScv$trainSamplesSets,
  family = "binomial"
)

Ridgecv <- randomCV(
  fittingFunction = GLMNET_RIDGE_MIN,
  trainSampleSets = BSWiMScv$trainSamplesSets,
  family = "binomial"
)

ELASTICNETcv <- randomCV(
  fittingFunction = GLMNET_ELASTICNET_MIN,
  trainSampleSets = BSWiMScv$trainSamplesSets,
  family = "binomial"
)

# e1071 SVM on the BSWiMS training partitions, using the feature sets stored
# in RSSsignaturecv$selectedFeaturesSet. Called with asFactor = TRUE and
# probability = TRUE.
SVMcv <- randomCV(
  fittingFunction = e1071::svm,
  trainSampleSets = BSWiMScv$trainSamplesSets,
  featureSelectionFunction = RSSsignaturecv$selectedFeaturesSet,
  asFactor = TRUE,
  probability = TRUE
)

# TUNED_SVM with the same partitions and feature sets, plus candidate grids
# gamma = 1e-5 ... 1e-1 and cost = 1e-3 ... 1e1 (powers of ten).
TunnedSVMcv <- randomCV(
  fittingFunction = TUNED_SVM,
  trainSampleSets = BSWiMScv$trainSamplesSets,
  featureSelectionFunction = RSSsignaturecv$selectedFeaturesSet,
  asFactor = TRUE,
  probability = TRUE,
  gamma = 10^(-5:-1),
  cost = 10^(-3:1)
)


# HLCM_EM on the BSWiMS training partitions with hysteresis = 0.1.
# Data and outcome are passed explicitly in this call.
HCLAS_BSWiMScv <- randomCV(
  theData,
  theOutcome,
  fittingFunction = HLCM_EM,
  trainSampleSets = BSWiMScv$trainSamplesSets,
  hysteresis = 0.1
)
#bs <- predictionStats_binary(HCLAS_BSWiMScv$medianTest,"Latent Class BSWiMS KNN")


# HCLAS_GLMScv <- randomCV(theData,
#                        theOutcome,
#                        fittingFunction=HLCM_EM,
#                      trainSampleSets=BSWiMScv$trainSamplesSets,hysteresis = 0.1,
#                      method=filteredFit,
#                      fitmethod=glm,family="binomial",
#                       filtermethod.control=list(pvalue=0.1,limit=0.10),
#                      classModel.Control=list(scaleMethod = "OrderLogit"))
# 
# bs <- predictionStats_binary(HCLAS_GLMScv$medianTest,"HCLAS Logit KNN")

# 
# HCLAS_GLMScv <- randomCV(theData,
#                       theOutcome,
#                       fittingFunction=HLCM_EM,
#                     trainSampleSets=BSWiMScv$trainSamplesSets,hysteresis = 0.1,
#                     method=filteredFit,
#                     fitmethod=glm,family="binomial",
#                       filtermethod.control=list(pvalue=0.1,limit=0.1),
#                     )
#  bs <- predictionStats_binary(HCLAS_GLMScv$medianTest,"HCLAS Logit KNN")

 # 
# HCLAS_GLMScv <- randomCV(theData,
#                      theOutcome,
#                      fittingFunction=HCLAS_EM_CLUSTER,
#                    trainSampleSets=BSWiMScv$trainSamplesSets,hysteresis = 0.1,
#                    method=filteredFit,
#                    classmethod=glm,family="binomial",
#                      filtermethod.control=list(pvalue=0.1,limit=0.10),
#                    classMethod=e1071::svm,
#                    classModel.Control=list(probability = TRUE) )
# bs <- predictionStats_binary(HCLAS_GLMScv$medianTest,"HCLAS Logit SVM")

#HCLAS_SVM_BSWiMScv <- randomCV(theData,
#                     theOutcome, fittingFunction=HCLAS_CLUSTER,
#                   trainSampleSets=BSWiMScv$trainSamplesSets,hysteresis = 0.10,
#                   classMethod=e1071::svm,
#                   classModel.Control=list(probability = TRUE) )

#bs <- predictionStats_binary(HCLAS_SVM_BSWiMScv$medianTest,"HCLAS BSWiMS SVM")



# HLCM_EM with method = LASSO_1SE and family = "binomial", on the BSWiMS
# training partitions with hysteresis = 0.1.
HCLASLASSOcv <- randomCV(
  theData,
  theOutcome,
  fittingFunction = HLCM_EM,
  hysteresis = 0.1,
  trainSampleSets = BSWiMScv$trainSamplesSets,
  method = LASSO_1SE,
  family = "binomial"
)

#bs <- predictionStats_binary(HCLASLASSOcv$medianTest,"HCLAS LASSO")


# GMVEBSWiMS with its default settings on the BSWiMS training partitions.
GMVEBSWiMSCV <- randomCV(
  fittingFunction = GMVEBSWiMS,
  trainSampleSets = BSWiMScv$trainSamplesSets
)

# ClustClass with Mclust as the clustering method; G = 2 is handed over
# through clustermethod.control.
ClustClassCV <- randomCV(
  fittingFunction = ClustClass,
  trainSampleSets = BSWiMScv$trainSamplesSets,
  clustermethod = Mclust,
  clustermethod.control = list(G = 2)
)

1.4 Reporting the results of the Benchmark procedure

Once done, we can compare the CV test results using the plot() function.
The plot function also generates summary tables of the CV results.

Classifier Performance (continued below)
  HCLAS_BSWiMS RF HCLAS_LASSO TunnedSVM SVM_Wilcox KNN
BER 0.146 0.163 0.166 0.167 0.178 0.192
ACC 0.856 0.841 0.841 0.837 0.822 0.812
AUC 0.942 0.942 0.912 0.929 0.921 0.883
SEN 0.892 0.901 0.946 0.892 0.838 0.883
SPE 0.814 0.773 0.722 0.773 0.804 0.732
CIDX 0.908 0.942 0.869 0.928 0.918 0.863
Table continues below
  NAIVEBAYES_PCA ENS eBSWiMS SVM Ridge ELASTICNET
BER 0.211 0.215 0.216 0.221 0.226 0.226
ACC 0.788 0.784 0.784 0.779 0.774 0.774
AUC 0.857 0.885 0.825 0.859 0.861 0.862
SEN 0.784 0.784 0.784 0.793 0.766 0.766
SPE 0.794 0.784 0.784 0.763 0.784 0.784
CIDX 0.795 0.885 0.815 0.854 0.867 0.865
Table continues below
  ClustClass GMVEBSWiMS RPART NAIVEBAYES_RAW BeSS_Golden
BER 0.233 0.235 0.24 0.243 0.243
ACC 0.769 0.764 0.764 0.76 0.76
AUC 0.83 0.854 0.813 0.813 0.806
SEN 0.802 0.757 0.829 0.802 0.802
SPE 0.732 0.773 0.691 0.711 0.711
CIDX 0.803 0.797 0.764 0.808 0.79
Table continues below
  LASSO1SE LASSO BSWiMS BeSS_Seq ManhatanSignature
BER 0.254 0.26 0.26 0.264 0.267
ACC 0.745 0.74 0.74 0.736 0.74
AUC 0.828 0.821 0.82 0.783 0.823
SEN 0.73 0.739 0.739 0.73 0.847
SPE 0.763 0.742 0.742 0.742 0.619
CIDX 0.826 0.827 0.817 0.771 0.831
  RSSsignature SpearmanSignature kendallSignature PearsonSignature
BER 0.28 0.314 0.319 0.343
ACC 0.721 0.678 0.673 0.659
AUC 0.805 0.725 0.733 0.719
SEN 0.757 0.559 0.568 0.685
SPE 0.68 0.814 0.794 0.629
CIDX 0.813 0.719 0.726 0.714
McNemar’s Test (continued below)
  BSWiMS eBSWiMS RSSsignature PearsonSignature
BSWiMS 1 0.808 0.17 0.553
eBSWiMS 0.808 1 0.262 0.633
RSSsignature 0.17 0.262 1 0.753
PearsonSignature 0.553 0.633 0.753 1
SpearmanSignature 0 0 0 0
ManhatanSignature 0 0 0 0.037
kendallSignature 0.001 0 0 0
BeSS_Seq 0.866 0.746 0.16 0.473
BeSS_Golden 0.114 0.16 0.777 0.547
LASSO1SE 0.405 0.317 0.048 0.346
Ridge 0.841 0.715 0.128 0.467
ELASTICNET 0.841 0.715 0.128 0.467
TunnedSVM 0.02 0.033 0.317 0.292
SVM_Wilcox 0.369 0.493 0.612 1
HCLAS_BSWiMS 0.114 0.139 0.763 0.553
HCLAS_LASSO 0 0 0.015 0.017
GMVEBSWiMS 0.808 0.655 0.139 0.467
ClustClass 0.117 0.144 1 0.722
NAIVEBAYES_PCA 1 0.853 0.206 0.553
NAIVEBAYES_RAW 0.05 0.05 0.724 0.579
RF 0.023 0.024 0.317 0.311
LASSO 1 0.827 0.157 0.547
RPART 0.059 0.061 0.345 0.258
KNN 0.001 0.002 0.16 0.128
SVM.mRMR 0.394 0.491 0.493 0.904
ENS 0.819 1 0.209 0.622
Table continues below
  SpearmanSignature ManhatanSignature
BSWiMS 0 0
eBSWiMS 0 0
RSSsignature 0 0
PearsonSignature 0 0.037
SpearmanSignature 1 0
ManhatanSignature 0 1
kendallSignature 0.18 0
BeSS_Seq 0.001 0
BeSS_Golden 0 0.031
LASSO1SE 0.001 0
Ridge 0.001 0
ELASTICNET 0.001 0
TunnedSVM 0 0.086
SVM_Wilcox 0 0.001
HCLAS_BSWiMS 0 0.02
HCLAS_LASSO 0 0.866
GMVEBSWiMS 0.001 0
ClustClass 0 0.003
NAIVEBAYES_PCA 0 0
NAIVEBAYES_RAW 0 0.013
RF 0 0.077
LASSO 0 0
RPART 0 0.189
KNN 0 0.162
SVM.mRMR 0 0
ENS 0 0
Table continues below
  kendallSignature BeSS_Seq BeSS_Golden LASSO1SE
BSWiMS 0.001 0.866 0.114 0.405
eBSWiMS 0 0.746 0.16 0.317
RSSsignature 0 0.16 0.777 0.048
PearsonSignature 0 0.473 0.547 0.346
SpearmanSignature 0.18 0.001 0 0.001
ManhatanSignature 0 0 0.031 0
kendallSignature 1 0.003 0 0.005
BeSS_Seq 0.003 1 0.012 0.715
BeSS_Golden 0 0.012 1 0.033
LASSO1SE 0.005 0.715 0.033 1
Ridge 0.004 1 0.034 0.67
ELASTICNET 0.004 1 0.034 0.67
TunnedSVM 0 0.014 0.516 0.006
SVM_Wilcox 0 0.257 0.398 0.157
HCLAS_BSWiMS 0 0.093 1 0.037
HCLAS_LASSO 0 0 0.019 0
GMVEBSWiMS 0.002 1 0.071 0.617
ClustClass 0 0.128 0.746 0.028
NAIVEBAYES_PCA 0.002 0.841 0.077 0.532
NAIVEBAYES_RAW 0 0.093 1 0.007
RF 0 0.019 0.537 0.004
LASSO 0.002 0.819 0.077 0.439
RPART 0 0.036 0.492 0.018
KNN 0 0.003 0.25 0
SVM.mRMR 0 0.353 0.317 0.071
ENS 0.001 0.637 0.083 0.285
Table continues below
  Ridge ELASTICNET TunnedSVM SVM_Wilcox
BSWiMS 0.841 0.841 0.02 0.369
eBSWiMS 0.715 0.715 0.033 0.493
RSSsignature 0.128 0.128 0.317 0.612
PearsonSignature 0.467 0.467 0.292 1
SpearmanSignature 0.001 0.001 0 0
ManhatanSignature 0 0 0.086 0.001
kendallSignature 0.004 0.004 0 0
BeSS_Seq 1 1 0.014 0.257
BeSS_Golden 0.034 0.034 0.516 0.398
LASSO1SE 0.67 0.67 0.006 0.157
Ridge 1 NA 0.009 0.201
ELASTICNET NA 1 0.009 0.201
TunnedSVM 0.009 0.009 1 0.013
SVM_Wilcox 0.201 0.201 0.013 1
HCLAS_BSWiMS 0.063 0.063 0.394 0.353
HCLAS_LASSO 0 0 0.034 0.001
GMVEBSWiMS 1 1 0.007 0.221
ClustClass 0.072 0.072 0.317 0.612
NAIVEBAYES_PCA 0.841 0.841 0.011 0.317
NAIVEBAYES_RAW 0.071 0.071 0.505 0.411
RF 0.009 0.009 1 0.083
LASSO 0.796 0.796 0.013 0.317
RPART 0.029 0.029 0.873 0.149
KNN 0.001 0.001 0.577 0.028
SVM.mRMR 0.297 0.297 0.068 0.835
ENS 0.593 0.593 0.012 0.371
Table continues below
  HCLAS_BSWiMS HCLAS_LASSO GMVEBSWiMS ClustClass
BSWiMS 0.114 0 0.808 0.117
eBSWiMS 0.139 0 0.655 0.144
RSSsignature 0.763 0.015 0.139 1
PearsonSignature 0.553 0.017 0.467 0.722
SpearmanSignature 0 0 0.001 0
ManhatanSignature 0.02 0.866 0 0.003
kendallSignature 0 0 0.002 0
BeSS_Seq 0.093 0 1 0.128
BeSS_Golden 1 0.019 0.071 0.746
LASSO1SE 0.037 0 0.617 0.028
Ridge 0.063 0 1 0.072
ELASTICNET 0.063 0 1 0.072
TunnedSVM 0.394 0.034 0.007 0.317
SVM_Wilcox 0.353 0.001 0.221 0.612
HCLAS_BSWiMS 1 0.003 0.078 0.724
HCLAS_LASSO 0.003 1 0 0.004
GMVEBSWiMS 0.078 0 1 0.083
ClustClass 0.724 0.004 0.083 1
NAIVEBAYES_PCA 0.086 0 0.835 0.131
NAIVEBAYES_RAW 1 0.029 0.048 0.724
RF 0.433 0.041 0.009 0.273
LASSO 0.086 0 0.819 0.088
RPART 0.435 0.123 0.029 0.336
KNN 0.178 0.117 0.001 0.039
SVM.mRMR 0.273 0.001 0.251 0.394
ENS 0.095 0 0.637 0.144
Table continues below
  NAIVEBAYES_PCA NAIVEBAYES_RAW RF LASSO
BSWiMS 1 0.05 0.023 1
eBSWiMS 0.853 0.05 0.024 0.827
RSSsignature 0.206 0.724 0.317 0.157
PearsonSignature 0.553 0.579 0.311 0.547
SpearmanSignature 0 0 0 0
ManhatanSignature 0 0.013 0.077 0
kendallSignature 0.002 0 0 0.002
BeSS_Seq 0.841 0.093 0.019 0.819
BeSS_Golden 0.077 1 0.537 0.077
LASSO1SE 0.532 0.007 0.004 0.439
Ridge 0.841 0.071 0.009 0.796
ELASTICNET 0.841 0.071 0.009 0.796
TunnedSVM 0.011 0.505 1 0.013
SVM_Wilcox 0.317 0.411 0.083 0.317
HCLAS_BSWiMS 0.086 1 0.433 0.086
HCLAS_LASSO 0 0.029 0.041 0
GMVEBSWiMS 0.835 0.048 0.009 0.819
ClustClass 0.131 0.724 0.273 0.088
NAIVEBAYES_PCA 1 0.086 0.013 1
NAIVEBAYES_RAW 0.086 1 0.433 0.077
RF 0.013 0.433 1 0.016
LASSO 1 0.077 0.016 1
RPART 0.039 0.466 0.862 0.047
KNN 0.001 0.194 0.59 0.001
SVM.mRMR 0.371 0.257 0.068 0.248
ENS 0.808 0.083 0.016 0.763
  RPART KNN SVM.mRMR ENS
BSWiMS 0.059 0.001 0.394 0.819
eBSWiMS 0.061 0.002 0.491 1
RSSsignature 0.345 0.16 0.493 0.209
PearsonSignature 0.258 0.128 0.904 0.622
SpearmanSignature 0 0 0 0
ManhatanSignature 0.189 0.162 0 0
kendallSignature 0 0 0 0.001
BeSS_Seq 0.036 0.003 0.353 0.637
BeSS_Golden 0.492 0.25 0.317 0.083
LASSO1SE 0.018 0 0.071 0.285
Ridge 0.029 0.001 0.297 0.593
ELASTICNET 0.029 0.001 0.297 0.593
TunnedSVM 0.873 0.577 0.068 0.012
SVM_Wilcox 0.149 0.028 0.835 0.371
HCLAS_BSWiMS 0.435 0.178 0.273 0.095
HCLAS_LASSO 0.123 0.117 0.001 0
GMVEBSWiMS 0.029 0.001 0.251 0.637
ClustClass 0.336 0.039 0.394 0.144
NAIVEBAYES_PCA 0.039 0.001 0.371 0.808
NAIVEBAYES_RAW 0.466 0.194 0.257 0.083
RF 0.862 0.59 0.068 0.016
LASSO 0.047 0.001 0.248 0.763
RPART 1 0.773 0.116 0.057
KNN 0.773 1 0.005 0.001
SVM.mRMR 0.116 0.005 1 0.439
ENS 0.057 0.001 0.439 1

Finally, I will compare the features selected by the different methods by plotting their selection frequency in a heat map.