1 FRESA.CAD Filters

##Sonar, Mines vs. Rocks Data Set

1.1 Benchmarking Filters on a logistic model

1.2 Cross-Validation of all the different Filter-Methods in FRESA.CAD

The FRESA.CAD::randomCV will be run several filter methods before glm fit for logistic modeling

Let us do the same for another set of common ML methods


glm_Kendall_CV <- randomCV(fittingFunction = filteredFit,
                      trainSampleSets = glm_WilcoxCV$trainSamplesSets,
                      fitmethod=glm,
                      family="binomial",
                      filtermethod=univariate_correlation,
                      filtermethod.control=list(method ="kendall",pvalue=0.05,limit=0.1))


glm_spearman_CV <- randomCV(fittingFunction = filteredFit,
                      trainSampleSets = glm_WilcoxCV$trainSamplesSets,
                      fitmethod=glm,
                      family="binomial",
                      filtermethod=univariate_correlation,
                      filtermethod.control=list(method ="spearman",pvalue=0.05,limit=0.1))


glm_pearson_CV <- randomCV(fittingFunction = filteredFit,
                      trainSampleSets = glm_WilcoxCV$trainSamplesSets,
                      fitmethod=glm,
                      family="binomial",
                      filtermethod=univariate_correlation,
                      filtermethod.control=list(method ="pearson",pvalue=0.05,limit=0.1))


glm_tstudent_CV <- randomCV(fittingFunction = filteredFit,
                      trainSampleSets = glm_WilcoxCV$trainSamplesSets,
                      fitmethod=glm,
                      family="binomial",
                      filtermethod=univariate_tstudent,
                      filtermethod.control=list(pvalue=0.05,limit=0.1))


glm_zIDI_CV <- randomCV(fittingFunction = filteredFit,
                      trainSampleSets = glm_WilcoxCV$trainSamplesSets,
                      fitmethod=glm,
                      family="binomial",
                      filtermethod=univariate_Logit,
                      filtermethod.control=list(uniTest="zIDI",pvalue=0.05,limit=0.1))


glm_zNRI_CV <- randomCV(fittingFunction = filteredFit,
                      trainSampleSets = glm_WilcoxCV$trainSamplesSets,
                      fitmethod=glm,
                      family="binomial",
                      filtermethod=univariate_Logit,
                      filtermethod.control=list(uniTest="zNRI",pvalue=0.05,limit=0.1))


glm_FLogit_CV <- randomCV(fittingFunction = filteredFit,
                      trainSampleSets = glm_WilcoxCV$trainSamplesSets,
                      fitmethod=glm,
                      family="binomial",
                      filtermethod=univariate_residual,
                      filtermethod.control=list(uniTest="Ftest",type="LOGIT",pvalue=0.05,limit=0.1))


glm_BinLogit_CV <- randomCV(fittingFunction = filteredFit,
                      trainSampleSets = glm_WilcoxCV$trainSamplesSets,
                      fitmethod=glm,
                      family="binomial",
                      filtermethod=univariate_residual,
                      filtermethod.control=list(uniTest="Binomial",type="LOGIT",pvalue=0.05,limit=0.1))



glm_tStudentLogit_CV <- randomCV(fittingFunction = filteredFit,
                      trainSampleSets = glm_WilcoxCV$trainSamplesSets,
                      fitmethod=glm,
                      family="binomial",
                      filtermethod=univariate_residual,
                      filtermethod.control=list(uniTest="tStudent",type="LOGIT",pvalue=0.05,limit=0.1))


glm_WilcoxLogit_CV <- randomCV(fittingFunction = filteredFit,
                      trainSampleSets = glm_WilcoxCV$trainSamplesSets,
                      fitmethod=glm,
                      family="binomial",
                      filtermethod=univariate_residual,
                      filtermethod.control=list(uniTest="Wilcox",type="LOGIT",pvalue=0.05,limit=0.1))



glm_mRMR_CV <- randomCV(fittingFunction = filteredFit,
                      trainSampleSets = glm_WilcoxCV$trainSamplesSets,
                      fitmethod=glm,
                      family="binomial",
                      filtermethod=mRMR.classic_FRESA,
                      filtermethod.control=list())

LASSO1SE_CV <- randomCV(fittingFunction = LASSO_1SE,
                      trainSampleSets = glm_WilcoxCV$trainSamplesSets,
                      family="binomial",
                      )

BESScv <- randomCV(fittingFunction=BESS,
                   trainSampleSets=glm_WilcoxCV$trainSamplesSets)

BESSGoldencv <- randomCV(fittingFunction=BESS,
                         trainSampleSets=glm_WilcoxCV$trainSamplesSets,
                         method="gsection")

BSWIMScv <- randomCV(fittingFunction=BSWiMS.model,
                   trainSampleSets=glm_WilcoxCV$trainSamplesSets)

eBSWIMScv <- randomCV(fittingFunction=BSWiMS.model,
                   trainSampleSets=glm_WilcoxCV$trainSamplesSets,
                   NumberofRepeats = -1)

HCLUSBSWIMScv <- randomCV(fittingFunction=HLCM_EM,
                   trainSampleSets=glm_WilcoxCV$trainSamplesSets,hysteresis = 0.10
                   )
bs <- predictionStats_binary(HCLUSBSWIMScv$medianTest,"HLCM_EM")

1.4 Reporting the results of the Benchmark procedure

Once done, we can compare the CV test results using the plot() function. The plot function also generates summary tables of the CV results.

Classifier Performance (continued below)
  HLCM_EM RF HLCM SVM WilcoxLogit ENS
BER 0.122 0.138 0.141 0.213 0.219 0.225
ACC 0.88 0.865 0.861 0.788 0.779 0.774
AUC 0.946 0.944 0.94 0.873 0.862 0.874
SEN 0.919 0.91 0.901 0.82 0.775 0.784
SPE 0.835 0.814 0.814 0.753 0.784 0.763
CIDX 0.906 0.946 0.882 0.869 0.862 0.874
Table continues below
  tStudentLogit FLogit zIDI Spearman Wilcox Kendall
BER 0.228 0.229 0.23 0.234 0.235 0.235
ACC 0.769 0.769 0.769 0.764 0.764 0.764
AUC 0.857 0.858 0.858 0.858 0.858 0.858
SEN 0.757 0.757 0.757 0.757 0.757 0.757
SPE 0.784 0.784 0.784 0.773 0.773 0.773
CIDX 0.859 0.863 0.862 0.859 0.859 0.858
Table continues below
  zNRI RPART BinLogit eBSWiMS LASSO tstudent Pearson
BER 0.236 0.236 0.24 0.24 0.244 0.245 0.245
ACC 0.764 0.764 0.76 0.76 0.755 0.755 0.755
AUC 0.843 0.802 0.846 0.824 0.833 0.855 0.855
SEN 0.766 0.793 0.766 0.775 0.748 0.757 0.757
SPE 0.763 0.732 0.753 0.742 0.763 0.753 0.753
CIDX 0.847 0.749 0.847 0.817 0.838 0.858 0.856
  BESS_Golden KNN LASSO_1SE BESS_Seq mRMR BSWiMS
BER 0.248 0.248 0.249 0.25 0.255 0.273
ACC 0.755 0.76 0.75 0.75 0.745 0.726
AUC 0.787 0.885 0.831 0.796 0.831 0.825
SEN 0.811 0.874 0.739 0.757 0.766 0.73
SPE 0.691 0.629 0.763 0.742 0.722 0.722
CIDX 0.792 0.879 0.838 0.794 0.837 0.826

Finally, I will compare the selected features of the different methods by plotting the selection frequency in a heat-map plot