##Sonar, Mines vs. Rocks Data Set
op <- par(no.readonly = TRUE)
data(Sonar)
Sonar$Class <- 1*(Sonar$Class == "M")
ExperimentName <- "Sonar_ALL_Filter_Methods_95"
bswimsReps <- 20;
theData <- Sonar;
theOutcome <- "Class";
reps <- 200;
fraction <- 0.95;
BSIWIMSFileName <- paste(ExperimentName,"ALLFRESAFilter.RDATA",sep = "_")
CVFileName <- paste(ExperimentName,"ALLFilterCVMethod.RDATA",sep = "_")The FRESA.CAD::randomCV will be run several filter methods before glm fit for logistic modeling
glm_WilcoxCV <- randomCV(theData,
theOutcome,
fittingFunction = filteredFit,
trainFraction = fraction,
repetitions = reps,
fitmethod=glm,
family="binomial"
)
#> ........................................................................................................................................................................................................
bs <- predictionStats_binary(glm_WilcoxCV$medianTest,"SVM_Wilcox")
#> SVM_WilcoxLet us do the same for another set of common ML methods
glm_Kendall_CV <- randomCV(fittingFunction = filteredFit,
trainSampleSets = glm_WilcoxCV$trainSamplesSets,
fitmethod=glm,
family="binomial",
filtermethod=univariate_correlation,
filtermethod.control=list(method ="kendall",pvalue=0.05,limit=0.1))
glm_spearman_CV <- randomCV(fittingFunction = filteredFit,
trainSampleSets = glm_WilcoxCV$trainSamplesSets,
fitmethod=glm,
family="binomial",
filtermethod=univariate_correlation,
filtermethod.control=list(method ="spearman",pvalue=0.05,limit=0.1))
glm_pearson_CV <- randomCV(fittingFunction = filteredFit,
trainSampleSets = glm_WilcoxCV$trainSamplesSets,
fitmethod=glm,
family="binomial",
filtermethod=univariate_correlation,
filtermethod.control=list(method ="pearson",pvalue=0.05,limit=0.1))
glm_tstudent_CV <- randomCV(fittingFunction = filteredFit,
trainSampleSets = glm_WilcoxCV$trainSamplesSets,
fitmethod=glm,
family="binomial",
filtermethod=univariate_tstudent,
filtermethod.control=list(pvalue=0.05,limit=0.1))
glm_zIDI_CV <- randomCV(fittingFunction = filteredFit,
trainSampleSets = glm_WilcoxCV$trainSamplesSets,
fitmethod=glm,
family="binomial",
filtermethod=univariate_Logit,
filtermethod.control=list(uniTest="zIDI",pvalue=0.05,limit=0.1))
glm_zNRI_CV <- randomCV(fittingFunction = filteredFit,
trainSampleSets = glm_WilcoxCV$trainSamplesSets,
fitmethod=glm,
family="binomial",
filtermethod=univariate_Logit,
filtermethod.control=list(uniTest="zNRI",pvalue=0.05,limit=0.1))
glm_FLogit_CV <- randomCV(fittingFunction = filteredFit,
trainSampleSets = glm_WilcoxCV$trainSamplesSets,
fitmethod=glm,
family="binomial",
filtermethod=univariate_residual,
filtermethod.control=list(uniTest="Ftest",type="LOGIT",pvalue=0.05,limit=0.1))
glm_BinLogit_CV <- randomCV(fittingFunction = filteredFit,
trainSampleSets = glm_WilcoxCV$trainSamplesSets,
fitmethod=glm,
family="binomial",
filtermethod=univariate_residual,
filtermethod.control=list(uniTest="Binomial",type="LOGIT",pvalue=0.05,limit=0.1))
glm_tStudentLogit_CV <- randomCV(fittingFunction = filteredFit,
trainSampleSets = glm_WilcoxCV$trainSamplesSets,
fitmethod=glm,
family="binomial",
filtermethod=univariate_residual,
filtermethod.control=list(uniTest="tStudent",type="LOGIT",pvalue=0.05,limit=0.1))
glm_WilcoxLogit_CV <- randomCV(fittingFunction = filteredFit,
trainSampleSets = glm_WilcoxCV$trainSamplesSets,
fitmethod=glm,
family="binomial",
filtermethod=univariate_residual,
filtermethod.control=list(uniTest="Wilcox",type="LOGIT",pvalue=0.05,limit=0.1))
glm_mRMR_CV <- randomCV(fittingFunction = filteredFit,
trainSampleSets = glm_WilcoxCV$trainSamplesSets,
fitmethod=glm,
family="binomial",
filtermethod=mRMR.classic_FRESA,
filtermethod.control=list())
LASSO1SE_CV <- randomCV(fittingFunction = LASSO_1SE,
trainSampleSets = glm_WilcoxCV$trainSamplesSets,
family="binomial",
)
BESScv <- randomCV(fittingFunction=BESS,
trainSampleSets=glm_WilcoxCV$trainSamplesSets)
BESSGoldencv <- randomCV(fittingFunction=BESS,
trainSampleSets=glm_WilcoxCV$trainSamplesSets,
method="gsection")
BSWIMScv <- randomCV(fittingFunction=BSWiMS.model,
trainSampleSets=glm_WilcoxCV$trainSamplesSets)
eBSWIMScv <- randomCV(fittingFunction=BSWiMS.model,
trainSampleSets=glm_WilcoxCV$trainSamplesSets,
NumberofRepeats = -1)
HCLUSBSWIMScv <- randomCV(fittingFunction=HLCM_EM,
trainSampleSets=glm_WilcoxCV$trainSamplesSets,hysteresis = 0.10
)
bs <- predictionStats_binary(HCLUSBSWIMScv$medianTest,"HLCM_EM")
HCLUSBSWIMS2cv <- randomCV(fittingFunction=HLCM,
trainSampleSets=glm_WilcoxCV$trainSamplesSets,hysteresis = 0.10
)
bs <- predictionStats_binary(HCLUSBSWIMS2cv$medianTest,"HLCM")
HCLAS_GLMScv <- randomCV(theData,
theOutcome,
fittingFunction=HLCM,
trainSampleSets=glm_WilcoxCV$trainSamplesSets,hysteresis = 0.1,
method=filteredFit,
fitmethod=glm,family="binomial",
filtermethod.control=list(pvalue=0.05,limit= 10),
)
bs <- predictionStats_binary(HCLAS_GLMScv$medianTest,"HCLAS Logit KNN")
HCLAS_GLMScv <- randomCV(theData,
theOutcome,
fittingFunction=HLCM_EM,
trainSampleSets=glm_WilcoxCV$trainSamplesSets,hysteresis = 0.20,
method=filteredFit,
fitmethod=glm,family="binomial",
filtermethod.control=list(pvalue=0.05,limit=0.1),
)
bs <- predictionStats_binary(HCLAS_GLMScv$medianTest,"HCLAS Logit KNN")By running the FRESA.CAD::BinaryBenchmark function will compare the performance to RF, RPART, LASSO,KNN, and SVM
par(op);
par(mfrow = c(2,2),cex = 0.5);
cp <- BinaryBenchmark(referenceCV = list(Wilcox = glm_WilcoxCV,
Kendall = glm_Kendall_CV,
Spearman = glm_spearman_CV,
Pearson = glm_pearson_CV,
tstudent = glm_tstudent_CV,
zIDI = glm_zIDI_CV,
zNRI = glm_zNRI_CV,
FLogit = glm_FLogit_CV,
BinLogit = glm_BinLogit_CV,
WilcoxLogit = glm_WilcoxLogit_CV,
tStudentLogit = glm_tStudentLogit_CV,
LASSO_1SE = LASSO1SE_CV,
BESS_Seq = BESScv,
BESS_Golden = BESSGoldencv,
BSWiMS = BSWIMScv,
eBSWiMS = eBSWIMScv,
mRMR = glm_mRMR_CV,
HLCM_EM = HCLUSBSWIMScv,
HLCM = HCLUSBSWIMS2cv
))Once done, we can compare the CV test results using the plot() function. The plot function also generates summary tables of the CV results.
#par(op);
par(mfrow = c(1,1),cex = 0.75,xpd = T,pty='m', mar = c(4,3,3,10)) # Making space for the legend
prBenchmark <- plot(cp)| HLCM_EM | RF | HLCM | SVM | WilcoxLogit | ENS | |
|---|---|---|---|---|---|---|
| BER | 0.122 | 0.138 | 0.141 | 0.213 | 0.219 | 0.225 |
| ACC | 0.88 | 0.865 | 0.861 | 0.788 | 0.779 | 0.774 |
| AUC | 0.946 | 0.944 | 0.94 | 0.873 | 0.862 | 0.874 |
| SEN | 0.919 | 0.91 | 0.901 | 0.82 | 0.775 | 0.784 |
| SPE | 0.835 | 0.814 | 0.814 | 0.753 | 0.784 | 0.763 |
| CIDX | 0.906 | 0.946 | 0.882 | 0.869 | 0.862 | 0.874 |
| tStudentLogit | FLogit | zIDI | Spearman | Wilcox | Kendall | |
|---|---|---|---|---|---|---|
| BER | 0.228 | 0.229 | 0.23 | 0.234 | 0.235 | 0.235 |
| ACC | 0.769 | 0.769 | 0.769 | 0.764 | 0.764 | 0.764 |
| AUC | 0.857 | 0.858 | 0.858 | 0.858 | 0.858 | 0.858 |
| SEN | 0.757 | 0.757 | 0.757 | 0.757 | 0.757 | 0.757 |
| SPE | 0.784 | 0.784 | 0.784 | 0.773 | 0.773 | 0.773 |
| CIDX | 0.859 | 0.863 | 0.862 | 0.859 | 0.859 | 0.858 |
| zNRI | RPART | BinLogit | eBSWiMS | LASSO | tstudent | Pearson | |
|---|---|---|---|---|---|---|---|
| BER | 0.236 | 0.236 | 0.24 | 0.24 | 0.244 | 0.245 | 0.245 |
| ACC | 0.764 | 0.764 | 0.76 | 0.76 | 0.755 | 0.755 | 0.755 |
| AUC | 0.843 | 0.802 | 0.846 | 0.824 | 0.833 | 0.855 | 0.855 |
| SEN | 0.766 | 0.793 | 0.766 | 0.775 | 0.748 | 0.757 | 0.757 |
| SPE | 0.763 | 0.732 | 0.753 | 0.742 | 0.763 | 0.753 | 0.753 |
| CIDX | 0.847 | 0.749 | 0.847 | 0.817 | 0.838 | 0.858 | 0.856 |
| BESS_Golden | KNN | LASSO_1SE | BESS_Seq | mRMR | BSWiMS | |
|---|---|---|---|---|---|---|
| BER | 0.248 | 0.248 | 0.249 | 0.25 | 0.255 | 0.273 |
| ACC | 0.755 | 0.76 | 0.75 | 0.75 | 0.745 | 0.726 |
| AUC | 0.787 | 0.885 | 0.831 | 0.796 | 0.831 | 0.825 |
| SEN | 0.811 | 0.874 | 0.739 | 0.757 | 0.766 | 0.73 |
| SPE | 0.691 | 0.629 | 0.763 | 0.742 | 0.722 | 0.722 |
| CIDX | 0.792 | 0.879 | 0.838 | 0.794 | 0.837 | 0.826 |
Finally, I will compare the selected features of the different methods by plotting the selection frequency in a heat-map plot
par(mfrow = c(1,1),cex = 1.0,xpd = F,pty='m', mar = c(3,3,3,3)) # Making space for the legend
cp$featureSelectionFrequency$FS_mRMR <- NULL
topf <- apply(cp$featureSelectionFrequency,1,mean)
gplots::heatmap.2((as.matrix(cp$featureSelectionFrequency[order(-topf),])),Rowv=FALSE,dendrogram = "column",trace = "none",mar = c(5,10),main = "Features",cexRow = 0.5,cexCol = 0.5,srtCol = 25)