1 FRESA.CAD Sonar Benchmark

1.1 Sonar, Mines vs. Rocks Data Set

# Snapshot of the writable graphics parameters; later chunks call par(op) to
# restore them after changing the plot layout.
op <- par(no.readonly = TRUE)

# Load the Sonar data and recode the outcome as numeric: 1 where Class is "M",
# 0 otherwise.
data(Sonar)
Sonar$Class <- as.numeric(Sonar$Class == "M")

# Build the predictor design matrix (main effects only), drop the intercept
# column that model.matrix() adds, and append the numeric outcome.
# Alternative kept for reference, with all pairwise interactions:
# Sonar.mat <- as.data.frame(model.matrix(Class ~ . * ., Sonar))
Sonar.mat <- as.data.frame(model.matrix(Class ~ ., Sonar))
Sonar.mat[["(Intercept)"]] <- NULL
Sonar.mat$Class <- as.numeric(Sonar$Class)

# Sanitize the column names: spaces and slashes become "_", colons become ".".
fnames <- str_replace_all(
  str_replace_all(
    str_replace_all(colnames(Sonar.mat), " ", "_"),
    "/", "_"
  ),
  ":", "."
)
colnames(Sonar.mat) <- fnames

# Experiment configuration used by the benchmarking chunks below.
ExperimentName <- "Sonar"
theData <- Sonar.mat
theOutcome <- "Class"
bswimsReps <- 20   # negated and passed as NumberofRepeats to BSWiMS.model()
reps <- 60         # passed to randomCV() and BinaryBenchmark()
fraction <- 0.75   # passed to randomCV() and BinaryBenchmark()

# Output files for the fitted BSWiMS model and the cross-validation results.
BSIWIMSFileName <- paste(ExperimentName, "FRESAMethod.RDATA", sep = "_")
CVFileName <- paste(ExperimentName, "CVMethod.RDATA", sep = "_")

1.2 Benchmarking



# Fit the reference BSWiMS model on the full data set, starting from an
# intercept-only formula, and persist it. The feature-analysis section reads
# BSWiMSMODEL$bagging from this object.
# NOTE(review): NumberofRepeats is deliberately passed as a negative value;
# confirm its meaning against the FRESA.CAD BSWiMS.model documentation.
BSWiMSMODEL <- BSWiMS.model(
  formula = paste(theOutcome, " ~ 1"),
  data = theData,
  NumberofRepeats = -bswimsReps
)

save(BSWiMSMODEL, file = BSIWIMSFileName)

# Random cross-validation of BSWiMS.model, then summary statistics on the
# median test predictions.
cv <- randomCV(
  theData, theOutcome,
  BSWiMS.model, fraction,
  reps, NumberofRepeats = 0
)

ps <- predictionStats_binary(cv$medianTest, plotname = "eBSWiMS", cex = 0.8)


# Benchmark that reuses the cross-validation above as its reference.
par(mfrow = c(2, 2), cex = 0.6)
cp2 <- BinaryBenchmark(
  referenceCV = cv,
  referenceName = "eBSWiMS",
  referenceFilterName = "BSWiMS"
)

par(op)

# Checkpoint: keep cp2 on disk in case the second benchmark below fails.
save(cp2, file = CVFileName)


# Benchmark run directly on the data.
par(mfrow = c(2, 2), cex = 0.6)
cp <- BinaryBenchmark(theData, theOutcome, reps, fraction)

par(op)

# Store BOTH benchmark objects. Saving only `cp` here would overwrite the
# checkpoint written above and silently discard `cp2`.
save(cp, cp2, file = CVFileName)
#ps <- predictionStats_binary(cp$TheCVEvaluations$Reference$medianTest,plotname = "BSWiMS",cex = 0.8)

1.3 Results

1.3.1 Classifier Results


# Heat map of the benchmark test predictions, using the "Outcome" column of
# cp$testPredictions as the outcome.
hm <- heatMaps(
  Outcome = "Outcome",
  data = cp$testPredictions,
  title = "Heat Map",
  Scale = TRUE,
  hCluster = "col",
  cexRow = 0.25,
  cexCol = 0.75,
  srtCol = 45
)


# Elapsed CPU time recorded by the benchmark for each method
pander::pander(cp$cpuElapsedTimes)
BSWiMS RF RPART LASSO SVM KNN ENS
3.982 0.167 0.02183 1.082 0.008167 0.014 5.275
# Negated copy of the per-method elapsed CPU times.
learningTime <- -cp$cpuElapsedTimes

# Plot the direct benchmark. The returned object `pr` supplies the metrics
# used by the radar plots.
par(mfrow = c(2, 1), cex = 1.0)
pr <- plot(cp)

par(op)

# Same plot for the benchmark that used the BSWiMS cross-validation as its
# reference.
par(mfrow = c(2, 1), cex = 1.0)
pr2 <- plot(cp2)

par(op)

1.3.2 Radar Plots



# Radar charts comparing the benchmarked classifiers (left) and the feature
# filter methods (right). fmsb is attached only for this chunk and detached
# again at the end.
library(fmsb)
par(mfrow = c(1, 2), xpd = TRUE, pty = "s", mar = c(1, 1, 1, 1))

# One palette shared by both charts: opaque line colours and nearly
# transparent fills (alpha = 0.05), one entry per plotted method.
colors_border <- c(
  rgb(1.0, 0.0, 0.0, 1.0), rgb(0.0, 1.0, 0.0, 1.0), rgb(0.0, 0.0, 1.0, 1.0),
  rgb(0.2, 0.2, 0.0, 1.0), rgb(0.0, 1.0, 1.0, 1.0), rgb(1.0, 0.0, 1.0, 1.0),
  rgb(0.0, 0.0, 0.0, 1.0)
)
colors_in <- c(
  rgb(1.0, 0.0, 0.0, 0.05), rgb(0.0, 1.0, 0.0, 0.05), rgb(0.0, 0.0, 1.0, 0.05),
  rgb(1.0, 1.0, 0.0, 0.05), rgb(0.0, 1.0, 1.0, 0.05), rgb(1.0, 0.0, 1.0, 0.05),
  rgb(0.0, 0.0, 0.0, 0.05)
)

# --- Classifier chart --------------------------------------------------------
mNames <- names(cp$cpuElapsedTimes)

# With maxmin = TRUE radarchart() reads row 1 as the axis maximum and row 2 as
# the axis minimum; the remaining rows are the methods. BER and CPU are negated
# further down so that "better" points outwards, which is why row 1 holds the
# smallest BER and the smallest CPU time.
classRanks <- c(pr$minMaxMetrics$BER[1], pr$minMaxMetrics$ACC[2], pr$minMaxMetrics$AUC[2], pr$minMaxMetrics$SEN[2], pr$minMaxMetrics$SPE[2], min(cp$cpuElapsedTimes))
classRanks <- rbind(classRanks, c(pr$minMaxMetrics$BER[2], 0, 0, 0, 0, max(cp$cpuElapsedTimes)))
classRanks <- as.data.frame(rbind(classRanks, cbind(t(pr$metrics[c("BER", "ACC", "AUC", "SEN", "SPE"), mNames]), cp$cpuElapsedTimes)))
colnames(classRanks) <- c("BER", "ACC", "AUC", "SEN", "SPE", "CPU")

classRanks$BER <- -classRanks$BER
classRanks$CPU <- -classRanks$CPU

# TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
radarchart(classRanks, axistype = 0, maxmin = TRUE, pcol = colors_border, pfcol = colors_in, plwd = c(6, 2, 2, 2, 2, 2, 2), plty = 1, cglcol = "grey", cglty = 1, axislabcol = "black", cglwd = 0.8, vlcex = 0.5, title = "Prediction Model")

# The legend lists the method rows only (the two axis-limit rows are dropped).
legend("topleft", legend = rownames(classRanks[-c(1, 2), ]), bty = "n", pch = 20, col = colors_in, text.col = colors_border, cex = 0.5, pt.cex = 2)


# --- Filter-method chart -----------------------------------------------------
filnames <- c("BSWiMS", "LASSO", "RF.ref", "IDI", "t-test", "Kendall", "mRMR")

# Same layout as above: row 1 = axis maximum, row 2 = axis minimum. SIZE is
# negated below, so row 1 holds the smallest feature-set size.
filterRanks <- c(pr$minMaxMetrics$BER[1], pr$minMaxMetrics$ACC[2], pr$minMaxMetrics$AUC[2], pr$minMaxMetrics$SEN[2], pr$minMaxMetrics$SPE[2], max(cp$jaccard), min(cp$featsize))

filterRanks <- rbind(filterRanks, c(pr$minMaxMetrics$BER[2], 0, 0, 0, 0, min(cp$jaccard), max(cp$featsize)))

filterRanks <- as.data.frame(rbind(filterRanks, cbind(t(pr$metrics_filter[c("BER", "ACC", "AUC", "SEN", "SPE"), filnames]), cp$jaccard[filnames], cp$featsize[filnames])))
colnames(filterRanks) <- c("BER", "ACC", "AUC", "SEN", "SPE", "Jaccard", "SIZE")
filterRanks$BER <- -filterRanks$BER
filterRanks$SIZE <- -filterRanks$SIZE

radarchart(filterRanks, axistype = 0, maxmin = TRUE, pcol = colors_border, pfcol = colors_in, plwd = c(6, 2, 2, 2, 2, 2, 2), plty = 1, cglcol = "grey", cglty = 1, axislabcol = "black", cglwd = 0.8, vlcex = 0.6, title = "Filter Method")


legend("topleft", legend = rownames(filterRanks[-c(1, 2), ]), bty = "n", pch = 20, col = colors_in, text.col = colors_border, cex = 0.5, pt.cex = 2)


detach("package:fmsb", unload = TRUE)

# Back to a single panel and the graphics settings saved in `op`.
par(mfrow = c(1, 1))
par(op)

1.3.3 Feature Analysis



# Mean selection frequency of every feature across the benchmarked methods.
# NOTE: this numeric vector is named `rm`, like base::rm(); the name is kept
# because the univariate table further down indexes it.
rm <- rowMeans(cp$featureSelectionFrequency)

# Keep features whose mean selection frequency exceeds 0.1. drop = FALSE keeps
# the result two-dimensional (and keeps its row names) even when only one
# feature passes the threshold.
selFrequency <- cp$featureSelectionFrequency[rm > 0.1, , drop = FALSE]
gplots::heatmap.2(selFrequency, trace = "none", margins = c(10, 10), main = "Features", cexRow = 0.5)



# Show at most the first 30 features of the bagged formula network.
# seq_len() avoids the c(1, 0) index that 1:topFeat yields for an empty network.
topFeat <- min(ncol(BSWiMSMODEL$bagging$formulaNetwork), 30)
gplots::heatmap.2(BSWiMSMODEL$bagging$formulaNetwork[seq_len(topFeat), seq_len(topFeat)], trace = "none", margins = c(10, 10), main = "B:SWiMS Formula Network")

# Summary of the bagged model. `caption` and `round` are formatting options
# for pander(), so they are passed to pander() rather than to summary(); the
# caption names this data set instead of the leftover "Colon".
pander::pander(
  summary(BSWiMSMODEL$bagging$bagged.model),
  caption = "Sonar",
  round = 3
)
  • coefficients:

    Table continues below
      Estimate lower OR upper u.Accuracy r.Accuracy
    V11 4.595 21.15 99.03 463.7 0.7452 0.6839
    V45 4.262 13.57 70.96 371 0.6394 0.7201
    V36 -2.544 0.02743 0.07854 0.2249 0.6587 0.696
    V12 0.2103 1.13 1.234 1.348 0.7404 0.7173
    V10 0.2613 1.161 1.299 1.452 0.6971 0.6807
    V46 0.36 1.224 1.433 1.678 0.6106 0.7452
    V44 0.1146 1.063 1.121 1.183 0.5913 0.709
    V35 -0.1617 0.7883 0.8507 0.9181 0.5865 0.7044
    V47 2.171 3.096 8.768 24.83 0.6202 0.7096
    V37 -0.1239 0.8304 0.8835 0.94 0.6346 0.7288
    V9 0.105 1.051 1.111 1.174 0.6923 0.7178
    V48 0.6813 1.353 1.977 2.887 0.6875 0.7418
    V49 1.332 1.802 3.79 7.973 0.6827 0.7425
    V4 1.292 1.757 3.642 7.548 0.6058 0.7433
    V51 1.938 2.295 6.945 21.02 0.6683 0.768
    V1 0.4892 1.228 1.631 2.166 0.601 0.7138
    V43 0.3014 1.135 1.352 1.61 0.5865 0.7447
    V34 -0.04082 0.9372 0.96 0.9833 0.5865 0.6982
    V58 1.906 2.079 6.726 21.76 0.5721 0.7159
    V54 2.379 2.478 10.79 46.99 0.5385 0.7175
    V13 0.0349 1.013 1.036 1.058 0.6683 0.7103
    V59 2.896 2.926 18.1 112 0.5 0.7175
    V42 0.03723 1.014 1.038 1.063 0.5577 0.7134
    V21 0.1165 1.043 1.124 1.21 0.6298 0.7221
    V23 0.07047 1.025 1.073 1.123 0.5577 0.7298
    V22 0.08959 1.031 1.094 1.16 0.5865 0.7281
    V8 0.02966 1.01 1.03 1.05 0.6346 0.7019
    V2 0.1566 1.054 1.17 1.298 0.5577 0.7078
    V5 0.2066 1.068 1.23 1.416 0.625 0.7169
    V52 5.908 6.319 368 21432 0.6394 0.7419
    V20 0.09366 1.028 1.098 1.173 0.6394 0.7223
    V24 0.02498 1.007 1.025 1.044 0.5481 0.7085
    V3 0.1461 1.027 1.157 1.304 0.5721 0.7139
    V19 0.002362 1 1.002 1.004 0.5433 0.7019
    Table continues below
      full.Accuracy u.AUC r.AUC full.AUC IDI NRI
    V11 0.7747 0.7418 0.6814 0.7736 0.1357 0.7796
    V45 0.7588 0.6446 0.7167 0.7574 0.1187 0.6981
    V36 0.7601 0.6516 0.6957 0.7586 0.0997 0.5923
    V12 0.7788 0.736 0.7147 0.7773 0.09346 0.7177
    V10 0.7455 0.6961 0.6765 0.7436 0.08644 0.7239
    V46 0.7704 0.6124 0.7435 0.7693 0.09342 0.6728
    V44 0.7192 0.5924 0.7057 0.7172 0.07616 0.5467
    V35 0.7467 0.5775 0.704 0.7443 0.07429 0.3676
    V47 0.7421 0.6201 0.7058 0.7398 0.0781 0.5263
    V37 0.7798 0.6258 0.7268 0.779 0.07006 0.5342
    V9 0.7526 0.6929 0.7153 0.7513 0.06188 0.5825
    V48 0.7566 0.6877 0.7402 0.755 0.05789 0.6024
    V49 0.7622 0.6832 0.7409 0.7606 0.05863 0.6922
    V4 0.7668 0.6053 0.7418 0.7659 0.05107 0.5622
    V51 0.7983 0.6664 0.7675 0.7968 0.05844 0.5693
    V1 0.7522 0.6027 0.711 0.7493 0.04972 0.4575
    V43 0.7585 0.5834 0.7432 0.7572 0.05707 0.6359
    V34 0.7404 0.5769 0.6974 0.7384 0.04577 0.3506
    V58 0.7465 0.5711 0.7134 0.7442 0.04171 0.4619
    V54 0.7449 0.5331 0.7149 0.7419 0.04364 0.327
    V13 0.7404 0.6658 0.7082 0.7389 0.04776 0.5046
    V59 0.7492 0.4893 0.7149 0.7454 0.03749 0.5232
    V42 0.7256 0.5485 0.7116 0.722 0.04453 0.4722
    V21 0.7544 0.6239 0.7198 0.7525 0.0429 0.6495
    V23 0.7505 0.544 0.7277 0.7478 0.04057 0.5075
    V22 0.7691 0.5782 0.726 0.7669 0.03927 0.4275
    V8 0.7019 0.6304 0.6986 0.6986 0.03682 0.4215
    V2 0.7276 0.5563 0.7044 0.7242 0.03784 0.3209
    V5 0.7464 0.622 0.7143 0.7447 0.03636 0.4945
    V52 0.7609 0.6394 0.7402 0.7599 0.04074 0.4149
    V20 0.7457 0.6362 0.7199 0.7433 0.03539 0.6399
    V24 0.7513 0.5265 0.7068 0.7512 0.03198 0.3833
    V3 0.7452 0.5698 0.7112 0.7437 0.02657 0.3737
    V19 0.7067 0.5376 0.6986 0.7025 0.01916 0.1791
      z.IDI z.NRI Frequency
    V11 5.834 6.13 18
    V45 5.05 5.623 15.8
    V36 4.74 4.478 23.85
    V12 4.656 5.534 1
    V10 4.589 5.698 2.1
    V46 4.474 5.339 1.45
    V44 4.169 4.123 0.75
    V35 4.159 2.723 1.95
    V47 4.088 4.084 9.8
    V37 3.914 4 1
    V9 3.703 4.552 0.65
    V48 3.524 4.645 2.25
    V49 3.512 5.493 2.45
    V4 3.476 4.442 2.45
    V51 3.43 4.33 1
    V1 3.381 3.512 0.65
    V43 3.376 4.896 1.8
    V34 3.334 2.576 0.9
    V58 3.182 3.512 0.55
    V54 3.169 2.401 0.85
    V13 3.159 3.762 0.4
    V59 3.115 4.027 0.85
    V42 3.076 3.527 0.65
    V21 3.071 4.941 1.95
    V23 3.015 3.789 0.95
    V22 2.982 3.16 1.35
    V8 2.969 3.246 0.35
    V2 2.938 2.39 0.45
    V5 2.871 3.778 0.8
    V52 2.849 3.121 3.2
    V20 2.8 4.87 1.9
    V24 2.676 2.828 0.55
    V3 2.395 2.807 0.5
    V19 2.212 1.295 0.15
  • Accuracy: 0.8077
  • tAUC: 0.8068
  • sensitivity: 0.8198
  • specificity: 0.7938
  • bootstrap:



# Heat map of the raw data restricted to the outcome and the frequently
# selected features (the row names of selFrequency).
hm <- heatMaps(
  Outcome = theOutcome,
  data = theData[, c(theOutcome, rownames(selFrequency))],
  title = "Heat Map",
  Scale = TRUE,
  hCluster = "col",
  cexRow = 0.25,
  cexCol = 0.75,
  srtCol = 45
)


# Univariate statistics for the same features. The variable list is passed as
# a two-column matrix holding each feature name twice.
vlist <- rownames(selFrequency)
vlist <- cbind(vlist, vlist)
univ <- univariateRankVariables(
  variableList = vlist,
  formula = paste(theOutcome, "~1"),
  Outcome = theOutcome,
  data = theData,
  type = "LOGIT",
  rankingTest = "zIDI",
  uniType = "Binary"
)[, c("controlMean", "controlStd", "caseMean", "caseStd", "ROCAUC", "WilcoxRes.p")]

# Append the mean selection frequency (`rm` is the rowMeans vector computed in
# the feature-analysis chunk) and sort by decreasing ROC AUC.
cnames <- colnames(univ)
univ <- cbind(univ, rm[rownames(univ)])
colnames(univ) <- c(cnames, "Frequency")
univ <- univ[order(-univ[, "ROCAUC"]), ]

# head() caps the table at the rows that exist; univ[1:topFeat, ] would pad it
# with NA rows when fewer than topFeat features were selected.
pander::pander(utils::head(univ, topFeat), caption = "Features", round = 4)
Features (continued below)
  controlMean controlStd caseMean caseStd ROCAUC WilcoxRes.p
V11 0.1747 0.1134 0.2896 0.1254 0.7811 0
V12 0.1916 0.1347 0.3015 0.1241 0.7429 0
V10 0.1593 0.1132 0.251 0.1374 0.7327 0
V49 0.0384 0.0304 0.0637 0.0364 0.7313 0
V9 0.1374 0.0999 0.2135 0.1222 0.7308 0
V48 0.0695 0.0482 0.1106 0.0671 0.7063 0
V13 0.2262 0.1381 0.3144 0.1307 0.7047 0
V51 0.0123 0.0086 0.0194 0.0135 0.6994 0
V47 0.0945 0.0678 0.1469 0.0945 0.6974 0
V52 0.0105 0.0071 0.016 0.0108 0.688 0
V46 0.1169 0.0938 0.1988 0.1514 0.6871 0
V45 0.1423 0.0957 0.2452 0.1741 0.6727 0
V4 0.0414 0.0312 0.0648 0.0545 0.6652 0
V36 0.4607 0.2623 0.3186 0.2484 0.6652 0
V5 0.062 0.0472 0.0867 0.0598 0.6537 2e-04
V1 0.0225 0.0147 0.035 0.0271 0.6523 1e-04
V44 0.1751 0.1074 0.2481 0.1444 0.6511 1e-04
V21 0.5423 0.2488 0.6674 0.2524 0.6445 1e-04
V35 0.4555 0.2612 0.3376 0.2455 0.6415 6e-04
V8 0.1176 0.0798 0.1498 0.0872 0.641 8e-04
V43 0.2118 0.1303 0.2769 0.1398 0.6409 2e-04
V37 0.4173 0.243 0.317 0.2281 0.6281 6e-04
V6 0.0962 0.065 0.1119 0.0526 0.6246 0.0018
V20 0.5002 0.2594 0.6179 0.2541 0.6236 9e-04
V2 0.0303 0.024 0.0455 0.0378 0.6228 0.0015
V50 0.0178 0.0126 0.0227 0.0142 0.6218 0.003
V3 0.036 0.0291 0.0507 0.044 0.6165 0.006
V14 0.269 0.1663 0.3207 0.1597 0.6147 0.0058
V22 0.5693 0.2606 0.6723 0.2428 0.6137 0.0028
V58 0.0067 0.0048 0.0091 0.0075 0.5984 0.0118
  Frequency
V11 0.99
V12 0.95
V10 0.7783
V49 0.8667
V9 0.8117
V48 0.7817
V13 0.7133
V51 0.745
V47 0.7167
V52 0.82
V46 0.675
V45 0.815
V4 0.7717
V36 0.91
V5 0.6883
V1 0.6867
V44 0.6883
V21 0.7617
V35 0.615
V8 0.5983
V43 0.61
V37 0.7017
V6 0.54
V20 0.625
V2 0.545
V50 0.5217
V3 0.5317
V14 0.5667
V22 0.6583
V58 0.5