#FRESA.CAD Benchmark

##Colon Cancer Data Set



ColonData <- read.delim("./Colon/cancerColonb.txt")
Colon <- ColonData[,-1]
rownames(Colon) <- ColonData[,1]
colonC <- Colon
colonC$Class <- as.factor(Colon$Class)

ls <- LASSO_1SE(Class~.,Colon)

ExperimentName <- "Colon"
bswimsReps <- 20;
theData <- Colon;
theOutcome <- "Class";
reps <- 75;
fraction <- 0.8;
theData[,1:ncol(theData)] <- sapply(theData,as.numeric)

#sf <- univariate_Wilcoxon(data=theData, Outcome="Class",pvalue=0.5)
#theData <- theData[,c("Class",names(sf))]



BSWiMSFileName <- paste(ExperimentName,"BSWIMSMethod.RDATA",sep = "_")
CVFileName <- paste(ExperimentName,"CVMethod.RDATA",sep = "_")

##Benchmarking


BSWiMSMODEL <- BSWiMS.model(formula = paste(theOutcome," ~ 1"),data = theData,NumberofRepeats = 1)


save(BSWiMSMODEL,file = BSWiMSFileName)

load(file = BSWiMSFileName)

par(mfrow = c(2,2),cex=0.6);
cp <- BinaryBenchmark(theData,theOutcome,reps,fraction)

save(cp,file = CVFileName)
par(mfrow = c(1,1),cex=1.0);


load(file = CVFileName)

##Results

###Classifier Results


hm <- heatMaps(Outcome = "Outcome",data = cp$testPredictions,title = "Heat Map",Scale = TRUE,hCluster = "col",cexRow = 0.25,cexCol = 0.75,srtCol = 45) 


#The Times
par(mfrow = c(2,1),cex=1.0);
pander::pander(cp$cpuElapsedTimes)
BSWiMS RF RPART LASSO SVM KNN ENS
7.519 2.197 0.4525 0.3317 0.009333 0.02173 10.08
par(mfrow = c(1,1),cex=1.0);
learningTime <- -1*cp$cpuElapsedTimes
pr <- plot(cp)

0.0.1 Radar Plots

op <- par(no.readonly = TRUE)

library(fmsb)
par(mfrow = c(1,2),xpd = TRUE,pty = "s",mar = c(1,1,1,1))

mNames <- names(cp$cpuElapsedTimes)

classRanks <- c(pr$minMaxMetrics$BER[1],pr$minMaxMetrics$ACC[2],pr$minMaxMetrics$AUC[2],pr$minMaxMetrics$SEN[2],pr$minMaxMetrics$SPE[2],min(cp$cpuElapsedTimes))
classRanks <- rbind(classRanks,c(pr$minMaxMetrics$BER[2],0,0,0,0,max(cp$cpuElapsedTimes)))
classRanks <- as.data.frame(rbind(classRanks,cbind(t(pr$metrics[c("BER","ACC","AUC","SEN","SPE"),mNames]),cp$cpuElapsedTimes)))
colnames(classRanks) <- c("BER","ACC","AUC","SEN","SPE","CPU")

classRanks$BER <- -classRanks$BER
classRanks$CPU <- -classRanks$CPU

colors_border = c( rgb(1.0,0.0,0.0,1.0), rgb(0.0,1.0,0.0,1.0) , rgb(0.0,0.0,1.0,1.0), rgb(0.2,0.2,0.0,1.0), rgb(0.0,1.0,1.0,1.0), rgb(1.0,0.0,1.0,1.0), rgb(0.0,0.0,0.0,1.0) )
colors_in = c( rgb(1.0,0.0,0.0,0.05), rgb(0.0,1.0,0.0,0.05) , rgb(0.0,0.0,1.0,0.05),rgb(1.0,1.0,0.0,0.05), rgb(0.0,1.0,1.0,0.05) , rgb(1.0,0.0,1.0,0.05), rgb(0.0,0.0,0.0,0.05) )
radarchart(classRanks,axistype = 0,maxmin = T,pcol = colors_border,pfcol = colors_in,plwd = c(6,2,2,2,2,2,2),plty = 1, cglcol = "grey", cglty = 1,axislabcol = "black",cglwd = 0.8, vlcex  = 0.5 ,title = "Prediction Model")

legend("topleft",legend = rownames(classRanks[-c(1,2),]),bty = "n",pch = 20,col = colors_in,text.col = colors_border,cex = 0.5,pt.cex = 2)


filnames <- c("BSWiMS","LASSO_MIN","RF","IDI","tStudent","kendall","mRMR.classic")

filterRanks <- c(pr$minMaxMetrics$BER[1],pr$minMaxMetrics$ACC[2],pr$minMaxMetrics$AUC[2],pr$minMaxMetrics$SEN[2],pr$minMaxMetrics$SPE[2],max(cp$jaccard),min(cp$featsize));

filterRanks <- rbind(filterRanks,c(pr$minMaxMetrics$BER[2],0,0,0,0,min(cp$jaccard),max(cp$featsize)));

filterRanks <- as.data.frame(rbind(filterRanks,cbind(t(pr$metrics_filter[c("BER","ACC","AUC","SEN","SPE"),filnames]),cp$jaccard[filnames],cp$featsize[filnames])));
colnames(filterRanks) <- c("BER","ACC","AUC","SEN","SPE","Jaccard","SIZE")
filterRanks$BER <- -filterRanks$BER
filterRanks$SIZE <- -filterRanks$SIZE

colors_border = c( rgb(1.0,0.0,0.0,1.0), rgb(0.0,1.0,0.0,1.0) , rgb(0.0,0.0,1.0,1.0), rgb(0.2,0.2,0.0,1.0), rgb(0.0,1.0,1.0,1.0), rgb(1.0,0.0,1.0,1.0), rgb(0.0,0.0,0.0,1.0) )
colors_in = c( rgb(1.0,0.0,0.0,0.05), rgb(0.0,1.0,0.0,0.05) , rgb(0.0,0.0,1.0,0.05),rgb(1.0,1.0,0.0,0.05), rgb(0.0,1.0,1.0,0.05) , rgb(1.0,0.0,1.0,0.05), rgb(0.0,0.0,0.0,0.05) )
radarchart(filterRanks,axistype = 0,maxmin = T,pcol = colors_border,pfcol = colors_in,plwd = c(6,2,2,2,2,2,2),plty = 1, cglcol = "grey", cglty = 1,axislabcol = "black",cglwd = 0.8, vlcex  = 0.6,title = "Filter Method" )


legend("topleft",legend = rownames(filterRanks[-c(1,2),]),bty = "n",pch = 20,col = colors_in,text.col = colors_border,cex = 0.5,pt.cex = 2)


detach("package:fmsb", unload=TRUE)

par(mfrow = c(1,1))
par(op)

0.0.2 Feature Analysis



rm <- rowMeans(cp$featureSelectionFrequency)
selFrequency <- cp$featureSelectionFrequency[rm > 0.1,]
gplots::heatmap.2(as.matrix(selFrequency),trace = "none",mar = c(10,10),main = "Features",cexRow = 0.25,cexCol = 0.5)



topFeat <- min(ncol(BSWiMSMODEL$bagging$formulaNetwork),30);
gplots::heatmap.2(BSWiMSMODEL$bagging$formulaNetwork[1:topFeat,1:topFeat],trace="none",mar = c(10,10),main = "B:SWiMS Formula Network",cexRow = 0.5,cexCol = 0.5)

pander::pander(summary(BSWiMSMODEL$bagging$bagged.model,caption="Colon",round = 3))


hm <- heatMaps(Outcome = theOutcome,data = theData[,c(theOutcome,rownames(selFrequency))],title = "Heat Map",Scale = TRUE,hCluster = "col",cexRow = 0.25,cexCol = 0.5,srtCol = 45)


vlist <- rownames(selFrequency)
vlist <- cbind(vlist,vlist)
univ <- univariateRankVariables(variableList = vlist,formula = paste(theOutcome,"~1"),Outcome = theOutcome,data = theData,type = "LOGIT",rankingTest = "zIDI",uniType = "Binary")[,c("controlMean","controlStd","caseMean","caseStd","ROCAUC","WilcoxRes.p")] 

100 : T52185 200 : D16431


cnames <- colnames(univ);
univ <- cbind(univ,rm[rownames(univ)])
colnames(univ) <- c(cnames,"Frequency")
univ <- univ[order(-univ[,5]),]
pander::pander(univ[1:topFeat,],caption = "Features",round = 4)
Features (continued below)
  controlMean controlStd caseMean caseStd ROCAUC
R87126 806.5 546.1 271.7 185.5 0.8841
H08393 55.01 24.97 132.6 80.18 0.875
R36977 125.7 63.3 375.6 340.1 0.8648
M22382 405.2 220.3 1142 868.5 0.8648
M26383 114.9 168.6 398.1 475.6 0.8534
H40095 319.1 215.5 800.5 529.5 0.8409
X63629 57.35 31.71 154.8 108.2 0.8352
J05032 81.3 55.47 194.7 120.5 0.833
X12671 333.3 263.5 850.5 582.4 0.8295
Z50753 447.6 216.3 233.9 119 0.8284
J02854 907.3 682.6 222.9 266.8 0.8261
U09564 116.4 63.94 262.9 176.3 0.8239
H43887 850.5 547.3 330.4 414.5 0.8216
M63391 2303 1538 597.1 568.4 0.8205
M76378.2 1052 844.1 260.3 172 0.8136
M36634 214.5 146.9 93.32 96.29 0.8125
T86473 156.4 94.12 407 323.7 0.8114
X14958 350.8 209.1 716.1 434.5 0.8102
M76378 1283 798.5 465.9 357.5 0.8068
T47377 259.1 265.6 930.2 813.4 0.8057
U30825 173.4 60.48 353.9 235.7 0.8023
M26697 852.3 478.3 1848 1101 0.8023
R84411 338.9 225.6 747.4 483.9 0.7989
T71025 1892 697.2 1103 675.2 0.7977
X54942 123 86.76 337.1 299.6 0.7932
R08183 356.8 248.3 840.3 593.4 0.7932
M76378.1 1306 805.3 490.9 375.2 0.792
D31885 354.3 252.2 768.2 563.3 0.7886
H06524 362.9 272.6 144.6 183 0.7875
X55715 851.7 464.2 1555 833.8 0.7852
  WilcoxRes.p Frequency
R87126 0 0.852
H08393 0 0.8867
R36977 0 0.5187
M22382 0 0.4813
M26383 1e-04 0.608
H40095 0 0.548
X63629 0 0.8347
J05032 0 0.4213
X12671 0 0.5707
Z50753 0 0.8027
J02854 0 0.7293
U09564 0 0.412
H43887 0 0.6627
M63391 0 0.7893
M76378.2 0 0.5373
M36634 0 0.3187
T86473 0 0.356
X14958 0 0.3373
M76378 0 0.3693
T47377 1e-04 0.7773
U30825 1e-04 0.312
M26697 1e-04 0.1707
R84411 1e-04 0.4813
T71025 0 0.792
X54942 1e-04 0.3973
R08183 1e-04 0.5
M76378.1 0 0.2613
D31885 2e-04 0.1613
H06524 1e-04 0.224
X55715 2e-04 0.3253