Loading the libraries and setting up functions

library("epiR")
library("FRESA.CAD")
library(network)
library(GGally)
library("e1071")
library("gplots")

error.bar <- function(x, y, upper, lower=upper, length=0.05,...){
if(length(x) != length(y) | length(y) !=length(lower) | length(lower) != length(upper))
stop("vectors must be same length")
arrows(x,y+upper, x, y-lower, angle=90, code=3, length=length, ...)
}

barPlotCiError<-  function(citable,metricname,thesets,themethod,main,...)
{
colnames(citable) <- c(metricname,"lower","upper")
rownames(citable) <- rep(thesets,length(themethod))
pander::pander(citable,caption=main,round = 3)
citable <- citable[order(rep(1:length(thesets),length(themethod))),]
barmatrix <- matrix(citable[,1],length(themethod),length(thesets))
colnames(barmatrix) <- thesets
rownames(barmatrix) <- themethod
pander::pander(barmatrix,caption=main,round = 3)



barp <- barplot(barmatrix,cex.names=0.7,las=2,ylim=c(0.0,1.0),main=main,ylab=metricname,beside=TRUE,legend = themethod, xaxt="n",...)
xpos <- t(barp[as.integer(length(themethod)/2)+1,])

text(cex=0.7, x=xpos, y=-0.1, thesets, xpd=TRUE, srt=45)

error.bar(barp,citable[,1],citable[,3]-citable[,1],citable[,1]-citable[,2])

return(barp)
}

Loading the data sets



#trainLabeled <- read.delim("./Arcene/ARCENE/trainSet.txt")
#validLabeled <- read.delim("./Arcene/ARCENE/arcene_valid.txt")
#testLabeled <- read.delim("./Arcene/ARCENE/arcene_test.txt")

trainLabeled <- read.delim("trainSet.txt")
validLabeled <- read.delim("arcene_valid.txt")
testLabeled <- read.delim("arcene_test.txt")

trainLabeled$Labels <-  1*(trainLabeled$Labels>0)
validLabeled$Labels <-  1*(validLabeled$Labels>0)


arcene.norm <- rbind(trainLabeled,validLabeled)

Some functions to collect performance data

diagTable <- NULL
citable <- NULL
errorcitable <- NULL

thesets <- c("BSWIMS","Fwd:Ensemble","SVM:BSWIMS","SVM:Forward")
theMethod <- c("CV Test","Val. Full","Val. CV Features")


collectPerfomance<-  function(bswimsmodel)
{
diagTable <- NULL
citable <- NULL
errorcitable <- NULL


pm<-plotModels.ROC(bswimsmodel$cvObject$Models.testPrediction,main="B:SWiMS",predictor="Prediction",theCVfolds=10,cex=0.90)
diagTable$BaggingBSWIMS <- pm$predictionTable
ci <- epi.tests(diagTable$BaggingBSWIMS)
citable <- rbind(citable,ci$elements$diag.acc)
errorcitable <- rbind(errorcitable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))

pm<-plotModels.ROC(bswimsmodel$cvObject$Models.testPrediction,main="Ensemble B:SWiMS",predictor="Ensemble.B.SWiMS",theCVfolds=10,cex=0.90)
diagTable$BaggingBSWIMS <- pm$predictionTable
ci <- epi.tests(diagTable$BaggingBSWIMS)
citable <- rbind(citable,ci$elements$diag.acc)
errorcitable <- rbind(errorcitable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))


bswimsmodel$cvObject$Models.testPrediction$usrFitFunction_Sel <- bswimsmodel$cvObject$Models.testPrediction$usrFitFunction_Sel -0.5
bswimsmodel$cvObject$Models.testPrediction$usrFitFunction <- bswimsmodel$cvObject$Models.testPrediction$usrFitFunction -0.5

pm<-plotModels.ROC(bswimsmodel$cvObject$Models.testPrediction,main="SVM B:SWiMS Features",predictor="usrFitFunction_Sel",theCVfolds=10,cex=0.90)
diagTable$BaggingBSWIMS <- pm$predictionTable
ci <- epi.tests(diagTable$BaggingBSWIMS)
citable <- rbind(citable,ci$elements$diag.acc)
errorcitable <- rbind(errorcitable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))


pm<-plotModels.ROC(bswimsmodel$cvObject$Models.testPrediction,main="SVM: Top",predictor="usrFitFunction",theCVfolds=10,cex=0.90)
diagTable$BaggingBSWIMS <- pm$predictionTable
ci <- epi.tests(diagTable$BaggingBSWIMS)
citable <- rbind(citable,ci$elements$diag.acc)
errorcitable <- rbind(errorcitable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))

pr <- predict(bswimsmodel$BSWiMS.model,validLabeled)
pm<-plotModels.ROC(cbind(as.vector(validLabeled$Labels),pr),main="B:SWiMS")
diagTable$BaggingBSWIMS <- pm$predictionTable
ci <- epi.tests(diagTable$BaggingBSWIMS)
citable <- rbind(citable,ci$elements$diag.acc)
errorcitable <- rbind(errorcitable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))


prm2 <- ensemblePredict(bswimsmodel$BSWiMS.models$forward.selection.list,trainLabeled,validLabeled)
pm<-plotModels.ROC(cbind(as.vector(validLabeled$Labels),prm2$ensemblePredict),main="Ensemble Forward")
diagTable$EnsembleForward <- pm$predictionTable
ci <- epi.tests(diagTable$EnsembleForward)
citable <- rbind(citable,ci$elements$diag.acc)
errorcitable <- rbind(errorcitable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))


ds <- trainLabeled[,c("Labels",names(bswimsmodel$BSWiMS.models$bagging$frequencyTable))]
psvm <- svm(formula("Labels ~ ."),ds)
prsvm <- predict(psvm,validLabeled)-0.5
pm<-plotModels.ROC(cbind(as.vector(validLabeled$Labels),prsvm),main="SVM: B:SWiMS Selection")
diagTable$SVM_BaggingBSWIMS <- pm$predictionTable
ci <- epi.tests(diagTable$SVM_BaggingBSWIMS)
citable <- rbind(citable,ci$elements$diag.acc)
errorcitable <- rbind(errorcitable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))


bg <- baggedModel(bswimsmodel$BSWiMS.models$forward.selection.list,trainLabeled,type="LOGIT",univariate=bswimsmodel$univariateAnalysis)

ds <- trainLabeled[,c("Labels",names(bg$frequencyTable))]
psvm <- svm(formula("Labels ~ ."),ds)
prsvm <- predict(psvm,validLabeled)-0.5
pm<-plotModels.ROC(cbind(as.vector(validLabeled$Labels),prsvm),main="SVM: Forward Selection")
diagTable$SVM_BaggingForward <- pm$predictionTable
ci <- epi.tests(diagTable$SVM_BaggingForward)
citable <- rbind(citable,ci$elements$diag.acc)
errorcitable <- rbind(errorcitable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))


bg <- baggedModel(bswimsmodel$cvObject$allBSWiMSFormulas.list,trainLabeled,type="LOGIT",univariate=bswimsmodel$univariateAnalysis)
pr <- predict(bg$bagged.model,validLabeled)
pm<-plotModels.ROC(cbind(as.vector(validLabeled$Labels),pr),main="CV B:SWiMS")
diagTable$BaggingBSWIMS <- pm$predictionTable
ci <- epi.tests(diagTable$BaggingBSWIMS)
citable <- rbind(citable,ci$elements$diag.acc)
errorcitable <- rbind(errorcitable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))

pr <- ensemblePredict(bswimsmodel$cvObject$allBSWiMSFormulas.list,trainLabeled,validLabeled)
pm<-plotModels.ROC(cbind(as.vector(validLabeled$Labels),pr$ensemblePredict),main="CV Ensemble B:SWiMS")
diagTable$BaggingBSWIMS <- pm$predictionTable
ci <- epi.tests(diagTable$BaggingBSWIMS)
citable <- rbind(citable,ci$elements$diag.acc)
errorcitable <- rbind(errorcitable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))

#ds <- trainLabeled[,c("Labels",names(bg$frequencyTable))]
ds <- trainLabeled[,c("Labels",names(bg$bagged.model$coefficients)[-1])]
psvm <- svm(formula("Labels ~ ."),ds)
prsvm <- predict(psvm,validLabeled)-0.5
pm<-plotModels.ROC(cbind(as.vector(validLabeled$Labels),prsvm),main="SVM: CV B:SWiMS Features")
diagTable$SVM_BaggingForward <- pm$predictionTable
ci <- epi.tests(diagTable$SVM_BaggingForward)
citable <- rbind(citable,ci$elements$diag.acc)
errorcitable <- rbind(errorcitable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))

ds <- trainLabeled[,c("Labels",as.character(bswimsmodel$univariateAnalysis$Name[1:bswimsmodel$used.variables]))]
psvm <- svm(formula("Labels ~ ."),ds)
prsvm <- predict(psvm,validLabeled)-0.5
pm<-plotModels.ROC(cbind(as.vector(validLabeled$Labels),prsvm),main="SVM: Top Univariate")
diagTable$SVM_BaggingForward <- pm$predictionTable
ci <- epi.tests(diagTable$SVM_BaggingForward)
citable <- rbind(citable,ci$elements$diag.acc)
errorcitable <- rbind(errorcitable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))


result <- list(errorcitable=errorcitable,citable=citable,diagTable=diagTable)
return(result)
}

Modeling the data


arceneCVOne <- FRESA.Model(formula = Labels ~ 1,data = trainLabeled,CVfolds = 10,repeats = 5,filter.p.value = 0.05,usrFitFun=svm)

save(arceneCVOne,file="ArceneCV_10_5_005_SVM.RDATA")

The performance



pc <- collectPerfomance(arceneCVOne)

Num. Models: 448 To Test: 491 TopFreq: 77 Thrf: 2 Removed: 414 …………………………………….. Num. Models: 607 To Test: 260 TopFreq: 50 Thrf: 1 Removed: 192 ……………………………………………………

citable <- pc$citable
errorcitable <- pc$errorcitable



bp <- barPlotCiError(as.matrix(citable),metricname="Accuracy",thesets=thesets,themethod=theMethod,main="Accuracy",args.legend = list(x = "bottomright"))

bp <- barPlotCiError(as.matrix(errorcitable),metricname="Balanced Error",thesets=thesets,themethod=theMethod,main="Balanced Error",args.legend = list(x = "topright"))

Generating two fold CV Models using all the data



arceneCV <- FRESA.Model(formula = Labels ~ 1,data = arcene.norm,filter.p.value = 0.05,CVfolds = 3,repeats = 5,usrFitFun=svm)


save(arceneCV,file="ArceneV_001_2_5_wsvn_v2.RDATA")

arceneCV$cvObject$Models.testPrediction$usrFitFunction_Sel <- arceneCV$cvObject$Models.testPrediction$usrFitFunction_Sel -0.5
arceneCV$cvObject$Models.testPrediction$usrFitFunction <- arceneCV$cvObject$Models.testPrediction$usrFitFunction -0.5

CVACCTable <- NULL
CVBETable <- NULL
pm <- plotModels.ROC(arceneCV$cvObject$LASSO.testPredictions,theCVfolds=2,main="CV LASSO",cex=0.90)

ci <- epi.tests(pm$predictionTable)
CVACCTable <- rbind(CVACCTable,ci$elements$diag.acc)
CVBETable <- rbind(CVBETable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))
pm <- plotModels.ROC(arceneCV$cvObject$Models.testPrediction,theCVfolds=2,predictor="Prediction",main="B:SWiMS",cex=0.90)

ci <- epi.tests(pm$predictionTable)
CVACCTable <- rbind(CVACCTable,ci$elements$diag.acc)
CVBETable <- rbind(CVBETable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))
pm <- plotModels.ROC(arceneCV$cvObject$Models.testPrediction,theCVfolds=2,predictor="Ensemble.B.SWiMS",main="Ensemble B:SWiMS",cex=0.90)

ci <- epi.tests(pm$predictionTable)
CVACCTable <- rbind(CVACCTable,ci$elements$diag.acc)
CVBETable <- rbind(CVBETable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))
pm <- plotModels.ROC(arceneCV$cvObject$Models.testPrediction,theCVfolds=2,predictor="Ensemble.Forward",main="Forward Median",cex=0.90)

ci <- epi.tests(pm$predictionTable)
CVACCTable <- rbind(CVACCTable,ci$elements$diag.acc)
CVBETable <- rbind(CVBETable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))
pm <- plotModels.ROC(arceneCV$cvObject$Models.testPrediction,theCVfolds=2,predictor="usrFitFunction",main="SVM",cex=0.90)

ci <- epi.tests(pm$predictionTable)
CVACCTable <- rbind(CVACCTable,ci$elements$diag.acc)
CVBETable <- rbind(CVBETable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))
pm <- plotModels.ROC(arceneCV$cvObject$Models.testPrediction,theCVfolds=2,predictor="usrFitFunction_Sel",main="SVM",cex=0.90)

ci <- epi.tests(pm$predictionTable)
CVACCTable <- rbind(CVACCTable,ci$elements$diag.acc)
CVBETable <- rbind(CVBETable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))

CVthesets <- c("LASSO","B:SWiMS","B:SWiMS:Ens","Fwd:Ensemble","SVM:Filterd","SVM:BSWIMS")

bp <- barPlotCiError(as.matrix(CVACCTable),metricname="Accuracy",thesets=CVthesets,themethod=c("CV"),main="Accuracy",args.legend = list(x = "bottomright"))


bp <- barPlotCiError(as.matrix(CVBETable),metricname="Balanced Error",thesets=CVthesets,themethod=c("CV"),main="Balanced Error",args.legend = list(x = "topright"))