1 FRESA.CAD Ordinal Regresion Benchmark

1.1 Wine Quality Data sets

WhiteWinequality  <- read.csv("./Wine/winequality-white (1).csv")

WhiteWinequality$quality[WhiteWinequality$quality < 4] <- 4
WhiteWinequality$quality[WhiteWinequality$quality > 8] <- 8
#WhiteWinequality_mat <- as.data.frame(model.matrix(quality~.*.,WhiteWinequality))
WhiteWinequality_mat <- as.data.frame(model.matrix(quality~.,WhiteWinequality))
WhiteWinequality_mat$`(Intercept)` <- NULL
WhiteWinequality_mat$quality <- WhiteWinequality$quality
fnames <- colnames(WhiteWinequality_mat)
fnames <- str_replace_all(fnames," ","_")
fnames <- str_replace_all(fnames,"/","_")
fnames <- str_replace_all(fnames,":",".")
colnames(WhiteWinequality_mat) <- fnames

#WhiteWinequality_mat <- FRESAScale(WhiteWinequality_mat,method="RankInv")$scaledData

ExperimentName <- "White_Wine_Quality"
bswimsReps <- 3;
theData <- WhiteWinequality_mat;
theOutcome <- "quality";
reps <- 10;
fraction <- 0.20;

FRESAFileName <- paste(ExperimentName,"FRESAMethod.RDATA",sep = "_")
CVFileName <- paste(ExperimentName,"CVMethod.RDATA",sep = "_")
sampleTrain <- sample(nrow(theData),nrow(theData)*0.3)
sampleData <- theData[sampleTrain,]
testdata <- theData[-sampleTrain,]

1.2 Benchmark


bmodel <- BSWiMS.model(formula = paste(theOutcome," ~ 1"),data = sampleData,NumberofRepeats = bswimsReps )

nmodel <- NAIVE_BAYES(formula = quality ~ .,data = sampleData,pca = TRUE,usekernel = TRUE)
pre <- predict(nmodel,testdata)
table(pre,testdata$quality)
nmodel <- NAIVE_BAYES(formula = quality ~ .,data = sampleData,pca = FALSE,usekernel = FALSE)
pre <- predict(nmodel,testdata)
table(pre,testdata$quality)


save(bmodel,file = FRESAFileName)
load(file = FRESAFileName)

par(mfrow = c(2,2));

cp <- OrdinalBenchmark(theData,theOutcome,reps,fraction)

par(mfrow = c(1,1));

save(cp,file = CVFileName)
load(file = CVFileName)

1.3 Results

1.3.1 Model Selection Results


hm <- heatMaps(Outcome = "Outcome",data = cp$testPredictions,title = "Heat Map",Scale = TRUE,hCluster = "col",cexRow = 0.25,cexCol = 0.75,srtCol = 45) 


#The Times
pander::pander(cp$cpuElapsedTimes)
BSWiMS RF RPART LASSO SVM KNN ENS
9.261 0.44 0.028 3.748 0.164 0.005 13.65

pr <- plot(cp,prefix = paste(ExperimentName,":"));


pander::pander(pr$metrics,caption = ExperimentName,round = 3)
White_Wine_Quality
  RF ENS SVM BSWiMS KNN RPART LASSO
KAPPA 0.519 0.501 0.489 0.436 0.428 0.426 0.416
BMAE 0.603 0.717 0.709 0.909 0.813 0.804 0.896
Kendall 0.506 0.49 0.479 0.425 0.433 0.434 0.422
Bias 0.053 0.072 0.013 0.108 0.149 0.241 0.085
ACC 0.435 0.388 0.376 0.367 0.34 0.363 0.325
SEN 0.529 0.46 0.472 0.382 0.428 0.425 0.408
AUC 0.698 0.656 0.662 0.612 0.639 0.633 0.624
pander::pander(pr$metrics_filter,caption = ExperimentName,round = 3)
White_Wine_Quality
  LASSO RF.ref mRMR RPART F.Test Kendall BSWiMS
KAPPA 0.454 0.454 0.435 0.437 0.436 0.424 0.444
BMAE 0.803 0.774 0.824 0.792 0.824 0.832 0.81
Kendall 0.446 0.455 0.44 0.435 0.442 0.43 0.435
ACC 0.371 0.371 0.364 0.365 0.356 0.352 0.368
SEN 0.424 0.427 0.406 0.429 0.425 0.409 0.431
AUC 0.639 0.639 0.629 0.641 0.639 0.631 0.64

1.3.2 Radar Plots

op <- par(no.readonly = TRUE)

par(mfrow = c(1,2),xpd = TRUE,pty = "s",mar = c(1,1,1,1))

mNames <- names(cp$cpuElapsedTimes)

classRanks <- c(pr$minMaxMetrics$BMAE[1],pr$minMaxMetrics$KAPPA[2],pr$minMaxMetrics$Kendall[2],pr$minMaxMetrics$ACC[2],pr$minMaxMetrics$SEN[2],pr$minMaxMetrics$AUC[2],min(cp$cpuElapsedTimes))
classRanks <- rbind(classRanks,c(pr$minMaxMetrics$BMAE[2],0,0,0,0,0,max(cp$cpuElapsedTimes)))
classRanks <- as.data.frame(rbind(classRanks,cbind(t(pr$metrics[c("BMAE","KAPPA","Kendall","ACC","SEN","AUC"),mNames]),cp$cpuElapsedTimes)))
colnames(classRanks) <- c("BMAE","KAPPA","Kendall","ACC","SEN","AUC","CPU")

classRanks$BMAE <- -classRanks$BMAE
classRanks$CPU <- -classRanks$CPU

colors_border = c( rgb(1.0,0.0,0.0,1.0), rgb(0.0,1.0,0.0,1.0) , rgb(0.0,0.0,1.0,1.0), rgb(0.2,0.2,0.0,1.0), rgb(0.0,1.0,1.0,1.0), rgb(1.0,0.0,1.0,1.0), rgb(0.0,0.0,0.0,1.0), rgb(1.0,1.0,0.0,1.0) )
colors_in = c( rgb(1.0,0.0,0.0,0.05), rgb(0.0,1.0,0.0,0.05) , rgb(0.0,0.0,1.0,0.05),rgb(1.0,1.0,0.0,0.05), rgb(0.0,1.0,1.0,0.05) , rgb(1.0,0.0,1.0,0.05), rgb(0.0,0.0,0.0,0.05), rgb(1.0,1.0,0.0,0.05) )
fmsb::radarchart(classRanks,axistype = 0,maxmin = T,pcol = colors_border,pfcol = colors_in,plwd = c(6,2,2,2,2,2,2),plty = 1, cglcol = "grey", cglty = 1,axislabcol = "black",cglwd = 0.8, vlcex  = 0.5 ,title = "Prediction Model")

legend("topleft",legend = rownames(classRanks[-c(1,2),]),bty = "n",pch = 20,col = colors_in,text.col = colors_border,cex = 0.5,pt.cex = 2)


filnames <- c("BSWiMS","LASSO","RF.ref","F.Test","Kendall","mRMR")

filterRanks <- c(pr$minMaxMetrics$BMAE[1],pr$minMaxMetrics$KAPPA[2],pr$minMaxMetrics$Kendall[2],pr$minMaxMetrics$ACC[2],pr$minMaxMetrics$SEN[2],pr$minMaxMetrics$AUC[2],max(cp$jaccard),min(cp$featsize));

filterRanks <- rbind(filterRanks,c(pr$minMaxMetrics$BMAE[2],0,0,0,0,0,min(cp$jaccard),max(cp$featsize)));

filterRanks <- as.data.frame(rbind(filterRanks,cbind(t(pr$metrics_filter[c("BMAE","KAPPA","Kendall","ACC","SEN","AUC"),filnames]),cp$jaccard[filnames],cp$featsize[filnames])));
colnames(filterRanks) <- c("BMAE","KAPPA","Kendall","ACC","SEN","AUC","Jaccard","SIZE")
filterRanks$BMAE <- -filterRanks$BMAE
filterRanks$SIZE <- -filterRanks$SIZE

colors_border = c( rgb(1.0,0.0,0.0,1.0), rgb(0.0,1.0,0.0,1.0) , rgb(0.0,0.0,1.0,1.0), rgb(0.2,0.2,0.0,1.0), rgb(0.0,1.0,1.0,1.0), rgb(1.0,0.0,1.0,1.0), rgb(0.0,0.0,0.0,1.0) )
colors_in = c( rgb(1.0,0.0,0.0,0.05), rgb(0.0,1.0,0.0,0.05) , rgb(0.0,0.0,1.0,0.05),rgb(1.0,1.0,0.0,0.05), rgb(0.0,1.0,1.0,0.05) , rgb(1.0,0.0,1.0,0.05), rgb(0.0,0.0,0.0,0.05) )
fmsb::radarchart(filterRanks,axistype = 0,maxmin = T,pcol = colors_border,pfcol = colors_in,plwd = c(6,2,2,2,2,2,2),plty = 1, cglcol = "grey", cglty = 1,axislabcol = "black",cglwd = 0.8, vlcex  = 0.6,title = "Filter Method" )


legend("topleft",legend = rownames(filterRanks[-c(1,2),]),bty = "n",pch = 20,col = colors_in,text.col = colors_border,cex = 0.5,pt.cex = 2)


par(mfrow = c(1,1))
par(op)

1.3.3 Features Analysis


pander::pander(summary(bmodel),caption = "Model",round = 3)
    • coefficients:

      Table continues below
        Estimate lower OR upper u.Accuracy
      volatile.acidity -1.981 0.1043 0.138 0.1825 0.7268
      fixed.acidity -0.4781 0.5795 0.6199 0.6632 0.6428
      citric.acid 3.638 22.75 38.02 63.53 0.5209
      Table continues below
        r.Accuracy full.Accuracy u.AUC r.AUC full.AUC
      volatile.acidity 0.9646 0.7268 0.6301 0.5 0.6301
      fixed.acidity 0.5209 0.6641 0.5741 0.5371 0.6855
      citric.acid 0.6428 0.6641 0.5371 0.5741 0.6855
        IDI NRI z.IDI z.NRI Frequency
      volatile.acidity 0.1291 0.5204 20.3 14.71 0.5
      fixed.acidity 0.09454 0.499 17.76 13.85 0.5
      citric.acid 0.08178 0.6167 16.62 17.34 0.5
    • Accuracy: 0.6671
    • tAUC: 0.6793
    • sensitivity: 0.6662
    • specificity: 0.6923
    • bootstrap:

    • coefficients:

      Table continues below
        Estimate lower OR upper
      volatile.acidity -4.476 0.00605 0.01138 0.0214
      total.sulfur.dioxide -0.003241 0.9963 0.9968 0.9972
      alcohol 0.5121 1.454 1.669 1.915
      residual.sugar 0.1226 1.094 1.13 1.168
      density -210.5 7.816e-132 3.843e-92 1.89e-52
      fixed.acidity -0.06673 0.904 0.9354 0.968
      Table continues below
        u.Accuracy r.Accuracy full.Accuracy u.AUC
      volatile.acidity 0.625 0.6687 0.714 0.5987
      total.sulfur.dioxide 0.63 0.5311 0.6286 0.6283
      alcohol 0.6705 0.717 0.714 0.6865
      residual.sugar 0.5761 0.7092 0.714 0.5581
      density 0.6332 0.7097 0.714 0.6266
      fixed.acidity 0.5311 0.63 0.6286 0.5172
      Table continues below
        r.AUC full.AUC IDI NRI z.IDI
      volatile.acidity 0.6969 0.7244 0.06198 0.4864 12.08
      total.sulfur.dioxide 0.5172 0.622 0.04717 0.5249 9.979
      alcohol 0.7238 0.7244 0.02385 0.2937 6.742
      residual.sugar 0.7133 0.7244 0.02184 0.3064 6.662
      density 0.7156 0.7244 0.009967 0.1542 4.397
      fixed.acidity 0.6283 0.622 0.007317 0.02281 3.712
        z.NRI Frequency
      volatile.acidity 11.19 0.5
      total.sulfur.dioxide 12.06 0.5
      alcohol 6.588 0.5
      residual.sugar 6.902 0.5
      density 3.468 0.5
      fixed.acidity 0.5093 0.5
    • Accuracy: 0.7161
    • tAUC: 0.7271
    • sensitivity: 0.6945
    • specificity: 0.7598
    • bootstrap:

    • coefficients:

      Table continues below
        Estimate lower OR upper
      alcohol 0.3864 1.394 1.472 1.554
      total.sulfur.dioxide -0.00414 0.9952 0.9959 0.9965
      chlorides -8.093 5.945e-06 0.0003057 0.01572
      pH 0.4435 1.243 1.558 1.954
      sulphates 0.5859 1.295 1.797 2.492
      Table continues below
        u.Accuracy r.Accuracy full.Accuracy u.AUC
      alcohol 0.7161 0.63 0.7155 0.7163
      total.sulfur.dioxide 0.559 0.5567 0.573 0.5797
      chlorides 0.6179 0.7144 0.7155 0.6371
      pH 0.5753 0.6598 0.6703 0.556
      sulphates 0.5472 0.6107 0.6182 0.5119
      Table continues below
        r.AUC full.AUC IDI NRI z.IDI
      alcohol 0.6508 0.7195 0.1053 0.5487 17.25
      total.sulfur.dioxide 0.5267 0.5876 0.03412 0.332 8.984
      chlorides 0.7155 0.7195 0.006584 -0.03315 3.963
      pH 0.6689 0.6812 0.005449 0.1663 3.552
      sulphates 0.6245 0.6259 0.005288 0.08003 3.412
        z.NRI Frequency
      alcohol 13.71 0.5
      total.sulfur.dioxide 8.111 0.5
      chlorides -0.8017 0.5
      pH 4.027 0.5
      sulphates 1.946 0.5
    • Accuracy: 0.7134
    • tAUC: 0.7224
    • sensitivity: 0.7382
    • specificity: 0.7066
    • bootstrap:

    • coefficients:

      Table continues below
        Estimate lower OR upper u.Accuracy
      alcohol 0.4555 1.479 1.577 1.682 0.713
      fixed.acidity -0.2425 0.7578 0.7846 0.8125 0.5381
      Table continues below
        r.Accuracy full.Accuracy u.AUC r.AUC full.AUC
      alcohol 0.0388 0.713 0.7186 0.5 0.7186
      fixed.acidity 0.0388 0.5381 0.5386 0.5 0.5386
        IDI NRI z.IDI z.NRI Frequency
      alcohol 0.2484 0.877 31.07 25.96 0.5
      fixed.acidity 0.03335 0.1538 9.742 4.102 0.5
    • Accuracy: 0.725
    • tAUC: 0.7391
    • sensitivity: 0.7544
    • specificity: 0.7238
    • bootstrap:


topFeat <- min(ncol(bmodel$bagging$formulaNetwork),30);
shortformulaNetwork <- bmodel$bagging$formulaNetwork[1:topFeat,1:topFeat]
validf <- diag(shortformulaNetwork) > 0.1
gplots::heatmap.2(shortformulaNetwork[validf,validf],trace="none",mar = c(10,10),main = "B:SWiMS Formula Network",cexRow = 0.6,cexCol = 0.6)



rm <- rowMeans(cp$featureSelectionFrequency)
selFrequency <- cp$featureSelectionFrequency[rm > 0.10,]


gplots::heatmap.2(selFrequency,trace = "none",mar = c(10,10),main = "Features",cexRow = 0.2)

hm <- heatMaps(Outcome = theOutcome,data = theData[,c(theOutcome,rownames(selFrequency))],title = "Heat Map",Scale = TRUE,hCluster = "col",cexRow = 0.3,cexCol = 0.3,srtCol = 45)


vlist <- rownames(selFrequency)
vlist <- cbind(vlist,vlist)
univ <- univariateRankVariables(variableList = vlist,formula = paste(theOutcome,"~1"),Outcome = theOutcome,data = theData,type = "LM",rankingTest = "Ztest",uniType = "Regression")[,c("cohortMean","cohortStd","kendall.r","kendall.p")] 


cnames <- colnames(univ);
univ <- cbind(univ,rm[rownames(univ)])
colnames(univ) <- c(cnames,"Frequency")
univ <- univ[order(-univ[,5]),]
pander::pander(univ[1:topFeat,],caption = "Features",round = 4)
Features (continued below)
  cohortMean cohortStd kendall.r kendall.p
volatile.acidity 0.2782 0.1008 -0.1549 0
free.sulfur.dioxide 35.31 17.01 0.0173 0.115
alcohol 10.51 1.231 0.3468 0
density 0.994 0.003 -0.2666 0
fixed.acidity 6.855 0.8439 -0.0655 0
total.sulfur.dioxide 138.4 42.5 -0.1512 0
chlorides 0.0458 0.0218 -0.2449 0
pH 3.188 0.151 0.0844 0
citric.acid 0.3342 0.121 0.0146 0.1864
residual.sugar 6.391 5.072 -0.063 0
  Frequency
volatile.acidity 1
free.sulfur.dioxide 1
alcohol 0.9857
density 0.9714
fixed.acidity 0.8286
total.sulfur.dioxide 0.7857
chlorides 0.7429
pH 0.7429
citric.acid 0.5571
residual.sugar 0.5429