1 FRESA.CAD Regresion Benchmark

1.1 VLBW Data Set

library(readr)


VLBW <- read_delim("./VLBW/vlbw.txt","\t", escape_double = FALSE, trim_ws = TRUE)

VLBW$race <- NULL

VLBW <- VLBW[complete.cases(VLBW),-1]
#VLBW_mat <- as.data.frame(model.matrix(apg1~.*.,VLBW))
VLBW_mat <- as.data.frame(model.matrix(apg1~.,VLBW))
VLBW_mat$`(Intercept)` <- NULL
VLBW_mat$apg1 <- as.integer(VLBW$apg1)
VLBW_mat$apg1[VLBW_mat$apg1 <= 1] <- 1
VLBW_mat$apg1[VLBW_mat$apg1 == 2] <- 1
VLBW_mat$apg1[VLBW_mat$apg1 == 4] <- 3
VLBW_mat$apg1[VLBW_mat$apg1 == 5] <- 3
VLBW_mat$apg1[VLBW_mat$apg1 == 7] <- 6
VLBW_mat$apg1[VLBW_mat$apg1 > 8] <- 8

fnames <- colnames(VLBW_mat)
fnames <- str_replace_all(fnames," ","_")
fnames <- str_replace_all(fnames,"/","_")
fnames <- str_replace_all(fnames,":",".")
colnames(VLBW_mat) <- fnames


ExperimentName <- "VLBW"
bswimsReps <- 20;
theData <- VLBW_mat;
theOutcome <- "apg1";
reps <- 30;
fraction <- 0.5;

FRESAFileName <- paste(ExperimentName,"FRESAMethod.RDATA",sep = "_")
CVFileName <- paste(ExperimentName,"CVMethod.RDATA",sep = "_")
sampleTrain <- sample(nrow(theData),nrow(theData)*0.5)
sampleData <- theData[sampleTrain,]
testdata <- theData[-sampleTrain,]

1.2 Benchmark


bmodel <- BSWiMS.model(formula = paste(theOutcome," ~ 1"),data = sampleData,NumberofRepeats = bswimsReps )
save(bmodel,file = FRESAFileName)
#load(file = FRESAFileName)

par(mfrow = c(2,2));
cp <- OrdinalBenchmark(theData,theOutcome,reps,fraction)

par(mfrow = c(1,1));

save(cp,file = CVFileName)
#load(file = CVFileName)

1.3 Results

1.3.1 Model Selection Results


hm <- heatMaps(Outcome = "Outcome",data = cp$testPredictions,title = "Heat Map",Scale = TRUE,hCluster = "col",cexRow = 0.25,cexCol = 0.75,srtCol = 45) 


#The Times
pander::pander(cp$cpuElapsedTimes)
BSWiMS RF RPART LASSO SVM KNN ENS
2.874 0.133 0.01 0.5803 0.01633 0.009 3.623

par(mfrow = c(2,1));
pr <- plot(cp,prefix = paste(ExperimentName,":"));

par(mfrow = c(1,1));

pander::pander(pr$metrics,caption = ExperimentName,round = 3)
VLBW
  BSWiMS RF RPART LASSO SVM KNN ENS
KAPPA 0.401 0.386 0.354 0.385 0.4 0.376 0.398
BMAE 2.238 2.183 2.208 2.236 2.212 2.24 2.231
Kendall 0.348 0.321 0.296 0.317 0.335 0.312 0.332
Bias 0.091 0.088 0.335 0.162 0.206 0.296 0.194
ACC 0.372 0.354 0.31 0.343 0.358 0.326 0.343
SEN 0.374 0.353 0.311 0.344 0.361 0.329 0.344
AUC 0.588 0.577 0.558 0.57 0.581 0.562 0.57
pander::pander(pr$metrics_filter,caption = ExperimentName,round = 3)
VLBW
  BSWiMS LASSO RPART RF.ref F.Test Kendall mRMR
KAPPA 0.39 0.383 0.387 0.378 0.375 0.369 0.4
BMAE 2.24 2.197 2.194 2.215 2.272 2.279 2.215
Kendall 0.327 0.322 0.328 0.307 0.312 0.303 0.335
ACC 0.343 0.358 0.358 0.339 0.358 0.343 0.358
SEN 0.345 0.359 0.362 0.339 0.36 0.347 0.36
AUC 0.571 0.58 0.577 0.566 0.578 0.569 0.581

1.3.2 Radar Plots

op <- par(no.readonly = TRUE)

par(mfrow = c(1,2),xpd = TRUE,pty = "s",mar = c(1,1,1,1))

mNames <- names(cp$cpuElapsedTimes)

classRanks <- c(pr$minMaxMetrics$BMAE[1],pr$minMaxMetrics$KAPPA[2],pr$minMaxMetrics$Kendall[2],pr$minMaxMetrics$ACC[2],pr$minMaxMetrics$SEN[2],pr$minMaxMetrics$AUC[2],min(cp$cpuElapsedTimes))
classRanks <- rbind(classRanks,c(pr$minMaxMetrics$BMAE[2],0,0,0,0,0,max(cp$cpuElapsedTimes)))
classRanks <- as.data.frame(rbind(classRanks,cbind(t(pr$metrics[c("BMAE","KAPPA","Kendall","ACC","SEN","AUC"),mNames]),cp$cpuElapsedTimes)))
colnames(classRanks) <- c("BMAE","KAPPA","Kendall","ACC","SEN","AUC","CPU")

classRanks$BMAE <- -classRanks$BMAE
classRanks$CPU <- -classRanks$CPU

colors_border = c( rgb(1.0,0.0,0.0,1.0), rgb(0.0,1.0,0.0,1.0) , rgb(0.0,0.0,1.0,1.0), rgb(0.2,0.2,0.0,1.0), rgb(0.0,1.0,1.0,1.0), rgb(1.0,0.0,1.0,1.0), rgb(0.0,0.0,0.0,1.0), rgb(1.0,1.0,0.0,1.0) )
colors_in = c( rgb(1.0,0.0,0.0,0.05), rgb(0.0,1.0,0.0,0.05) , rgb(0.0,0.0,1.0,0.05),rgb(1.0,1.0,0.0,0.05), rgb(0.0,1.0,1.0,0.05) , rgb(1.0,0.0,1.0,0.05), rgb(0.0,0.0,0.0,0.05), rgb(1.0,1.0,0.0,0.05) )
fmsb::radarchart(classRanks,axistype = 0,maxmin = T,pcol = colors_border,pfcol = colors_in,plwd = c(6,2,2,2,2,2,2),plty = 1, cglcol = "grey", cglty = 1,axislabcol = "black",cglwd = 0.8, vlcex  = 0.5 ,title = "Prediction Model")

legend("topleft",legend = rownames(classRanks[-c(1,2),]),bty = "n",pch = 20,col = colors_in,text.col = colors_border,cex = 0.5,pt.cex = 2)


filnames <- c("BSWiMS","LASSO","RF.ref","F.Test","Kendall","mRMR")

filterRanks <- c(pr$minMaxMetrics$BMAE[1],pr$minMaxMetrics$KAPPA[2],pr$minMaxMetrics$Kendall[2],pr$minMaxMetrics$ACC[2],pr$minMaxMetrics$SEN[2],pr$minMaxMetrics$AUC[2],max(cp$jaccard),min(cp$featsize));

filterRanks <- rbind(filterRanks,c(pr$minMaxMetrics$BMAE[2],0,0,0,0,0,min(cp$jaccard),max(cp$featsize)));

filterRanks <- as.data.frame(rbind(filterRanks,cbind(t(pr$metrics_filter[c("BMAE","KAPPA","Kendall","ACC","SEN","AUC"),filnames]),cp$jaccard[filnames],cp$featsize[filnames])));
colnames(filterRanks) <- c("BMAE","KAPPA","Kendall","ACC","SEN","AUC","Jaccard","SIZE")
filterRanks$BMAE <- -filterRanks$BMAE
filterRanks$SIZE <- -filterRanks$SIZE

colors_border = c( rgb(1.0,0.0,0.0,1.0), rgb(0.0,1.0,0.0,1.0) , rgb(0.0,0.0,1.0,1.0), rgb(0.2,0.2,0.0,1.0), rgb(0.0,1.0,1.0,1.0), rgb(1.0,0.0,1.0,1.0), rgb(0.0,0.0,0.0,1.0) )
colors_in = c( rgb(1.0,0.0,0.0,0.05), rgb(0.0,1.0,0.0,0.05) , rgb(0.0,0.0,1.0,0.05),rgb(1.0,1.0,0.0,0.05), rgb(0.0,1.0,1.0,0.05) , rgb(1.0,0.0,1.0,0.05), rgb(0.0,0.0,0.0,0.05) )
fmsb::radarchart(filterRanks,axistype = 0,maxmin = T,pcol = colors_border,pfcol = colors_in,plwd = c(6,2,2,2,2,2,2),plty = 1, cglcol = "grey", cglty = 1,axislabcol = "black",cglwd = 0.8, vlcex  = 0.6,title = "Filter Method" )


legend("topleft",legend = rownames(filterRanks[-c(1,2),]),bty = "n",pch = 20,col = colors_in,text.col = colors_border,cex = 0.5,pt.cex = 2)


par(mfrow = c(1,1))
par(op)

1.3.3 Features Analysis


pander::pander(summary(bmodel),caption = "Model",round = 3)
    • coefficients:

      Table continues below
        Estimate lower OR upper u.Accuracy r.Accuracy
      vent -0.7376 0.3526 0.4783 0.6487 0.59 0.6003
      gest 0.08306 1.048 1.087 1.127 0.6511 0.6462
      pltct 0.002011 1.001 1.002 1.003 0.648 0.6478
      toc 1.086 1.768 2.963 4.966 0.4351 0.6824
      meth 0.4386 1.176 1.55 2.044 0.59 0.6548
      Table continues below
        full.Accuracy u.AUC r.AUC full.AUC IDI NRI
      vent 0.6637 0.6432 0.6334 0.6753 0.05976 0.5553
      gest 0.6813 0.654 0.6568 0.6831 0.05362 0.5125
      pltct 0.6813 0.6563 0.6529 0.6831 0.05087 0.5931
      toc 0.6658 0.6007 0.6647 0.6768 0.04709 0.4102
      meth 0.6647 0.6376 0.6747 0.6748 0.02758 0.54
        z.IDI z.NRI Frequency
      vent 4.743 5.619 0.475
      gest 4.469 5.084 0.5
      pltct 4.436 5.912 0.5
      toc 4.124 5.458 0.5
      meth 3.111 5.425 0.4
    • Accuracy: 0.682
    • tAUC: 0.6824
    • sensitivity: 0.6816
    • specificity: 0.6833
    • bootstrap:

    • coefficients:

      Table continues below
        Estimate lower OR upper u.Accuracy r.Accuracy
      cld -1.393 0.1453 0.2482 0.4241 0.6653 0.6527
      meth 0.9687 1.626 2.635 4.269 0.6527 0.6653
      vent -0.01166 0.9823 0.9884 0.9945 0.636 0.5523
      bwt 0.0002839 1 1 1 0.6025 0.6109
      lowph 0.1896 1.046 1.209 1.396 0.6025 0.6025
      pltct 0.0003098 1 1 1.001 0.6151 0.6025
      Table continues below
        full.Accuracy u.AUC r.AUC full.AUC IDI NRI
      cld 0.682 0.6644 0.6528 0.6825 0.09896 0.6576
      meth 0.682 0.6528 0.6644 0.6825 0.06108 0.6113
      vent 0.6381 0.6361 0.5512 0.6381 0.05569 0.5361
      bwt 0.638 0.6024 0.6109 0.6378 0.04021 0.3032
      lowph 0.6192 0.6025 0.6024 0.6191 0.02593 0.3091
      pltct 0.6527 0.6151 0.6024 0.6525 0.02376 0.3433
        z.IDI z.NRI Frequency
      cld 5.099 6.008 0.5
      meth 3.934 4.972 0.5
      vent 3.69 4.313 0.05
      bwt 3.162 2.373 0.475
      lowph 2.576 2.423 0.2
      pltct 2.505 2.694 0.25
    • Accuracy: 0.7155
    • tAUC: 0.7156
    • sensitivity: 0.6917
    • specificity: 0.7395
    • bootstrap:

    • coefficients:

      Table continues below
        Estimate lower OR upper u.Accuracy r.Accuracy
      vent -0.02679 0.9665 0.9736 0.9807 0.6444 0.2259
      cld -2.858 0.02585 0.05739 0.1274 0.5063 0.431
      gest 0.09611 1.063 1.101 1.14 0.6176 0.6187
      lowph 0.1663 1.089 1.181 1.281 0.6276 0.6165
      pltct 0.001551 1.001 1.002 1.002 0.6169 0.6179
      pda -1.145 0.1656 0.3181 0.611 0.431 0.5063
      Table continues below
        full.Accuracy u.AUC r.AUC full.AUC IDI NRI
      vent 0.6444 0.6785 0.5 0.6785 0.1243 0.6991
      cld 0.569 0.6745 0.6193 0.702 0.1202 0.6971
      gest 0.6483 0.6007 0.6248 0.6434 0.07467 0.3702
      lowph 0.6592 0.6385 0.6036 0.656 0.04366 0.3988
      pltct 0.6461 0.622 0.6002 0.6409 0.04058 0.4714
      pda 0.569 0.6193 0.6745 0.702 0.03361 0.4769
        z.IDI z.NRI Frequency
      vent 7.221 7.241 0.05
      cld 7.023 9.475 0.5
      gest 5.404 3.641 0.45
      lowph 4.006 3.959 0.075
      pltct 4.001 4.681 0.375
      pda 3.439 6.705 0.5
    • Accuracy: 0.59
    • tAUC: 0.7024
    • sensitivity: 0.9074
    • specificity: 0.4973
    • bootstrap:


topFeat <- min(ncol(bmodel$bagging$formulaNetwork),30);
shortformulaNetwork <- bmodel$bagging$formulaNetwork[1:topFeat,1:topFeat]
validf <- diag(shortformulaNetwork) > 0.1
gplots::heatmap.2(shortformulaNetwork[validf,validf],trace="none",mar = c(10,10),main = "B:SWiMS Formula Network",cexRow = 0.6,cexCol = 0.6)



rm <- rowMeans(cp$featureSelectionFrequency)
selFrequency <- cp$featureSelectionFrequency[rm > 0.10,]


gplots::heatmap.2(selFrequency,trace = "none",mar = c(10,10),main = "Features",cexRow = 0.2)

hm <- heatMaps(Outcome = theOutcome,data = theData[,c(theOutcome,rownames(selFrequency))],title = "Heat Map",Scale = TRUE,hCluster = "col",cexRow = 0.3,cexCol = 0.3,srtCol = 45)


vlist <- rownames(selFrequency)
vlist <- cbind(vlist,vlist)
univ <- univariateRankVariables(variableList = vlist,formula = paste(theOutcome,"~1"),Outcome = theOutcome,data = theData,type = "LM",rankingTest = "Ztest",uniType = "Regression")[,c("cohortMean","cohortStd","kendall.r","kendall.p")] 


cnames <- colnames(univ);
univ <- cbind(univ,rm[rownames(univ)])
colnames(univ) <- c(cnames,"Frequency")
univ <- univ[order(-univ[,5]),]
pander::pander(univ[1:topFeat,],caption = "Features",round = 4)
Features
  cohortMean cohortStd kendall.r kendall.p Frequency
pltct 207 81.4 0.2027 0 0.9857
bwt 1142 235.6 0.2069 0 0.9619
vent 226 252 -0.3038 0 0.9048
meth 261 217 0.2445 0 0.9048
lowph 7.222 0.1282 0.1987 0 0.8762
gest 29.32 2.17 0.1974 0 0.8286
cld 355 123 -0.2429 0 0.7952
toc 367 111 0.1569 2e-04 0.7619
pda 388 90 -0.1617 1e-04 0.6905
hospstay 47.65 64.59 -0.0645 0.0602 0.5714