1 FRESA.CAD Regresion Benchmark

1.1 SUPPORT Data Set

library(readr)


Support_red <- read_delim("./SUPPORT/Support_red.txt","\t", escape_double = FALSE, na = "empty",trim_ws = TRUE)

Support_red <- Support_red[complete.cases(Support_red),]
#Support_red_mat <- as.data.frame(model.matrix(sfdm2~.*.,Support_red))
Support_red_mat <- as.data.frame(model.matrix(sfdm2~.,Support_red))
Support_red_mat$`(Intercept)` <- NULL
Support_red_mat$sfdm2 <- Support_red$sfdm2
Support_red_mat$sfdm2[Support_red_mat$sfdm2 == 4] <- 3; # too few samples in 4
fnames <- colnames(Support_red_mat)
fnames <- str_replace_all(fnames," ","_")
fnames <- str_replace_all(fnames,"/","_")
fnames <- str_replace_all(fnames,":",".")
colnames(Support_red_mat) <- fnames

table(Support_red_mat$sfdm2)

ExperimentName <- "Support"
bswimsReps <- 20;
theData <- Support_red_mat;
theOutcome <- "sfdm2";
reps <- 75;
fraction <- 0.8;

FRESAFileName <- paste(ExperimentName,"FRESAMethod.RDATA",sep = "_")
CVFileName <- paste(ExperimentName,"CVMethod.RDATA",sep = "_")

sampleTrain <- sample(nrow(theData),nrow(theData)*0.5)
sampleData <- theData[sampleTrain,]
testdata <- theData[-sampleTrain,]

table(sampleData$sfdm2)

#cf <- tools::compactPDF("C:/Users/Jose Tamez/Dropbox/Development/R/FRESA.CADcpp/FRESA.CAD/inst/doc/",Sys.which(Sys.getenv("R_QPDF", "qpdf")),gs_cmd ="C:/Program Files/gs/gs9.23/bin/gswin64.exe",gs_quality = "ebook")

1.2 Benchmark


bmodel <- BSWiMS.model(formula = paste(theOutcome," ~ 1"),data = sampleData,NumberofRepeats = bswimsReps )
save(bmodel,file = FRESAFileName)

par(mfrow = c(2,2));

cp <- OrdinalBenchmark(theData,theOutcome,reps,fraction)

par(mfrow = c(1,1));

save(cp,file = CVFileName)

1.3 Results

1.3.1 Model Selection Results


hm <- heatMaps(Outcome = "Outcome",data = cp$testPredictions,title = "Heat Map",Scale = TRUE,hCluster = "col",cexRow = 0.25,cexCol = 0.75,srtCol = 45) 


#The Times
pander::pander(cp$cpuElapsedTimes)
BSWiMS RF RPART LASSO SVM KNN ENS
6.519 0.5783 0.03587 2.223 0.08973 0.0124 9.458

par(mfrow = c(2,1));
pr <- plot(cp,prefix = paste(ExperimentName,":"));

par(mfrow = c(1,1));

pander::pander(pr$metrics,caption = ExperimentName,round = 3)
Support
  RF LASSO ENS BSWiMS KNN RPART SVM
KAPPA 0.519 0.553 0.544 0.502 0.487 0.449 0.442
BMAE 1.021 0.966 0.915 0.995 1.013 1.049 1.137
Kendall 0.472 0.489 0.485 0.433 0.441 0.401 0.403
Bias -0.321 -0.108 -0.295 -0.116 -0.419 -0.509 -0.343
ACC 0.62 0.59 0.578 0.539 0.498 0.516 0.506
SEN 0.516 0.495 0.51 0.464 0.439 0.451 0.449
AUC 0.691 0.679 0.688 0.659 0.644 0.65 0.646
pander::pander(pr$metrics_filter,caption = ExperimentName,round = 3)
Support
  BSWiMS RPART F.Test LASSO RF.ref mRMR Kendall
KAPPA 0.497 0.492 0.492 0.502 0.443 0.442 0.444
BMAE 1.009 1.067 1.068 1.152 1.068 1.159 1.206
Kendall 0.438 0.416 0.416 0.423 0.409 0.406 0.406
ACC 0.501 0.5 0.522 0.564 0.551 0.495 0.498
SEN 0.444 0.462 0.457 0.458 0.494 0.401 0.412
AUC 0.648 0.651 0.652 0.653 0.675 0.62 0.622

1.3.2 Radar Plots

op <- par(no.readonly = TRUE)

par(mfrow = c(1,2),xpd = TRUE,pty = "s",mar = c(1,1,1,1))

mNames <- names(cp$cpuElapsedTimes)

classRanks <- c(pr$minMaxMetrics$BMAE[1],pr$minMaxMetrics$KAPPA[2],pr$minMaxMetrics$Kendall[2],pr$minMaxMetrics$ACC[2],pr$minMaxMetrics$SEN[2],pr$minMaxMetrics$AUC[2],min(cp$cpuElapsedTimes))
classRanks <- rbind(classRanks,c(pr$minMaxMetrics$BMAE[2],0,0,0,0,0,max(cp$cpuElapsedTimes)))
classRanks <- as.data.frame(rbind(classRanks,cbind(t(pr$metrics[c("BMAE","KAPPA","Kendall","ACC","SEN","AUC"),mNames]),cp$cpuElapsedTimes)))
colnames(classRanks) <- c("BMAE","KAPPA","Kendall","ACC","SEN","AUC","CPU")

classRanks$BMAE <- -classRanks$BMAE
classRanks$CPU <- -classRanks$CPU

colors_border = c( rgb(1.0,0.0,0.0,1.0), rgb(0.0,1.0,0.0,1.0) , rgb(0.0,0.0,1.0,1.0), rgb(0.2,0.2,0.0,1.0), rgb(0.0,1.0,1.0,1.0), rgb(1.0,0.0,1.0,1.0), rgb(0.0,0.0,0.0,1.0), rgb(1.0,1.0,0.0,1.0) )
colors_in = c( rgb(1.0,0.0,0.0,0.05), rgb(0.0,1.0,0.0,0.05) , rgb(0.0,0.0,1.0,0.05),rgb(1.0,1.0,0.0,0.05), rgb(0.0,1.0,1.0,0.05) , rgb(1.0,0.0,1.0,0.05), rgb(0.0,0.0,0.0,0.05), rgb(1.0,1.0,0.0,0.05) )
fmsb::radarchart(classRanks,axistype = 0,maxmin = T,pcol = colors_border,pfcol = colors_in,plwd = c(6,2,2,2,2,2,2),plty = 1, cglcol = "grey", cglty = 1,axislabcol = "black",cglwd = 0.8, vlcex  = 0.5 ,title = "Prediction Model")

legend("topleft",legend = rownames(classRanks[-c(1,2),]),bty = "n",pch = 20,col = colors_in,text.col = colors_border,cex = 0.5,pt.cex = 2)


filnames <- c("BSWiMS","LASSO","RF.ref","F.Test","Kendall","mRMR")

filterRanks <- c(pr$minMaxMetrics$BMAE[1],pr$minMaxMetrics$KAPPA[2],pr$minMaxMetrics$Kendall[2],pr$minMaxMetrics$ACC[2],pr$minMaxMetrics$SEN[2],pr$minMaxMetrics$AUC[2],max(cp$jaccard),min(cp$featsize));

filterRanks <- rbind(filterRanks,c(pr$minMaxMetrics$BMAE[2],0,0,0,0,0,min(cp$jaccard),max(cp$featsize)));

filterRanks <- as.data.frame(rbind(filterRanks,cbind(t(pr$metrics_filter[c("BMAE","KAPPA","Kendall","ACC","SEN","AUC"),filnames]),cp$jaccard[filnames],cp$featsize[filnames])));
colnames(filterRanks) <- c("BMAE","KAPPA","Kendall","ACC","SEN","AUC","Jaccard","SIZE")
filterRanks$BMAE <- -filterRanks$BMAE
filterRanks$SIZE <- -filterRanks$SIZE

colors_border = c( rgb(1.0,0.0,0.0,1.0), rgb(0.0,1.0,0.0,1.0) , rgb(0.0,0.0,1.0,1.0), rgb(0.2,0.2,0.0,1.0), rgb(0.0,1.0,1.0,1.0), rgb(1.0,0.0,1.0,1.0), rgb(0.0,0.0,0.0,1.0) )
colors_in = c( rgb(1.0,0.0,0.0,0.05), rgb(0.0,1.0,0.0,0.05) , rgb(0.0,0.0,1.0,0.05),rgb(1.0,1.0,0.0,0.05), rgb(0.0,1.0,1.0,0.05) , rgb(1.0,0.0,1.0,0.05), rgb(0.0,0.0,0.0,0.05) )
fmsb::radarchart(filterRanks,axistype = 0,maxmin = T,pcol = colors_border,pfcol = colors_in,plwd = c(6,2,2,2,2,2,2),plty = 1, cglcol = "grey", cglty = 1,axislabcol = "black",cglwd = 0.8, vlcex  = 0.6,title = "Filter Method" )


legend("topleft",legend = rownames(filterRanks[-c(1,2),]),bty = "n",pch = 20,col = colors_in,text.col = colors_border,cex = 0.5,pt.cex = 2)


par(mfrow = c(1,1))
par(op)

1.3.3 Features Analysis


pander::pander(summary(bmodel),caption = "Model",round = 3)
    • coefficients:

      Table continues below
        Estimate lower OR upper u.Accuracy r.Accuracy
      avtisst 0.0639 1.055 1.066 1.083 0.6281 0.6414
      adlsc 0.3012 1.274 1.351 1.496 0.5915 0.6421
      dzgroupComa 2.7 4.019 14.88 3662 0.4205 0.6849
      resp 0.04318 1.03 1.044 1.071 0.5988 0.6968
      Table continues below
        full.Accuracy u.AUC r.AUC full.AUC IDI
      avtisst 0.6986 0.6503 0.663 0.7149 0.1062
      adlsc 0.6986 0.605 0.6641 0.7149 0.05949
      dzgroupComa 0.6986 0.5497 0.7014 0.7149 0.02973
      resp 0.6986 0.5832 0.7173 0.7149 0.02152
        NRI z.IDI z.NRI Frequency
      avtisst 0.5487 7.745 6.635 1
      adlsc 0.4571 5.806 5.442 1
      dzgroupComa 0.2195 4.042 4.777 1
      resp 0.4167 3.307 4.922 1
    • Accuracy: 0.6993
    • tAUC: 0.7156
    • sensitivity: 0.6604
    • specificity: 0.7708
    • bootstrap:

    • coefficients:

      Table continues below
        Estimate lower OR upper u.Accuracy
      avtisst 0.0553 1.054 1.057 1.079 0.6235
      resp 0.05064 1.05 1.052 1.078 0.5966
      slos -0.02356 0.9738 0.9767 0.9902 0.5208
      scoma 0.01572 1.015 1.016 1.026 0.5892
      crea 0.1755 1.173 1.192 1.368 0.6186
      dzgroupCirrhosis 0.5776 1.12 1.782 2.836 0.5183
      meanbp -0.004641 0.9916 0.9954 0.9992 0.5672
      Table continues below
        r.Accuracy full.Accuracy u.AUC r.AUC full.AUC
      avtisst 0.6721 0.7081 0.6233 0.6719 0.7079
      resp 0.6792 0.7081 0.5964 0.679 0.7079
      slos 0.6932 0.7081 0.5212 0.693 0.7079
      scoma 0.6985 0.7081 0.5886 0.6983 0.7079
      crea 0.6985 0.7099 0.6181 0.6983 0.7096
      dzgroupCirrhosis 0.6968 0.7115 0.5172 0.6966 0.7113
      meanbp 0.7115 0.7115 0.5674 0.7113 0.7113
        IDI NRI z.IDI z.NRI Frequency
      avtisst 0.06726 0.5107 5.332 5.413 1
      resp 0.03607 0.4301 4.038 4.458 1
      slos 0.02876 0.1924 3.355 2.091 1
      scoma 0.02192 0.2619 3.098 2.976 1
      crea 0.01403 0.4369 2.494 4.792 0.9
      dzgroupCirrhosis 0.01314 0.4419 2.436 5.952 0.3
      meanbp 0.01376 0.2598 2.367 2.671 0.3
    • Accuracy: 0.7139
    • tAUC: 0.7137
    • sensitivity: 0.6225
    • specificity: 0.8049
    • bootstrap:

    • coefficients:

      Table continues below
        Estimate lower OR upper u.Accuracy r.Accuracy
      avtisst 0.08103 1.063 1.084 1.106 0.6727 0.7198
      slos -0.05863 0.9269 0.9431 0.9662 0.5209 0.7312
      resp 0.06377 1.05 1.066 1.093 0.6134 0.7604
      scoma 0.01865 1.012 1.019 1.029 0.6528 0.753
      crea 0.1757 1.111 1.192 1.362 0.6615 0.7647
      Table continues below
        full.Accuracy u.AUC r.AUC full.AUC IDI NRI
      avtisst 0.7714 0.6634 0.7092 0.7596 0.1194 0.7163
      slos 0.7714 0.5506 0.7242 0.7596 0.08623 0.4901
      resp 0.7714 0.6086 0.7489 0.7596 0.04512 0.5455
      scoma 0.7714 0.6138 0.7424 0.7596 0.02748 0.3608
      crea 0.7728 0.6284 0.7529 0.761 0.01351 0.3523
        z.IDI z.NRI Frequency
      avtisst 8.041 8.478 1
      slos 6.652 5.902 1
      resp 4.889 6.218 1
      scoma 3.856 4.395 1
      crea 2.577 4.115 0.85
    • Accuracy: 0.7702
    • tAUC: 0.7575
    • sensitivity: 0.6824
    • specificity: 0.8326
    • bootstrap:


topFeat <- min(ncol(bmodel$bagging$formulaNetwork),30);
shortformulaNetwork <- bmodel$bagging$formulaNetwork[1:topFeat,1:topFeat]
validf <- diag(shortformulaNetwork) > 0.1
gplots::heatmap.2(shortformulaNetwork[validf,validf],trace="none",mar = c(10,10),main = "B:SWiMS Formula Network",cexRow = 0.6,cexCol = 0.6)



rm <- rowMeans(cp$featureSelectionFrequency)
selFrequency <- cp$featureSelectionFrequency[rm > 0.10,]


gplots::heatmap.2(selFrequency,trace = "none",mar = c(10,10),main = "Features",cexRow = 0.2)

hm <- heatMaps(Outcome = theOutcome,data = theData[,c(theOutcome,rownames(selFrequency))],title = "Heat Map",Scale = TRUE,hCluster = "col",cexRow = 0.3,cexCol = 0.3,srtCol = 45)


vlist <- rownames(selFrequency)
vlist <- cbind(vlist,vlist)
univ <- univariateRankVariables(variableList = vlist,formula = paste(theOutcome,"~1"),Outcome = theOutcome,data = theData,type = "LM",rankingTest = "Ztest",uniType = "Regression")[,c("cohortMean","cohortStd","kendall.r","kendall.p")] 


cnames <- colnames(univ);
univ <- cbind(univ,rm[rownames(univ)])
colnames(univ) <- c(cnames,"Frequency")
univ <- univ[order(-univ[,5]),]
pander::pander(univ[1:topFeat,],caption = "Features",round = 4)
Features
  cohortMean cohortStd kendall.r kendall.p Frequency
avtisst 23.7 13.66 0.2985 0 1
adlsc 1.957 2.074 0.1905 0 1
meanbp 84.57 27.95 -0.1246 0 0.96
crea 1.8 1.728 0.1347 0 0.861
resp 23.63 9.506 0.0878 0.0014 0.7962
hrt 98.83 33.02 0.1452 0 0.7886
scoma 12.65 25.47 0.241 0 0.7562
dzgroupComa 764 54 0.188 0 0.7333
dzgroupCirrhosis 773 45 0.065 0.0471 0.7181