1 FRESA.CAD Regression Benchmark

1.1 SUPPORT Data Set

library(readr)


# Read the tab-delimited SUPPORT extract.
# `na` lists the strings that readr turns into NA. The previous value,
# na = "empty", only matched the literal token "empty", so genuinely empty
# fields were never marked missing and the complete.cases() filter that
# follows could not drop them. Empty fields and "NA" (readr's defaults) are
# now recognised; "empty" is kept so any such token is still treated as missing.
Support_red <- read_delim(
  "./SUPPORT/Support_red.txt",
  delim = "\t",
  escape_double = FALSE,
  na = c("", "NA", "empty"),
  trim_ws = TRUE
)

# Drop every row that has at least one missing value.
Support_red <- Support_red[complete.cases(Support_red), ]

# Expand the predictors of sfdm2 into a design matrix (main effects only) and
# remove the intercept column.
# Variant with all pairwise interactions, kept for reference:
# Support_red_mat <- as.data.frame(model.matrix(sfdm2 ~ . * ., Support_red))
Support_red_mat <- as.data.frame(model.matrix(sfdm2 ~ ., Support_red))
Support_red_mat[["(Intercept)"]] <- NULL

# Attach the outcome, recoding level 4 as 3 (too few samples in 4).
Support_red_mat$sfdm2 <- replace(Support_red$sfdm2, Support_red$sfdm2 == 4, 3)
# Clean the design-matrix column names: spaces and slashes become "_",
# colons become ".".
fnames <- str_replace_all(colnames(Support_red_mat), "[ /]", "_")
fnames <- str_replace_all(fnames, ":", ".")
names(Support_red_mat) <- fnames


# Fit the B:SWiMS model for sfdm2 on the full prepared data set, starting
# from the intercept-only formula, with 20 repeats.
bmodel <- BSWiMS.model(
  formula = sfdm2 ~ 1,
  data = Support_red_mat,
  NumberofRepeats = 20
)

1.2 Benchmark


# Run the ordinal benchmark on sfdm2: 50 repetitions with fraction = 0.80.
# NOTE(review): fraction is presumably the training share of each split and
# topincluded the cap on selected features - confirm against CVOrdBenchmark.
cp <- CVOrdBenchmark(
  theData = Support_red_mat,
  theOutcome = "sfdm2",
  reps = 50,
  fraction = 0.80,
  topincluded = 20
)


# Logical mask selecting the entries named "elapsed" in the first timing vector;
# the same mask is applied to each method's timing vector below.
elapcol <- names(cp$times[[1]]) == "elapsed"

# Mean elapsed time per method.
cputimes <- list(
  Fresa = mean(cp$times$fresatime[elapcol]),
  LASSO = mean(cp$times$LASSOtime[elapcol]),
  RF = mean(cp$times$RFtime[elapcol]),
  SVM = mean(cp$times$SVMtime[elapcol])
)

# Mean number of selected features per method.
featsize <- list(
  Fresa = mean(cp$featSize$FRESASize),
  LASSO = mean(cp$featSize$LASSOSize),
  Univ = mean(cp$featSize$UNIVSize)
)

1.2.1 Results

# Predict with the polr component of the ordinal models and cross-tabulate
# the predictions (rows) against the observed sfdm2 (columns).
# "oridinalModels" is the field name as spelled in the fitted object.
prlr <- predict(
  bmodel$oridinalModels$polr,
  Support_red_mat
)
tb <- table(prlr, Support_red_mat$sfdm2)
pander::pander(tb)
  1 2 3 5
1 253 58 38 91
2 0 0 0 0
3 0 0 0 0
5 61 50 28 239
# Unweighted Cohen's kappa between the polr predictions and the observed sfdm2.
# prlr is a factor whose levels are the class labels (1, 2, 3, 5). cbind()
# would coerce it to its internal level codes (1..4), so a predicted "5"
# would be compared as 4 against an observed 5 and never count as agreement.
# Convert through character first so both columns hold the real class labels.
# Any kappa value printed from the earlier code must be regenerated.
prlr_labels <- as.integer(as.character(prlr))
kp <- kappa2(cbind(prlr_labels, Support_red_mat$sfdm2), "unweighted")
pander::pander(kp$value)

0.1296



# Predict with the full set of ordinal models and summarise each column of
# the returned prediction matrix.
pr <- predict(
  bmodel$oridinalModels,
  Support_red_mat
)
summary(pr)
   V1              V2              V3               V4        

Min. :1.030 Min. :1.000 Min. :0.5205 Min. :0.4741
1st Qu.:1.040 1st Qu.:1.000 1st Qu.:0.5990 1st Qu.:0.6817
Median :2.012 Median :2.000 Median :0.6480 Median :0.7830
Mean :2.646 Mean :2.684 Mean :0.6666 Mean :0.7778
3rd Qu.:4.916 3rd Qu.:5.000 3rd Qu.:0.7105 3rd Qu.:0.8643
Max. :4.989 Max. :5.000 Max. :0.9784 Max. :1.0000
V5 V6
Min. :-0.99930 Min. :-1.054107
1st Qu.:-0.38052 1st Qu.:-0.390783
Median : 0.24188 Median : 0.237119
Mean : 0.01887 Mean : 0.009565
3rd Qu.: 0.38023 3rd Qu.: 0.376274
Max. : 0.52420 Max. : 0.524199

# Distribution of the first prediction column within each observed sfdm2 class.
boxplot(pr[, 1] ~ Support_red_mat$sfdm2)

# Same view for the second prediction column.
boxplot(pr[, 2] ~ Support_red_mat$sfdm2)


# Round the first prediction column to the nearest class (add 0.5, truncate)
# and cross-tabulate it against the observed sfdm2.
tb <- table(
  as.integer(pr[, 1] + 0.5),
  as.integer(Support_red_mat$sfdm2)
)
pander::pander(tb)
  1 2 3 5
1 229 24 16 72
2 16 58 8 23
3 43 21 34 26
5 26 5 8 209

# Unweighted kappa between the rounded first prediction column and sfdm2.
kp <- kappa2(
  cbind(as.integer(pr[, 1] + 0.5), Support_red_mat$sfdm2),
  "unweighted"
)
pander::pander(kp$value)

0.4886


# Unweighted kappa between the rounded second prediction column and sfdm2.
kp <- kappa2(
  cbind(as.integer(pr[, 2] + 0.5), Support_red_mat$sfdm2),
  "unweighted"
)
pander::pander(kp$value)

0.4587


# Predict with the bagged model, round to the nearest integer (add 0.5,
# truncate) and cross-tabulate against the observed sfdm2.
pr <- predict(
  bmodel$bagging$bagged.model,
  Support_red_mat
)
tb <- table(
  as.integer(pr + 0.5),
  as.integer(Support_red_mat$sfdm2)
)
pander::pander(tb)
  1 2 3 5
0 0 1 0 0
1 3 3 1 1
2 185 42 30 59
3 109 44 31 102
4 15 17 3 109
5 2 1 1 50
6 0 0 0 9
# Bagged-model predictions within each observed sfdm2 class.
boxplot(pr ~ Support_red_mat$sfdm2)

# Unweighted kappa between the rounded bagged predictions and sfdm2.
kp <- kappa2(
  cbind(as.integer(pr + 0.5), Support_red_mat$sfdm2),
  "unweighted"
)
pander::pander(kp$value)

0.0499



# The times: mean elapsed time per method.
pander::pander(
  cputimes
)
  • Fresa: 3.426
  • LASSO: 0.0858
  • RF: 0.4046
  • SVM: 0.0668
# Mean number of selected features per method.
pander::pander(
  featsize
)
  • Fresa: 6
  • LASSO: 20.02
  • Univ: 10.54


# Plot the MAE evolution from the benchmark results.
# NOTE(review): the positional 20 presumably matches topincluded = 20 used in
# the benchmark call - confirm against plotMAEEvolution's signature.
plotMAEEvolution(
  cp,
  20,
  main = "Mean Absolute Error (MAE)",
  location = "topright"
)



# Bar plot with CI error bars for the benchmark's Kendall correlation table,
# followed by the bar values.
# NOTE(review): thesets and theMethod are not defined in the visible part of
# this report - confirm they are set in an earlier chunk.
bp <- barPlotCiError(
  as.matrix(cp$CorTable),
  metricname = "Kendall Correlation",
  thesets = thesets,
  themethod = theMethod,
  main = "Kendall Correlation",
  offsets = c(0.5, 0.05),
  args.legend = list(x = "bottomright")
)

pander::pander(
  bp$barMatrix,
  caption = "Kendall Correlation",
  round = 3
)
Kendall Correlation
  Regresion Method
B:SWiMS 0.354
B:SWiMS(OLR) 0.245
B:SWiMS(SVM) 0.296
RF 0.291
RF(OLR) 0.206
RF(SVM) 0.264
LASSO 0.345
SVM 0.287
Univ.(SVM) 0.29
LASSO(SVM) 0.26
# Kendall correlation per method with its lower and upper CI bounds.
pander::pander(
  bp$ciTable,
  caption = "Kendall Correlation with 95%CI",
  round = 3
)
Kendall Correlation with 95%CI
  Kendall Correlation lower upper
Regresion Method 0.354 0.317 0.391
Regresion Method 0.245 0.239 0.251
Regresion Method 0.296 0.287 0.304
Regresion Method 0.291 0.283 0.299
Regresion Method 0.206 0.201 0.212
Regresion Method 0.264 0.257 0.271
Regresion Method 0.345 0.312 0.378
Regresion Method 0.287 0.28 0.295
Regresion Method 0.29 0.283 0.298
Regresion Method 0.26 0.253 0.266

# Bar plot with CI error bars for the benchmark's kappa table, followed by
# the bar values.
# NOTE(review): thesets and theMethod are not defined in the visible part of
# this report - confirm they are set in an earlier chunk.
bp <- barPlotCiError(
  as.matrix(cp$KappaTable),
  metricname = "Kappa Agreement",
  thesets = thesets,
  themethod = theMethod,
  main = "Kappa Agreement",
  offsets = c(0.5, 0.05),
  args.legend = list(x = "bottomright")
)

pander::pander(
  bp$barMatrix,
  caption = "Kappa Agreement",
  round = 3
)
Kappa Agreement
  Regresion Method
B:SWiMS 0.412
B:SWiMS(OLR) 0.331
B:SWiMS(SVM) 0.417
RF 0.431
RF(OLR) 0.284
RF(SVM) 0.385
LASSO 0.051
SVM 0.419
Univ.(SVM) 0.421
LASSO(SVM) 0.34
# Kappa agreement per method with its lower and upper CI bounds.
pander::pander(
  bp$ciTable,
  caption = "Kappa Agreement with 95%CI",
  round = 3
)
Kappa Agreement with 95%CI
  Kappa Agreement lower upper
Regresion Method 0.412 0.37 0.454
Regresion Method 0.331 0.28 0.381
Regresion Method 0.417 0.37 0.465
Regresion Method 0.431 0.382 0.479
Regresion Method 0.284 0.233 0.334
Regresion Method 0.385 0.337 0.432
Regresion Method 0.051 0.03 0.072
Regresion Method 0.419 0.37 0.468
Regresion Method 0.421 0.372 0.47
Regresion Method 0.34 0.291 0.39


# Bar plot with CI error bars for the benchmark's MAE table, followed by the
# bar values.
# NOTE(review): thesets and theMethod are not defined in the visible part of
# this report - confirm they are set in an earlier chunk.
bp <- barPlotCiError(
  as.matrix(cp$MAETable),
  metricname = "MAE",
  thesets = thesets,
  themethod = theMethod,
  main = "MAE",
  offsets = c(0.5, 5),
  args.legend = list(x = "bottomright")
)

pander::pander(
  bp$barMatrix,
  caption = "MAE",
  round = 3
)
MAE
  Regresion Method
B:SWiMS 0.992
B:SWiMS(OLR) 1.183
B:SWiMS(SVM) 1.031
RF 1.037
RF(OLR) 1.306
RF(SVM) 1.136
LASSO 1.304
SVM 1.053
Univ.(SVM) 1.054
LASSO(SVM) 1.179
# MAE per method with its lower and upper CI bounds.
pander::pander(
  bp$ciTable,
  caption = "MAE with 95%CI",
  round = 3
)
MAE with 95%CI
  MAE lower upper
Regresion Method 0.992 0.946 1.043
Regresion Method 1.183 1.129 1.244
Regresion Method 1.031 0.983 1.083
Regresion Method 1.037 0.989 1.089
Regresion Method 1.306 1.245 1.372
Regresion Method 1.136 1.084 1.194
Regresion Method 1.304 1.244 1.37
Regresion Method 1.053 1.005 1.107
Regresion Method 1.054 1.005 1.107
Regresion Method 1.179 1.125 1.239


# Bar plot with CI error bars for the benchmark's bias table, followed by the
# bar values.
# NOTE(review): thesets and theMethod are not defined in the visible part of
# this report - confirm they are set in an earlier chunk.
bp <- barPlotCiError(
  as.matrix(cp$BiasTable),
  metricname = "BIAS",
  thesets = thesets,
  themethod = theMethod,
  main = "BIAS",
  offsets = c(0.5, 0.5),
  args.legend = list(x = "bottomright")
)

pander::pander(
  bp$barMatrix,
  caption = "BIAS",
  round = 3
)
BIAS
  Regresion Method
B:SWiMS -0.245
B:SWiMS(OLR) -0.125
B:SWiMS(SVM) -0.352
RF -0.147
RF(OLR) -0.12
RF(SVM) -0.421
LASSO -0.049
SVM -0.084
Univ.(SVM) -0.094
LASSO(SVM) -0.2
# Bias per method with its lower and upper CI bounds.
pander::pander(
  bp$ciTable,
  caption = "BIAS with 95%CI",
  round = 3
)
BIAS with 95%CI
  BIAS lower upper
Regresion Method -0.245 -0.358 -0.132
Regresion Method -0.125 -0.262 0.012
Regresion Method -0.352 -0.477 -0.228
Regresion Method -0.147 -0.274 -0.019
Regresion Method -0.12 -0.265 0.026
Regresion Method -0.421 -0.552 -0.29
Regresion Method -0.049 -0.152 0.054
Regresion Method -0.084 -0.213 0.045
Regresion Method -0.094 -0.223 0.034
Regresion Method -0.2 -0.335 -0.065

1.3 Features


# Summarise the bagged model and print the summary.
# NOTE(review): caption and round are passed to summary() here, not to
# pander() as in the other tables of this report - confirm that is intended.
pander::pander(
  summary(
    bmodel$bagging$bagged.model,
    caption = "fdm2 model",
    round = 3
  )
)
  • coefficients:

    Table continues below
      Estimate lower mean upper u.MSE
    avtisst 0.05813 0.05496 0.05813 0.06129 2.727
    slos -0.01476 -0.01513 -0.01476 -0.0144 3.257
    adlsc 0.1544 0.1452 0.1544 0.1636 3.13
    resp 0.0252 0.02473 0.0252 0.02568 3.233
    dzgroupCirrhosis 0.9275 0.8942 0.9275 0.9609 3.254
    scoma 0.001606 0.0006278 0.001606 0.002585 3.007
    dzgroupComa 0.956 0.7672 0.956 1.145 3.143
    dzgroupMOSF_w_Malig 0.02654 0.003943 0.02654 0.04913 3.217
    Table continues below
      r.MSE model.MSE NeRI F.pvalue
    avtisst 2.899 2.341 0.2781 0
    slos 2.437 2.341 -0.0007335 1.096e-08
    adlsc 2.443 2.341 0.1435 3.821e-09
    resp 2.398 2.341 0.1097 9.043e-06
    dzgroupCirrhosis 2.385 2.341 0.0934 9.35e-05
    scoma 2.374 2.336 0.1369 0.0003067
    dzgroupComa 2.406 2.34 0.1648 1.782e-06
    dzgroupMOSF_w_Malig 2.329 2.31 0.1785 0.01067
      t.pvalue Sign.pvalue Wilcox.pvalue
    avtisst 2.339e-24 5.906e-16 9.333e-22
    slos 2.549e-06 0.4437 0.006662
    adlsc 0.001134 2.248e-05 0.0002395
    resp 0.004032 0.0009353 0.00155
    dzgroupCirrhosis 0.007812 0.004159 1.582e-10
    scoma 0.04293 4.969e-05 0.02761
    dzgroupComa 0.00127 1.235e-06 0.002763
    dzgroupMOSF_w_Malig 0.03227 1.86e-07 0.004132
  • MSE: 2.334
  • R2: 0.2876
  • bootstrap:

# Summarise the ordinal models and print the summary.
# NOTE(review): caption and round are passed to summary() here, not to
# pander() as in the other tables of this report - confirm that is intended.
pander::pander(
  summary(
    bmodel$oridinalModels,
    caption = "Ordinal model",
    round = 3
  )
)
    • coefficients:

      Table continues below
        Estimate lower OR upper u.Accuracy
      slos 0.06456 1.062 1.067 1.071 0.6959
      adlsc 0.415 1.499 1.514 1.53 0.6178
      dzgroupComa 0.6407 1.562 1.898 2.306 0.5239
      dzgroupMOSF_w_Malig 0.264 0.6436 1.302 2.634 0.5143
      scoma 0.004125 1 1.004 1.008 0.6083
      dzgroupCirrhosis -0.2196 0.6235 0.8029 1.034 0.5159
      meanbp 0.001196 1 1.001 1.002 0.5541
      dzgroupCHF -0.002638 0.9709 0.9974 1.025 0.5478
      Table continues below
        r.Accuracy full.Accuracy u.AUC r.AUC
      slos 0.6424 0.7723 0.6959 0.6424
      adlsc 0.7043 0.7723 0.6178 0.7043
      dzgroupComa 0.7795 0.7732 0.5239 0.7795
      dzgroupMOSF_w_Malig 0.7739 0.7661 0.5143 0.7739
      scoma 0.7755 0.7662 0.6083 0.7755
      dzgroupCirrhosis 0.768 0.7755 0.5159 0.768
      meanbp 0.7675 0.7635 0.5541 0.7675
      dzgroupCHF 0.7739 0.7739 0.5478 0.7739
        full.AUC IDI NRI z.IDI z.NRI
      slos 0.7723 0.2325 0.9051 14.28 13.26
      adlsc 0.7723 0.1409 0.7573 10.39 10.53
      dzgroupComa 0.7732 0.002265 0.03243 0.9471 1.537
      dzgroupMOSF_w_Malig 0.7661 0.001615 0.07882 0.7343 1.82
      scoma 0.7662 0.006816 0.2752 2.075 4.22
      dzgroupCirrhosis 0.7755 0.004483 -0.1614 1.702 -3.357
      meanbp 0.7635 0.006993 0.2197 2.188 2.772
      dzgroupCHF 0.7739 5.294e-05 -0.3822 0.1922 -5.209
    • Accuracy: 0.7691
    • tAUC: 0.7691
    • bootstrap:

    • coefficients:

      Table continues below
        Estimate lower OR upper u.Accuracy
      dzgroupCirrhosis 2.389 9.699 10.9 12.25 0.5417
      adlsc -0.07368 0.8478 0.929 1.018 0.4861
      meanbp -0.009351 0.9845 0.9907 0.997 0.5648
      wblc 0.004931 0.997 1.005 1.013 0.4954
      avtisst 9.703e-05 0.9998 1 1 0.5139
      scoma -0.001369 0.9975 0.9986 0.9998 0.5324
      hrt 0.0008421 0.9997 1.001 1.002 0.4861
      dzgroupCHF 0.0171 0.8053 1.017 1.285 0.5046
      Table continues below
        r.Accuracy full.Accuracy u.AUC r.AUC full.AUC
      dzgroupCirrhosis 0.5331 0.5648 0.5417 0.5331 0.5648
      adlsc 0.573 0.5874 0.4861 0.573 0.5874
      meanbp 0.4838 0.6427 0.5648 0.4838 0.6427
      wblc 0.5833 0.5849 0.4954 0.5833 0.5849
      avtisst 0.5417 0.5694 0.5139 0.5417 0.5694
      scoma 0.5694 0.537 0.5324 0.5694 0.537
      hrt 0.6435 0.6157 0.4861 0.6435 0.6157
      dzgroupCHF 0.6157 0.6157 0.5046 0.6157 0.6157
        IDI NRI z.IDI z.NRI
      dzgroupCirrhosis 0.03546 0.1694 2.818 2.629
      adlsc 0.01105 -0.1152 1.579 -0.8492
      meanbp 0.03591 0.2562 2.905 1.907
      wblc 0.006123 0.1296 1.222 0.9725
      avtisst 0.0009975 0.07407 0.5619 0.5481
      scoma 0.02509 0.1852 2.296 1.601
      hrt 0.008319 -0.03704 1.427 -0.2723
      dzgroupCHF 0.0001534 0.05556 0.1434 0.6178
    • Accuracy: 0.6667
    • tAUC: 0.6667
    • bootstrap:

    • coefficients:

      Table continues below
        Estimate lower OR upper u.Accuracy
      slos -0.07551 0.9254 0.9273 0.9291 0.6333
      avtisst 0.08315 1.082 1.087 1.092 0.6197
      resp 0.0414 1.039 1.042 1.046 0.5652
      scoma 0.01663 1.016 1.017 1.017 0.5909
      dzgroupComa 0.2012 1.051 1.223 1.422 0.5455
      dzgroupMOSF_w_Malig 0.2717 1.084 1.312 1.588 0.5439
      dzgroupCOPD -0.02938 0.9266 0.971 1.018 0.5379
      Table continues below
        r.Accuracy full.Accuracy u.AUC r.AUC
      slos 0.6771 0.778 0.6333 0.6771
      avtisst 0.6884 0.778 0.6197 0.6884
      resp 0.7452 0.778 0.5652 0.7452
      scoma 0.7401 0.781 0.5909 0.7401
      dzgroupComa 0.7511 0.7663 0.5455 0.7511
      dzgroupMOSF_w_Malig 0.7682 0.7735 0.5439 0.7682
      dzgroupCOPD 0.7818 0.7636 0.5379 0.7818
        full.AUC IDI NRI z.IDI z.NRI
      slos 0.778 0.2281 0.8218 13.62 12.03
      avtisst 0.778 0.1325 0.7039 9.99 9.729
      resp 0.778 0.02022 0.2085 4.202 2.693
      scoma 0.781 0.02169 0.3957 4.017 5.776
      dzgroupComa 0.7663 0.009892 0.1303 2.608 2.698
      dzgroupMOSF_w_Malig 0.7735 0.01107 0.1667 2.79 3.048
      dzgroupCOPD 0.7636 0.002876 0.1515 1.23 3.205
    • Accuracy: 0.7848
    • tAUC: 0.7848
    • bootstrap:

# Scale factor: number of ordinal-model formulas divided by 60 (20 * 3).
# NOTE(review): 20 presumably mirrors NumberofRepeats = 20 used when fitting
# bmodel; the meaning of 3 is not shown here - confirm both.
gain <- length(bmodel$oridinalModels$formulas) / (20 * 3)

# Bag the ordinal-model formulas as linear models ("LM") on the full data set.
bgm <- baggedModel(
  bmodel$oridinalModels$formulas,
  Support_red_mat,
  type = "LM"
)

……

# Heat map of the formula network from the bagged ordinal formulas, scaled by
# gain. The margins argument is spelled out in full; the previous `mar = `
# only worked through partial argument matching.
gplots::heatmap.2(
  gain * bgm$formulaNetwork,
  trace = "none",
  margins = c(10, 10),
  main = "B:SWiMS Formula Network"
)