1 FRESA.CAD Regresion Benchmark

1.1 VLBW Data Set

library(readr)


VLBW <- read_delim("./VLBW/vlbw.txt","\t", escape_double = FALSE, trim_ws = TRUE)

VLBW$race <- NULL

VLBW <- VLBW[complete.cases(VLBW),-1]
#VLBW_mat <- as.data.frame(model.matrix(apg1~.*.,VLBW))
VLBW_mat <- as.data.frame(model.matrix(apg1~.,VLBW))
VLBW_mat$`(Intercept)` <- NULL
VLBW_mat$apg1 <- as.integer(VLBW$apg1)
VLBW_mat$apg1[VLBW_mat$apg1 <= 1] <- 1
VLBW_mat$apg1[VLBW_mat$apg1 == 2] <- 1
VLBW_mat$apg1[VLBW_mat$apg1 == 4] <- 3
VLBW_mat$apg1[VLBW_mat$apg1 == 5] <- 3
VLBW_mat$apg1[VLBW_mat$apg1 == 7] <- 6
VLBW_mat$apg1[VLBW_mat$apg1 > 8] <- 8

fnames <- colnames(VLBW_mat)
fnames <- str_replace_all(fnames," ","_")
fnames <- str_replace_all(fnames,"/","_")
fnames <- str_replace_all(fnames,":",".")
colnames(VLBW_mat) <- fnames

table (VLBW_mat$apg1)

bmodel <- BSWiMS.model(formula = apg1 ~ 1,data = VLBW_mat,NumberofRepeats = 20)

1.2 Benchmark


cp <- CVOrdBenchmark(theData = VLBW_mat, theOutcome = "apg1", reps = 50, fraction = 0.80, topincluded = 16 )


elapcol <- names(cp$times[[1]]) == "elapsed"
cputimes <- list(Fresa = mean(cp$times$fresatime[ elapcol ]),LASSO = mean(cp$times$LASSOtime[ elapcol ]),RF = mean(cp$times$RFtime[ elapcol ]),SVM = mean(cp$times$SVMtime[ elapcol ]))

featsize <- list(Fresa = mean(cp$featSize$FRESASize),LASSO = mean(cp$featSize$LASSOSize),Univ = mean(cp$featSize$UNIVSize))

1.2.1 Results

prolr <- predict(bmodel$oridinalModels$polr,VLBW_mat)
boxplot(as.numeric(as.character(prolr)) ~ VLBW_mat$apg1)

table(as.numeric(as.character(prolr)),VLBW_mat$apg1)
 1  3  6  8

1 74 50 29 14 3 6 10 10 2 6 27 33 54 41 8 11 20 36 61

kp <- kappa2(cbind(as.numeric(as.character(prolr)),VLBW_mat$apg1),"unweighted")
pander::pander(kp$value)

0.2179


pr <- predict(bmodel$oridinalModels,VLBW_mat)
boxplot(pr[,1] ~ VLBW_mat$apg1)


tb <- table(as.integer(pr[,1]+0.5),VLBW_mat$apg1)
pander::pander(tb)
  1 3 6 8
1 67 43 23 13
3 9 19 14 9
4 1 0 0 0
5 0 0 0 1
6 22 24 54 39
8 19 27 38 56
tb <- table(as.integer(pr[,1]+0.5),as.integer(pr[,2]+0.5))
pander::pander(tb)
  1 3 5 6 7 8
1 129 15 0 0 0 2
3 4 44 0 2 0 1
4 1 0 0 0 0 0
5 0 1 0 0 0 0
6 10 5 1 83 1 39
8 2 3 1 0 0 134

kp <- kappa2(cbind(as.integer(pr[,1]+0.5),VLBW_mat$apg1),"unweighted")
pander::pander(kp$value)

0.2119


kp <- kappa2(cbind(as.integer(pr[,2]+0.5),VLBW_mat$apg1),"unweighted")
pander::pander(kp$value)

0.2128




#The Times
pander::pander(cputimes)
  • Fresa: 1.725
  • LASSO: 0.0692
  • RF: 0.2168
  • SVM: 0.0398
pander::pander(featsize)
  • Fresa: 7.82
  • LASSO: 10.16
  • Univ: 10.08


plotMAEEvolution(cp,16,main="Mean Absolute Error (MAE)", location="topright")



bp <- barPlotCiError(as.matrix(cp$CorTable),metricname="Kendall Correlation",thesets=thesets,themethod=theMethod,main="Kendall Correlation",offsets = c(0.5,0.05),args.legend = list(x = "bottomright"))

pander::pander(bp$barMatrix,caption = "Kendall Correlation",round = 3)
Kendall Correlation
  Regresion Method
B:SWiMS 0.229
B:SWiMS(OLR) 0.264
B:SWiMS(SVM) 0.238
RF 0.243
RF(OLR) 0.246
RF(SVM) 0.22
LASSO 0.28
SVM 0.257
Univ.(SVM) 0.232
LASSO(SVM) 0.221
pander::pander(bp$ciTable,caption = "Kendall Correlation with 95%CI",round = 3)
Kendall Correlation with 95%CI
  Kendall Correlation lower upper
Regresion Method 0.229 0.178 0.281
Regresion Method 0.264 0.25 0.278
Regresion Method 0.238 0.225 0.251
Regresion Method 0.243 0.229 0.256
Regresion Method 0.246 0.231 0.26
Regresion Method 0.22 0.206 0.233
Regresion Method 0.28 0.23 0.331
Regresion Method 0.257 0.244 0.271
Regresion Method 0.232 0.219 0.246
Regresion Method 0.221 0.208 0.233

bp <- barPlotCiError(as.matrix(cp$KappaTable),metricname="Kappa Agreement",thesets=thesets,themethod=theMethod,main="Kappa Agreement",offsets = c(0.5,0.05),args.legend = list(x = "bottomright"))

pander::pander(bp$barMatrix,caption = "Kappa Agreement",round = 3)
Kappa Agreement
  Regresion Method
B:SWiMS 0.12
B:SWiMS(OLR) 0.197
B:SWiMS(SVM) 0.126
RF 0.145
RF(OLR) 0.174
RF(SVM) 0.131
LASSO 0.012
SVM 0.184
Univ.(SVM) 0.107
LASSO(SVM) 0.155
pander::pander(bp$ciTable,caption = "Kappa Agreement with 95%CI",round = 3)
Kappa Agreement with 95%CI
  Kappa Agreement lower upper
Regresion Method 0.12 0.072 0.169
Regresion Method 0.197 0.147 0.248
Regresion Method 0.126 0.077 0.175
Regresion Method 0.145 0.094 0.195
Regresion Method 0.174 0.124 0.224
Regresion Method 0.131 0.082 0.181
Regresion Method 0.012 -0.014 0.038
Regresion Method 0.184 0.133 0.235
Regresion Method 0.107 0.059 0.155
Regresion Method 0.155 0.107 0.204


bp <- barPlotCiError(as.matrix(cp$MAETable),metricname="MAE",thesets=thesets,themethod=theMethod,main="MAE",offsets = c(0.5,5),args.legend = list(x = "bottomright"))

pander::pander(bp$barMatrix,caption = "MAE",round = 3)
MAE
  Regresion Method
B:SWiMS 2.296
B:SWiMS(OLR) 2.109
B:SWiMS(SVM) 2.308
RF 2.206
RF(OLR) 2.179
RF(SVM) 2.26
LASSO 2.103
SVM 2.205
Univ.(SVM) 2.345
LASSO(SVM) 2.339
pander::pander(bp$ciTable,caption = "MAE with 95%CI",round = 3)
MAE with 95%CI
  MAE lower upper
Regresion Method 2.296 2.16 2.452
Regresion Method 2.109 1.983 2.252
Regresion Method 2.308 2.17 2.464
Regresion Method 2.206 2.075 2.355
Regresion Method 2.179 2.049 2.326
Regresion Method 2.26 2.126 2.413
Regresion Method 2.103 1.977 2.245
Regresion Method 2.205 2.074 2.354
Regresion Method 2.345 2.205 2.504
Regresion Method 2.339 2.2 2.497


bp <- barPlotCiError(as.matrix(cp$BiasTable),metricname="BIAS",thesets=thesets,themethod=theMethod,main="BIAS",offsets = c(0.5,0.5),args.legend = list(x = "bottomright"))

pander::pander(bp$barMatrix,caption = "BIAS",round = 3)
BIAS
  Regresion Method
B:SWiMS 0.357
B:SWiMS(OLR) 0.079
B:SWiMS(SVM) 0.276
RF 0.053
RF(OLR) 0.122
RF(SVM) 0.415
LASSO -0.014
SVM 0.32
Univ.(SVM) 0.209
LASSO(SVM) 0.377
pander::pander(bp$ciTable,caption = "BIAS with 95%CI",round = 3)
BIAS with 95%CI
  BIAS lower upper
Regresion Method 0.357 0.077 0.637
Regresion Method 0.079 -0.194 0.353
Regresion Method 0.276 -0.01 0.562
Regresion Method 0.053 -0.224 0.331
Regresion Method 0.122 -0.155 0.4
Regresion Method 0.415 0.136 0.695
Regresion Method -0.014 -0.233 0.204
Regresion Method 0.32 0.04 0.6
Regresion Method 0.209 -0.079 0.498
Regresion Method 0.377 0.083 0.67

1.3 Features


pander::pander(summary(bmodel$bagging$bagged.model,caption="fdm2 model",round = 3))
  • coefficients:

    Table continues below
      Estimate lower mean upper u.MSE r.MSE
    vent -0.6263 -0.6263 -0.6263 -0.6263 6.413 6.04
    meth 0.5662 0.5662 0.5662 0.5662 6.665 6.021
    bwt 0.00103 0.00103 0.00103 0.00103 6.567 5.941
    pltct 0.002206 0.002206 0.002206 0.002206 6.731 5.876
    cld -0.4231 -0.4231 -0.4231 -0.4231 6.665 6.292
    toc 0.4668 0.4668 0.4668 0.4668 6.968 6.353
    lowph 1.443 1.443 1.443 1.443 6.715 6.304
    gest 0.07034 0.07034 0.07034 0.07034 6.767 6.245
    Table continues below
      model.MSE NeRI F.pvalue t.pvalue Sign.pvalue
    vent 5.788 0.1255 6.477e-06 0.01699 0.003451
    meth 5.788 0.1297 1.415e-05 0.01347 0.002608
    bwt 5.788 0.02092 0.000409 0.06787 0.3403
    pltct 5.788 0.05439 0.007001 0.05308 0.1264
    cld 6.15 0.1548 0.0009386 0.002551 0.0004116
    toc 6.15 0.06695 8.045e-05 0.002903 0.07807
    lowph 6.15 0.08368 0.0005859 0.01332 0.03717
    gest 6.15 0.06695 0.006853 0.04288 0.07807
      Wilcox.pvalue
    vent 0.02768
    meth 0.06959
    bwt 0.06766
    pltct 0.03294
    cld 0.001628
    toc 0.01248
    lowph 0.02019
    gest 0.0223
  • MSE: 5.746
  • R2: 0.2019
  • bootstrap:


pander::pander(summary(bmodel$oridinalModels,caption="Ordinal model",round = 3))
    • coefficients:

      Table continues below
        Estimate lower OR upper u.Accuracy r.Accuracy
      vent -0.7001 0.4965 0.4965 0.4965 0.5801 0.5108
      Table continues below
        full.Accuracy u.AUC r.AUC full.AUC IDI NRI
      vent 0.5801 0.5764 0.5 0.5764 0.02644 0.3057
        z.IDI z.NRI
      vent 2.487 2.498
    • Accuracy: 0.5801
    • tAUC: 0.5764
    • bootstrap:

    • coefficients:

      Table continues below
        Estimate lower OR upper u.Accuracy r.Accuracy
      cld -0.8893 0.4109 0.4109 0.4109 0.6033 0.5331
        full.Accuracy u.AUC r.AUC full.AUC IDI NRI z.IDI z.NRI
      cld 0.6033 0.5895 0.5 0.5895 0.03912 0.358 3.087 3.1
    • Accuracy: 0.6033
    • tAUC: 0.5895
    • bootstrap:

    • coefficients:

      Table continues below
        Estimate lower OR upper u.Accuracy
      vent -0.9193 0.3793 0.3988 0.4193 0.6032
      deliveryvaginal -0.5976 0.5501 0.5501 0.5501 0.5628
      cld -0.1315 0.8768 0.8768 0.8768 0.5425
      pda -0.02171 0.9622 0.9785 0.9951 0.5344
      Table continues below
        r.Accuracy full.Accuracy u.AUC r.AUC full.AUC
      vent 0.5587 0.585 0.6082 0.5567 0.5762
      deliveryvaginal 0.6032 0.583 0.563 0.6082 0.5726
      cld 0.5223 0.5425 0.5584 0.5 0.5584
      pda 0.5223 0.5344 0.5514 0.5 0.5514
        IDI NRI z.IDI z.NRI
      vent 0.05877 0.4329 3.9 3.585
      deliveryvaginal 0.02667 0.2519 2.543 1.993
      cld 0.02734 0.2336 2.666 2.676
      pda 0.02468 0.2055 2.533 2.543
    • Accuracy: 0.583
    • tAUC: 0.5726
    • bootstrap:

gain <- length(bmodel$oridinalModels$formulas)/(20*3)
bgm <- baggedModel(bmodel$oridinalModels$formulas,VLBW_mat,type="LM")

…………

gplots::heatmap.2(gain*bgm$formulaNetwork,trace="none",mar=c(10,10),main="B:SWiMS Formula Network")