1 FRESA.CAD Regresion Benchmark

1.1 VLBW Data Set

library(readr)


VLBW <- read_delim("./VLBW/vlbw.txt","\t", escape_double = FALSE, trim_ws = TRUE)

VLBW$race <- NULL

VLBW <- VLBW[complete.cases(VLBW),-1]
#VLBW_mat <- as.data.frame(model.matrix(apg1~.*.,VLBW))
VLBW_mat <- as.data.frame(model.matrix(apg1~.,VLBW))
VLBW_mat$`(Intercept)` <- NULL
VLBW_mat$apg1 <- as.integer(VLBW$apg1)
VLBW_mat$apg1[VLBW_mat$apg1 <= 1] <- 1
VLBW_mat$apg1[VLBW_mat$apg1 == 2] <- 1
VLBW_mat$apg1[VLBW_mat$apg1 == 4] <- 3
VLBW_mat$apg1[VLBW_mat$apg1 == 5] <- 3
VLBW_mat$apg1[VLBW_mat$apg1 == 7] <- 6
VLBW_mat$apg1[VLBW_mat$apg1 > 8] <- 8

fnames <- colnames(VLBW_mat)
fnames <- str_replace_all(fnames," ","_")
fnames <- str_replace_all(fnames,"/","_")
fnames <- str_replace_all(fnames,":",".")
colnames(VLBW_mat) <- fnames

table (VLBW_mat$apg1)

bmodel <- BSWiMS.model(formula = apg1 ~ 1,data = VLBW_mat,NumberofRepeats = 20)

1.2 Benchmark


cp <- CVOrdBenchmark(theData = VLBW_mat, theOutcome = "apg1", reps = 50, fraction = 0.80, topincluded = 16 )


elapcol <- names(cp$times[[1]]) == "elapsed"
cputimes <- list(Fresa = mean(cp$times$fresatime[ elapcol ]),LASSO = mean(cp$times$LASSOtime[ elapcol ]),RF = mean(cp$times$RFtime[ elapcol ]),SVM = mean(cp$times$SVMtime[ elapcol ]))

featsize <- list(Fresa = mean(cp$featSize$FRESASize),LASSO = mean(cp$featSize$LASSOSize),Univ = mean(cp$featSize$UNIVSize))

1.2.1 Results

prolr <- predict(bmodel$oridinalModels$polr,VLBW_mat)
boxplot(as.numeric(as.character(prolr)) ~ VLBW_mat$apg1)

table(as.numeric(as.character(prolr)),VLBW_mat$apg1)

 1  3  6  8

1 74 50 29 14 3 6 10 10 2 6 27 33 54 41 8 11 20 36 61

kp <- kappa2(cbind(as.numeric(as.character(prolr)),VLBW_mat$apg1),"unweighted")
pander::pander(kp$value)

0.2179


pr <- predict(bmodel$oridinalModels,VLBW_mat)
boxplot(pr[,1] ~ VLBW_mat$apg1)


tb <- table(as.integer(pr[,1]+0.5),VLBW_mat$apg1)
pander::pander(tb)

	1	3	6	8
1	67	43	23	13
3	9	19	14	9
4	1	0	0	0
5	0	0	0	1
6	22	24	54	39
8	19	27	38	56

tb <- table(as.integer(pr[,1]+0.5),as.integer(pr[,2]+0.5))
pander::pander(tb)

	1	3	5	6	7	8
1	129	15	0	0	0	2
3	4	44	0	2	0	1
4	1	0	0	0	0	0
5	0	1	0	0	0	0
6	10	5	1	83	1	39
8	2	3	1	0	0	134


kp <- kappa2(cbind(as.integer(pr[,1]+0.5),VLBW_mat$apg1),"unweighted")
pander::pander(kp$value)

0.2119


kp <- kappa2(cbind(as.integer(pr[,2]+0.5),VLBW_mat$apg1),"unweighted")
pander::pander(kp$value)

0.2128




#The Times
pander::pander(cputimes)

Fresa: 1.725
LASSO: 0.0692
RF: 0.2168
SVM: 0.0398

pander::pander(featsize)

Fresa: 7.82
LASSO: 10.16
Univ: 10.08



plotMAEEvolution(cp,16,main="Mean Absolute Error (MAE)", location="topright")



bp <- barPlotCiError(as.matrix(cp$CorTable),metricname="Kendall Correlation",thesets=thesets,themethod=theMethod,main="Kendall Correlation",offsets = c(0.5,0.05),args.legend = list(x = "bottomright"))

pander::pander(bp$barMatrix,caption = "Kendall Correlation",round = 3)

Kendall Correlation
	Regresion Method
B:SWiMS	0.229
B:SWiMS(OLR)	0.264
B:SWiMS(SVM)	0.238
RF	0.243
RF(OLR)	0.246
RF(SVM)	0.22
LASSO	0.28
SVM	0.257
Univ.(SVM)	0.232
LASSO(SVM)	0.221

pander::pander(bp$ciTable,caption = "Kendall Correlation with 95%CI",round = 3)

Kendall Correlation with 95%CI
	Kendall Correlation	lower	upper
Regresion Method	0.229	0.178	0.281
Regresion Method	0.264	0.25	0.278
Regresion Method	0.238	0.225	0.251
Regresion Method	0.243	0.229	0.256
Regresion Method	0.246	0.231	0.26
Regresion Method	0.22	0.206	0.233
Regresion Method	0.28	0.23	0.331
Regresion Method	0.257	0.244	0.271
Regresion Method	0.232	0.219	0.246
Regresion Method	0.221	0.208	0.233


bp <- barPlotCiError(as.matrix(cp$KappaTable),metricname="Kappa Agreement",thesets=thesets,themethod=theMethod,main="Kappa Agreement",offsets = c(0.5,0.05),args.legend = list(x = "bottomright"))

pander::pander(bp$barMatrix,caption = "Kappa Agreement",round = 3)

Kappa Agreement
	Regresion Method
B:SWiMS	0.12
B:SWiMS(OLR)	0.197
B:SWiMS(SVM)	0.126
RF	0.145
RF(OLR)	0.174
RF(SVM)	0.131
LASSO	0.012
SVM	0.184
Univ.(SVM)	0.107
LASSO(SVM)	0.155

pander::pander(bp$ciTable,caption = "Kappa Agreement with 95%CI",round = 3)

Kappa Agreement with 95%CI
	Kappa Agreement	lower	upper
Regresion Method	0.12	0.072	0.169
Regresion Method	0.197	0.147	0.248
Regresion Method	0.126	0.077	0.175
Regresion Method	0.145	0.094	0.195
Regresion Method	0.174	0.124	0.224
Regresion Method	0.131	0.082	0.181
Regresion Method	0.012	-0.014	0.038
Regresion Method	0.184	0.133	0.235
Regresion Method	0.107	0.059	0.155
Regresion Method	0.155	0.107	0.204



bp <- barPlotCiError(as.matrix(cp$MAETable),metricname="MAE",thesets=thesets,themethod=theMethod,main="MAE",offsets = c(0.5,5),args.legend = list(x = "bottomright"))

pander::pander(bp$barMatrix,caption = "MAE",round = 3)

MAE
	Regresion Method
B:SWiMS	2.296
B:SWiMS(OLR)	2.109
B:SWiMS(SVM)	2.308
RF	2.206
RF(OLR)	2.179
RF(SVM)	2.26
LASSO	2.103
SVM	2.205
Univ.(SVM)	2.345
LASSO(SVM)	2.339

pander::pander(bp$ciTable,caption = "MAE with 95%CI",round = 3)

MAE with 95%CI
	MAE	lower	upper
Regresion Method	2.296	2.16	2.452
Regresion Method	2.109	1.983	2.252
Regresion Method	2.308	2.17	2.464
Regresion Method	2.206	2.075	2.355
Regresion Method	2.179	2.049	2.326
Regresion Method	2.26	2.126	2.413
Regresion Method	2.103	1.977	2.245
Regresion Method	2.205	2.074	2.354
Regresion Method	2.345	2.205	2.504
Regresion Method	2.339	2.2	2.497



bp <- barPlotCiError(as.matrix(cp$BiasTable),metricname="BIAS",thesets=thesets,themethod=theMethod,main="BIAS",offsets = c(0.5,0.5),args.legend = list(x = "bottomright"))

pander::pander(bp$barMatrix,caption = "BIAS",round = 3)

BIAS
	Regresion Method
B:SWiMS	0.357
B:SWiMS(OLR)	0.079
B:SWiMS(SVM)	0.276
RF	0.053
RF(OLR)	0.122
RF(SVM)	0.415
LASSO	-0.014
SVM	0.32
Univ.(SVM)	0.209
LASSO(SVM)	0.377

pander::pander(bp$ciTable,caption = "BIAS with 95%CI",round = 3)

BIAS with 95%CI
	BIAS	lower	upper
Regresion Method	0.357	0.077	0.637
Regresion Method	0.079	-0.194	0.353
Regresion Method	0.276	-0.01	0.562
Regresion Method	0.053	-0.224	0.331
Regresion Method	0.122	-0.155	0.4
Regresion Method	0.415	0.136	0.695
Regresion Method	-0.014	-0.233	0.204
Regresion Method	0.32	0.04	0.6
Regresion Method	0.209	-0.079	0.498
Regresion Method	0.377	0.083	0.67

1.3 Features


pander::pander(summary(bmodel$bagging$bagged.model,caption="fdm2 model",round = 3))

coefficients:

Table continues below
	Estimate	lower	mean	upper	u.MSE	r.MSE
vent	-0.6263	-0.6263	-0.6263	-0.6263	6.413	6.04
meth	0.5662	0.5662	0.5662	0.5662	6.665	6.021
bwt	0.00103	0.00103	0.00103	0.00103	6.567	5.941
pltct	0.002206	0.002206	0.002206	0.002206	6.731	5.876
cld	-0.4231	-0.4231	-0.4231	-0.4231	6.665	6.292
toc	0.4668	0.4668	0.4668	0.4668	6.968	6.353
lowph	1.443	1.443	1.443	1.443	6.715	6.304
gest	0.07034	0.07034	0.07034	0.07034	6.767	6.245

Table continues below
	model.MSE	NeRI	F.pvalue	t.pvalue	Sign.pvalue
vent	5.788	0.1255	6.477e-06	0.01699	0.003451
meth	5.788	0.1297	1.415e-05	0.01347	0.002608
bwt	5.788	0.02092	0.000409	0.06787	0.3403
pltct	5.788	0.05439	0.007001	0.05308	0.1264
cld	6.15	0.1548	0.0009386	0.002551	0.0004116
toc	6.15	0.06695	8.045e-05	0.002903	0.07807
lowph	6.15	0.08368	0.0005859	0.01332	0.03717
gest	6.15	0.06695	0.006853	0.04288	0.07807

	Wilcox.pvalue
vent	0.02768
meth	0.06959
bwt	0.06766
pltct	0.03294
cld	0.001628
toc	0.01248
lowph	0.02019
gest	0.0223

MSE: 5.746
R2: 0.2019
bootstrap:


pander::pander(summary(bmodel$oridinalModels,caption="Ordinal model",round = 3))

- coefficients:
  
  Table continues below
  
  Estimate lower OR upper u.Accuracy r.Accuracy
  
  vent -0.7001 0.4965 0.4965 0.4965 0.5801 0.5108
  
  Table continues below
  
  full.Accuracy u.AUC r.AUC full.AUC IDI NRI
  
  vent 0.5801 0.5764 0.5 0.5764 0.02644 0.3057
  
  z.IDI z.NRI
  
  vent 2.487 2.498
- Accuracy: 0.5801
- tAUC: 0.5764
- bootstrap:
- coefficients:
  
  Table continues below
  
  Estimate lower OR upper u.Accuracy r.Accuracy
  
  cld -0.8893 0.4109 0.4109 0.4109 0.6033 0.5331
  
  full.Accuracy u.AUC r.AUC full.AUC IDI NRI z.IDI z.NRI
  
  cld 0.6033 0.5895 0.5 0.5895 0.03912 0.358 3.087 3.1
- Accuracy: 0.6033
- tAUC: 0.5895
- bootstrap:

Table continues below
	Estimate	lower	OR	upper	u.Accuracy	r.Accuracy
vent	-0.7001	0.4965	0.4965	0.4965	0.5801	0.5108

Table continues below
	full.Accuracy	u.AUC	r.AUC	full.AUC	IDI	NRI
vent	0.5801	0.5764	0.5	0.5764	0.02644	0.3057

	z.IDI	z.NRI
vent	2.487	2.498

Table continues below
	Estimate	lower	OR	upper	u.Accuracy	r.Accuracy
cld	-0.8893	0.4109	0.4109	0.4109	0.6033	0.5331

	full.Accuracy	u.AUC	r.AUC	full.AUC	IDI	NRI	z.IDI	z.NRI
cld	0.6033	0.5895	0.5	0.5895	0.03912	0.358	3.087	3.1

coefficients:

Table continues below
	Estimate	lower	OR	upper	u.Accuracy
vent	-0.9193	0.3793	0.3988	0.4193	0.6032
deliveryvaginal	-0.5976	0.5501	0.5501	0.5501	0.5628
cld	-0.1315	0.8768	0.8768	0.8768	0.5425
pda	-0.02171	0.9622	0.9785	0.9951	0.5344

Table continues below
	r.Accuracy	full.Accuracy	u.AUC	r.AUC	full.AUC
vent	0.5587	0.585	0.6082	0.5567	0.5762
deliveryvaginal	0.6032	0.583	0.563	0.6082	0.5726
cld	0.5223	0.5425	0.5584	0.5	0.5584
pda	0.5223	0.5344	0.5514	0.5	0.5514

	IDI	NRI	z.IDI	z.NRI
vent	0.05877	0.4329	3.9	3.585
deliveryvaginal	0.02667	0.2519	2.543	1.993
cld	0.02734	0.2336	2.666	2.676
pda	0.02468	0.2055	2.533	2.543

Accuracy: 0.583
tAUC: 0.5726
bootstrap:

gain <- length(bmodel$oridinalModels$formulas)/(20*3)
bgm <- baggedModel(bmodel$oridinalModels$formulas,VLBW_mat,type="LM")

…………

gplots::heatmap.2(gain*bgm$formulaNetwork,trace="none",mar=c(10,10),main="B:SWiMS Formula Network")

Benchmarks for VLBW Data SET

José Tamez-Peña

April 30, 2018

1 FRESA.CAD Regresion Benchmark

1.1 VLBW Data Set

1.2 Benchmark

1.2.1 Results

1.3 Features