Benchmarks for SUPPORT Data Set

1.1 SUPPORT Data Set

library(readr)


# Load the tab-separated SUPPORT subset. With na = "empty" only the literal
# string "empty" is read as a missing value.
Support_red <- read_delim(
  "./SUPPORT/Support_red.txt",
  delim = "\t",
  escape_double = FALSE,
  na = "empty",
  trim_ws = TRUE
)

# Keep only the rows that have no missing values.
Support_red <- Support_red[complete.cases(Support_red), ]
#Support_red_mat <- as.data.frame(model.matrix(sfdm2~.*.,Support_red))
# Expand the predictors into a design matrix (main effects only) and drop the
# intercept column that model.matrix() adds.
Support_red_mat <- as.data.frame(model.matrix(sfdm2 ~ ., Support_red))
Support_red_mat[["(Intercept)"]] <- NULL
# Re-attach the outcome and fold level 4 into level 3 (too few samples in 4).
Support_red_mat[["sfdm2"]] <- Support_red[["sfdm2"]]
Support_red_mat$sfdm2[Support_red_mat$sfdm2 == 4] <- 3
# Sanitize the column names: " " and "/" become "_", ":" becomes ".".
fnames <- chartr(" /:", "__.", colnames(Support_red_mat))
colnames(Support_red_mat) <- fnames


# Fit the B:SWiMS model for sfdm2 on the prepared matrix, starting from the
# intercept-only formula, with 20 repeats.
bmodel <- BSWiMS.model(
  formula = sfdm2 ~ 1,
  data = Support_red_mat,
  NumberofRepeats = 20
)

1.2 Benchmark


# Run the ordinal benchmark: 50 repetitions, 80% of the data per training
# fraction, top 20 features included.
cp <- CVOrdBenchmark(
  theData = Support_red_mat,
  theOutcome = "sfdm2",
  reps = 50,
  fraction = 0.80,
  topincluded = 20
)

# Logical mask selecting the "elapsed" entries of the timing vectors.
elapcol <- names(cp$times[[1]]) == "elapsed"
# Mean elapsed time per method.
cputimes <- lapply(
  list(
    Fresa = cp$times$fresatime,
    LASSO = cp$times$LASSOtime,
    RF = cp$times$RFtime,
    SVM = cp$times$SVMtime
  ),
  function(timing) mean(timing[elapcol])
)

# Mean number of selected features per method.
featsize <- lapply(
  list(
    Fresa = cp$featSize$FRESASize,
    LASSO = cp$featSize$LASSOSize,
    Univ = cp$featSize$UNIVSize
  ),
  mean
)

1.2.1 Results

# Predictions from the polr component of the ordinal models, cross-tabulated
# (rows) against the observed outcome (columns).
prlr <- predict(bmodel$oridinalModels$polr, Support_red_mat)
tb <- table(prlr, Support_red_mat[["sfdm2"]])
pander::pander(tb)

	1	2	3	5
1	253	58	38	91
2	0	0	0	0
3	0	0	0	0
5	61	50	28	239

# Unweighted Cohen's kappa between the polr predictions and the observed outcome.
# prlr is a factor: passing it straight to cbind() stores its internal level
# codes (1..4) rather than its labels (1, 2, 3, 5), so a predicted "5" becomes 4
# and can never agree with an observed 5. Convert through the labels so both
# columns use the same coding.
# NOTE: the 0.1296 printed below was produced with the mismatched coding; from
# the confusion table above the corrected value is roughly 0.34 - re-knit to refresh.
kp <- kappa2(
  cbind(as.integer(as.character(prlr)), Support_red_mat$sfdm2),
  "unweighted"
)
pander::pander(kp$value)

0.1296



# Predict with the full ordinal-model object; the summary printed below shows
# that the result has six columns (V1..V6).
pr <- predict(bmodel$oridinalModels,Support_red_mat)
summary(pr)

   V1              V2              V3               V4

Min. :1.030 Min. :1.000 Min. :0.5205 Min. :0.4741
1st Qu.:1.040 1st Qu.:1.000 1st Qu.:0.5990 1st Qu.:0.6817
Median :2.012 Median :2.000 Median :0.6480 Median :0.7830
Mean :2.646 Mean :2.684 Mean :0.6666 Mean :0.7778
3rd Qu.:4.916 3rd Qu.:5.000 3rd Qu.:0.7105 3rd Qu.:0.8643
Max. :4.989 Max. :5.000 Max. :0.9784 Max. :1.0000
V5 V6
Min. :-0.99930 Min. :-1.054107
1st Qu.:-0.38052 1st Qu.:-0.390783
Median : 0.24188 Median : 0.237119
Mean : 0.01887 Mean : 0.009565
3rd Qu.: 0.38023 3rd Qu.: 0.376274
Max. : 0.52420 Max. : 0.524199

# Distribution of the first prediction column within each observed sfdm2 class.
boxplot(pr[,1] ~ Support_red_mat$sfdm2)

# Same for the second prediction column.
boxplot(pr[,2] ~ Support_red_mat$sfdm2)


# Round the first prediction column to the nearest class (add 0.5, truncate)
# and cross-tabulate it against the observed outcome.
tb <- table(
  as.integer(pr[, 1] + 0.5),
  as.integer(Support_red_mat[["sfdm2"]])
)
pander::pander(tb)

	1	2	3	5
1	229	24	16	72
2	16	58	8	23
3	43	21	34	26
5	26	5	8	209


# Unweighted kappa between the rounded first prediction column and the outcome.
kp <- kappa2(
  cbind(as.integer(pr[, 1] + 0.5), Support_red_mat[["sfdm2"]]),
  weight = "unweighted"
)
pander::pander(kp$value)

0.4886


# Unweighted kappa between the rounded second prediction column and the outcome.
kp <- kappa2(
  cbind(as.integer(pr[, 2] + 0.5), Support_red_mat[["sfdm2"]]),
  weight = "unweighted"
)
pander::pander(kp$value)

0.4587


# Predictions of the bagged model, rounded (add 0.5, truncate) and
# cross-tabulated against the observed outcome. The rounded values are not
# restricted to the observed classes (the table below has rows 0 and 6).
pr <- predict(bmodel$bagging$bagged.model, Support_red_mat)
tb <- table(
  as.integer(pr + 0.5),
  as.integer(Support_red_mat[["sfdm2"]])
)
pander::pander(tb)

	1	2	3	5
0	0	1	0	0
1	3	3	1	1
2	185	42	30	59
3	109	44	31	102
4	15	17	3	109
5	2	1	1	50
6	0	0	0	9

# Bagged-model predictions within each observed sfdm2 class.
boxplot(pr ~ Support_red_mat$sfdm2)

# Unweighted kappa between the rounded bagged predictions and the outcome.
kp <- kappa2(
  cbind(as.integer(pr + 0.5), Support_red_mat[["sfdm2"]]),
  weight = "unweighted"
)
pander::pander(kp$value)

0.0499



# Timing summary per method (the cputimes list).
pander::pander(cputimes)

Fresa: 3.426
LASSO: 0.0858
RF: 0.4046
SVM: 0.0668

# Feature-count summary per method (the featsize list).
pander::pander(featsize)

Fresa: 6
LASSO: 20.02
Univ: 10.54



# MAE plot built from the benchmark object cp.
# NOTE(review): plotMAEEvolution is not defined in the visible code - presumably
# a helper defined elsewhere; the meaning of the second argument (20) should be
# confirmed against its definition.
plotMAEEvolution(cp,20,main="Mean Absolute Error (MAE)", location="topright")



# Bar plot of the benchmark's Kendall correlation table, then the bar values.
# NOTE(review): `thesets` and `theMethod` are not defined in the visible code -
# presumably set elsewhere.
bp <- barPlotCiError(
  as.matrix(cp$CorTable),
  metricname = "Kendall Correlation",
  thesets = thesets,
  themethod = theMethod,
  main = "Kendall Correlation",
  offsets = c(0.5, 0.05),
  args.legend = list(x = "bottomright")
)

pander::pander(
  bp$barMatrix,
  caption = "Kendall Correlation",
  round = 3
)

Kendall Correlation
	Regresion Method
B:SWiMS	0.354
B:SWiMS(OLR)	0.245
B:SWiMS(SVM)	0.296
RF	0.291
RF(OLR)	0.206
RF(SVM)	0.264
LASSO	0.345
SVM	0.287
Univ.(SVM)	0.29
LASSO(SVM)	0.26

# Kendall correlation per method with its lower/upper 95% CI bounds.
pander::pander(bp$ciTable,caption = "Kendall Correlation with 95%CI",round = 3)

Kendall Correlation with 95%CI
	Kendall Correlation	lower	upper
Regresion Method	0.354	0.317	0.391
Regresion Method	0.245	0.239	0.251
Regresion Method	0.296	0.287	0.304
Regresion Method	0.291	0.283	0.299
Regresion Method	0.206	0.201	0.212
Regresion Method	0.264	0.257	0.271
Regresion Method	0.345	0.312	0.378
Regresion Method	0.287	0.28	0.295
Regresion Method	0.29	0.283	0.298
Regresion Method	0.26	0.253	0.266


# Bar plot of the benchmark's kappa agreement table, then the bar values.
# NOTE(review): `thesets` and `theMethod` are not defined in the visible code -
# presumably set elsewhere.
bp <- barPlotCiError(
  as.matrix(cp$KappaTable),
  metricname = "Kappa Agreement",
  thesets = thesets,
  themethod = theMethod,
  main = "Kappa Agreement",
  offsets = c(0.5, 0.05),
  args.legend = list(x = "bottomright")
)

pander::pander(
  bp$barMatrix,
  caption = "Kappa Agreement",
  round = 3
)

Kappa Agreement
	Regresion Method
B:SWiMS	0.412
B:SWiMS(OLR)	0.331
B:SWiMS(SVM)	0.417
RF	0.431
RF(OLR)	0.284
RF(SVM)	0.385
LASSO	0.051
SVM	0.419
Univ.(SVM)	0.421
LASSO(SVM)	0.34

# Kappa agreement per method with its lower/upper 95% CI bounds.
pander::pander(bp$ciTable,caption = "Kappa Agreement with 95%CI",round = 3)

Kappa Agreement with 95%CI
	Kappa Agreement	lower	upper
Regresion Method	0.412	0.37	0.454
Regresion Method	0.331	0.28	0.381
Regresion Method	0.417	0.37	0.465
Regresion Method	0.431	0.382	0.479
Regresion Method	0.284	0.233	0.334
Regresion Method	0.385	0.337	0.432
Regresion Method	0.051	0.03	0.072
Regresion Method	0.419	0.37	0.468
Regresion Method	0.421	0.372	0.47
Regresion Method	0.34	0.291	0.39



# Bar plot of the benchmark's MAE table, then the bar values.
# NOTE(review): `thesets` and `theMethod` are not defined in the visible code -
# presumably set elsewhere.
bp <- barPlotCiError(
  as.matrix(cp$MAETable),
  metricname = "MAE",
  thesets = thesets,
  themethod = theMethod,
  main = "MAE",
  offsets = c(0.5, 5),
  args.legend = list(x = "bottomright")
)

pander::pander(
  bp$barMatrix,
  caption = "MAE",
  round = 3
)

MAE
	Regresion Method
B:SWiMS	0.992
B:SWiMS(OLR)	1.183
B:SWiMS(SVM)	1.031
RF	1.037
RF(OLR)	1.306
RF(SVM)	1.136
LASSO	1.304
SVM	1.053
Univ.(SVM)	1.054
LASSO(SVM)	1.179

# MAE per method with its lower/upper 95% CI bounds.
pander::pander(bp$ciTable,caption = "MAE with 95%CI",round = 3)

MAE with 95%CI
	MAE	lower	upper
Regresion Method	0.992	0.946	1.043
Regresion Method	1.183	1.129	1.244
Regresion Method	1.031	0.983	1.083
Regresion Method	1.037	0.989	1.089
Regresion Method	1.306	1.245	1.372
Regresion Method	1.136	1.084	1.194
Regresion Method	1.304	1.244	1.37
Regresion Method	1.053	1.005	1.107
Regresion Method	1.054	1.005	1.107
Regresion Method	1.179	1.125	1.239



# Bar plot of the benchmark's bias table, then the bar values.
# NOTE(review): `thesets` and `theMethod` are not defined in the visible code -
# presumably set elsewhere.
bp <- barPlotCiError(
  as.matrix(cp$BiasTable),
  metricname = "BIAS",
  thesets = thesets,
  themethod = theMethod,
  main = "BIAS",
  offsets = c(0.5, 0.5),
  args.legend = list(x = "bottomright")
)

pander::pander(
  bp$barMatrix,
  caption = "BIAS",
  round = 3
)

BIAS
	Regresion Method
B:SWiMS	-0.245
B:SWiMS(OLR)	-0.125
B:SWiMS(SVM)	-0.352
RF	-0.147
RF(OLR)	-0.12
RF(SVM)	-0.421
LASSO	-0.049
SVM	-0.084
Univ.(SVM)	-0.094
LASSO(SVM)	-0.2

# Bias per method with its lower/upper 95% CI bounds.
pander::pander(bp$ciTable,caption = "BIAS with 95%CI",round = 3)

BIAS with 95%CI
	BIAS	lower	upper
Regresion Method	-0.245	-0.358	-0.132
Regresion Method	-0.125	-0.262	0.012
Regresion Method	-0.352	-0.477	-0.228
Regresion Method	-0.147	-0.274	-0.019
Regresion Method	-0.12	-0.265	0.026
Regresion Method	-0.421	-0.552	-0.29
Regresion Method	-0.049	-0.152	0.054
Regresion Method	-0.084	-0.213	0.045
Regresion Method	-0.094	-0.223	0.034
Regresion Method	-0.2	-0.335	-0.065

1.3 Features


# Coefficient summary of the bagged model.
# NOTE(review): caption= and round= are arguments of summary() here, not of
# pander(); the output below carries no caption and is not rounded to 3 digits,
# so they appear to have no effect - confirm whether they were meant for
# pander(). The caption text also reads "fdm2" while the outcome is sfdm2.
pander::pander(summary(bmodel$bagging$bagged.model,caption="fdm2 model",round = 3))

coefficients:

Table continues below
	Estimate	lower	mean	upper	u.MSE
avtisst	0.05813	0.05496	0.05813	0.06129	2.727
slos	-0.01476	-0.01513	-0.01476	-0.0144	3.257
adlsc	0.1544	0.1452	0.1544	0.1636	3.13
resp	0.0252	0.02473	0.0252	0.02568	3.233
dzgroupCirrhosis	0.9275	0.8942	0.9275	0.9609	3.254
scoma	0.001606	0.0006278	0.001606	0.002585	3.007
dzgroupComa	0.956	0.7672	0.956	1.145	3.143
dzgroupMOSF_w_Malig	0.02654	0.003943	0.02654	0.04913	3.217

Table continues below
	r.MSE	model.MSE	NeRI	F.pvalue
avtisst	2.899	2.341	0.2781	0
slos	2.437	2.341	-0.0007335	1.096e-08
adlsc	2.443	2.341	0.1435	3.821e-09
resp	2.398	2.341	0.1097	9.043e-06
dzgroupCirrhosis	2.385	2.341	0.0934	9.35e-05
scoma	2.374	2.336	0.1369	0.0003067
dzgroupComa	2.406	2.34	0.1648	1.782e-06
dzgroupMOSF_w_Malig	2.329	2.31	0.1785	0.01067

	t.pvalue	Sign.pvalue	Wilcox.pvalue
avtisst	2.339e-24	5.906e-16	9.333e-22
slos	2.549e-06	0.4437	0.006662
adlsc	0.001134	2.248e-05	0.0002395
resp	0.004032	0.0009353	0.00155
dzgroupCirrhosis	0.007812	0.004159	1.582e-10
scoma	0.04293	4.969e-05	0.02761
dzgroupComa	0.00127	1.235e-06	0.002763
dzgroupMOSF_w_Malig	0.03227	1.86e-07	0.004132

MSE: 2.334
R2: 0.2876
bootstrap:

# Coefficient summaries of the ordinal models (three tables are printed below).
# NOTE(review): caption= and round= are arguments of summary() here, not of
# pander(); the output below carries no caption and is not rounded to 3 digits,
# so they appear to have no effect - confirm whether they were meant for pander().
pander::pander(summary(bmodel$oridinalModels,caption="Ordinal model",round = 3))

coefficients:

Table continues below
	Estimate	lower	OR	upper	u.Accuracy
slos	0.06456	1.062	1.067	1.071	0.6959
adlsc	0.415	1.499	1.514	1.53	0.6178
dzgroupComa	0.6407	1.562	1.898	2.306	0.5239
dzgroupMOSF_w_Malig	0.264	0.6436	1.302	2.634	0.5143
scoma	0.004125	1	1.004	1.008	0.6083
dzgroupCirrhosis	-0.2196	0.6235	0.8029	1.034	0.5159
meanbp	0.001196	1	1.001	1.002	0.5541
dzgroupCHF	-0.002638	0.9709	0.9974	1.025	0.5478

Table continues below
	r.Accuracy	full.Accuracy	u.AUC	r.AUC
slos	0.6424	0.7723	0.6959	0.6424
adlsc	0.7043	0.7723	0.6178	0.7043
dzgroupComa	0.7795	0.7732	0.5239	0.7795
dzgroupMOSF_w_Malig	0.7739	0.7661	0.5143	0.7739
scoma	0.7755	0.7662	0.6083	0.7755
dzgroupCirrhosis	0.768	0.7755	0.5159	0.768
meanbp	0.7675	0.7635	0.5541	0.7675
dzgroupCHF	0.7739	0.7739	0.5478	0.7739

	full.AUC	IDI	NRI	z.IDI	z.NRI
slos	0.7723	0.2325	0.9051	14.28	13.26
adlsc	0.7723	0.1409	0.7573	10.39	10.53
dzgroupComa	0.7732	0.002265	0.03243	0.9471	1.537
dzgroupMOSF_w_Malig	0.7661	0.001615	0.07882	0.7343	1.82
scoma	0.7662	0.006816	0.2752	2.075	4.22
dzgroupCirrhosis	0.7755	0.004483	-0.1614	1.702	-3.357
meanbp	0.7635	0.006993	0.2197	2.188	2.772
dzgroupCHF	0.7739	5.294e-05	-0.3822	0.1922	-5.209

Accuracy: 0.7691
tAUC: 0.7691
bootstrap:

coefficients:

Table continues below
	Estimate	lower	OR	upper	u.Accuracy
dzgroupCirrhosis	2.389	9.699	10.9	12.25	0.5417
adlsc	-0.07368	0.8478	0.929	1.018	0.4861
meanbp	-0.009351	0.9845	0.9907	0.997	0.5648
wblc	0.004931	0.997	1.005	1.013	0.4954
avtisst	9.703e-05	0.9998	1	1	0.5139
scoma	-0.001369	0.9975	0.9986	0.9998	0.5324
hrt	0.0008421	0.9997	1.001	1.002	0.4861
dzgroupCHF	0.0171	0.8053	1.017	1.285	0.5046

Table continues below
	r.Accuracy	full.Accuracy	u.AUC	r.AUC	full.AUC
dzgroupCirrhosis	0.5331	0.5648	0.5417	0.5331	0.5648
adlsc	0.573	0.5874	0.4861	0.573	0.5874
meanbp	0.4838	0.6427	0.5648	0.4838	0.6427
wblc	0.5833	0.5849	0.4954	0.5833	0.5849
avtisst	0.5417	0.5694	0.5139	0.5417	0.5694
scoma	0.5694	0.537	0.5324	0.5694	0.537
hrt	0.6435	0.6157	0.4861	0.6435	0.6157
dzgroupCHF	0.6157	0.6157	0.5046	0.6157	0.6157

	IDI	NRI	z.IDI	z.NRI
dzgroupCirrhosis	0.03546	0.1694	2.818	2.629
adlsc	0.01105	-0.1152	1.579	-0.8492
meanbp	0.03591	0.2562	2.905	1.907
wblc	0.006123	0.1296	1.222	0.9725
avtisst	0.0009975	0.07407	0.5619	0.5481
scoma	0.02509	0.1852	2.296	1.601
hrt	0.008319	-0.03704	1.427	-0.2723
dzgroupCHF	0.0001534	0.05556	0.1434	0.6178

Accuracy: 0.6667
tAUC: 0.6667
bootstrap:

coefficients:

Table continues below
	Estimate	lower	OR	upper	u.Accuracy
slos	-0.07551	0.9254	0.9273	0.9291	0.6333
avtisst	0.08315	1.082	1.087	1.092	0.6197
resp	0.0414	1.039	1.042	1.046	0.5652
scoma	0.01663	1.016	1.017	1.017	0.5909
dzgroupComa	0.2012	1.051	1.223	1.422	0.5455
dzgroupMOSF_w_Malig	0.2717	1.084	1.312	1.588	0.5439
dzgroupCOPD	-0.02938	0.9266	0.971	1.018	0.5379

Table continues below
	r.Accuracy	full.Accuracy	u.AUC	r.AUC
slos	0.6771	0.778	0.6333	0.6771
avtisst	0.6884	0.778	0.6197	0.6884
resp	0.7452	0.778	0.5652	0.7452
scoma	0.7401	0.781	0.5909	0.7401
dzgroupComa	0.7511	0.7663	0.5455	0.7511
dzgroupMOSF_w_Malig	0.7682	0.7735	0.5439	0.7682
dzgroupCOPD	0.7818	0.7636	0.5379	0.7818

	full.AUC	IDI	NRI	z.IDI	z.NRI
slos	0.778	0.2281	0.8218	13.62	12.03
avtisst	0.778	0.1325	0.7039	9.99	9.729
resp	0.778	0.02022	0.2085	4.202	2.693
scoma	0.781	0.02169	0.3957	4.017	5.776
dzgroupComa	0.7663	0.009892	0.1303	2.608	2.698
dzgroupMOSF_w_Malig	0.7735	0.01107	0.1667	2.79	3.048
dzgroupCOPD	0.7636	0.002876	0.1515	1.23	3.205

Accuracy: 0.7848
tAUC: 0.7848
bootstrap:

# Scale factor applied to the formula network: number of stored formulas
# divided by 20*3.
# NOTE(review): 20 presumably mirrors NumberofRepeats and 3 the number of
# ordinal sub-models - both are hard-coded here; confirm.
gain <- length(bmodel$oridinalModels$formulas)/(20*3)
# Bag the ordinal-model formulas on the full data set with type "LM".
bgm <- baggedModel(bmodel$oridinalModels$formulas,Support_red_mat,type="LM")

……

# Heat map of the gain-scaled formula network of the bagged ordinal formulas.
# The margin argument is spelled out in full (`margins`) instead of relying on
# partial matching of `mar`, which breaks silently if another argument
# starting with "mar" is ever added.
gplots::heatmap.2(
  gain * bgm$formulaNetwork,
  trace = "none",
  margins = c(10, 10),
  main = "B:SWiMS Formula Network"
)

Benchmarks for SUPPORT Data Set

José Tamez-Peña

April 30, 2018

1 FRESA.CAD Regression Benchmark

1.1 SUPPORT Data Set

1.2 Benchmark

1.2.1 Results

1.3 Features