1 FRESA.CAD Regresion Benchmark

1.1 SUPPORT Data Set

library(readr)


Support_red <- read_delim("./SUPPORT/Support_red.txt","\t", escape_double = FALSE, na = "empty",trim_ws = TRUE)

Support_red <- Support_red[complete.cases(Support_red),]
#Support_red_mat <- as.data.frame(model.matrix(sfdm2~.*.,Support_red))
Support_red_mat <- as.data.frame(model.matrix(sfdm2~.,Support_red))
Support_red_mat$`(Intercept)` <- NULL
Support_red_mat$sfdm2 <- Support_red$sfdm2
Support_red_mat$sfdm2[Support_red_mat$sfdm2 == 4] <- 3; # too few samples in 4
fnames <- colnames(Support_red_mat)
fnames <- str_replace_all(fnames," ","_")
fnames <- str_replace_all(fnames,"/","_")
fnames <- str_replace_all(fnames,":",".")
colnames(Support_red_mat) <- fnames

table(Support_red_mat$sfdm2)

ExperimentName <- "Support"
bswimsReps <- 20;
theData <- Support_red_mat;
theOutcome <- "sfdm2";
reps <- 75;
fraction <- 0.8;

FRESAFileName <- paste(ExperimentName,"FRESAMethod.RDATA",sep = "_")
CVFileName <- paste(ExperimentName,"CVMethod.RDATA",sep = "_")

sampleTrain <- sample(nrow(theData),nrow(theData)*0.5)
sampleData <- theData[sampleTrain,]
testdata <- theData[-sampleTrain,]

table(sampleData$sfdm2)

#cf <- tools::compactPDF("C:/Users/Jose Tamez/Dropbox/Development/R/FRESA.CADcpp/FRESA.CAD/inst/doc/",Sys.which(Sys.getenv("R_QPDF", "qpdf")),gs_cmd ="C:/Program Files/gs/gs9.23/bin/gswin64.exe",gs_quality = "ebook")

1.2 Benchmark


bmodel <- BSWiMS.model(formula = paste(theOutcome," ~ 1"),data = sampleData,NumberofRepeats = bswimsReps )
save(bmodel,file = FRESAFileName)

par(mfrow = c(2,2));

cp <- OrdinalBenchmark(theData,theOutcome,reps,fraction)

par(mfrow = c(1,1));

save(cp,file = CVFileName)

1.3 Results

1.3.1 Model Selection Results


hm <- heatMaps(Outcome = "Outcome",data = cp$testPredictions,title = "Heat Map",Scale = TRUE,hCluster = "col",cexRow = 0.25,cexCol = 0.75,srtCol = 45)


#The Times
pander::pander(cp$cpuElapsedTimes)

BSWiMS	RF	RPART	LASSO	SVM	KNN	ENS
6.519	0.5783	0.03587	2.223	0.08973	0.0124	9.458


par(mfrow = c(2,1));
pr <- plot(cp,prefix = paste(ExperimentName,":"));

par(mfrow = c(1,1));

pander::pander(pr$metrics,caption = ExperimentName,round = 3)

Support
	RF	LASSO	ENS	BSWiMS	KNN	RPART	SVM
KAPPA	0.519	0.553	0.544	0.502	0.487	0.449	0.442
BMAE	1.021	0.966	0.915	0.995	1.013	1.049	1.137
Kendall	0.472	0.489	0.485	0.433	0.441	0.401	0.403
Bias	-0.321	-0.108	-0.295	-0.116	-0.419	-0.509	-0.343
ACC	0.62	0.59	0.578	0.539	0.498	0.516	0.506
SEN	0.516	0.495	0.51	0.464	0.439	0.451	0.449
AUC	0.691	0.679	0.688	0.659	0.644	0.65	0.646

pander::pander(pr$metrics_filter,caption = ExperimentName,round = 3)

Support
	BSWiMS	RPART	F.Test	LASSO	RF.ref	mRMR	Kendall
KAPPA	0.497	0.492	0.492	0.502	0.443	0.442	0.444
BMAE	1.009	1.067	1.068	1.152	1.068	1.159	1.206
Kendall	0.438	0.416	0.416	0.423	0.409	0.406	0.406
ACC	0.501	0.5	0.522	0.564	0.551	0.495	0.498
SEN	0.444	0.462	0.457	0.458	0.494	0.401	0.412
AUC	0.648	0.651	0.652	0.653	0.675	0.62	0.622

1.3.2 Radar Plots

op <- par(no.readonly = TRUE)

par(mfrow = c(1,2),xpd = TRUE,pty = "s",mar = c(1,1,1,1))

mNames <- names(cp$cpuElapsedTimes)

classRanks <- c(pr$minMaxMetrics$BMAE[1],pr$minMaxMetrics$KAPPA[2],pr$minMaxMetrics$Kendall[2],pr$minMaxMetrics$ACC[2],pr$minMaxMetrics$SEN[2],pr$minMaxMetrics$AUC[2],min(cp$cpuElapsedTimes))
classRanks <- rbind(classRanks,c(pr$minMaxMetrics$BMAE[2],0,0,0,0,0,max(cp$cpuElapsedTimes)))
classRanks <- as.data.frame(rbind(classRanks,cbind(t(pr$metrics[c("BMAE","KAPPA","Kendall","ACC","SEN","AUC"),mNames]),cp$cpuElapsedTimes)))
colnames(classRanks) <- c("BMAE","KAPPA","Kendall","ACC","SEN","AUC","CPU")

classRanks$BMAE <- -classRanks$BMAE
classRanks$CPU <- -classRanks$CPU

colors_border = c( rgb(1.0,0.0,0.0,1.0), rgb(0.0,1.0,0.0,1.0) , rgb(0.0,0.0,1.0,1.0), rgb(0.2,0.2,0.0,1.0), rgb(0.0,1.0,1.0,1.0), rgb(1.0,0.0,1.0,1.0), rgb(0.0,0.0,0.0,1.0), rgb(1.0,1.0,0.0,1.0) )
colors_in = c( rgb(1.0,0.0,0.0,0.05), rgb(0.0,1.0,0.0,0.05) , rgb(0.0,0.0,1.0,0.05),rgb(1.0,1.0,0.0,0.05), rgb(0.0,1.0,1.0,0.05) , rgb(1.0,0.0,1.0,0.05), rgb(0.0,0.0,0.0,0.05), rgb(1.0,1.0,0.0,0.05) )
fmsb::radarchart(classRanks,axistype = 0,maxmin = T,pcol = colors_border,pfcol = colors_in,plwd = c(6,2,2,2,2,2,2),plty = 1, cglcol = "grey", cglty = 1,axislabcol = "black",cglwd = 0.8, vlcex  = 0.5 ,title = "Prediction Model")

legend("topleft",legend = rownames(classRanks[-c(1,2),]),bty = "n",pch = 20,col = colors_in,text.col = colors_border,cex = 0.5,pt.cex = 2)


filnames <- c("BSWiMS","LASSO","RF.ref","F.Test","Kendall","mRMR")

filterRanks <- c(pr$minMaxMetrics$BMAE[1],pr$minMaxMetrics$KAPPA[2],pr$minMaxMetrics$Kendall[2],pr$minMaxMetrics$ACC[2],pr$minMaxMetrics$SEN[2],pr$minMaxMetrics$AUC[2],max(cp$jaccard),min(cp$featsize));

filterRanks <- rbind(filterRanks,c(pr$minMaxMetrics$BMAE[2],0,0,0,0,0,min(cp$jaccard),max(cp$featsize)));

filterRanks <- as.data.frame(rbind(filterRanks,cbind(t(pr$metrics_filter[c("BMAE","KAPPA","Kendall","ACC","SEN","AUC"),filnames]),cp$jaccard[filnames],cp$featsize[filnames])));
colnames(filterRanks) <- c("BMAE","KAPPA","Kendall","ACC","SEN","AUC","Jaccard","SIZE")
filterRanks$BMAE <- -filterRanks$BMAE
filterRanks$SIZE <- -filterRanks$SIZE

colors_border = c( rgb(1.0,0.0,0.0,1.0), rgb(0.0,1.0,0.0,1.0) , rgb(0.0,0.0,1.0,1.0), rgb(0.2,0.2,0.0,1.0), rgb(0.0,1.0,1.0,1.0), rgb(1.0,0.0,1.0,1.0), rgb(0.0,0.0,0.0,1.0) )
colors_in = c( rgb(1.0,0.0,0.0,0.05), rgb(0.0,1.0,0.0,0.05) , rgb(0.0,0.0,1.0,0.05),rgb(1.0,1.0,0.0,0.05), rgb(0.0,1.0,1.0,0.05) , rgb(1.0,0.0,1.0,0.05), rgb(0.0,0.0,0.0,0.05) )
fmsb::radarchart(filterRanks,axistype = 0,maxmin = T,pcol = colors_border,pfcol = colors_in,plwd = c(6,2,2,2,2,2,2),plty = 1, cglcol = "grey", cglty = 1,axislabcol = "black",cglwd = 0.8, vlcex  = 0.6,title = "Filter Method" )


legend("topleft",legend = rownames(filterRanks[-c(1,2),]),bty = "n",pch = 20,col = colors_in,text.col = colors_border,cex = 0.5,pt.cex = 2)


par(mfrow = c(1,1))
par(op)

1.3.3 Features Analysis


pander::pander(summary(bmodel),caption = "Model",round = 3)

coefficients:

Table continues below
	Estimate	lower	OR	upper	u.Accuracy	r.Accuracy
avtisst	0.0639	1.055	1.066	1.083	0.6281	0.6414
adlsc	0.3012	1.274	1.351	1.496	0.5915	0.6421
dzgroupComa	2.7	4.019	14.88	3662	0.4205	0.6849
resp	0.04318	1.03	1.044	1.071	0.5988	0.6968

Table continues below
	full.Accuracy	u.AUC	r.AUC	full.AUC	IDI
avtisst	0.6986	0.6503	0.663	0.7149	0.1062
adlsc	0.6986	0.605	0.6641	0.7149	0.05949
dzgroupComa	0.6986	0.5497	0.7014	0.7149	0.02973
resp	0.6986	0.5832	0.7173	0.7149	0.02152

	NRI	z.IDI	z.NRI	Frequency
avtisst	0.5487	7.745	6.635	1
adlsc	0.4571	5.806	5.442	1
dzgroupComa	0.2195	4.042	4.777	1
resp	0.4167	3.307	4.922	1

Accuracy: 0.6993
tAUC: 0.7156
sensitivity: 0.6604
specificity: 0.7708
bootstrap:

coefficients:

Table continues below
	Estimate	lower	OR	upper	u.Accuracy
avtisst	0.0553	1.054	1.057	1.079	0.6235
resp	0.05064	1.05	1.052	1.078	0.5966
slos	-0.02356	0.9738	0.9767	0.9902	0.5208
scoma	0.01572	1.015	1.016	1.026	0.5892
crea	0.1755	1.173	1.192	1.368	0.6186
dzgroupCirrhosis	0.5776	1.12	1.782	2.836	0.5183
meanbp	-0.004641	0.9916	0.9954	0.9992	0.5672

Table continues below
	r.Accuracy	full.Accuracy	u.AUC	r.AUC	full.AUC
avtisst	0.6721	0.7081	0.6233	0.6719	0.7079
resp	0.6792	0.7081	0.5964	0.679	0.7079
slos	0.6932	0.7081	0.5212	0.693	0.7079
scoma	0.6985	0.7081	0.5886	0.6983	0.7079
crea	0.6985	0.7099	0.6181	0.6983	0.7096
dzgroupCirrhosis	0.6968	0.7115	0.5172	0.6966	0.7113
meanbp	0.7115	0.7115	0.5674	0.7113	0.7113

	IDI	NRI	z.IDI	z.NRI	Frequency
avtisst	0.06726	0.5107	5.332	5.413	1
resp	0.03607	0.4301	4.038	4.458	1
slos	0.02876	0.1924	3.355	2.091	1
scoma	0.02192	0.2619	3.098	2.976	1
crea	0.01403	0.4369	2.494	4.792	0.9
dzgroupCirrhosis	0.01314	0.4419	2.436	5.952	0.3
meanbp	0.01376	0.2598	2.367	2.671	0.3

Accuracy: 0.7139
tAUC: 0.7137
sensitivity: 0.6225
specificity: 0.8049
bootstrap:

coefficients:

Table continues below
	Estimate	lower	OR	upper	u.Accuracy	r.Accuracy
avtisst	0.08103	1.063	1.084	1.106	0.6727	0.7198
slos	-0.05863	0.9269	0.9431	0.9662	0.5209	0.7312
resp	0.06377	1.05	1.066	1.093	0.6134	0.7604
scoma	0.01865	1.012	1.019	1.029	0.6528	0.753
crea	0.1757	1.111	1.192	1.362	0.6615	0.7647

Table continues below
	full.Accuracy	u.AUC	r.AUC	full.AUC	IDI	NRI
avtisst	0.7714	0.6634	0.7092	0.7596	0.1194	0.7163
slos	0.7714	0.5506	0.7242	0.7596	0.08623	0.4901
resp	0.7714	0.6086	0.7489	0.7596	0.04512	0.5455
scoma	0.7714	0.6138	0.7424	0.7596	0.02748	0.3608
crea	0.7728	0.6284	0.7529	0.761	0.01351	0.3523

	z.IDI	z.NRI	Frequency
avtisst	8.041	8.478	1
slos	6.652	5.902	1
resp	4.889	6.218	1
scoma	3.856	4.395	1
crea	2.577	4.115	0.85

Accuracy: 0.7702
tAUC: 0.7575
sensitivity: 0.6824
specificity: 0.8326
bootstrap:


topFeat <- min(ncol(bmodel$bagging$formulaNetwork),30);
shortformulaNetwork <- bmodel$bagging$formulaNetwork[1:topFeat,1:topFeat]
validf <- diag(shortformulaNetwork) > 0.1
gplots::heatmap.2(shortformulaNetwork[validf,validf],trace="none",mar = c(10,10),main = "B:SWiMS Formula Network",cexRow = 0.6,cexCol = 0.6)



rm <- rowMeans(cp$featureSelectionFrequency)
selFrequency <- cp$featureSelectionFrequency[rm > 0.10,]


gplots::heatmap.2(selFrequency,trace = "none",mar = c(10,10),main = "Features",cexRow = 0.2)

hm <- heatMaps(Outcome = theOutcome,data = theData[,c(theOutcome,rownames(selFrequency))],title = "Heat Map",Scale = TRUE,hCluster = "col",cexRow = 0.3,cexCol = 0.3,srtCol = 45)


vlist <- rownames(selFrequency)
vlist <- cbind(vlist,vlist)
univ <- univariateRankVariables(variableList = vlist,formula = paste(theOutcome,"~1"),Outcome = theOutcome,data = theData,type = "LM",rankingTest = "Ztest",uniType = "Regression")[,c("cohortMean","cohortStd","kendall.r","kendall.p")] 


cnames <- colnames(univ);
univ <- cbind(univ,rm[rownames(univ)])
colnames(univ) <- c(cnames,"Frequency")
univ <- univ[order(-univ[,5]),]
pander::pander(univ[1:topFeat,],caption = "Features",round = 4)

Features
	cohortMean	cohortStd	kendall.r	kendall.p	Frequency
avtisst	23.7	13.66	0.2985	0	1
adlsc	1.957	2.074	0.1905	0	1
meanbp	84.57	27.95	-0.1246	0	0.96
crea	1.8	1.728	0.1347	0	0.861
resp	23.63	9.506	0.0878	0.0014	0.7962
hrt	98.83	33.02	0.1452	0	0.7886
scoma	12.65	25.47	0.241	0	0.7562
dzgroupComa	764	54	0.188	0	0.7333
dzgroupCirrhosis	773	45	0.065	0.0471	0.7181

Benchmarks for SUPPORT Data SET

José Tamez-Peña

April 30, 2018

1 FRESA.CAD Regresion Benchmark

1.1 SUPPORT Data Set

1.2 Benchmark

1.3 Results

1.3.1 Model Selection Results

1.3.2 Radar Plots

1.3.3 Features Analysis