1 FRESA.CAD Ordinal Regresion Benchmark

1.1 Wine Quality Data sets

WhiteWinequality  <- read.csv("./Wine/winequality-white (1).csv")

WhiteWinequality$quality[WhiteWinequality$quality < 4] <- 4
WhiteWinequality$quality[WhiteWinequality$quality > 8] <- 8
#WhiteWinequality_mat <- as.data.frame(model.matrix(quality~.*.,WhiteWinequality))
WhiteWinequality_mat <- as.data.frame(model.matrix(quality~.,WhiteWinequality))
WhiteWinequality_mat$`(Intercept)` <- NULL
WhiteWinequality_mat$quality <- WhiteWinequality$quality
fnames <- colnames(WhiteWinequality_mat)
fnames <- str_replace_all(fnames," ","_")
fnames <- str_replace_all(fnames,"/","_")
fnames <- str_replace_all(fnames,":",".")
colnames(WhiteWinequality_mat) <- fnames

#WhiteWinequality_mat <- FRESAScale(WhiteWinequality_mat,method="RankInv")$scaledData

ExperimentName <- "White_Wine_Quality"
bswimsReps <- 3;
theData <- WhiteWinequality_mat;
theOutcome <- "quality";
reps <- 10;
fraction <- 0.20;

FRESAFileName <- paste(ExperimentName,"FRESAMethod.RDATA",sep = "_")
CVFileName <- paste(ExperimentName,"CVMethod.RDATA",sep = "_")
sampleTrain <- sample(nrow(theData),nrow(theData)*0.3)
sampleData <- theData[sampleTrain,]
testdata <- theData[-sampleTrain,]

1.2 Benchmark


bmodel <- BSWiMS.model(formula = paste(theOutcome," ~ 1"),data = sampleData,NumberofRepeats = bswimsReps )

nmodel <- NAIVE_BAYES(formula = quality ~ .,data = sampleData,pca = TRUE,usekernel = TRUE)
pre <- predict(nmodel,testdata)
table(pre,testdata$quality)
nmodel <- NAIVE_BAYES(formula = quality ~ .,data = sampleData,pca = FALSE,usekernel = FALSE)
pre <- predict(nmodel,testdata)
table(pre,testdata$quality)


save(bmodel,file = FRESAFileName)
load(file = FRESAFileName)

par(mfrow = c(2,2));

cp <- OrdinalBenchmark(theData,theOutcome,reps,fraction)

par(mfrow = c(1,1));

save(cp,file = CVFileName)
load(file = CVFileName)

1.3 Results

1.3.1 Model Selection Results


hm <- heatMaps(Outcome = "Outcome",data = cp$testPredictions,title = "Heat Map",Scale = TRUE,hCluster = "col",cexRow = 0.25,cexCol = 0.75,srtCol = 45)


#The Times
pander::pander(cp$cpuElapsedTimes)

BSWiMS	RF	RPART	LASSO	SVM	KNN	ENS
9.261	0.44	0.028	3.748	0.164	0.005	13.65


pr <- plot(cp,prefix = paste(ExperimentName,":"));


pander::pander(pr$metrics,caption = ExperimentName,round = 3)

White_Wine_Quality
	RF	ENS	SVM	BSWiMS	KNN	RPART	LASSO
KAPPA	0.519	0.501	0.489	0.436	0.428	0.426	0.416
BMAE	0.603	0.717	0.709	0.909	0.813	0.804	0.896
Kendall	0.506	0.49	0.479	0.425	0.433	0.434	0.422
Bias	0.053	0.072	0.013	0.108	0.149	0.241	0.085
ACC	0.435	0.388	0.376	0.367	0.34	0.363	0.325
SEN	0.529	0.46	0.472	0.382	0.428	0.425	0.408
AUC	0.698	0.656	0.662	0.612	0.639	0.633	0.624

pander::pander(pr$metrics_filter,caption = ExperimentName,round = 3)

White_Wine_Quality
	LASSO	RF.ref	mRMR	RPART	F.Test	Kendall	BSWiMS
KAPPA	0.454	0.454	0.435	0.437	0.436	0.424	0.444
BMAE	0.803	0.774	0.824	0.792	0.824	0.832	0.81
Kendall	0.446	0.455	0.44	0.435	0.442	0.43	0.435
ACC	0.371	0.371	0.364	0.365	0.356	0.352	0.368
SEN	0.424	0.427	0.406	0.429	0.425	0.409	0.431
AUC	0.639	0.639	0.629	0.641	0.639	0.631	0.64

1.3.2 Radar Plots

op <- par(no.readonly = TRUE)

par(mfrow = c(1,2),xpd = TRUE,pty = "s",mar = c(1,1,1,1))

mNames <- names(cp$cpuElapsedTimes)

classRanks <- c(pr$minMaxMetrics$BMAE[1],pr$minMaxMetrics$KAPPA[2],pr$minMaxMetrics$Kendall[2],pr$minMaxMetrics$ACC[2],pr$minMaxMetrics$SEN[2],pr$minMaxMetrics$AUC[2],min(cp$cpuElapsedTimes))
classRanks <- rbind(classRanks,c(pr$minMaxMetrics$BMAE[2],0,0,0,0,0,max(cp$cpuElapsedTimes)))
classRanks <- as.data.frame(rbind(classRanks,cbind(t(pr$metrics[c("BMAE","KAPPA","Kendall","ACC","SEN","AUC"),mNames]),cp$cpuElapsedTimes)))
colnames(classRanks) <- c("BMAE","KAPPA","Kendall","ACC","SEN","AUC","CPU")

classRanks$BMAE <- -classRanks$BMAE
classRanks$CPU <- -classRanks$CPU

colors_border = c( rgb(1.0,0.0,0.0,1.0), rgb(0.0,1.0,0.0,1.0) , rgb(0.0,0.0,1.0,1.0), rgb(0.2,0.2,0.0,1.0), rgb(0.0,1.0,1.0,1.0), rgb(1.0,0.0,1.0,1.0), rgb(0.0,0.0,0.0,1.0), rgb(1.0,1.0,0.0,1.0) )
colors_in = c( rgb(1.0,0.0,0.0,0.05), rgb(0.0,1.0,0.0,0.05) , rgb(0.0,0.0,1.0,0.05),rgb(1.0,1.0,0.0,0.05), rgb(0.0,1.0,1.0,0.05) , rgb(1.0,0.0,1.0,0.05), rgb(0.0,0.0,0.0,0.05), rgb(1.0,1.0,0.0,0.05) )
fmsb::radarchart(classRanks,axistype = 0,maxmin = T,pcol = colors_border,pfcol = colors_in,plwd = c(6,2,2,2,2,2,2),plty = 1, cglcol = "grey", cglty = 1,axislabcol = "black",cglwd = 0.8, vlcex  = 0.5 ,title = "Prediction Model")

legend("topleft",legend = rownames(classRanks[-c(1,2),]),bty = "n",pch = 20,col = colors_in,text.col = colors_border,cex = 0.5,pt.cex = 2)


filnames <- c("BSWiMS","LASSO","RF.ref","F.Test","Kendall","mRMR")

filterRanks <- c(pr$minMaxMetrics$BMAE[1],pr$minMaxMetrics$KAPPA[2],pr$minMaxMetrics$Kendall[2],pr$minMaxMetrics$ACC[2],pr$minMaxMetrics$SEN[2],pr$minMaxMetrics$AUC[2],max(cp$jaccard),min(cp$featsize));

filterRanks <- rbind(filterRanks,c(pr$minMaxMetrics$BMAE[2],0,0,0,0,0,min(cp$jaccard),max(cp$featsize)));

filterRanks <- as.data.frame(rbind(filterRanks,cbind(t(pr$metrics_filter[c("BMAE","KAPPA","Kendall","ACC","SEN","AUC"),filnames]),cp$jaccard[filnames],cp$featsize[filnames])));
colnames(filterRanks) <- c("BMAE","KAPPA","Kendall","ACC","SEN","AUC","Jaccard","SIZE")
filterRanks$BMAE <- -filterRanks$BMAE
filterRanks$SIZE <- -filterRanks$SIZE

colors_border = c( rgb(1.0,0.0,0.0,1.0), rgb(0.0,1.0,0.0,1.0) , rgb(0.0,0.0,1.0,1.0), rgb(0.2,0.2,0.0,1.0), rgb(0.0,1.0,1.0,1.0), rgb(1.0,0.0,1.0,1.0), rgb(0.0,0.0,0.0,1.0) )
colors_in = c( rgb(1.0,0.0,0.0,0.05), rgb(0.0,1.0,0.0,0.05) , rgb(0.0,0.0,1.0,0.05),rgb(1.0,1.0,0.0,0.05), rgb(0.0,1.0,1.0,0.05) , rgb(1.0,0.0,1.0,0.05), rgb(0.0,0.0,0.0,0.05) )
fmsb::radarchart(filterRanks,axistype = 0,maxmin = T,pcol = colors_border,pfcol = colors_in,plwd = c(6,2,2,2,2,2,2),plty = 1, cglcol = "grey", cglty = 1,axislabcol = "black",cglwd = 0.8, vlcex  = 0.6,title = "Filter Method" )


legend("topleft",legend = rownames(filterRanks[-c(1,2),]),bty = "n",pch = 20,col = colors_in,text.col = colors_border,cex = 0.5,pt.cex = 2)


par(mfrow = c(1,1))
par(op)

1.3.3 Features Analysis


pander::pander(summary(bmodel),caption = "Model",round = 3)

coefficients:

Table continues below
	Estimate	lower	OR	upper	u.Accuracy
volatile.acidity	-1.981	0.1043	0.138	0.1825	0.7268
fixed.acidity	-0.4781	0.5795	0.6199	0.6632	0.6428
citric.acid	3.638	22.75	38.02	63.53	0.5209

Table continues below
	r.Accuracy	full.Accuracy	u.AUC	r.AUC	full.AUC
volatile.acidity	0.9646	0.7268	0.6301	0.5	0.6301
fixed.acidity	0.5209	0.6641	0.5741	0.5371	0.6855
citric.acid	0.6428	0.6641	0.5371	0.5741	0.6855

	IDI	NRI	z.IDI	z.NRI	Frequency
volatile.acidity	0.1291	0.5204	20.3	14.71	0.5
fixed.acidity	0.09454	0.499	17.76	13.85	0.5
citric.acid	0.08178	0.6167	16.62	17.34	0.5

Accuracy: 0.6671
tAUC: 0.6793
sensitivity: 0.6662
specificity: 0.6923
bootstrap:

coefficients:

Table continues below
	Estimate	lower	OR	upper
volatile.acidity	-4.476	0.00605	0.01138	0.0214
total.sulfur.dioxide	-0.003241	0.9963	0.9968	0.9972
alcohol	0.5121	1.454	1.669	1.915
residual.sugar	0.1226	1.094	1.13	1.168
density	-210.5	7.816e-132	3.843e-92	1.89e-52
fixed.acidity	-0.06673	0.904	0.9354	0.968

Table continues below
	u.Accuracy	r.Accuracy	full.Accuracy	u.AUC
volatile.acidity	0.625	0.6687	0.714	0.5987
total.sulfur.dioxide	0.63	0.5311	0.6286	0.6283
alcohol	0.6705	0.717	0.714	0.6865
residual.sugar	0.5761	0.7092	0.714	0.5581
density	0.6332	0.7097	0.714	0.6266
fixed.acidity	0.5311	0.63	0.6286	0.5172

Table continues below
	r.AUC	full.AUC	IDI	NRI	z.IDI
volatile.acidity	0.6969	0.7244	0.06198	0.4864	12.08
total.sulfur.dioxide	0.5172	0.622	0.04717	0.5249	9.979
alcohol	0.7238	0.7244	0.02385	0.2937	6.742
residual.sugar	0.7133	0.7244	0.02184	0.3064	6.662
density	0.7156	0.7244	0.009967	0.1542	4.397
fixed.acidity	0.6283	0.622	0.007317	0.02281	3.712

	z.NRI	Frequency
volatile.acidity	11.19	0.5
total.sulfur.dioxide	12.06	0.5
alcohol	6.588	0.5
residual.sugar	6.902	0.5
density	3.468	0.5
fixed.acidity	0.5093	0.5

Accuracy: 0.7161
tAUC: 0.7271
sensitivity: 0.6945
specificity: 0.7598
bootstrap:

coefficients:

Table continues below
	Estimate	lower	OR	upper
alcohol	0.3864	1.394	1.472	1.554
total.sulfur.dioxide	-0.00414	0.9952	0.9959	0.9965
chlorides	-8.093	5.945e-06	0.0003057	0.01572
pH	0.4435	1.243	1.558	1.954
sulphates	0.5859	1.295	1.797	2.492

Table continues below
	u.Accuracy	r.Accuracy	full.Accuracy	u.AUC
alcohol	0.7161	0.63	0.7155	0.7163
total.sulfur.dioxide	0.559	0.5567	0.573	0.5797
chlorides	0.6179	0.7144	0.7155	0.6371
pH	0.5753	0.6598	0.6703	0.556
sulphates	0.5472	0.6107	0.6182	0.5119

Table continues below
	r.AUC	full.AUC	IDI	NRI	z.IDI
alcohol	0.6508	0.7195	0.1053	0.5487	17.25
total.sulfur.dioxide	0.5267	0.5876	0.03412	0.332	8.984
chlorides	0.7155	0.7195	0.006584	-0.03315	3.963
pH	0.6689	0.6812	0.005449	0.1663	3.552
sulphates	0.6245	0.6259	0.005288	0.08003	3.412

	z.NRI	Frequency
alcohol	13.71	0.5
total.sulfur.dioxide	8.111	0.5
chlorides	-0.8017	0.5
pH	4.027	0.5
sulphates	1.946	0.5

Accuracy: 0.7134
tAUC: 0.7224
sensitivity: 0.7382
specificity: 0.7066
bootstrap:

- coefficients:
  
  Table continues below
  
  Estimate lower OR upper u.Accuracy
  
  alcohol 0.4555 1.479 1.577 1.682 0.713
  
  fixed.acidity -0.2425 0.7578 0.7846 0.8125 0.5381
  
  Table continues below
  
  r.Accuracy full.Accuracy u.AUC r.AUC full.AUC
  
  alcohol 0.0388 0.713 0.7186 0.5 0.7186
  
  fixed.acidity 0.0388 0.5381 0.5386 0.5 0.5386
  
  IDI NRI z.IDI z.NRI Frequency
  
  alcohol 0.2484 0.877 31.07 25.96 0.5
  
  fixed.acidity 0.03335 0.1538 9.742 4.102 0.5
- Accuracy: 0.725
- tAUC: 0.7391
- sensitivity: 0.7544
- specificity: 0.7238
- bootstrap:

Table continues below
	Estimate	lower	OR	upper	u.Accuracy
alcohol	0.4555	1.479	1.577	1.682	0.713
fixed.acidity	-0.2425	0.7578	0.7846	0.8125	0.5381

Table continues below
	r.Accuracy	full.Accuracy	u.AUC	r.AUC	full.AUC
alcohol	0.0388	0.713	0.7186	0.5	0.7186
fixed.acidity	0.0388	0.5381	0.5386	0.5	0.5386

	IDI	NRI	z.IDI	z.NRI	Frequency
alcohol	0.2484	0.877	31.07	25.96	0.5
fixed.acidity	0.03335	0.1538	9.742	4.102	0.5


topFeat <- min(ncol(bmodel$bagging$formulaNetwork),30);
shortformulaNetwork <- bmodel$bagging$formulaNetwork[1:topFeat,1:topFeat]
validf <- diag(shortformulaNetwork) > 0.1
gplots::heatmap.2(shortformulaNetwork[validf,validf],trace="none",mar = c(10,10),main = "B:SWiMS Formula Network",cexRow = 0.6,cexCol = 0.6)



rm <- rowMeans(cp$featureSelectionFrequency)
selFrequency <- cp$featureSelectionFrequency[rm > 0.10,]


gplots::heatmap.2(selFrequency,trace = "none",mar = c(10,10),main = "Features",cexRow = 0.2)

hm <- heatMaps(Outcome = theOutcome,data = theData[,c(theOutcome,rownames(selFrequency))],title = "Heat Map",Scale = TRUE,hCluster = "col",cexRow = 0.3,cexCol = 0.3,srtCol = 45)


vlist <- rownames(selFrequency)
vlist <- cbind(vlist,vlist)
univ <- univariateRankVariables(variableList = vlist,formula = paste(theOutcome,"~1"),Outcome = theOutcome,data = theData,type = "LM",rankingTest = "Ztest",uniType = "Regression")[,c("cohortMean","cohortStd","kendall.r","kendall.p")] 


cnames <- colnames(univ);
univ <- cbind(univ,rm[rownames(univ)])
colnames(univ) <- c(cnames,"Frequency")
univ <- univ[order(-univ[,5]),]
pander::pander(univ[1:topFeat,],caption = "Features",round = 4)

Features (continued below)
	cohortMean	cohortStd	kendall.r	kendall.p
volatile.acidity	0.2782	0.1008	-0.1549	0
free.sulfur.dioxide	35.31	17.01	0.0173	0.115
alcohol	10.51	1.231	0.3468	0
density	0.994	0.003	-0.2666	0
fixed.acidity	6.855	0.8439	-0.0655	0
total.sulfur.dioxide	138.4	42.5	-0.1512	0
chlorides	0.0458	0.0218	-0.2449	0
pH	3.188	0.151	0.0844	0
citric.acid	0.3342	0.121	0.0146	0.1864
residual.sugar	6.391	5.072	-0.063	0

	Frequency
volatile.acidity	1
free.sulfur.dioxide	1
alcohol	0.9857
density	0.9714
fixed.acidity	0.8286
total.sulfur.dioxide	0.7857
chlorides	0.7429
pH	0.7429
citric.acid	0.5571
residual.sugar	0.5429

Benchmarks for Modeling Ordinal Scores

José Tamez-Peña

October 10, 2018

1 FRESA.CAD Ordinal Regresion Benchmark

1.1 Wine Quality Data sets

1.2 Benchmark

1.3 Results

1.3.1 Model Selection Results

1.3.2 Radar Plots

1.3.3 Features Analysis