1 FRESA.CAD Benchmark

1.1 Modeling MPG

# Read the MPG data set and keep only the rows without missing values.
mpg <- read.csv("./mpg/mpg.csv")
mpg <- mpg[complete.cases(mpg), ]

# The car name is removed before modeling; origin is handled as a factor.
mpg[["carName"]] <- NULL
mpg[["origin"]] <- as.factor(mpg[["origin"]])

# Expand the predictors into a design matrix holding every main effect and
# every pairwise interaction (`. * .`), drop the intercept column, and put
# the outcome back as the `mpg` column.
mpg_mat <- as.data.frame(model.matrix(mpg ~ . * ., data = mpg))
mpg_mat[["(Intercept)"]] <- NULL
mpg_mat[["mpg"]] <- mpg[["mpg"]]

# Clean the design-matrix column names in a single pass: blanks and slashes
# become underscores, and the ":" separating the two sides of an interaction
# term becomes a dot.
fnames <- chartr(" /:", "__.", colnames(mpg_mat))
colnames(mpg_mat) <- fnames


# Experiment settings read by the chunks that follow.
ExperimentName <- "mpg"
theData <- mpg_mat
theOutcome <- "mpg"
bswimsReps <- 20   # passed as NumberofRepeats to BSWiMS.model()
reps <- 5          # passed as reps to RegresionBenchmark()
fraction <- 0.3    # passed as trainFraction to RegresionBenchmark()

# Result files are named <experiment>_<reps>_<suffix>.
BSWiMSFileName <- paste(
  c(ExperimentName, reps, "FRESAMethod.RDATA"),
  collapse = "_"
)
CVFileName <- paste(
  c(ExperimentName, reps, "CVMethod.RDATA"),
  collapse = "_"
)

1.2 Benchmark


# Fit the BSWiMS model on the complete data set and store it on disk.
# Uncomment the load() line to reuse a previously saved model instead.
BSWiMSMODEL <- BSWiMS.model(
  formula = paste(theOutcome, " ~ 1"),
  data = theData,
  NumberofRepeats = bswimsReps
)
save(BSWiMSMODEL, file = BSWiMSFileName)
# load(file = BSWiMSFileName)

# Run the regression benchmark with a 2 x 2 plotting grid active, store the
# result on disk, then switch back to a single-panel layout.
par(mfrow = c(2, 2))
cp <- RegresionBenchmark(
  theData = theData,
  theOutcome = theOutcome,
  reps = reps,
  trainFraction = fraction
)
save(cp, file = CVFileName)
par(mfrow = c(1, 1))

# load(file = CVFileName)

1.3 Results

1.3.1 Model Selection Results


# Heat map of the test predictions collected by the benchmark.
hm <- heatMaps(
  Outcome = "Outcome",
  data = cp$testPredictions,
  title = "Heat Map",
  Scale = TRUE,
  hCluster = "col",
  cexRow = 0.25,
  cexCol = 0.75,
  srtCol = 45
)

# Elapsed CPU times reported by the benchmark, one entry per method
pander::pander(cp$cpuElapsedTimes)
BSWiMS RF RPART LASSO SVM ENS
1.668 0.17 0.006 0.268 0.008 2.12

# Plot the benchmark object on a two-row layout and keep the value returned
# by plot(); the layout is reset before the metrics table is printed.
par(mfrow = c(2, 1))
pr <- plot(cp, prefix = paste(ExperimentName, ":"))
par(mfrow = c(1, 1))

# Performance metrics of the prediction models
pander::pander(
  pr$metrics,
  caption = "MPG",
  round = 3
)
MPG
  BSWiMS RF LASSO SVM ENS RPART
Spearman 0.939 0.934 0.941 0.942 0.944 0.911
MAE 2.208 2.18 2.122 2.113 2.037 2.485
Pearson 0.923 0.921 0.928 0.923 0.931 0.898
RMSE 3.01 3.06 2.915 3.041 2.871 3.438
Bias 0.078 -0.017 0.059 -0.165 -0.011 -0.061
# Performance metrics table stored in pr$metrics_filter
pander::pander(
  pr$metrics_filter,
  caption = "MPG",
  round = 3
)
MPG (continued below)
  LASSO RPART F-Test Pearson mRMR W-Test Kendall
Spearman 0.94 0.938 0.936 0.936 0.937 0.936 0.936
MAE 2.06 2.106 2.165 2.166 2.394 2.156 2.174
Pearson 0.928 0.926 0.924 0.923 0.914 0.923 0.922
RMSE 2.909 2.962 3.009 3.008 3.174 3.02 3.028
Bias 0.029 0.019 0.061 0.092 0.017 0.098 0.092
  BSWiMS RF.ref
Spearman 0.938 0.927
MAE 2.157 2.408
Pearson 0.924 0.904
RMSE 2.987 3.353
Bias -0.013 -0.058

1.3.2 Radar Plots


# Radar charts: one for the prediction models, one for the filter methods.
#
# With maxmin = TRUE, fmsb::radarchart() reads the first data row as the axis
# maxima and the second row as the axis minima; the remaining rows are the
# series that get drawn. radarchart() is called through `fmsb::` so the
# package never has to be attached to (and detached from) the search path.

# Remember the graphical parameters so they can be restored at the end.
op <- par(no.readonly = TRUE)
par(mfrow = c(1, 2), xpd = TRUE, pty = "s", mar = c(1, 1, 1, 1))

# Border / fill colour pairs, one per series, shared by both charts.
colors_border <- c(
  rgb(1.0, 0.0, 0.0, 1.0), rgb(0.0, 1.0, 0.0, 1.0), rgb(0.0, 0.0, 1.0, 1.0),
  rgb(0.2, 0.2, 0.0, 1.0), rgb(0.0, 1.0, 1.0, 1.0), rgb(1.0, 0.0, 1.0, 1.0)
)
colors_in <- c(
  rgb(1.0, 0.0, 0.0, 0.1), rgb(0.0, 1.0, 0.0, 0.1), rgb(0.0, 0.0, 1.0, 0.1),
  rgb(1.0, 1.0, 0.0, 0.1), rgb(0.0, 1.0, 1.0, 0.1), rgb(1.0, 0.0, 1.0, 0.1)
)

# ---- Prediction models ----
mNames <- names(cp$cpuElapsedTimes)

# Row 1: best value of each axis. Row 2: worst value of each axis.
# NOTE(review): assumes minMaxMetrics$<metric> is c(min, max) -- confirm.
classRanks <- c(
  pr$minMaxMetrics$Pearson[2],
  pr$minMaxMetrics$RMSE[1],
  pr$minMaxMetrics$Spearman[2],
  pr$minMaxMetrics$MAE[1],
  min(cp$cpuElapsedTimes)
)
classRanks <- rbind(
  classRanks,
  c(
    0,
    pr$minMaxMetrics$RMSE[2],
    0,
    pr$minMaxMetrics$MAE[2],
    max(cp$cpuElapsedTimes)
  )
)
# Remaining rows: one per method, in the order of the CPU-time vector.
classRanks <- as.data.frame(rbind(
  classRanks,
  cbind(
    t(pr$metrics[c("Pearson", "RMSE", "Spearman", "MAE"), mNames]),
    cp$cpuElapsedTimes
  )
))
colnames(classRanks) <- c("Pearson", "RMSE", "Spearman", "MAE", "CPU")

# Errors and CPU time are "lower is better": negate them so that a larger
# plotted value always means a better result.
classRanks$RMSE <- -classRanks$RMSE
classRanks$MAE <- -classRanks$MAE
classRanks$CPU <- -classRanks$CPU

fmsb::radarchart(
  classRanks,
  axistype = 0,
  maxmin = TRUE,
  pcol = colors_border,
  pfcol = colors_in,
  plwd = c(6, 2, 2, 2, 2, 2),
  plty = 1,
  cglcol = "grey",
  cglty = 1,
  axislabcol = "black",
  cglwd = 0.8,
  vlcex = 0.6,
  title = "Prediction Model"
)

# The first two rows are the axis limits, not series, so they get no entry.
legend(
  "topleft",
  legend = rownames(classRanks[-c(1, 2), ]),
  bty = "n",
  pch = 20,
  col = colors_in,
  text.col = colors_border,
  cex = 0.5,
  pt.cex = 2
)

# ---- Filter methods ----
filnames <- c("BSWiMS", "LASSO", "RF.ref", "F-Test", "Kendall", "mRMR")

filterRanks <- c(
  pr$minMaxMetrics$Pearson[2],
  pr$minMaxMetrics$RMSE[1],
  pr$minMaxMetrics$Spearman[2],
  pr$minMaxMetrics$MAE[1],
  max(cp$jaccard),
  min(cp$featsize)
)
filterRanks <- rbind(
  filterRanks,
  c(
    0,
    pr$minMaxMetrics$RMSE[2],
    0,
    pr$minMaxMetrics$MAE[2],
    0,
    max(cp$featsize)
  )
)
filterRanks <- as.data.frame(rbind(
  filterRanks,
  cbind(
    t(pr$metrics_filter[c("Pearson", "RMSE", "Spearman", "MAE"), filnames]),
    cp$jaccard[filnames],
    cp$featsize[filnames]
  )
))
colnames(filterRanks) <- c(
  "Pearson", "RMSE", "Spearman", "MAE", "Jaccard", "SIZE"
)

# Errors and feature-set size are "lower is better": negate them as above.
filterRanks$RMSE <- -filterRanks$RMSE
filterRanks$MAE <- -filterRanks$MAE
filterRanks$SIZE <- -filterRanks$SIZE

fmsb::radarchart(
  filterRanks,
  axistype = 0,
  maxmin = TRUE,
  pcol = colors_border,
  pfcol = colors_in,
  plwd = c(6, 2, 2, 2, 2, 2),
  plty = 1,
  cglcol = "grey",
  cglty = 1,
  axislabcol = "black",
  cglwd = 0.8,
  vlcex = 0.6,
  title = "Filter Method"
)

legend(
  "topleft",
  legend = rownames(filterRanks[-c(1, 2), ]),
  bty = "n",
  pch = 20,
  col = colors_in,
  text.col = colors_border,
  cex = 0.5,
  pt.cex = 2
)

# Restore every graphical parameter saved at the top (including mfrow).
par(op)

1.3.3 Features Analysis


# Summary of the fitted BSWiMS model
pander::pander(
  summary(BSWiMSMODEL),
  caption = "Model",
  round = 3
)
  • coefficients:

    Table continues below
      Estimate lower mean
    cylinders -2.772 -3.072 -2.772
    weight.modelYear -0.0001298 -0.0001437 -0.0001298
    cylinders.weight 0.0009267 0.0008333 0.0009267
    acceleration.modelYear 0.05549 0.04965 0.05549
    acceleration -4.105 -4.538 -4.105
    modelYear 0.4024 0.3578 0.4024
    displacement -0.008715 -0.01068 -0.008715
    cylinders.modelYear -0.00161 -0.001768 -0.00161
    weight -0.0009794 -0.001251 -0.0009794
    horsepower.weight 5.02e-06 4.297e-06 5.02e-06
    horsepower.modelYear -0.001309 -0.001587 -0.001309
    weight.acceleration -9.631e-06 -1.255e-05 -9.631e-06
    displacement.horsepower 5.467e-05 3.497e-05 5.467e-05
    horsepower.acceleration -0.002275 -0.002606 -0.002275
    acceleration.origin2 0.3402 0.2357 0.3402
    modelYear.origin2 -0.06298 -0.08743 -0.06298
    cylinders.horsepower 0.001615 0.0005087 0.001615
    displacement.acceleration -0.000126 -0.0002186 -0.000126
    horsepower 0.03868 0.009249 0.03868
    origin3 0.5101 0.4263 0.5101
    Table continues below
      upper u.MSE r.MSE model.MSE
    cylinders -2.33 24.02 8.81 7.42
    weight.modelYear -8.909e-05 24.45 10.98 7.425
    cylinders.weight 0.001028 21.15 9.188 7.443
    acceleration.modelYear 0.0689 44.17 16.16 7.433
    acceleration -3.121 49.87 14.72 7.433
    modelYear 0.5424 40.28 12.07 7.862
    displacement -0.006752 21.37 8.79 7.999
    cylinders.modelYear -0.001453 28.95 10.34 7.889
    weight 0.002672 18.68 9.49 7.84
    horsepower.weight 5.743e-06 23.07 9.6 7.802
    horsepower.modelYear -0.0003144 26.53 9.055 7.93
    weight.acceleration -6.709e-06 40.06 9.038 8.001
    displacement.horsepower 8.612e-05 26.73 8.685 7.995
    horsepower.acceleration -0.001079 22.66 7.694 7.425
    acceleration.origin2 0.524 56.65 7.694 7.425
    modelYear.origin2 -0.02184 56.88 7.612 7.425
    cylinders.horsepower 0.002846 24.11 8.159 7.986
    displacement.acceleration -3.348e-05 24.15 8.142 7.992
    horsepower 0.06812 23.94 8.129 7.991
    origin3 0.9212 48.38 7.461 7.348
    Table continues below
      NeRI F.pvalue t.pvalue Sign.pvalue
    cylinders 0.1987 0 1.144e-14 3.931e-05
    weight.modelYear 0.273 0 3.794e-21 1.258e-08
    cylinders.weight 0.2191 0 3.621e-16 6.326e-06
    acceleration.modelYear 0.3406 0 5.268e-33 8.562e-13
    acceleration 0.3385 0 5.661e-30 1.053e-12
    modelYear 0.192 0 1.295e-17 3.674e-06
    displacement 0.1005 0 5.614e-07 0.01604
    cylinders.modelYear 0.2347 0 1.396e-19 1.954e-06
    weight 0.07732 0 1.381e-07 0.004591
    horsepower.weight 0.2122 0 2.541e-13 3.835e-06
    horsepower.modelYear 0.1516 0 2.398e-08 0.0007096
    weight.acceleration 0.1722 4.159e-12 5.729e-11 0.0003264
    displacement.horsepower 0.07096 7.945e-09 2.971e-07 0.06962
    horsepower.acceleration 0.0852 9.277e-05 0.0005799 0.04781
    acceleration.origin2 0.09337 0.0001093 0.01503 0.03307
    modelYear.origin2 0.05791 0.001041 0.01281 0.1041
    cylinders.horsepower 0.0676 0.001938 0.0003176 0.09702
    displacement.acceleration 0.05466 0.003535 0.0003285 0.1247
    horsepower 0.06633 0.004802 0.0749 0.09635
    origin3 0.04945 0.007356 0.07907 0.1409
      Wilcox.pvalue Frequency
    cylinders 2.223e-07 0.95
    weight.modelYear 2.498e-11 1
    cylinders.weight 1.642e-08 1
    acceleration.modelYear 1.924e-17 1
    acceleration 4.106e-16 1
    modelYear 3.258e-09 0.8
    displacement 0.0007943 0.5
    cylinders.modelYear 7.43e-10 0.05
    weight 0.000249 0.65
    horsepower.weight 1.535e-07 0.25
    horsepower.modelYear 4.927e-05 0.7
    weight.acceleration 5.296e-06 0.2
    displacement.horsepower 0.001404 0.55
    horsepower.acceleration 0.006558 1
    acceleration.origin2 0.03225 1
    modelYear.origin2 0.0424 1
    cylinders.horsepower 0.01142 0.6
    displacement.acceleration 0.008035 0.35
    horsepower 0.09438 0.5
    origin3 0.1676 0.65
  • MSE: 7.267
  • R2: 0.8807
  • bootstrap:


# Heat map of the bagged BSWiMS formula network, restricted to at most the
# first 30 features and, among those, to the ones whose diagonal entry is
# above 0.1.
topFeat <- min(ncol(BSWiMSMODEL$bagging$formulaNetwork), 30)

# seq_len() stays empty when there are no columns (1:0 would yield c(1, 0)),
# and drop = FALSE keeps a matrix even when a single feature remains, so
# diag() always extracts a diagonal.
shortformulaNetwork <- BSWiMSMODEL$bagging$formulaNetwork[
  seq_len(topFeat), seq_len(topFeat),
  drop = FALSE
]
validf <- diag(shortformulaNetwork) > 0.1

# heatmap.2() stops unless it receives at least 2 rows and 2 columns.
if (isTRUE(sum(validf) >= 2)) {
  gplots::heatmap.2(
    shortformulaNetwork[validf, validf, drop = FALSE],
    trace = "none",
    margins = c(10, 10),
    main = "B:SWiMS Formula Network"
  )
} else {
  message(
    "Formula network heat map skipped: ",
    "fewer than two features have a diagonal entry above 0.1."
  )
}



# Mean selection frequency of every feature across the listed methods.
# NOTE: `rm` masks the name of base::rm(); the name is kept because the
# feature table further down reads this vector.
rm <- rowMeans(
  cp$featureSelectionFrequency[
    ,
    c("BSWiMS", "LASSO", "RPART", "RF.ref", "W-Test", "Kendall", "mRMR")
  ]
)

# Keep the features selected more than 10% of the time on average.
# drop = FALSE keeps the matrix (and its row names) when one feature is left.
selFrequency <- cp$featureSelectionFrequency[rm > 0.10, , drop = FALSE]

# heatmap.2() stops unless it receives at least 2 rows and 2 columns.
if (nrow(selFrequency) >= 2) {
  gplots::heatmap.2(
    selFrequency,
    trace = "none",
    margins = c(10, 10),
    main = "Features",
    cexRow = 0.6
  )
} else {
  message(
    "Feature selection heat map skipped: ",
    "fewer than two features have a mean selection frequency above 0.10."
  )
}

# Heat map of the data restricted to the outcome plus the selected features
hm <- heatMaps(
  Outcome = theOutcome,
  data = theData[, c(theOutcome, rownames(selFrequency))],
  title = "Heat Map",
  Scale = TRUE,
  hCluster = "col",
  cexRow = 0.25,
  cexCol = 0.65,
  srtCol = 45
)


# Variable list handed to univariateRankVariables(): a two-column matrix in
# which each selected feature name appears in both columns.
vlist <- rownames(selFrequency)
vlist <- cbind(vlist, vlist)

# Univariate ranking of the selected features; only the cohort mean/std and
# the Kendall correlation columns are kept.
univ <- univariateRankVariables(
  variableList = vlist,
  formula = paste(theOutcome, "~1"),
  Outcome = theOutcome,
  data = theData,
  type = "LM",
  rankingTest = "Ztest",
  uniType = "Regression"
)
univ <- univ[, c("cohortMean", "cohortStd", "kendall.r", "kendall.p")]

# Append the mean selection frequency as a fifth column named "Frequency",
# then list the features from most to least frequently selected.
cnames <- colnames(univ)
univ <- cbind(univ, rm[rownames(univ)])
colnames(univ) <- c(cnames, "Frequency")
univ <- univ[order(-univ[, "Frequency"]), ]

pander::pander(
  univ,
  caption = "Features",
  round = 4
)
Features (continued below)
  cohortMean cohortStd kendall.r kendall.p
modelYear 75.98 3.684 0.4152 0
weight 2978 849.4 -0.6942 0
horsepower.acceleration 1551 394.7 -0.651 0
acceleration.modelYear 1184 231.2 0.3735 0
displacement 194.4 104.6 -0.6786 0
weight.modelYear 225271 61062 -0.605 0
displacement.weight 661596 523666 -0.6913 0
horsepower 104.5 38.49 -0.6792 0
displacement.horsepower 23915 21679 -0.7062 0
horsepower.weight 339260 220228 -0.7162 0
displacement.acceleration 2865 1287 -0.626 0
horsepower.modelYear 7879 2714 -0.6166 0
cylinders.modelYear 413.6 122.7 -0.4129 0
cylinders.weight 17590 10218 -0.6943 0
cylinders 5.472 1.706 -0.6874 0
displacement.modelYear 14629 7573 -0.6275 0
cylinders.horsepower 626.9 419.8 -0.7042 0
cylinders.displacement 1233 998.8 -0.6786 0
weight.acceleration 45301 12080 -0.4626 0
acceleration.origin3 3.259 6.554 0.37 0
modelYear.origin3 15.61 31.15 0.3793 0
acceleration 15.54 2.759 0.3031 0
cylinders.origin3 0.8265 1.668 0.3664 0
horsepower.origin3 16.09 33.04 0.3173 0
cylinders.acceleration 82.67 21.88 -0.4497 0
origin3 313 79 0.3718 0
displacement.origin3 20.7 42.53 0.3335 0
acceleration.origin2 2.913 6.494 0.2041 0
weight.origin3 447.6 903.6 0.3235 0
horsepower.origin2 13.97 31.66 0.1727 0
cylinders.origin2 0.7219 1.592 0.2026 0
displacement.origin2 19.02 42.61 0.1785 0
weight.origin2 422.1 944.8 0.1767 0
modelYear.origin2 13.13 28.73 0.2163 0
origin2 324 68 0.209 0
  Frequency
modelYear 0.9429
weight 0.8286
horsepower.acceleration 0.7714
acceleration.modelYear 0.7714
displacement 0.7143
weight.modelYear 0.7143
displacement.weight 0.6857
horsepower 0.6857
displacement.horsepower 0.6857
horsepower.weight 0.6286
displacement.acceleration 0.6286
horsepower.modelYear 0.6286
cylinders.modelYear 0.6286
cylinders.weight 0.6
cylinders 0.6
displacement.modelYear 0.5714
cylinders.horsepower 0.5714
cylinders.displacement 0.5429
weight.acceleration 0.5429
acceleration.origin3 0.5143
modelYear.origin3 0.4857
acceleration 0.4571
cylinders.origin3 0.4
horsepower.origin3 0.4
cylinders.acceleration 0.3714
origin3 0.3714
displacement.origin3 0.3714
acceleration.origin2 0.3714
weight.origin3 0.3429
horsepower.origin2 0.3429
cylinders.origin2 0.3143
displacement.origin2 0.3143
weight.origin2 0.3143
modelYear.origin2 0.2857
origin2 0.2286