Libraries
library(FRESA.CAD)   # randomCV, BinaryBenchmark, HLCM_EM, and the benchmarked classifiers
library(mlbench)     # Sonar dataset
library(ggplot2)
library(pander)      # table rendering
library(beepr)       # audible notification for long runs
library(twosamples)  # DTS two-sample test
models <- c(BSWiMS.model, NAIVE_BAYES, LASSO_1SE, LASSO_MIN,
            GLMNET_RIDGE_MIN, GLMNET_ELASTICNET_MIN)
modelsnames <- c("BSWiMS.model", "NAIVE_BAYES", "LASSO_1SE", "LASSO_MIN",
                 "GLMNET_RIDGE_MIN", "GLMNET_ELASTICNET_MIN")
Sonar data
data(Sonar, package = "mlbench")
Sonar$Class <- 1*(Sonar$Class == "M")  # recode the outcome: mine (M) = 1, rock (R) = 0
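A quick sanity check of the recoded data before the long CV runs; the expected values follow the standard mlbench description of Sonar:
dim(Sonar)          # 208 observations, 60 sonar features plus the recoded Class column
table(Sonar$Class)  # class balance: 111 mines (1) vs. 97 rocks (0)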
20x CV using 70% training and 30% holdout (for LC models)
lc.cvlist <- list()
lc.filteredFitcv <- randomCV_V3(Sonar,
                                "Class",
                                HLCM_EM,                 # latent-class model wrapper
                                trainFraction = 0.7,
                                repetitions = 20,
                                method = filteredFit,    # base learner inside HLCM_EM
                                hysteresis = 0.1,
                                fitmethod = glm,
                                filtermethod = univariate_BinEnsemble,
                                filtermethod.control = list(pvalue = 0.05),
                                family = "binomial")
lc.cvlist[["LC_filteredFit"]] <- lc.filteredFitcv
i <- 1  # index into modelsnames, advanced inside the loop
for (model in models) {
  modelname <- paste0("LC_", modelsnames[i])
  cv <- randomCV_V3(Sonar,
                    "Class",
                    HLCM_EM,
                    trainSampleSets = lc.filteredFitcv$trainSamplesSets,  # reuse the same splits
                    method = model,
                    hysteresis = 0.1)
  lc.cvlist[[modelname]] <- cv
  i <- i + 1
}
save(lc.cvlist, file = "lc.cvlist.RData")
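Because every run reuses the trainSampleSets of the first model, all comparisons below are paired on identical splits. A quick check that the splits really are shared, assuming randomCV_V3 returns them in the same trainSamplesSets field that is passed back in above:
stopifnot(identical(lc.cvlist[["LC_filteredFit"]]$trainSamplesSets,
                    lc.cvlist[["LC_BSWiMS.model"]]$trainSamplesSets))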
20x CV using 70% training and 30% holdout (for vanilla models)
cvlist <- list()
filteredFitcv <- randomCV(Sonar,
                          "Class",
                          filteredFit,
                          trainSampleSets = lc.filteredFitcv$trainSamplesSets,
                          fitmethod = glm,
                          filtermethod = univariate_BinEnsemble,
                          filtermethod.control = list(pvalue = 0.05),
                          family = "binomial")
cvlist[["filteredFit"]] <- filteredFitcv
save(filteredFitcv, file = "filteredFitcv.RData")
i <- 1  # index into modelsnames (the six models; filteredFit was fitted above)
for (model in models) {
  modelname <- modelsnames[i]
  cv <- randomCV(Sonar,
                 "Class",
                 model,
                 trainSampleSets = lc.filteredFitcv$trainSamplesSets)  # same splits again
  cvlist[[modelname]] <- cv
  i <- i + 1
}
save(cvlist, file = "cvlist.RData")
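Each list now holds seven paired CV results. A hedged sketch for summarizing held-out performance, assuming the returned objects expose FRESA.CAD's medianTest matrix (outcome in the first column, median test prediction in the second), which predictionStats_binary accepts; check the field names against your installed version:
for (name in names(cvlist)) {
  ps <- predictionStats_binary(cvlist[[name]]$medianTest, plotname = name)
  cat(name, "held-out AUC:", ps$aucs[1], "\n")  # aucs holds the estimate and its CI
}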
ROC plots (latent class AUC vs vanilla AUC)
par(mfrow = c(1,2), cex = 1)  # combine and adapt the two cvlists into one combined list
combined.cvlist <- combine.cvlist(lc.cvlist, cvlist)
cp.combined <- BinaryBenchmark(referenceCV = combined.cvlist)
save(cp.combined, file = "cp.combined.RData")
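combine.cvlist is a project helper not defined in this document. A minimal sketch of what it is assumed to do, namely concatenate the two named lists while guarding against name collisions:
combine.cvlist <- function(lc, vanilla) {
  stopifnot(!any(names(lc) %in% names(vanilla)))  # list names must stay unique
  c(lc, vanilla)
}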
Statistics to assess the differences between the classes found by the LC scheme
modelsnames <- c("filteredFit", modelsnames)  # prepend the base model's label
result.stats <- get.lc.statistics(lc.cvlist, Sonar, modelsnames)
## (progress output: models 1 through 7, each printing "RandomHOCV")
pander::pander(result.stats$concat.table,
               caption = "Compressed table of statistically significant features per test per method")
Compressed table of statistically significant features per test per method (columns follow the order of modelsnames):

| Test   | filteredFit | BSWiMS.model | NAIVE_BAYES | LASSO_1SE | LASSO_MIN | GLMNET_RIDGE_MIN | GLMNET_ELASTICNET_MIN |
|--------|-------------|--------------|-------------|-----------|-----------|------------------|-----------------------|
| KS     | 0           | 17/61        | 0           | 30/61     | 0         | 0                | 0                     |
| DTS    | 0           | 20/61        | 0           | 34/61     | 0         | 0                | 0                     |
| Wilcox | 0           | 19/61        | 0           | 32/61     | 0         | 0                | 0                     |
write.csv(result.stats$concat.table, "concat.table.csv")
ks.list <- result.stats$ks.list
dts.list <- result.stats$dts.list
wilcox.list <- result.stats$wilcox.list
save(result.stats, file = "result.stats.RData")
save(ks.list, file = "ks.list.RData")
save(dts.list, file = "dts.list.RData")
save(wilcox.list, file = "wilcox.list.RData")
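get.lc.statistics is a project helper whose source is not shown here. A hedged sketch of the per-feature testing it is assumed to perform between the two latent-class subsets, using stats::ks.test, stats::wilcox.test, and twosamples::dts_test; the real helper's interface may differ:
lc_feature_tests <- function(setA, setB, alpha = 0.05) {
  feats <- setdiff(colnames(setA), "Class")
  pvals <- sapply(feats, function(f) {
    c(KS     = ks.test(setA[[f]], setB[[f]])$p.value,
      DTS    = unname(twosamples::dts_test(setA[[f]], setB[[f]])[2]),  # element 2 is the p-value
      Wilcox = wilcox.test(setA[[f]], setB[[f]])$p.value)
  })
  rowSums(pvals < alpha)  # significant-feature count per test, as in the table above
}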
par(mfrow = c(1,1),
    cex = 0.7,
    xpd = TRUE,         # allow drawing outside the plot region
    pty = 'm',          # maximal plotting region
    mar = c(3,3,3,10))  # wide right margin for the legend
cp.combined <- trim.cp(cp.combined)
prBenchmark <- plot(cp.combined)

[Benchmark plots comparing the latent-class and vanilla classifiers rendered here]
Performance metrics of LC CV
pander::pander(prBenchmark$metrics,
               caption = "LC vs. vanilla classifier performance", round = 3)
LC vs. vanilla classifier performance (rows ordered by increasing BER; the classifier labels did not survive extraction). BER = balanced error rate, ACC = accuracy, AUC = area under the ROC curve, SEN = sensitivity, SPE = specificity, CIDX = concordance index.

|  # | BER   | ACC   | AUC   | SEN   | SPE   | CIDX  |
|----|-------|-------|-------|-------|-------|-------|
|  1 | 0.161 | 0.841 | 0.932 | 0.874 | 0.802 | 0.926 |
|  2 | 0.212 | 0.787 | 0.865 | 0.802 | 0.771 | 0.851 |
|  3 | 0.222 | 0.778 | 0.883 | 0.784 | 0.771 | 0.863 |
|  4 | 0.231 | 0.768 | 0.84  | 0.784 | 0.75  | 0.822 |
|  5 | 0.234 | 0.763 | 0.841 | 0.748 | 0.781 | 0.818 |
|  6 | 0.235 | 0.763 | 0.837 | 0.757 | 0.771 | 0.812 |
|  7 | 0.252 | 0.739 | 0.829 | 0.64  | 0.854 | 0.82  |
|  8 | 0.257 | 0.758 | 0.859 | 0.964 | 0.521 | 0.835 |
|  9 | 0.259 | 0.758 | 0.903 | 0.982 | 0.5   | 0.877 |
| 10 | 0.26  | 0.758 | 0.903 | 0.982 | 0.5   | 0.874 |
| 11 | 0.264 | 0.725 | 0.82  | 0.595 | 0.875 | 0.717 |
| 12 | 0.265 | 0.725 | 0.82  | 0.595 | 0.875 | 0.719 |
| 13 | 0.35  | 0.667 | 0.79  | 0.865 | 0.438 | 0.81  |
| 14 | 0.47  | 0.565 | 0.849 | 1     | 0.062 | 0.82  |
| 15 | 0.474 | 0.56  | 0.875 | 1     | 0.052 | 0.856 |
| 16 | 0.475 | 0.56  | 0.874 | 1     | 0.052 | 0.856 |
sonar_metrics <- data.frame(prBenchmark$metrics)
write.csv(sonar_metrics, "sonar_metrics.csv")
#par(mfrow = c(1,1))
for (i in seq_along(lc.cvlist)) {
  lc.sets <- split_df_into_lc.sets(lc.cvlist[[i]], Sonar)
  # three boxplot sets per method: one per statistical test
  # arguments: data, latent-class subsets, test results, test name, model name, dataset name
  plot_ssf_boxplot(Sonar, lc.sets, wilcox.list[[i]], "Wilcoxon", modelsnames[i], "Sonar")
  plot_ssf_boxplot(Sonar, lc.sets, ks.list[[i]], "KS", modelsnames[i], "Sonar")
  plot_ssf_boxplot(Sonar, lc.sets, dts.list[[i]], "DTS", modelsnames[i], "Sonar")
}
## [1] "RandomHOCV"
## [1] "RandomHOCV"



## [1] "RandomHOCV"
## [1] "RandomHOCV"



## [1] "RandomHOCV"
## [1] "RandomHOCV"
## [1] "RandomHOCV"
beep(3)  # audible notification: the full analysis has finished
beep()