Libraries

library(FRESA.CAD)
library("mlbench")
library("ggplot2")
library(pander)
library(beepr)
library(twosamples)


# Display names of the candidate classifiers. The order here must match the
# order of the fitting functions in `models`, because both are indexed by
# position further down.
modelsnames <- c(
  "BSWiMS.model",
  "NAIVE_BAYES",
  "LASSO_1SE",
  "LASSO_MIN",
  "GLMNET_RIDGE_MIN",
  "GLMNET_ELASTICNET_MIN"
)

# Fitting functions, in the same positional order as `modelsnames`.
models <- c(
  BSWiMS.model,
  NAIVE_BAYES,
  LASSO_1SE,
  LASSO_MIN,
  GLMNET_RIDGE_MIN,
  GLMNET_ELASTICNET_MIN
)

Sonar dataset

# Load the Sonar data set from the mlbench package.
data(Sonar, package = "mlbench")

# Recode the outcome as numeric 0/1: 1 where the label is "M", 0 otherwise.
Sonar$Class <- as.numeric(Sonar$Class == "M")

20x cv using 70% training and 30% holdout (for LC models)

# Reference latent-class run: HLCM_EM wrapped around a filtered GLM, evaluated
# with 20 repetitions at a 0.7 training fraction. The train/test partitions
# stored in this result are reused by every other CV run below.
lc.filteredFitcv <- randomCV_V3(
  Sonar,
  "Class",
  HLCM_EM,
  trainFraction = 0.7,
  repetitions = 20,
  method = filteredFit,
  hysteresis = 0.1,
  fitmethod = glm,
  filtermethod = univariate_BinEnsemble,
  filtermethod.control = list(pvalue = 0.05),
  family = "binomial"
)

# Container for all latent-class CV results, keyed as "LC_<model name>".
lc.cvlist <- list(LC_filteredFit = lc.filteredFitcv)
# Run the latent-class CV for every model in `models`, reusing the train sets
# stored in the filteredFit run. Each result is stored in `lc.cvlist` under
# "LC_<model name>".
# Iterating by index keeps `models` and `modelsnames` aligned without a
# hand-maintained counter.
# NOTE(review): the argument is spelled `trainSampleSets` while the field read
# from the result is `trainSamplesSets`; confirm both against randomCV_V3.
for (i in seq_along(models)) {
  model <- models[[i]]
  modelname <- paste0("LC_", modelsnames[i])

  cv <- randomCV_V3(
    Sonar,
    "Class",
    HLCM_EM,
    trainSampleSets = lc.filteredFitcv$trainSamplesSets,
    method = model,
    hysteresis = 0.1
  )

  lc.cvlist[[modelname]] <- cv
}

# Cache the latent-class results on disk so later chunks can reload them.
save(lc.cvlist, file = "lc.cvlist.RData")

20x cv using 70% training and 30% holdout (for vanilla models)

# Vanilla (no latent-class wrapper) filtered GLM, evaluated on the train sets
# taken from the latent-class filteredFit run.
filteredFitcv <- randomCV(
  Sonar,
  "Class",
  filteredFit,
  trainSampleSets = lc.filteredFitcv$trainSamplesSets,
  fitmethod = glm,
  filtermethod = univariate_BinEnsemble,
  filtermethod.control = list(pvalue = 0.05),
  family = "binomial"
)

# Container for all vanilla CV results, keyed by model name.
cvlist <- list(filteredFit = filteredFitcv)

save(filteredFitcv, file = "filteredFitcv.RData")

# Run the vanilla CV for every model in `models`, reusing the train sets from
# the latent-class filteredFit run. Each result is stored in `cvlist` under
# its model name.
# Iterating by index keeps `models` and `modelsnames` aligned without a
# hand-maintained counter.
for (i in seq_along(models)) {
  model <- models[[i]]
  modelname <- modelsnames[i]

  cv <- randomCV(
    Sonar,
    "Class",
    model,
    trainSampleSets = lc.filteredFitcv$trainSamplesSets
  )

  cvlist[[modelname]] <- cv
}

# Cache the vanilla results on disk so later chunks can reload them.
save(cvlist, file = "cvlist.RData")

ROC plots (latent class AUC vs vanilla AUC)

# Two plotting panels side by side.
par(mfrow = c(1, 2), cex = 1)

# Combine and adapt the latent-class and vanilla CV lists into one list, then
# benchmark it.
combined.cvlist <- combine.cvlist(lc.cvlist, cvlist)
cp.combined <- BinaryBenchmark(referenceCV = combined.cvlist)

# Cache the benchmark result for the table and plot chunks below.
save(cp.combined, file = "cp.combined.RData")

AUC table, vanilla vs latent class classifier and mean proportion of classes formed during the CVs

# Restore the cached CV lists and benchmark so this chunk can run without
# repeating the cross-validation.
load("lc.cvlist.RData")
load("cvlist.RData")
load("cp.combined.RData")

# Combine and adapt the two CV lists into one.
combined.cvlist <- combine.cvlist(lc.cvlist, cvlist)

# Build the combined AUC table from the benchmark and the latent-class CV
# results, then render it.
sonar_auctable <- get.combined_auctable(cp.combined, lc.cvlist)
pander::pander(
  sonar_auctable,
  caption = "AUC table, vanilla vs latent class classifier and mean proportion of classes formed during the CV",
  round = 3
)
AUC table, vanilla vs latent class classifier and mean proportion of classes formed during the CV (continued below)
  AUC CI LC_AUC LC_CI
filteredFit 0.84 [0.787,0.894] 0.837 [0.782,0.892]
BSWiMS.model 0.841 [0.786,0.896] 0.883 [0.839,0.928]
NAIVE_BAYES 0.82 [0.762,0.878] 0.82 [0.762,0.878]
LASSO_1SE 0.79 [0.729,0.851] 0.829 [0.771,0.888]
LASSO_MIN 0.849 [0.797,0.901] 0.859 [0.807,0.91]
GLMNET_RIDGE_MIN 0.874 [0.827,0.921] 0.903 [0.86,0.945]
GLMNET_ELASTICNET_MIN 0.875 [0.828,0.922] 0.903 [0.86,0.945]
RF 0.932 [0.901,0.963] - -
SVM 0.865 [0.813,0.916] - -
Table continues below
  train mean obs. LC1, LC2, LC3
filteredFit 132 (92%), 12 (8%), 0 (0%)
BSWiMS.model 104 (72%), 40 (28%), 0 (0%)
NAIVE_BAYES 144 (100%), 0 (0%), 0 (0%)
LASSO_1SE 106 (74%), 31 (22%), 7 (5%)
LASSO_MIN 144 (100%), 0 (0%), 0 (0%)
GLMNET_RIDGE_MIN 144 (100%), 0 (0%), 0 (0%)
GLMNET_ELASTICNET_MIN 144 (100%), 0 (0%), 0 (0%)
RF -
SVM -
  test mean obs. LC1, LC2, LC3
filteredFit 59 (92%), 5 (8%), 0 (0%)
BSWiMS.model 45 (70%), 19 (30%), 0 (0%)
NAIVE_BAYES 64 (100%), 0 (0%), 0 (0%)
LASSO_1SE 49 (77%), 12 (19%), 3 (5%)
LASSO_MIN 64 (100%), 0 (0%), 0 (0%)
GLMNET_RIDGE_MIN 64 (100%), 0 (0%), 0 (0%)
GLMNET_ELASTICNET_MIN 64 (100%), 0 (0%), 0 (0%)
RF -
SVM -
# Export the AUC table as CSV.
write.csv(sonar_auctable, file = "sonar_auctable.csv")

Statistics to assess the differences between the classes found by the LC scheme

# Put "filteredFit" at the front of the model names so positions line up with
# `lc.cvlist`, whose first entry is the filteredFit run.
# union() leaves the vector unchanged if "filteredFit" is already present, so
# re-running this chunk cannot add a duplicate and shift the names.
modelsnames <- union("filteredFit", modelsnames)

# Per-model statistics on the latent classes found during CV.
result.stats <- get.lc.statistics(lc.cvlist, Sonar, modelsnames)
## [1] 1
## [1] "RandomHOCV"
## [1] 1
## [1] 2
## [1] "RandomHOCV"
## [1] 2
## [1] 3
## [1] "RandomHOCV"
## [1] 3
## [1] 4
## [1] "RandomHOCV"
## [1] 4
## [1] 5
## [1] "RandomHOCV"
## [1] 5
## [1] 6
## [1] "RandomHOCV"
## [1] 6
## [1] 7
## [1] "RandomHOCV"
## [1] 7
# Render the per-test, per-method summary table.
pander::pander(
  result.stats$concat.table,
  caption = "compressed table of statistical significant features per test per method"
)
compressed table of statistical significant features per test per method (continued below)
  filteredFit BSWiMS.model NAIVE_BAYES LASSO_1SE LASSO_MIN
KS 0 17/61 0 30/61 0
DTS 0 20/61 0 34/61 0
Wilcox 0 19/61 0 32/61 0
  GLMNET_RIDGE_MIN GLMNET_ELASTICNET_MIN
KS 0 0
DTS 0 0
Wilcox 0 0
# Export the summary table as CSV and cache the full statistics object.
write.csv(result.stats$concat.table, file = "concat.table.csv")
save(result.stats, file = "result.stats.RData")

# Pull out each per-test list under its own name and cache it separately.
ks.list <- result.stats$ks.list
save(ks.list, file = "ks.list.RData")

dts.list <- result.stats$dts.list
save(dts.list, file = "dts.list.RData")

wilcox.list <- result.stats$wilcox.list
save(wilcox.list, file = "wilcox.list.RData")
# Single-panel layout with a wide right margin.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
par(
  mfrow = c(1, 1),
  cex = 0.7,
  xpd = TRUE,
  pty = "m", # maximal plotting region
  mar = c(3, 3, 3, 10)
)

cp.combined <- trim.cp(cp.combined)

# Plot the benchmark; the returned object holds the metrics tabulated below.
prBenchmark <- plot(cp.combined)

Performance metrics of LC CV

# Render the benchmark metrics table, rounded to three decimals.
pander::pander(
  prBenchmark$metrics,
  caption = "Lc vs vanilla Classifier Performance",
  round = 3
)
Lc vs vanilla Classifier Performance (continued below)
  RF SVM LC_BSWiMS.model filteredFit BSWiMS.model
BER 0.161 0.212 0.222 0.231 0.234
ACC 0.841 0.787 0.778 0.768 0.763
AUC 0.932 0.865 0.883 0.84 0.841
SEN 0.874 0.802 0.784 0.784 0.748
SPE 0.802 0.771 0.771 0.75 0.781
CIDX 0.926 0.851 0.863 0.822 0.818
Table continues below
  LC_filteredFit LC_LASSO_1SE LC_LASSO_MIN LC_GLMNET_RIDGE_MIN
BER 0.235 0.252 0.257 0.259
ACC 0.763 0.739 0.758 0.758
AUC 0.837 0.829 0.859 0.903
SEN 0.757 0.64 0.964 0.982
SPE 0.771 0.854 0.521 0.5
CIDX 0.812 0.82 0.835 0.877
Table continues below
  LC_GLMNET_ELASTICNET_MIN NAIVE_BAYES LC_NAIVE_BAYES LASSO_1SE
BER 0.26 0.264 0.265 0.35
ACC 0.758 0.725 0.725 0.667
AUC 0.903 0.82 0.82 0.79
SEN 0.982 0.595 0.595 0.865
SPE 0.5 0.875 0.875 0.438
CIDX 0.874 0.717 0.719 0.81
  LASSO_MIN GLMNET_ELASTICNET_MIN GLMNET_RIDGE_MIN
BER 0.47 0.474 0.475
ACC 0.565 0.56 0.56
AUC 0.849 0.875 0.874
SEN 1 1 1
SPE 0.062 0.052 0.052
CIDX 0.82 0.856 0.856
# Convert the benchmark metrics to a data frame and export them as CSV.
sonar_metrics <- data.frame(prBenchmark$metrics)

write.csv(sonar_metrics, file = "sonar_metrics.csv")
#par(mfrow=c(1,1))

# For every latent-class CV result, draw one boxplot per statistical test
# (Wilcoxon, KS, DTS), so three plots per method.
# seq_along() gives zero iterations for an empty list, whereas 1:length() would
# iterate over c(1, 0).
# Assumes `modelsnames` already has "filteredFit" prepended so that position i
# names lc.cvlist[[i]] — TODO confirm the chunk that does this has run.
for (i in seq_along(lc.cvlist)) {
  # NOTE(review): presumably partitions Sonar by assigned latent class; verify
  # against split_df_into_lc.sets.
  lc.sets <- split_df_into_lc.sets(lc.cvlist[[i]], Sonar)

  # Positional arguments, per the original note:
  # (data, lc.sets, list, methodname, modelname, dataname)
  plot_ssf_boxplot(Sonar, lc.sets, wilcox.list[[i]], "Wilcoxon", modelsnames[i], "Sonar")
  plot_ssf_boxplot(Sonar, lc.sets, ks.list[[i]], "KS", modelsnames[i], "Sonar")
  plot_ssf_boxplot(Sonar, lc.sets, dts.list[[i]], "DTS", modelsnames[i], "Sonar")
}
## [1] "RandomHOCV"
## [1] "RandomHOCV"

## [1] "RandomHOCV"
## [1] "RandomHOCV"

## [1] "RandomHOCV"
## [1] "RandomHOCV"
## [1] "RandomHOCV"
# Audible notification that the script has finished: sound 3, then the
# default sound.
beep(sound = 3)
beep()