R Notebook

Libraries

library(FRESA.CAD)
library("mlbench")
library("ggplot2")
library(pander)
library(beepr)
library(twosamples)


# Classifiers to benchmark. `modelsnames` holds the matching labels in the
# same order; the cross-validation loops below pair the two by position.
models <- list(
  BSWiMS.model,
  NAIVE_BAYES,
  LASSO_1SE,
  LASSO_MIN,
  GLMNET_RIDGE_MIN,
  GLMNET_ELASTICNET_MIN
)
modelsnames <- c(
  "BSWiMS.model",
  "NAIVE_BAYES",
  "LASSO_1SE",
  "LASSO_MIN",
  "GLMNET_RIDGE_MIN",
  "GLMNET_ELASTICNET_MIN"
)

Sonar

# Load the Sonar data shipped with mlbench and recode the outcome column as
# numeric 0/1, where 1 marks the rows whose Class is "M".
data("Sonar", package = "mlbench")
Sonar$Class <- as.numeric(Sonar$Class == "M")

20x cv using 70% training and 30% holdout (for LC models)

# Latent-class (HLCM_EM) cross-validation: 20 repetitions with a 70% training
# fraction. Results are collected in `lc.cvlist`, keyed by "LC_<model label>".

# `models` and `modelsnames` are paired by position. Fail fast if they have
# drifted apart (for example because `modelsnames` was extended later in the
# notebook and this chunk is re-run); otherwise results would be stored under
# the wrong label.
stopifnot(length(models) == length(modelsnames))

lc.cvlist <- list()

# Reference run. Its train partitions (`trainSamplesSets`) are reused by every
# other cross-validation call in this notebook.
lc.filteredFitcv <- randomCV_V3(
  Sonar,
  "Class",
  HLCM_EM,
  trainFraction = 0.7,
  repetitions = 20,
  method = filteredFit,
  hysteresis = 0.1,
  fitmethod = glm,
  filtermethod = univariate_BinEnsemble,
  filtermethod.control = list(pvalue = 0.05),
  family = "binomial"
)
lc.cvlist[["LC_filteredFit"]] <- lc.filteredFitcv

# One latent-class run per candidate model, on the reference partitions.
for (i in seq_along(models)) {
  modelname <- paste0("LC_", modelsnames[i])

  cv <- randomCV_V3(
    Sonar,
    "Class",
    HLCM_EM,
    trainSampleSets = lc.filteredFitcv$trainSamplesSets,
    method = models[[i]],
    hysteresis = 0.1
  )

  lc.cvlist[[modelname]] <- cv
}

# Cache the results so later chunks can reload them without re-running the CV.
save(lc.cvlist, file = "lc.cvlist.RData")

20x cv using 70% training and 30% holdout (for vanilla models)

# Vanilla (non latent-class) cross-validation. Every run reuses the train
# partitions stored in `lc.filteredFitcv$trainSamplesSets`. Results are
# collected in `cvlist`, keyed by the model label.

# `models` and `modelsnames` are paired by position. Fail fast if they have
# drifted apart (for example because `modelsnames` was extended later in the
# notebook and this chunk is re-run); otherwise results would be stored under
# the wrong label.
stopifnot(length(models) == length(modelsnames))

cvlist <- list()

# Reference model: filteredFit with a glm fit on univariate-filtered features.
filteredFitcv <- randomCV(
  Sonar,
  "Class",
  filteredFit,
  trainSampleSets = lc.filteredFitcv$trainSamplesSets,
  fitmethod = glm,
  filtermethod = univariate_BinEnsemble,
  filtermethod.control = list(pvalue = 0.05),
  family = "binomial"
)
cvlist[["filteredFit"]] <- filteredFitcv

save(filteredFitcv, file = "filteredFitcv.RData")

# One vanilla run per candidate model.
for (i in seq_along(models)) {
  modelname <- modelsnames[i]

  cv <- randomCV(
    Sonar,
    "Class",
    models[[i]],
    trainSampleSets = lc.filteredFitcv$trainSamplesSets
  )

  cvlist[[modelname]] <- cv
}

# Cache the results so later chunks can reload them without re-running the CV.
save(cvlist, file = "cvlist.RData")

ROC plots (latent class AUC vs vanilla AUC)

# Lay the plots out in one row of two panels.
par(mfrow = c(1, 2), cex = 1)

# Merge the latent-class and vanilla CV results into a single list and run
# the binary benchmark on it.
combined.cvlist <- combine.cvlist(lc.cvlist, cvlist)
cp.combined <- BinaryBenchmark(referenceCV = combined.cvlist)

# Cache the benchmark so the table/plot chunks can reload it.
save(cp.combined, file = "cp.combined.RData")

AUC table, vanilla vs latent class classifier and mean proportion of classes formed during the CVs

# Restore the cached CV results and benchmark from disk.
load(file = "lc.cvlist.RData")
load(file = "cvlist.RData")
load(file = "cp.combined.RData")

# Rebuild the merged list of latent-class and vanilla CV results.
combined.cvlist <- combine.cvlist(lc.cvlist, cvlist)

# Build the AUC comparison table from the benchmark and the latent-class
# results, then render it.
sonar_auctable <- get.combined_auctable(cp.combined, lc.cvlist)
pander::pander(
  sonar_auctable,
  caption = "AUC table, vanilla vs latent class classifier and mean proportion of classes formed during the CV",
  round = 3
)

AUC table, vanilla vs latent class classifier and mean proportion of classes formed during the CV (continued below)
	AUC	CI	LC_AUC	LC_CI
filteredFit	0.84	[0.787,0.894]	0.837	[0.782,0.892]
BSWiMS.model	0.841	[0.786,0.896]	0.883	[0.839,0.928]
NAIVE_BAYES	0.82	[0.762,0.878]	0.82	[0.762,0.878]
LASSO_1SE	0.79	[0.729,0.851]	0.829	[0.771,0.888]
LASSO_MIN	0.849	[0.797,0.901]	0.859	[0.807,0.91]
GLMNET_RIDGE_MIN	0.874	[0.827,0.921]	0.903	[0.86,0.945]
GLMNET_ELASTICNET_MIN	0.875	[0.828,0.922]	0.903	[0.86,0.945]
RF	0.932	[0.901,0.963]	-	-
SVM	0.865	[0.813,0.916]	-	-

Table continues below
	train mean obs. LC1, LC2, LC3
filteredFit	132 (92%), 12 (8%), 0 (0%)
BSWiMS.model	104 (72%), 40 (28%), 0 (0%)
NAIVE_BAYES	144 (100%), 0 (0%), 0 (0%)
LASSO_1SE	106 (74%), 31 (22%), 7 (5%)
LASSO_MIN	144 (100%), 0 (0%), 0 (0%)
GLMNET_RIDGE_MIN	144 (100%), 0 (0%), 0 (0%)
GLMNET_ELASTICNET_MIN	144 (100%), 0 (0%), 0 (0%)
RF	-
SVM	-

	test mean obs. LC1, LC2, LC3
filteredFit	59 (92%), 5 (8%), 0 (0%)
BSWiMS.model	45 (70%), 19 (30%), 0 (0%)
NAIVE_BAYES	64 (100%), 0 (0%), 0 (0%)
LASSO_1SE	49 (77%), 12 (19%), 3 (5%)
LASSO_MIN	64 (100%), 0 (0%), 0 (0%)
GLMNET_RIDGE_MIN	64 (100%), 0 (0%), 0 (0%)
GLMNET_ELASTICNET_MIN	64 (100%), 0 (0%), 0 (0%)
RF	-
SVM	-

# Export the AUC comparison table.
write.csv(sonar_auctable, file = "sonar_auctable.csv")

Statistics to assess the differences between the classes found by the LC scheme

# Put the reference model label first so that `modelsnames` has one entry per
# element of `lc.cvlist` (whose first entry is the filteredFit run).
# union() keeps the order but drops duplicates, so re-running this chunk does
# not prepend "filteredFit" a second time and shift every later label.
modelsnames <- union("filteredFit", modelsnames)

# Per-model statistics on the latent classes; the result is read below via
# its `concat.table`, `ks.list`, `dts.list` and `wilcox.list` elements.
result.stats <- get.lc.statistics(lc.cvlist, Sonar, modelsnames)

## [1] 1
## [1] "RandomHOCV"
## [1] 1
## [1] 2
## [1] "RandomHOCV"
## [1] 2
## [1] 3
## [1] "RandomHOCV"
## [1] 3
## [1] 4
## [1] "RandomHOCV"
## [1] 4
## [1] 5
## [1] "RandomHOCV"
## [1] 5
## [1] 6
## [1] "RandomHOCV"
## [1] 6
## [1] 7
## [1] "RandomHOCV"
## [1] 7

# Render the summary table of significant-feature counts per test and method.
pander::pander(
  result.stats$concat.table,
  caption = "compressed table of statistical significant features per test per method"
)

compressed table of statistical significant features per test per method (continued below)
	BSWiMS.model	LASSO_1SE
KS	17/61	30/61
DTS	20/61	34/61
Wilcox	19/61	32/61

	GLMNET_RIDGE_MIN	GLMNET_ELASTICNET_MIN
KS	0	0
DTS	0	0
Wilcox	0	0

# Export the summary table as CSV.
write.csv(result.stats$concat.table, file = "concat.table.csv")

# Pull the per-test result lists out of the statistics object; the plotting
# loop further down indexes them by model position.
ks.list <- result.stats$ks.list
dts.list <- result.stats$dts.list
wilcox.list <- result.stats$wilcox.list

# Cache the full statistics object and each per-test list separately.
save(result.stats, file = "result.stats.RData")
save(ks.list, file = "ks.list.RData")
save(dts.list, file = "dts.list.RData")
save(wilcox.list, file = "wilcox.list.RData")

# Single-panel layout with a wide right margin.
# NOTE(review): xpd = TRUE together with the 10-line right margin presumably
# leaves room for a legend drawn outside the plot region - confirm.
par(
  mfrow = c(1, 1),
  cex = 0.7,
  xpd = TRUE, # spelled out: `T` is an ordinary variable that can be reassigned
  pty = "m", # maximal plotting region
  mar = c(3, 3, 3, 10)
)

# Trim the benchmark object before plotting it.
cp.combined <- trim.cp(cp.combined)

# plot() draws the benchmark and returns an object whose `metrics` element is
# tabulated and exported below.
prBenchmark <- plot(cp.combined)

Performance metrics of the LC CV

# Render the benchmark metrics table, rounded to three decimals.
pander::pander(
  prBenchmark$metrics,
  caption = "Lc vs vanilla Classifier Performance",
  round = 3
)

Lc vs vanilla Classifier Performance (continued below)
	RF	SVM	LC_BSWiMS.model	filteredFit	BSWiMS.model
BER	0.161	0.212	0.222	0.231	0.234
ACC	0.841	0.787	0.778	0.768	0.763
AUC	0.932	0.865	0.883	0.84	0.841
SEN	0.874	0.802	0.784	0.784	0.748
SPE	0.802	0.771	0.771	0.75	0.781
CIDX	0.926	0.851	0.863	0.822	0.818

Table continues below
	LC_filteredFit	LC_LASSO_1SE	LC_LASSO_MIN	LC_GLMNET_RIDGE_MIN
BER	0.235	0.252	0.257	0.259
ACC	0.763	0.739	0.758	0.758
AUC	0.837	0.829	0.859	0.903
SEN	0.757	0.64	0.964	0.982
SPE	0.771	0.854	0.521	0.5
CIDX	0.812	0.82	0.835	0.877

Table continues below
	LC_GLMNET_ELASTICNET_MIN	NAIVE_BAYES	LC_NAIVE_BAYES	LASSO_1SE
BER	0.26	0.264	0.265	0.35
ACC	0.758	0.725	0.725	0.667
AUC	0.903	0.82	0.82	0.79
SEN	0.982	0.595	0.595	0.865
SPE	0.5	0.875	0.875	0.438
CIDX	0.874	0.717	0.719	0.81

	LASSO_MIN	GLMNET_ELASTICNET_MIN	GLMNET_RIDGE_MIN
BER	0.47	0.474	0.475
ACC	0.565	0.56	0.56
AUC	0.849	0.875	0.874
SEN	1	1	1
SPE	0.062	0.052	0.052
CIDX	0.82	0.856	0.856

# Export the benchmark metrics as CSV.
sonar_metrics <- data.frame(prBenchmark$metrics)
write.csv(sonar_metrics, "sonar_metrics.csv")

# For every latent-class CV result, split the data into its latent-class sets
# and draw one box plot per statistical test (Wilcoxon, KS, DTS).
# seq_along() yields an empty sequence for an empty `lc.cvlist`, whereas
# 1:length() would iterate over c(1, 0) and fail on the first subscript.
# `modelsnames[i]` must line up with `lc.cvlist[[i]]`, i.e. "filteredFit" has
# to be its first entry at this point.
for (i in seq_along(lc.cvlist)) {
  lc.sets <- split_df_into_lc.sets(lc.cvlist[[i]], Sonar)

  # Argument order, going by the original inline note: data, latent-class
  # sets, per-test result list, test label, model label, data set label.
  plot_ssf_boxplot(
    Sonar, lc.sets, wilcox.list[[i]], "Wilcoxon", modelsnames[i], "Sonar"
  )
  plot_ssf_boxplot(
    Sonar, lc.sets, ks.list[[i]], "KS", modelsnames[i], "Sonar"
  )
  plot_ssf_boxplot(
    Sonar, lc.sets, dts.list[[i]], "DTS", modelsnames[i], "Sonar"
  )
}

## [1] "RandomHOCV"
## [1] "RandomHOCV"

## [1] "RandomHOCV"
## [1] "RandomHOCV"

## [1] "RandomHOCV"
## [1] "RandomHOCV"
## [1] "RandomHOCV"

# Sound notifications marking the end of the notebook run: first sound
# number 3, then the default sound.
beep(sound = 3)
beep()