Libraries
library(pander)
library(FRESA.CAD)
library("mlbench")
library(dplyr)
library(beepr)
library(twosamples)
# Labels for the candidate classifiers. The cross-validation loops further
# down look these up by position, so the order must match `models`.
modelsnames <- c(
  "BSWiMS.model",
  "NAIVE_BAYES",
  "LASSO_1SE",
  "LASSO_MIN",
  "GLMNET_RIDGE_MIN",
  "GLMNET_ELASTICNET_MIN"
)

# The classifier objects themselves, listed in the same order as the labels.
models <- c(
  BSWiMS.model,
  NAIVE_BAYES,
  LASSO_1SE,
  LASSO_MIN,
  GLMNET_RIDGE_MIN,
  GLMNET_ELASTICNET_MIN
)
20x cv using 70% training and 30% holdout (for LC models)
# Latent-class (LC) cross-validation.
# `lc.cvlist` collects one randomCV_V3() result per method, keyed "LC_<name>".
lc.cvlist <- list()

# Reference run: HLCM_EM with method = filteredFit, 70% of the data used for
# training. The result's `trainSamplesSets` is passed to every run in the loop
# below as `trainSampleSets`.
# NOTE(review): the section heading says "20x cv" but `repetitions` is 5 here
# (the non-LC run uses 20) -- confirm which value is intended.
lc.filteredFitcv <- randomCV_V3(
  theDataset,
  theOutcome,
  HLCM_EM,
  trainFraction = 0.7,
  repetitions = 5,
  method = filteredFit,
  hysteresis = 0.1,
  fitmethod = glm,
  filtermethod = univariate_BinEnsemble,
  filtermethod.control = list(pvalue = 0.05),
  family = "binomial"
)
save(lc.filteredFitcv, file = "LC_filteredFitcv.RDATA")
lc.cvlist[["LC_filteredFit"]] <- lc.filteredFitcv

# Run HLCM_EM once per candidate classifier on the reference partitions.
for (i in seq_along(models)) {
  model <- models[[i]]
  # Build the key from the i-th label. The earlier code derived `modelname`
  # from itself, which fails when `modelname` is not yet defined and otherwise
  # prepends "LC_" to whatever value was left over from a previous step.
  modelname <- paste0("LC_", modelsnames[i])
  cv <- randomCV_V3(
    theDataset,
    theOutcome,
    HLCM_EM,
    trainSampleSets = lc.filteredFitcv$trainSamplesSets,
    method = model,
    hysteresis = 0.1
  )
  # Keep a per-model copy on disk as well as the entry in the combined list.
  save(cv, file = paste0(modelname, "cv.RDATA"))
  lc.cvlist[[modelname]] <- cv
}
save(lc.cvlist, file = "lc.cvlist.RData")
# Plain (non latent-class) cross-validation; results are gathered in `cvlist`.
cvlist <- list()

# Reference run with filteredFit: 20 repetitions, 70% of the rows for training.
# Its `trainSamplesSets` is handed to every model in the loop below.
# NOTE(review): this call uses AdjustedFrame / "theClass" while the loop uses
# theDataset / theOutcome -- confirm they refer to the same data and outcome.
filteredFitcv <- randomCV(
  AdjustedFrame,
  "theClass",
  filteredFit,
  trainFraction = 0.7,
  repetitions = 20,
  fitmethod = glm,
  filtermethod = univariate_BinEnsemble,
  filtermethod.control = list(pvalue = 0.05),
  family = "binomial"
)
save(filteredFitcv, file = "filteredFitcv.RDATA")
cvlist[["filteredFit"]] <- filteredFitcv

# `i` tracks the position of the current model so its label can be looked up.
i <- 1
for (model in models) {
  modelname <- modelsnames[i]
  cv <- randomCV(
    theDataset,
    theOutcome,
    model,
    trainSampleSets = filteredFitcv$trainSamplesSets
  )
  # One file per model, plus an entry in the combined list.
  save(cv, file = paste0(modelname, "cv.RDATA"))
  cvlist[[modelname]] <- cv
  i <- i + 1
}
save(cvlist, file = "cvlist.RData")
ROC plots (latent class AUC vs vanilla AUC)
# Plot layout: one row, two columns, text scaled to half size.
par(mfrow = c(1, 2), cex = 0.5)

# Combine the latent-class and plain CV lists into a single list, benchmark
# it, and store the benchmark result on disk.
combined.cvlist <- combine.cvlist(lc.cvlist, cvlist)
cp.combined <- BinaryBenchmark(referenceCV = combined.cvlist)
save(cp.combined, file = "cp.combined.RData")
Statistics to assess the difference between the classes found by the LC scheme
# Start with empty containers for the KS, DTS and Wilcoxon results.
ks.list <- list()
dts.list <- list()
wilcox.list <- list()

# Zero-row data frame with one column for "filteredFit" and one per label in
# `modelsnames`.
concat.table <- data.frame(
  matrix(
    ncol = length(modelsnames) + 1,
    nrow = 0,
    dimnames = list(NULL, c("filteredFit", modelsnames))
  )
)

# Load the stored `lc.sets`, compute the statistics against AdjustedFrame and
# keep the result on disk.
load("lc.sets.RData")
result.stats <- get.lc.statistics(lc.sets, AdjustedFrame)
save(result.stats, file = "result.stats.RData")
# To reuse a previous run instead of recomputing:
# load("result.stats.RData")

# Render the summary table.
pander::pander(
  result.stats$concat.table,
  caption = "compressed table of statistical significant features per test per method"
)
compressed table of statistical significant features per test per method (continued below)
| KS |
168/352 |
128/352 |
0 |
0/352 |
0/352 |
| DTS |
136/352 |
89/352 |
0 |
0/352 |
0/352 |
| Wilcox |
169/352 |
138/352 |
0 |
0/352 |
0/352 |
| KS |
0 |
0/352 |
| DTS |
0 |
0/352 |
| Wilcox |
0 |
0/352 |
# Export the summary table as CSV.
write.csv(result.stats$concat.table, file = "concat.table.csv")

# Pull out the KS results and render the first entry, rounded to 3 decimals.
ks.list <- result.stats$ks.list
pander::pander(
  ks.list[[1]],
  caption = "statistical significant features using KS test",
  round = 3
)
statistical significant features using KS test
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0.007 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0.03 |
| 0.007 |
| 0 |
| 0.002 |
| 0 |
| 0.003 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0.017 |
| 0 |
| 0.036 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0.015 |
| 0.02 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0.003 |
| 0.006 |
| 0.01 |
| 0.035 |
| 0 |
| 0 |
| 0.043 |
| 0.003 |
| 0.009 |
| 0.023 |
| 0.019 |
| 0.011 |
| 0.019 |
| 0.01 |
| 0.002 |
| 0 |
| 0 |
| 0.026 |
| 0 |
| 0.014 |
| 0.016 |
| 0.013 |
| 0 |
| 0.021 |
| 0.016 |
| 0 |
| 0 |
| 0.001 |
| 0.018 |
| 0.012 |
| 0.002 |
| 0 |
| 0.01 |
| 0 |
| 0.002 |
| 0.043 |
| 0.041 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0.001 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0.007 |
| 0.016 |
| 0.02 |
| 0.002 |
| 0.009 |
| 0 |
| 0.005 |
| 0.002 |
| 0.009 |
| 0 |
| 0.011 |
| 0.001 |
| 0.015 |
| 0.018 |
| 0.034 |
| 0 |
| 0.014 |
| 0.001 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
# Pull out the DTS results and render the first entry, rounded to 3 decimals.
dts.list <- result.stats$dts.list
pander::pander(
  dts.list[[1]],
  caption = "statistical significant features using DTS test",
  round = 3
)
statistical significant features using DTS test
| 0.004 |
| 0.022 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.006 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.006 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.006 |
| 0.006 |
| 0.047 |
| 0.004 |
| 0.012 |
| 0.004 |
| 0.028 |
| 0.006 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.032 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.006 |
| 0.004 |
| 0.004 |
| 0.012 |
| 0.006 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.012 |
| 0.004 |
| 0.012 |
| 0.012 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.017 |
| 0.022 |
| 0.006 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.006 |
| 0.006 |
| 0.004 |
| 0.006 |
| 0.006 |
| 0.047 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.006 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.017 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.012 |
| 0.004 |
| 0.012 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.032 |
| 0.028 |
| 0.017 |
| 0.004 |
| 0.004 |
| 0.004 |
| 0.043 |
| 0.022 |
| 0.032 |
| 0.006 |
| 0.017 |
| 0.047 |
| 0.022 |
| 0.006 |
| 0.004 |
| 0.022 |
| 0.047 |
| 0.012 |
| 0.004 |
| 0.004 |
| 0.004 |
# Pull out the Wilcoxon results and render the first entry, rounded to
# 3 decimals.
wilcox.list <- result.stats$wilcox.list
pander::pander(
  wilcox.list[[1]],
  caption = "statistical significant features using Wilcoxon test",
  round = 3
)
statistical significant features using Wilcoxon test
| 0 |
| 0.007 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0.001 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0.002 |
| 0.022 |
| 0 |
| 0.009 |
| 0 |
| 0.024 |
| 0 |
| 0.038 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0.006 |
| 0 |
| 0.017 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0.008 |
| 0 |
| 0 |
| 0.026 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0.001 |
| 0.005 |
| 0.008 |
| 0.017 |
| 0.001 |
| 0 |
| 0.025 |
| 0.038 |
| 0.001 |
| 0.002 |
| 0.012 |
| 0.009 |
| 0.031 |
| 0.005 |
| 0 |
| 0 |
| 0.01 |
| 0 |
| 0.005 |
| 0.002 |
| 0.003 |
| 0.039 |
| 0 |
| 0 |
| 0.012 |
| 0.041 |
| 0.005 |
| 0 |
| 0.027 |
| 0.036 |
| 0 |
| 0 |
| 0.009 |
| 0.038 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0 |
| 0.001 |
| 0.002 |
| 0.004 |
| 0.006 |
| 0.012 |
| 0 |
| 0.006 |
| 0.002 |
| 0.004 |
| 0 |
| 0.016 |
| 0 |
| 0.032 |
| 0.034 |
| 0.035 |
| 0 |
| 0.034 |
| 0.008 |
| 0.004 |
| 0 |
| 0 |
| 0.001 |
| 0 |
| 0 |
| 0 |
| 0 |
# Store the three per-test result lists, one file each.
save(ks.list, file = "ks.list.RData")
save(dts.list, file = "dts.list.RData")
save(wilcox.list, file = "wilcox.list.RData")

# Single-panel layout with a wide right margin.
# `xpd` is set with the reserved word TRUE instead of `T`, because `T` is an
# ordinary variable that can be reassigned elsewhere in the session.
par(
  mfrow = c(1, 1),
  cex = 0.7,
  xpd = TRUE,
  pty = "m", # maximal plotting region
  mar = c(3, 3, 3, 10)
)

# Trim the combined benchmark, then plot it; the value returned by plot() is
# kept because its `metrics` element is used further down.
cp.combined <- trim.cp(cp.combined)
prBenchmark <- plot(cp.combined)








Performance metrics of LC CV
# Render the benchmark metrics table, rounded to 3 decimals.
pander::pander(
  prBenchmark$metrics,
  caption = "Lc vs vanilla Classifier Performance",
  round = 3
)
Lc vs vanilla Classifier Performance (continued below)
| BER |
0.077 |
0.084 |
0.086 |
0.087 |
0.093 |
| ACC |
0.925 |
0.937 |
0.919 |
0.925 |
0.917 |
| AUC |
0.98 |
0.98 |
0.977 |
0.975 |
0.976 |
| SEN |
0.919 |
0.892 |
0.904 |
0.894 |
0.897 |
| SPE |
0.926 |
0.94 |
0.922 |
0.931 |
0.919 |
| CIDX |
0.98 |
0.98 |
0.975 |
0.971 |
0.975 |
Table continues below
| BER |
0.094 |
0.115 |
0.115 |
| ACC |
0.921 |
0.845 |
0.849 |
| AUC |
0.964 |
0.958 |
0.958 |
| SEN |
0.886 |
0.93 |
0.924 |
| SPE |
0.923 |
0.839 |
0.844 |
| CIDX |
0.961 |
0.956 |
0.957 |
Table continues below
| BER |
0.116 |
0.308 |
0.323 |
| ACC |
0.849 |
0.879 |
0.92 |
| AUC |
0.958 |
0.841 |
0.822 |
| SEN |
0.924 |
0.416 |
0.398 |
| SPE |
0.844 |
0.969 |
0.956 |
| CIDX |
0.958 |
0.79 |
0.819 |
Table continues below
| BER |
0.352 |
0.369 |
0.375 |
0.375 |
| ACC |
0.355 |
0.383 |
0.371 |
0.371 |
| AUC |
0.941 |
0.97 |
0.969 |
0.969 |
| SEN |
0.984 |
1 |
1 |
1 |
| SPE |
0.312 |
0.264 |
0.249 |
0.249 |
| CIDX |
0.963 |
0.97 |
0.968 |
0.968 |
| BER |
0.385 |
| ACC |
0.356 |
| AUC |
0.971 |
| SEN |
1 |
| SPE |
0.231 |
| CIDX |
0.97 |
# Convert the benchmark metrics to a data frame and export them as CSV.
metrics <- data.frame(prBenchmark$metrics)
write.csv(metrics, file = "TADPOLE_metrics.csv")

# Last step of the script: call beep() (the beepr package is attached above).
beep()