Libraries
library(FRESA.CAD)
library("mlbench")
library("ggplot2")
library(pander)
library(beepr)
library(twosamples)
models <-c(BSWiMS.model,NAIVE_BAYES,LASSO_1SE,LASSO_MIN,GLMNET_RIDGE_MIN,GLMNET_ELASTICNET_MIN)
modelsnames <- c("BSWiMS.model","NAIVE_BAYES","LASSO_1SE","LASSO_MIN",
"GLMNET_RIDGE_MIN","GLMNET_ELASTICNET_MIN")
20x cv using 70% training and 30% holdout (for LC models)
lc.cvlist <- list()
lc.filteredFitcv <- randomCV_V3(dataSet,
"label",
HLCM_EM,
trainFraction = 0.7,
repetitions = 20,
method = filteredFit,
hysteresis=0.1,
fitmethod=glm,
filtermethod=univariate_BinEnsemble,
filtermethod.control = list(pvalue=0.05),
family = "binomial")
lc.cvlist[["LC_filteredFit"]] <-lc.filteredFitcv
i=1 #
for (model in models){
modelname= paste0("LC_",modelsnames[i])
cv <- randomCV_V3(dataSet,
"label",
HLCM_EM,
trainSampleSets = lc.filteredFitcv$trainSamplesSets,
method = model,
hysteresis=0.1)
lc.cvlist[[modelname]] <-cv
i = i+1
}
save(lc.cvlist, file = "lc.cvlist.RData")
20x cv using 70% training and 30% holdout (for vanilla models)
cvlist <- list()
filteredFitcv <- randomCV(dataSet,
"label",
filteredFit,
trainSampleSets = lc.filteredFitcv$trainSamplesSets,
fitmethod=glm,
filtermethod=univariate_BinEnsemble,
filtermethod.control = list(pvalue=0.05),
family = "binomial")
cvlist[["filteredFit"]] <-filteredFitcv
save(filteredFitcv, file = "filteredFitcv.RData")
i=1 #starts from filteredfit
for (model in models){
modelname= modelsnames[i]
#beep()
cv <- randomCV(dataSet,
"label",
model,
trainSampleSets = lc.filteredFitcv$trainSamplesSets)
cvlist[[modelname]] <-cv
i = i+1
}
save(cvlist, file = "cvlist.RData")
ROC plots (latent class AUC vs vanilla AUC)
par(mfrow = c(1,2),cex = 0.5);
#combine and adapt the cvlists into one combided
combined.cvlist <- combine.cvlist(lc.cvlist,cvlist)
cp.combined <- BinaryBenchmark(referenceCV = combined.cvlist)
save(cp.combined, file = "cp.combined.RData")
Statitistics to acces the difference between the classes found by the LC scheme
ks.list = list()
dts.list = list()
wilcox.list = list()
concat.table = data.frame(matrix(ncol=length(modelsnames)+1,nrow=0,
dimnames=list(NULL, c("filteredFit", modelsnames))))
for (i in 1:length(lc.cvlist)) {
lc.sets <- split_df_into_lc.sets(lc.cvlist[[i]],dataSet)
if (is.null(lc.sets)){
concat.table[1,i] <- 0
concat.table[2,i] <- 0
concat.table[3,i] <- 0
ks.list[[i]] <- 0
dts.list[[i]] <- 0
wilcox.list[[i]] <- 0
}else{
####################### KS############################
ks.res <- get_ks.test(lc.sets)
ks.res <- apply(ks.res, 2,p.adjust,method ="BH")
total.obs <- apply(matrix(dim(ks.res)), 2, prod)
ks.ss.obs <- sum(ks.res <0.05)
concat.table[1,i] <- paste0(ks.ss.obs,"/",total.obs)
#rows that have a pvalue lower than 0.05 (statistically significant)
ks.ss.rows<- sort(unique(which(ks.res < 0.05) %% nrow(ks.res)))
colnames(ks.res) <- apply(combn(1:max(ncol(ks.res),2), 2), 2,
function(x) {paste0("LC",paste0(x,collapse = " VS ")) })
ks.list[[i]] <- ks.res[ks.ss.rows,]
####################### DTS############################
dts.res <- get_dts.test(lc.sets)
dts.res <- apply(dts.res, 2,p.adjust,method ="BH")
dts.ss.obs <- sum(dts.res <0.05)
concat.table[2,i] <- paste0(dts.ss.obs,"/",total.obs)
#rows that have a pvalue lower than 0.05 (statistically significant)
dts.ss.rows<- sort(unique(which(dts.res < 0.05) %% nrow(dts.res)))
colnames(dts.res) <- apply(combn(1:max(ncol(ks.res),2), 2), 2,
function(x) {paste0("LC",paste0(x,collapse = " VS ")) })
dts.list[[i]] <- dts.res[dts.ss.rows,]
####################### wilcox############################
wilcox.res <- get_wilcox.test(lc.sets)
wilcox.res <- apply(wilcox.res, 2,p.adjust,method ="BH")
#apply(wilcox.res.adj, 2, function(x){table(x<0.5)})
#apply(wilcox.res, 2, function(x){table(x<0.5)})
wilcox.ss.obs <- sum(wilcox.res <0.05)
concat.table[3,i] <- paste0(wilcox.ss.obs,"/",total.obs)
#rows that have a pvalue lower than 0.05 (statistically significant)
wilcox.ss.rows<- sort(unique(which(wilcox.res < 0.05) %% nrow(wilcox.res)))
colnames(wilcox.res) <- apply(combn(1:max(ncol(ks.res),2), 2), 2,
function(x) {paste0("LC",paste0(x,collapse = " VS ")) })
wilcox.list[[i]] <- wilcox.res[wilcox.ss.rows,]
wilcox.list.adj <- lapply(wilcox.list,function(x){prod(x,total.obs)} )
}
}
rownames(concat.table) <- c("KS","DTS","Wilcox")
pander::pander(concat.table,
caption = "compressed table of statistical significant features per test per method")
compressed table of statistical significant features per test per method (continued below)
| KS |
5/104 |
3/104 |
0 |
12/312 |
5/104 |
| DTS |
5/104 |
5/104 |
0 |
14/312 |
4/104 |
| Wilcox |
5/104 |
4/104 |
0 |
12/312 |
4/104 |
| KS |
14/312 |
16/312 |
| DTS |
16/312 |
14/312 |
| Wilcox |
14/312 |
13/312 |
write.csv(concat.table,"concat.table.csv")
pander::pander(ks.res[ks.ss.rows,],
caption = "statistical significant features using KS test", round = 3)
statistical significant features using KS test
| 0 |
0 |
0 |
| 0 |
0 |
0 |
| 0 |
0 |
0 |
| 0 |
0 |
0 |
| 0.979 |
0.133 |
0.046 |
| 0.595 |
0.04 |
0.447 |
| 0.132 |
0.04 |
0.959 |
| 0.777 |
0.04 |
0.216 |
pander::pander( dts.res[dts.ss.rows,],
caption = "statistical significant features using DTS test", round = 3)
statistical significant features using DTS test
| 0.026 |
0.021 |
0.026 |
| 0.026 |
0.021 |
0.026 |
| 0.026 |
0.021 |
0.026 |
| 0.026 |
0.021 |
0.026 |
| 0.636 |
0.035 |
0.288 |
| 0.208 |
0.021 |
0.967 |
pander::pander(as.data.frame(wilcox.list[[1]]),
caption = "statistical significant features using Wilcoxon test", round =3 )
statistical significant features using Wilcoxon test
| 0 |
| 0 |
| 0 |
| 0 |
| 0.003 |
save(ks.list, file ="ks.list.RData")
save(dts.list, file ="dts.list.RData")
save(wilcox.list , file ="wilcox.list.RData")
par(mfrow = c(1,1),
cex = 0.7,
xpd = T,
pty = 'm', #maximal plotting region
mar = c(3,3,3,10))
cp.combined <-trim.cp(cp.combined)
prBenchmark <- plot(cp.combined)








Perfomance metrics of LC CV
pander::pander(prBenchmark$metrics,
caption = "Lc vs vanilla Classifier Performance",round = 3)
Lc vs vanilla Classifier Performance (continued below)
| BER |
0.026 |
0.075 |
0.091 |
0.166 |
0.271 |
| ACC |
0.975 |
0.928 |
0.899 |
0.83 |
0.731 |
| AUC |
0.989 |
0.983 |
0.984 |
0.899 |
0.857 |
| SEN |
0.968 |
0.908 |
0.99 |
0.867 |
0.713 |
| SPE |
0.981 |
0.943 |
0.828 |
0.801 |
0.745 |
| CIDX |
0.99 |
0.982 |
0.982 |
0.886 |
0.841 |
Table continues below
| BER |
0.291 |
0.294 |
0.297 |
0.304 |
| ACC |
0.715 |
0.711 |
0.71 |
0.703 |
| AUC |
0.732 |
0.728 |
0.728 |
0.723 |
| SEN |
0.663 |
0.662 |
0.657 |
0.646 |
| SPE |
0.755 |
0.749 |
0.751 |
0.747 |
| CIDX |
0.728 |
0.721 |
0.722 |
0.721 |
Table continues below
| BER |
0.336 |
0.337 |
0.351 |
| ACC |
0.668 |
0.668 |
0.651 |
| AUC |
0.701 |
0.701 |
0.685 |
| SEN |
0.629 |
0.627 |
0.635 |
| SPE |
0.699 |
0.699 |
0.662 |
| CIDX |
0.69 |
0.688 |
0.628 |
Table continues below
| BER |
0.367 |
0.396 |
0.403 |
| ACC |
0.634 |
0.643 |
0.628 |
| AUC |
0.676 |
0.836 |
0.725 |
| SEN |
0.621 |
0.29 |
0.349 |
| SPE |
0.643 |
0.916 |
0.842 |
| CIDX |
0.607 |
0.834 |
0.714 |
| BER |
0.406 |
| ACC |
0.627 |
| AUC |
0.724 |
| SEN |
0.341 |
| SPE |
0.848 |
| CIDX |
0.715 |
metrics <- data.frame(prBenchmark$metrics)
write.csv(metrics,"synt_metrics.csv")
## [++][+][+-]( 1147 )[+][+-]( 149 )[+][+]( 39 )[+][+]( 12 )[+][+]( 2 )[+][+]( 0 )< 2082 , 2125 , 1084 , 0 >
##
## 1 2
## 2124 1076