Here we make use of the FRESA.CAD::filteredfit() function to train ML models with and without UPSTM.
Naive-Bayes (NB) and LASSO models are used in this demo.
This scrip uses FRESA.CAD and mlbench R packages:
knitr::opts_chunk$set(collapse = TRUE, warning = FALSE, message = FALSE,comment = "#>")
library("FRESA.CAD")
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
library(mlbench)
op <- par(no.readonly = TRUE)
I’ll load the Sonar data set
data("Sonar", package = "mlbench")
print(table(Sonar$Class))
#>
#> M R
#> 111 97
Setting some variables for downstream analysis
studyName = "Sonar"
datasetframe <- Sonar
Outcome <- "Class"
# 50% of subjects for training
trainFraction = 0.5
Setting the Training and Testing sets
tb <- table(datasetframe[,Outcome])
classNames <- unique(datasetframe[,Outcome])
allrowClass <- datasetframe[,Outcome]
names(allrowClass) <- rownames(datasetframe)
trainsize <- trainFraction*min(tb);
trainSamples <- NULL;
for (theClass in classNames)
{
classSample <- allrowClass[allrowClass == theClass]
trainSamples <- c(trainSamples,names(classSample[sample(length(classSample),trainsize)]))
}
datasetframe_train <- datasetframe[trainSamples,]
testSamples <- !(rownames(datasetframe) %in% trainSamples)
datasetframe_test <- datasetframe[testSamples,]
outcomes <- datasetframe_train[,Outcome]
pander::pander(table(datasetframe[,Outcome]),caption="All")
| M | R |
|---|---|
| 111 | 97 |
pander::pander(table(datasetframe_train[,Outcome]),caption="Training")
| M | R |
|---|---|
| 48 | 48 |
pander::pander(table(datasetframe_test[,Outcome]),caption="Testing")
| M | R |
|---|---|
| 63 | 49 |
Train a simple NB and LASSO model on the datasets
In FRESA.CAD all Binary classification task assume that the outcome is 0 and 1.
datasetframe_train[,Outcome] <- 1*(datasetframe_train[,Outcome] == classNames[2])
datasetframe_test[,Outcome] <- 1*(datasetframe_test[,Outcome] == classNames[2])
mNBRaw <- filteredFit(paste(Outcome,"~."),
datasetframe_train,
fitmethod=NAIVE_BAYES,
filtermethod=univariate_KS,
filtermethod.control=list(pvalue=0.01,limit= 0),
pca=FALSE
)
mLASSORaw <- filteredFit(paste(Outcome,"~."),
datasetframe_train,
fitmethod=LASSO_MIN,
filtermethod=univariate_KS,
filtermethod.control=list(pvalue=0.20,limit= -1),
family = "binomial"
)
With PCA
# With PCA
mNBPCA <- filteredFit(paste(Outcome,"~."),
datasetframe_train,
fitmethod=NAIVE_BAYES,
filtermethod=univariate_KS,
filtermethod.control=list(pvalue=0.01,limit= 0),
pca=TRUE
)
mLASSOPCA <- filteredFit(paste(Outcome,"~."),
datasetframe_train,
fitmethod=LASSO_MIN,
filtermethod=univariate_KS,
filtermethod.control=list(pvalue=0.20,limit= -1),
PCA = TRUE,
family = "binomial"
)
Now we run filteredFit with the decorrelation set to true and default parameters
mNBDecor <- filteredFit(paste(Outcome,"~."),
datasetframe_train,
fitmethod=NAIVE_BAYES,
filtermethod=univariate_KS,
filtermethod.control=list(pvalue=0.01,limit= 0),
DECOR = TRUE,
pca=FALSE
)
mLASSODecor <- filteredFit(paste(Outcome,"~."),
datasetframe_train,
fitmethod=LASSO_MIN,
filtermethod=univariate_KS,
filtermethod.control=list(pvalue=0.20,limit= -1),
DECOR = TRUE,
family = "binomial"
)
Decorrelation with parameters: Spearman correlation and Robust Fit.
mNBDecor2 <- filteredFit(paste(Outcome,"~."),
datasetframe_train,
fitmethod=NAIVE_BAYES,
filtermethod=univariate_KS,
filtermethod.control=list(pvalue=0.01,limit= 0),
DECOR = TRUE,
DECOR.control=list(method="spearman",type="RLM"),
pca=FALSE
)
mLASSODecor2 <- filteredFit(paste(Outcome,"~."),
datasetframe_train,
fitmethod=LASSO_MIN,
filtermethod=univariate_KS,
filtermethod.control=list(pvalue=0.20,limit= -1),
DECOR = TRUE,
DECOR.control=list(method="spearman",type="RLM"),
family = "binomial"
)
Once we have the transformed testing dataset we can make a side by side comparison of predictions
# Predict the raw testing set
prRAW <- predict(mNBRaw,datasetframe_test)
# Predict with PCA
prPCA <- predict(mNBPCA,datasetframe_test)
# Predict the transformed dataset
prDecor <- predict(mNBDecor,datasetframe_test)
# Predict the transformed dataset spearman
prDecor2 <- predict(mNBDecor2,datasetframe_test)
par(mfrow=c(2,2))
AllRocAUC <- NULL;
classoutcomes <- datasetframe_test[,Outcome]
psRaw <- predictionStats_binary(cbind(classoutcomes,prRAW),
"NB Raw",cex=0.75)
NB Raw
pander::pander(psRaw$aucs)
| est | lower | upper |
|---|---|---|
| 0.8413 | 0.7616 | 0.921 |
AllRocAUC <- rbind(AllRocAUC,psRaw$aucs)
psPCA <- predictionStats_binary(cbind(classoutcomes,prPCA),
"NB PCA",cex=0.75)
NB PCA
pander::pander(psPCA$aucs)
| est | lower | upper |
|---|---|---|
| 0.7451 | 0.6473 | 0.8429 |
AllRocAUC <- rbind(AllRocAUC,psPCA$aucs)
psDecor <- predictionStats_binary(cbind(classoutcomes,prDecor),
"NB UPSTM",cex=0.75)
NB UPSTM
pander::pander(psDecor$aucs)
| est | lower | upper |
|---|---|---|
| 0.769 | 0.6741 | 0.864 |
AllRocAUC <- rbind(AllRocAUC,psDecor$aucs);
psDecor2 <- predictionStats_binary(cbind(classoutcomes,prDecor2),
"NB UPSTM Spearman",cex=0.75)
NB UPSTM Spearman
pander::pander(psDecor2$aucs)
| est | lower | upper |
|---|---|---|
| 0.712 | 0.6093 | 0.8148 |
AllRocAUC <- rbind(AllRocAUC,psDecor2$aucs);
psRaw <- predictionStats_binary(cbind(classoutcomes,
predict(mLASSORaw,datasetframe_test)),
"LASSO Raw",cex=0.75)
LASSO Raw
pander::pander(psRaw$aucs)
| est | lower | upper |
|---|---|---|
| 0.8154 | 0.7371 | 0.8936 |
AllRocAUC <- rbind(AllRocAUC,psRaw$aucs)
psPCA <- predictionStats_binary(cbind(classoutcomes,
predict(mLASSOPCA,datasetframe_test)),
"LASSO PCA",cex=0.75)
LASSO PCA
pander::pander(psPCA$aucs)
| est | lower | upper |
|---|---|---|
| 0.7901 | 0.7055 | 0.8747 |
AllRocAUC <- rbind(AllRocAUC,psPCA$aucs)
psDecor <- predictionStats_binary(cbind(classoutcomes,
predict(mLASSODecor,datasetframe_test)),
"LASSO UPSTM",cex=0.75)
LASSO UPSTM
pander::pander(psDecor$aucs)
| est | lower | upper |
|---|---|---|
| 0.7927 | 0.7061 | 0.8793 |
AllRocAUC <- rbind(AllRocAUC,psDecor$aucs);
psDecor2 <- predictionStats_binary(cbind(classoutcomes,
predict(mLASSODecor2,datasetframe_test)),
"LASSO UPSTM Spearman",cex=0.75)
LASSO UPSTM Spearman
pander::pander(psDecor2$aucs)
| est | lower | upper |
|---|---|---|
| 0.8322 | 0.7587 | 0.9057 |
AllRocAUC <- rbind(AllRocAUC,psDecor2$aucs);
rownames(AllRocAUC) <- c("NB:Raw","NB:PCA","NB:UPSTM_P","NB:UPSTM_S",
"LASSO:Raw","LASSO:PCA","LASSO:UPSTM_P","LASSO:UPSTM_S")
pander::pander(AllRocAUC)
| Â | est | lower | upper |
|---|---|---|---|
| NB:Raw | 0.8413 | 0.7616 | 0.921 |
| NB:PCA | 0.7451 | 0.6473 | 0.8429 |
| NB:UPSTM_P | 0.769 | 0.6741 | 0.864 |
| NB:UPSTM_S | 0.712 | 0.6093 | 0.8148 |
| LASSO:Raw | 0.8154 | 0.7371 | 0.8936 |
| LASSO:PCA | 0.7901 | 0.7055 | 0.8747 |
| LASSO:UPSTM_P | 0.7927 | 0.7061 | 0.8793 |
| LASSO:UPSTM_S | 0.8322 | 0.7587 | 0.9057 |
bpROCAUC <- barPlotCiError(as.matrix(AllRocAUC),
metricname = "ROCAUC",
thesets = "ROC AUC",
themethod = rownames(AllRocAUC),
main = "ROC AUC",
offsets = c(0.5,1),
scoreDirection = ">",
ho=0.5,
args.legend = list(bg = "white",x="bottomright",inset=c(0.0,0),cex=0.75),
col = terrain.colors(nrow(AllRocAUC))
)
The UPSTM is stored in the filteredFit() object. Hence, we can analyze and display the matrix.
gplots::heatmap.2(mNBDecor$UPSTM,
trace = "none",
mar = c(10,10),
col=rev(heat.colors(7)),
main = paste("UPSTM Matrix (Pearson, LM):",studyName),
cexRow = 0.7,
cexCol = 0.7,
key.title=NA,
key.xlab="beta",
xlab="UPSTM Feature", ylab="Input Feature")
gplots::heatmap.2(mNBDecor2$UPSTM,
trace = "none",
mar = c(10,10),
col=rev(heat.colors(7)),
main = paste("UPSTM Matrix (Spearman, RLM):",studyName),
cexRow = 0.7,
cexCol = 0.7,
key.title=NA,
key.xlab="beta",
xlab="UPSTM Feature", ylab="Input Feature")
dataCV <- datasetframe
dataCV[,Outcome] <- 1*(dataCV[,Outcome] == classNames[2])
cvNBRaw <- randomCV(dataCV,
Outcome,
fittingFunction= filteredFit,
classSamplingType = "Ba",
trainFraction = 0.80,
repetitions = 100,
fitmethod=NAIVE_BAYES,
filtermethod=univariate_KS,
filtermethod.control=list(pvalue=0.01,limit= 0),
pca = FALSE
)
……….10 Tested: 196 Avg. Selected: 13 Min Tests: 1 Max Tests: 7 Mean Tests: 2.755102 . MAD: 0.2467712
……….20 Tested: 207 Avg. Selected: 14.1 Min Tests: 1 Max Tests: 12 Mean Tests: 5.217391 . MAD: 0.2542709
……….30 Tested: 208 Avg. Selected: 13.73333 Min Tests: 2 Max Tests: 16 Mean Tests: 7.788462 . MAD: 0.2569203
……….40 Tested: 208 Avg. Selected: 13.7 Min Tests: 2 Max Tests: 21 Mean Tests: 10.38462 . MAD: 0.2593484
……….50 Tested: 208 Avg. Selected: 14.04 Min Tests: 5 Max Tests: 26 Mean Tests: 12.98077 . MAD: 0.2590856
……….60 Tested: 208 Avg. Selected: 14.31667 Min Tests: 5 Max Tests: 29 Mean Tests: 15.57692 . MAD: 0.2591148
……….70 Tested: 208 Avg. Selected: 14.38571 Min Tests: 5 Max Tests: 32 Mean Tests: 18.17308 . MAD: 0.258516
……….80 Tested: 208 Avg. Selected: 14.25 Min Tests: 8 Max Tests: 36 Mean Tests: 20.76923 . MAD: 0.2570825
……….90 Tested: 208 Avg. Selected: 14.25556 Min Tests: 10 Max Tests: 39 Mean Tests: 23.36538 . MAD: 0.254672
……….100 Tested: 208 Avg. Selected: 14.31 Min Tests: 11 Max Tests: 42 Mean Tests: 25.96154 . MAD: 0.2531862
cvNBPCA <- randomCV(dataCV,
Outcome,
trainSampleSets= cvNBRaw$trainSamplesSets,
fittingFunction= filteredFit,
fitmethod=NAIVE_BAYES,
filtermethod=univariate_KS,
filtermethod.control=list(pvalue=0.01,limit= 0),
pca = TRUE
)
……….10 Tested: 196 Avg. Selected: 13 Min Tests: 1 Max Tests: 7 Mean Tests: 2.755102 . MAD: 0.3106977
……….20 Tested: 207 Avg. Selected: 14.1 Min Tests: 1 Max Tests: 12 Mean Tests: 5.217391 . MAD: 0.2929746
……….30 Tested: 208 Avg. Selected: 13.73333 Min Tests: 2 Max Tests: 16 Mean Tests: 7.788462 . MAD: 0.2905504
……….40 Tested: 208 Avg. Selected: 13.7 Min Tests: 2 Max Tests: 21 Mean Tests: 10.38462 . MAD: 0.2943513
……….50 Tested: 208 Avg. Selected: 14.04 Min Tests: 5 Max Tests: 26 Mean Tests: 12.98077 . MAD: 0.2926295
……….60 Tested: 208 Avg. Selected: 14.31667 Min Tests: 5 Max Tests: 29 Mean Tests: 15.57692 . MAD: 0.2900025
……….70 Tested: 208 Avg. Selected: 14.38571 Min Tests: 5 Max Tests: 32 Mean Tests: 18.17308 . MAD: 0.2906709
……….80 Tested: 208 Avg. Selected: 14.25 Min Tests: 8 Max Tests: 36 Mean Tests: 20.76923 . MAD: 0.2874164
……….90 Tested: 208 Avg. Selected: 14.25556 Min Tests: 10 Max Tests: 39 Mean Tests: 23.36538 . MAD: 0.290441
……….100 Tested: 208 Avg. Selected: 14.31 Min Tests: 11 Max Tests: 42 Mean Tests: 25.96154 . MAD: 0.2891882
cvNBCCA <- randomCV(dataCV,
Outcome,
trainSampleSets= cvNBRaw$trainSamplesSets,
fittingFunction= filteredFit,
fitmethod=NAIVE_BAYES,
filtermethod=univariate_KS,
filtermethod.control=list(pvalue=0.01,limit= 0),
WHITE = "CCA",
pca = FALSE
)
……….10 Tested: 196 Avg. Selected: 13 Min Tests: 1 Max Tests: 7 Mean Tests: 2.755102 . MAD: 0.3188351
……….20 Tested: 207 Avg. Selected: 14.1 Min Tests: 1 Max Tests: 12 Mean Tests: 5.217391 . MAD: 0.3021787
……….30 Tested: 208 Avg. Selected: 13.73333 Min Tests: 2 Max Tests: 16 Mean Tests: 7.788462 . MAD: 0.3054885
……….40 Tested: 208 Avg. Selected: 13.7 Min Tests: 2 Max Tests: 21 Mean Tests: 10.38462 . MAD: 0.3062946
……….50 Tested: 208 Avg. Selected: 14.04 Min Tests: 5 Max Tests: 26 Mean Tests: 12.98077 . MAD: 0.3004374
……….60 Tested: 208 Avg. Selected: 14.31667 Min Tests: 5 Max Tests: 29 Mean Tests: 15.57692 . MAD: 0.2992335
……….70 Tested: 208 Avg. Selected: 14.38571 Min Tests: 5 Max Tests: 32 Mean Tests: 18.17308 . MAD: 0.2985134
……….80 Tested: 208 Avg. Selected: 14.25 Min Tests: 8 Max Tests: 36 Mean Tests: 20.76923 . MAD: 0.2948016
……….90 Tested: 208 Avg. Selected: 14.25556 Min Tests: 10 Max Tests: 39 Mean Tests: 23.36538 . MAD: 0.2992464
……….100 Tested: 208 Avg. Selected: 14.31 Min Tests: 11 Max Tests: 42 Mean Tests: 25.96154 . MAD: 0.2996997
cvNBDecor <- randomCV(dataCV,
Outcome,
trainSampleSets= cvNBRaw$trainSamplesSets,
fittingFunction= filteredFit,
fitmethod=NAIVE_BAYES,
filtermethod=univariate_KS,
filtermethod.control=list(pvalue=0.01,limit= 0),
DECOR = TRUE,
pca = FALSE
)
……….10 Tested: 196 Avg. Selected: 7.7 Min Tests: 1 Max Tests: 7 Mean Tests: 2.755102 . MAD: 0.2586948
……….20 Tested: 207 Avg. Selected: 8 Min Tests: 1 Max Tests: 12 Mean Tests: 5.217391 . MAD: 0.2685391
……….30 Tested: 208 Avg. Selected: 8.066667 Min Tests: 2 Max Tests: 16 Mean Tests: 7.788462 . MAD: 0.2725268
……….40 Tested: 208 Avg. Selected: 8.05 Min Tests: 2 Max Tests: 21 Mean Tests: 10.38462 . MAD: 0.2669177
……….50 Tested: 208 Avg. Selected: 8.2 Min Tests: 5 Max Tests: 26 Mean Tests: 12.98077 . MAD: 0.2694205
……….60 Tested: 208 Avg. Selected: 8.366667 Min Tests: 5 Max Tests: 29 Mean Tests: 15.57692 . MAD: 0.2692112
……….70 Tested: 208 Avg. Selected: 8.485714 Min Tests: 5 Max Tests: 32 Mean Tests: 18.17308 . MAD: 0.2685034
……….80 Tested: 208 Avg. Selected: 8.6125 Min Tests: 8 Max Tests: 36 Mean Tests: 20.76923 . MAD: 0.2658716
……….90 Tested: 208 Avg. Selected: 8.644444 Min Tests: 10 Max Tests: 39 Mean Tests: 23.36538 . MAD: 0.2644067
……….100 Tested: 208 Avg. Selected: 8.7 Min Tests: 11 Max Tests: 42 Mean Tests: 25.96154 . MAD: 0.2637768
cvNBDecorC <- randomCV(dataCV,
Outcome,
trainSampleSets= cvNBRaw$trainSamplesSets,
fittingFunction= filteredFit,
fitmethod=NAIVE_BAYES,
filtermethod=univariate_KS,
filtermethod.control=list(pvalue=0.01,limit= 0),
DECOR = TRUE,
DECOR.control=list(Outcome=Outcome),
pca = FALSE
)
……….10 Tested: 196 Avg. Selected: 9 Min Tests: 1 Max Tests: 7 Mean Tests: 2.755102 . MAD: 0.2330702
……….20 Tested: 207 Avg. Selected: 9.05 Min Tests: 1 Max Tests: 12 Mean Tests: 5.217391 . MAD: 0.2608207
……….30 Tested: 208 Avg. Selected: 9.1 Min Tests: 2 Max Tests: 16 Mean Tests: 7.788462 . MAD: 0.2653522
……….40 Tested: 208 Avg. Selected: 9.1 Min Tests: 2 Max Tests: 21 Mean Tests: 10.38462 . MAD: 0.2557903
……….50 Tested: 208 Avg. Selected: 9.26 Min Tests: 5 Max Tests: 26 Mean Tests: 12.98077 . MAD: 0.2563164
……….60 Tested: 208 Avg. Selected: 9.616667 Min Tests: 5 Max Tests: 29 Mean Tests: 15.57692 . MAD: 0.2565288
……….70 Tested: 208 Avg. Selected: 9.657143 Min Tests: 5 Max Tests: 32 Mean Tests: 18.17308 . MAD: 0.2577135
……….80 Tested: 208 Avg. Selected: 9.5875 Min Tests: 8 Max Tests: 36 Mean Tests: 20.76923 . MAD: 0.2528372
……….90 Tested: 208 Avg. Selected: 9.488889 Min Tests: 10 Max Tests: 39 Mean Tests: 23.36538 . MAD: 0.2561514
……….100 Tested: 208 Avg. Selected: 9.45 Min Tests: 11 Max Tests: 42 Mean Tests: 25.96154 . MAD: 0.2553373
The Aggregated Test Results
par(mfrow=c(2,2))
bpraw <- predictionStats_binary(cvNBRaw$testPredictions,"NB RAW",cex=0.70)
NB RAW
bpPCA <- predictionStats_binary(cvNBPCA$testPredictions,"NB PCA",cex=0.70)
NB PCA
bpdecor <- predictionStats_binary(cvNBDecor$testPredictions,"NB UPSTM",cex=0.70)
NB UPSTM
bpdecorC <- predictionStats_binary(cvNBDecorC$testPredictions,"NB UPSTM Outcome Driven",cex=0.70)
NB UPSTM Outcome Driven
pander::pander(bpraw$aucs)
| est | lower | upper |
|---|---|---|
| 0.8282 | 0.7709 | 0.8855 |
pander::pander(bpPCA$aucs)
| est | lower | upper |
|---|---|---|
| 0.8584 | 0.8079 | 0.9088 |
pander::pander(bpdecor$aucs)
| est | lower | upper |
|---|---|---|
| 0.8374 | 0.7828 | 0.8919 |
pander::pander(bpdecorC$aucs)
| est | lower | upper |
|---|---|---|
| 0.8521 | 0.8008 | 0.9035 |
signedsqrt <- function(x) { return (sign(x)*sqrt(abs(x)))}
data("Sonar", package = "mlbench")
sclass <- Sonar$Class
Sonar <- as.data.frame(model.matrix(Class ~ .*.,Sonar))
Sonar$`(Intercept)` <- NULL
Sonar[,1:ncol(Sonar)] <- sapply(Sonar,as.numeric)
fnames <- colnames(Sonar)
fnames <- str_replace_all(fnames," ","_")
fnames <- str_replace_all(fnames,"/","_")
fnames <- str_replace_all(fnames,":","_x_")
colnames(Sonar) <- fnames
squaredfeatures <- str_detect(fnames,"_x_")
Sonar[,squaredfeatures] <- as.data.frame(apply(Sonar[,squaredfeatures],2,signedsqrt));
Sonar$Class <- sclass
datasetframe <- Sonar
Setting the Training and Testing sets
datasetframe_train <- datasetframe[trainSamples,]
datasetframe_test <- datasetframe[testSamples,]
Train a simple NB and LASSO model on the datasets
In FRESA.CAD all Binary classification task assume that the outcome is 0 and 1.
datasetframe_train[,Outcome] <- 1*(datasetframe_train[,Outcome] == classNames[2])
datasetframe_test[,Outcome] <- 1*(datasetframe_test[,Outcome] == classNames[2])
mNBRaw <- filteredFit(paste(Outcome,"~."),
datasetframe_train,
fitmethod=NAIVE_BAYES,
filtermethod=univariate_KS,
filtermethod.control=list(pvalue=0.01,limit= 0),
pca=FALSE
)
mLASSORaw <- filteredFit(paste(Outcome,"~."),
datasetframe_train,
fitmethod=LASSO_MIN,
filtermethod=univariate_KS,
filtermethod.control=list(pvalue=0.20,limit= -1),
family = "binomial"
)
With PCA
# With PCA
mNBPCA <- filteredFit(paste(Outcome,"~."),
datasetframe_train,
fitmethod=NAIVE_BAYES,
filtermethod=univariate_KS,
filtermethod.control=list(pvalue=0.01,limit= 0),
pca=TRUE
)
mLASSOPCA <- filteredFit(paste(Outcome,"~."),
datasetframe_train,
fitmethod=LASSO_MIN,
filtermethod=univariate_KS,
filtermethod.control=list(pvalue=0.20,limit= -1),
PCA = TRUE,
family = "binomial"
)
Now we run filteredFit with the decorrelation set to true and default parameters
mNBDecor <- filteredFit(paste(Outcome,"~."),
datasetframe_train,
fitmethod=NAIVE_BAYES,
filtermethod=univariate_KS,
filtermethod.control=list(pvalue=0.01,limit= 0),
DECOR = TRUE,
pca=FALSE
)
mLASSODecor <- filteredFit(paste(Outcome,"~."),
datasetframe_train,
fitmethod=LASSO_MIN,
filtermethod=univariate_KS,
filtermethod.control=list(pvalue=0.20,limit= -1),
DECOR = TRUE,
family = "binomial"
)
Now we run filteredFit with the decorrelation set to true and default parameters and Outcome driven
mNBDecorD <- filteredFit(paste(Outcome,"~."),
datasetframe_train,
fitmethod=NAIVE_BAYES,
filtermethod=univariate_KS,
filtermethod.control=list(pvalue=0.01,limit= 0),
DECOR = TRUE,
DECOR.control=list(Outcome=Outcome),
pca=FALSE
)
mLASSODecorD <- filteredFit(paste(Outcome,"~."),
datasetframe_train,
fitmethod=LASSO_MIN,
filtermethod=univariate_KS,
filtermethod.control=list(pvalue=0.20,limit= -1),
DECOR = TRUE,
DECOR.control=list(Outcome=Outcome),
family = "binomial"
)
Decorrelation with parameters: Spearman correlation and Robust Fit.
mNBDecor2 <- filteredFit(paste(Outcome,"~."),
datasetframe_train,
fitmethod=NAIVE_BAYES,
filtermethod=univariate_KS,
filtermethod.control=list(pvalue=0.01,limit= 0),
DECOR = TRUE,
DECOR.control=list(method="spearman",type="RLM"),
pca=FALSE
)
mLASSODecor2 <- filteredFit(paste(Outcome,"~."),
datasetframe_train,
fitmethod=LASSO_MIN,
filtermethod=univariate_KS,
filtermethod.control=list(pvalue=0.20,limit= -1),
DECOR = TRUE,
DECOR.control=list(method="spearman",type="RLM"),
family = "binomial"
)
Once we have the transformed testing dataset we can make a side by side comparison of predictions
# Predict the raw testing set
prRAW <- predict(mNBRaw,datasetframe_test)
# Predict with PCA
prPCA <- predict(mNBPCA,datasetframe_test)
# Predict the transformed dataset
prDecor <- predict(mNBDecor,datasetframe_test)
# Predict the transformed dataset spearman
prDecor2 <- predict(mNBDecor2,datasetframe_test)
# Predict the transformed dataset
prDecorD <- predict(mNBDecorD,datasetframe_test)
par(mfrow=c(2,2))
AllRocAUC <- NULL;
classoutcomes <- datasetframe_test[,Outcome]
psRaw <- predictionStats_binary(cbind(classoutcomes,prRAW),
"NB Raw",cex=0.75)
NB Raw
pander::pander(psRaw$aucs)
| est | lower | upper |
|---|---|---|
| 0.7561 | 0.6709 | 0.8413 |
AllRocAUC <- rbind(AllRocAUC,psRaw$aucs)
psPCA <- predictionStats_binary(cbind(classoutcomes,prPCA),
"NB PCA",cex=0.75)
NB PCA
pander::pander(psPCA$aucs)
| est | lower | upper |
|---|---|---|
| 0.6453 | 0.538 | 0.7525 |
AllRocAUC <- rbind(AllRocAUC,psPCA$aucs)
psDecor <- predictionStats_binary(cbind(classoutcomes,prDecor),
"NB UPSTM",cex=0.75)
NB UPSTM
pander::pander(psDecor$aucs)
| est | lower | upper |
|---|---|---|
| 0.846 | 0.7699 | 0.922 |
AllRocAUC <- rbind(AllRocAUC,psDecor$aucs);
psDecor2 <- predictionStats_binary(cbind(classoutcomes,prDecor2),
"NB UPSTM Spearman",cex=0.75)
NB UPSTM Spearman
pander::pander(psDecor2$aucs)
| est | lower | upper |
|---|---|---|
| 0.8343 | 0.7549 | 0.9137 |
AllRocAUC <- rbind(AllRocAUC,psDecor2$aucs);
psDecorD <- predictionStats_binary(cbind(classoutcomes,prDecorD),
"NB UPSTMD Spearman",cex=0.75)
NB UPSTMD Spearman
pander::pander(psDecorD$aucs)
| est | lower | upper |
|---|---|---|
| 0.8147 | 0.735 | 0.8944 |
AllRocAUC <- rbind(AllRocAUC,psDecorD$aucs);
psRaw <- predictionStats_binary(cbind(classoutcomes,
predict(mLASSORaw,datasetframe_test)),
"LASSO Raw",cex=0.75)
LASSO Raw
pander::pander(psRaw$aucs)
| est | lower | upper |
|---|---|---|
| 0.8769 | 0.8114 | 0.9424 |
AllRocAUC <- rbind(AllRocAUC,psRaw$aucs)
psPCA <- predictionStats_binary(cbind(classoutcomes,
predict(mLASSOPCA,datasetframe_test)),
"LASSO PCA",cex=0.75)
LASSO PCA
pander::pander(psPCA$aucs)
| est | lower | upper |
|---|---|---|
| 0.8056 | 0.7249 | 0.8864 |
AllRocAUC <- rbind(AllRocAUC,psPCA$aucs)
psDecor <- predictionStats_binary(cbind(classoutcomes,
predict(mLASSODecor,datasetframe_test)),
"LASSO UPSTM",cex=0.75)
LASSO UPSTM
pander::pander(psDecor$aucs)
| est | lower | upper |
|---|---|---|
| 0.8649 | 0.7964 | 0.9334 |
AllRocAUC <- rbind(AllRocAUC,psDecor$aucs);
psDecorD <- predictionStats_binary(cbind(classoutcomes,
predict(mLASSODecorD,datasetframe_test)),
"LASSO UPSTMD",cex=0.75)
LASSO UPSTMD
pander::pander(psDecorD$aucs)
| est | lower | upper |
|---|---|---|
| 0.8717 | 0.8067 | 0.9368 |
AllRocAUC <- rbind(AllRocAUC,psDecorD$aucs);
psDecor2 <- predictionStats_binary(cbind(classoutcomes,
predict(mLASSODecor2,datasetframe_test)),
"LASSO UPSTM Spearman",cex=0.75)
LASSO UPSTM Spearman
pander::pander(psDecor2$aucs)
| est | lower | upper |
|---|---|---|
| 0.8507 | 0.7768 | 0.9246 |
AllRocAUC <- rbind(AllRocAUC,psDecor2$aucs);
rownames(AllRocAUC) <- c("NB:Raw","NB:PCA","NB:UPSTM_P","NB:UPSTMD_P","NB:UPSTM_S",
"LASSO:Raw","LASSO:PCA","LASSO:UPSTM_P","LASSO:UPSTMD_P","LASSO:UPSTM_S")
pander::pander(AllRocAUC)
| Â | est | lower | upper |
|---|---|---|---|
| NB:Raw | 0.7561 | 0.6709 | 0.8413 |
| NB:PCA | 0.6453 | 0.538 | 0.7525 |
| NB:UPSTM_P | 0.846 | 0.7699 | 0.922 |
| NB:UPSTMD_P | 0.8343 | 0.7549 | 0.9137 |
| NB:UPSTM_S | 0.8147 | 0.735 | 0.8944 |
| LASSO:Raw | 0.8769 | 0.8114 | 0.9424 |
| LASSO:PCA | 0.8056 | 0.7249 | 0.8864 |
| LASSO:UPSTM_P | 0.8649 | 0.7964 | 0.9334 |
| LASSO:UPSTMD_P | 0.8717 | 0.8067 | 0.9368 |
| LASSO:UPSTM_S | 0.8507 | 0.7768 | 0.9246 |
bpROCAUC <- barPlotCiError(as.matrix(AllRocAUC),
metricname = "ROCAUC",
thesets = "ROC AUC",
themethod = rownames(AllRocAUC),
main = "ROC AUC",
offsets = c(0.5,1),
scoreDirection = ">",
ho=0.5,
args.legend = list(bg = "white",x="bottomright",inset=c(0.0,0),cex=0.75),
col = terrain.colors(nrow(AllRocAUC))
)