Filtered ML fit and the UPSTM with FRESA.CAD

Here we make use of the FRESA.CAD::filteredfit() function to train ML models with and without UPSTM.

Naive-Bayes (NB) and LASSO models are used in this demo.

This scrip uses FRESA.CAD and mlbench R packages:

knitr::opts_chunk$set(collapse = TRUE, warning = FALSE, message = FALSE,comment = "#>")

library("FRESA.CAD")
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
library(mlbench)

op <- par(no.readonly = TRUE)

I’ll load the Sonar data set

data("Sonar", package = "mlbench")
print(table(Sonar$Class))
#> 
#>   M   R 
#> 111  97

Setting some variables for downstream analysis

studyName = "Sonar"
datasetframe <- Sonar
Outcome <- "Class"

# 50% of subjects for training

trainFraction = 0.5

Setting the Training and Testing sets


tb <- table(datasetframe[,Outcome])
classNames <- unique(datasetframe[,Outcome])

allrowClass <- datasetframe[,Outcome]
names(allrowClass) <- rownames(datasetframe)

trainsize <- trainFraction*min(tb);
trainSamples <- NULL;
for (theClass in classNames)
{
  classSample <- allrowClass[allrowClass == theClass]
  trainSamples <- c(trainSamples,names(classSample[sample(length(classSample),trainsize)]))
}


datasetframe_train <- datasetframe[trainSamples,]
testSamples <- !(rownames(datasetframe) %in% trainSamples)
datasetframe_test <- datasetframe[testSamples,]

outcomes <- datasetframe_train[,Outcome]

pander::pander(table(datasetframe[,Outcome]),caption="All")
All
M R
111 97
pander::pander(table(datasetframe_train[,Outcome]),caption="Training")
Training
M R
48 48
pander::pander(table(datasetframe_test[,Outcome]),caption="Testing")
Testing
M R
63 49

Machine Learning with the filteredFit() function

Train a simple NB and LASSO model on the datasets

In FRESA.CAD all Binary classification task assume that the outcome is 0 and 1.


datasetframe_train[,Outcome] <- 1*(datasetframe_train[,Outcome] == classNames[2])
datasetframe_test[,Outcome] <- 1*(datasetframe_test[,Outcome] == classNames[2])

mNBRaw <- filteredFit(paste(Outcome,"~."),
                   datasetframe_train,
                   fitmethod=NAIVE_BAYES,
                     filtermethod=univariate_KS,
                     filtermethod.control=list(pvalue=0.01,limit= 0),
                     pca=FALSE
                   )

mLASSORaw <- filteredFit(paste(Outcome,"~."),
                   datasetframe_train,
                   fitmethod=LASSO_MIN,
                     filtermethod=univariate_KS,
                     filtermethod.control=list(pvalue=0.20,limit= -1),
                    family = "binomial"
                   )

With PCA

# With PCA
mNBPCA <- filteredFit(paste(Outcome,"~."),
                   datasetframe_train,
                   fitmethod=NAIVE_BAYES,
                     filtermethod=univariate_KS,
                     filtermethod.control=list(pvalue=0.01,limit= 0),
                     pca=TRUE
                   )


mLASSOPCA <- filteredFit(paste(Outcome,"~."),
                   datasetframe_train,
                   fitmethod=LASSO_MIN,
                     filtermethod=univariate_KS,
                     filtermethod.control=list(pvalue=0.20,limit= -1),
                     PCA = TRUE,
                    family = "binomial"
                   )

Now we run filteredFit with the decorrelation set to true and default parameters


mNBDecor <- filteredFit(paste(Outcome,"~."),
                   datasetframe_train,
                    fitmethod=NAIVE_BAYES,
                     filtermethod=univariate_KS,
                     filtermethod.control=list(pvalue=0.01,limit= 0),
                     DECOR = TRUE,
                     pca=FALSE
                   )

mLASSODecor <- filteredFit(paste(Outcome,"~."),
                   datasetframe_train,
                    fitmethod=LASSO_MIN,
                     filtermethod=univariate_KS,
                     filtermethod.control=list(pvalue=0.20,limit= -1),
                     DECOR = TRUE,
                    family = "binomial"
                   )

Decorrelation with parameters: Spearman correlation and Robust Fit.

mNBDecor2 <- filteredFit(paste(Outcome,"~."),
                   datasetframe_train,
                    fitmethod=NAIVE_BAYES,
                     filtermethod=univariate_KS,
                     filtermethod.control=list(pvalue=0.01,limit= 0),
                     DECOR = TRUE,
                     DECOR.control=list(method="spearman",type="RLM"),
                     pca=FALSE
                   )
mLASSODecor2 <- filteredFit(paste(Outcome,"~."),
                   datasetframe_train,
                    fitmethod=LASSO_MIN,
                     filtermethod=univariate_KS,
                     filtermethod.control=list(pvalue=0.20,limit= -1),
                     DECOR = TRUE,
                     DECOR.control=list(method="spearman",type="RLM"),
                    family = "binomial"
                   )

Once we have the transformed testing dataset we can make a side by side comparison of predictions


# Predict the raw testing set
prRAW <- predict(mNBRaw,datasetframe_test)

# Predict with PCA
prPCA <- predict(mNBPCA,datasetframe_test)

# Predict the transformed dataset
prDecor <- predict(mNBDecor,datasetframe_test)

# Predict the transformed dataset spearman
prDecor2 <- predict(mNBDecor2,datasetframe_test)

par(mfrow=c(2,2))
AllRocAUC <- NULL;

classoutcomes <- datasetframe_test[,Outcome]
psRaw <- predictionStats_binary(cbind(classoutcomes,prRAW),
                                "NB Raw",cex=0.75)

NB Raw

pander::pander(psRaw$aucs)
est lower upper
0.8413 0.7616 0.921
AllRocAUC <- rbind(AllRocAUC,psRaw$aucs)

psPCA <- predictionStats_binary(cbind(classoutcomes,prPCA),
                                "NB PCA",cex=0.75)

NB PCA

pander::pander(psPCA$aucs)
est lower upper
0.7451 0.6473 0.8429
AllRocAUC <- rbind(AllRocAUC,psPCA$aucs)

psDecor <- predictionStats_binary(cbind(classoutcomes,prDecor),
                                "NB UPSTM",cex=0.75)

NB UPSTM

pander::pander(psDecor$aucs)
est lower upper
0.769 0.6741 0.864
AllRocAUC <- rbind(AllRocAUC,psDecor$aucs);


psDecor2 <- predictionStats_binary(cbind(classoutcomes,prDecor2),
                                "NB UPSTM Spearman",cex=0.75)

NB UPSTM Spearman

pander::pander(psDecor2$aucs)
est lower upper
0.712 0.6093 0.8148
AllRocAUC <- rbind(AllRocAUC,psDecor2$aucs);


psRaw <- predictionStats_binary(cbind(classoutcomes,
                                      predict(mLASSORaw,datasetframe_test)),
                                "LASSO Raw",cex=0.75)

LASSO Raw

pander::pander(psRaw$aucs)
est lower upper
0.8154 0.7371 0.8936
AllRocAUC <- rbind(AllRocAUC,psRaw$aucs)

psPCA <- predictionStats_binary(cbind(classoutcomes,
                                      predict(mLASSOPCA,datasetframe_test)),
                                "LASSO PCA",cex=0.75)

LASSO PCA

pander::pander(psPCA$aucs)
est lower upper
0.7901 0.7055 0.8747
AllRocAUC <- rbind(AllRocAUC,psPCA$aucs)

psDecor <- predictionStats_binary(cbind(classoutcomes,
                                        predict(mLASSODecor,datasetframe_test)),
                                "LASSO UPSTM",cex=0.75)

LASSO UPSTM

pander::pander(psDecor$aucs)
est lower upper
0.7927 0.7061 0.8793
AllRocAUC <- rbind(AllRocAUC,psDecor$aucs);


psDecor2 <- predictionStats_binary(cbind(classoutcomes,
                                         predict(mLASSODecor2,datasetframe_test)),
                                "LASSO UPSTM Spearman",cex=0.75)

LASSO UPSTM Spearman

pander::pander(psDecor2$aucs)
est lower upper
0.8322 0.7587 0.9057
AllRocAUC <- rbind(AllRocAUC,psDecor2$aucs);

Comparing ROCAUC


rownames(AllRocAUC) <- c("NB:Raw","NB:PCA","NB:UPSTM_P","NB:UPSTM_S",
                         "LASSO:Raw","LASSO:PCA","LASSO:UPSTM_P","LASSO:UPSTM_S")
pander::pander(AllRocAUC)
  est lower upper
NB:Raw 0.8413 0.7616 0.921
NB:PCA 0.7451 0.6473 0.8429
NB:UPSTM_P 0.769 0.6741 0.864
NB:UPSTM_S 0.712 0.6093 0.8148
LASSO:Raw 0.8154 0.7371 0.8936
LASSO:PCA 0.7901 0.7055 0.8747
LASSO:UPSTM_P 0.7927 0.7061 0.8793
LASSO:UPSTM_S 0.8322 0.7587 0.9057
bpROCAUC <- barPlotCiError(as.matrix(AllRocAUC),
                          metricname = "ROCAUC",
                          thesets = "ROC AUC",
                          themethod = rownames(AllRocAUC),
                          main = "ROC AUC",
                          offsets = c(0.5,1),
                          scoreDirection = ">",
                          ho=0.5,
                          args.legend = list(bg = "white",x="bottomright",inset=c(0.0,0),cex=0.75),
                          col = terrain.colors(nrow(AllRocAUC))
                          )

Visualization of UPSTM

The UPSTM is stored in the filteredFit() object. Hence, we can analyze and display the matrix.


gplots::heatmap.2(mNBDecor$UPSTM,
                  trace = "none",
                  mar = c(10,10),
                  col=rev(heat.colors(7)),
                  main = paste("UPSTM Matrix (Pearson, LM):",studyName),
                  cexRow = 0.7,
                  cexCol = 0.7,
                  key.title=NA,
                  key.xlab="beta",
                  xlab="UPSTM Feature", ylab="Input Feature")


gplots::heatmap.2(mNBDecor2$UPSTM,
                  trace = "none",
                  mar = c(10,10),
                  col=rev(heat.colors(7)),
                  main = paste("UPSTM Matrix (Spearman, RLM):",studyName),
                  cexRow = 0.7,
                  cexCol = 0.7,
                  key.title=NA,
                  key.xlab="beta",
                  xlab="UPSTM Feature", ylab="Input Feature")

Repeated Holdout Cross-Validation




dataCV <- datasetframe
dataCV[,Outcome] <- 1*(dataCV[,Outcome] == classNames[2])


cvNBRaw <- randomCV(dataCV,
                Outcome,
                fittingFunction= filteredFit,
                classSamplingType = "Ba",
                trainFraction = 0.80,
                repetitions = 100,
                fitmethod=NAIVE_BAYES,
                filtermethod=univariate_KS,
                filtermethod.control=list(pvalue=0.01,limit= 0),
                pca = FALSE
            )

……….10 Tested: 196 Avg. Selected: 13 Min Tests: 1 Max Tests: 7 Mean Tests: 2.755102 . MAD: 0.2467712

……….20 Tested: 207 Avg. Selected: 14.1 Min Tests: 1 Max Tests: 12 Mean Tests: 5.217391 . MAD: 0.2542709

……….30 Tested: 208 Avg. Selected: 13.73333 Min Tests: 2 Max Tests: 16 Mean Tests: 7.788462 . MAD: 0.2569203

……….40 Tested: 208 Avg. Selected: 13.7 Min Tests: 2 Max Tests: 21 Mean Tests: 10.38462 . MAD: 0.2593484

……….50 Tested: 208 Avg. Selected: 14.04 Min Tests: 5 Max Tests: 26 Mean Tests: 12.98077 . MAD: 0.2590856

……….60 Tested: 208 Avg. Selected: 14.31667 Min Tests: 5 Max Tests: 29 Mean Tests: 15.57692 . MAD: 0.2591148

……….70 Tested: 208 Avg. Selected: 14.38571 Min Tests: 5 Max Tests: 32 Mean Tests: 18.17308 . MAD: 0.258516

……….80 Tested: 208 Avg. Selected: 14.25 Min Tests: 8 Max Tests: 36 Mean Tests: 20.76923 . MAD: 0.2570825

……….90 Tested: 208 Avg. Selected: 14.25556 Min Tests: 10 Max Tests: 39 Mean Tests: 23.36538 . MAD: 0.254672

……….100 Tested: 208 Avg. Selected: 14.31 Min Tests: 11 Max Tests: 42 Mean Tests: 25.96154 . MAD: 0.2531862


cvNBPCA <- randomCV(dataCV,
                Outcome,
                trainSampleSets= cvNBRaw$trainSamplesSets,
                fittingFunction= filteredFit,
                fitmethod=NAIVE_BAYES,
                filtermethod=univariate_KS,
                filtermethod.control=list(pvalue=0.01,limit= 0),
                pca = TRUE
            )

……….10 Tested: 196 Avg. Selected: 13 Min Tests: 1 Max Tests: 7 Mean Tests: 2.755102 . MAD: 0.3106977

……….20 Tested: 207 Avg. Selected: 14.1 Min Tests: 1 Max Tests: 12 Mean Tests: 5.217391 . MAD: 0.2929746

……….30 Tested: 208 Avg. Selected: 13.73333 Min Tests: 2 Max Tests: 16 Mean Tests: 7.788462 . MAD: 0.2905504

……….40 Tested: 208 Avg. Selected: 13.7 Min Tests: 2 Max Tests: 21 Mean Tests: 10.38462 . MAD: 0.2943513

……….50 Tested: 208 Avg. Selected: 14.04 Min Tests: 5 Max Tests: 26 Mean Tests: 12.98077 . MAD: 0.2926295

……….60 Tested: 208 Avg. Selected: 14.31667 Min Tests: 5 Max Tests: 29 Mean Tests: 15.57692 . MAD: 0.2900025

……….70 Tested: 208 Avg. Selected: 14.38571 Min Tests: 5 Max Tests: 32 Mean Tests: 18.17308 . MAD: 0.2906709

……….80 Tested: 208 Avg. Selected: 14.25 Min Tests: 8 Max Tests: 36 Mean Tests: 20.76923 . MAD: 0.2874164

……….90 Tested: 208 Avg. Selected: 14.25556 Min Tests: 10 Max Tests: 39 Mean Tests: 23.36538 . MAD: 0.290441

……….100 Tested: 208 Avg. Selected: 14.31 Min Tests: 11 Max Tests: 42 Mean Tests: 25.96154 . MAD: 0.2891882


cvNBCCA <- randomCV(dataCV,
                Outcome,
                trainSampleSets= cvNBRaw$trainSamplesSets,
                fittingFunction= filteredFit,
                fitmethod=NAIVE_BAYES,
                filtermethod=univariate_KS,
                filtermethod.control=list(pvalue=0.01,limit= 0),
                WHITE = "CCA",
                pca = FALSE
            )

……….10 Tested: 196 Avg. Selected: 13 Min Tests: 1 Max Tests: 7 Mean Tests: 2.755102 . MAD: 0.3188351

……….20 Tested: 207 Avg. Selected: 14.1 Min Tests: 1 Max Tests: 12 Mean Tests: 5.217391 . MAD: 0.3021787

……….30 Tested: 208 Avg. Selected: 13.73333 Min Tests: 2 Max Tests: 16 Mean Tests: 7.788462 . MAD: 0.3054885

……….40 Tested: 208 Avg. Selected: 13.7 Min Tests: 2 Max Tests: 21 Mean Tests: 10.38462 . MAD: 0.3062946

……….50 Tested: 208 Avg. Selected: 14.04 Min Tests: 5 Max Tests: 26 Mean Tests: 12.98077 . MAD: 0.3004374

……….60 Tested: 208 Avg. Selected: 14.31667 Min Tests: 5 Max Tests: 29 Mean Tests: 15.57692 . MAD: 0.2992335

……….70 Tested: 208 Avg. Selected: 14.38571 Min Tests: 5 Max Tests: 32 Mean Tests: 18.17308 . MAD: 0.2985134

……….80 Tested: 208 Avg. Selected: 14.25 Min Tests: 8 Max Tests: 36 Mean Tests: 20.76923 . MAD: 0.2948016

……….90 Tested: 208 Avg. Selected: 14.25556 Min Tests: 10 Max Tests: 39 Mean Tests: 23.36538 . MAD: 0.2992464

……….100 Tested: 208 Avg. Selected: 14.31 Min Tests: 11 Max Tests: 42 Mean Tests: 25.96154 . MAD: 0.2996997


cvNBDecor <- randomCV(dataCV,
                Outcome,
                trainSampleSets= cvNBRaw$trainSamplesSets,
                fittingFunction= filteredFit,
                fitmethod=NAIVE_BAYES,
                filtermethod=univariate_KS,
                filtermethod.control=list(pvalue=0.01,limit= 0),
                DECOR = TRUE,
                pca = FALSE
            )

……….10 Tested: 196 Avg. Selected: 7.7 Min Tests: 1 Max Tests: 7 Mean Tests: 2.755102 . MAD: 0.2586948

……….20 Tested: 207 Avg. Selected: 8 Min Tests: 1 Max Tests: 12 Mean Tests: 5.217391 . MAD: 0.2685391

……….30 Tested: 208 Avg. Selected: 8.066667 Min Tests: 2 Max Tests: 16 Mean Tests: 7.788462 . MAD: 0.2725268

……….40 Tested: 208 Avg. Selected: 8.05 Min Tests: 2 Max Tests: 21 Mean Tests: 10.38462 . MAD: 0.2669177

……….50 Tested: 208 Avg. Selected: 8.2 Min Tests: 5 Max Tests: 26 Mean Tests: 12.98077 . MAD: 0.2694205

……….60 Tested: 208 Avg. Selected: 8.366667 Min Tests: 5 Max Tests: 29 Mean Tests: 15.57692 . MAD: 0.2692112

……….70 Tested: 208 Avg. Selected: 8.485714 Min Tests: 5 Max Tests: 32 Mean Tests: 18.17308 . MAD: 0.2685034

……….80 Tested: 208 Avg. Selected: 8.6125 Min Tests: 8 Max Tests: 36 Mean Tests: 20.76923 . MAD: 0.2658716

……….90 Tested: 208 Avg. Selected: 8.644444 Min Tests: 10 Max Tests: 39 Mean Tests: 23.36538 . MAD: 0.2644067

……….100 Tested: 208 Avg. Selected: 8.7 Min Tests: 11 Max Tests: 42 Mean Tests: 25.96154 . MAD: 0.2637768


cvNBDecorC <- randomCV(dataCV,
                Outcome,
                trainSampleSets= cvNBRaw$trainSamplesSets,
                fittingFunction= filteredFit,
                fitmethod=NAIVE_BAYES,
                filtermethod=univariate_KS,
                filtermethod.control=list(pvalue=0.01,limit= 0),
                DECOR = TRUE,
                DECOR.control=list(Outcome=Outcome),
                pca = FALSE
            )

……….10 Tested: 196 Avg. Selected: 9 Min Tests: 1 Max Tests: 7 Mean Tests: 2.755102 . MAD: 0.2330702

……….20 Tested: 207 Avg. Selected: 9.05 Min Tests: 1 Max Tests: 12 Mean Tests: 5.217391 . MAD: 0.2608207

……….30 Tested: 208 Avg. Selected: 9.1 Min Tests: 2 Max Tests: 16 Mean Tests: 7.788462 . MAD: 0.2653522

……….40 Tested: 208 Avg. Selected: 9.1 Min Tests: 2 Max Tests: 21 Mean Tests: 10.38462 . MAD: 0.2557903

……….50 Tested: 208 Avg. Selected: 9.26 Min Tests: 5 Max Tests: 26 Mean Tests: 12.98077 . MAD: 0.2563164

……….60 Tested: 208 Avg. Selected: 9.616667 Min Tests: 5 Max Tests: 29 Mean Tests: 15.57692 . MAD: 0.2565288

……….70 Tested: 208 Avg. Selected: 9.657143 Min Tests: 5 Max Tests: 32 Mean Tests: 18.17308 . MAD: 0.2577135

……….80 Tested: 208 Avg. Selected: 9.5875 Min Tests: 8 Max Tests: 36 Mean Tests: 20.76923 . MAD: 0.2528372

……….90 Tested: 208 Avg. Selected: 9.488889 Min Tests: 10 Max Tests: 39 Mean Tests: 23.36538 . MAD: 0.2561514

……….100 Tested: 208 Avg. Selected: 9.45 Min Tests: 11 Max Tests: 42 Mean Tests: 25.96154 . MAD: 0.2553373

The Aggregated Test Results


par(mfrow=c(2,2))
bpraw <- predictionStats_binary(cvNBRaw$testPredictions,"NB RAW",cex=0.70)

NB RAW

bpPCA <- predictionStats_binary(cvNBPCA$testPredictions,"NB PCA",cex=0.70)

NB PCA

bpdecor <- predictionStats_binary(cvNBDecor$testPredictions,"NB UPSTM",cex=0.70)

NB UPSTM

bpdecorC <- predictionStats_binary(cvNBDecorC$testPredictions,"NB UPSTM Outcome Driven",cex=0.70)

NB UPSTM Outcome Driven


pander::pander(bpraw$aucs)
est lower upper
0.8282 0.7709 0.8855
pander::pander(bpPCA$aucs)
est lower upper
0.8584 0.8079 0.9088
pander::pander(bpdecor$aucs)
est lower upper
0.8374 0.7828 0.8919
pander::pander(bpdecorC$aucs)
est lower upper
0.8521 0.8008 0.9035

Using Feature Interactions.


signedsqrt <- function(x) { return (sign(x)*sqrt(abs(x)))}
data("Sonar", package = "mlbench")
sclass <- Sonar$Class


Sonar <- as.data.frame(model.matrix(Class ~ .*.,Sonar))
Sonar$`(Intercept)` <- NULL
Sonar[,1:ncol(Sonar)] <- sapply(Sonar,as.numeric)

fnames <- colnames(Sonar)
fnames <- str_replace_all(fnames," ","_")
fnames <- str_replace_all(fnames,"/","_")
fnames <- str_replace_all(fnames,":","_x_")
colnames(Sonar) <- fnames
squaredfeatures <- str_detect(fnames,"_x_")

Sonar[,squaredfeatures] <- as.data.frame(apply(Sonar[,squaredfeatures],2,signedsqrt));
Sonar$Class <- sclass

datasetframe <- Sonar

Setting the Training and Testing sets



datasetframe_train <- datasetframe[trainSamples,]
datasetframe_test <- datasetframe[testSamples,]

Machine Learning with the filteredFit() function

Train a simple NB and LASSO model on the datasets

In FRESA.CAD all Binary classification task assume that the outcome is 0 and 1.


datasetframe_train[,Outcome] <- 1*(datasetframe_train[,Outcome] == classNames[2])
datasetframe_test[,Outcome] <- 1*(datasetframe_test[,Outcome] == classNames[2])

mNBRaw <- filteredFit(paste(Outcome,"~."),
                   datasetframe_train,
                   fitmethod=NAIVE_BAYES,
                     filtermethod=univariate_KS,
                     filtermethod.control=list(pvalue=0.01,limit= 0),
                     pca=FALSE
                   )

mLASSORaw <- filteredFit(paste(Outcome,"~."),
                   datasetframe_train,
                   fitmethod=LASSO_MIN,
                     filtermethod=univariate_KS,
                     filtermethod.control=list(pvalue=0.20,limit= -1),
                    family = "binomial"
                   )

With PCA

# With PCA
mNBPCA <- filteredFit(paste(Outcome,"~."),
                   datasetframe_train,
                   fitmethod=NAIVE_BAYES,
                     filtermethod=univariate_KS,
                     filtermethod.control=list(pvalue=0.01,limit= 0),
                     pca=TRUE
                   )


mLASSOPCA <- filteredFit(paste(Outcome,"~."),
                   datasetframe_train,
                   fitmethod=LASSO_MIN,
                     filtermethod=univariate_KS,
                     filtermethod.control=list(pvalue=0.20,limit= -1),
                     PCA = TRUE,
                    family = "binomial"
                   )

Now we run filteredFit with the decorrelation set to true and default parameters


mNBDecor <- filteredFit(paste(Outcome,"~."),
                   datasetframe_train,
                    fitmethod=NAIVE_BAYES,
                     filtermethod=univariate_KS,
                     filtermethod.control=list(pvalue=0.01,limit= 0),
                     DECOR = TRUE,
                     pca=FALSE
                   )

mLASSODecor <- filteredFit(paste(Outcome,"~."),
                   datasetframe_train,
                    fitmethod=LASSO_MIN,
                     filtermethod=univariate_KS,
                     filtermethod.control=list(pvalue=0.20,limit= -1),
                     DECOR = TRUE,
                    family = "binomial"
                   )

Now we run filteredFit with the decorrelation set to true and default parameters and Outcome driven


mNBDecorD <- filteredFit(paste(Outcome,"~."),
                   datasetframe_train,
                    fitmethod=NAIVE_BAYES,
                     filtermethod=univariate_KS,
                     filtermethod.control=list(pvalue=0.01,limit= 0),
                     DECOR = TRUE,
                     DECOR.control=list(Outcome=Outcome),
                     pca=FALSE
                   )

mLASSODecorD <- filteredFit(paste(Outcome,"~."),
                   datasetframe_train,
                    fitmethod=LASSO_MIN,
                     filtermethod=univariate_KS,
                     filtermethod.control=list(pvalue=0.20,limit= -1),
                     DECOR = TRUE,
                     DECOR.control=list(Outcome=Outcome),
                    family = "binomial"
                   )

Decorrelation with parameters: Spearman correlation and Robust Fit.

mNBDecor2 <- filteredFit(paste(Outcome,"~."),
                   datasetframe_train,
                    fitmethod=NAIVE_BAYES,
                     filtermethod=univariate_KS,
                     filtermethod.control=list(pvalue=0.01,limit= 0),
                     DECOR = TRUE,
                     DECOR.control=list(method="spearman",type="RLM"),
                     pca=FALSE
                   )
mLASSODecor2 <- filteredFit(paste(Outcome,"~."),
                   datasetframe_train,
                    fitmethod=LASSO_MIN,
                     filtermethod=univariate_KS,
                     filtermethod.control=list(pvalue=0.20,limit= -1),
                     DECOR = TRUE,
                     DECOR.control=list(method="spearman",type="RLM"),
                    family = "binomial"
                   )

Once we have the transformed testing dataset we can make a side by side comparison of predictions


# Predict the raw testing set
prRAW <- predict(mNBRaw,datasetframe_test)

# Predict with PCA
prPCA <- predict(mNBPCA,datasetframe_test)

# Predict the transformed dataset
prDecor <- predict(mNBDecor,datasetframe_test)

# Predict the transformed dataset spearman
prDecor2 <- predict(mNBDecor2,datasetframe_test)

# Predict the transformed dataset
prDecorD <- predict(mNBDecorD,datasetframe_test)



par(mfrow=c(2,2))
AllRocAUC <- NULL;

classoutcomes <- datasetframe_test[,Outcome]
psRaw <- predictionStats_binary(cbind(classoutcomes,prRAW),
                                "NB Raw",cex=0.75)

NB Raw

pander::pander(psRaw$aucs)
est lower upper
0.7561 0.6709 0.8413
AllRocAUC <- rbind(AllRocAUC,psRaw$aucs)

psPCA <- predictionStats_binary(cbind(classoutcomes,prPCA),
                                "NB PCA",cex=0.75)

NB PCA

pander::pander(psPCA$aucs)
est lower upper
0.6453 0.538 0.7525
AllRocAUC <- rbind(AllRocAUC,psPCA$aucs)

psDecor <- predictionStats_binary(cbind(classoutcomes,prDecor),
                                "NB UPSTM",cex=0.75)

NB UPSTM

pander::pander(psDecor$aucs)
est lower upper
0.846 0.7699 0.922
AllRocAUC <- rbind(AllRocAUC,psDecor$aucs);


psDecor2 <- predictionStats_binary(cbind(classoutcomes,prDecor2),
                                "NB UPSTM Spearman",cex=0.75)

NB UPSTM Spearman

pander::pander(psDecor2$aucs)
est lower upper
0.8343 0.7549 0.9137
AllRocAUC <- rbind(AllRocAUC,psDecor2$aucs);

psDecorD <- predictionStats_binary(cbind(classoutcomes,prDecorD),
                                "NB UPSTMD Spearman",cex=0.75)

NB UPSTMD Spearman

pander::pander(psDecorD$aucs)
est lower upper
0.8147 0.735 0.8944
AllRocAUC <- rbind(AllRocAUC,psDecorD$aucs);


psRaw <- predictionStats_binary(cbind(classoutcomes,
                                      predict(mLASSORaw,datasetframe_test)),
                                "LASSO Raw",cex=0.75)

LASSO Raw

pander::pander(psRaw$aucs)
est lower upper
0.8769 0.8114 0.9424
AllRocAUC <- rbind(AllRocAUC,psRaw$aucs)

psPCA <- predictionStats_binary(cbind(classoutcomes,
                                      predict(mLASSOPCA,datasetframe_test)),
                                "LASSO PCA",cex=0.75)

LASSO PCA

pander::pander(psPCA$aucs)
est lower upper
0.8056 0.7249 0.8864
AllRocAUC <- rbind(AllRocAUC,psPCA$aucs)

psDecor <- predictionStats_binary(cbind(classoutcomes,
                                        predict(mLASSODecor,datasetframe_test)),
                                "LASSO UPSTM",cex=0.75)

LASSO UPSTM

pander::pander(psDecor$aucs)
est lower upper
0.8649 0.7964 0.9334
AllRocAUC <- rbind(AllRocAUC,psDecor$aucs);

psDecorD <- predictionStats_binary(cbind(classoutcomes,
                                        predict(mLASSODecorD,datasetframe_test)),
                                "LASSO UPSTMD",cex=0.75)

LASSO UPSTMD

pander::pander(psDecorD$aucs)
est lower upper
0.8717 0.8067 0.9368
AllRocAUC <- rbind(AllRocAUC,psDecorD$aucs);

psDecor2 <- predictionStats_binary(cbind(classoutcomes,
                                         predict(mLASSODecor2,datasetframe_test)),
                                "LASSO UPSTM Spearman",cex=0.75)

LASSO UPSTM Spearman

pander::pander(psDecor2$aucs)
est lower upper
0.8507 0.7768 0.9246
AllRocAUC <- rbind(AllRocAUC,psDecor2$aucs);

FI: Comparing ROCAUC


rownames(AllRocAUC) <- c("NB:Raw","NB:PCA","NB:UPSTM_P","NB:UPSTMD_P","NB:UPSTM_S",
                         "LASSO:Raw","LASSO:PCA","LASSO:UPSTM_P","LASSO:UPSTMD_P","LASSO:UPSTM_S")
pander::pander(AllRocAUC)
  est lower upper
NB:Raw 0.7561 0.6709 0.8413
NB:PCA 0.6453 0.538 0.7525
NB:UPSTM_P 0.846 0.7699 0.922
NB:UPSTMD_P 0.8343 0.7549 0.9137
NB:UPSTM_S 0.8147 0.735 0.8944
LASSO:Raw 0.8769 0.8114 0.9424
LASSO:PCA 0.8056 0.7249 0.8864
LASSO:UPSTM_P 0.8649 0.7964 0.9334
LASSO:UPSTMD_P 0.8717 0.8067 0.9368
LASSO:UPSTM_S 0.8507 0.7768 0.9246
bpROCAUC <- barPlotCiError(as.matrix(AllRocAUC),
                          metricname = "ROCAUC",
                          thesets = "ROC AUC",
                          themethod = rownames(AllRocAUC),
                          main = "ROC AUC",
                          offsets = c(0.5,1),
                          scoreDirection = ">",
                          ho=0.5,
                          args.legend = list(bg = "white",x="bottomright",inset=c(0.0,0),cex=0.75),
                          col = terrain.colors(nrow(AllRocAUC))
                          )