Here I’ll show the impact of decorrelating high-dimensional data sets.
Cross-validation: Raw data
# Snapshot every writable graphical parameter so that the later `par(op)`
# calls restore the panel layout and text size too. The previous form,
# `par(no.readonly = TRUE, pty = "m")`, only returned the old value of `pty`,
# so `mfrow` and `cex` were never reset.
op <- par(no.readonly = TRUE)
par(pty = "m")
# Two side-by-side panels with reduced text size for the raw-data plots.
par(mfrow = c(1, 2), cex = 0.5)
# Fit one filtered SVM on each half of the raw data. Both calls use the same
# settings: filter control list (pvalue = 0.05, limit = 150), the
# "OrderLogit" Scale option, probability = TRUE and scale = FALSE
# (the last two are presumably forwarded to e1071::svm -- confirm).

# Model trained on the first half.
RAWOneFilteredSVM_ml <- filteredFit(
  Labels ~ .,
  SetOnec,
  fitmethod = e1071::svm,
  filtermethod.control = list(pvalue = 0.05, limit = 150),
  Scale = "OrderLogit",
  probability = TRUE,
  scale = FALSE
)

# Model trained on the second half.
RAWTwoFilteredSVM_ml <- filteredFit(
  Labels ~ .,
  SetTwoc,
  fitmethod = e1071::svm,
  filtermethod.control = list(pvalue = 0.05, limit = 150),
  Scale = "OrderLogit",
  probability = TRUE,
  scale = FALSE
)
# Cross-test the raw models: the set-one model scores set two, and the
# set-two model scores set one.
predSet2 <- predict(RAWOneFilteredSVM_ml, SetTwo)
bsFilteredSVM <- predictionStats_binary(
  cbind(SetTwo$Labels, predSet2),
  "RAW Set One Filtered:SVM",
  cex = 0.70
)

predSet1 <- predict(RAWTwoFilteredSVM_ml, SetOne)
# `bsFilteredSVM` is reassigned here, so the values printed below belong to
# the set-two model evaluated on set one.
bsFilteredSVM <- predictionStats_binary(
  cbind(SetOne$Labels, predSet1),
  "RAW Set Two Filtered:SVM",
  cex = 0.70
)

bsFilteredSVM$ROC.analysis$F1
bsFilteredSVM$ClassMetrics

# Re-apply the graphical parameters stored in `op`.
par(op)
Merging the two RAW test sets
# Pool the two raw cross-test predictions into one evaluation.
# The labels come from the sets that were actually scored (`predSet1` was
# predicted on `SetOne`, `predSet2` on `SetTwo`). Using `ArceneSet$Labels`
# instead is only correct if `ArceneSet` rows are ordered set one, then set two.
predArcene <- cbind(
  c(SetOne$Labels, SetTwo$Labels),
  c(predSet1, predSet2)
)
bsBoothFilteredSVM <- predictionStats_binary(
  predArcene,
  "Raw Filtered:SVM",
  cex = 0.90
)
Raw Filtered:SVM 
# Print the accuracy entry of the pooled raw-data statistics.
pander::pander(bsBoothFilteredSVM$accc, caption = "Accuracy")
Accuracy
0.7 |
0.6314 |
0.7626 |
# Print the ROC AUC entry of the pooled raw-data statistics.
pander::pander(bsBoothFilteredSVM$aucs, caption = "ROC AUC")
Linear decorrelation
# Decorrelate the `vartoAdjust` columns of the full data without passing an
# outcome, using set one as the reference data.
LMDecorrelatedOneO <- featureDecorrelation(
  ArceneSet[, vartoAdjust],
  refdata = SetOne[, vartoAdjust],
  thr = 0.80,
  unipvalue = 1.0e-4,
  type = "NZLM"
)
|||||||||||6050 :||||||||||||||||4420 :||||||||||||||2676 :||||||||||||||||||1167 :|||||||||||||||||||||||||||||||290 :|||||||||||||||||||||59 :||||||||||||||||||||||28 :|||||||||||||||||||||18 :||4 :|||||||||||||||||||||||||||||||1 :
# Remove the "featureMatrix" attribute from the outcome-free result.
attr(LMDecorrelatedOneO, "featureMatrix") <- NULL

# Re-attach the labels: set-one rows form the training frame (Labels turned
# into a factor), set-two rows form the test frame.
dataOneOneLMDecorrelatedOc <- cbind(
  Labels = SetOne$Labels,
  LMDecorrelatedOneO[OneIDS, ]
)
dataOneTwoLMDecorrelatedO <- cbind(
  Labels = SetTwo$Labels,
  LMDecorrelatedOneO[TwoIDS, ]
)
dataOneOneLMDecorrelatedOc$Labels <-
  as.factor(dataOneOneLMDecorrelatedOc$Labels)

# Same reference set, but this time the whole data frame is passed together
# with Outcome = "Labels".
LMDecorrelatedOne <- featureDecorrelation(
  ArceneSet,
  Outcome = "Labels",
  refdata = SetOne,
  thr = 0.80,
  unipvalue = 1.0e-4,
  type = "NZLM"
)
||||||||||6047 :||||||||4423 :||||||||||2675 :|||||||||||||||||||1170 :||||||||||||||||||||||||||||||||302 :|||||||||||||||||||||61 :||||||||||||||||||||||27 :||||||||||||||||||||||24 :||9 :|||||||||||||||||||||||||||||||1 :
# Remove the "featureMatrix" attribute from the outcome-aware result.
attr(LMDecorrelatedOne, "featureMatrix") <- NULL

# Split by row IDs; only the training half gets factor labels.
dataOneOneLMDecorrelatedc <- LMDecorrelatedOne[OneIDS, ]
dataOneTwoLMDecorrelated <- LMDecorrelatedOne[TwoIDS, ]
dataOneOneLMDecorrelatedc$Labels <-
  as.factor(dataOneOneLMDecorrelatedc$Labels)

# Outcome-free decorrelation of the `vartoAdjust` columns, now with set two
# as the reference data.
LMDecorrelatedTwoO <- featureDecorrelation(
  ArceneSet[, vartoAdjust],
  refdata = SetTwo[, vartoAdjust],
  thr = 0.80,
  unipvalue = 1.0e-4,
  type = "NZLM"
)
||||||5904 :||||||||||||||||4233 :|||||||||||||||||||||||||||||||||||||||||||||||2620 :|||||||||||||||||||||||||||||||||||1201 :|||||||||||||||||||||||||||||||341 :||||||||||||||||||||||||||||||||||||||64 :||||||||||||||||||||||||||||||||||||||22 :||||||||||||||||||||||||||||||||||||||14 :|||||||||||||||||||||||||||||||||||||||||||||1 :||||||||||||||||||||||||||||||||||||||||||||0 :
# Remove the "featureMatrix" attribute from the outcome-free result.
attr(LMDecorrelatedTwoO, "featureMatrix") <- NULL

# Re-attach the labels: set-two rows form the training frame (Labels turned
# into a factor), set-one rows form the test frame.
dataTwoTwoLMDecorrelatedOc <- cbind(
  Labels = SetTwo$Labels,
  LMDecorrelatedTwoO[TwoIDS, ]
)
dataTwoTwoLMDecorrelatedOc$Labels <-
  as.factor(dataTwoTwoLMDecorrelatedOc$Labels)
dataTwoOneLMDecorrelatedO <- cbind(
  Labels = SetOne$Labels,
  LMDecorrelatedTwoO[OneIDS, ]
)

# Set two as reference again, with the whole data frame and
# Outcome = "Labels".
LMDecorrelatedTwo <- featureDecorrelation(
  ArceneSet,
  Outcome = "Labels",
  refdata = SetTwo,
  thr = 0.80,
  unipvalue = 1.0e-4,
  type = "NZLM"
)
|||||5904 :||||||||||||4235 :||||||||||||||||||||||||||||||||||||||||||||||||||2618 :||||||||||||||||||||||||||||||||||||||1196 :|||||||||||||||||||||||||||||||348 :||||||||||||||||||||||||||||||||||||||63 :||||||||||||||||||||||||||||||||||||||21 :||||||||||||||||||||||||||||||||||||||14 :||||||||||||||||||||||||||||||||||||||2 :||||||||||||||||||||||||||||||||||||||||||||0 :
# Remove the "featureMatrix" attribute from the outcome-aware result.
attr(LMDecorrelatedTwo, "featureMatrix") <- NULL

# Split by row IDs; only the training half (set two) gets factor labels.
dataTwoTwoLMDecorrelatedc <- LMDecorrelatedTwo[TwoIDS, ]
dataTwoTwoLMDecorrelatedc$Labels <-
  as.factor(dataTwoTwoLMDecorrelatedc$Labels)
dataTwoOneLMDecorrelated <- LMDecorrelatedTwo[OneIDS, ]
Cross-validation: Decorrelated data
# 2 x 2 panel layout with reduced text size for the decorrelated-data plots.
par(mfrow = c(2, 2), cex = 0.5)

# Train on the set-one rows of the outcome-aware decorrelation
# (set-one reference).
LMOneFilteredSVM_ml <- filteredFit(
  Labels ~ .,
  dataOneOneLMDecorrelatedc,
  fitmethod = e1071::svm,
  filtermethod.control = list(pvalue = 0.05, limit = 150),
  Scale = "OrderLogit",
  probability = TRUE,
  scale = FALSE
)

# Score the set-two rows of the same decorrelated data.
predSetOneTwo <- predict(LMOneFilteredSVM_ml, dataOneTwoLMDecorrelated)
bsOneFilteredSVM <- predictionStats_binary(
  cbind(dataOneTwoLMDecorrelated$Labels, predSetOneTwo),
  "LM Decorrelated One Filtered:SVM",
  cex = 0.70
)
LM Decorrelated One Filtered:SVM
# Print the accuracy of the LM-decorrelated set-one model.
pander::pander(bsOneFilteredSVM$accc, caption = "Accuracy")
Accuracy
0.77 |
0.6751 |
0.8483 |
# Print the ROC AUC of the LM-decorrelated set-one model before
# `bsOneFilteredSVM` is overwritten below.
pander::pander(bsOneFilteredSVM$aucs, caption = "ROC AUC")

# Refit on the set-one rows of the outcome-free decorrelation. This reuses
# (and replaces) `LMOneFilteredSVM_ml`.
LMOneFilteredSVM_ml <- filteredFit(
  Labels ~ .,
  dataOneOneLMDecorrelatedOc,
  fitmethod = e1071::svm,
  filtermethod.control = list(pvalue = 0.05, limit = 150),
  Scale = "OrderLogit",
  probability = TRUE,
  scale = FALSE
)

# Evaluate on the matching set-two rows. The prediction is computed inline
# and not stored.
bsOneFilteredSVM <- predictionStats_binary(
  cbind(
    dataOneTwoLMDecorrelatedO$Labels,
    predict(LMOneFilteredSVM_ml, dataOneTwoLMDecorrelatedO)
  ),
  "NT Decorrelated One Filtered:SVM",
  cex = 0.70
)
NT Decorrelated One Filtered:SVM
# Print the accuracy of the NT-decorrelated set-one model.
pander::pander(bsOneFilteredSVM$accc, caption = "Accuracy")
Accuracy
0.79 |
0.6971 |
0.8651 |
# Print the ROC AUC of the NT-decorrelated set-one model.
pander::pander(bsOneFilteredSVM$aucs, caption = "ROC AUC")

# Train on the set-two rows of the outcome-aware decorrelation
# (set-two reference).
LMTwoFilteredSVM_ml <- filteredFit(
  Labels ~ .,
  dataTwoTwoLMDecorrelatedc,
  fitmethod = e1071::svm,
  filtermethod.control = list(pvalue = 0.05, limit = 150),
  Scale = "OrderLogit",
  probability = TRUE,
  scale = FALSE
)

# Score the set-one rows of the same decorrelated data.
predSetTwoOne <- predict(LMTwoFilteredSVM_ml, dataTwoOneLMDecorrelated)
bsTwoFilteredSVM <- predictionStats_binary(
  cbind(dataTwoOneLMDecorrelated$Labels, predSetTwoOne),
  "LM Decorrelated Two Filtered:SVM",
  cex = 0.70
)
LM Decorrelated Two Filtered:SVM
# Print the accuracy of the LM-decorrelated set-two model.
pander::pander(bsTwoFilteredSVM$accc, caption = "Accuracy")
Accuracy
0.83 |
0.7418 |
0.8977 |
# Print the ROC AUC of the LM-decorrelated set-two model before
# `bsTwoFilteredSVM` is overwritten below.
pander::pander(bsTwoFilteredSVM$aucs, caption = "ROC AUC")

# Refit on the set-two rows of the outcome-free decorrelation. This reuses
# (and replaces) `LMTwoFilteredSVM_ml`.
LMTwoFilteredSVM_ml <- filteredFit(
  Labels ~ .,
  dataTwoTwoLMDecorrelatedOc,
  fitmethod = e1071::svm,
  filtermethod.control = list(pvalue = 0.05, limit = 150),
  Scale = "OrderLogit",
  probability = TRUE,
  scale = FALSE
)

# Evaluate on the matching set-one rows. The prediction is computed inline
# and not stored.
bsTwoFilteredSVM <- predictionStats_binary(
  cbind(
    dataTwoOneLMDecorrelatedO$Labels,
    predict(LMTwoFilteredSVM_ml, dataTwoOneLMDecorrelatedO)
  ),
  "NT Decorrelated Two Filtered:SVM",
  cex = 0.70
)
NT Decorrelated Two Filtered:SVM 
# Print the accuracy of the NT-decorrelated set-two model.
pander::pander(bsTwoFilteredSVM$accc, caption = "Accuracy")
Accuracy
0.82 |
0.7305 |
0.8897 |
# Print the ROC AUC of the NT-decorrelated set-two model.
pander::pander(bsTwoFilteredSVM$aucs, caption = "ROC AUC")

# Re-apply the graphical parameters stored in `op`.
par(op)
Merging the two test sets
# Pool the two LM-decorrelated cross-test predictions into one evaluation.
# The labels come from the frames that were actually scored: `predSetTwoOne`
# was predicted on `dataTwoOneLMDecorrelated` (set-one rows) and
# `predSetOneTwo` on `dataOneTwoLMDecorrelated` (set-two rows). Using
# `ArceneSet$Labels` instead is only correct if `ArceneSet` rows are ordered
# set one, then set two.
predArcene <- cbind(
  c(dataTwoOneLMDecorrelated$Labels, dataOneTwoLMDecorrelated$Labels),
  c(predSetTwoOne, predSetOneTwo)
)
bsBoothFilteredSVM <- predictionStats_binary(
  predArcene,
  "Decorrelated Filtered:SVM",
  cex = 0.90
)
Decorrelated Filtered:SVM 
# Print the accuracy entry of the pooled decorrelated-data statistics.
pander::pander(bsBoothFilteredSVM$accc, caption = "Accuracy")
Accuracy
0.8 |
0.7378 |
0.8531 |
# Print the ROC AUC entry of the pooled decorrelated-data statistics.
pander::pander(bsBoothFilteredSVM$aucs, caption = "ROC AUC")