FRESA.CAD Benchmark

Stage C Data Set


    library(rpart)
    data(stagec)
    # Split the stages into several columns
    dataCancer <- cbind(stagec[,c(1:3,5:6)],
                        gleason4 = 1*(stagec[,7] == 4),
                        gleason5 = 1*(stagec[,7] == 5),
                        gleason6 = 1*(stagec[,7] == 6),
                        gleason7 = 1*(stagec[,7] == 7),
                        gleason8 = 1*(stagec[,7] == 8),
                        gleason910 = 1*(stagec[,7] >= 9),
                        eet = 1*(stagec[,4] == 2),
                        diploid = 1*(stagec[,8] == "diploid"),
                        tetraploid = 1*(stagec[,8] == "tetraploid"),
                        notAneuploid = 1 - 1*(stagec[,8] == "aneuploid"))
    #Impute missing values
dataCancerImputed <- nearestNeighborImpute(dataCancer)
dataCancerImputed$pgtime <- NULL

ExperimentName <- "Stage_C_Prostate"
bswimsReps <- 30;
theData <- dataCancerImputed;
theOutcome <- "pgstat";
reps <- 100;
fraction <- 0.9;

FRESAFileName <- paste(ExperimentName,"FRESAMethod.RDATA",sep = "_")
CVFileName <- paste(ExperimentName,"CVMethod.RDATA",sep = "_")


data(cancerVarNames)
rownames(cancerVarNames) <- (as.character(cancerVarNames[,1]))
dataComplete <- dataCancer[complete.cases(dataCancer),]

BSWiMS on a Binary Outcome

Model Building



BSWiMSMODEL <- BSWiMS.model(formula = paste(theOutcome," ~ 1"),data = theData,NumberofRepeats = bswimsReps)

pm <- plot(BSWiMSMODEL$BSWiMS.model$bootCV)

gplots::heatmap.2(BSWiMSMODEL$bagging$formulaNetwork,trace = "none",mar = c(10,10),main = "B:SWiMS Formula Network",cexRow = 0.5,cexCol = 0.75,srtCol = 45)

Summary Table

sm <- summary(BSWiMSMODEL)

rownames(sm$coefficients) <- as.character(cancerVarNames[rownames(sm$coefficients),2])
pander::pander(sm$coefficients,caption = "Prostate",round = 3)
Prostate (continued below)
  Estimate lower OR upper u.Accuracy
Tumor grade 0.693 1.619 2 2.471 0.664
Gleason grade = 5 -0.716 0.359 0.489 0.665 0.589
Gleason grade = 9 | 10 0.818 1.926 2.265 2.664 0.664
Tetraploid DNA pattern 0.402 1.269 1.495 1.763 0.63
Gleason grade = 8 0.424 1.299 1.529 1.799 0.651
Diploid DNA pattern -0.517 0.487 0.597 0.73 0.651
Gleason grade = 7 0.314 1.184 1.368 1.582 0.605
Not aneuploid DNA pattern -0.407 0.565 0.666 0.784 0.651
Table continues below
  r.Accuracy full.Accuracy u.AUC r.AUC
Tumor grade 0.651 0.721 0.699 0.673
Gleason grade = 5 0.569 0.66 0.662 0.613
Gleason grade = 9 | 10 0.637 0.672 0.55 0.618
Tetraploid DNA pattern 0.605 0.677 0.63 0.631
Gleason grade = 8 0.651 0.672 0.562 0.606
Diploid DNA pattern 0.664 0.721 0.673 0.699
Gleason grade = 7 0.685 0.672 0.554 0.612
Not aneuploid DNA pattern 0.649 0.683 0.543 0.647
Table continues below
  full.AUC IDI NRI z.IDI z.NRI
Tumor grade 0.712 0.142 0.799 5.344 6.22
Gleason grade = 5 0.678 0.126 0.646 4.987 5.792
Gleason grade = 9 | 10 0.669 0.077 0.427 3.855 3.897
Tetraploid DNA pattern 0.681 0.072 0.539 3.564 3.838
Gleason grade = 8 0.669 0.068 0.466 3.558 3.822
Diploid DNA pattern 0.712 0.062 0.693 3.39 5.133
Gleason grade = 7 0.669 0.055 0.456 3.164 3.396
Not aneuploid DNA pattern 0.692 0.05 0.241 2.986 2.057
  Frequency
Tumor grade 1
Gleason grade = 5 1
Gleason grade = 9 | 10 0.967
Tetraploid DNA pattern 1
Gleason grade = 8 0.967
Diploid DNA pattern 1
Gleason grade = 7 0.967
Not aneuploid DNA pattern 0.667

Cross Validation Performance

rcv <- randomCV(theData, theOutcome,BSWiMS.model,trainFraction = fraction, repetitions = reps)

.>><.>>><.>><.>><.>><.>>><.>><.>>><.>><.>>><.>>><.>><.>><.>><.><.>>>><.>><.>><.>>><.>><.>>><.>>><.>><.>><.>>><.>>><.>>>><.>>>><.>>><.>>><.><.>><.>>><.>>><.>><.>>><.>><.>><.>>>><.>><.>>><.>>><.>><.>>><.>><.>>><.>><.>><.>><.>><.>><.>><.>><.>><.>>><.>><.>>><.>>><.>>><.>><.>>><.>>>><.>>>><.>><.>><.>><.>>><.>>><.>><.>>><.>>><.>>><.>>>><.>>>><.>>><.>>><.>>><.>>><.>><.>>><.>>><.>><.>><.>>><.><.>><.>>><.>><.>><.>><.>>><.>>><.>>><.>>><.>><.>>><.>><.>>><.>><.>>><

pm <- plotModels.ROC(rcv$testPredictions)

pm <- plotModels.ROC(rcv$medianTest)


pROC::roc(rcv$medianTest[,1],rcv$medianTest[,2],plot=TRUE)

Call: roc.default(response = rcv\(medianTest[, 1], predictor = rcv\)medianTest[, 2], plot = TRUE)

Data: rcv\(medianTest[, 2] in 92 controls (rcv\)medianTest[, 1] 0) < 54 cases (rcv$medianTest[, 1] 1). Area under the curve: 0.7313


pander::pander(rcv$jaccard)
  • Jaccard.SM: 0.6677
  • averageLength: 5.9
pander::pander(summary(rcv$theTimes[names(rcv$theTimes) == "elapsed"]))
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.36 0.55 0.66 0.6645 0.7425 1.33

bm <- baggedModel(rcv$selectedFeaturesSet,theData,"LOGIT",Outcome = theOutcome,frequencyThreshold = 0.05)

……….

sm <- summary(bm$bagged.model)

rownames(sm$coefficients) <- as.character(cancerVarNames[rownames(sm$coefficients),2])
pander::pander(sm$coefficients,caption = "Prostate",round = 3)
Prostate (continued below)
  Estimate lower OR upper u.Accuracy
# of cells in G2 phase -0.015 0.976 0.985 0.993 0.617
Tumor grade 1.163 1.63 3.201 6.285 0.664
Gleason grade = 5 -1.379 0.085 0.252 0.746 0.589
Diploid DNA pattern -1.118 0.155 0.327 0.689 0.651
Gleason grade = 9 | 10 0.903 1.09 2.468 5.585 0.664
Gleason grade = 8 0.02 0.782 1.02 1.331 0.651
Gleason grade = 7 -0.03 0.826 0.971 1.141 0.613
Age at diagnosis -0.002 0.996 0.998 1 0.545
Tetraploid DNA pattern 0.287 0.584 1.332 3.036 0.63
Not aneuploid DNA pattern -0.259 0.577 0.772 1.031 0.651
Table continues below
  r.Accuracy full.Accuracy u.AUC r.AUC
# of cells in G2 phase 0.72 0.723 0.589 0.717
Tumor grade 0.711 0.717 0.699 0.717
Gleason grade = 5 0.721 0.717 0.662 0.712
Diploid DNA pattern 0.702 0.718 0.673 0.709
Gleason grade = 9 | 10 0.714 0.718 0.55 0.715
Gleason grade = 8 0.718 0.718 0.562 0.718
Gleason grade = 7 0.718 0.716 0.556 0.719
Age at diagnosis 0.717 0.712 0.538 0.718
Tetraploid DNA pattern 0.718 0.717 0.63 0.717
Not aneuploid DNA pattern 0.718 0.718 0.543 0.717
Table continues below
  full.AUC IDI NRI z.IDI z.NRI
# of cells in G2 phase 0.72 0.037 0.429 2.634 3.032
Tumor grade 0.716 0.038 0.254 2.501 1.936
Gleason grade = 5 0.717 0.021 0.359 1.968 2.78
Diploid DNA pattern 0.717 0.014 0.124 1.081 0.933
Gleason grade = 9 | 10 0.717 0.006 -0.323 0.948 -2.358
Gleason grade = 8 0.718 0.002 0.006 0.514 0.064
Gleason grade = 7 0.718 0.002 0.13 0.468 0.902
Age at diagnosis 0.713 0.002 0.016 0.405 0.11
Tetraploid DNA pattern 0.717 0.001 0.011 0.255 0.111
Not aneuploid DNA pattern 0.717 0 0.053 -0.079 0.361
  Frequency
# of cells in G2 phase 0.12
Tumor grade 1
Gleason grade = 5 0.97
Diploid DNA pattern 0.98
Gleason grade = 9 | 10 0.74
Gleason grade = 8 0.31
Gleason grade = 7 0.24
Age at diagnosis 0.08
Tetraploid DNA pattern 0.88
Not aneuploid DNA pattern 0.55
gplots::heatmap.2(bm$formulaNetwork,trace = "none",mar = c(5,5),main = "Formula Network",cexRow = 0.5,cexCol = 0.75,srtCol = 45)

BSWiMS on a Continous Outcome

Model Building

theOutcome = "g2"

BSWiMSMODEL <- BSWiMS.model(formula = paste(theOutcome," ~ 1"),data = theData,NumberofRepeats = bswimsReps)

pm <- plot(BSWiMSMODEL$BSWiMS.model$bootCV)

gplots::heatmap.2(BSWiMSMODEL$bagging$formulaNetwork,trace = "none",mar = c(10,10),main = "B:SWiMS Formula Network",cexRow = 0.5,cexCol = 0.75,srtCol = 45)

Summary Table

sm <- summary(BSWiMSMODEL)

rownames(sm$coefficients) <- as.character(cancerVarNames[rownames(sm$coefficients),2])
pander::pander(sm$coefficients,caption = "Prostate",round = 3)
Prostate (continued below)
  Estimate lower mean upper u.MSE
Tetraploid DNA pattern 4.24 4.24 4.24 4.24 36.55
Diploid DNA pattern -6.551 -6.551 -6.551 -6.551 45.29
Not aneuploid DNA pattern 7.105 7.105 7.105 7.105 63.16
Table continues below
  r.MSE model.MSE NeRI F.pvalue
Tetraploid DNA pattern 65.94 36.55 0.329 0
Diploid DNA pattern 63.16 36.49 0.301 0
Not aneuploid DNA pattern 45.29 36.49 -0.075 0
Table continues below
  t.pvalue Sign.pvalue Wilcox.pvalue
Tetraploid DNA pattern 0 0 0
Diploid DNA pattern 0 0 0
Not aneuploid DNA pattern 0.033 0.5 1
  Frequency
Tetraploid DNA pattern 1
Diploid DNA pattern 1
Not aneuploid DNA pattern 1

Cross Validation Performance

rcv <- randomCV(theData, theOutcome,BSWiMS.model,trainFraction = fraction, repetitions = reps)

.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><

pm <- plot(rcv$testPredictions[,c(1,3)])

pm <- plot(rcv$medianTest)


pander::pander(rcv$jaccard)
  • Jaccard.SM: 0.9119
  • averageLength: 3.22
pander::pander(summary(rcv$theTimes[names(rcv$theTimes) == "elapsed"]))
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.24 0.34 0.39 0.3802 0.42 0.49

bm <- baggedModel(rcv$selectedFeaturesSet,theData,"LM",Outcome = theOutcome,frequencyThreshold = 0.05)

……….

sm <- summary(bm$bagged.model)

rownames(sm$coefficients) <- as.character(cancerVarNames[rownames(sm$coefficients),2])
pander::pander(sm$coefficients,caption = "Prostate",round = 3)
Prostate (continued below)
  Estimate lower mean upper u.MSE
Tumor grade 1.813 1.813 1.813 1.813 62.45
Tetraploid DNA pattern 7.459 7.459 7.459 7.459 36.55
Diploid DNA pattern -2.761 -2.762 -2.761 -2.76 45.29
Not aneuploid DNA pattern 4.697 4.697 4.697 4.698 63.16
Table continues below
  r.MSE model.MSE NeRI F.pvalue
Tumor grade 36.49 35.45 0.096 0.02
Tetraploid DNA pattern 36.36 36.36 -0.122 0.5
Diploid DNA pattern 36.36 36.36 0.108 0.5
Not aneuploid DNA pattern 36.36 36.36 -0.131 0.5
Table continues below
  t.pvalue Sign.pvalue Wilcox.pvalue
Tumor grade 0.002 0.141 0.01
Tetraploid DNA pattern 0.812 0.5 1
Diploid DNA pattern 0.006 0.076 0.017
Not aneuploid DNA pattern 0.545 0.5 1
  Frequency
Tumor grade 0.13
Tetraploid DNA pattern 1
Diploid DNA pattern 1
Not aneuploid DNA pattern 1
gplots::heatmap.2(bm$formulaNetwork,trace = "none",mar = c(5,5),main = "Formula Network",cexRow = 0.5,cexCol = 0.75,srtCol = 45)

BSWiMS on a Ordinal Outcome

Model Building

theOutcome = "grade"
theData$grade[theData$grade < 2] <- 2

BSWiMSMODEL <- BSWiMS.model(formula = paste(theOutcome," ~ 1"),data = theData,NumberofRepeats = bswimsReps)

gplots::heatmap.2(BSWiMSMODEL$bagging$formulaNetwork,trace = "none",mar = c(10,10),main = "B:SWiMS Formula Network",cexRow = 0.5,cexCol = 0.75,srtCol = 45)

Summary Table

sm <- summary(BSWiMSMODEL)

pander::pander(sm[[1]]$coefficients,caption = "Prostate",round = 3)
Prostate (continued below)
  Estimate lower OR upper u.Accuracy r.Accuracy
gleason5 -2.011 0.116 0.134 0.154 0.825 0.615
gleason7 2.706 0.574 14.97 390.3 0.664 0.641
gleason8 12.03 122502 167706 229590 0.566 0.723
gleason4 -7.323 0 0.001 0.001 0.615 0.825
gleason910 8.358 3624 4266 5021 0.462 0.804
pgstat 0.333 1.258 1.395 1.546 0.657 0.804
Table continues below
  full.Accuracy u.AUC r.AUC full.AUC IDI NRI
gleason5 0.867 0.797 0.549 0.846 0.468 1.357
gleason7 0.833 0.703 0.681 0.846 0.329 1.189
gleason8 0.833 0.622 0.748 0.846 0.231 0.929
gleason4 0.867 0.549 0.797 0.846 0.106 1.379
gleason910 0.839 0.53 0.825 0.856 0.073 0.616
pgstat 0.82 0.682 0.825 0.828 0.063 0.711
  z.IDI z.NRI Frequency
gleason5 16.99 18.53 0.5
gleason7 12.63 16.56 0.5
gleason8 9.895 12.54 0.5
gleason4 6.188 18.89 0.5
gleason910 5.055 11.64 0.333
pgstat 4.543 7.413 0.167

pander::pander(sm[[2]]$coefficients,caption = "Prostate",round = 3)
Prostate (continued below)
  Estimate lower OR upper u.Accuracy r.Accuracy
gleason910 2.179 7.721 8.838 10.12 0.957 0.052
pgstat 10.03 22688 22688 22688 0.623 0.052
Table continues below
  full.Accuracy u.AUC r.AUC full.AUC IDI NRI
gleason910 0.957 0.82 0.5 0.82 0.45 1.276
pgstat 0.623 0.801 0.5 0.801 0.431 1.205
  z.IDI z.NRI Frequency
gleason910 18.92 18.96 0.5
pgstat 18.19 18.23 0.5

Cross Validation Performance

rcv <- randomCV(theData, theOutcome,BSWiMS.model,trainFraction = fraction, repetitions = reps)

.><.><.><.><.>>><.><.><.><.><.><.><.><.>>><.><.>><.><.>><.><.>>><.><.><.>>><.>><.><.>><.>><.><.><.>><

pm <- boxplot(rcv$testPredictions[,3]~rcv$testPredictions[,1])

pm <- boxplot(rcv$medianTest[,2]~rcv$medianTest[,1])


pander::pander(rcv$jaccard)
  • Jaccard.SM: 0.7368
  • averageLength: 5.87
pander::pander(summary(rcv$theTimes[names(rcv$theTimes) == "elapsed"]))
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.17 1.547 1.78 1.852 2.122 3.59

bm <- baggedModel(rcv$selectedFeaturesSet,theData,"LM",Outcome = theOutcome,frequencyThreshold = 0.05)

……….

sm <- summary(bm$bagged.model)

pander::pander(sm$coefficients,caption = "Prostate",round = 3)
Prostate (continued below)
  Estimate lower mean upper u.MSE r.MSE
gleason5 -0.801 -0.94 -0.801 -0.662 0.192 0.196
gleason4 -0.784 -0.861 -0.784 -0.708 0.3 0.138
gleason910 0.599 0.481 0.599 0.717 0.272 0.131
gleason6 -0.273 -0.345 -0.273 -0.201 0.317 0.123
pgstat 0.156 0.131 0.156 0.181 0.256 0.12
gleason8 0.175 0.075 0.175 0.275 0.28 0.119
g2 0 0 0 0 0.303 0.11
age 0 0 0 0 0.315 0.108
Table continues below
  model.MSE NeRI F.pvalue t.pvalue Sign.pvalue
gleason5 0.114 0.547 0 0 0
gleason4 0.109 0.157 0 0 0.007
gleason910 0.114 -0.095 0 0.081 0.424
gleason6 0.111 0.155 0 0 0.011
pgstat 0.115 0.199 0.004 0.05 0.003
gleason8 0.114 -0.206 0.007 0.531 0.487
g2 0.109 0.026 0.315 0.539 0.356
age 0.108 -0.021 0.331 0.661 0.52
  Wilcox.pvalue Frequency
gleason5 0 1
gleason4 0 0.82
gleason910 0.557 0.95
gleason6 0.008 0.85
pgstat 0.049 0.93
gleason8 0.877 0.85
g2 0.456 0.17
age 0.658 0.13
gplots::heatmap.2(bm$formulaNetwork,trace = "none",mar = c(5,5),main = "Formula Network",cexRow = 0.5,cexCol = 0.75,srtCol = 45)

BSWiMS on a Survival Outcome

Model Building


BSWiMSMODEL <- BSWiMS.model(formula = Surv(pgtime, pgstat) ~ 1,data = dataComplete,NumberofRepeats = bswimsReps)

gplots::heatmap.2(BSWiMSMODEL$bagging$formulaNetwork,trace = "none",mar = c(10,10),main = "B:SWiMS Formula Network",cexRow = 0.5,cexCol = 0.75,srtCol = 45)

Summary Table

sm <- summary(BSWiMSMODEL)

rownames(sm$coefficients) <- as.character(cancerVarNames[rownames(sm$coefficients),2])
pander::pander(sm$coefficients,caption = "Prostate",round = 3)
Prostate (continued below)
  Estimate lower OR upper u.Accuracy
Tumor grade 0.729 1.577 2.074 2.727 0.664
Gleason grade = 5 -0.597 0.335 0.551 0.904 0.575
Tetraploid DNA pattern 0.252 1.079 1.287 1.534 0.634
Diploid DNA pattern -0.263 0.597 0.768 0.989 0.657
Not aneuploid DNA pattern -0.459 0.522 0.632 0.765 0.657
Table continues below
  r.Accuracy full.Accuracy u.AUC r.AUC
Tumor grade 0.606 0.672 0.701 0.65
Gleason grade = 5 0.557 0.617 0.652 0.616
Tetraploid DNA pattern 0.632 0.64 0.638 0.57
Diploid DNA pattern 0.576 0.668 0.673 0.642
Not aneuploid DNA pattern 0.627 0.655 0.535 0.64
Table continues below
  full.AUC IDI NRI z.IDI z.NRI
Tumor grade 0.703 0.114 0.799 4.618 5.988
Gleason grade = 5 0.671 0.092 0.56 4.247 4.761
Tetraploid DNA pattern 0.669 0.055 0.659 3.958 4.612
Diploid DNA pattern 0.693 0.042 0.7 3.561 4.951
Not aneuploid DNA pattern 0.674 0.041 0.689 3.234 4.848
  Frequency
Tumor grade 1
Gleason grade = 5 1
Tetraploid DNA pattern 1
Diploid DNA pattern 1
Not aneuploid DNA pattern 0.8

Cross Validation Performance

rcv <- randomCV(dataComplete, formula(Surv(pgtime, pgstat) ~ .),BSWiMS.model,trainFraction = fraction, repetitions = reps)

.>>><.>>>><.>>>><.>>><.>>><.>>><.>>><.>>><.>><.>><.><.>>><.>>><.>>><.>>><.>>><.>>>><.>>>><.>>><.>>>><.>>><.>><.>>><.>>><.>><.><.>>><.>><.>>><.>>>><.>>>><.>>><.>>>><.>>><.>>><.>><.>>><.>>><.>>>><.>><.>>>><.>>>><.>><.>><.>><.>><.>>><.>>>><.>><.>><.>><.>>>><.>>><.>><.>>><.>>><.>>><.>>>><.>>>><.>><.>>><.>>><.>>><.>><.>>><.>>><.>>>><.>>><.>>><.>>><.>><.>>><.>>><.>>><.>><.>>><.>>><.>>><.>><.>>>><.>><.>>><.>>><.>>>><.>>>><.>>><.>>>><.>>><.>>><.>><.>>><.>>>><.>><.>>><.>>><.>>><.>><.>><.>><.>><


pm <- plotModels.ROC(rcv$testPredictions)

pm <- plotModels.ROC(rcv$medianTest)


pander::pander(rcv$jaccard)
  • Jaccard.SM: 0.6734
  • averageLength: 5.84
pander::pander(summary(rcv$theTimes[names(rcv$theTimes) == "elapsed"]))
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.23 0.5 0.58 0.5789 0.6425 0.94

bm <- baggedModel(rcv$selectedFeaturesSet,dataComplete,"COX",Outcome = "pgstat",timeOutcome = "pgtime" ,frequencyThreshold = 0.05)

……….

sm <- summary(bm$bagged.model)

rownames(sm$coefficients) <- as.character(cancerVarNames[rownames(sm$coefficients),2])
pander::pander(sm$coefficients,caption = "Prostate",round = 3)
Prostate (continued below)
  Estimate lower OR upper u.Accuracy
Tumor grade 1.226 1.918 3.407 6.051 0.664
# of cells in G2 phase -0.009 0.988 0.991 0.995 0.546
Gleason grade = 5 -0.794 0.162 0.452 1.265 0.575
Diploid DNA pattern -1.012 0.156 0.364 0.85 0.657
Gleason grade = 8 0 0.82 1 1.22 0.649
Gleason grade = 7 0.013 0.881 1.013 1.165 0.617
Tetraploid DNA pattern -0.527 0.266 0.591 1.311 0.634
Gleason grade = 9 | 10 0.22 0.887 1.246 1.752 0.662
Not aneuploid DNA pattern -0.105 0.536 0.9 1.512 0.655
Table continues below
  r.Accuracy full.Accuracy u.AUC r.AUC
Tumor grade 0.658 0.676 0.701 0.686
# of cells in G2 phase 0.673 0.694 0.527 0.704
Gleason grade = 5 0.677 0.676 0.652 0.704
Diploid DNA pattern 0.674 0.676 0.673 0.707
Gleason grade = 8 0.678 0.682 0.555 0.708
Gleason grade = 7 0.679 0.681 0.56 0.708
Tetraploid DNA pattern 0.676 0.676 0.638 0.707
Gleason grade = 9 | 10 0.678 0.68 0.544 0.707
Not aneuploid DNA pattern 0.676 0.676 0.535 0.707
Table continues below
  full.AUC IDI NRI z.IDI z.NRI
Tumor grade 0.707 0.05 0.252 2.812 1.89
# of cells in G2 phase 0.716 0.027 0.406 2.642 2.746
Gleason grade = 5 0.708 0.019 0.362 2.19 2.777
Diploid DNA pattern 0.707 0.006 0.128 0.579 0.848
Gleason grade = 8 0.709 0.001 0.116 0.476 0.811
Gleason grade = 7 0.709 0.002 0.061 0.436 0.405
Tetraploid DNA pattern 0.708 0.001 -0.022 0.267 -0.142
Gleason grade = 9 | 10 0.708 0 -0.322 -0.012 -2.235
Not aneuploid DNA pattern 0.707 0 -0.02 -0.068 -0.097
  Frequency
Tumor grade 1
# of cells in G2 phase 0.12
Gleason grade = 5 0.98
Diploid DNA pattern 0.97
Gleason grade = 8 0.37
Gleason grade = 7 0.29
Tetraploid DNA pattern 0.89
Gleason grade = 9 | 10 0.43
Not aneuploid DNA pattern 0.74
gplots::heatmap.2(bm$formulaNetwork,trace = "none",mar = c(5,5),main = "Formula Network",cexRow = 0.5,cexCol = 0.75,srtCol = 45)

Benchmark

theOutcome <- "pgstat";

cp <- BinaryBenchmark(theData,theOutcome,reps,fraction)



hm <- heatMaps(Outcome = "Outcome",data = cp$testPredictions,title = "Heat Map",Scale = TRUE,hCluster = "col",cexRow = 0.25,cexCol = 0.75,srtCol = 45) 

Results

Classifier Results


#The Times
pander::pander(cp$cpuElapsedTimes)
BSWiMS RF RPART LASSO SVM KNN ENS
0.5833 0.1164 0.0062 0.1917 0.0092 0.0063 0.9131
learningTime <- -1*cp$cpuElapsedTimes

bp <- barPlotCiError(as.matrix(cp$errorciTable),metricname = "Balanced Error",thesets = cp$thesets,themethod = cp$theMethod,main = "Balanced Error",offsets = c(0.5,0.05),scoreDirection = "<",args.legend = list(x = "topright"),col = terrain.colors(length(cp$theMethod)))


pander::pander(bp$ciTable$mean,caption = "Balanced Error Rate",round = 3)
BSWiMS RF RPART LASSO SVM KNN ENS
0.3188 0.3351 0.3291 0.3118 0.377 0.3035 0.3281
testBalancedError <- -bp$ciTable$mean;
testBalancedErrormin <- min(-bp$ciTable$top95)
testBalancedErrormax <- max(-bp$ciTable$low95)

bp <- barPlotCiError(as.matrix(cp$accciTable),metricname = "Accuracy",thesets = cp$thesets,themethod = cp$theMethod,main = "Accuracy",offsets = c(0.5,0.05),args.legend = list(x = "bottomright"),col = terrain.colors(length(cp$theMethod)))

pander::pander(bp$ciTable$mean,caption = "Accuracy",round = 3)
LASSO BSWiMS RF RPART KNN ENS SVM
0.6986 0.6849 0.6644 0.6575 0.6849 0.6781 0.6164
testAccuracy <- bp$ciTable$mean;
testAccuracymin <- min(bp$ciTable$low95)
testAccuracymax <- max(bp$ciTable$top95)

bp <- barPlotCiError(as.matrix(cp$aucTable),metricname = "AUC",thesets = cp$thesets,themethod = cp$theMethod,main = "ROC AUC",offsets = c(0.5,0.05),args.legend = list(x = "bottomright"),col = terrain.colors(length(cp$theMethod)))


pander::pander(bp$ciTable$mean,caption = "ROC AUC",round = 3)
BSWiMS RF RPART LASSO SVM KNN ENS
0.7303 0.725 0.7014 0.7407 0.6904 0.7126 0.7075
testAUC <- bp$ciTable$mean;
testAUCmin <- min(bp$ciTable$low95)
testAUCmax <- max(bp$ciTable$top95)

Filter Results

pander::pander(cp$featsize)
Table continues below
BSWiMS LASSO RPART RF.ref IDI NRI t-test Wilcoxon Spearman
6.16 6.03 12.63 6.16 9.05 9.15 7.44 6.49 6.27
Kendall mRMR
6.35 10.44

par(mfrow = c(1,2))
barplot(cp$jaccard[order(-cp$jaccard)],las = 2,cex.axis = 1,cex.names = 0.7,main = "Jaccard Index",ylab = "Jaccard")
selectJaccard <- cp$jaccard[c("BSWiMS","LASSO","RF.ref","IDI","Kendall","mRMR")]
unsize <- cp$featsize + 1
barplot(unsize[order(unsize)],las = 2,cex.axis = 1,cex.names = 0.7,log = "y",main = "Number of Features",ylab = "# of Features+1")

selectFilFeatsize <- -cp$featsize[c("BSWiMS","LASSO","RF.ref","IDI","Kendall","mRMR")]
par(mfrow = c(1,1))


bp <- barPlotCiError(as.matrix(cp$errorciTable_filter),metricname = "Balanced Error",thesets = cp$theFiltersets,themethod = cp$theClassMethod,main = "Balanced Error",scoreDirection = "<",args.legend = list(x = "topleft"),col = terrain.colors(length(cp$theClassMethod)))


pander::pander(bp$ciTable$mean,caption = "Balanced Error Rate",round = 3)
Balanced Error Rate (continued below)
  BSWiMS RF.ref IDI NRI t-test Wilcoxon
NC Spearman 0.3 0.3 0.31 0.323 0.332 0.325
SVM 0.311 0.313 0.324 0.331 0.333 0.31
NC RSS 0.308 0.336 0.324 0.337 0.337 0.347
KNN 0.301 0.318 0.327 0.301 0.301 0.345
Naive Bayes 0.5 0.5 0.5 0.5 0.5 0.5
  Spearman Kendall RPART mRMR LASSO
NC Spearman 0.302 0.317 0.3 0.287 0.319
SVM 0.326 0.32 0.355 0.358 0.335
NC RSS 0.351 0.371 0.327 0.377 0.341
KNN 0.305 0.312 0.345 0.356 0.391
Naive Bayes 0.5 0.5 0.504 0.509 0.5
testFilBalancedError <- -apply(bp$ciTable$mean,2,max)[c("BSWiMS","LASSO","RF.ref","IDI","Kendall","mRMR")];
testFilBalancedErrormax <- -min(apply(bp$ciTable$low95,2,min)[c("BSWiMS","LASSO","RF.ref","IDI","Kendall","mRMR")])
testFilBalancedErrormin <- -max(apply(bp$ciTable$top95,2,max)[c("BSWiMS","LASSO","RF.ref","IDI","Kendall","mRMR")])

bp <- barPlotCiError(as.matrix(cp$accciTable_filter),metricname = "Accuracy",thesets = cp$theFiltersets,themethod = cp$theClassMethod,main = "Accuracy",offsets = c(0.5,0.05),args.legend = list(x = "bottomleft"),col = terrain.colors(length(cp$theClassMethod)))


pander::pander(bp$ciTable$mean,caption = "Accuracy",round = 3)
Accuracy (continued below)
  BSWiMS RF.ref Wilcoxon Spearman IDI NRI
NC Spearman 0.699 0.699 0.658 0.692 0.692 0.685
SVM 0.685 0.692 0.692 0.671 0.664 0.664
NC RSS 0.699 0.692 0.678 0.678 0.664 0.658
KNN 0.678 0.658 0.637 0.664 0.651 0.678
Naive Bayes 0.5 0.5 0.5 0.5 0.5 0.5
  t-test Kendall RPART LASSO mRMR
NC Spearman 0.678 0.678 0.699 0.685 0.705
SVM 0.658 0.678 0.63 0.664 0.63
NC RSS 0.658 0.658 0.651 0.671 0.616
KNN 0.678 0.664 0.623 0.589 0.61
Naive Bayes 0.5 0.5 0.37 0.5 0.363
testFilAccuracy <- apply(bp$ciTable$mean,2,max)[c("BSWiMS","LASSO","RF.ref","IDI","Kendall","mRMR")];
testFilAccuracymin <- min(apply(bp$ciTable$low95,2,min)[c("BSWiMS","LASSO","RF.ref","IDI","Kendall","mRMR")])
testFilAccuracymax <- max(apply(bp$ciTable$top95,2,max)[c("BSWiMS","LASSO","RF.ref","IDI","Kendall","mRMR")])

bp <- barPlotCiError(as.matrix(cp$aucTable_filter),metricname = "AUC",thesets = cp$theFiltersets,themethod = cp$theClassMethod,main = "ROC AUC",offsets = c(0.5,0.05),args.legend = list(x = "bottomleft"),col = terrain.colors(length(cp$theClassMethod)))

pander::pander(bp$ciTable$mean,caption = "ROC AUC",round = 3)
ROC AUC (continued below)
  BSWiMS LASSO RF.ref IDI t-test Wilcoxon
KNN 0.718 0.704 0.736 0.712 0.708 0.744
NC RSS 0.702 0.695 0.688 0.699 0.707 0.686
NC Spearman 0.738 0.698 0.739 0.715 0.699 0.723
SVM 0.734 0.721 0.736 0.711 0.71 0.741
Naive Bayes 0.5 0.5 0.5 0.5 0.5 0.5
  Spearman Kendall RPART NRI mRMR
KNN 0.716 0.738 0.673 0.703 0.671
NC RSS 0.678 0.676 0.672 0.708 0.682
NC Spearman 0.712 0.738 0.707 0.726 0.731
SVM 0.742 0.74 0.675 0.708 0.691
Naive Bayes 0.5 0.5 0.505 0.454 0.408
testFilAUC <- apply(bp$ciTable$mean,2,max)[c("BSWiMS","LASSO","RF.ref","IDI","Kendall","mRMR")];
testFilAUCmin <- min(apply(bp$ciTable$low95,2,min)[c("BSWiMS","LASSO","RF.ref","IDI","Kendall","mRMR")])
testFilAUCmax <- max(apply(bp$ciTable$top95,2,max)[c("BSWiMS","LASSO","RF.ref","IDI","Kendall","mRMR")])

Radar Plots

op <- par(no.readonly = TRUE)

library(fmsb)
par(mfrow = c(1,2),xpd = TRUE,pty = "s",mar = c(1,1,1,1))

classRanks <- c(testBalancedErrormax,testAccuracymax,testAUCmax,max(learningTime))
classRanks <- rbind(classRanks,c(testBalancedErrormin,0,0,min(learningTime)))

classRanks <- as.data.frame(rbind(classRanks,cbind(testBalancedError[names(learningTime)],testAccuracy[names(learningTime)],testAUC[names(learningTime)],learningTime)))
colnames(classRanks) <- c("B.Error","Accuracy","ROC AUC","CPU Time")

colors_border = c( rgb(1.0,0.0,0.0,1.0), rgb(0.0,1.0,0.0,1.0) , rgb(0.0,0.0,1.0,1.0), rgb(0.2,0.2,0.0,1.0), rgb(0.0,1.0,1.0,1.0), rgb(1.0,0.0,1.0,1.0), rgb(0.0,0.0,0.0,1.0) )
colors_in = c( rgb(1.0,0.0,0.0,0.1), rgb(0.0,1.0,0.0,0.1) , rgb(0.0,0.0,1.0,0.1),rgb(1.0,1.0,0.0,0.1), rgb(0.0,1.0,1.0,0.1) , rgb(1.0,0.0,1.0,0.1), rgb(0.0,0.0,0.0,0.1) )
radarchart(classRanks,axistype = 0,maxmin = T,pcol = colors_border,pfcol = colors_in,plwd = 4,plty = 1, cglcol = "grey", cglty = 1,axislabcol = "black",cglwd = 0.8, vlcex  = 0.5 ,title = "Prediction Model")

legend("topleft",legend = rownames(classRanks[-c(1,2),]),bty = "n",pch = 20,col = colors_in,text.col = colors_border,cex = 0.5,pt.cex = 2)


filterRanks <- c(max(selectJaccard),max(selectFilFeatsize),testFilBalancedErrormax, testAccuracymax,testAUCmax);

filterRanks <- rbind(filterRanks,c(0,min(selectFilFeatsize),testFilBalancedErrormin, 0,0));

filterRanks <- as.data.frame(rbind(filterRanks,cbind(selectJaccard,selectFilFeatsize,testFilBalancedError, testFilAccuracy,testFilAUC)));
colnames(filterRanks) <- c("Jaccard","Size","B.Error","Accuracy","ROC.ACU")

colors_border = c( rgb(1.0,0.0,0.0,1.0), rgb(0.0,1.0,0.0,1.0) , rgb(0.0,0.0,1.0,1.0), rgb(0.2,0.2,0.0,1.0), rgb(0.0,1.0,1.0,1.0),rgb(1.0,0.0,1.0,1.0) )
colors_in = c( rgb(1.0,0.0,0.0,0.1), rgb(0.0,1.0,0.0,0.1) , rgb(0.0,0.0,1.0,0.1),rgb(1.0,1.0,0.0,0.1), rgb(0.0,1.0,1.0,0.1), rgb(1.0,0.0,1.0,0.1)  )
radarchart(filterRanks,axistype = 0,maxmin = T,pcol = colors_border,pfcol = colors_in,plwd = 4,plty = 1, cglcol = "grey", cglty = 1,axislabcol = "black",cglwd = 0.8, vlcex  = 0.5,title = "Filter Method" )


legend("topleft",legend = rownames(filterRanks[-c(1,2),]),bty = "n",pch = 20,col = colors_in,text.col = colors_border,cex = 0.5,pt.cex = 2)


par(mfrow = c(1,1))

detach("package:fmsb", unload=TRUE)

par(op)

Feature Analysis



rm <- rowMeans(cp$featureSelectionFrequency)
selFrequency <- cp$featureSelectionFrequency[rm > 0.1,]
gplots::heatmap.2(selFrequency,trace = "none",mar = c(10,10),main = "Features",cexRow = 0.5)



topFeat <- min(ncol(BSWiMSMODEL$bagging$formulaNetwork),30);
gplots::heatmap.2(BSWiMSMODEL$bagging$formulaNetwork[1:topFeat,1:topFeat],trace="none",mar = c(10,10),main = "B:SWiMS Formula Network")

pander::pander(summary(BSWiMSMODEL$bagging$bagged.model,caption="Colon",round = 3))
  • coefficients:

    Table continues below
      Estimate lower OR upper u.Accuracy
    grade 0.7293 1.577 2.074 2.727 0.6642
    gleason5 -0.5967 0.3353 0.5507 0.9044 0.5746
    tetraploid 0.2521 1.079 1.287 1.534 0.6343
    diploid -0.2634 0.5971 0.7684 0.9888 0.6567
    notAneuploid -0.4586 0.5222 0.6321 0.7652 0.6567
    Table continues below
      r.Accuracy full.Accuracy u.AUC r.AUC full.AUC
    grade 0.6057 0.6717 0.7007 0.6502 0.7031
    gleason5 0.5572 0.6167 0.6517 0.6158 0.6714
    tetraploid 0.6321 0.6403 0.6383 0.57 0.6693
    diploid 0.5756 0.6677 0.6732 0.6423 0.6926
    notAneuploid 0.6269 0.6553 0.5349 0.64 0.6736
      IDI NRI z.IDI z.NRI Frequency
    grade 0.1144 0.7987 4.618 5.988 1
    gleason5 0.09202 0.56 4.247 4.761 1
    tetraploid 0.05462 0.6588 3.958 4.612 1
    diploid 0.04217 0.6996 3.561 4.951 1
    notAneuploid 0.04113 0.6889 3.234 4.848 0.8
  • Accuracy: 0.01493
  • tAUC: NA
  • bootstrap:



hm <- heatMaps(Outcome = theOutcome,data = theData[,c(theOutcome,rownames(selFrequency))],title = "Heat Map",Scale = TRUE,hCluster = "col",cexRow = 0.25,cexCol = 0.75,srtCol = 45)

[1] 2

vlist <- rownames(selFrequency)
vlist <- cbind(vlist,vlist)
univ <- univariateRankVariables(variableList = vlist,formula = paste(theOutcome,"~1"),Outcome = theOutcome,data = theData,type = "LM",rankingTest = "Ztest",uniType = "Regression")[,c("cohortMean","cohortStd","kendall.r","kendall.p")] 


cnames <- colnames(univ);
univ <- cbind(univ,rm[rownames(univ)])
colnames(univ) <- c(cnames,"Frequency")
univ <- univ[order(-univ[,5]),]
pander::pander(univ,caption = "Features",round = 4)
Features
  cohortMean cohortStd kendall.r kendall.p Frequency
grade 61 79 0.4187 0 1
gleason5 108 38 -0.3574 0 0.9927
diploid 79 67 -0.3354 1e-04 0.9882
tetraploid 78 68 0.2517 0.0024 0.8364
g2 14.04 8.149 0.1644 0.0158 0.6573
gleason910 139 7 0.2265 0.0064 0.6336
gleason8 125 21 0.1711 0.0393 0.5818
notAneuploid 11 135 -0.1576 0.0578 0.5373
gleason7 108 38 0.1276 0.1245 0.4627
age 63 5.822 -0.0812 0.2431 0.4082
gleason4 140 6 -0.0157 0.8504 0.2664
eet 36 110 -0.0225 0.786 0.2364
gleason6 112 34 0.0143 0.8637 0.2327