FRESA.CAD Benchmark

Stage C Data Set


    library(rpart)
    data(stagec)
    
  options(na.action='na.pass')
    stagec_mat <- as.data.frame(model.matrix(Surv(pgtime, pgstat)~.*.,stagec))
    stagec_mat$`(Intercept)` <- NULL
  stagec_mat$pgstat <- stagec$pgstat
  stagec_mat$pgtime <- stagec$pgtime
  fnames <- colnames(stagec_mat)
  fnames <- str_replace_all(fnames,":","__")
  colnames(stagec_mat) <- fnames

  dataCancerImputed <- nearestNeighborImpute(stagec_mat)

  ExperimentName <- "Stage_C_Prostate"
  bswimsReps <- 20;
  theData <- dataCancerImputed;
  theOutcome <- "pgstat";
  reps <- 75;
  fraction <- 0.8;
  theData[,1:ncol(theData)] <- sapply(theData,as.numeric)

FRESAFileName <- paste(ExperimentName,"FRESAMethod.RDATA",sep = "_")
CVFileName <- paste(ExperimentName,"CVMethod.RDATA",sep = "_")

BSWiMS on a Survival Outcome

Model Building


BSWiMSMODEL <- BSWiMS.model(formula = Surv(pgtime, pgstat) ~ 1,data = dataCancerImputed,NumberofRepeats = bswimsReps)

Benchmark


rcv <- randomCV(dataCancerImputed, formula(Surv(pgtime, pgstat) ~ .),BSWiMS.model,trainFraction = fraction, repetitions = reps)

theOutcome <- "pgstat";
theData$pgtime <- NULL;

#par(pty='s',cex=0.5);
par(mfrow = c(2,2));
cp <- BinaryBenchmark(theData,theOutcome,reps,fraction)

par(mfrow = c(1,1),cex=1.0);

Results

Classifier Results


par(mfrow = c(2,2));
pmr <- plotModels.ROC(rcv$medianTest,cex=0.75)
pmr <- plotModels.ROC(rcv$medianTrain,cex=0.75)
pmr <- plotModels.ROC(rcv$testPredictions,cex=0.75)
pmr <- plotModels.ROC(rcv$trainPredictions,cex=0.75)

par(mfrow = c(1,1),cex=1.0);

hm <- heatMaps(Outcome = "Outcome",data = cp$testPredictions,title = "Heat Map",Scale = TRUE,hCluster = "col",cexRow = 0.25,cexCol = 0.75,srtCol = 45) 


psb <- predictionStats_binary(rcv$medianTest)
pander::pander(psb$CM.analysis$tab)
  Outcome + Outcome - Total
Test + 44 40 84
Test - 10 52 62
Total 54 92 146
pander::pander(summary(psb$CM.analysis))
  est lower upper
aprev 0.5753 0.4909 0.6567
tprev 0.3699 0.2915 0.4536
se 0.8148 0.6857 0.9075
sp 0.5652 0.4578 0.6683
diag.acc 0.6575 0.5745 0.734
diag.or 5.72 2.568 12.74
nnd 2.631 1.737 6.968
youden 0.38 0.1435 0.5758
ppv 0.5238 0.4119 0.634
npv 0.8387 0.7233 0.9198
plr 1.874 1.437 2.444
nlr 0.3276 0.1821 0.5896


#The Times
pander::pander(cp$cpuElapsedTimes)
BSWiMS RF RPART LASSO SVM KNN ENS
1.63 0.074 0.0096 1.489 0.0136 0.0104 3.226
learningTime <- -1*cp$cpuElapsedTimes
par(mfrow = c(2,1));
par(pty="m")
pr <- plot(cp)

Radar Plots

op <- par(no.readonly = TRUE)

#library(fmsb)
par(mfrow = c(1,2),xpd = TRUE,pty = "s",mar = c(1,1,1,1))

mNames <- names(cp$cpuElapsedTimes)

classRanks <- c(pr$minMaxMetrics$BER[1],pr$minMaxMetrics$ACC[2],pr$minMaxMetrics$AUC[2],pr$minMaxMetrics$SEN[2],pr$minMaxMetrics$SPE[2],min(cp$cpuElapsedTimes))
classRanks <- rbind(classRanks,c(pr$minMaxMetrics$BER[2],0,0,0,0,max(cp$cpuElapsedTimes)))
classRanks <- as.data.frame(rbind(classRanks,cbind(t(pr$metrics[c("BER","ACC","AUC","SEN","SPE"),mNames]),cp$cpuElapsedTimes)))
colnames(classRanks) <- c("BER","ACC","AUC","SEN","SPE","CPU")

classRanks$BER <- -classRanks$BER
classRanks$CPU <- -classRanks$CPU

colors_border = c( rgb(1.0,0.0,0.0,1.0), rgb(0.0,1.0,0.0,1.0) , rgb(0.0,0.0,1.0,1.0), rgb(0.2,0.2,0.0,1.0), rgb(0.0,1.0,1.0,1.0), rgb(1.0,0.0,1.0,1.0), rgb(0.0,0.0,0.0,1.0) )
colors_in = c( rgb(1.0,0.0,0.0,0.05), rgb(0.0,1.0,0.0,0.05) , rgb(0.0,0.0,1.0,0.05),rgb(1.0,1.0,0.0,0.05), rgb(0.0,1.0,1.0,0.05) , rgb(1.0,0.0,1.0,0.05), rgb(0.0,0.0,0.0,0.05) )
fmsb::radarchart(classRanks,axistype = 0,maxmin = T,pcol = colors_border,pfcol = colors_in,plwd = c(6,2,2,2,2,2,2),plty = 1, cglcol = "grey", cglty = 1,axislabcol = "black",cglwd = 0.8, vlcex  = 0.5 ,title = "Prediction Model")

legend("topleft",legend = rownames(classRanks[-c(1,2),]),bty = "n",pch = 20,col = colors_in,text.col = colors_border,cex = 0.5,pt.cex = 2)


filnames <- c("BSWiMS","LASSO","RF.ref","IDI","t-test","Kendall","mRMR")

filterRanks <- c(pr$minMaxMetrics$BER[1],pr$minMaxMetrics$ACC[2],pr$minMaxMetrics$AUC[2],pr$minMaxMetrics$SEN[2],pr$minMaxMetrics$SPE[2],max(cp$jaccard),min(cp$featsize));

filterRanks <- rbind(filterRanks,c(pr$minMaxMetrics$BER[2],0,0,0,0,min(cp$jaccard),max(cp$featsize)));

filterRanks <- as.data.frame(rbind(filterRanks,cbind(t(pr$metrics_filter[c("BER","ACC","AUC","SEN","SPE"),filnames]),cp$jaccard[filnames],cp$featsize[filnames])));
colnames(filterRanks) <- c("BER","ACC","AUC","SEN","SPE","Jaccard","SIZE")
filterRanks$BER <- -filterRanks$BER
filterRanks$SIZE <- -filterRanks$SIZE

colors_border = c( rgb(1.0,0.0,0.0,1.0), rgb(0.0,1.0,0.0,1.0) , rgb(0.0,0.0,1.0,1.0), rgb(0.2,0.2,0.0,1.0), rgb(0.0,1.0,1.0,1.0), rgb(1.0,0.0,1.0,1.0), rgb(0.0,0.0,0.0,1.0) )
colors_in = c( rgb(1.0,0.0,0.0,0.05), rgb(0.0,1.0,0.0,0.05) , rgb(0.0,0.0,1.0,0.05),rgb(1.0,1.0,0.0,0.05), rgb(0.0,1.0,1.0,0.05) , rgb(1.0,0.0,1.0,0.05), rgb(0.0,0.0,0.0,0.05) )
fmsb::radarchart(filterRanks,axistype = 0,maxmin = T,pcol = colors_border,pfcol = colors_in,plwd = c(6,2,2,2,2,2,2),plty = 1, cglcol = "grey", cglty = 1,axislabcol = "black",cglwd = 0.8, vlcex  = 0.6,title = "Filter Method" )


legend("topleft",legend = rownames(filterRanks[-c(1,2),]),bty = "n",pch = 20,col = colors_in,text.col = colors_border,cex = 0.5,pt.cex = 2)


#detach("package:fmsb", unload=TRUE)

par(mfrow = c(1,1))
par(op)

Feature Analysis



rm <- rowMeans(cp$featureSelectionFrequency)
selFrequency <- cp$featureSelectionFrequency[rm > 0.1,]
gplots::heatmap.2(selFrequency,trace = "none",mar = c(10,10),main = "Features",cexRow = 0.5)



topFeat <- min(ncol(BSWiMSMODEL$bagging$formulaNetwork),30);
gplots::heatmap.2(BSWiMSMODEL$bagging$formulaNetwork[1:topFeat,1:topFeat],trace="none",mar = c(10,10),main = "B:SWiMS Formula Network")

pander::pander(summary(BSWiMSMODEL$bagging$bagged.model,caption="Colon",round = 3))
  • coefficients:

    Table continues below
      Estimate lower OR upper
    **grade__gleason** 0.01852 1.015 1.019 1.024
    grade 0.2176 1.184 1.243 1.326
    **age__grade** 0.009535 1.007 1.01 1.012
    gleason 0.04869 1.037 1.05 1.067
    **grade__ploidytetraploid** 0.07713 1.05 1.08 1.118
    **eet__grade** 0.05537 1.04 1.057 1.083
    ploidytetraploid 0.4089 1.308 1.505 1.816
    **g2__ploidyaneuploid** 0.03803 1.022 1.039 1.057
    **gleason__ploidytetraploid** 0.0002206 1 1 1
    age -0.04036 0.9445 0.9604 0.9791
    **eet__ploidytetraploid** -0.208 0.7479 0.8122 0.9128
    Table continues below
      u.Accuracy r.Accuracy full.Accuracy
    **grade__gleason** 0.6781 0.3699 0.6781
    grade 0.6644 0.3699 0.6644
    **age__grade** 0.6349 0.5475 0.6654
    gleason 0.6712 0.3699 0.6712
    **grade__ploidytetraploid** 0.6301 0.6507 0.6515
    **eet__grade** 0.6438 0.5651 0.6598
    ploidytetraploid 0.6301 0.6284 0.6564
    **g2__ploidyaneuploid** 0.6507 0.6301 0.6515
    **gleason__ploidytetraploid** 0.6301 0.3699 0.6301
    age 0.5475 0.6349 0.6654
    **eet__ploidytetraploid** 0.6301 0.6136 0.6564
    Table continues below
      u.AUC r.AUC full.AUC IDI NRI
    **grade__gleason** 0.6929 0.5 0.6929 0.1732 0.7685
    grade 0.6993 0.5 0.6993 0.1781 0.7822
    **age__grade** 0.651 0.539 0.6993 0.1726 0.7786
    gleason 0.6703 0.5 0.6703 0.1278 0.6884
    **grade__ploidytetraploid** 0.63 0.5431 0.6719 0.09336 0.6395
    **eet__grade** 0.6333 0.5975 0.6743 0.09865 0.6938
    ploidytetraploid 0.63 0.6287 0.6735 0.08106 0.5598
    **g2__ploidyaneuploid** 0.5431 0.63 0.6719 0.05404 0.7532
    **gleason__ploidytetraploid** 0.63 0.5 0.63 0.05274 0.5399
    age 0.539 0.651 0.6993 0.07296 0.3663
    **eet__ploidytetraploid** 0.63 0.614 0.6735 0.06368 0.3877
      z.IDI z.NRI Frequency
    **grade__gleason** 6.737 5.782 1
    grade 6.625 6.039 1
    **age__grade** 6.513 6.02 1
    gleason 5.978 5.004 1
    **grade__ploidytetraploid** 5.325 4.616 0.8
    **eet__grade** 4.522 5.145 0.2
    ploidytetraploid 4.268 4.3 0.2
    **g2__ploidyaneuploid** 4.196 5.607 0.8
    **gleason__ploidytetraploid** 4.19 3.808 0.1
    age 4.122 2.55 1
    **eet__ploidytetraploid** 3.493 2.925 0.2
  • Accuracy: 0.6644
  • tAUC: 0.6993
  • sensitivity: 0.8333
  • specificity: 0.5652
  • bootstrap:



hm <- heatMaps(Outcome = theOutcome,data = theData[,c(theOutcome,rownames(selFrequency))],title = "Heat Map",Scale = TRUE,hCluster = "col",cexRow = 0.25,cexCol = 0.75,srtCol = 45)


vlist <- rownames(selFrequency)
vlist <- cbind(vlist,vlist)
univ <- univariateRankVariables(variableList = vlist,formula = paste(theOutcome,"~1"),Outcome = theOutcome,data = theData,type = "LOGIT",rankingTest = "zIDI",uniType = "Binary")[,c("controlMean","controlStd","caseMean","caseStd","ROCAUC","WilcoxRes.p")] 

cnames <- colnames(univ);
univ <- cbind(univ,rm[rownames(univ)])
colnames(univ) <- c(cnames,"Frequency")
univ <- univ[order(-univ[,5]),]
pander::pander(univ[1:topFeat,],caption = "Features",round = 4)
Features (continued below)
  controlMean controlStd caseMean caseStd
**grade__gleason** 14.82 5.759 21.19 7.111
gleason 5.929 1.203 7.019 1.296
grade 2.413 0.5376 2.944 0.529
**age__grade** 153.3 36.75 182.6 36.5
**age__gleason** 376.5 85.4 435.1 87.48
**g2__grade** 33.92 25.96 43.73 27.78
**grade__ploidytetraploid** 0.9457 1.278 1.815 1.468
**g2__gleason** 84.19 67.96 103.6 62.91
**gleason__ploidytetraploid** 2.342 3.161 4.352 3.503
**eet__gleason** 10.41 3.279 12.13 3.64
**eet__grade** 4.25 1.411 5.093 1.545
ploidytetraploid 58 34 20 34
  ROCAUC WilcoxRes.p Frequency
**grade__gleason** 0.7433 0 0.9573
gleason 0.7292 0 0.8387
grade 0.7252 0 0.892
**age__grade** 0.6998 0 0.8867
**age__gleason** 0.6905 1e-04 0.8427
**g2__grade** 0.6649 5e-04 0.8067
**grade__ploidytetraploid** 0.6631 3e-04 0.7387
**g2__gleason** 0.6625 5e-04 0.7707
**gleason__ploidytetraploid** 0.6607 2e-04 0.7307
**eet__gleason** 0.6386 0.001 0.7133
**eet__grade** 0.6349 0.003 0.7773
ploidytetraploid 0.63 0.0419 0.824