stage C prostate

Stage C Data Set


    library(rpart)
    data(stagec)
    # Split the stages into several columns
    dataCancer <- cbind(stagec[,c(1:3,5:6)],
                        gleason4 = 1*(stagec[,7] == 4),
                        gleason5 = 1*(stagec[,7] == 5),
                        gleason6 = 1*(stagec[,7] == 6),
                        gleason7 = 1*(stagec[,7] == 7),
                        gleason8 = 1*(stagec[,7] == 8),
                        gleason910 = 1*(stagec[,7] >= 9),
                        eet = 1*(stagec[,4] == 2),
                        diploid = 1*(stagec[,8] == "diploid"),
                        tetraploid = 1*(stagec[,8] == "tetraploid"),
                        notAneuploid = 1 - 1*(stagec[,8] == "aneuploid"))
    #Impute missing values
dataCancerImputed <- nearestNeighborImpute(dataCancer)
dataCancerImputed$pgtime <- NULL

ExperimentName <- "Stage_C_Prostate"
bswimsReps <- 30;
theData <- dataCancerImputed;
theOutcome <- "pgstat";
reps <- 100;
fraction <- 0.9;

FRESAFileName <- paste(ExperimentName,"FRESAMethod.RDATA",sep = "_")
CVFileName <- paste(ExperimentName,"CVMethod.RDATA",sep = "_")


data(cancerVarNames)
rownames(cancerVarNames) <- (as.character(cancerVarNames[,1]))
dataComplete <- dataCancer[complete.cases(dataCancer),]

BSWiMS on a Binary Outcome

Model Building



BSWiMSMODEL <- BSWiMS.model(formula = paste(theOutcome," ~ 1"),data = theData,NumberofRepeats = bswimsReps)

pm <- plot(BSWiMSMODEL$BSWiMS.model$bootCV)

gplots::heatmap.2(BSWiMSMODEL$bagging$formulaNetwork,trace = "none",mar = c(10,10),main = "B:SWiMS Formula Network",cexRow = 0.5,cexCol = 0.75,srtCol = 45)

Summary Table

sm <- summary(BSWiMSMODEL)

rownames(sm$coefficients) <- as.character(cancerVarNames[rownames(sm$coefficients),2])
pander::pander(sm$coefficients,caption = "Prostate",round = 3)

Prostate (continued below)
	Estimate	lower	OR	upper	u.Accuracy
Tumor grade	0.693	1.619	2	2.471	0.664
Gleason grade = 5	-0.716	0.359	0.489	0.665	0.589
Gleason grade = 9 \| 10	0.818	1.926	2.265	2.664	0.664
Tetraploid DNA pattern	0.402	1.269	1.495	1.763	0.63
Gleason grade = 8	0.424	1.299	1.529	1.799	0.651
Diploid DNA pattern	-0.517	0.487	0.597	0.73	0.651
Gleason grade = 7	0.314	1.184	1.368	1.582	0.605
Not aneuploid DNA pattern	-0.407	0.565	0.666	0.784	0.651

Table continues below
	r.Accuracy	full.Accuracy	u.AUC	r.AUC
Tumor grade	0.651	0.721	0.699	0.673
Gleason grade = 5	0.569	0.66	0.662	0.613
Gleason grade = 9 \| 10	0.637	0.672	0.55	0.618
Tetraploid DNA pattern	0.605	0.677	0.63	0.631
Gleason grade = 8	0.651	0.672	0.562	0.606
Diploid DNA pattern	0.664	0.721	0.673	0.699
Gleason grade = 7	0.685	0.672	0.554	0.612
Not aneuploid DNA pattern	0.649	0.683	0.543	0.647

Table continues below
	full.AUC	IDI	NRI	z.IDI	z.NRI
Tumor grade	0.712	0.142	0.799	5.344	6.22
Gleason grade = 5	0.678	0.126	0.646	4.987	5.792
Gleason grade = 9 \| 10	0.669	0.077	0.427	3.855	3.897
Tetraploid DNA pattern	0.681	0.072	0.539	3.564	3.838
Gleason grade = 8	0.669	0.068	0.466	3.558	3.822
Diploid DNA pattern	0.712	0.062	0.693	3.39	5.133
Gleason grade = 7	0.669	0.055	0.456	3.164	3.396
Not aneuploid DNA pattern	0.692	0.05	0.241	2.986	2.057

	Frequency
Tumor grade	1
Gleason grade = 5	1
Gleason grade = 9 \| 10	0.967
Tetraploid DNA pattern	1
Gleason grade = 8	0.967
Diploid DNA pattern	1
Gleason grade = 7	0.967
Not aneuploid DNA pattern	0.667

Cross Validation Performance

rcv <- randomCV(theData, theOutcome,BSWiMS.model,trainFraction = fraction, repetitions = reps)

.>><.>>><.>><.>><.>><.>>><.>><.>>><.>><.>>><.>>><.>><.>><.>><.><.>>>><.>><.>><.>>><.>><.>>><.>>><.>><.>><.>>><.>>><.>>>><.>>>><.>>><.>>><.><.>><.>>><.>>><.>><.>>><.>><.>><.>>>><.>><.>>><.>>><.>><.>>><.>><.>>><.>><.>><.>><.>><.>><.>><.>><.>><.>>><.>><.>>><.>>><.>>><.>><.>>><.>>>><.>>>><.>><.>><.>><.>>><.>>><.>><.>>><.>>><.>>><.>>>><.>>>><.>>><.>>><.>>><.>>><.>><.>>><.>>><.>><.>><.>>><.><.>><.>>><.>><.>><.>><.>>><.>>><.>>><.>>><.>><.>>><.>><.>>><.>><.>>><

pm <- plotModels.ROC(rcv$testPredictions)

pm <- plotModels.ROC(rcv$medianTest)


pROC::roc(rcv$medianTest[,1],rcv$medianTest[,2],plot=TRUE)

Call: roc.default(response = rcv$medianTest[, 1], predictor = rcv$medianTest[, 2], plot = TRUE)

Data: rcv$medianTest[, 2] in 92 controls (rcv$medianTest[, 1] 0) < 54 cases (rcv$medianTest[, 1] 1). Area under the curve: 0.7313


pander::pander(rcv$jaccard)

Jaccard.SM: 0.6677
averageLength: 5.9

pander::pander(summary(rcv$theTimes[names(rcv$theTimes) == "elapsed"]))

Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0.36	0.55	0.66	0.6645	0.7425	1.33


bm <- baggedModel(rcv$selectedFeaturesSet,theData,"LOGIT",Outcome = theOutcome,frequencyThreshold = 0.05)

……….

sm <- summary(bm$bagged.model)

rownames(sm$coefficients) <- as.character(cancerVarNames[rownames(sm$coefficients),2])
pander::pander(sm$coefficients,caption = "Prostate",round = 3)

Prostate (continued below)
	Estimate	lower	OR	upper	u.Accuracy
# of cells in G2 phase	-0.015	0.976	0.985	0.993	0.617
Tumor grade	1.163	1.63	3.201	6.285	0.664
Gleason grade = 5	-1.379	0.085	0.252	0.746	0.589
Diploid DNA pattern	-1.118	0.155	0.327	0.689	0.651
Gleason grade = 9 \| 10	0.903	1.09	2.468	5.585	0.664
Gleason grade = 8	0.02	0.782	1.02	1.331	0.651
Gleason grade = 7	-0.03	0.826	0.971	1.141	0.613
Age at diagnosis	-0.002	0.996	0.998	1	0.545
Tetraploid DNA pattern	0.287	0.584	1.332	3.036	0.63
Not aneuploid DNA pattern	-0.259	0.577	0.772	1.031	0.651

Table continues below
	r.Accuracy	full.Accuracy	u.AUC	r.AUC
# of cells in G2 phase	0.72	0.723	0.589	0.717
Tumor grade	0.711	0.717	0.699	0.717
Gleason grade = 5	0.721	0.717	0.662	0.712
Diploid DNA pattern	0.702	0.718	0.673	0.709
Gleason grade = 9 \| 10	0.714	0.718	0.55	0.715
Gleason grade = 8	0.718	0.718	0.562	0.718
Gleason grade = 7	0.718	0.716	0.556	0.719
Age at diagnosis	0.717	0.712	0.538	0.718
Tetraploid DNA pattern	0.718	0.717	0.63	0.717
Not aneuploid DNA pattern	0.718	0.718	0.543	0.717

Table continues below
	full.AUC	IDI	NRI	z.IDI	z.NRI
# of cells in G2 phase	0.72	0.037	0.429	2.634	3.032
Tumor grade	0.716	0.038	0.254	2.501	1.936
Gleason grade = 5	0.717	0.021	0.359	1.968	2.78
Diploid DNA pattern	0.717	0.014	0.124	1.081	0.933
Gleason grade = 9 \| 10	0.717	0.006	-0.323	0.948	-2.358
Gleason grade = 8	0.718	0.002	0.006	0.514	0.064
Gleason grade = 7	0.718	0.002	0.13	0.468	0.902
Age at diagnosis	0.713	0.002	0.016	0.405	0.11
Tetraploid DNA pattern	0.717	0.001	0.011	0.255	0.111
Not aneuploid DNA pattern	0.717	0	0.053	-0.079	0.361

	Frequency
# of cells in G2 phase	0.12
Tumor grade	1
Gleason grade = 5	0.97
Diploid DNA pattern	0.98
Gleason grade = 9 \| 10	0.74
Gleason grade = 8	0.31
Gleason grade = 7	0.24
Age at diagnosis	0.08
Tetraploid DNA pattern	0.88
Not aneuploid DNA pattern	0.55

gplots::heatmap.2(bm$formulaNetwork,trace = "none",mar = c(5,5),main = "Formula Network",cexRow = 0.5,cexCol = 0.75,srtCol = 45)

BSWiMS on a Continous Outcome

Model Building

theOutcome = "g2"

BSWiMSMODEL <- BSWiMS.model(formula = paste(theOutcome," ~ 1"),data = theData,NumberofRepeats = bswimsReps)

pm <- plot(BSWiMSMODEL$BSWiMS.model$bootCV)

gplots::heatmap.2(BSWiMSMODEL$bagging$formulaNetwork,trace = "none",mar = c(10,10),main = "B:SWiMS Formula Network",cexRow = 0.5,cexCol = 0.75,srtCol = 45)

Summary Table

sm <- summary(BSWiMSMODEL)

rownames(sm$coefficients) <- as.character(cancerVarNames[rownames(sm$coefficients),2])
pander::pander(sm$coefficients,caption = "Prostate",round = 3)

Prostate (continued below)
	Estimate	lower	mean	upper	u.MSE
Tetraploid DNA pattern	4.24	4.24	4.24	4.24	36.55
Diploid DNA pattern	-6.551	-6.551	-6.551	-6.551	45.29
Not aneuploid DNA pattern	7.105	7.105	7.105	7.105	63.16

Table continues below
	r.MSE	model.MSE	NeRI
Tetraploid DNA pattern	65.94	36.55	0.329
Diploid DNA pattern	63.16	36.49	0.301
Not aneuploid DNA pattern	45.29	36.49	-0.075

Table continues below
	t.pvalue	Sign.pvalue	Wilcox.pvalue
Tetraploid DNA pattern	0	0	0
Diploid DNA pattern	0	0	0
Not aneuploid DNA pattern	0.033	0.5	1

	Frequency
Tetraploid DNA pattern	1
Diploid DNA pattern	1
Not aneuploid DNA pattern	1

Cross Validation Performance

rcv <- randomCV(theData, theOutcome,BSWiMS.model,trainFraction = fraction, repetitions = reps)

.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><.>><

pm <- plot(rcv$testPredictions[,c(1,3)])

pm <- plot(rcv$medianTest)


pander::pander(rcv$jaccard)

Jaccard.SM: 0.9119
averageLength: 3.22

pander::pander(summary(rcv$theTimes[names(rcv$theTimes) == "elapsed"]))

Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0.24	0.34	0.39	0.3802	0.42	0.49


bm <- baggedModel(rcv$selectedFeaturesSet,theData,"LM",Outcome = theOutcome,frequencyThreshold = 0.05)

……….

sm <- summary(bm$bagged.model)

rownames(sm$coefficients) <- as.character(cancerVarNames[rownames(sm$coefficients),2])
pander::pander(sm$coefficients,caption = "Prostate",round = 3)

Prostate (continued below)
	Estimate	lower	mean	upper	u.MSE
Tumor grade	1.813	1.813	1.813	1.813	62.45
Tetraploid DNA pattern	7.459	7.459	7.459	7.459	36.55
Diploid DNA pattern	-2.761	-2.762	-2.761	-2.76	45.29
Not aneuploid DNA pattern	4.697	4.697	4.697	4.698	63.16

Table continues below
	r.MSE	model.MSE	NeRI	F.pvalue
Tumor grade	36.49	35.45	0.096	0.02
Tetraploid DNA pattern	36.36	36.36	-0.122	0.5
Diploid DNA pattern	36.36	36.36	0.108	0.5
Not aneuploid DNA pattern	36.36	36.36	-0.131	0.5

Table continues below
	t.pvalue	Sign.pvalue	Wilcox.pvalue
Tumor grade	0.002	0.141	0.01
Tetraploid DNA pattern	0.812	0.5	1
Diploid DNA pattern	0.006	0.076	0.017
Not aneuploid DNA pattern	0.545	0.5	1

	Frequency
Tumor grade	0.13
Tetraploid DNA pattern	1
Diploid DNA pattern	1
Not aneuploid DNA pattern	1

gplots::heatmap.2(bm$formulaNetwork,trace = "none",mar = c(5,5),main = "Formula Network",cexRow = 0.5,cexCol = 0.75,srtCol = 45)

BSWiMS on a Ordinal Outcome

Model Building

theOutcome = "grade"
theData$grade[theData$grade < 2] <- 2

BSWiMSMODEL <- BSWiMS.model(formula = paste(theOutcome," ~ 1"),data = theData,NumberofRepeats = bswimsReps)

gplots::heatmap.2(BSWiMSMODEL$bagging$formulaNetwork,trace = "none",mar = c(10,10),main = "B:SWiMS Formula Network",cexRow = 0.5,cexCol = 0.75,srtCol = 45)

Summary Table

sm <- summary(BSWiMSMODEL)

pander::pander(sm[[1]]$coefficients,caption = "Prostate",round = 3)

Prostate (continued below)
	Estimate	lower	OR	upper	u.Accuracy	r.Accuracy
gleason5	-2.011	0.116	0.134	0.154	0.825	0.615
gleason7	2.706	0.574	14.97	390.3	0.664	0.641
gleason8	12.03	122502	167706	229590	0.566	0.723
gleason4	-7.323	0	0.001	0.001	0.615	0.825
gleason910	8.358	3624	4266	5021	0.462	0.804
pgstat	0.333	1.258	1.395	1.546	0.657	0.804

Table continues below
	full.Accuracy	u.AUC	r.AUC	full.AUC	IDI	NRI
gleason5	0.867	0.797	0.549	0.846	0.468	1.357
gleason7	0.833	0.703	0.681	0.846	0.329	1.189
gleason8	0.833	0.622	0.748	0.846	0.231	0.929
gleason4	0.867	0.549	0.797	0.846	0.106	1.379
gleason910	0.839	0.53	0.825	0.856	0.073	0.616
pgstat	0.82	0.682	0.825	0.828	0.063	0.711

	z.IDI	z.NRI	Frequency
gleason5	16.99	18.53	0.5
gleason7	12.63	16.56	0.5
gleason8	9.895	12.54	0.5
gleason4	6.188	18.89	0.5
gleason910	5.055	11.64	0.333
pgstat	4.543	7.413	0.167


pander::pander(sm[[2]]$coefficients,caption = "Prostate",round = 3)

Prostate (continued below)
	Estimate	lower	OR	upper	u.Accuracy	r.Accuracy
gleason910	2.179	7.721	8.838	10.12	0.957	0.052
pgstat	10.03	22688	22688	22688	0.623	0.052

Table continues below
	full.Accuracy	u.AUC	r.AUC	full.AUC	IDI	NRI
gleason910	0.957	0.82	0.5	0.82	0.45	1.276
pgstat	0.623	0.801	0.5	0.801	0.431	1.205

	z.IDI	z.NRI	Frequency
gleason910	18.92	18.96	0.5
pgstat	18.19	18.23	0.5

Cross Validation Performance

rcv <- randomCV(theData, theOutcome,BSWiMS.model,trainFraction = fraction, repetitions = reps)

.><.><.><.><.>>><.><.><.><.><.><.><.><.>>><.><.>><.><.>><.><.>>><.><.><.>>><.>><.><.>><.>><.><.><.>><

pm <- boxplot(rcv$testPredictions[,3]~rcv$testPredictions[,1])

pm <- boxplot(rcv$medianTest[,2]~rcv$medianTest[,1])


pander::pander(rcv$jaccard)

Jaccard.SM: 0.7368
averageLength: 5.87

pander::pander(summary(rcv$theTimes[names(rcv$theTimes) == "elapsed"]))

Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
1.17	1.547	1.78	1.852	2.122	3.59


bm <- baggedModel(rcv$selectedFeaturesSet,theData,"LM",Outcome = theOutcome,frequencyThreshold = 0.05)

……….

sm <- summary(bm$bagged.model)

pander::pander(sm$coefficients,caption = "Prostate",round = 3)

Prostate (continued below)
	Estimate	lower	mean	upper	u.MSE	r.MSE
gleason5	-0.801	-0.94	-0.801	-0.662	0.192	0.196
gleason4	-0.784	-0.861	-0.784	-0.708	0.3	0.138
gleason910	0.599	0.481	0.599	0.717	0.272	0.131
gleason6	-0.273	-0.345	-0.273	-0.201	0.317	0.123
pgstat	0.156	0.131	0.156	0.181	0.256	0.12
gleason8	0.175	0.075	0.175	0.275	0.28	0.119
g2	0	0	0	0	0.303	0.11
age	0	0	0	0	0.315	0.108

Table continues below
	model.MSE	NeRI	F.pvalue	t.pvalue	Sign.pvalue
gleason5	0.114	0.547	0	0	0
gleason4	0.109	0.157	0	0	0.007
gleason910	0.114	-0.095	0	0.081	0.424
gleason6	0.111	0.155	0	0	0.011
pgstat	0.115	0.199	0.004	0.05	0.003
gleason8	0.114	-0.206	0.007	0.531	0.487
g2	0.109	0.026	0.315	0.539	0.356
age	0.108	-0.021	0.331	0.661	0.52

	Wilcox.pvalue	Frequency
gleason5	0	1
gleason4	0	0.82
gleason910	0.557	0.95
gleason6	0.008	0.85
pgstat	0.049	0.93
gleason8	0.877	0.85
g2	0.456	0.17
age	0.658	0.13

gplots::heatmap.2(bm$formulaNetwork,trace = "none",mar = c(5,5),main = "Formula Network",cexRow = 0.5,cexCol = 0.75,srtCol = 45)

BSWiMS on a Survival Outcome

Model Building


BSWiMSMODEL <- BSWiMS.model(formula = Surv(pgtime, pgstat) ~ 1,data = dataComplete,NumberofRepeats = bswimsReps)

gplots::heatmap.2(BSWiMSMODEL$bagging$formulaNetwork,trace = "none",mar = c(10,10),main = "B:SWiMS Formula Network",cexRow = 0.5,cexCol = 0.75,srtCol = 45)

Summary Table

sm <- summary(BSWiMSMODEL)

rownames(sm$coefficients) <- as.character(cancerVarNames[rownames(sm$coefficients),2])
pander::pander(sm$coefficients,caption = "Prostate",round = 3)

Prostate (continued below)
	Estimate	lower	OR	upper	u.Accuracy
Tumor grade	0.729	1.577	2.074	2.727	0.664
Gleason grade = 5	-0.597	0.335	0.551	0.904	0.575
Tetraploid DNA pattern	0.252	1.079	1.287	1.534	0.634
Diploid DNA pattern	-0.263	0.597	0.768	0.989	0.657
Not aneuploid DNA pattern	-0.459	0.522	0.632	0.765	0.657

Table continues below
	r.Accuracy	full.Accuracy	u.AUC	r.AUC
Tumor grade	0.606	0.672	0.701	0.65
Gleason grade = 5	0.557	0.617	0.652	0.616
Tetraploid DNA pattern	0.632	0.64	0.638	0.57
Diploid DNA pattern	0.576	0.668	0.673	0.642
Not aneuploid DNA pattern	0.627	0.655	0.535	0.64

Table continues below
	full.AUC	IDI	NRI	z.IDI	z.NRI
Tumor grade	0.703	0.114	0.799	4.618	5.988
Gleason grade = 5	0.671	0.092	0.56	4.247	4.761
Tetraploid DNA pattern	0.669	0.055	0.659	3.958	4.612
Diploid DNA pattern	0.693	0.042	0.7	3.561	4.951
Not aneuploid DNA pattern	0.674	0.041	0.689	3.234	4.848

	Frequency
Tumor grade	1
Gleason grade = 5	1
Tetraploid DNA pattern	1
Diploid DNA pattern	1
Not aneuploid DNA pattern	0.8

Cross Validation Performance

rcv <- randomCV(dataComplete, formula(Surv(pgtime, pgstat) ~ .),BSWiMS.model,trainFraction = fraction, repetitions = reps)

.>>><.>>>><.>>>><.>>><.>>><.>>><.>>><.>>><.>><.>><.><.>>><.>>><.>>><.>>><.>>><.>>>><.>>>><.>>><.>>>><.>>><.>><.>>><.>>><.>><.><.>>><.>><.>>><.>>>><.>>>><.>>><.>>>><.>>><.>>><.>><.>>><.>>><.>>>><.>><.>>>><.>>>><.>><.>><.>><.>><.>>><.>>>><.>><.>><.>><.>>>><.>>><.>><.>>><.>>><.>>><.>>>><.>>>><.>><.>>><.>>><.>>><.>><.>>><.>>><.>>>><.>>><.>>><.>>><.>><.>>><.>>><.>>><.>><.>>><.>>><.>>><.>><.>>>><.>><.>>><.>>><.>>>><.>>>><.>>><.>>>><.>>><.>>><.>><.>>><.>>>><.>><.>>><.>>><.>>><.>><.>><.>><.>><


pm <- plotModels.ROC(rcv$testPredictions)

pm <- plotModels.ROC(rcv$medianTest)


pander::pander(rcv$jaccard)

Jaccard.SM: 0.6734
averageLength: 5.84

pander::pander(summary(rcv$theTimes[names(rcv$theTimes) == "elapsed"]))

Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0.23	0.5	0.58	0.5789	0.6425	0.94


bm <- baggedModel(rcv$selectedFeaturesSet,dataComplete,"COX",Outcome = "pgstat",timeOutcome = "pgtime" ,frequencyThreshold = 0.05)

……….

sm <- summary(bm$bagged.model)

rownames(sm$coefficients) <- as.character(cancerVarNames[rownames(sm$coefficients),2])
pander::pander(sm$coefficients,caption = "Prostate",round = 3)

Prostate (continued below)
	Estimate	lower	OR	upper	u.Accuracy
Tumor grade	1.226	1.918	3.407	6.051	0.664
# of cells in G2 phase	-0.009	0.988	0.991	0.995	0.546
Gleason grade = 5	-0.794	0.162	0.452	1.265	0.575
Diploid DNA pattern	-1.012	0.156	0.364	0.85	0.657
Gleason grade = 8	0	0.82	1	1.22	0.649
Gleason grade = 7	0.013	0.881	1.013	1.165	0.617
Tetraploid DNA pattern	-0.527	0.266	0.591	1.311	0.634
Gleason grade = 9 \| 10	0.22	0.887	1.246	1.752	0.662
Not aneuploid DNA pattern	-0.105	0.536	0.9	1.512	0.655

Table continues below
	r.Accuracy	full.Accuracy	u.AUC	r.AUC
Tumor grade	0.658	0.676	0.701	0.686
# of cells in G2 phase	0.673	0.694	0.527	0.704
Gleason grade = 5	0.677	0.676	0.652	0.704
Diploid DNA pattern	0.674	0.676	0.673	0.707
Gleason grade = 8	0.678	0.682	0.555	0.708
Gleason grade = 7	0.679	0.681	0.56	0.708
Tetraploid DNA pattern	0.676	0.676	0.638	0.707
Gleason grade = 9 \| 10	0.678	0.68	0.544	0.707
Not aneuploid DNA pattern	0.676	0.676	0.535	0.707

Table continues below
	full.AUC	IDI	NRI	z.IDI	z.NRI
Tumor grade	0.707	0.05	0.252	2.812	1.89
# of cells in G2 phase	0.716	0.027	0.406	2.642	2.746
Gleason grade = 5	0.708	0.019	0.362	2.19	2.777
Diploid DNA pattern	0.707	0.006	0.128	0.579	0.848
Gleason grade = 8	0.709	0.001	0.116	0.476	0.811
Gleason grade = 7	0.709	0.002	0.061	0.436	0.405
Tetraploid DNA pattern	0.708	0.001	-0.022	0.267	-0.142
Gleason grade = 9 \| 10	0.708	0	-0.322	-0.012	-2.235
Not aneuploid DNA pattern	0.707	0	-0.02	-0.068	-0.097

	Frequency
Tumor grade	1
# of cells in G2 phase	0.12
Gleason grade = 5	0.98
Diploid DNA pattern	0.97
Gleason grade = 8	0.37
Gleason grade = 7	0.29
Tetraploid DNA pattern	0.89
Gleason grade = 9 \| 10	0.43
Not aneuploid DNA pattern	0.74

gplots::heatmap.2(bm$formulaNetwork,trace = "none",mar = c(5,5),main = "Formula Network",cexRow = 0.5,cexCol = 0.75,srtCol = 45)

Benchmark

theOutcome <- "pgstat";

cp <- BinaryBenchmark(theData,theOutcome,reps,fraction)



hm <- heatMaps(Outcome = "Outcome",data = cp$testPredictions,title = "Heat Map",Scale = TRUE,hCluster = "col",cexRow = 0.25,cexCol = 0.75,srtCol = 45)

Results

Classifier Results


#The Times
pander::pander(cp$cpuElapsedTimes)

BSWiMS	RF	RPART	LASSO	SVM	KNN	ENS
0.5833	0.1164	0.0062	0.1917	0.0092	0.0063	0.9131

learningTime <- -1*cp$cpuElapsedTimes

bp <- barPlotCiError(as.matrix(cp$errorciTable),metricname = "Balanced Error",thesets = cp$thesets,themethod = cp$theMethod,main = "Balanced Error",offsets = c(0.5,0.05),scoreDirection = "<",args.legend = list(x = "topright"),col = terrain.colors(length(cp$theMethod)))


pander::pander(bp$ciTable$mean,caption = "Balanced Error Rate",round = 3)

BSWiMS	RF	RPART	LASSO	SVM	KNN	ENS
0.3188	0.3351	0.3291	0.3118	0.377	0.3035	0.3281

testBalancedError <- -bp$ciTable$mean;
testBalancedErrormin <- min(-bp$ciTable$top95)
testBalancedErrormax <- max(-bp$ciTable$low95)

bp <- barPlotCiError(as.matrix(cp$accciTable),metricname = "Accuracy",thesets = cp$thesets,themethod = cp$theMethod,main = "Accuracy",offsets = c(0.5,0.05),args.legend = list(x = "bottomright"),col = terrain.colors(length(cp$theMethod)))

pander::pander(bp$ciTable$mean,caption = "Accuracy",round = 3)

LASSO	BSWiMS	RF	RPART	KNN	ENS	SVM
0.6986	0.6849	0.6644	0.6575	0.6849	0.6781	0.6164

testAccuracy <- bp$ciTable$mean;
testAccuracymin <- min(bp$ciTable$low95)
testAccuracymax <- max(bp$ciTable$top95)

bp <- barPlotCiError(as.matrix(cp$aucTable),metricname = "AUC",thesets = cp$thesets,themethod = cp$theMethod,main = "ROC AUC",offsets = c(0.5,0.05),args.legend = list(x = "bottomright"),col = terrain.colors(length(cp$theMethod)))


pander::pander(bp$ciTable$mean,caption = "ROC AUC",round = 3)

BSWiMS	RF	RPART	LASSO	SVM	KNN	ENS
0.7303	0.725	0.7014	0.7407	0.6904	0.7126	0.7075

testAUC <- bp$ciTable$mean;
testAUCmin <- min(bp$ciTable$low95)
testAUCmax <- max(bp$ciTable$top95)

Filter Results

pander::pander(cp$featsize)

Table continues below
BSWiMS	LASSO	RPART	RF.ref	IDI	NRI	t-test	Wilcoxon	Spearman
6.16	6.03	12.63	6.16	9.05	9.15	7.44	6.49	6.27

Kendall	mRMR
6.35	10.44


par(mfrow = c(1,2))
barplot(cp$jaccard[order(-cp$jaccard)],las = 2,cex.axis = 1,cex.names = 0.7,main = "Jaccard Index",ylab = "Jaccard")
selectJaccard <- cp$jaccard[c("BSWiMS","LASSO","RF.ref","IDI","Kendall","mRMR")]
unsize <- cp$featsize + 1
barplot(unsize[order(unsize)],las = 2,cex.axis = 1,cex.names = 0.7,log = "y",main = "Number of Features",ylab = "# of Features+1")

selectFilFeatsize <- -cp$featsize[c("BSWiMS","LASSO","RF.ref","IDI","Kendall","mRMR")]
par(mfrow = c(1,1))


bp <- barPlotCiError(as.matrix(cp$errorciTable_filter),metricname = "Balanced Error",thesets = cp$theFiltersets,themethod = cp$theClassMethod,main = "Balanced Error",scoreDirection = "<",args.legend = list(x = "topleft"),col = terrain.colors(length(cp$theClassMethod)))


pander::pander(bp$ciTable$mean,caption = "Balanced Error Rate",round = 3)

Balanced Error Rate (continued below)
	BSWiMS	RF.ref	IDI	NRI	t-test	Wilcoxon
NC Spearman	0.3	0.3	0.31	0.323	0.332	0.325
SVM	0.311	0.313	0.324	0.331	0.333	0.31
NC RSS	0.308	0.336	0.324	0.337	0.337	0.347
KNN	0.301	0.318	0.327	0.301	0.301	0.345
Naive Bayes	0.5	0.5	0.5	0.5	0.5	0.5

	Spearman	Kendall	RPART	mRMR	LASSO
NC Spearman	0.302	0.317	0.3	0.287	0.319
SVM	0.326	0.32	0.355	0.358	0.335
NC RSS	0.351	0.371	0.327	0.377	0.341
KNN	0.305	0.312	0.345	0.356	0.391
Naive Bayes	0.5	0.5	0.504	0.509	0.5

testFilBalancedError <- -apply(bp$ciTable$mean,2,max)[c("BSWiMS","LASSO","RF.ref","IDI","Kendall","mRMR")];
testFilBalancedErrormax <- -min(apply(bp$ciTable$low95,2,min)[c("BSWiMS","LASSO","RF.ref","IDI","Kendall","mRMR")])
testFilBalancedErrormin <- -max(apply(bp$ciTable$top95,2,max)[c("BSWiMS","LASSO","RF.ref","IDI","Kendall","mRMR")])

bp <- barPlotCiError(as.matrix(cp$accciTable_filter),metricname = "Accuracy",thesets = cp$theFiltersets,themethod = cp$theClassMethod,main = "Accuracy",offsets = c(0.5,0.05),args.legend = list(x = "bottomleft"),col = terrain.colors(length(cp$theClassMethod)))


pander::pander(bp$ciTable$mean,caption = "Accuracy",round = 3)

Accuracy (continued below)
	BSWiMS	RF.ref	Wilcoxon	Spearman	IDI	NRI
NC Spearman	0.699	0.699	0.658	0.692	0.692	0.685
SVM	0.685	0.692	0.692	0.671	0.664	0.664
NC RSS	0.699	0.692	0.678	0.678	0.664	0.658
KNN	0.678	0.658	0.637	0.664	0.651	0.678
Naive Bayes	0.5	0.5	0.5	0.5	0.5	0.5

	t-test	Kendall	RPART	LASSO	mRMR
NC Spearman	0.678	0.678	0.699	0.685	0.705
SVM	0.658	0.678	0.63	0.664	0.63
NC RSS	0.658	0.658	0.651	0.671	0.616
KNN	0.678	0.664	0.623	0.589	0.61
Naive Bayes	0.5	0.5	0.37	0.5	0.363

testFilAccuracy <- apply(bp$ciTable$mean,2,max)[c("BSWiMS","LASSO","RF.ref","IDI","Kendall","mRMR")];
testFilAccuracymin <- min(apply(bp$ciTable$low95,2,min)[c("BSWiMS","LASSO","RF.ref","IDI","Kendall","mRMR")])
testFilAccuracymax <- max(apply(bp$ciTable$top95,2,max)[c("BSWiMS","LASSO","RF.ref","IDI","Kendall","mRMR")])

bp <- barPlotCiError(as.matrix(cp$aucTable_filter),metricname = "AUC",thesets = cp$theFiltersets,themethod = cp$theClassMethod,main = "ROC AUC",offsets = c(0.5,0.05),args.legend = list(x = "bottomleft"),col = terrain.colors(length(cp$theClassMethod)))

pander::pander(bp$ciTable$mean,caption = "ROC AUC",round = 3)

ROC AUC (continued below)
	BSWiMS	LASSO	RF.ref	IDI	t-test	Wilcoxon
KNN	0.718	0.704	0.736	0.712	0.708	0.744
NC RSS	0.702	0.695	0.688	0.699	0.707	0.686
NC Spearman	0.738	0.698	0.739	0.715	0.699	0.723
SVM	0.734	0.721	0.736	0.711	0.71	0.741
Naive Bayes	0.5	0.5	0.5	0.5	0.5	0.5

	Spearman	Kendall	RPART	NRI	mRMR
KNN	0.716	0.738	0.673	0.703	0.671
NC RSS	0.678	0.676	0.672	0.708	0.682
NC Spearman	0.712	0.738	0.707	0.726	0.731
SVM	0.742	0.74	0.675	0.708	0.691
Naive Bayes	0.5	0.5	0.505	0.454	0.408

testFilAUC <- apply(bp$ciTable$mean,2,max)[c("BSWiMS","LASSO","RF.ref","IDI","Kendall","mRMR")];
testFilAUCmin <- min(apply(bp$ciTable$low95,2,min)[c("BSWiMS","LASSO","RF.ref","IDI","Kendall","mRMR")])
testFilAUCmax <- max(apply(bp$ciTable$top95,2,max)[c("BSWiMS","LASSO","RF.ref","IDI","Kendall","mRMR")])

Radar Plots

op <- par(no.readonly = TRUE)

library(fmsb)
par(mfrow = c(1,2),xpd = TRUE,pty = "s",mar = c(1,1,1,1))

classRanks <- c(testBalancedErrormax,testAccuracymax,testAUCmax,max(learningTime))
classRanks <- rbind(classRanks,c(testBalancedErrormin,0,0,min(learningTime)))

classRanks <- as.data.frame(rbind(classRanks,cbind(testBalancedError[names(learningTime)],testAccuracy[names(learningTime)],testAUC[names(learningTime)],learningTime)))
colnames(classRanks) <- c("B.Error","Accuracy","ROC AUC","CPU Time")

colors_border = c( rgb(1.0,0.0,0.0,1.0), rgb(0.0,1.0,0.0,1.0) , rgb(0.0,0.0,1.0,1.0), rgb(0.2,0.2,0.0,1.0), rgb(0.0,1.0,1.0,1.0), rgb(1.0,0.0,1.0,1.0), rgb(0.0,0.0,0.0,1.0) )
colors_in = c( rgb(1.0,0.0,0.0,0.1), rgb(0.0,1.0,0.0,0.1) , rgb(0.0,0.0,1.0,0.1),rgb(1.0,1.0,0.0,0.1), rgb(0.0,1.0,1.0,0.1) , rgb(1.0,0.0,1.0,0.1), rgb(0.0,0.0,0.0,0.1) )
radarchart(classRanks,axistype = 0,maxmin = T,pcol = colors_border,pfcol = colors_in,plwd = 4,plty = 1, cglcol = "grey", cglty = 1,axislabcol = "black",cglwd = 0.8, vlcex  = 0.5 ,title = "Prediction Model")

legend("topleft",legend = rownames(classRanks[-c(1,2),]),bty = "n",pch = 20,col = colors_in,text.col = colors_border,cex = 0.5,pt.cex = 2)


filterRanks <- c(max(selectJaccard),max(selectFilFeatsize),testFilBalancedErrormax, testAccuracymax,testAUCmax);

filterRanks <- rbind(filterRanks,c(0,min(selectFilFeatsize),testFilBalancedErrormin, 0,0));

filterRanks <- as.data.frame(rbind(filterRanks,cbind(selectJaccard,selectFilFeatsize,testFilBalancedError, testFilAccuracy,testFilAUC)));
colnames(filterRanks) <- c("Jaccard","Size","B.Error","Accuracy","ROC.ACU")

colors_border = c( rgb(1.0,0.0,0.0,1.0), rgb(0.0,1.0,0.0,1.0) , rgb(0.0,0.0,1.0,1.0), rgb(0.2,0.2,0.0,1.0), rgb(0.0,1.0,1.0,1.0),rgb(1.0,0.0,1.0,1.0) )
colors_in = c( rgb(1.0,0.0,0.0,0.1), rgb(0.0,1.0,0.0,0.1) , rgb(0.0,0.0,1.0,0.1),rgb(1.0,1.0,0.0,0.1), rgb(0.0,1.0,1.0,0.1), rgb(1.0,0.0,1.0,0.1)  )
radarchart(filterRanks,axistype = 0,maxmin = T,pcol = colors_border,pfcol = colors_in,plwd = 4,plty = 1, cglcol = "grey", cglty = 1,axislabcol = "black",cglwd = 0.8, vlcex  = 0.5,title = "Filter Method" )


legend("topleft",legend = rownames(filterRanks[-c(1,2),]),bty = "n",pch = 20,col = colors_in,text.col = colors_border,cex = 0.5,pt.cex = 2)


par(mfrow = c(1,1))

detach("package:fmsb", unload=TRUE)

par(op)

Feature Analysis



rm <- rowMeans(cp$featureSelectionFrequency)
selFrequency <- cp$featureSelectionFrequency[rm > 0.1,]
gplots::heatmap.2(selFrequency,trace = "none",mar = c(10,10),main = "Features",cexRow = 0.5)



topFeat <- min(ncol(BSWiMSMODEL$bagging$formulaNetwork),30);
gplots::heatmap.2(BSWiMSMODEL$bagging$formulaNetwork[1:topFeat,1:topFeat],trace="none",mar = c(10,10),main = "B:SWiMS Formula Network")

pander::pander(summary(BSWiMSMODEL$bagging$bagged.model,caption="Colon",round = 3))

coefficients:

Table continues below
	Estimate	lower	OR	upper	u.Accuracy
grade	0.7293	1.577	2.074	2.727	0.6642
gleason5	-0.5967	0.3353	0.5507	0.9044	0.5746
tetraploid	0.2521	1.079	1.287	1.534	0.6343
diploid	-0.2634	0.5971	0.7684	0.9888	0.6567
notAneuploid	-0.4586	0.5222	0.6321	0.7652	0.6567

Table continues below
	r.Accuracy	full.Accuracy	u.AUC	r.AUC	full.AUC
grade	0.6057	0.6717	0.7007	0.6502	0.7031
gleason5	0.5572	0.6167	0.6517	0.6158	0.6714
tetraploid	0.6321	0.6403	0.6383	0.57	0.6693
diploid	0.5756	0.6677	0.6732	0.6423	0.6926
notAneuploid	0.6269	0.6553	0.5349	0.64	0.6736

	IDI	NRI	z.IDI	z.NRI	Frequency
grade	0.1144	0.7987	4.618	5.988	1
gleason5	0.09202	0.56	4.247	4.761	1
tetraploid	0.05462	0.6588	3.958	4.612	1
diploid	0.04217	0.6996	3.561	4.951	1
notAneuploid	0.04113	0.6889	3.234	4.848	0.8

Accuracy: 0.01493
tAUC: NA
bootstrap:



hm <- heatMaps(Outcome = theOutcome,data = theData[,c(theOutcome,rownames(selFrequency))],title = "Heat Map",Scale = TRUE,hCluster = "col",cexRow = 0.25,cexCol = 0.75,srtCol = 45)

[1] 2

vlist <- rownames(selFrequency)
vlist <- cbind(vlist,vlist)
univ <- univariateRankVariables(variableList = vlist,formula = paste(theOutcome,"~1"),Outcome = theOutcome,data = theData,type = "LM",rankingTest = "Ztest",uniType = "Regression")[,c("cohortMean","cohortStd","kendall.r","kendall.p")] 


cnames <- colnames(univ);
univ <- cbind(univ,rm[rownames(univ)])
colnames(univ) <- c(cnames,"Frequency")
univ <- univ[order(-univ[,5]),]
pander::pander(univ,caption = "Features",round = 4)

Features
	cohortMean	cohortStd	kendall.r	kendall.p	Frequency
grade	61	79	0.4187	0	1
gleason5	108	38	-0.3574	0	0.9927
diploid	79	67	-0.3354	1e-04	0.9882
tetraploid	78	68	0.2517	0.0024	0.8364
g2	14.04	8.149	0.1644	0.0158	0.6573
gleason910	139	7	0.2265	0.0064	0.6336
gleason8	125	21	0.1711	0.0393	0.5818
notAneuploid	11	135	-0.1576	0.0578	0.5373
gleason7	108	38	0.1276	0.1245	0.4627
age	63	5.822	-0.0812	0.2431	0.4082
gleason4	140	6	-0.0157	0.8504	0.2664
eet	36	110	-0.0225	0.786	0.2364
gleason6	112	34	0.0143	0.8637	0.2327

stage C prostate

José Tamez-Peña

July 10, 2018

FRESA.CAD Benchmark

Stage C Data Set

BSWiMS on a Binary Outcome

Model Building

Summary Table

Cross Validation Performance

BSWiMS on a Continous Outcome

Model Building

Summary Table

Cross Validation Performance

BSWiMS on a Ordinal Outcome

Model Building

Summary Table

Cross Validation Performance

BSWiMS on a Survival Outcome

Model Building

Summary Table

Cross Validation Performance

Benchmark

Results

Classifier Results

Filter Results

Radar Plots

Feature Analysis