Breast Cancer: Wisconsin

1.0.1 Libraries

library(survival)
library(FRESA.CAD)

## Loading required package: Rcpp

## Loading required package: stringr

## Loading required package: miscTools

## Loading required package: Hmisc

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:base':
## 
##     format.pval, units

## Loading required package: pROC

## Type 'citation("pROC")' for a citation.

## 
## Attaching package: 'pROC'

## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var

#source("~/GitHub/FRESA.CAD/R/RRPlot.R")
#source("~/GitHub/FRESA.CAD/R/PoissonEventRiskCalibration.R")
# Snapshot the current graphical parameters (only the settable ones) in `op`.
# NOTE(review): no par(op) restore is visible in this part of the file - confirm
# it happens later.
op <- par(no.readonly = TRUE)
# Set pander's 'digits' option to 3 for the tables printed in this report.
pander::panderOptions('digits', 3)
#pander::panderOptions('table.split.table', 400)
# Keep trailing zeros in printed numbers (e.g. 0.660 rather than 0.66).
pander::panderOptions('keep.trailing.zeros',TRUE)
# Single-panel plotting layout (a 1 x 1 layout matrix).
layout(matrix(1:1, nrow=1))

1.0.2 Wisconsin Data Set

# Load the raw data file; it has no header row, so columns are named V1, V2, ...
# Missing entries are coded as "?" in this file (per the UCI wpbc.names notes -
# confirm against your copy). Declaring them in na.strings makes the affected
# column numeric at read time instead of character (or factor on R < 4.0,
# where a later as.numeric() would silently return level codes).
dataBreast <- read.csv("~/GitHub/RISKPLOTS/DATA/wpbc.data",
                       header = FALSE,
                       na.strings = c("NA", "?"))
# Count the records per outcome label stored in column V2.
table(dataBreast$V2)

## 
##   N   R 
## 151  47

# Use the first column (V1) as row names, then drop it from the features.
rownames(dataBreast) <- dataBreast$V1
dataBreast$V1 <- NULL
# Binary event indicator: 1 when V2 is "R", 0 otherwise.
dataBreast$status <- 1*(dataBreast$V2=="R")
dataBreast$V2 <- NULL
# V3 becomes the time-to-event column.
dataBreast$time <- dataBreast$V3
dataBreast$V3 <- NULL
# Coerce every column to numeric in place. Assigning into dataBreast[] keeps
# the object a data.frame and preserves the row names set above, whereas
# sapply() would return a bare matrix and drop them.
dataBreast[] <- lapply(dataBreast, as.numeric)

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

# Keep only rows without any NA and make sure the result is a data.frame.
dataBreast <- as.data.frame(dataBreast[complete.cases(dataBreast),])
# Event counts (0 / 1) after removing incomplete rows.
table(dataBreast$status)

## 
##   0   1 
## 148  46

1.1 Exploring Raw Features with RRPlot

# Treat a column as continuous when it holds more than 10 distinct values.
# vapply() always yields exactly one integer per column; apply(..., 2, unique)
# collapses to a matrix (not a list) whenever all columns share the same
# number of distinct values, which would make the per-column count wrong.
convar <- colnames(dataBreast)[
  vapply(dataBreast, function(x) length(unique(x)), integer(1)) > 10
]
# The follow-up time is part of the outcome, not a candidate feature.
convar <- convar[convar != "time"]
# Univariate screening of the continuous features against `status`.
# NOTE(review): the named values printed below look like adjusted p-values -
# confirm in the FRESA.CAD documentation of univariate_BinEnsemble().
topvar <- univariate_BinEnsemble(dataBreast[, c("status", convar)], "status")
pander::pander(topvar)

V35	V24	V34	V7	V16	V14	V17
0.0261	0.0261	0.0261	0.0623	0.126	0.126	0.126

# Run RRPlot() on each of the (at most five) first features in `topvar`.
topv <- min(5, length(topvar))
# head() returns an empty selection when topv is 0; 1:topv would expand to
# c(1, 0) and pick a non-existent first element instead.
topFive <- head(names(topvar), topv)
# Preallocate the result list and name it up front, so each result is stored
# under its feature name and no separate position counter is required.
RRanalysis <- vector("list", length(topFive))
names(RRanalysis) <- topFive
for (topf in topFive)
{
  # Column 1: event indicator; column 2: raw value of the current feature.
  # (Pass plotRR = FALSE here to skip the plots.)
  RRanalysis[[topf]] <- RRPlot(cbind(dataBreast$status, dataBreast[, topf]),
                               atRate = c(0.90, 0.80),
                               timetoEvent = dataBreast$time,
                               title = topf)
}

1.2 Reporting the Metrics

# Key operating points of the first feature in topFive; the caption is the feature name.
pander::pander(RRanalysis[[1]]$keyPoints,caption=topFive[1])

V35
	Thr	RR	RR_LCI	RR_UCI	SEN	SPE	BACC
@:0.9	1.00e+01	1.33	0.678	2.63	0.152	0.89189	0.522
@:0.8	3.00e+00	2.32	1.423	3.77	0.478	0.77703	0.628
@MAX_BACC	1.00e+00	2.66	1.469	4.83	0.739	0.55405	0.647
@MAX_RR	-3.85e-09	3.11	0.804	11.99	0.957	0.14865	0.553
@SPE100	-9.97e-09	1.00	0.000	0.00	1.000	0.00676	0.503

# Key operating points of the second feature in topFive; the caption is the feature name.
pander::pander(RRanalysis[[2]]$keyPoints,caption=topFive[2])

V24
	Thr	RR	RR_LCI	RR_UCI	SEN	SPE	BACC
@:0.9	25.4	1.94	1.131	3.34	0.239	0.8919	0.566
@:0.8	23.9	1.67	1.001	2.78	0.348	0.7905	0.569
@MAX_BACC	20.3	2.45	1.353	4.44	0.739	0.5270	0.633
@MAX_RR	16.6	3.87	0.991	15.08	0.957	0.1824	0.569
@SPE100	15.5	30.33	0.063	14608.59	1.000	0.0811	0.541

# BACC and RR of the second feature, read from its "@MAX_BACC" key-point row.
RRanalysis[[2]]$keyPoints["@MAX_BACC",c("BACC","RR")]

           BACC       RR

@MAX_BACC 0.6330787 2.451923

# Collect the per-feature RRPlot() summaries into one matrix per metric.
# Each lapply() pulls the same element out of every feature's result and
# do.call(rbind, ...) stacks them, giving one row per entry of topFive.
ROCAUC <- do.call(rbind, lapply(topFive, function(feat) {
  RRanalysis[[feat]]$ROCAnalysis$aucs
}))
CstatCI <- do.call(rbind, lapply(topFive, function(feat) {
  RRanalysis[[feat]]$c.index$cstatCI
}))
LogRangp <- do.call(rbind, lapply(topFive, function(feat) {
  RRanalysis[[feat]]$surdif$pvalue
}))
Sensitivity <- do.call(rbind, lapply(topFive, function(feat) {
  RRanalysis[[feat]]$ROCAnalysis$sensitivity
}))
Specificity <- do.call(rbind, lapply(topFive, function(feat) {
  RRanalysis[[feat]]$ROCAnalysis$specificity
}))
# Balanced accuracy at the "@MAX_BACC" key point.
MAXBACC <- do.call(rbind, lapply(topFive, function(feat) {
  RRanalysis[[feat]]$keyPoints["@MAX_BACC", c("BACC")]
}))
# Relative risk taken from the first key-point row.
RREst <- do.call(rbind, lapply(topFive, function(feat) {
  RRanalysis[[feat]]$keyPoints[1, c("RR")]
}))

# Label the rows of every table with the feature names.
rownames(ROCAUC) <- topFive
rownames(CstatCI) <- topFive
rownames(LogRangp) <- topFive
rownames(Sensitivity) <- topFive
rownames(Specificity) <- topFive
rownames(MAXBACC) <- topFive
rownames(RREst) <- topFive

# ROC AUC per feature (ROCAnalysis$aucs of each RRPlot() result).
pander::pander(ROCAUC)

	est	lower	upper
V35	0.660	0.570	0.749
V24	0.633	0.542	0.724
V34	0.661	0.575	0.748
V7	0.610	0.515	0.705
V16	0.598	0.504	0.692

# C-index summary per feature (c.index$cstatCI of each RRPlot() result).
pander::pander(CstatCI)

	mean.C Index	median	lower	upper
V35	0.643	0.642	0.556	0.722
V24	0.677	0.678	0.593	0.756
V34	0.664	0.666	0.594	0.734
V7	0.667	0.667	0.589	0.745
V16	0.614	0.615	0.527	0.701

# Per-feature p-value taken from surdif$pvalue of each RRPlot() result
# (the surdif element is reported under the caption "Logrank test" in this file).
pander::pander(LogRangp)

V35	0.00104
V24	0.00938
V34	0.00282
V7	0.07332
V16	0.02135

# Sensitivity per feature (ROCAnalysis$sensitivity of each RRPlot() result).
pander::pander(Sensitivity)

	est	lower	upper
V35	0.152	0.0634	0.289
V24	0.239	0.1259	0.388
V34	0.152	0.0634	0.289
V7	0.152	0.0634	0.289
V16	0.109	0.0362	0.236

# Specificity per feature (ROCAnalysis$specificity of each RRPlot() result).
pander::pander(Specificity)

	est	lower	upper
V35	0.899	0.838	0.942
V24	0.899	0.838	0.942
V34	0.892	0.830	0.937
V7	0.899	0.838	0.942
V16	0.899	0.838	0.942

# Balanced accuracy at the "@MAX_BACC" key point, one row per feature.
pander::pander(MAXBACC)

V35	0.647
V24	0.633
V34	0.638
V7	0.621
V16	0.614

# RR read from the first row of each feature's keyPoints table.
pander::pander(RREst)

V35	1.33
V24	1.94
V34	1.33
V7	1.33
V16	1.00

# Side-by-side summary: first column (point estimate) of each metric table,
# plus the single-column RR and MAX_BACC tables, one row per feature.
meanMatrix <- cbind(
  ROCAUC[, 1],
  CstatCI[, 1],
  RREst,
  Sensitivity[, 1],
  Specificity[, 1],
  MAXBACC
)
colnames(meanMatrix) <- c(
  "ROCAUC", "C-Stat", "RR",
  "Sen", "Spe", "MAX_BACC"
)
pander::pander(meanMatrix)

	ROCAUC	C-Stat	RR	Sen	Spe	MAX_BACC
V35	0.660	0.643	1.33	0.152	0.899	0.647
V24	0.633	0.677	1.94	0.239	0.899	0.633
V34	0.661	0.664	1.33	0.152	0.892	0.638
V7	0.610	0.667	1.33	0.152	0.899	0.621
V16	0.598	0.614	1.00	0.109	0.899	0.614

1.3 Modeling

# Fit a FRESA.CAD BSWiMS model for the survival outcome Surv(time, status).
# The formula has no predictors on the right-hand side (~1); the features
# reported in the summary are picked by BSWiMS.model() itself.
# NOTE(review): NumberofRepeats = 10 presumably sets the number of resampling
# repetitions - confirm in the FRESA.CAD documentation.
ml <- BSWiMS.model(Surv(time,status)~1,data=dataBreast,NumberofRepeats = 10)

[+++++++++++++++++++++++++++++++++++++++++++++++++++++]…..

# Summarise the fitted model and print its coefficient table.
sm <- summary(ml)
pander::pander(sm$coefficients)

Table continues below
	Estimate	lower	HR	upper	u.Accuracy	r.Accuracy
V24	5.11e-02	1.02	1.05	1.09	0.598	0.237
V26	4.45e-03	1.00	1.00	1.01	0.593	0.271
V27	2.28e-04	1.00	1.00	1.00	0.608	0.273
V34	1.24e-02	1.00	1.01	1.02	0.634	0.260
V7	5.27e-08	1.00	1.00	1.00	0.588	0.237
V35	1.89e-03	1.00	1.00	1.00	0.727	0.602
V6	9.39e-08	1.00	1.00	1.00	0.577	0.237

Table continues below
	full.Accuracy	u.AUC	r.AUC	full.AUC	IDI	NRI	z.IDI
V24	0.598	0.609	0.500	0.609	0.0619	0.437	2.87
V26	0.594	0.598	0.510	0.600	0.0624	0.394	2.76
V27	0.609	0.608	0.510	0.608	0.0563	0.435	2.76
V34	0.632	0.618	0.506	0.618	0.0315	0.467	2.41
V7	0.588	0.595	0.500	0.595	0.0487	0.380	2.30
V35	0.616	0.641	0.604	0.604	0.0283	0.551	2.26
V6	0.577	0.588	0.500	0.588	0.0459	0.353	2.19

	z.NRI	Delta.AUC	Frequency
V24	2.67	0.109136	1.0
V26	2.39	0.089564	1.0
V27	2.64	0.097302	1.0
V34	2.83	0.111465	1.0
V7	2.30	0.094888	0.8
V35	3.41	-0.000464	0.8
V6	2.13	0.088132	0.1

1.4 Cox Model Performance

Here we evaluate the model using the RRPlot() function.

1.4.1 The evaluation of the raw Cox model with RRPlot()

Here we use the predicted event probability, assuming a baseline hazard for events (`h0`) estimated from the training data.

# Model prediction for every subject in the training data.
index <- predict(ml, dataBreast)
# Time window: twice the mean observed time among subjects with an event,
# rounded to a whole number.
timeinterval <- round(2 * mean(dataBreast$time[which(dataBreast$status == 1)]), 0)

# h0 = (events that occurred within the window) /
#      (subjects observed beyond the window or having an event).
h0 <- sum((dataBreast$status != 0) & (dataBreast$time <= timeinterval)) /
  sum((dataBreast$time > timeinterval) | (dataBreast$status == 1))
pander::pander(
  t(c(h0 = h0, timeinterval = timeinterval)),
  caption = "Initial Parameters"
)

Initial Parameters
h0	timeinterval
0.323	51

# Two-column input for RRPlot(): the event indicator and the value returned by
# ppoisGzero(index, h0).
# NOTE(review): ppoisGzero() presumably maps the model index and h0 to an event
# probability - confirm in the FRESA.CAD documentation.
rdata <- cbind(dataBreast$status,ppoisGzero(index,h0))
# Carry the subject identifiers over from the data set.
rownames(rdata) <- rownames(dataBreast)

# Evaluate the model on the training data, using the same atRate settings as
# the raw-feature analysis and the time window computed above.
rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataBreast$time,
                     title="Raw Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

1.4.2 Uncalibrated Performance Report

# Key operating points of the model, transposed so each threshold is a column.
pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")

Threshold values
	@:0.9	@:0.8	@MAX_BACC	@MAX_RR	@SPE100	p(0.5)
Thr	0.41932	0.361166	0.2522	0.1787	1.61e-01	0.50159
RR	2.18301	1.833456	2.4016	3.5569	2.77e+01	2.49901
RR_LCI	1.30105	1.111407	1.3254	0.9152	5.75e-02	1.40627
RR_UCI	3.66282	3.024598	4.3518	13.8238	1.33e+04	4.44086
SEN	0.26087	0.369565	0.7391	0.9565	1.00e+00	0.15217
SPE	0.89865	0.797297	0.5203	0.1689	7.43e-02	0.95946
BACC	0.57976	0.583431	0.6297	0.5627	5.37e-01	0.55582
NetBenefit	0.00603	0.000217	0.0518	0.0888	1.01e-01	0.00496

# O/E ratio estimate with its interval and p-value (OERatio$estimate).
# NOTE(review): O/E presumably means observed/expected events - confirm in the RRPlot() docs.
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Test")

O/E Test
O/E	Low	Upper	p.value
0.821	0.601	1.1	0.204

# Mean, median and 2.5% / 97.5% quantiles reported in the OE95ci element.
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")

O/E Mean
mean	50%	2.5%	97.5%
1.01	1.01	0.953	1.07

# O/A ratio estimate with its interval and p-value (OARatio$estimate).
# NOTE(review): "Acum" presumably refers to accumulated expected events - confirm in the RRPlot() docs.
pander::pander(t(rrAnalysisTrain$OARatio$estimate),caption="O/Acum Test")

O/Acum Test
O/A	Low	Upper	p.value
0.946	0.693	1.26	0.774

# Mean, median and 2.5% / 97.5% quantiles reported in the OAcum95ci element.
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")

O/Acum Mean
mean	50%	2.5%	97.5%
0.922	0.922	0.914	0.929

# C-index summary of the model on the training data (c.index$cstatCI).
pander::pander(t(rrAnalysisTrain$c.index$cstatCI),caption="C. Index")

C. Index
mean.C Index	median	lower	upper
0.683	0.684	0.607	0.764

# ROC AUC estimate with lower and upper bounds (ROCAnalysis$aucs).
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")

ROC AUC
est	lower	upper
0.64	0.549	0.732

# Sensitivity estimate with lower and upper bounds; printed without t(),
# unlike the neighbouring tables.
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")

Sensitivity
est	lower	upper
0.261	0.143	0.411

# Specificity estimate with lower and upper bounds; printed without t(),
# unlike the neighbouring tables.
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")

Specificity
est	lower	upper
0.899	0.838	0.942

# Thresholds stored in thr_atP, one per requested atRate value (90% and 80%).
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")

Probability Thresholds
90%	80%
0.419	0.36

# Survival-difference test across the risk classes formed by RRPlot()
# (printed below with classes 0, 1 and 2).
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")

Logrank test Chisq = 12.346960 on 2 degrees of freedom, p = 0.002084
	N	Observed	Expected	(O-E)^2/E	(O-E)^2/V
class=0	147	29	36.99	1.725	8.974
class=1	20	5	4.11	0.193	0.216
class=2	27	12	4.90	10.269	11.609