i— title: “BRCA Risk of Metastasis” author: “José Tamez-Peña” date: “Sep 20, 2017” output: word_document: fig_height: 6 fig_width: 8 toc: yes —

BRCA Analysis


library(readr)
library("epiR")
library("FRESA.CAD")
library(network)
library(GGally)
library("R.matlab")
library("gplots")
library("glmnet")

#BRCAdata <- readMat("./BRCA_2002/DMMPLCN_080423.mat")
BRCAdata <- readMat("./DMMPLCN_080423.mat")


a=as.numeric(Sys.time());
set.seed(a);

error.bar <- function(x, y, upper, lower=upper, length=0.05,...){
if(length(x) != length(y) | length(y) !=length(lower) | length(lower) != length(upper))
stop("vectors must be same length")
arrows(x,y+upper, x, y-lower, angle=90, code=3, length=length, ...)
}

barPlotCiError<-  function(citable,metricname,thesets,themethod,main,...)
{
colnames(citable) <- c(metricname,"lower","upper")
rownames(citable) <- rep(thesets,length(themethod))
pander::pander(citable,caption=main,round = 3)
citable <- citable[order(rep(1:length(thesets),length(themethod))),]
barmatrix <- matrix(citable[,1],length(themethod),length(thesets))
colnames(barmatrix) <- thesets
rownames(barmatrix) <- themethod
pander::pander(barmatrix,caption=main,round = 3)
barp <- barplot(barmatrix,cex.names=0.7,las=2,ylim=c(0.0,1.0),main=main,ylab=metricname,beside=TRUE,legend = themethod,...)
error.bar(barp,citable[,1],citable[,3]-citable[,1],citable[,1]-citable[,2])
return(barp)
}

summaryBRCA <- function(data)
{
  sumBC <- NULL
  sumBC$age <- c(mean(data$Age,na.rm=TRUE),sd(data$Age,na.rm=TRUE))
  sumBC$size <- c(mean(data$size,na.rm=TRUE),sd(data$size,na.rm=TRUE))
  sumBC$grade <- table(data$grade)
  sumBC$ER <- table(data$er)
  sumBC$type <- table(data$typeBRCA)
  sumBC$t.dmfs <- c(mean(data$t.dmfs,na.rm=TRUE),sd(data$t.dmfs,na.rm=TRUE))
  sumBC$e.dmfs <- table(data$e.dmfs)
  sumBC$t.sos <- c(mean(data$t.sos,na.rm=TRUE),sd(data$t.sos,na.rm=TRUE))
  sumBC$e.sos <- table(data$e.sos)
  sumBC$ln <- table(data$ln)
  cat(sprintf("Age: \t %5.1f (%4.1f)\n",sumBC$age[1],sumBC$age[2]))
  cat(sprintf("Size: \t %5.1f (%4.1f)\n",sumBC$size[1],sumBC$size[2]))
  cat("Grade: \t ",sumBC$grade,"\n")
  cat(sprintf("ER: \t  %d (%d) \n",sumBC$ER[1],sumBC$ER[2]))
  cat("Type: \t ",sumBC$type,"\n")
  cat(sprintf("Nodes: \t  %d (%d) \n",sumBC$ln[1],sumBC$ln[2]))
  cat(sprintf("DM Event:\t %d (%d) \n",sumBC$e.dmfs[1],sumBC$e.dmfs[2]))
  cat(sprintf("SOS Event:\t %d (%d) \n",sumBC$e.sos[1],sumBC$e.sos[2]))
  return (sumBC)
}

Data Preparation




BRCAdata2 <- BRCAdata$DMMPLCN
DataExpresion <- as.data.frame(BRCAdata2[3])
subjectsIDs <- unlist(BRCAdata2[4])
genesIDs <- unlist(BRCAdata2[5])
ngenesIDs <- gsub("-","_",genesIDs,fixed = TRUE,perl=FALSE);
ngenesIDs <- gsub("/","_",ngenesIDs,fixed = TRUE,perl=FALSE);
TgenesIDs <- unlist(BRCAdata2[6])
names(TgenesIDs) <- ngenesIDs 

colnames(DataExpresion) <- subjectsIDs
rownames(DataExpresion) <- paste("N",ngenesIDs,sep="_")

#FRESA.CAD works with tranposed data frames
DataExpresionV <- as.data.frame(t(DataExpresion))


otr <- as.data.frame(BRCAdata2[7])
clinical  <- otr$X1.1
subjID <- as.character(unlist(clinical$Simplified.ID))
t.dmfs <- unlist(clinical$t.dmfs)/12
e.dmfs <- unlist(clinical$e.dmfs)
t.sos <- unlist(clinical$t.sos)/12
e.sos <- unlist(clinical$e.sos)
events <- unlist(clinical$Events)
ln <- unlist(clinical$LN)
er <- unlist(clinical$ER.IHC)
lumA <- unlist(clinical$LumA.HU)
lumB <- unlist(clinical$LumB.HU)
HER2 <- unlist(clinical$Her2.HU)
Basal <- unlist(clinical$Basal.HU)
Nrmal <- unlist(clinical$Normal.HU)
typeBRCA <- unlist(clinical$which.max)
size <- unlist(clinical$Size)
sig70 <- unlist(clinical$CIN70.bin)
rsig70 <- unlist(clinical$CIN70)

otr <- as.data.frame(BRCAdata2[8])
clinical2  <- otr$X1.1
ids2 <- as.character(unlist(clinical2$Simplified.ID))
age <- as.vector(clinical2$Age)
names(age) <- ids2
grade <- as.vector(clinical2$Grade)
names(grade) <- ids2
ER2 <- as.vector(clinical2$ER)
names(ER2) <- ids2
PGR <- as.vector(clinical2$PGR)
names(PGR) <- ids2
Node <- as.vector(clinical2$Node)
names(Node) <- ids2

#plot(unlist(clinical$CIN70),unlist(clinical$CIN25))
#table(unlist(clinical$CIN70.bin),unlist(clinical$Events))
#table(unlist(clinical$CIN25.bin),unlist(clinical$Events))
#table(unlist(clinical$CIN25.bin),unlist(clinical$CIN70.bin))

#Small data frame with clinical info:
clicalDF <- data.frame(t.dmfs,e.dmfs,t.sos,e.sos,events,ln,er,typeBRCA,size)
rownames(clicalDF) <- subjID
clicalDF$Age <- age[subjID] 
clicalDF$grade <- grade[subjID]
clicalDF$PGR <- PGR[subjID]



smry <- summaryBRCA(clicalDF) 
#> Age:       56.6 (13.7)
#> Size:      25.3 (13.6)
#> Grade:     158 358 277 
#> ER:    200 (581) 
#> Type:      187 259 179 241 92 
#> Nodes:     527 (249) 
#> DM Event:     374 (135) 
#> SOS Event:    399 (111)

#FRESA.CAD does not like na.
settozero <- is.nan(as.matrix(DataExpresionV)) | is.na(as.matrix(DataExpresionV))
DataExpresionV[settozero] <- 0;


# Events are DM events in less than five years 
Event <- 1*((t.dmfs<5)&(e.dmfs==1))
# Censor events greater than five years
c.t.dmfs <- (t.dmfs>5)&(e.dmfs==1)

#Set the event column
DataExpresionV$Event <- 1*Event

#The Subjects with no event information
included <- !is.na(Event)


#Lets get the subjects with overall LOGITival data
#Mark events that are less than five years
LOGITEvent <- 1*(as.vector((t.sos<5)*e.sos))
LOGITExclude <-  as.vector(is.na(t.sos) | !is.na(Event))
DataExpresionLOGIT <- DataExpresionV
DataExpresionLOGIT$Event <- LOGITEvent
DataExpresionLOGIT <- DataExpresionLOGIT[!LOGITExclude,]
DataExpresionLOGIT$ct.dmfs <- 0;
DataExpresionLOGIT$ct.sos <-  t.sos[!LOGITExclude]
clicalDF$SEvent <- LOGITEvent
clicalDF$DMEvent <- Event
LOGITD <- clicalDF[!LOGITExclude,]
sum(LOGITD$SEvent)
#> [1] 58

DataExpresionV <- DataExpresionV[included,]
LOGITM <- clicalDF[included,]
c.t.dmfsV <- c.t.dmfs[included]
c.t.dmfsV[is.na(c.t.dmfsV)] <- FALSE
ct.dmfsV <- t.dmfs[included]
ct.dmfsV[c.t.dmfsV] <- 5
LOGITM$ct.dmfsV <- ct.dmfsV
DataExpresionV$ct.dmfs <-ct.dmfsV
sum(DataExpresionV$Event)
#> [1] 101

#DataExpresionV$ER <- clicalDF[rownames(DataExpresionV),"er"] 
#DataExpresionV$ER[is.nan(DataExpresionV$ER)]  <- 0
#DataExpresionV$LN <- clicalDF[rownames(DataExpresionV),"ln"]
#DataExpresionV$LN[is.nan(DataExpresionV$LN)]  <- 0

# We have four set of trials. I'll divde the sets
selectedSub <- rownames(DataExpresionV)
set1 <- 1:147
set2 <- 148:227
set3 <- 228:392
set4 <- 393:514
#selectedSub[set1]
#selectedSub[set2]
#selectedSub[set3]
#selectedSub[set4]

# This variables will store the results 
ROCTable <- NULL
epiTable <- NULL
signatures <- NULL
controlDistances <- NULL
caseDistances <- NULL
modSignatures <- NULL
KNNpredict <- NULL

Summary of data sets


smry <- summaryBRCA(clicalDF)

Age: 56.6 (13.7) Size: 25.3 (13.6) Grade: 158 358 277 ER: 200 (581) Type: 187 259 179 241 92 Nodes: 527 (249) DM Event: 374 (135) SOS Event: 399 (111)

pander::pander(smry,caption="Clinical")
  • age: 56.63 and 13.73
  • size: 25.28 and 13.57
  • grade:

    1 2 3
    158 358 277
  • ER:

    0 1
    200 581
  • type:

    1 2 3 4 5
    187 259 179 241 92
  • t.dmfs: 6.905 and 4.562
  • e.dmfs:

    0 1
    374 135
  • t.sos: 7.208 and 3.407
  • e.sos:

    0 1
    399 111
  • ln:

    0 1
    527 249
smry <- summaryBRCA(LOGITM) 

Age: 53.9 (12.8) Size: 26.1 (13.4) Grade: 62 174 158 ER: 157 (351) Type: 109 136 78 144 47 Nodes: 358 (151) DM Event: 374 (135) SOS Event: 94 (28)

pander::pander(smry,caption="All Sets")
  • age: 53.9 and 12.76
  • size: 26.13 and 13.45
  • grade:

    1 2 3
    62 174 158
  • ER:

    0 1
    157 351
  • type:

    1 2 3 4 5
    109 136 78 144 47
  • t.dmfs: 7.245 and 4.438
  • e.dmfs:

    0 1
    374 135
  • t.sos: 6.361 and 3.555
  • e.sos:

    0 1
    94 28
  • ln:

    0 1
    358 151

smry <- summaryBRCA(LOGITM[-set1,]) 

Age: 56.9 (13.3) Size: 27.3 (15.0) Grade: 38 116 93 ER: 106 (255) Type: 72 101 58 103 33 Nodes: 211 (151) DM Event: 275 (87) SOS Event: 94 (28)

pander::pander(smry,caption="Set des Removed")
  • age: 56.94 and 13.25
  • size: 27.29 and 14.97
  • grade:

    1 2 3
    38 116 93
  • ER:

    0 1
    106 255
  • type:

    1 2 3 4 5
    72 101 58 103 33
  • t.dmfs: 6.136 and 3.578
  • e.dmfs:

    0 1
    275 87
  • t.sos: 6.361 and 3.555
  • e.sos:

    0 1
    94 28
  • ln:

    0 1
    211 151
smry <- summaryBRCA(LOGITM[-set2,]) 

Age: 53.6 (12.6) Size: 24.1 (11.4) Grade: 62 174 158 ER: 121 (307) Type: 95 117 66 117 39 Nodes: 330 (99) DM Event: 320 (109) SOS Event: 94 (28)

pander::pander(smry,caption="Set min Removed")
  • age: 53.55 and 12.58
  • size: 24.09 and 11.39
  • grade:

    1 2 3
    62 174 158
  • ER:

    0 1
    121 307
  • type:

    1 2 3 4 5
    95 117 66 117 39
  • t.dmfs: 7.615 and 4.633
  • e.dmfs:

    0 1
    320 109
  • t.sos: 6.361 and 3.555
  • e.sos:

    0 1
    94 28
  • ln:

    0 1
    330 99
smry <- summaryBRCA(LOGITM[-set3,]) 

Age: 51.5 (12.7) Size: 27.5 (13.8) Grade: 38 101 126 ER: 130 (219) Type: 75 91 45 106 32 Nodes: 229 (120) DM Event: 250 (99) SOS Event: 94 (28)

#pander::pander(smry,caption="Set loi Removed")
smry <- summaryBRCA(LOGITM[-set4,]) 

Age: 53.6 (12.0) Size: 26.1 (13.4) Grade: 48 131 97 ER: 114 (272) Type: 85 99 65 106 37 Nodes: 304 (83) DM Event: 277 (110) SOS Event: NA (NA)

#pander::pander(smry,caption="Set chin Removed")

smry <- summaryBRCA(LOGITD) 

Age: 62.5 (14.0) Size: 22.2 (10.5) Grade: 88 176 111 ER: 31 (197) Type: 70 110 86 82 40 Nodes: 147 (76) DM Event: NA (NA) SOS Event: 305 (83)

#pander::pander(smry,caption="SOS Test Set")

roc.signature70 <- plotModels.ROC(cbind(DataExpresionLOGIT$Event,rsig70[!LOGITExclude]),main="70 Signature")

epi.signature70 <- epi.tests(roc.signature70$predictionTable)
pander::pander(epi.signature70$tab,caption="70 Signature")
70 Signature
  Outcome + Outcome - Total
Test + 43 151 194
Test - 15 179 194
Total 58 330 388
pander::pander(summary(epi.signature70),caption="Diangostic Summary 70 Signature")
Diangostic Summary 70 Signature
  est lower upper
aprev 0.5 0.4491 0.5509
tprev 0.1495 0.1155 0.1889
se 0.7414 0.6096 0.8474
sp 0.5424 0.487 0.5971
diag.acc 0.5722 0.5213 0.622
diag.or 3.398 1.816 6.357
nnd 3.524 2.249 10.36
youden 0.2838 0.09655 0.4445
ppv 0.2216 0.1653 0.2867
npv 0.9227 0.8757 0.9561
plr 1.62 1.337 1.963
nlr 0.4768 0.305 0.7454

FRESA.CAD Models


# Model Parameters
Folds <- 1
Repeats <- 1
filter <- 0.01
# Model 1
trainSet <- DataExpresionV[-set1,]
removeLessthan5 <- (trainSet$Event==0) & (trainSet$ct.dmfs<5)
trainSet <- trainSet[!removeLessthan5,]
trainSet$ct.dmfs <- NULL


filename = paste("BRCASignatureLOGITT1",Folds,Repeats,sprintf("%5.4f",filter),".RDATA",sep="_")
system.time(BRCASignatureLOGITT1 <- FRESA.Model(formula = Event ~ 1,trainSet, CVfolds=Folds, repeats=Repeats,filter.p.value=filter,print=TRUE))
save(BRCASignatureLOGITT1,file=filename)

#load(filename)

# Model 2
trainSet <- DataExpresionV[-set2,]
removeLessthan5 <- (trainSet$Event==0) & (trainSet$ct.dmfs<5)
trainSet <- trainSet[!removeLessthan5,]
trainSet$ct.dmfs <- NULL

filename = paste("BRCASignatureLOGITT2",Folds,Repeats,sprintf("%5.4f",filter),".RDATA",sep="_")
system.time(BRCASignatureLOGITT2 <- FRESA.Model(formula = Event ~ 1,trainSet, CVfolds=Folds, repeats=Repeats,filter.p.value=filter ))
save(BRCASignatureLOGITT2,file=filename)
#load(filename)


# Model 3
trainSet <- DataExpresionV[-set3,]
removeLessthan5 <- (trainSet$Event==0) & (trainSet$ct.dmfs<5)
trainSet <- trainSet[!removeLessthan5,]
trainSet$ct.dmfs <- NULL

filename = paste("BRCASignatureLOGITT3",Folds,Repeats,sprintf("%5.4f",filter),".RDATA",sep="_")
system.time(BRCASignatureLOGITT3 <- FRESA.Model(formula = Event ~ 1,trainSet, CVfolds=Folds, repeats=Repeats,filter.p.value=filter))
save(BRCASignatureLOGITT3,file=filename)
#load(filename)

# Model 4
trainSet <- DataExpresionV[-set4,]
removeLessthan5 <- (trainSet$Event==0) & (trainSet$ct.dmfs<5)
trainSet <- trainSet[!removeLessthan5,]
trainSet$ct.dmfs <- NULL

filename = paste("BRCASignatureLOGITT4",Folds,Repeats,sprintf("%5.4f",filter),".RDATA",sep="_")
system.time(BRCASignatureLOGITT4 <- FRESA.Model(formula = Event ~ 1,trainSet, CVfolds=Folds, repeats=Repeats,filter.p.value=filter))
save(BRCASignatureLOGITT4,file=filename)
#load(filename)


# Model All
trainSet <- DataExpresionV
removeLessthan5 <- (trainSet$Event==0) & (trainSet$ct.dmfs<5)
trainSet <- trainSet[!removeLessthan5,]
trainSet$ct.dmfs <- NULL

filename = paste("BRCASignatureLOGITAll",Folds,Repeats,sprintf("%5.4f",filter),".RDATA",sep="_")
system.time(BRCASignatureLOGITALL <- FRESA.Model(formula = Event ~ 1,trainSet, CVfolds=Folds, repeats=Repeats,filter.p.value=filter))
save(BRCASignatureLOGITALL,file=filename)
#load(filename)

FRESA.CAD Models

FRESA.CAD Full Models


trainSet <- DataExpresionV[-set1,]
testSet <- DataExpresionV[set1,]
removeLessthan5 <- (trainSet$Event==0) & (trainSet$ct.dmfs<5)
trainSet <- trainSet[!removeLessthan5,]
etrainSet <- trainSet
etrainSet$Event <- NULL
etrainSet$ct.dmfs <- NULL
etestSet <- testSet
etestSet$Event <- NULL
etestSet$ct.dmfs <- NULL


BAGFORWARD_T1 <-baggedModel(BRCASignatureLOGITT1$BSWiMS.models$formula.list,trainSet,type="LOGIT",univariate=BRCASignatureLOGITT2$univariateAnalysis,frequencyThreshold=0.0,useFreq=FALSE)
tempPredict <- predict(BAGFORWARD_T1$bagged.model,testSet)
ROCTable$BAGFORWARD_T1 <- plotModels.ROC(cbind(testSet$Event,tempPredict),main="Bagging Forward Selection (T1)")$roc.predictor

epiTable$BAGFORWARD_T1 <- epi.tests(table(tempPredict<0,!testSet$Event))


FULLBSWiMS <- predict(BRCASignatureLOGITT1$BSWiMS.model,testSet)
ROCTable$FULLBSWiMS_T1 <- plotModels.ROC(cbind(testSet$Event,FULLBSWiMS),main="B:SWiMS Bagging Model (T1)")

epiTable$FULLBSWiMS_T1 <- epi.tests(ROCTable$FULLBSWiMS_T1$predictionTable)


varlist <- names(BRCASignatureLOGITT1$bagging$frequencyTable[BRCASignatureLOGITT1$bagging$frequencyTable>1])

system.time(modSignatures$BAGGINSig_S1 <- getSignature(data=trainSet,varlist=varlist,Outcome="Event",method="spearman"))
BAGGINGdistance <- -signatureDistance(modSignatures$BAGGINSig_S1$caseTamplate,testSet,"spearman") + signatureDistance(modSignatures$BAGGINSig_S1$controlTemplate,testSet,"spearman") 
ROCTable$BAGGINSig_S1 <-plotModels.ROC(cbind(as.vector(testSet$Event),BAGGINGdistance),main="Bagging spearman (T1)")

epiTable$BAGGINSig_S1 <- epi.tests(ROCTable$BAGGINSig_S1$predictionTable)

enetT1 <- cv.glmnet(as.matrix(etrainSet),as.vector(trainSet$Event),family="binomial");

enetPredict <- predict(enetT1,as.matrix(etestSet),s="lambda.min")
ROCTable$CVLASSO_T1 <- plotModels.ROC(cbind(testSet$Event,as.vector(enetPredict)),main="CV LASSO Model (T1)")

epiTable$CVLASSO_T1 <- epi.tests(ROCTable$CVLASSO_T1$predictionTable)

cenet <- as.matrix(coef(enetT1,s="lambda.min"))
lassoNamesT1 <- names(cenet[as.vector(cenet[,1] != 0),])[-1]
system.time(modSignatures$LASSOSig_S1 <- getSignature(data=trainSet,varlist=lassoNamesT1,Outcome="Event",method="spearman"))

LASSOdistance <- -signatureDistance(modSignatures$LASSOSig_S1$caseTamplate,testSet,"spearman") + signatureDistance(modSignatures$LASSOSig_S1$controlTemplate,testSet,"spearman") 

ROCTable$CVLASSOSig_S1 <- plotModels.ROC(cbind(testSet$Event,as.vector(LASSOdistance)),main="CV LASSO Signature (T1)")  

epiTable$CVLASSOSig_S1 <- epi.tests(ROCTable$CVLASSOSig_S1$predictionTable)


trainSet <- DataExpresionV[-set2,]
testSet <- DataExpresionV[set2,]
removeLessthan5 <- (trainSet$Event==0) & (trainSet$ct.dmfs<5)
trainSet <- trainSet[!removeLessthan5,]
etrainSet <- trainSet
etrainSet$Event <- NULL
etrainSet$ct.dmfs <- NULL
etestSet <- testSet
etestSet$Event <- NULL
etestSet$ct.dmfs <- NULL

BAGFORWARD_T2 <-baggedModel(BRCASignatureLOGITT2$BSWiMS.models$forward.selection.list,trainSet,type="LOGIT",univariate=BRCASignatureLOGITT2$univariateAnalysis,frequencyThreshold=0.1,useFreq=FALSE)
tempPredict <- predict(BAGFORWARD_T2$bagged.model,testSet)
ROCTable$BAGFORWARD_T2 <- plotModels.ROC(cbind(testSet$Event,tempPredict),main="Bagging Forward Selection (T2)")

epiTable$BAGFORWARD_T2 <- epi.tests(table(tempPredict<0,!testSet$Event))

FULLBSWiMS <- predict(BRCASignatureLOGITT2$BSWiMS.model,testSet)
ROCTable$FULLBSWiMS_T2 <- plotModels.ROC(cbind(testSet$Event,FULLBSWiMS),main="B:SWiMS Bagging Model (T2)")

epiTable$FULLBSWiMS_T2 <- epi.tests(table(FULLBSWiMS<0,!testSet$Event))

BAGBSWIMS1 <-baggedModel(BRCASignatureLOGITT2$BSWiMS.models$formula.list,trainSet,type="LOGIT",univariate=BRCASignatureLOGITT2$univariateAnalysis,frequencyThreshold=0.0)
varlist <- c(all.vars(BAGBSWIMS1$bagged.model$formula)[-1],all.vars(BAGFORWARD_T2$bagged.model$formula)[-1])
varlist <- unique(varlist)
if (length(varlist)>150) varlist <- varlist[1:150]
system.time(modSignatures$BAGGINSig_S2 <- getSignature(data=trainSet,varlist=varlist,Outcome="Event",method="spearman"))

BAGGINGdistance <- signatureDistance(modSignatures$BAGGINSig_S2$caseTamplate,testSet,"spearman") - signatureDistance(modSignatures$BAGGINSig_S2$controlTemplate,testSet,"spearman") 

ROCTable$BAGGINSig_S2 <- plotModels.ROC(cbind(as.vector(testSet$Event),-BAGGINGdistance),main="Forward Selection Signature (T2)")  

epiTable$BAGGINSig_S2 <- epi.tests(table(BAGGINGdistance>0,!testSet$Event))


enetT2 <- cv.glmnet(as.matrix(etrainSet),as.vector(trainSet$Event),family="binomial");

enetPredict <- predict(enetT2,as.matrix(etestSet),s="lambda.min")
ROCTable$CVLASSO_T2 <- plotModels.ROC(cbind(testSet$Event,as.vector(enetPredict)),main="CV LASSO Model (T2)")

epiTable$CVLASSO_T2 <- epi.tests(table(enetPredict<0,!testSet$Event))

cenet <- as.matrix(coef(enetT2,s="lambda.min"))
lassoNamesT2 <- names(cenet[as.vector(cenet[,1] != 0),])[-1]
system.time(modSignatures$LASSOSig_S2 <- getSignature(data=trainSet,varlist=lassoNamesT2,Outcome="Event",method="spearman"))

LASSOdistance <- signatureDistance(modSignatures$LASSOSig_S2$caseTamplate,testSet,"spearman") - signatureDistance(modSignatures$LASSOSig_S2$controlTemplate,testSet,"spearman") 

ROCTable$CVLASSOSig_S2 <- plotModels.ROC(cbind(as.vector(testSet$Event),-LASSOdistance),main="CV LASSO Signature (T2)")  

epiTable$CVLASSOSig_S2 <- epi.tests(table(LASSOdistance>0,!testSet$Event))


trainSet <- DataExpresionV[-set3,]
testSet <- DataExpresionV[set3,]
removeLessthan5 <- (trainSet$Event==0) & (trainSet$ct.dmfs<5)
trainSet <- trainSet[!removeLessthan5,]
etrainSet <- trainSet
etrainSet$Event <- NULL
etrainSet$ct.dmfs <- NULL
etestSet <- testSet
etestSet$Event <- NULL
etestSet$ct.dmfs <- NULL

BAGFORWARD_T3 <-baggedModel(BRCASignatureLOGITT3$BSWiMS.models$forward.selection.list,trainSet,type="LOGIT",univariate=BRCASignatureLOGITT3$univariateAnalysis,frequencyThreshold=0.1,useFreq=FALSE)
tempPredict <- predict(BAGFORWARD_T3$bagged.model,testSet)
ROCTable$BAGFORWARD_T3 <- plotModels.ROC(cbind(testSet$Event,tempPredict),main="Bagging Forward Selection (T3)")

epiTable$BAGFORWARD_T3 <- epi.tests(table(tempPredict<0,!testSet$Event))

FULLBSWiMS <- predict(BRCASignatureLOGITT3$BSWiMS.model,testSet)
ROCTable$FULLBSWiMS_T3 <- plotModels.ROC(cbind(testSet$Event,FULLBSWiMS),main="B:SWiMS Bagging Model (T3)")

epiTable$FULLBSWiMS_T3 <- epi.tests(table(FULLBSWiMS<0,!testSet$Event))

BAGBSWIMS1 <-baggedModel(BRCASignatureLOGITT3$BSWiMS.models$formula.list,trainSet,type="LOGIT",univariate=BRCASignatureLOGITT3$univariateAnalysis,frequencyThreshold=0.0)
varlist <- c(all.vars(BAGBSWIMS1$bagged.model$formula)[-1],all.vars(BAGFORWARD_T3$bagged.model$formula)[-1])
varlist <- unique(varlist)
if (length(varlist)>150) varlist <- varlist[1:150]
system.time(modSignatures$BAGGINSig_S3 <- getSignature(data=trainSet,varlist=varlist,Outcome="Event",method="spearman"))

BAGGINGdistance <- signatureDistance(modSignatures$BAGGINSig_S3$caseTamplate,testSet,"spearman") - signatureDistance(modSignatures$BAGGINSig_S3$controlTemplate,testSet,"spearman") 

ROCTable$BAGGINSig_S3 <- plotModels.ROC(cbind(as.vector(testSet$Event),-BAGGINGdistance),main="Forward Selection Signature (T3)")  

epiTable$BAGGINSig_S3 <- epi.tests(table(BAGGINGdistance>0,!testSet$Event))


enetT3 <- cv.glmnet(as.matrix(etrainSet),as.vector(trainSet$Event),family="binomial");

enetPredict <- predict(enetT3,as.matrix(etestSet),s="lambda.min")
ROCTable$CVLASSO_T3 <- plotModels.ROC(cbind(testSet$Event,as.vector(enetPredict)),main="CV LASSO Model (T3)")

epiTable$CVLASSO_T3 <- epi.tests(table(enetPredict<0,!testSet$Event))

cenet <- as.matrix(coef(enetT3,s="lambda.min"))
lassoNamesT3 <- names(cenet[as.vector(cenet[,1] != 0),])[-1]
system.time(modSignatures$LASSOSig_S3 <- getSignature(data=trainSet,varlist=lassoNamesT3,Outcome="Event",method="spearman"))

LASSOdistance <- signatureDistance(modSignatures$LASSOSig_S3$caseTamplate,testSet,"spearman") - signatureDistance(modSignatures$LASSOSig_S3$controlTemplate,testSet,"spearman") 

ROCTable$CVLASSOSig_S3 <- plotModels.ROC(cbind(as.vector(testSet$Event),-LASSOdistance),main="CV LASSO Signature (T3)")  

epiTable$CVLASSOSig_S3 <- epi.tests(table(LASSOdistance>0,!testSet$Event))



trainSet <- DataExpresionV[-set4,]
testSet <- DataExpresionV[set4,]
removeLessthan5 <- (trainSet$Event==0) & (trainSet$ct.dmfs<5)
trainSet <- trainSet[!removeLessthan5,]
etrainSet <- trainSet
etrainSet$Event <- NULL
etrainSet$ct.dmfs <- NULL
etestSet <- testSet
etestSet$Event <- NULL
etestSet$ct.dmfs <- NULL

BAGFORWARD_T4 <-baggedModel(BRCASignatureLOGITT4$BSWiMS.models$forward.selection.list,trainSet,type="LOGIT",univariate=BRCASignatureLOGITT4$univariateAnalysis,frequencyThreshold=0.1,useFreq=FALSE)
tempPredict <- predict(BAGFORWARD_T4$bagged.model,testSet)
ROCTable$BAGFORWARD_T4 <- plotModels.ROC(cbind(testSet$Event,tempPredict),main="Bagging Forward Selection (T4)")

epiTable$BAGFORWARD_T4 <- epi.tests(table(tempPredict<0,!testSet$Event))

FULLBSWiMS <- predict(BRCASignatureLOGITT4$BSWiMS.model,testSet)
ROCTable$FULLBSWiMS_T4 <- plotModels.ROC(cbind(testSet$Event,FULLBSWiMS),main="B:SWiMS Bagging Model (T4)")

epiTable$FULLBSWiMS_T4 <- epi.tests(table(FULLBSWiMS<0,!testSet$Event))

BAGBSWIMS1 <-baggedModel(BRCASignatureLOGITT4$BSWiMS.models$formula.list,trainSet,type="LOGIT",univariate=BRCASignatureLOGITT4$univariateAnalysis,frequencyThreshold=0.0)
varlist <- c(all.vars(BAGBSWIMS1$bagged.model$formula)[-1],all.vars(BAGFORWARD_T4$bagged.model$formula)[-1])
varlist <- unique(varlist)
if (length(varlist)>150) varlist <- varlist[1:150]
system.time(modSignatures$BAGGINSig_S4 <- getSignature(data=trainSet,varlist=varlist,Outcome="Event",method="spearman"))

BAGGINGdistance <- signatureDistance(modSignatures$BAGGINSig_S4$caseTamplate,testSet,"spearman") - signatureDistance(modSignatures$BAGGINSig_S4$controlTemplate,testSet,"spearman") 

ROCTable$BAGGINSig_S4 <- plotModels.ROC(cbind(as.vector(testSet$Event),-BAGGINGdistance),main="Forward Selection Signature (T4)")  

epiTable$BAGGINSig_S4 <- epi.tests(table(BAGGINGdistance>0,!testSet$Event))


enetT4 <- cv.glmnet(as.matrix(etrainSet),as.vector(trainSet$Event),family="binomial");

enetPredict <- predict(enetT4,as.matrix(etestSet),s="lambda.min")
ROCTable$CVLASSO_T4 <- plotModels.ROC(cbind(testSet$Event,as.vector(enetPredict)),main="CV LASSO Model (T4)")

epiTable$CVLASSO_T4 <- epi.tests(table(enetPredict<0,!testSet$Event))

cenet <- as.matrix(coef(enetT4,s="lambda.min"))
lassoNamesT4 <- names(cenet[as.vector(cenet[,1] != 0),])[-1]
system.time(modSignatures$LASSOSig_S4 <- getSignature(data=trainSet,varlist=lassoNamesT4,Outcome="Event",method="spearman"))

LASSOdistance <- signatureDistance(modSignatures$LASSOSig_S4$caseTamplate,testSet,"spearman") - signatureDistance(modSignatures$LASSOSig_S4$controlTemplate,testSet,"spearman") 

ROCTable$CVLASSOSig_S4 <- plotModels.ROC(cbind(as.vector(testSet$Event),-LASSOdistance),main="CV LASSO Signature (T4)")  

epiTable$CVLASSOSig_S4 <- epi.tests(table(LASSOdistance>0,!testSet$Event))

The Test-Validation Tables

errtables <- as.matrix(rbind(
                   1.0-0.5*(epiTable$CVLASSO_T1$elements$sensitivity+epiTable$CVLASSO_T1$elements$specificity),
                   1.0-0.5*(epiTable$CVLASSO_T2$elements$sensitivity+epiTable$CVLASSO_T2$elements$specificity),
                   1.0-0.5*(epiTable$CVLASSO_T3$elements$sensitivity+epiTable$CVLASSO_T3$elements$specificity),
                   1.0-0.5*(epiTable$CVLASSO_T4$elements$sensitivity+epiTable$CVLASSO_T4$elements$specificity),
                   1.0-0.5*(epiTable$BAGFORWARD_T1$elements$sensitivity+epiTable$BAGFORWARD_T1$elements$specificity),
                   1.0-0.5*(epiTable$BAGFORWARD_T2$elements$sensitivity+epiTable$BAGFORWARD_T2$elements$specificity),
                   1.0-0.5*(epiTable$BAGFORWARD_T3$elements$sensitivity+epiTable$BAGFORWARD_T3$elements$specificity),
                   1.0-0.5*(epiTable$BAGFORWARD_T4$elements$sensitivity+epiTable$BAGFORWARD_T4$elements$specificity),
                   1.0-0.5*(epiTable$FULLBSWiMS_T1$elements$sensitivity+epiTable$FULLBSWiMS_T1$elements$specificity),
                   1.0-0.5*(epiTable$FULLBSWiMS_T2$elements$sensitivity+epiTable$FULLBSWiMS_T2$elements$specificity),
                   1.0-0.5*(epiTable$FULLBSWiMS_T3$elements$sensitivity+epiTable$FULLBSWiMS_T3$elements$specificity),
                   1.0-0.5*(epiTable$FULLBSWiMS_T4$elements$sensitivity+epiTable$FULLBSWiMS_T4$elements$specificity),
                   1.0-0.5*(epiTable$CVLASSOSig_S1$elements$sensitivity+epiTable$CVLASSOSig_S1$elements$specificity),
                   1.0-0.5*(epiTable$CVLASSOSig_S2$elements$sensitivity+epiTable$CVLASSOSig_S2$elements$specificity),
                   1.0-0.5*(epiTable$CVLASSOSig_S3$elements$sensitivity+epiTable$CVLASSOSig_S3$elements$specificity),
                   1.0-0.5*(epiTable$CVLASSOSig_S4$elements$sensitivity+epiTable$CVLASSOSig_S4$elements$specificity),
                   1.0-0.5*(epiTable$BAGGINSig_S1$elements$sensitivity+epiTable$BAGGINSig_S1$elements$specificity),
                   1.0-0.5*(epiTable$BAGGINSig_S2$elements$sensitivity+epiTable$BAGGINSig_S2$elements$specificity),
                   1.0-0.5*(epiTable$BAGGINSig_S3$elements$sensitivity+epiTable$BAGGINSig_S3$elements$specificity),
                   1.0-0.5*(epiTable$BAGGINSig_S4$elements$sensitivity+epiTable$BAGGINSig_S4$elements$specificity)
                  ))

bplot <- barPlotCiError(errtables,"eFPFN",c("des","min","loi","chin"),c("LASSO","Forward Bag","B:SWiMS","LASSO Sig","Forward Sig"),main="LOSO Test Balanced Error")



acctables <- as.matrix(rbind(
                   epiTable$CVLASSO_T1$elements$diag.acc,
                   epiTable$CVLASSO_T2$elements$diag.acc,
                   epiTable$CVLASSO_T3$elements$diag.acc,
                   epiTable$CVLASSO_T4$elements$diag.acc,
                   epiTable$BAGFORWARD_T1$elements$diag.acc,
                   epiTable$BAGFORWARD_T2$elements$diag.acc,
                   epiTable$BAGFORWARD_T3$elements$diag.acc,
                   epiTable$BAGFORWARD_T4$elements$diag.acc,
                   epiTable$FULLBSWiMS_T1$elements$diag.acc,
                   epiTable$FULLBSWiMS_T2$elements$diag.acc,
                   epiTable$FULLBSWiMS_T3$elements$diag.acc,
                   epiTable$FULLBSWiMS_T4$elements$diag.acc,
                   epiTable$CVLASSOSig_S1$elements$diag.acc,
                   epiTable$CVLASSOSig_S2$elements$diag.acc,
                   epiTable$CVLASSOSig_S3$elements$diag.acc,
                   epiTable$CVLASSOSig_S4$elements$diag.acc,
                   epiTable$BAGGINSig_S1$elements$diag.acc,
                   epiTable$BAGGINSig_S2$elements$diag.acc,
                   epiTable$BAGGINSig_S3$elements$diag.acc,
                   epiTable$BAGGINSig_S4$elements$diag.acc
                   ))
bplot <- barPlotCiError(acctables,"Accuracy",c("des","min","loi","chin"),c("LASSO","Forward Bag","B:SWiMS","LASSO Sig","Forward Sig"),main="LOSO Test Validation Accuracy",args.legend = list(x = "bottomright"))


sentables <- as.matrix(rbind(
                   epiTable$CVLASSO_T1$elements$sensitivity,
                   epiTable$CVLASSO_T2$elements$sensitivity,
                   epiTable$CVLASSO_T3$elements$sensitivity,
                   epiTable$CVLASSO_T4$elements$sensitivity,
                   epiTable$BAGFORWARD_T1$elements$sensitivity,
                   epiTable$BAGFORWARD_T2$elements$sensitivity,
                   epiTable$BAGFORWARD_T3$elements$sensitivity,
                   epiTable$BAGFORWARD_T4$elements$sensitivity,
                   epiTable$FULLBSWiMS_T1$elements$sensitivity,
                   epiTable$FULLBSWiMS_T2$elements$sensitivity,
                   epiTable$FULLBSWiMS_T3$elements$sensitivity,
                   epiTable$FULLBSWiMS_T4$elements$sensitivity,
                   epiTable$CVLASSOSig_S1$elements$sensitivity,
                   epiTable$CVLASSOSig_S2$elements$sensitivity,
                   epiTable$CVLASSOSig_S3$elements$sensitivity,
                   epiTable$CVLASSOSig_S4$elements$sensitivity,
                   epiTable$BAGGINSig_S1$elements$sensitivity,
                   epiTable$BAGGINSig_S2$elements$sensitivity,
                   epiTable$BAGGINSig_S3$elements$sensitivity,
                   epiTable$BAGGINSig_S4$elements$sensitivity
                   ))
bplot <- barPlotCiError(sentables,"Sensitivity",c("des","min","loi","chin"),c("LASSO","Forward Bag","B:SWiMS","LASSO Sig","Forward Sig"),main="LOSO Test Validation Sensitivity",args.legend = list(x = "bottomright"))

Validation SOS


testSet <- DataExpresionLOGIT
trainSet <- DataExpresionV[-set1,]
removeLessthan5 <- (trainSet$Event==0) & (trainSet$ct.dmfs<5)
trainSet <- trainSet[!removeLessthan5,]
etestSet <- testSet[,colnames(trainSet)]
etestSet$Event <- NULL
etestSet$ct.dmfs <- NULL

tempPredict <- predict(BAGFORWARD_T1$bagged.model,testSet)
ROCTable$BAGFORWARD_V_T1 <- plotModels.ROC(cbind(testSet$Event,tempPredict),main="SOS: Bagging Forward Selection (T1)")

epiTable$BAGFORWARD_V_T1 <- epi.tests(table(tempPredict<0,!testSet$Event))

FULLBSWiMS <- predict(BRCASignatureLOGITT1$BSWiMS.model,testSet)
ROCTable$FULLBSWiMS_V_T1 <- plotModels.ROC(cbind(testSet$Event,FULLBSWiMS),main="SOS:B:SWiMS Bagging Model (T1)")

epiTable$FULLBSWiMS_V_T1 <- epi.tests(table(FULLBSWiMS<0,!testSet$Event))

BAGGINGdistance <- signatureDistance(modSignatures$BAGGINSig_S1$caseTamplate,testSet,"spearman") - signatureDistance(modSignatures$BAGGINSig_S1$controlTemplate,testSet,"spearman") 

ROCTable$BAGGINSig_V_S1 <- plotModels.ROC(cbind(as.vector(testSet$Event),-BAGGINGdistance),main="SOS:Forward Selection Signature (T1)")  

epiTable$BAGGINSig_V_S1 <- epi.tests(table(BAGGINGdistance>0,!testSet$Event))

enetPredict <- predict(enetT1,as.matrix(etestSet),s="lambda.min")
ROCTable$CVLASSO_V_T1 <- plotModels.ROC(cbind(testSet$Event,as.vector(enetPredict)),main="SOS:CV LASSO Model (T1)")

epiTable$CVLASSO_V_T1 <- epi.tests(table(enetPredict<0,!testSet$Event))

LASSOdistance <- signatureDistance(modSignatures$LASSOSig_S1$caseTamplate,testSet,"spearman") - signatureDistance(modSignatures$LASSOSig_S1$controlTemplate,testSet,"spearman") 

ROCTable$CVLASSOSig_V_S1 <- plotModels.ROC(cbind(as.vector(testSet$Event),-LASSOdistance),main="SOS:CV LASSO Signature (T1)")  

epiTable$CVLASSOSig_V_S1 <- epi.tests(table(LASSOdistance>0,!testSet$Event))


trainSet <- DataExpresionV[-set2,]
removeLessthan5 <- (trainSet$Event==0) & (trainSet$ct.dmfs<5)
trainSet <- trainSet[!removeLessthan5,]
etestSet <- testSet[,colnames(trainSet)]
etestSet$Event <- NULL
etestSet$ct.dmfs <- NULL

tempPredict <- predict(BAGFORWARD_T2$bagged.model,testSet)
ROCTable$BAGFORWARD_V_T2 <- plotModels.ROC(cbind(testSet$Event,tempPredict),main="SOS:Bagging Forward Selection (T2)")

epiTable$BAGFORWARD_V_T2 <- epi.tests(table(tempPredict<0,!testSet$Event))

FULLBSWiMS <- predict(BRCASignatureLOGITT2$BSWiMS.model,testSet)
ROCTable$FULLBSWiMS_V_T2 <- plotModels.ROC(cbind(testSet$Event,FULLBSWiMS),main="SOS:B:SWiMS Bagging Model (T2)")

epiTable$FULLBSWiMS_V_T2 <- epi.tests(table(FULLBSWiMS<0,!testSet$Event))

BAGGINGdistance <- signatureDistance(modSignatures$BAGGINSig_S2$caseTamplate,testSet,"spearman") - signatureDistance(modSignatures$BAGGINSig_S2$controlTemplate,testSet,"spearman") 

ROCTable$BAGGINSig_V_S2 <- plotModels.ROC(cbind(as.vector(testSet$Event),-BAGGINGdistance),main="SOS:Forward Selection Signature (T2)")  

epiTable$BAGGINSig_V_S2 <- epi.tests(table(BAGGINGdistance>0,!testSet$Event))

enetPredict <- predict(enetT2,as.matrix(etestSet),s="lambda.min")
ROCTable$CVLASSO_V_T2 <- plotModels.ROC(cbind(testSet$Event,as.vector(enetPredict)),main="SOS:CV LASSO Model (T2)")

epiTable$CVLASSO_V_T2 <- epi.tests(table(enetPredict<0,!testSet$Event))

LASSOdistance <- signatureDistance(modSignatures$LASSOSig_S2$caseTamplate,testSet,"spearman") - signatureDistance(modSignatures$LASSOSig_S2$controlTemplate,testSet,"spearman") 

ROCTable$CVLASSOSig_V_S2 <- plotModels.ROC(cbind(as.vector(testSet$Event),-LASSOdistance),main="SOS:CV LASSO Signature (T2)")  

epiTable$CVLASSOSig_V_S2 <- epi.tests(table(LASSOdistance>0,!testSet$Event))


trainSet <- DataExpresionV[-set3,]
removeLessthan5 <- (trainSet$Event==0) & (trainSet$ct.dmfs<5)
trainSet <- trainSet[!removeLessthan5,]
etestSet <- testSet[,colnames(trainSet)]
etestSet$Event <- NULL
etestSet$ct.dmfs <- NULL

tempPredict <- predict(BAGFORWARD_T3$bagged.model,testSet)
ROCTable$BAGFORWARD_V_T3 <- plotModels.ROC(cbind(testSet$Event,tempPredict),main="SOS:Bagging Forward Selection (T3)")

epiTable$BAGFORWARD_V_T3 <- epi.tests(table(tempPredict<0,!testSet$Event))

FULLBSWiMS <- predict(BRCASignatureLOGITT3$BSWiMS.model,testSet)
ROCTable$FULLBSWiMS_V_T3 <- plotModels.ROC(cbind(testSet$Event,FULLBSWiMS),main="SOS:B:SWiMS Bagging Model (T3)")

epiTable$FULLBSWiMS_V_T3 <- epi.tests(table(FULLBSWiMS<0,!testSet$Event))

BAGGINGdistance <- signatureDistance(modSignatures$BAGGINSig_S3$caseTamplate,testSet,"spearman") - signatureDistance(modSignatures$BAGGINSig_S3$controlTemplate,testSet,"spearman") 

ROCTable$BAGGINSig_V_S3 <- plotModels.ROC(cbind(as.vector(testSet$Event),-BAGGINGdistance),main="SOS:Forward Selection Signature (T3)")  

epiTable$BAGGINSig_V_S3 <- epi.tests(table(BAGGINGdistance>0,!testSet$Event))

enetPredict <- predict(enetT3,as.matrix(etestSet),s="lambda.min")
ROCTable$CVLASSO_V_T3 <- plotModels.ROC(cbind(testSet$Event,as.vector(enetPredict)),main="SOS:CV LASSO Model (T3)")

epiTable$CVLASSO_V_T3 <- epi.tests(table(enetPredict<0,!testSet$Event))

LASSOdistance <- signatureDistance(modSignatures$LASSOSig_S3$caseTamplate,testSet,"spearman") - signatureDistance(modSignatures$LASSOSig_S3$controlTemplate,testSet,"spearman") 

ROCTable$CVLASSOSig_V_S3 <- plotModels.ROC(cbind(as.vector(testSet$Event),-LASSOdistance),main="SOS:CV LASSO Signature (T3)")  

epiTable$CVLASSOSig_V_S3 <- epi.tests(table(LASSOdistance>0,!testSet$Event))



trainSet <- DataExpresionV[-set4,]
removeLessthan5 <- (trainSet$Event==0) & (trainSet$ct.dmfs<5)
trainSet <- trainSet[!removeLessthan5,]
etestSet <- testSet[,colnames(trainSet)]
etestSet$Event <- NULL
etestSet$ct.dmfs <- NULL

tempPredict <- predict(BAGFORWARD_T4$bagged.model,testSet)
ROCTable$BAGFORWARD_V_T4 <- plotModels.ROC(cbind(testSet$Event,tempPredict),main="SOS:Bagging Forward Selection (T4)")

epiTable$BAGFORWARD_V_T4 <- epi.tests(table(tempPredict<0,!testSet$Event))

FULLBSWiMS <- predict(BRCASignatureLOGITT4$BSWiMS.model,testSet)
ROCTable$FULLBSWiMS_V_T4 <- plotModels.ROC(cbind(testSet$Event,FULLBSWiMS),main="SOS:B:SWiMS Bagging Model (T4)")

epiTable$FULLBSWiMS_V_T4 <- epi.tests(table(FULLBSWiMS<0,!testSet$Event))

BAGGINGdistance <- signatureDistance(modSignatures$BAGGINSig_S4$caseTamplate,testSet,"spearman") - signatureDistance(modSignatures$BAGGINSig_S4$controlTemplate,testSet,"spearman") 

ROCTable$BAGGINSig_V_S4 <- plotModels.ROC(cbind(as.vector(testSet$Event),-BAGGINGdistance),main="SOS:Forward Selection Signature (T4)")  

epiTable$BAGGINSig_V_S4 <- epi.tests(table(BAGGINGdistance>0,!testSet$Event))

enetPredict <- predict(enetT4,as.matrix(etestSet),s="lambda.min")
ROCTable$CVLASSO_V_T4 <- plotModels.ROC(cbind(testSet$Event,as.vector(enetPredict)),main="SOS:CV LASSO Model (T4)")

epiTable$CVLASSO_V_T4 <- epi.tests(table(enetPredict<0,!testSet$Event))

LASSOdistance <- signatureDistance(modSignatures$LASSOSig_S4$caseTamplate,testSet,"spearman") - signatureDistance(modSignatures$LASSOSig_S4$controlTemplate,testSet,"spearman") 

ROCTable$CVLASSOSig_V_S4 <- plotModels.ROC(cbind(as.vector(testSet$Event),-LASSOdistance),main="SOS:CV LASSO Signature (T4)")  

epiTable$CVLASSOSig_V_S4 <- epi.tests(table(LASSOdistance>0,!testSet$Event))



trainSet <- DataExpresionV
removeLessthan5 <- (trainSet$Event==0) & (trainSet$ct.dmfs<5)
trainSet <- trainSet[!removeLessthan5,]
etrainSet <- trainSet
etrainSet$Event <- NULL
etrainSet$ct.dmfs <- NULL

BAGFORWARD_TALL <-baggedModel(BRCASignatureLOGITALL$BSWiMS.models$forward.selection.list,trainSet,type="LOGIT",univariate=BRCASignatureLOGITALL$univariateAnalysis,frequencyThreshold=0.1,useFreq=FALSE)
tempPredict <- predict(BAGFORWARD_TALL$bagged.model,testSet)
ROCTable$BAGFORWARD_TALL <- plotModels.ROC(cbind(testSet$Event,tempPredict),main="Bagging Forward Selection (All Sets)")

epiTable$BAGFORWARD_TALL <- epi.tests(table(tempPredict<0,!testSet$Event))

FULLBSWiMS <- predict(BRCASignatureLOGITALL$BSWiMS.model,testSet)
ROCTable$FULLBSWiMS_TALL <- plotModels.ROC(cbind(testSet$Event,FULLBSWiMS),main="B:SWiMS Bagging Model (All Sets)")

epiTable$FULLBSWiMS_TALL <- epi.tests(table(FULLBSWiMS<0,!testSet$Event))

FULLeBSWiMS <- predict(BRCASignatureLOGITALL$eBSWiMS.model$equivalentModel,testSet)
ROCTable$FULLeBSWiMS_TALL <- plotModels.ROC(cbind(testSet$Event,FULLeBSWiMS),main="eB:SWiMS Bagging Model (All Sets)")

epiTable$FULLeBSWiMS_TALL <- epi.tests(table(FULLBSWiMS<0,!testSet$Event))


BAGBSWIMS1 <-baggedModel(BRCASignatureLOGITALL$BSWiMS.models$formula.list,trainSet,type="LOGIT",univariate=BRCASignatureLOGITALL$univariateAnalysis,frequencyThreshold=0.0)
varlist <- c(all.vars(BAGBSWIMS1$bagged.model$formula)[-1],all.vars(BAGFORWARD_TALL$bagged.model$formula)[-1])
varlist <- unique(varlist)
system.time(modSignatures$BAGGINSig_SALL <- getSignature(data=trainSet,varlist=varlist,Outcome="Event",method="spearman"))


BAGGINGdistance <- signatureDistance(modSignatures$BAGGINSig_SALL$caseTamplate,testSet,"spearman") - signatureDistance(modSignatures$BAGGINSig_SALL$controlTemplate,testSet,"spearman") 

ROCTable$BAGGINSig_SALL <- plotModels.ROC(cbind(as.vector(testSet$Event),-BAGGINGdistance),main="Forward Selection Signature (All Sets)")  

epiTable$BAGGINSig_SALL <- epi.tests(table(BAGGINGdistance>0,!testSet$Event))


enetTALL <- cv.glmnet(as.matrix(etrainSet),as.vector(trainSet$Event),family="binomial");

enetPredict <- predict(enetTALL,as.matrix(etestSet),s="lambda.min")
ROCTable$CVLASSO_TALL <- plotModels.ROC(cbind(testSet$Event,as.vector(enetPredict)),main="CV LASSO Model (All Sets)")

epiTable$CVLASSO_TALL <- epi.tests(table(enetPredict<0,!testSet$Event))

cenet <- as.matrix(coef(enetTALL,s="lambda.min"))
lassoNamesALL <- names(cenet[as.vector(cenet[,1] != 0),])[-1]
system.time(modSignatures$LASSOSig_SALL <- getSignature(data=trainSet,varlist=lassoNamesALL,Outcome="Event",method="spearman"))

LASSOdistance <- signatureDistance(modSignatures$LASSOSig_SALL$caseTamplate,testSet,"spearman") - signatureDistance(modSignatures$LASSOSig_SALL$controlTemplate,testSet,"spearman") 

ROCTable$CVLASSOSig_SALL <- plotModels.ROC(cbind(as.vector(testSet$Event),-LASSOdistance),main="CV LASSO Signature (All Sets)")  

epiTable$CVLASSOSig_SALL <- epi.tests(table(LASSOdistance>0,!testSet$Event))

All Validation Tables


errtables <- as.matrix(rbind(
                   1.0-0.5*(epiTable$CVLASSO_V_T1$elements$sensitivity+epiTable$CVLASSO_V_T1$elements$specificity),
                   1.0-0.5*(epiTable$CVLASSO_V_T2$elements$sensitivity+epiTable$CVLASSO_V_T2$elements$specificity),
                   1.0-0.5*(epiTable$CVLASSO_V_T3$elements$sensitivity+epiTable$CVLASSO_V_T3$elements$specificity),
                   1.0-0.5*(epiTable$CVLASSO_V_T4$elements$sensitivity+epiTable$CVLASSO_V_T4$elements$specificity),
                   1.0-0.5*(epiTable$CVLASSO_TALL$elements$sensitivity+epiTable$CVLASSO_TALL$elements$specificity),
                   1.0-0.5*(epiTable$BAGFORWARD_V_T1$elements$sensitivity+epiTable$BAGFORWARD_V_T1$elements$specificity),
                   1.0-0.5*(epiTable$BAGFORWARD_V_T2$elements$sensitivity+epiTable$BAGFORWARD_V_T2$elements$specificity),
                   1.0-0.5*(epiTable$BAGFORWARD_V_T3$elements$sensitivity+epiTable$BAGFORWARD_V_T3$elements$specificity),
                   1.0-0.5*(epiTable$BAGFORWARD_V_T4$elements$sensitivity+epiTable$BAGFORWARD_V_T4$elements$specificity),
                   1.0-0.5*(epiTable$BAGFORWARD_TALL$elements$sensitivity+epiTable$BAGFORWARD_TALL$elements$specificity),
                   1.0-0.5*(epiTable$FULLBSWiMS_V_T1$elements$sensitivity+epiTable$FULLBSWiMS_V_T1$elements$specificity),
                   1.0-0.5*(epiTable$FULLBSWiMS_V_T2$elements$sensitivity+epiTable$FULLBSWiMS_V_T2$elements$specificity),
                   1.0-0.5*(epiTable$FULLBSWiMS_V_T3$elements$sensitivity+epiTable$FULLBSWiMS_V_T3$elements$specificity),
                   1.0-0.5*(epiTable$FULLBSWiMS_V_T4$elements$sensitivity+epiTable$FULLBSWiMS_V_T4$elements$specificity),
                   1.0-0.5*(epiTable$FULLBSWiMS_TALL$elements$sensitivity+epiTable$FULLBSWiMS_TALL$elements$specificity),
                   1.0-0.5*(epiTable$CVLASSOSig_V_S1$elements$sensitivity+epiTable$CVLASSOSig_V_S1$elements$specificity),
                   1.0-0.5*(epiTable$CVLASSOSig_V_S2$elements$sensitivity+epiTable$CVLASSOSig_V_S2$elements$specificity),
                   1.0-0.5*(epiTable$CVLASSOSig_V_S3$elements$sensitivity+epiTable$CVLASSOSig_V_S3$elements$specificity),
                   1.0-0.5*(epiTable$CVLASSOSig_V_S4$elements$sensitivity+epiTable$CVLASSOSig_V_S4$elements$specificity),
                   1.0-0.5*(epiTable$CVLASSOSig_SALL$elements$sensitivity+epiTable$CVLASSOSig_SALL$elements$specificity),
                   1.0-0.5*(epiTable$BAGGINSig_V_S1$elements$sensitivity+epiTable$BAGGINSig_V_S1$elements$specificity),
                   1.0-0.5*(epiTable$BAGGINSig_V_S2$elements$sensitivity+epiTable$BAGGINSig_V_S2$elements$specificity),
                   1.0-0.5*(epiTable$BAGGINSig_V_S3$elements$sensitivity+epiTable$BAGGINSig_V_S3$elements$specificity),
                   1.0-0.5*(epiTable$BAGGINSig_V_S4$elements$sensitivity+epiTable$BAGGINSig_V_S4$elements$specificity),
                   1.0-0.5*(epiTable$BAGGINSig_SALL$elements$sensitivity+epiTable$BAGGINSig_SALL$elements$specificity),
                   1.0-0.5*(epi.signature70$elements$sensitivity+epi.signature70$elements$specificity),
                   1.0-0.5*(epi.signature70$elements$sensitivity+epi.signature70$elements$specificity),
                   1.0-0.5*(epi.signature70$elements$sensitivity+epi.signature70$elements$specificity),
                   1.0-0.5*(epi.signature70$elements$sensitivity+epi.signature70$elements$specificity),
                   1.0-0.5*(epi.signature70$elements$sensitivity+epi.signature70$elements$specificity)
                  ))

bplot <- barPlotCiError(errtables,"eFPFN",c("des","min","loi","chin","ALL"),c("LASSO","Forward Bag","B:SWiMS","LASSO Sig","Forward Sig","70 Sig"),main="SOS Validation Test Balanced Error")



acctables <- as.matrix(rbind(
                   epiTable$CVLASSO_V_T1$elements$diag.acc,
                   epiTable$CVLASSO_V_T2$elements$diag.acc,
                   epiTable$CVLASSO_V_T3$elements$diag.acc,
                   epiTable$CVLASSO_V_T4$elements$diag.acc,
                   epiTable$CVLASSO_TALL$elements$diag.acc,
                   epiTable$BAGFORWARD_V_T1$elements$diag.acc,
                   epiTable$BAGFORWARD_V_T2$elements$diag.acc,
                   epiTable$BAGFORWARD_V_T3$elements$diag.acc,
                   epiTable$BAGFORWARD_V_T4$elements$diag.acc,
                   epiTable$BAGFORWARD_TALL$elements$diag.acc,
                   epiTable$FULLBSWiMS_V_T1$elements$diag.acc,
                   epiTable$FULLBSWiMS_V_T2$elements$diag.acc,
                   epiTable$FULLBSWiMS_V_T3$elements$diag.acc,
                   epiTable$FULLBSWiMS_V_T4$elements$diag.acc,
                   epiTable$FULLBSWiMS_TALL$elements$diag.acc,
                   epiTable$CVLASSOSig_V_S1$elements$diag.acc,
                   epiTable$CVLASSOSig_V_S2$elements$diag.acc,
                   epiTable$CVLASSOSig_V_S3$elements$diag.acc,
                   epiTable$CVLASSOSig_V_S4$elements$diag.acc,
                   epiTable$CVLASSOSig_SALL$elements$diag.acc,
                   epiTable$BAGGINSig_V_S1$elements$diag.acc,
                   epiTable$BAGGINSig_V_S2$elements$diag.acc,
                   epiTable$BAGGINSig_V_S3$elements$diag.acc,
                   epiTable$BAGGINSig_V_S4$elements$diag.acc,
                   epiTable$BAGGINSig_SALL$elements$diag.acc,
                   epi.signature70$elements$diag.acc,
                   epi.signature70$elements$diag.acc,
                   epi.signature70$elements$diag.acc,
                   epi.signature70$elements$diag.acc,
                   epi.signature70$elements$diag.acc
                   ))
bplot <- barPlotCiError(acctables,"Accuracy",c("des","min","loi","chin","ALL"),c("LASSO","Forward Bag","B:SWiMS","LASSO Sig","Forward Sig","70 Sig"),main="SOS Test Validation Accuracy",args.legend = list(x = "bottomright"))


sentables <- as.matrix(rbind(
                   epiTable$CVLASSO_V_T1$elements$sensitivity,
                   epiTable$CVLASSO_V_T2$elements$sensitivity,
                   epiTable$CVLASSO_V_T3$elements$sensitivity,
                   epiTable$CVLASSO_V_T4$elements$sensitivity,
                   epiTable$CVLASSO_TALL$elements$sensitivity,
                   epiTable$BAGFORWARD_V_T1$elements$sensitivity,
                   epiTable$BAGFORWARD_V_T2$elements$sensitivity,
                   epiTable$BAGFORWARD_V_T3$elements$sensitivity,
                   epiTable$BAGFORWARD_V_T4$elements$sensitivity,
                   epiTable$BAGFORWARD_TALL$elements$sensitivity,
                   epiTable$FULLBSWiMS_V_T1$elements$sensitivity,
                   epiTable$FULLBSWiMS_V_T2$elements$sensitivity,
                   epiTable$FULLBSWiMS_V_T3$elements$sensitivity,
                   epiTable$FULLBSWiMS_V_T4$elements$sensitivity,
                   epiTable$FULLBSWiMS_TALL$elements$sensitivity,
                   epiTable$CVLASSOSig_V_S1$elements$sensitivity,
                   epiTable$CVLASSOSig_V_S2$elements$sensitivity,
                   epiTable$CVLASSOSig_V_S3$elements$sensitivity,
                   epiTable$CVLASSOSig_V_S4$elements$sensitivity,
                   epiTable$CVLASSOSig_SALL$elements$sensitivity,
                   epiTable$BAGGINSig_V_S1$elements$sensitivity,
                   epiTable$BAGGINSig_V_S2$elements$sensitivity,
                   epiTable$BAGGINSig_V_S3$elements$sensitivity,
                   epiTable$BAGGINSig_V_S4$elements$sensitivity,
                   epiTable$BAGGINSig_SALL$elements$sensitivity,
                   epi.signature70$elements$sensitivity,
                   epi.signature70$elements$sensitivity,
                   epi.signature70$elements$sensitivity,
                   epi.signature70$elements$sensitivity,
                   epi.signature70$elements$sensitivity
                   ))
bplot <- barPlotCiError(sentables,"Sensitivity",c("des","min","loi","chin","ALL"),c("LASSO","Forward Bag","B:SWiMS","LASSO Sig","Forward Sig","70 Sig"),main="SOS Test Validation Sensitivity",args.legend = list(x = "bottomright"))



numberofFeatures <- matrix(c(
  length(BRCASignatureLOGITT1$BSWiMS.model$coefficients),
  length(BRCASignatureLOGITT2$BSWiMS.model$coefficients),
  length(BRCASignatureLOGITT3$BSWiMS.model$coefficients),
  length(BRCASignatureLOGITT4$BSWiMS.model$coefficients),
  length(BRCASignatureLOGITALL$BSWiMS.model$coefficients),
  length(lassoNamesT1),
  length(lassoNamesT2),
  length(lassoNamesT3),
  length(lassoNamesT4),
  length(lassoNamesALL),
  ncol(modSignatures$LASSOSig_S1$controlTemplate),
  ncol(modSignatures$LASSOSig_S2$controlTemplate),
  ncol(modSignatures$LASSOSig_S3$controlTemplate),
  ncol(modSignatures$LASSOSig_S4$controlTemplate),
  ncol(modSignatures$LASSOSig_SALL$controlTemplate),
  ncol(modSignatures$BAGGINSig_S1$controlTemplate),
  ncol(modSignatures$BAGGINSig_S2$controlTemplate),
  ncol(modSignatures$BAGGINSig_S3$controlTemplate),
  ncol(modSignatures$BAGGINSig_S4$controlTemplate),
  ncol(modSignatures$BAGGINSig_SALL$controlTemplate)
  ),5,4)
rownames(numberofFeatures) <- c("des","min","loi","chin","ALL")
colnames(numberofFeatures) <- c("B:SWiMS","LASSO","CV LASSO Sig","Forward Sig")
barplot(numberofFeatures,cex.names=0.7,las=2,ylim=c(0.0,150),main="Feature Size",ylab="#",beside=TRUE,legend = rownames(numberofFeatures),args.legend = list(x = "topleft"))


pander::pander(numberofFeatures,caption="Number of Features")
Number of Features
  B:SWiMS LASSO CV LASSO Sig Forward Sig
des 47 58 53 179
min 74 80 65 107
loi 49 100 96 146
chin 67 90 85 69
ALL 28 142 132 203

Venn Diagrams for the four tests

par(mfrow=c(2,2))

names1 <- names(BAGFORWARD_TALL$bagged.model$coefficients)
names2 <- names(BRCASignatureLOGITALL$BSWiMS.model$coefficients)
names3 <- lassoNamesALL

featurelist <- list(Forward=names1,BSWiMS=names2,CVLASSO=names3)
vend <- venn(featurelist)
title("ALL")

Signature_1 <- colnames(modSignatures$BAGGINSig_S1$controlTemplate)
Signature_2 <- colnames(modSignatures$BAGGINSig_S2$controlTemplate)
Signature_3 <- colnames(modSignatures$BAGGINSig_S3$controlTemplate)
Signature_4 <- colnames(modSignatures$BAGGINSig_S4$controlTemplate)
Signature_5 <- colnames(modSignatures$BAGGINSig_SALL$controlTemplate)

featurelist <- list(des=Signature_1,min=Signature_2,loi=Signature_3,chin=Signature_4)
vend <- venn(featurelist)
title("Forward Sigantures")

Signature_1 <- colnames(modSignatures$LASSOSig_S1$controlTemplate)
Signature_2 <- colnames(modSignatures$LASSOSig_S2$controlTemplate)
Signature_3 <- colnames(modSignatures$LASSOSig_S3$controlTemplate)
Signature_4 <- colnames(modSignatures$LASSOSig_S4$controlTemplate)

featurelist <- list(des=Signature_1,min=Signature_2,loi=Signature_3,chin=Signature_4)
vend <- venn(featurelist)
title("LASSO Signatures")

Signature_1 <- names(BRCASignatureLOGITT1$BSWiMS.model$coefficients)
Signature_2 <- names(BRCASignatureLOGITT2$BSWiMS.model$coefficients)
Signature_3 <- names(BRCASignatureLOGITT3$BSWiMS.model$coefficients)
Signature_4 <- names(BRCASignatureLOGITT4$BSWiMS.model$coefficients)

featurelist <- list(des=Signature_1,min=Signature_2,loi=Signature_3,chin=Signature_4)
vend <- venn(featurelist)
title("B:SWiMS")

par(mfrow=c(1,1))