Lukemia Data Set Analysis

library("epiR")
library("FRESA.CAD")
library(network)
library(GGally)
library("e1071")
library("gplots")
library("randomForest")
library(rpart)

Loops <- 10
Repeats <- 5
filter <- 0.05

#LeukemiaA <- read.delim("~/Development/FresaPaper/Leukemia/Leukemia.txt")
#LeukemiaA <- read.delim("./Leukemia/Leukemia.txt")
LeukemiaA <- read.delim("./Leukemia.txt")
Leukemia <- LeukemiaA[,-1]
rownames(Leukemia) <- LeukemiaA[,1]
Leukemia2 <- Leukemia
#Leukemia$Status <- Leukemia$Status-0.5

Modeling


#system.time(LeukemiaModelBin <- FRESA.Model(formula = Status ~ 1, Leukemia,filter.p.value=filter))

filename = paste("LeukemiaModelBin",Loops,Repeats,sprintf("%5.4f",filter),"res.RDATA",sep="_")
system.time(LeukemiaModelBin <- FRESA.Model(formula = Status ~ 1, Leukemia, CVfolds=Loops, repeats=Repeats,filter.p.value=filter,usrFitFun=svm))
save(LeukemiaModelBin,file=filename)
load(file=filename)

Summary Tables

pander::pander(summary(LeukemiaModelBin$BSWiMS.model)$coefficients,digits=4)
Table continues below
  Estimate lower OR upper u.Accuracy
M31523_at -0.0004483 0.9995 0.9996 0.9996 0.9433
M27891_at 7.718e-05 1 1 1 0.9255
M23197_at 0.001027 1.001 1.001 1.001 0.961
X95735_at 9.432e-05 1 1 1 0.9326
M11722_at -0.0001223 0.9999 0.9999 0.9999 0.9255
M92287_at -9.988e-05 0.9999 0.9999 0.9999 0.9078
M84526_at 0.0001814 1 1 1 0.9007
D88422_at 0.0002429 1 1 1 0.8901
D88270_at -0.0006718 0.9993 0.9993 0.9994 0.8794
U05259_rna1_at -0.0003798 0.9995 0.9996 0.9997 0.8794
L09209_s_at 0.01117 1.009 1.011 1.013 0.9043
J05243_at -0.0006166 0.9992 0.9994 0.9996 0.883
X59417_at -6.743e-05 0.9999 0.9999 0.9999 0.8901
U46499_at 0.0001243 1 1 1 0.9149
M31211_s_at -0.0004152 0.9995 0.9996 0.9997 0.8582
X62654_rna1_at 0.0002732 1 1 1 0.8723
Z15115_at -0.0002173 0.9997 0.9998 0.9998 0.8404
M63138_at 6.873e-05 1 1 1 0.8617
X62320_at 0.0004958 1 1 1.001 0.9043
M96326_rna1_at 0.0001838 1 1 1 0.8759
M63379_at 0.09729 1.092 1.102 1.112 0.8617
HG1612.HT1612_at -0.0003798 0.9995 0.9996 0.9998 0.8617
M22960_at 0.0003057 1 1 1 0.8121
U16954_at -0.002269 0.997 0.9977 0.9984 0.8014
U57721_at 0.002286 1.001 1.002 1.003 0.7447
X68560_at -0.0382 0.9524 0.9625 0.9727 0.7482
X03934_at -0.0001888 0.9998 0.9998 0.9999 0.6277
M62982_at 0.1883 1.155 1.207 1.261 0.6064
Table continues below
  r.Accuracy full.Accuracy u.AUC r.AUC full.AUC
M31523_at 0.5 0.9433 0.9433 0.5 0.9433
M27891_at 0.5 0.9255 0.9255 0.5 0.9255
M23197_at 0.5 0.961 0.961 0.5 0.961
X95735_at 0.5 0.9326 0.9326 0.5 0.9326
M11722_at 0.5 0.9255 0.9255 0.5 0.9255
M92287_at 0.5 0.9078 0.9078 0.5 0.9078
M84526_at 0.5 0.9007 0.9007 0.5 0.9007
D88422_at 0.5 0.8901 0.8901 0.5 0.8901
D88270_at 0.6277 0.9787 0.8794 0.6277 0.9787
U05259_rna1_at 0.8014 0.9894 0.8794 0.8014 0.9894
L09209_s_at 0.7482 1 0.9043 0.7482 1
J05243_at 0.8723 0.9574 0.883 0.8723 0.9574
X59417_at 0.5 0.8901 0.8901 0.5 0.8901
U46499_at 0.5 0.9149 0.9149 0.5 0.9149
M31211_s_at 0.5 0.8582 0.8582 0.5 0.8582
X62654_rna1_at 0.883 0.9574 0.8723 0.883 0.9574
Z15115_at 0.8121 0.9433 0.8404 0.8121 0.9433
M63138_at 0.5 0.8617 0.8617 0.5 0.8617
X62320_at 0.8617 0.9716 0.9043 0.8617 0.9716
M96326_rna1_at 0.7447 0.9291 0.8759 0.7447 0.9291
M63379_at 0.6064 1 0.8617 0.6064 1
HG1612.HT1612_at 0.9043 0.9716 0.8617 0.9043 0.9716
M22960_at 0.8404 0.9433 0.8121 0.8404 0.9433
U16954_at 0.8794 0.9894 0.8014 0.8794 0.9894
U57721_at 0.8759 0.9291 0.7447 0.8759 0.9291
X68560_at 0.9043 1 0.7482 0.9043 1
X03934_at 0.8794 0.9787 0.6277 0.8794 0.9787
M62982_at 0.8617 1 0.6064 0.8617 1
  IDI NRI z.IDI z.NRI
M31523_at 0.7939 1.773 19.14 18.65
M27891_at 0.7567 1.745 17.09 18.08
M23197_at 0.8174 1.674 20.52 15.17
X95735_at 0.7691 1.702 17.57 15.83
M11722_at 0.7355 1.759 16.29 18.28
M92287_at 0.727 1.716 16.07 16.27
M84526_at 0.7332 1.617 15.87 14.24
D88422_at 0.6973 1.603 14.59 13.97
D88270_at 0.7681 1.901 17.91 30.43
U05259_rna1_at 0.4748 1.887 9.05 30.67
L09209_s_at 0.6395 2 12.69 Inf
J05243_at 0.2327 1.716 5.172 16.65
X59417_at 0.6209 1.56 12.41 12.19
U46499_at 0.6491 1.546 15.21 12.26
M31211_s_at 0.5705 1.461 11.45 10.45
X62654_rna1_at 0.2984 1.759 6.214 18.63
Z15115_at 0.3444 1.404 6.864 9.642
M63138_at 0.6259 1.319 12.44 9.094
X62320_at 0.3322 1.773 6.794 25.75
M96326_rna1_at 0.4455 1.745 8.493 17.71
M63379_at 0.8251 2 20.71 Inf
HG1612.HT1612_at 0.2538 1.617 5.706 18.1
M22960_at 0.3405 1.66 6.937 15.23
U16954_at 0.3078 1.823 6.432 22.69
U57721_at 0.2371 1.518 5.363 12.49
X68560_at 0.3023 2 7.117 Inf
X03934_at 0.3179 1.773 6.722 22.23
M62982_at 0.434 1.922 8.42 49.46

B:SWiMS Heat Map Plots

opg <- par(no.readonly = TRUE)
par(mfrow=c(1,1))

hm <- heatMaps(LeukemiaModelBin$BSWiMS.model$baggingAnalysis$RelativeFrequency,Outcome="Status",data=Leukemia,hCluster = TRUE,Scale=TRUE,xlab="Subject ID",transpose=TRUE,title="B:SWIMS Features")
#> [1] 2

par(opg)

ROC Plots

AccCITable <- NULL
BErrorCITable <- NULL


rp <- plotModels.ROC(LeukemiaModelBin$cvObject$LASSO.testPredictions,theCVfolds=Loops,main="LASSO",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))

rp <- plotModels.ROC(LeukemiaModelBin$cvObject$KNN.testPrediction,theCVfolds=Loops,main="KNN",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))

rp <- plotModels.ROC(LeukemiaModelBin$cvObject$Models.testPrediction,theCVfolds=Loops,predictor="Prediction",main="B:SWiMS",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))

rp <- plotModels.ROC(LeukemiaModelBin$cvObject$Models.testPrediction,theCVfolds=Loops,predictor="Ensemble.B.SWiMS",main="Ensembe B:SWiMS ",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))

Support Vector Machine(SVM) Analysis


LeukemiaModelBin$cvObject$Models.testPrediction$usrFitFunction_Sel <- LeukemiaModelBin$cvObject$Models.testPrediction$usrFitFunction_Sel -0.5
LeukemiaModelBin$cvObject$Models.testPrediction$usrFitFunction <- LeukemiaModelBin$cvObject$Models.testPrediction$usrFitFunction -0.5

rp <- plotModels.ROC(LeukemiaModelBin$cvObject$Models.testPrediction,theCVfolds=Loops,predictor="usrFitFunction",main="Filtered:SVM",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))

rp <- plotModels.ROC(LeukemiaModelBin$cvObject$Models.testPrediction,theCVfolds=Loops,predictor="usrFitFunction_Sel",main="B:SWiMS/SVM",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))

Barplots of Accuracy and Balanced Error


CVthesets <- c("LASSO","KNN","B:SWiMS","B:SWiMS Ensemble","SVM:Filterd","SVM:BSWIMS")



bp <- barPlotCiError(as.matrix(AccCITable),metricname="Accuracy",thesets=CVthesets,themethod="CV",main="Accuracy",args.legend = list(x = "bottomright"))

bp <- barPlotCiError(as.matrix(BErrorCITable),metricname="Balanced Error",thesets=CVthesets,themethod="CV",main="Balanced Error",args.legend = list(x = "topright"))

B:SWiMS Feature Plots

baggLeukemiaBSWiMS <- baggedModel(LeukemiaModelBin$cvObject$allBSWiMSFormulas.list,Leukemia,type="LOGIT",Outcome="Status")
#> 
#> Num. Models: 896  To Test: 55  TopFreq: 50  Thrf: 1  Removed: 15 
#> .........................................................................................

cf <- length(LeukemiaModelBin$cvObject$allBSWiMSFormulas.list)/(Loops*Repeats)

namestoShow <- names(baggLeukemiaBSWiMS$coefEvolution)[-c(1,2)]
frac = 0.25*Loops*Repeats

namestoShow <- namestoShow[baggLeukemiaBSWiMS$frequencyTable[namestoShow]>=frac]

fnshow <- min(11,length(namestoShow))
barplot(baggLeukemiaBSWiMS$frequencyTable[namestoShow],las = 2,cex.axis=1.0,cex.names=0.75,main="B:SWiMS Feature Frequency")


n <- network::network(cf*baggLeukemiaBSWiMS$formulaNetwork[1:fnshow,1:fnshow], directed = FALSE,ignore.eval = FALSE,names.eval = "weights")
gplots::heatmap.2(cf*baggLeukemiaBSWiMS$formulaNetwork[namestoShow,namestoShow],trace="none",mar=c(10,10),main="B:SWiMS Formula Network")


ggnet2(n, label = TRUE, size = "degree",size.cut = 3,size.min = 1, mode = "circle",edge.label = "weights",edge.label.size=4)

LASSO Feature Plots

baggLeukemiaLASSO <- baggedModel(LeukemiaModelBin$cvObject$LASSOVariables,Leukemia,type="LOGIT",Outcome="Status")
#> 
#> Num. Models: 51  To Test: 56  TopFreq: 48  Thrf: 1  Removed: 14 
#> .....

toshow <- sum(baggLeukemiaLASSO$frequencyTable>=frac)
fnshow <- min(11,length(baggLeukemiaLASSO$frequencyTable))
barplot(baggLeukemiaLASSO$frequencyTable[1:toshow],las = 2,cex.axis=1.0,cex.names=0.75,main="LASSO Feature Frequency")


n <- network::network(baggLeukemiaLASSO$formulaNetwork[1:fnshow,1:fnshow], directed = FALSE,ignore.eval = FALSE,names.eval = "weights")
gplots::heatmap.2(baggLeukemiaLASSO$formulaNetwork[1:toshow,1:toshow],trace="none",mar=c(10,10),main="LASSO Formula Network")

ggnet2(n, label = TRUE, size = "degree",size.cut = 3,size.min = 1, mode = "circle",edge.label = "weights",edge.label.size=4)

Venn Diagrams

Here I will explore which features are similar between the LASSO and the BSWiMS models


pvalues <- p.adjust(1.0-pnorm(LeukemiaModelBin$univariateAnalysis$ZUni),"BH")
topunivec <- as.character(LeukemiaModelBin$univariateAnalysis$Name[pvalues<0.05])
tob <- baggLeukemiaBSWiMS$frequencyTable>frac
topBSwims <- as.character(names(baggLeukemiaBSWiMS$frequencyTable[tob]))
tob <- baggLeukemiaLASSO$frequencyTable>frac
topLASSO <- as.character(names(baggLeukemiaLASSO$frequencyTable[tob]))
featurelist <- list(Univariate=topunivec,CVLASSO=topLASSO,BSWIMS=topBSwims)
vend <- venn(featurelist)
vgroups <- attr(vend, "intersections")
legend("center",vgroups$`Univariate:CVLASSO:BSWIMS`,cex=0.75)