Lukemia Data Set Analysis
library("epiR")
library("FRESA.CAD")
library(network)
library(GGally)
library("e1071")
library("gplots")
library("randomForest")
library(rpart)
Loops <- 10
Repeats <- 5
filter <- 0.05
#LeukemiaA <- read.delim("~/Development/FresaPaper/Leukemia/Leukemia.txt")
#LeukemiaA <- read.delim("./Leukemia/Leukemia.txt")
LeukemiaA <- read.delim("./Leukemia.txt")
Leukemia <- LeukemiaA[,-1]
rownames(Leukemia) <- LeukemiaA[,1]
Leukemia2 <- Leukemia
#Leukemia$Status <- Leukemia$Status-0.5
Modeling
#system.time(LeukemiaModelBin <- FRESA.Model(formula = Status ~ 1, Leukemia,filter.p.value=filter))
filename = paste("LeukemiaModelBin",Loops,Repeats,sprintf("%5.4f",filter),"res.RDATA",sep="_")
system.time(LeukemiaModelBin <- FRESA.Model(formula = Status ~ 1, Leukemia, CVfolds=Loops, repeats=Repeats,filter.p.value=filter,usrFitFun=svm))
save(LeukemiaModelBin,file=filename)
load(file=filename)
Summary Tables
pander::pander(summary(LeukemiaModelBin$BSWiMS.model)$coefficients,digits=4)
Table continues below
| M31523_at |
-0.0004483 |
0.9995 |
0.9996 |
0.9996 |
0.9433 |
| M27891_at |
7.718e-05 |
1 |
1 |
1 |
0.9255 |
| M23197_at |
0.001027 |
1.001 |
1.001 |
1.001 |
0.961 |
| X95735_at |
9.432e-05 |
1 |
1 |
1 |
0.9326 |
| M11722_at |
-0.0001223 |
0.9999 |
0.9999 |
0.9999 |
0.9255 |
| M92287_at |
-9.988e-05 |
0.9999 |
0.9999 |
0.9999 |
0.9078 |
| M84526_at |
0.0001814 |
1 |
1 |
1 |
0.9007 |
| D88422_at |
0.0002429 |
1 |
1 |
1 |
0.8901 |
| D88270_at |
-0.0006718 |
0.9993 |
0.9993 |
0.9994 |
0.8794 |
| U05259_rna1_at |
-0.0003798 |
0.9995 |
0.9996 |
0.9997 |
0.8794 |
| L09209_s_at |
0.01117 |
1.009 |
1.011 |
1.013 |
0.9043 |
| J05243_at |
-0.0006166 |
0.9992 |
0.9994 |
0.9996 |
0.883 |
| X59417_at |
-6.743e-05 |
0.9999 |
0.9999 |
0.9999 |
0.8901 |
| U46499_at |
0.0001243 |
1 |
1 |
1 |
0.9149 |
| M31211_s_at |
-0.0004152 |
0.9995 |
0.9996 |
0.9997 |
0.8582 |
| X62654_rna1_at |
0.0002732 |
1 |
1 |
1 |
0.8723 |
| Z15115_at |
-0.0002173 |
0.9997 |
0.9998 |
0.9998 |
0.8404 |
| M63138_at |
6.873e-05 |
1 |
1 |
1 |
0.8617 |
| X62320_at |
0.0004958 |
1 |
1 |
1.001 |
0.9043 |
| M96326_rna1_at |
0.0001838 |
1 |
1 |
1 |
0.8759 |
| M63379_at |
0.09729 |
1.092 |
1.102 |
1.112 |
0.8617 |
| HG1612.HT1612_at |
-0.0003798 |
0.9995 |
0.9996 |
0.9998 |
0.8617 |
| M22960_at |
0.0003057 |
1 |
1 |
1 |
0.8121 |
| U16954_at |
-0.002269 |
0.997 |
0.9977 |
0.9984 |
0.8014 |
| U57721_at |
0.002286 |
1.001 |
1.002 |
1.003 |
0.7447 |
| X68560_at |
-0.0382 |
0.9524 |
0.9625 |
0.9727 |
0.7482 |
| X03934_at |
-0.0001888 |
0.9998 |
0.9998 |
0.9999 |
0.6277 |
| M62982_at |
0.1883 |
1.155 |
1.207 |
1.261 |
0.6064 |
Table continues below
| M31523_at |
0.5 |
0.9433 |
0.9433 |
0.5 |
0.9433 |
| M27891_at |
0.5 |
0.9255 |
0.9255 |
0.5 |
0.9255 |
| M23197_at |
0.5 |
0.961 |
0.961 |
0.5 |
0.961 |
| X95735_at |
0.5 |
0.9326 |
0.9326 |
0.5 |
0.9326 |
| M11722_at |
0.5 |
0.9255 |
0.9255 |
0.5 |
0.9255 |
| M92287_at |
0.5 |
0.9078 |
0.9078 |
0.5 |
0.9078 |
| M84526_at |
0.5 |
0.9007 |
0.9007 |
0.5 |
0.9007 |
| D88422_at |
0.5 |
0.8901 |
0.8901 |
0.5 |
0.8901 |
| D88270_at |
0.6277 |
0.9787 |
0.8794 |
0.6277 |
0.9787 |
| U05259_rna1_at |
0.8014 |
0.9894 |
0.8794 |
0.8014 |
0.9894 |
| L09209_s_at |
0.7482 |
1 |
0.9043 |
0.7482 |
1 |
| J05243_at |
0.8723 |
0.9574 |
0.883 |
0.8723 |
0.9574 |
| X59417_at |
0.5 |
0.8901 |
0.8901 |
0.5 |
0.8901 |
| U46499_at |
0.5 |
0.9149 |
0.9149 |
0.5 |
0.9149 |
| M31211_s_at |
0.5 |
0.8582 |
0.8582 |
0.5 |
0.8582 |
| X62654_rna1_at |
0.883 |
0.9574 |
0.8723 |
0.883 |
0.9574 |
| Z15115_at |
0.8121 |
0.9433 |
0.8404 |
0.8121 |
0.9433 |
| M63138_at |
0.5 |
0.8617 |
0.8617 |
0.5 |
0.8617 |
| X62320_at |
0.8617 |
0.9716 |
0.9043 |
0.8617 |
0.9716 |
| M96326_rna1_at |
0.7447 |
0.9291 |
0.8759 |
0.7447 |
0.9291 |
| M63379_at |
0.6064 |
1 |
0.8617 |
0.6064 |
1 |
| HG1612.HT1612_at |
0.9043 |
0.9716 |
0.8617 |
0.9043 |
0.9716 |
| M22960_at |
0.8404 |
0.9433 |
0.8121 |
0.8404 |
0.9433 |
| U16954_at |
0.8794 |
0.9894 |
0.8014 |
0.8794 |
0.9894 |
| U57721_at |
0.8759 |
0.9291 |
0.7447 |
0.8759 |
0.9291 |
| X68560_at |
0.9043 |
1 |
0.7482 |
0.9043 |
1 |
| X03934_at |
0.8794 |
0.9787 |
0.6277 |
0.8794 |
0.9787 |
| M62982_at |
0.8617 |
1 |
0.6064 |
0.8617 |
1 |
| M31523_at |
0.7939 |
1.773 |
19.14 |
18.65 |
| M27891_at |
0.7567 |
1.745 |
17.09 |
18.08 |
| M23197_at |
0.8174 |
1.674 |
20.52 |
15.17 |
| X95735_at |
0.7691 |
1.702 |
17.57 |
15.83 |
| M11722_at |
0.7355 |
1.759 |
16.29 |
18.28 |
| M92287_at |
0.727 |
1.716 |
16.07 |
16.27 |
| M84526_at |
0.7332 |
1.617 |
15.87 |
14.24 |
| D88422_at |
0.6973 |
1.603 |
14.59 |
13.97 |
| D88270_at |
0.7681 |
1.901 |
17.91 |
30.43 |
| U05259_rna1_at |
0.4748 |
1.887 |
9.05 |
30.67 |
| L09209_s_at |
0.6395 |
2 |
12.69 |
Inf |
| J05243_at |
0.2327 |
1.716 |
5.172 |
16.65 |
| X59417_at |
0.6209 |
1.56 |
12.41 |
12.19 |
| U46499_at |
0.6491 |
1.546 |
15.21 |
12.26 |
| M31211_s_at |
0.5705 |
1.461 |
11.45 |
10.45 |
| X62654_rna1_at |
0.2984 |
1.759 |
6.214 |
18.63 |
| Z15115_at |
0.3444 |
1.404 |
6.864 |
9.642 |
| M63138_at |
0.6259 |
1.319 |
12.44 |
9.094 |
| X62320_at |
0.3322 |
1.773 |
6.794 |
25.75 |
| M96326_rna1_at |
0.4455 |
1.745 |
8.493 |
17.71 |
| M63379_at |
0.8251 |
2 |
20.71 |
Inf |
| HG1612.HT1612_at |
0.2538 |
1.617 |
5.706 |
18.1 |
| M22960_at |
0.3405 |
1.66 |
6.937 |
15.23 |
| U16954_at |
0.3078 |
1.823 |
6.432 |
22.69 |
| U57721_at |
0.2371 |
1.518 |
5.363 |
12.49 |
| X68560_at |
0.3023 |
2 |
7.117 |
Inf |
| X03934_at |
0.3179 |
1.773 |
6.722 |
22.23 |
| M62982_at |
0.434 |
1.922 |
8.42 |
49.46 |
B:SWiMS Heat Map Plots
opg <- par(no.readonly = TRUE)
par(mfrow=c(1,1))
hm <- heatMaps(LeukemiaModelBin$BSWiMS.model$baggingAnalysis$RelativeFrequency,Outcome="Status",data=Leukemia,hCluster = TRUE,Scale=TRUE,xlab="Subject ID",transpose=TRUE,title="B:SWIMS Features")
#> [1] 2

par(opg)
ROC Plots
AccCITable <- NULL
BErrorCITable <- NULL
rp <- plotModels.ROC(LeukemiaModelBin$cvObject$LASSO.testPredictions,theCVfolds=Loops,main="LASSO",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))
rp <- plotModels.ROC(LeukemiaModelBin$cvObject$KNN.testPrediction,theCVfolds=Loops,main="KNN",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))
rp <- plotModels.ROC(LeukemiaModelBin$cvObject$Models.testPrediction,theCVfolds=Loops,predictor="Prediction",main="B:SWiMS",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))
rp <- plotModels.ROC(LeukemiaModelBin$cvObject$Models.testPrediction,theCVfolds=Loops,predictor="Ensemble.B.SWiMS",main="Ensembe B:SWiMS ",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))
Support Vector Machine(SVM) Analysis
LeukemiaModelBin$cvObject$Models.testPrediction$usrFitFunction_Sel <- LeukemiaModelBin$cvObject$Models.testPrediction$usrFitFunction_Sel -0.5
LeukemiaModelBin$cvObject$Models.testPrediction$usrFitFunction <- LeukemiaModelBin$cvObject$Models.testPrediction$usrFitFunction -0.5
rp <- plotModels.ROC(LeukemiaModelBin$cvObject$Models.testPrediction,theCVfolds=Loops,predictor="usrFitFunction",main="Filtered:SVM",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))
rp <- plotModels.ROC(LeukemiaModelBin$cvObject$Models.testPrediction,theCVfolds=Loops,predictor="usrFitFunction_Sel",main="B:SWiMS/SVM",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))
Barplots of Accuracy and Balanced Error
CVthesets <- c("LASSO","KNN","B:SWiMS","B:SWiMS Ensemble","SVM:Filterd","SVM:BSWIMS")
bp <- barPlotCiError(as.matrix(AccCITable),metricname="Accuracy",thesets=CVthesets,themethod="CV",main="Accuracy",args.legend = list(x = "bottomright"))

bp <- barPlotCiError(as.matrix(BErrorCITable),metricname="Balanced Error",thesets=CVthesets,themethod="CV",main="Balanced Error",args.legend = list(x = "topright"))

B:SWiMS Feature Plots
baggLeukemiaBSWiMS <- baggedModel(LeukemiaModelBin$cvObject$allBSWiMSFormulas.list,Leukemia,type="LOGIT",Outcome="Status")
#>
#> Num. Models: 896 To Test: 55 TopFreq: 50 Thrf: 1 Removed: 15
#> .........................................................................................
cf <- length(LeukemiaModelBin$cvObject$allBSWiMSFormulas.list)/(Loops*Repeats)
namestoShow <- names(baggLeukemiaBSWiMS$coefEvolution)[-c(1,2)]
frac = 0.25*Loops*Repeats
namestoShow <- namestoShow[baggLeukemiaBSWiMS$frequencyTable[namestoShow]>=frac]
fnshow <- min(11,length(namestoShow))
barplot(baggLeukemiaBSWiMS$frequencyTable[namestoShow],las = 2,cex.axis=1.0,cex.names=0.75,main="B:SWiMS Feature Frequency")

n <- network::network(cf*baggLeukemiaBSWiMS$formulaNetwork[1:fnshow,1:fnshow], directed = FALSE,ignore.eval = FALSE,names.eval = "weights")
gplots::heatmap.2(cf*baggLeukemiaBSWiMS$formulaNetwork[namestoShow,namestoShow],trace="none",mar=c(10,10),main="B:SWiMS Formula Network")

ggnet2(n, label = TRUE, size = "degree",size.cut = 3,size.min = 1, mode = "circle",edge.label = "weights",edge.label.size=4)
LASSO Feature Plots
baggLeukemiaLASSO <- baggedModel(LeukemiaModelBin$cvObject$LASSOVariables,Leukemia,type="LOGIT",Outcome="Status")
#>
#> Num. Models: 51 To Test: 56 TopFreq: 48 Thrf: 1 Removed: 14
#> .....
toshow <- sum(baggLeukemiaLASSO$frequencyTable>=frac)
fnshow <- min(11,length(baggLeukemiaLASSO$frequencyTable))
barplot(baggLeukemiaLASSO$frequencyTable[1:toshow],las = 2,cex.axis=1.0,cex.names=0.75,main="LASSO Feature Frequency")

n <- network::network(baggLeukemiaLASSO$formulaNetwork[1:fnshow,1:fnshow], directed = FALSE,ignore.eval = FALSE,names.eval = "weights")
gplots::heatmap.2(baggLeukemiaLASSO$formulaNetwork[1:toshow,1:toshow],trace="none",mar=c(10,10),main="LASSO Formula Network")

ggnet2(n, label = TRUE, size = "degree",size.cut = 3,size.min = 1, mode = "circle",edge.label = "weights",edge.label.size=4)

Venn Diagrams
Here I will explore which features are similar between the LASSO and the BSWiMS models
pvalues <- p.adjust(1.0-pnorm(LeukemiaModelBin$univariateAnalysis$ZUni),"BH")
topunivec <- as.character(LeukemiaModelBin$univariateAnalysis$Name[pvalues<0.05])
tob <- baggLeukemiaBSWiMS$frequencyTable>frac
topBSwims <- as.character(names(baggLeukemiaBSWiMS$frequencyTable[tob]))
tob <- baggLeukemiaLASSO$frequencyTable>frac
topLASSO <- as.character(names(baggLeukemiaLASSO$frequencyTable[tob]))
featurelist <- list(Univariate=topunivec,CVLASSO=topLASSO,BSWIMS=topBSwims)
vend <- venn(featurelist)
vgroups <- attr(vend, "intersections")
legend("center",vgroups$`Univariate:CVLASSO:BSWIMS`,cex=0.75)
