Lymphoma Data Set Analysis

library("epiR")
library("FRESA.CAD")
library(network)
library(GGally)
library("e1071")
library("gplots")
library("randomForest")
library(rpart)
a=as.numeric(Sys.time());
set.seed(a);

Loops <- 10
Repeats <- 5
filter <- 0.05

#DLBCL_Lymohoma_A <- read.delim("~/Development/FresaPaper/DLBCL/DLBCL_Lymohoma_A.txt")
#DLBCL_Lymohoma_A <- read.delim("./DLBCL/DLBCL_Lymohoma_A.txt")
DLBCL_Lymohoma_A <- read.delim("./DLBCL_Lymohoma_A.txt")

Lymphoma <- DLBCL_Lymohoma_A[,-c(1,3,4)]
rownames(Lymphoma) <- DLBCL_Lymohoma_A[,1]
Lymphoma2 <- Lymphoma
#Lymphoma$Class <- Lymphoma$Class-0.5

Modeling


#bsm <- BSWiMS.model(formula=Class ~ 1,data=Lymphoma,type="LOGIT",testType="zIDI")
#bsm$formula.list



filename = paste("LympaModelBin",Loops,Repeats,sprintf("%5.4f",filter),"res.RDATA",sep="_")
system.time(LymphomaModelBin <- FRESA.Model(formula = Class ~ 1, Lymphoma, CVfolds=Loops, repeats=Repeats,filter.p.value=filter,usrFitFun=svm))
save(LymphomaModelBin,file=filename)
#load(file=filename)

Summary Tables

pander::pander(summary(LymphomaModelBin$BSWiMS.model)$coefficients,digits=4)
Table continues below
  Estimate lower OR upper u.Accuracy
D55716_at 0.0002862 1 1 1 0.8908
X02152_at 2.519e-05 1 1 1 0.9109
HG4074.HT4344_at 0.0002532 1 1 1 0.8764
M63835_at 0.0004466 1 1 1.001 0.8764
M57710_at 4.778e-05 1 1 1 0.8247
HG1980.HT2023_at 0.0003416 1 1 1 0.8477
J03909_at 0.0001337 1 1 1 0.842
U14518_at 1.128 2.494 3.09 3.828 0.8477
X17620_at 7.504e-05 1 1 1 0.8707
X56494_at 2.894e-05 1 1 1 0.8247
X01060_at 0.0001585 1 1 1 0.8448
D79997_at 0.0004459 1 1 1.001 0.8678
D82348_at 7.593e-05 1 1 1 0.819
L17131_rna1_at 0.0001739 1 1 1 0.8046
U28386_at 0.001132 1.001 1.001 1.001 0.8477
M63138_at 4.036e-05 1 1 1 0.8218
M14328_s_at 1.737e-05 1 1 1 0.8362
L33842_rna1_at 9.869e-05 1 1 1 0.8333
M13792_at 9.743e-05 1 1 1 0.8707
L25876_at 0.001837 1.002 1.002 1.002 0.8132
Z21966_at -0.7035 0.4132 0.4949 0.5926 0.8534
D87119_at -0.001319 0.9983 0.9987 0.999 0.7931
M94880_f_at -9.552e-05 0.9999 0.9999 0.9999 0.7241
D78134_at -0.0003692 0.9995 0.9996 0.9997 0.8075
D83597_at -0.0006531 0.9992 0.9993 0.9995 0.6983
Z35227_at -0.0005659 0.9993 0.9994 0.9996 0.727
Table continues below
  r.Accuracy full.Accuracy u.AUC r.AUC full.AUC
D55716_at 0.5 0.8908 0.8908 0.5 0.8908
X02152_at 0.5 0.9109 0.9109 0.5 0.9109
HG4074.HT4344_at 0.5 0.8764 0.8764 0.5 0.8764
M63835_at 0.5 0.8764 0.8764 0.5 0.8764
M57710_at 0.5 0.8247 0.8247 0.5 0.8247
HG1980.HT2023_at 0.8075 0.9626 0.8477 0.8075 0.9626
J03909_at 0.7241 0.9684 0.842 0.7241 0.9684
U14518_at 0.8534 0.9741 0.8477 0.8534 0.9741
X17620_at 0.5 0.8707 0.8707 0.5 0.8707
X56494_at 0.5 0.8247 0.8247 0.5 0.8247
X01060_at 0.5 0.8448 0.8448 0.5 0.8448
D79997_at 0.5 0.8678 0.8678 0.5 0.8678
D82348_at 0.5 0.819 0.819 0.5 0.819
L17131_rna1_at 0.6983 0.9483 0.8046 0.6983 0.9483
U28386_at 0.7931 0.9828 0.8477 0.7931 0.9828
M63138_at 0.5 0.8218 0.8218 0.5 0.8218
M14328_s_at 0.5 0.8362 0.8362 0.5 0.8362
L33842_rna1_at 0.5 0.8333 0.8333 0.5 0.8333
M13792_at 0.5 0.8707 0.8707 0.5 0.8707
L25876_at 0.727 0.9511 0.8132 0.727 0.9511
Z21966_at 0.8477 0.9741 0.8534 0.8477 0.9741
D87119_at 0.8477 0.9828 0.7931 0.8477 0.9828
M94880_f_at 0.842 0.9684 0.7241 0.842 0.9684
D78134_at 0.8477 0.9626 0.8075 0.8477 0.9626
D83597_at 0.8046 0.9483 0.6983 0.8046 0.9483
Z35227_at 0.8132 0.9511 0.727 0.8132 0.9511
  IDI NRI z.IDI z.NRI
D55716_at 0.7243 1.506 17.17 12.69
X02152_at 0.665 1.644 15.36 15.99
HG4074.HT4344_at 0.6389 1.414 14.17 11.12
M63835_at 0.6184 1.46 13.78 11.89
M57710_at 0.5517 1.46 12.02 12.54
HG1980.HT2023_at 0.6427 1.862 14.88 27.98
J03909_at 0.5252 1.805 11.94 22.98
U14518_at 0.4717 1.816 10.32 2.996e+307
X17620_at 0.6067 1.414 13.43 11.07
X56494_at 0.5902 1.483 12.76 12.71
X01060_at 0.573 1.379 12.33 10.78
D79997_at 0.5511 1.437 11.82 12.17
D82348_at 0.5542 1.287 11.9 9.333
L17131_rna1_at 0.5609 1.494 11.63 12.18
U28386_at 0.5042 1.885 11.53 31.07
M63138_at 0.5372 1.368 11.51 10.52
M14328_s_at 0.5565 1.529 12.03 13.74
L33842_rna1_at 0.5164 1.322 11 10.04
M13792_at 0.5391 1.356 11.66 10.62
L25876_at 0.5904 1.828 13.02 25.82
Z21966_at 0.339 1.747 7.646 2.996e+307
D87119_at 0.303 1.586 7.11 14.37
M94880_f_at 0.3146 1.724 7.567 19.18
D78134_at 0.3381 1.874 7.403 29.62
D83597_at 0.295 1.621 7.179 15.34
Z35227_at 0.3345 1.701 7.357 17.56

B:SWiMS Heat Map Plots

opg <- par(no.readonly = TRUE)
par(mfrow=c(1,1))

hm <- heatMaps(LymphomaModelBin$BSWiMS.model$baggingAnalysis$RelativeFrequency,Outcome="Class",data=Lymphoma,hCluster = TRUE,Scale=TRUE,xlab="Subject ID",transpose=TRUE,title="B:SWIMS Features")
#> [1] 2

par(opg)

ROC Plots

AccCITable <- NULL
BErrorCITable <- NULL


rp <- plotModels.ROC(LymphomaModelBin$cvObject$LASSO.testPredictions,theCVfolds=Loops,main="LASSO",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))

rp <- plotModels.ROC(LymphomaModelBin$cvObject$KNN.testPrediction,theCVfolds=Loops,main="KNN",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))

rp <- plotModels.ROC(LymphomaModelBin$cvObject$Models.testPrediction,theCVfolds=Loops,predictor="Prediction",main="B:SWiMS",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))

rp <- plotModels.ROC(LymphomaModelBin$cvObject$Models.testPrediction,theCVfolds=Loops,predictor="Ensemble.B.SWiMS",main="Ensembe B:SWiMS ",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))

Support Vector Machine(SVM) Analysis


LymphomaModelBin$cvObject$Models.testPrediction$usrFitFunction_Sel <- LymphomaModelBin$cvObject$Models.testPrediction$usrFitFunction_Sel -0.5
LymphomaModelBin$cvObject$Models.testPrediction$usrFitFunction <- LymphomaModelBin$cvObject$Models.testPrediction$usrFitFunction -0.5

rp <- plotModels.ROC(LymphomaModelBin$cvObject$Models.testPrediction,theCVfolds=Loops,predictor="usrFitFunction",main="Filtered:SVM",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))

rp <- plotModels.ROC(LymphomaModelBin$cvObject$Models.testPrediction,theCVfolds=Loops,predictor="usrFitFunction_Sel",main="B:SWiMS/SVM",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))

Barplots of Accuracy and Balanced Error


CVthesets <- c("LASSO","KNN","B:SWiMS","B:SWiMS Ensemble","SVM:Filterd","SVM:BSWIMS")



bp <- barPlotCiError(as.matrix(AccCITable),metricname="Accuracy",thesets=CVthesets,themethod="CV",main="Accuracy",args.legend = list(x = "bottomright"))

bp <- barPlotCiError(as.matrix(BErrorCITable),metricname="Balanced Error",thesets=CVthesets,themethod="CV",main="Balanced Error",args.legend = list(x = "topright"))

B:SWiMS Feature Plots

baggLymphomaBSWiMS <- baggedModel(LymphomaModelBin$cvObject$allBSWiMSFormulas.list,Lymphoma,type="LOGIT",Outcome="Class")
#> 
#> Num. Models: 972  To Test: 74  TopFreq: 50  Thrf: 1  Removed: 20 
#> .................................................................................................

cf <- length(LymphomaModelBin$cvObject$allBSWiMSFormulas.list)/(Loops*Repeats)

namestoShow <- names(baggLymphomaBSWiMS$coefEvolution)[-c(1,2)]
frac = 0.25*Loops*Repeats

namestoShow <- namestoShow[baggLymphomaBSWiMS$frequencyTable[namestoShow]>=frac]

fnshow <- min(11,length(namestoShow))
barplot(baggLymphomaBSWiMS$frequencyTable[namestoShow],las = 2,cex.axis=1.0,cex.names=0.75,main="B:SWiMS Feature Frequency")


n <- network::network(cf*baggLymphomaBSWiMS$formulaNetwork[1:fnshow,1:fnshow], directed = FALSE,ignore.eval = FALSE,names.eval = "weights")
gplots::heatmap.2(cf*baggLymphomaBSWiMS$formulaNetwork[namestoShow,namestoShow],trace="none",mar=c(10,10),main="B:SWiMS Formula Network")


ggnet2(n, label = TRUE, size = "degree",size.cut = 3,size.min = 1, mode = "circle",edge.label = "weights",edge.label.size=4)

LASSO Feature Plots

baggLymphomaLASSO <- baggedModel(LymphomaModelBin$cvObject$LASSOVariables,Lymphoma,type="LOGIT",Outcome="Class")
#> 
#> Num. Models: 51  To Test: 112  TopFreq: 49  Thrf: 1  Removed: 39 
#> .....

toshow <- sum(baggLymphomaLASSO$frequencyTable>=frac)
fnshow <- min(11,length(baggLymphomaLASSO$frequencyTable))
barplot(baggLymphomaLASSO$frequencyTable[1:toshow],las = 2,cex.axis=1.0,cex.names=0.75,main="LASSO Feature Frequency")


n <- network::network(baggLymphomaLASSO$formulaNetwork[1:fnshow,1:fnshow], directed = FALSE,ignore.eval = FALSE,names.eval = "weights")
gplots::heatmap.2(baggLymphomaLASSO$formulaNetwork[1:toshow,1:toshow],trace="none",mar=c(10,10),main="LASSO Formula Network")

ggnet2(n, label = TRUE, size = "degree",size.cut = 3,size.min = 1, mode = "circle",edge.label = "weights",edge.label.size=4)

Venn Diagrams

Here I will explore which features are similar between the LASSO and the BSWiMS models


pvalues <- p.adjust(1.0-pnorm(LymphomaModelBin$univariateAnalysis$ZUni),"BH")
topunivec <- as.character(LymphomaModelBin$univariateAnalysis$Name[pvalues<0.05])
tob <- baggLymphomaBSWiMS$frequencyTable>frac
topBSwims <- as.character(names(baggLymphomaBSWiMS$frequencyTable[tob]))
tob <- baggLymphomaLASSO$frequencyTable>frac
topLASSO <- as.character(names(baggLymphomaLASSO$frequencyTable[tob]))
featurelist <- list(Univariate=topunivec,CVLASSO=topLASSO,BSWIMS=topBSwims)
vend <- venn(featurelist)
vgroups <- attr(vend, "intersections")
legend("center",vgroups$`Univariate:CVLASSO:BSWIMS`,cex=0.75)