Lymphoma Data Set Analysis
library("epiR")
library("FRESA.CAD")
library(network)
library(GGally)
library("e1071")
library("gplots")
library("randomForest")
library(rpart)
a=as.numeric(Sys.time());
set.seed(a);
Loops <- 10
Repeats <- 5
filter <- 0.05
#DLBCL_Lymohoma_A <- read.delim("~/Development/FresaPaper/DLBCL/DLBCL_Lymohoma_A.txt")
#DLBCL_Lymohoma_A <- read.delim("./DLBCL/DLBCL_Lymohoma_A.txt")
DLBCL_Lymohoma_A <- read.delim("./DLBCL_Lymohoma_A.txt")
Lymphoma <- DLBCL_Lymohoma_A[,-c(1,3,4)]
rownames(Lymphoma) <- DLBCL_Lymohoma_A[,1]
Lymphoma2 <- Lymphoma
#Lymphoma$Class <- Lymphoma$Class-0.5
Modeling
#bsm <- BSWiMS.model(formula=Class ~ 1,data=Lymphoma,type="LOGIT",testType="zIDI")
#bsm$formula.list
filename = paste("LympaModelBin",Loops,Repeats,sprintf("%5.4f",filter),"res.RDATA",sep="_")
system.time(LymphomaModelBin <- FRESA.Model(formula = Class ~ 1, Lymphoma, CVfolds=Loops, repeats=Repeats,filter.p.value=filter,usrFitFun=svm))
save(LymphomaModelBin,file=filename)
#load(file=filename)
Summary Tables
pander::pander(summary(LymphomaModelBin$BSWiMS.model)$coefficients,digits=4)
Table continues below
| D55716_at |
0.0002862 |
1 |
1 |
1 |
0.8908 |
| X02152_at |
2.519e-05 |
1 |
1 |
1 |
0.9109 |
| HG4074.HT4344_at |
0.0002532 |
1 |
1 |
1 |
0.8764 |
| M63835_at |
0.0004466 |
1 |
1 |
1.001 |
0.8764 |
| M57710_at |
4.778e-05 |
1 |
1 |
1 |
0.8247 |
| HG1980.HT2023_at |
0.0003416 |
1 |
1 |
1 |
0.8477 |
| J03909_at |
0.0001337 |
1 |
1 |
1 |
0.842 |
| U14518_at |
1.128 |
2.494 |
3.09 |
3.828 |
0.8477 |
| X17620_at |
7.504e-05 |
1 |
1 |
1 |
0.8707 |
| X56494_at |
2.894e-05 |
1 |
1 |
1 |
0.8247 |
| X01060_at |
0.0001585 |
1 |
1 |
1 |
0.8448 |
| D79997_at |
0.0004459 |
1 |
1 |
1.001 |
0.8678 |
| D82348_at |
7.593e-05 |
1 |
1 |
1 |
0.819 |
| L17131_rna1_at |
0.0001739 |
1 |
1 |
1 |
0.8046 |
| U28386_at |
0.001132 |
1.001 |
1.001 |
1.001 |
0.8477 |
| M63138_at |
4.036e-05 |
1 |
1 |
1 |
0.8218 |
| M14328_s_at |
1.737e-05 |
1 |
1 |
1 |
0.8362 |
| L33842_rna1_at |
9.869e-05 |
1 |
1 |
1 |
0.8333 |
| M13792_at |
9.743e-05 |
1 |
1 |
1 |
0.8707 |
| L25876_at |
0.001837 |
1.002 |
1.002 |
1.002 |
0.8132 |
| Z21966_at |
-0.7035 |
0.4132 |
0.4949 |
0.5926 |
0.8534 |
| D87119_at |
-0.001319 |
0.9983 |
0.9987 |
0.999 |
0.7931 |
| M94880_f_at |
-9.552e-05 |
0.9999 |
0.9999 |
0.9999 |
0.7241 |
| D78134_at |
-0.0003692 |
0.9995 |
0.9996 |
0.9997 |
0.8075 |
| D83597_at |
-0.0006531 |
0.9992 |
0.9993 |
0.9995 |
0.6983 |
| Z35227_at |
-0.0005659 |
0.9993 |
0.9994 |
0.9996 |
0.727 |
Table continues below
| D55716_at |
0.5 |
0.8908 |
0.8908 |
0.5 |
0.8908 |
| X02152_at |
0.5 |
0.9109 |
0.9109 |
0.5 |
0.9109 |
| HG4074.HT4344_at |
0.5 |
0.8764 |
0.8764 |
0.5 |
0.8764 |
| M63835_at |
0.5 |
0.8764 |
0.8764 |
0.5 |
0.8764 |
| M57710_at |
0.5 |
0.8247 |
0.8247 |
0.5 |
0.8247 |
| HG1980.HT2023_at |
0.8075 |
0.9626 |
0.8477 |
0.8075 |
0.9626 |
| J03909_at |
0.7241 |
0.9684 |
0.842 |
0.7241 |
0.9684 |
| U14518_at |
0.8534 |
0.9741 |
0.8477 |
0.8534 |
0.9741 |
| X17620_at |
0.5 |
0.8707 |
0.8707 |
0.5 |
0.8707 |
| X56494_at |
0.5 |
0.8247 |
0.8247 |
0.5 |
0.8247 |
| X01060_at |
0.5 |
0.8448 |
0.8448 |
0.5 |
0.8448 |
| D79997_at |
0.5 |
0.8678 |
0.8678 |
0.5 |
0.8678 |
| D82348_at |
0.5 |
0.819 |
0.819 |
0.5 |
0.819 |
| L17131_rna1_at |
0.6983 |
0.9483 |
0.8046 |
0.6983 |
0.9483 |
| U28386_at |
0.7931 |
0.9828 |
0.8477 |
0.7931 |
0.9828 |
| M63138_at |
0.5 |
0.8218 |
0.8218 |
0.5 |
0.8218 |
| M14328_s_at |
0.5 |
0.8362 |
0.8362 |
0.5 |
0.8362 |
| L33842_rna1_at |
0.5 |
0.8333 |
0.8333 |
0.5 |
0.8333 |
| M13792_at |
0.5 |
0.8707 |
0.8707 |
0.5 |
0.8707 |
| L25876_at |
0.727 |
0.9511 |
0.8132 |
0.727 |
0.9511 |
| Z21966_at |
0.8477 |
0.9741 |
0.8534 |
0.8477 |
0.9741 |
| D87119_at |
0.8477 |
0.9828 |
0.7931 |
0.8477 |
0.9828 |
| M94880_f_at |
0.842 |
0.9684 |
0.7241 |
0.842 |
0.9684 |
| D78134_at |
0.8477 |
0.9626 |
0.8075 |
0.8477 |
0.9626 |
| D83597_at |
0.8046 |
0.9483 |
0.6983 |
0.8046 |
0.9483 |
| Z35227_at |
0.8132 |
0.9511 |
0.727 |
0.8132 |
0.9511 |
| D55716_at |
0.7243 |
1.506 |
17.17 |
12.69 |
| X02152_at |
0.665 |
1.644 |
15.36 |
15.99 |
| HG4074.HT4344_at |
0.6389 |
1.414 |
14.17 |
11.12 |
| M63835_at |
0.6184 |
1.46 |
13.78 |
11.89 |
| M57710_at |
0.5517 |
1.46 |
12.02 |
12.54 |
| HG1980.HT2023_at |
0.6427 |
1.862 |
14.88 |
27.98 |
| J03909_at |
0.5252 |
1.805 |
11.94 |
22.98 |
| U14518_at |
0.4717 |
1.816 |
10.32 |
2.996e+307 |
| X17620_at |
0.6067 |
1.414 |
13.43 |
11.07 |
| X56494_at |
0.5902 |
1.483 |
12.76 |
12.71 |
| X01060_at |
0.573 |
1.379 |
12.33 |
10.78 |
| D79997_at |
0.5511 |
1.437 |
11.82 |
12.17 |
| D82348_at |
0.5542 |
1.287 |
11.9 |
9.333 |
| L17131_rna1_at |
0.5609 |
1.494 |
11.63 |
12.18 |
| U28386_at |
0.5042 |
1.885 |
11.53 |
31.07 |
| M63138_at |
0.5372 |
1.368 |
11.51 |
10.52 |
| M14328_s_at |
0.5565 |
1.529 |
12.03 |
13.74 |
| L33842_rna1_at |
0.5164 |
1.322 |
11 |
10.04 |
| M13792_at |
0.5391 |
1.356 |
11.66 |
10.62 |
| L25876_at |
0.5904 |
1.828 |
13.02 |
25.82 |
| Z21966_at |
0.339 |
1.747 |
7.646 |
2.996e+307 |
| D87119_at |
0.303 |
1.586 |
7.11 |
14.37 |
| M94880_f_at |
0.3146 |
1.724 |
7.567 |
19.18 |
| D78134_at |
0.3381 |
1.874 |
7.403 |
29.62 |
| D83597_at |
0.295 |
1.621 |
7.179 |
15.34 |
| Z35227_at |
0.3345 |
1.701 |
7.357 |
17.56 |
B:SWiMS Heat Map Plots
opg <- par(no.readonly = TRUE)
par(mfrow=c(1,1))
hm <- heatMaps(LymphomaModelBin$BSWiMS.model$baggingAnalysis$RelativeFrequency,Outcome="Class",data=Lymphoma,hCluster = TRUE,Scale=TRUE,xlab="Subject ID",transpose=TRUE,title="B:SWIMS Features")
#> [1] 2

par(opg)
ROC Plots
AccCITable <- NULL
BErrorCITable <- NULL
rp <- plotModels.ROC(LymphomaModelBin$cvObject$LASSO.testPredictions,theCVfolds=Loops,main="LASSO",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))
rp <- plotModels.ROC(LymphomaModelBin$cvObject$KNN.testPrediction,theCVfolds=Loops,main="KNN",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))
rp <- plotModels.ROC(LymphomaModelBin$cvObject$Models.testPrediction,theCVfolds=Loops,predictor="Prediction",main="B:SWiMS",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))
rp <- plotModels.ROC(LymphomaModelBin$cvObject$Models.testPrediction,theCVfolds=Loops,predictor="Ensemble.B.SWiMS",main="Ensembe B:SWiMS ",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))
Support Vector Machine(SVM) Analysis
LymphomaModelBin$cvObject$Models.testPrediction$usrFitFunction_Sel <- LymphomaModelBin$cvObject$Models.testPrediction$usrFitFunction_Sel -0.5
LymphomaModelBin$cvObject$Models.testPrediction$usrFitFunction <- LymphomaModelBin$cvObject$Models.testPrediction$usrFitFunction -0.5
rp <- plotModels.ROC(LymphomaModelBin$cvObject$Models.testPrediction,theCVfolds=Loops,predictor="usrFitFunction",main="Filtered:SVM",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))
rp <- plotModels.ROC(LymphomaModelBin$cvObject$Models.testPrediction,theCVfolds=Loops,predictor="usrFitFunction_Sel",main="B:SWiMS/SVM",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))
Barplots of Accuracy and Balanced Error
CVthesets <- c("LASSO","KNN","B:SWiMS","B:SWiMS Ensemble","SVM:Filterd","SVM:BSWIMS")
bp <- barPlotCiError(as.matrix(AccCITable),metricname="Accuracy",thesets=CVthesets,themethod="CV",main="Accuracy",args.legend = list(x = "bottomright"))

bp <- barPlotCiError(as.matrix(BErrorCITable),metricname="Balanced Error",thesets=CVthesets,themethod="CV",main="Balanced Error",args.legend = list(x = "topright"))

B:SWiMS Feature Plots
baggLymphomaBSWiMS <- baggedModel(LymphomaModelBin$cvObject$allBSWiMSFormulas.list,Lymphoma,type="LOGIT",Outcome="Class")
#>
#> Num. Models: 972 To Test: 74 TopFreq: 50 Thrf: 1 Removed: 20
#> .................................................................................................
cf <- length(LymphomaModelBin$cvObject$allBSWiMSFormulas.list)/(Loops*Repeats)
namestoShow <- names(baggLymphomaBSWiMS$coefEvolution)[-c(1,2)]
frac = 0.25*Loops*Repeats
namestoShow <- namestoShow[baggLymphomaBSWiMS$frequencyTable[namestoShow]>=frac]
fnshow <- min(11,length(namestoShow))
barplot(baggLymphomaBSWiMS$frequencyTable[namestoShow],las = 2,cex.axis=1.0,cex.names=0.75,main="B:SWiMS Feature Frequency")

n <- network::network(cf*baggLymphomaBSWiMS$formulaNetwork[1:fnshow,1:fnshow], directed = FALSE,ignore.eval = FALSE,names.eval = "weights")
gplots::heatmap.2(cf*baggLymphomaBSWiMS$formulaNetwork[namestoShow,namestoShow],trace="none",mar=c(10,10),main="B:SWiMS Formula Network")

ggnet2(n, label = TRUE, size = "degree",size.cut = 3,size.min = 1, mode = "circle",edge.label = "weights",edge.label.size=4)
LASSO Feature Plots
baggLymphomaLASSO <- baggedModel(LymphomaModelBin$cvObject$LASSOVariables,Lymphoma,type="LOGIT",Outcome="Class")
#>
#> Num. Models: 51 To Test: 112 TopFreq: 49 Thrf: 1 Removed: 39
#> .....
toshow <- sum(baggLymphomaLASSO$frequencyTable>=frac)
fnshow <- min(11,length(baggLymphomaLASSO$frequencyTable))
barplot(baggLymphomaLASSO$frequencyTable[1:toshow],las = 2,cex.axis=1.0,cex.names=0.75,main="LASSO Feature Frequency")

n <- network::network(baggLymphomaLASSO$formulaNetwork[1:fnshow,1:fnshow], directed = FALSE,ignore.eval = FALSE,names.eval = "weights")
gplots::heatmap.2(baggLymphomaLASSO$formulaNetwork[1:toshow,1:toshow],trace="none",mar=c(10,10),main="LASSO Formula Network")

ggnet2(n, label = TRUE, size = "degree",size.cut = 3,size.min = 1, mode = "circle",edge.label = "weights",edge.label.size=4)

Venn Diagrams
Here I will explore which features are similar between the LASSO and the BSWiMS models
pvalues <- p.adjust(1.0-pnorm(LymphomaModelBin$univariateAnalysis$ZUni),"BH")
topunivec <- as.character(LymphomaModelBin$univariateAnalysis$Name[pvalues<0.05])
tob <- baggLymphomaBSWiMS$frequencyTable>frac
topBSwims <- as.character(names(baggLymphomaBSWiMS$frequencyTable[tob]))
tob <- baggLymphomaLASSO$frequencyTable>frac
topLASSO <- as.character(names(baggLymphomaLASSO$frequencyTable[tob]))
featurelist <- list(Univariate=topunivec,CVLASSO=topLASSO,BSWIMS=topBSwims)
vend <- venn(featurelist)
vgroups <- attr(vend, "intersections")
legend("center",vgroups$`Univariate:CVLASSO:BSWIMS`,cex=0.75)
