Lung Data Set Analysis

library("epiR")
library("FRESA.CAD")
library(network)
library(GGally)
library("e1071")
library("gplots")
library("randomForest")
library(rpart)
# Seed the random number generator from the current clock time; the value
# used is kept in `a`. Because the seed changes on every run, results are
# not reproducible from one run to the next.
a <- as.numeric(Sys.time())
set.seed(a)

# Settings passed to FRESA.Model() below: number of cross-validation folds,
# number of repetitions, and the p-value used by the feature filter.
Loops <- 10
Repeats <- 5
filter <- 0.01

# Read the tab-delimited table and transpose it. After transposing, row 1 is
# used as the source of the column names and the remaining rows as the values.
#LungData <- t(read.delim("./Survival/Lung.txt"))
LungData <- t(read.delim("./Lung.txt"))

# The transposed table may be a character matrix, so convert the value rows
# to doubles explicitly. data.matrix() is avoided on purpose: from R 4.0.0 it
# turns character columns into factor codes instead of parsing the numbers.
Lung <- LungData[-1, , drop = FALSE]
storage.mode(Lung) <- "double"
Lung <- as.data.frame(Lung)
colnames(Lung) <- paste0("V", LungData[1, ])

# Copy of the data without the "VfreeSurvival" column. The column is removed
# by name; the previous `-c(name %in% colnames)` form evaluated to `-1` and
# dropped the first column instead.
naurvLung <- Lung[, colnames(Lung) != "VfreeSurvival", drop = FALSE]

Modeling


# Disabled alternative: model VStatus alone on the data without the
# VfreeSurvival column.
#system.time(LungModelBin <- FRESA.Model(formula = VStatus ~ 1,naurvLung))


# Output file name; it embeds Loops, Repeats and the filter p-value so runs
# with different settings are saved to different files.
#filename = paste("./Survival/LungModel_Surv",Loops,Repeats,sprintf("%5.4f",filter),"res.RDATA",sep="_")
filename <- paste(
  "LungModel_Surv",
  Loops,
  Repeats,
  sprintf("%5.4f", filter),
  "res.RDATA",
  sep = "_"
)

# Fit the model with a Surv(VfreeSurvival, VStatus) outcome on the full data
# set; system.time() reports how long the fit took.
system.time(
  LungModelBin <- FRESA.Model(
    formula = Surv(VfreeSurvival, VStatus) ~ 1,
    Lung,
    CVfolds = Loops,
    repeats = Repeats,
    filter.p.value = filter
  )
)

# Store the fitted object under the file name built above.
save(LungModelBin, file = filename)

Summary Tables

# Print the coefficient table from the summary of the B:SWiMS model,
# rounded to four significant digits.
pander::pander(
  summary(LungModelBin$BSWiMS.model)$coefficients,
  digits = 4
)
Table continues below
  Estimate lower OR upper u.Accuracy
V202908_at -0.0001515 0.9998 0.9998 0.9999 0.7586
V202497_x_at 0.0003256 1 1 1 0.7586
V203615_x_at -3.843e-05 0.9999 1 1 0.7586
V215299_x_at -3.595e-06 1 1 1 0.7759
V212911_at -0.0001358 0.9998 0.9999 0.9999 0.7931
V204624_at -0.0001573 0.9998 0.9998 0.9999 0.7931
V220968_s_at -5.134e-05 0.9999 0.9999 1 0.7241
V204567_s_at -0.0004629 0.9994 0.9995 0.9997 0.7414
V218262_at -0.0003356 0.9996 0.9997 0.9998 0.7414
V221583_s_at -0.0006655 0.9991 0.9993 0.9996 0.7414
V210853_at -0.01149 0.9856 0.9886 0.9916 0.7586
V220625_s_at 9.847e-05 1 1 1 0.7586
V207122_x_at -5.69e-05 0.9999 0.9999 1 0.7241
V218593_at 0.0003996 1 1 1.001 0.6897
V206102_at 0.0001066 1 1 1 0.7759
V210939_s_at -0.001605 0.9979 0.9984 0.9989 0.7586
V220898_at -0.0002115 0.9997 0.9998 0.9999 0.6897
V213923_at 8.788e-05 1 1 1 0.7586
V217553_at 0.000302 1 1 1 0.7414
V214645_at -0.0006136 0.9992 0.9994 0.9996 0.7586
V213085_s_at -0.002186 0.9972 0.9978 0.9984 0.7241
V202498_s_at 0.0008266 1.001 1.001 1.001 0.7241
V207638_at -0.002726 0.9965 0.9973 0.9981 0.6552
V219492_at 0.0002871 1 1 1 0.6724
V214894_x_at 7.214e-05 1 1 1 0.7414
V209201_x_at 0.0001711 1 1 1 0.7586
V209193_at 0.0001037 1 1 1 0.7241
V211964_at 2.082e-06 1 1 1 0.7069
V216025_x_at -0.001532 0.998 0.9985 0.9989 0.6552
V211919_s_at 2.153e-05 1 1 1 0.7241
V202388_at 6.304e-06 1 1 1 0.6724
V212145_at -0.0002216 0.9997 0.9998 0.9999 0.6552
V220301_at 0.0001705 1 1 1 0.7241
V206686_at 0.001442 1.001 1.001 1.002 0.7586
V205207_at 5.236e-05 1 1 1 0.6552
V218932_at 0.0007286 1 1.001 1.001 0.7069
V218246_at -0.0001085 0.9998 0.9999 0.9999 0.7069
V209373_at 5.366e-05 1 1 1 0.7069
V206576_s_at 0.0003316 1 1 1 0.6379
V207216_at 0.001571 1.001 1.002 1.002 0.6379
V201414_s_at -0.0001794 0.9998 0.9998 0.9999 0.6552
V220670_at -0.002952 0.996 0.9971 0.9981 0.6552
V212577_at 9.119e-05 1 1 1 0.7241
V204836_at 5.005e-05 1 1 1 0.7069
V203821_at 0.001158 1.001 1.001 1.001 0.7069
V213456_at -0.0004027 0.9995 0.9996 0.9997 0.7241
V218723_s_at 2.299e-05 1 1 1 0.6379
V216793_x_at -0.0002158 0.9997 0.9998 0.9999 0.6552
V205199_at 1.022e-05 1 1 1 0.6724
V205714_s_at -0.002964 0.9961 0.997 0.998 0.6552
Table continues below
  r.Accuracy full.Accuracy u.AUC r.AUC full.AUC
V202908_at 0.7759 0.931 0.7586 0.7759 0.931
V202497_x_at 0.9483 0.9828 0.7586 0.9483 0.9828
V203615_x_at 0.7759 0.8448 0.7586 0.7759 0.8448
V215299_x_at 0.5 0.7759 0.7759 0.5 0.7759
V212911_at 0.6552 0.8276 0.7931 0.6552 0.8276
V204624_at 0.7414 0.8621 0.7931 0.7414 0.8621
V220968_s_at 0.6724 0.8103 0.7241 0.6724 0.8103
V204567_s_at 0.8103 0.8966 0.7414 0.8103 0.8966
V218262_at 0.8621 0.9483 0.7414 0.8621 0.9483
V221583_s_at 0.8793 0.8793 0.7414 0.8793 0.8793
V210853_at 0.8621 0.931 0.7586 0.8621 0.931
V220625_s_at 0.931 0.931 0.7586 0.931 0.931
V207122_x_at 0.7241 0.7759 0.7241 0.7241 0.7759
V218593_at 0.8793 0.8793 0.6897 0.8793 0.8793
V206102_at 0.8621 0.9483 0.7759 0.8621 0.9483
V210939_s_at 0.8966 0.9828 0.7586 0.8966 0.9828
V220898_at 0.7586 0.7759 0.6897 0.7586 0.7759
V213923_at 0.8448 0.8966 0.7586 0.8448 0.8966
V217553_at 0.8103 0.8966 0.7414 0.8103 0.8966
V214645_at 0.8793 0.8793 0.7586 0.8793 0.8793
V213085_s_at 0.9483 0.931 0.7241 0.9483 0.931
V202498_s_at 0.8966 0.9483 0.7241 0.8966 0.9483
V207638_at 0.8966 0.9483 0.6552 0.8966 0.9483
V219492_at 0.8621 0.9828 0.6724 0.8621 0.9828
V214894_x_at 0.8966 0.9828 0.7414 0.8966 0.9828
V209201_x_at 0.931 0.931 0.7586 0.931 0.931
V209193_at 0.8621 0.8966 0.7241 0.8621 0.8966
V211964_at 0.7931 0.7759 0.7069 0.7931 0.7759
V216025_x_at 0.931 0.931 0.6552 0.931 0.931
V211919_s_at 0.8448 0.8966 0.7241 0.8448 0.8966
V202388_at 0.8966 0.931 0.6724 0.8966 0.931
V212145_at 0.8966 0.9828 0.6552 0.8966 0.9828
V220301_at 0.7931 0.8448 0.7241 0.7931 0.8448
V206686_at 0.9138 0.931 0.7586 0.9138 0.931
V205207_at 0.7759 0.8448 0.6552 0.7759 0.8448
V218932_at 0.8276 0.8793 0.7069 0.8276 0.8793
V218246_at 0.8276 0.8621 0.7069 0.8276 0.8621
V209373_at 0.8448 0.8966 0.7069 0.8448 0.8966
V206576_s_at 0.7759 0.931 0.6379 0.7759 0.931
V207216_at 0.8621 0.8793 0.6379 0.8621 0.8793
V201414_s_at 0.8793 0.8966 0.6552 0.8793 0.8966
V220670_at 0.9138 0.8793 0.6552 0.9138 0.8793
V212577_at 0.8621 0.8966 0.7241 0.8621 0.8966
V204836_at 0.8966 0.931 0.7069 0.8966 0.931
V203821_at 0.931 0.931 0.7069 0.931 0.931
V213456_at 0.9655 0.931 0.7241 0.9655 0.931
V218723_s_at 0.8103 0.8621 0.6379 0.8103 0.8621
V216793_x_at 0.7931 0.8276 0.6552 0.7931 0.8276
V205199_at 0.7241 0.8103 0.6724 0.7241 0.8103
V205714_s_at 0.8966 0.8793 0.6552 0.8966 0.8793
  IDI NRI z.IDI z.NRI
V202908_at 0.2309 1.034 6.14 4.862
V202497_x_at 0.2142 1.379 6.798 7.261
V203615_x_at 0.1889 0.9655 4.783 4.367
V215299_x_at 0.2057 1.103 5.117 5.263
V212911_at 0.2501 1.172 5.382 5.592
V204624_at 0.1793 1.172 4.573 5.861
V220968_s_at 0.1928 1.103 5.657 5.077
V204567_s_at 0.2168 1.241 6.126 6.337
V218262_at 0.2022 1.172 6.458 5.7
V221583_s_at 0.04761 1.448 5.722 8.383
V210853_at 0.2699 1.655 7.496 11.25
V220625_s_at 0.1454 1.655 7.884 11.8
V207122_x_at 0.1857 1.103 5.215 5.149
V218593_at 0.09196 0.9655 5.245 4.201
V206102_at 0.1824 1.793 10.81 15.46
V210939_s_at 0.1887 1.517 6.673 8.88
V220898_at 0.1457 0.8276 4.178 3.808
V213923_at 0.2398 1.31 6.899 6.605
V217553_at 0.1778 1.655 7.168 11.25
V214645_at 0.04855 1.655 7.101 11.42
V213085_s_at 0.2438 1.517 6.769 8.981
V202498_s_at 0.1283 1.31 7.923 6.605
V207638_at 0.2065 0.9655 6.549 4.201
V219492_at 0.1658 1.655 7.947 11.25
V214894_x_at 0.07137 1.241 5.747 6.082
V209201_x_at 0.1722 1.655 7.849 11.42
V209193_at 0.1522 1.241 4.894 6.337
V211964_at 0.06062 1.103 4.941 5.042
V216025_x_at 0.05811 1.586 6.922 10.18
V211919_s_at 0.1038 1.379 5.613 7.468
V202388_at 0.04155 1.034 4.772 4.743
V212145_at 0.09071 1.172 6.062 5.531
V220301_at 0.06754 1.172 5.047 5.531
V206686_at 0.08675 1.724 5.439 12.95
V205207_at 0.08712 0.6207 4.286 2.547
V218932_at 0.1732 1.172 4.734 5.511
V218246_at 0.1144 1.034 4.854 4.743
V209373_at 0.1129 1.31 6.016 6.632
V206576_s_at 0.2141 1.241 6.354 6.035
V207216_at 0.1551 1.517 5.877 8.88
V201414_s_at 0.1981 1.379 6.748 7.328
V220670_at 0.1086 1.31 5.61 6.867
V212577_at 0.09086 1.31 5.101 6.867
V204836_at 0.06981 1.172 4.286 5.861
V203821_at 0.1076 1.448 6.768 8.161
V213456_at 0.1524 1.517 6.871 8.88
V218723_s_at 0.1469 1.103 5.793 5.263
V216793_x_at 0.1145 0.8966 5.002 3.926
V205199_at 0.09939 1.241 5.408 6.18
V205714_s_at 0.106 1.241 6.134 6.565

B:SWiMS Heat Map Plots

# Remember the current graphics parameters so they can be restored once the
# heat map has been drawn.
opg <- par(no.readonly = TRUE)
par(mfrow = c(1, 1))

# Heat map of the features listed in the bagging analysis of the B:SWiMS
# model, drawn against the VStatus outcome.
hm <- heatMaps(
  LungModelBin$BSWiMS.model$baggingAnalysis$RelativeFrequency,
  Outcome = "VStatus",
  data = Lung,
  hCluster = TRUE,
  Scale = TRUE,
  xlab = "Subject ID",
  transpose = TRUE,
  title = "B:SWIMS Features"
)
#> [1] 2

# Put the saved graphics parameters back.
par(opg)

ROC Plots

# Running tables with one row per classifier: the accuracy estimate and the
# balanced error, each taken from the epi.tests() result for that classifier.
AccCITable <- NULL
BErrorCITable <- NULL

# Each stanza below follows the same steps: draw the ROC curve from the
# cross-validation test predictions, run epi.tests() on the resulting
# prediction table, and append accuracy and balanced error to the tables.
# Balanced error is computed as 1 - (sensitivity + specificity) / 2.

# LASSO
rp <- plotModels.ROC(
  LungModelBin$cvObject$LASSO.testPredictions,
  theCVfolds = Loops,
  main = "LASSO",
  cex = 0.90
)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable, ci$elements$diag.acc)
BErrorCITable <- rbind(
  BErrorCITable,
  1 - 0.5 * (ci$elements$sensitivity + ci$elements$specificity)
)

# KNN
rp <- plotModels.ROC(
  LungModelBin$cvObject$KNN.testPrediction,
  theCVfolds = Loops,
  main = "KNN",
  cex = 0.90
)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable, ci$elements$diag.acc)
BErrorCITable <- rbind(
  BErrorCITable,
  1 - 0.5 * (ci$elements$sensitivity + ci$elements$specificity)
)

# B:SWiMS, using the "Prediction" column of the model test predictions
rp <- plotModels.ROC(
  LungModelBin$cvObject$Models.testPrediction,
  theCVfolds = Loops,
  predictor = "Prediction",
  main = "B:SWiMS",
  cex = 0.90
)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable, ci$elements$diag.acc)
BErrorCITable <- rbind(
  BErrorCITable,
  1 - 0.5 * (ci$elements$sensitivity + ci$elements$specificity)
)

# B:SWiMS ensemble, using the "Ensemble.B.SWiMS" column of the same table.
# The plot title previously read "Ensembe"; the spelling is corrected here.
rp <- plotModels.ROC(
  LungModelBin$cvObject$Models.testPrediction,
  theCVfolds = Loops,
  predictor = "Ensemble.B.SWiMS",
  main = "Ensemble B:SWiMS ",
  cex = 0.90
)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable, ci$elements$diag.acc)
BErrorCITable <- rbind(
  BErrorCITable,
  1 - 0.5 * (ci$elements$sensitivity + ci$elements$specificity)
)

Barplots of Accuracy and Balanced Error


# Labels for the rows of the two CI tables, in the order the rows were added.
CVthesets <- c("LASSO", "KNN", "B:SWiMS", "B:SWiMS Ensemble")

# Bar plot of accuracy per method, legend in the bottom-right corner.
bp <- barPlotCiError(
  as.matrix(AccCITable),
  metricname = "Accuracy",
  thesets = CVthesets,
  themethod = "CV",
  main = "Accuracy",
  args.legend = list(x = "bottomright")
)

# Bar plot of balanced error per method, legend in the top-right corner.
bp <- barPlotCiError(
  as.matrix(BErrorCITable),
  metricname = "Balanced Error",
  thesets = CVthesets,
  themethod = "CV",
  main = "Balanced Error",
  args.legend = list(x = "topright")
)

B:SWiMS Feature Plots

# Bag all B:SWiMS formulas collected during cross-validation into a single
# LOGIT model of VStatus.
baggLungBSWiMS <- baggedModel(
  LungModelBin$cvObject$allBSWiMSFormulas.list,
  Lung,
  type = "LOGIT",
  Outcome = "VStatus"
)
#> 
#> Num. Models: 877  To Test: 307  TopFreq: 49  Thrf: 1  Removed: 61 
#> .......................................................................................

# Number of collected formulas divided by the number of CV runs
# (Loops * Repeats); used below to scale the formula network.
cf <- length(LungModelBin$cvObject$allBSWiMSFormulas.list) / (Loops * Repeats)

# Candidate features: the names in coefEvolution with the first two dropped.
namestoShow <- names(baggLungBSWiMS$coefEvolution)[-c(1, 2)]

# Frequency threshold: a quarter of the number of CV runs. It is reused by
# the LASSO and Venn sections further down.
frac <- 0.25 * Loops * Repeats

# Keep only the candidates whose frequency reaches the threshold.
namestoShow <- namestoShow[baggLungBSWiMS$frequencyTable[namestoShow] >= frac]

# The network graph is limited to at most 11 nodes.
fnshow <- min(11, length(namestoShow))

barplot(
  baggLungBSWiMS$frequencyTable[namestoShow],
  las = 2,
  cex.axis = 1.0,
  cex.names = 0.75,
  main = "B:SWiMS Feature Frequency"
)

# Weighted, undirected network from the first `fnshow` rows and columns of
# the scaled formula network.
n <- network::network(
  cf * baggLungBSWiMS$formulaNetwork[1:fnshow, 1:fnshow],
  directed = FALSE,
  ignore.eval = FALSE,
  names.eval = "weights"
)

# Heat map of the scaled formula network restricted to the selected names.
gplots::heatmap.2(
  cf * baggLungBSWiMS$formulaNetwork[namestoShow, namestoShow],
  trace = "none",
  mar = c(10, 10),
  main = "B:SWiMS Formula Network"
)

# Circular layout of the network with edge weights as labels.
ggnet2(
  n,
  label = TRUE,
  size = "degree",
  size.cut = 3,
  size.min = 1,
  mode = "circle",
  edge.label = "weights",
  edge.label.size = 4
)

LASSO Feature Plots

# Bag the LASSO variable sets collected during cross-validation into a
# single LOGIT model of VStatus.
baggLungLASSO <- baggedModel(
  LungModelBin$cvObject$LASSOVariables,
  Lung,
  type = "LOGIT",
  Outcome = "VStatus"
)
#> 
#> Num. Models: 51  To Test: 143  TopFreq: 42  Thrf: 1  Removed: 45 
#> .....

# Count of features whose frequency reaches `frac` (defined in the B:SWiMS
# section). The 1:toshow indexing below takes the leading entries of the
# table -- presumably it is sorted by decreasing frequency; verify.
toshow <- sum(baggLungLASSO$frequencyTable >= frac)

# The network graph is limited to at most 11 nodes.
fnshow <- min(11, length(baggLungLASSO$frequencyTable))

barplot(
  baggLungLASSO$frequencyTable[1:toshow],
  las = 2,
  cex.axis = 1.0,
  cex.names = 0.75,
  main = "LASSO Feature Frequency"
)

# Weighted, undirected network from the first `fnshow` rows and columns of
# the formula network.
n <- network::network(
  baggLungLASSO$formulaNetwork[1:fnshow, 1:fnshow],
  directed = FALSE,
  ignore.eval = FALSE,
  names.eval = "weights"
)

# Heat map of the first `toshow` rows and columns of the formula network.
gplots::heatmap.2(
  baggLungLASSO$formulaNetwork[1:toshow, 1:toshow],
  trace = "none",
  mar = c(10, 10),
  main = "LASSO Formula Network"
)

# Circular layout of the network with edge weights as labels.
ggnet2(
  n,
  label = TRUE,
  size = "degree",
  size.cut = 3,
  size.min = 1,
  mode = "circle",
  edge.label = "weights",
  edge.label.size = 4
)

Venn Diagrams

Here I explore which features are shared among the univariate analysis, the cross-validated LASSO models, and the B:SWiMS models.


# Upper-tail p-values of the univariate z-scores, adjusted with the
# Benjamini-Hochberg method. The upper tail is requested from pnorm()
# directly because 1 - pnorm(z) rounds to exactly 0 for large z.
pvalues <- p.adjust(
  pnorm(LungModelBin$univariateAnalysis$ZUni, lower.tail = FALSE),
  "BH"
)

# Features with an adjusted p-value below 0.05. which() drops NA p-values,
# which would otherwise add NA entries to the feature list.
topunivec <- as.character(
  LungModelBin$univariateAnalysis$Name[which(pvalues < 0.05)]
)

# Features whose bagged frequency exceeds `frac` in each model family.
# NOTE(review): the feature-plot sections select with `>= frac`, while this
# section uses `> frac`; confirm which threshold is intended.
tob <- baggLungBSWiMS$frequencyTable > frac
topBSwims <- as.character(names(baggLungBSWiMS$frequencyTable[tob]))
tob <- baggLungLASSO$frequencyTable > frac
topLASSO <- as.character(names(baggLungLASSO$frequencyTable[tob]))

# Venn diagram of the three feature sets.
featurelist <- list(Univariate = topunivec, CVLASSO = topLASSO, BSWIMS = topBSwims)
vend <- venn(featurelist)
vgroups <- attr(vend, "intersections")

# List the features common to all three sets in the centre of the plot.
# legend() fails on an empty label vector, so skip it when there are none.
if (length(vgroups[["Univariate:CVLASSO:BSWIMS"]]) > 0) {
  legend("center", vgroups[["Univariate:CVLASSO:BSWIMS"]], cex = 0.75)
}