Prostate Data Set Analysis

library("epiR")
library("FRESA.CAD")
library(network)
library(GGally)
library("e1071")
library("gplots")
library("randomForest")
library(rpart)
a=as.numeric(Sys.time());
set.seed(a);
Loops <- 10
Repeats <- 5
filter <- 0.01

#ProstateData <- read.delim("~/Development/FresaPaper/prostate/prostate.txt")
#ProstateData <- read.delim("./prostate/prostate.txt")
ProstateData <- read.delim("./prostate.txt")
Prostate <- ProstateData[,-1]
rownames(Prostate) <- ProstateData[,1]

Modeling


filename = paste("ProstateModelBin",Loops,Repeats,sprintf("%5.4f",filter),"res.RDATA",sep="_")
system.time(ProstateModelBin <- FRESA.Model(formula = Class ~ 1, Prostate, CVfolds=Loops, repeats=Repeats,filter.p.value=filter,usrFitFun=svm))
save(ProstateModelBin,file=filename)
#load(file=filename)

Summary Tables

pander::pander(summary(ProstateModelBin$BSWiMS.model)$coefficients,digits=4)
Table continues below
  Estimate lower OR upper u.Accuracy
X37639_at 0.002987 1.003 1.003 1.003 0.8725
X38406_f_at -0.0002181 0.9997 0.9998 0.9999 0.8627
X32598_at -0.001627 0.9976 0.9984 0.9992 0.8824
X37720_at 0.000783 1 1.001 1.001 0.8431
X40282_s_at -0.0005818 0.9992 0.9994 0.9996 0.8333
X41468_at 0.00125 1.001 1.001 1.002 0.8529
X37366_at 0.001225 1.001 1.001 1.002 0.8431
X41288_at -0.002716 0.9967 0.9973 0.9979 0.8137
X32243_g_at -0.0026 0.9969 0.9974 0.9979 0.8137
X40856_at -0.0008614 0.9988 0.9991 0.9995 0.8431
X36601_at -0.0005823 0.9992 0.9994 0.9997 0.8333
X37068_at 0.01576 1.011 1.016 1.021 0.8039
X33121_g_at 0.002976 1.002 1.003 1.004 0.7941
X39756_g_at 0.001324 1.001 1.001 1.002 0.7941
X1767_s_at -0.004538 0.994 0.9955 0.997 0.7941
X36491_at 0.002366 1.002 1.002 1.003 0.8039
X39315_at -0.009685 0.9873 0.9904 0.9935 0.7745
X575_s_at 0.007071 1.005 1.007 1.009 0.8333
X40436_g_at 0.001017 1 1.001 1.002 0.7745
X38028_at -0.01805 0.9789 0.9821 0.9853 0.8137
X34840_at 0.007991 1.005 1.008 1.011 0.8039
X31444_s_at -0.00038 0.9995 0.9996 0.9997 0.7843
X38044_at -0.005067 0.9928 0.9949 0.9971 0.8235
X556_s_at -0.00117 0.9984 0.9988 0.9993 0.7843
X769_s_at -0.0009848 0.9987 0.999 0.9994 0.7745
X39939_at -0.01543 0.9797 0.9847 0.9897 0.7941
X38634_at -0.005972 0.9925 0.994 0.9956 0.8333
X36589_at -0.003556 0.995 0.9964 0.9979 0.7647
X33198_at -0.008566 0.9889 0.9915 0.994 0.7941
X38087_s_at -0.0035 0.9947 0.9965 0.9983 0.7353
X914_g_at 0.006441 1.004 1.006 1.009 0.7941
X216_at -0.0003403 0.9995 0.9997 0.9998 0.7843
X41504_s_at -0.002136 0.9971 0.9979 0.9986 0.7353
X36864_at -0.005887 0.9908 0.9941 0.9975 0.7451
X1980_s_at 0.001317 1.001 1.001 1.002 0.7353
X2041_i_at -0.01199 0.9824 0.9881 0.9938 0.7647
X39545_at -0.01065 0.9849 0.9894 0.9939 0.7843
X36587_at 0.001083 1.001 1.001 1.002 0.7059
X41385_at -0.008559 0.9886 0.9915 0.9944 0.7451
X829_s_at -0.00138 0.9981 0.9986 0.9991 0.6863
X38322_at -0.0009929 0.9985 0.999 0.9995 0.7157
X32786_at 0.0008663 1.001 1.001 1.001 0.7255
X36928_at 0.002266 1.001 1.002 1.003 0.6961
X41106_at 0.01091 1.005 1.011 1.017 0.6961
X35177_at -0.01276 0.9808 0.9873 0.9939 0.6863
X34735_at -0.007546 0.9904 0.9925 0.9946 0.6275
X41388_at -0.009278 0.9868 0.9908 0.9948 0.6863
X35807_at -0.005375 0.9924 0.9946 0.9969 0.6765
X32923_r_at -0.0004003 0.9994 0.9996 0.9998 0.6863
X41562_at -0.01289 0.9806 0.9872 0.9938 0.6961
X34820_at -0.001109 0.9983 0.9989 0.9995 0.7157
X37707_i_at -0.004092 0.9939 0.9959 0.9979 0.7255
X1846_at 0.01389 1.008 1.014 1.02 0.6373
X1052_s_at 0.0005953 1 1.001 1.001 0.6569
X37736_at -0.003186 0.9951 0.9968 0.9986 0.6275
X31687_f_at -0.00165 0.9975 0.9984 0.9992 0.5686
X33758_f_at -0.007421 0.9896 0.9926 0.9956 0.5882
Table continues below
  r.Accuracy full.Accuracy u.AUC r.AUC full.AUC
X37639_at 0.6569 0.9118 0.8727 0.6573 0.9127
X38406_f_at 0.9118 0.9608 0.8619 0.9108 0.9604
X32598_at 0.8922 0.9608 0.8808 0.8919 0.9604
X37720_at 0.8333 0.9412 0.8435 0.8315 0.9415
X40282_s_at 0.8431 0.9412 0.8315 0.8435 0.9415
X41468_at 0.7353 0.9314 0.8538 0.7346 0.9319
X37366_at 0.7843 0.9216 0.845 0.7842 0.9215
X41288_at 0.6863 0.9216 0.8131 0.6854 0.9215
X32243_g_at 0.7255 0.9216 0.8131 0.7254 0.9215
X40856_at 0.8431 0.9216 0.8423 0.8431 0.9219
X36601_at 0.8039 0.902 0.8323 0.8038 0.9015
X37068_at 0.8627 0.9412 0.8035 0.8615 0.9415
X33121_g_at 0.8529 0.9216 0.7958 0.8527 0.9219
X39756_g_at 0.8431 0.9412 0.7946 0.8419 0.9408
X1767_s_at 0.8627 0.9412 0.7923 0.8619 0.9408
X36491_at 0.8039 0.9118 0.805 0.8042 0.9123
X39315_at 0.8627 0.9608 0.7742 0.8631 0.9604
X575_s_at 0.8235 0.9216 0.8342 0.8231 0.9215
X40436_g_at 0.9412 0.9706 0.7746 0.9412 0.9704
X38028_at 0.6275 0.9118 0.8112 0.6242 0.9115
X34840_at 0.8529 0.9412 0.8038 0.8523 0.9412
X31444_s_at 0.8137 0.9314 0.7842 0.8138 0.9315
X38044_at 0.8922 0.951 0.8231 0.8923 0.9508
X556_s_at 0.8529 0.9412 0.7831 0.8523 0.9415
X769_s_at 0.8627 0.9608 0.7742 0.8623 0.9604
X39939_at 0.8529 0.9412 0.7931 0.8531 0.9412
X38634_at 0.8235 0.9706 0.8315 0.8235 0.9704
X36589_at 0.8333 0.902 0.7642 0.8319 0.9015
X33198_at 0.7941 0.8627 0.7938 0.7938 0.8627
X38087_s_at 0.8922 0.9216 0.7331 0.8919 0.9215
X914_g_at 0.8039 0.9118 0.7965 0.8038 0.9123
X216_at 0.902 0.9216 0.7827 0.9027 0.9215
X41504_s_at 0.8529 0.9314 0.7346 0.8538 0.9319
X36864_at 0.8922 0.9216 0.7442 0.8923 0.9219
X1980_s_at 0.8431 0.9314 0.7354 0.8431 0.9315
X2041_i_at 0.9314 0.9706 0.7623 0.9315 0.9704
X39545_at 0.8333 0.8627 0.7842 0.8327 0.8627
X36587_at 0.8922 0.951 0.7054 0.8915 0.9508
X41385_at 0.8529 0.9412 0.7438 0.8531 0.9412
X829_s_at 0.8137 0.9216 0.6854 0.8131 0.9215
X38322_at 0.8824 0.9412 0.7127 0.8819 0.9415
X32786_at 0.8137 0.9216 0.7254 0.8131 0.9215
X36928_at 0.8627 0.9216 0.6969 0.8635 0.9215
X41106_at 0.9314 0.9608 0.6985 0.9315 0.9604
X35177_at 0.8529 0.9216 0.685 0.8527 0.9215
X34735_at 0.8137 0.9118 0.6242 0.8112 0.9115
X41388_at 0.902 0.951 0.6858 0.9015 0.9508
X35807_at 0.8333 0.8627 0.6738 0.8327 0.8627
X32923_r_at 0.9118 0.9608 0.6854 0.9112 0.9604
X41562_at 0.902 0.951 0.695 0.9019 0.9508
X34820_at 0.8824 0.9118 0.7135 0.8827 0.9123
X37707_i_at 0.9118 0.9314 0.725 0.9123 0.9315
X1846_at 0.8725 0.9216 0.6377 0.8727 0.9215
X1052_s_at 0.8725 0.9118 0.6573 0.8727 0.9127
X37736_at 0.8824 0.902 0.6265 0.8819 0.9015
X31687_f_at 0.8824 0.9412 0.565 0.8819 0.9408
X33758_f_at 0.9118 0.9608 0.5885 0.9112 0.9604
  IDI NRI z.IDI z.NRI
X37639_at 0.6946 1.568 15.05 12.74
X38406_f_at 0.1284 1.092 3.765 6.671
X32598_at 0.114 1.057 3.951 6.291
X37720_at 0.198 0.5692 5.13 3.461
X40282_s_at 0.197 1.362 5.162 10.08
X41468_at 0.4396 1.571 9.186 12.89
X37366_at 0.2445 1.457 6.186 11.06
X41288_at 0.4474 1.409 9.146 10.05
X32243_g_at 0.4709 1.568 10.02 12.74
X40856_at 0.1528 1.165 4.355 7.745
X36601_at 0.1556 0.9523 4.718 5.665
X37068_at 0.2359 1.531 6.13 12.04
X33121_g_at 0.2033 1.289 5.063 8.606
X39756_g_at 0.3061 1.532 6.945 12.15
X1767_s_at 0.2302 1.489 5.944 11.26
X36491_at 0.2295 1.106 6.045 6.944
X39315_at 0.2407 1.571 6.065 12.89
X575_s_at 0.2899 1.494 6.89 11.49
X40436_g_at 0.1032 1.058 3.635 6.298
X38028_at 0.5262 1.603 10.87 13.75
X34840_at 0.2357 1.295 6.075 8.598
X31444_s_at 0.2745 1.369 6.685 9.528
X38044_at 0.1491 1.488 4.509 11.27
X556_s_at 0.1828 1.285 5.111 8.869
X769_s_at 0.221 1.568 5.504 12.74
X39939_at 0.2117 1.489 5.954 11.26
X38634_at 0.321 1.363 7.398 9.896
X36589_at 0.2025 0.8538 4.873 4.913
X33198_at 0.291 1.374 6.488 9.562
X38087_s_at 0.1202 1.209 3.748 7.809
X914_g_at 0.1635 0.9138 5.045 5.539
X216_at 0.163 1.172 4.738 7.355
X41504_s_at 0.2142 1.446 5.579 10.7
X36864_at 0.101 0.8154 3.42 4.62
X1980_s_at 0.2205 1.094 6.054 6.64
X2041_i_at 0.1283 1.028 4.106 6.249
X39545_at 0.1745 1.451 4.588 10.64
X36587_at 0.1834 1.334 4.878 9.042
X41385_at 0.1768 1.688 5.774 15.95
X829_s_at 0.215 1.058 5.579 6.298
X38322_at 0.1218 0.9631 4.025 6.376
X32786_at 0.2589 1.137 6.068 6.976
X36928_at 0.177 1.34 4.894 9.402
X41106_at 0.09401 1.111 3.58 7.369
X35177_at 0.1108 1.212 3.781 7.729
X34735_at 0.3082 1.529 7.048 11.98
X41388_at 0.139 1.209 4.525 7.809
X35807_at 0.1881 1.491 4.651 11.3
X32923_r_at 0.07493 0.82 3.198 4.557
X41562_at 0.1012 1.058 3.776 6.298
X34820_at 0.1156 1.011 3.807 6.107
X37707_i_at 0.1032 1.092 4.05 6.671
X1846_at 0.1607 1.415 4.621 10.23
X1052_s_at 0.09526 1.248 3.653 8.261
X37736_at 0.1054 0.9185 3.53 5.271
X31687_f_at 0.104 1.02 3.66 5.988
X33758_f_at 0.1576 1.571 4.811 12.89

B:SWiMS Heat Map Plots

opg <- par(no.readonly = TRUE)
par(mfrow=c(1,1))

hm <- heatMaps(ProstateModelBin$BSWiMS.model$baggingAnalysis$RelativeFrequency,Outcome="Class",data=Prostate,hCluster = TRUE,Scale=TRUE,xlab="Subject ID",transpose=TRUE,title="B:SWIMS Features")
#> [1] 2

par(opg)

ROC Plots

AccCITable <- NULL
BErrorCITable <- NULL


rp <- plotModels.ROC(ProstateModelBin$cvObject$LASSO.testPredictions,theCVfolds=Loops,main="LASSO",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))

rp <- plotModels.ROC(ProstateModelBin$cvObject$KNN.testPrediction,theCVfolds=Loops,main="KNN",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))

rp <- plotModels.ROC(ProstateModelBin$cvObject$Models.testPrediction,theCVfolds=Loops,predictor="Prediction",main="B:SWiMS",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))

rp <- plotModels.ROC(ProstateModelBin$cvObject$Models.testPrediction,theCVfolds=Loops,predictor="Ensemble.B.SWiMS",main="Ensembe B:SWiMS ",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))

Support Vector Machine(SVM) Analysis


ProstateModelBin$cvObject$Models.testPrediction$usrFitFunction_Sel <- ProstateModelBin$cvObject$Models.testPrediction$usrFitFunction_Sel -0.5
ProstateModelBin$cvObject$Models.testPrediction$usrFitFunction <- ProstateModelBin$cvObject$Models.testPrediction$usrFitFunction -0.5

rp <- plotModels.ROC(ProstateModelBin$cvObject$Models.testPrediction,theCVfolds=Loops,predictor="usrFitFunction",main="Filtered:SVM",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))

rp <- plotModels.ROC(ProstateModelBin$cvObject$Models.testPrediction,theCVfolds=Loops,predictor="usrFitFunction_Sel",main="B:SWiMS/SVM",cex=0.90)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable,ci$elements$diag.acc)
BErrorCITable <- rbind(BErrorCITable,1-0.5*(ci$elements$sensitivity+ci$elements$specificity))

Barplots of Accuracy and Balanced Error


CVthesets <- c("LASSO","KNN","B:SWiMS","B:SWiMS Ensemble","SVM:Filterd","SVM:BSWIMS")



bp <- barPlotCiError(as.matrix(AccCITable),metricname="Accuracy",thesets=CVthesets,themethod="CV",main="Accuracy",args.legend = list(x = "bottomright"))

bp <- barPlotCiError(as.matrix(BErrorCITable),metricname="Balanced Error",thesets=CVthesets,themethod="CV",main="Balanced Error",args.legend = list(x = "topright"))

B:SWiMS Feature Plots

baggProstateBSWiMS <- baggedModel(ProstateModelBin$cvObject$allBSWiMSFormulas.list,Prostate,type="LOGIT",Outcome="Class")
#> 
#> Num. Models: 980  To Test: 204  TopFreq: 50  Thrf: 1  Removed: 63 
#> ..................................................................................................

cf <- length(ProstateModelBin$cvObject$allBSWiMSFormulas.list)/(Loops*Repeats)

namestoShow <- names(baggProstateBSWiMS$coefEvolution)[-c(1,2)]
frac = 0.25*Loops*Repeats

namestoShow <- namestoShow[baggProstateBSWiMS$frequencyTable[namestoShow]>=frac]

fnshow <- min(11,length(namestoShow))
barplot(baggProstateBSWiMS$frequencyTable[namestoShow],las = 2,cex.axis=1.0,cex.names=0.75,main="B:SWiMS Feature Frequency")


n <- network::network(cf*baggProstateBSWiMS$formulaNetwork[1:fnshow,1:fnshow], directed = FALSE,ignore.eval = FALSE,names.eval = "weights")
gplots::heatmap.2(cf*baggProstateBSWiMS$formulaNetwork[namestoShow,namestoShow],trace="none",mar=c(10,10),main="B:SWiMS Formula Network")


ggnet2(n, label = TRUE, size = "degree",size.cut = 3,size.min = 1, mode = "circle",edge.label = "weights",edge.label.size=4)

LASSO Feature Plots

baggProstateLASSO <- baggedModel(ProstateModelBin$cvObject$LASSOVariables,Prostate,type="LOGIT",Outcome="Class")
#> 
#> Num. Models: 51  To Test: 109  TopFreq: 46  Thrf: 1  Removed: 40 
#> .....

toshow <- sum(baggProstateLASSO$frequencyTable>=frac)
fnshow <- min(11,length(baggProstateLASSO$frequencyTable))
barplot(baggProstateLASSO$frequencyTable[1:toshow],las = 2,cex.axis=1.0,cex.names=0.75,main="LASSO Feature Frequency")


n <- network::network(baggProstateLASSO$formulaNetwork[1:fnshow,1:fnshow], directed = FALSE,ignore.eval = FALSE,names.eval = "weights")
gplots::heatmap.2(baggProstateLASSO$formulaNetwork[1:toshow,1:toshow],trace="none",mar=c(10,10),main="LASSO Formula Network")

ggnet2(n, label = TRUE, size = "degree",size.cut = 3,size.min = 1, mode = "circle",edge.label = "weights",edge.label.size=4)

Venn Diagrams

Here I will explore which features are similar between the LASSO and the BSWiMS models


pvalues <- p.adjust(1.0-pnorm(ProstateModelBin$univariateAnalysis$ZUni),"BH")
topunivec <- as.character(ProstateModelBin$univariateAnalysis$Name[pvalues<0.05])
tob <- baggProstateBSWiMS$frequencyTable>frac
topBSwims <- as.character(names(baggProstateBSWiMS$frequencyTable[tob]))
tob <- baggProstateLASSO$frequencyTable>frac
topLASSO <- as.character(names(baggProstateLASSO$frequencyTable[tob]))
featurelist <- list(Univariate=topunivec,CVLASSO=topLASSO,BSWIMS=topBSwims)
vend <- venn(featurelist)
vgroups <- attr(vend, "intersections")
legend("center",vgroups$`Univariate:CVLASSO:BSWIMS`,cex=0.75)