---
title: "Colon Cancer Bin"
author: "José Tamez-Peña"
date: "Feb, 2017"
output:
  word_document:
    fig_height: 6
    fig_width: 8
    toc: yes
---
Colon Data Set Analysis
The Libraries
library("epiR")
library("FRESA.CAD")
library(network)
library(GGally)
library("e1071")
library("gplots")
library("randomForest")
library(rpart)
The Parameters and Data Sets
# Cross-validation and filtering settings consumed by the modeling step.
Loops   <- 10    # passed to FRESA.Model as the number of CV folds
Repeats <- 5     # passed to FRESA.Model as the number of repetitions
filter  <- 0.05  # passed to FRESA.Model as the filter p-value

# Alternate data location, kept for reference:
# ColonData <- read.delim("./Colon/cancerColonb.txt")
ColonData <- read.delim(file = "./cancerColonb.txt")

# The first column is moved out of the analysis table and used as the row
# names instead (presumably the sample identifiers -- confirm against the file).
Colon <- ColonData[, -1]
row.names(Colon) <- ColonData[[1]]
Modeling with FRESA.CAD
# The results file name encodes the run settings (folds, repeats, filter).
filename <- paste(
  "ColonModelBin",
  Loops,
  Repeats,
  sprintf("%5.4f", filter),
  "res.RDATA",
  sep = "_"
)

# Fit the cross-validated model on the Class outcome; system.time() reports
# how long the fit took. The svm function is supplied as the user fit function.
system.time(
  ColonModelBIN <- FRESA.Model(
    formula = Class ~ 1,
    Colon,
    CVfolds = Loops,
    repeats = Repeats,
    filter.p.value = filter,
    usrFitFun = svm
  )
)

# Persist the fitted object so the analysis can be resumed without refitting.
save(ColonModelBIN, file = filename)
# To reuse a stored run instead:
# load(file=paste("./Colon/",filename,sep=""))
Summary Tables
# Print the coefficient table from the B:SWiMS model summary (digits = 4).
pander::pander(
  summary(ColonModelBIN$BSWiMS.model)$coefficients,
  digits = 4
)
Table continues below
| H08393 |
0.0009409 |
1 |
1.001 |
1.001 |
0.8 |
0.8083 |
| R36977 |
0.0008331 |
1.001 |
1.001 |
1.001 |
0.8083 |
0.8333 |
| M22382 |
0.0001797 |
1 |
1 |
1 |
0.8083 |
0.7583 |
| R87126 |
-0.0001431 |
0.9998 |
0.9999 |
0.9999 |
0.8083 |
0.8 |
| M63391 |
-4.661e-05 |
0.9999 |
1 |
1 |
0.8333 |
0.8083 |
| X63629 |
0.001393 |
1.001 |
1.001 |
1.002 |
0.7958 |
0.7792 |
| H40095 |
8.335e-05 |
1 |
1 |
1 |
0.7625 |
0.6833 |
| M76378.2 |
-0.0001263 |
0.9998 |
0.9999 |
0.9999 |
0.7583 |
0.8083 |
| T47377 |
8.949e-05 |
1 |
1 |
1 |
0.725 |
0.775 |
| J02854 |
-0.0002822 |
0.9997 |
0.9997 |
0.9998 |
0.7917 |
0.5958 |
| X12671 |
0.0001964 |
1 |
1 |
1 |
0.75 |
0.6833 |
| M76378 |
-0.0001534 |
0.9998 |
0.9998 |
0.9999 |
0.7583 |
0.725 |
| J05032 |
0.00108 |
1.001 |
1.001 |
1.001 |
0.6625 |
0.6667 |
| U09564 |
0.00496 |
1.004 |
1.005 |
1.006 |
0.7167 |
0.6583 |
| U30825 |
0.001089 |
1.001 |
1.001 |
1.001 |
0.6958 |
0.7333 |
| M76378.1 |
-5.291e-05 |
0.9999 |
0.9999 |
1 |
0.775 |
0.725 |
| R84411 |
0.0004562 |
1 |
1 |
1.001 |
0.6875 |
0.6792 |
| Z50753 |
-0.0004939 |
0.9993 |
0.9995 |
0.9997 |
0.7792 |
0.7958 |
| T71025 |
-0.000118 |
0.9998 |
0.9999 |
0.9999 |
0.7333 |
0.6958 |
| R10066 |
0.0002187 |
1 |
1 |
1 |
0.7458 |
0.6958 |
| T92451 |
-7.543e-05 |
0.9999 |
0.9999 |
0.9999 |
0.7417 |
0.625 |
| T62947 |
0.001664 |
1.001 |
1.002 |
1.002 |
0.725 |
0.7583 |
| H43887 |
-3.067e-05 |
1 |
1 |
1 |
0.6833 |
0.7625 |
| M36634 |
-0.001084 |
0.9986 |
0.9989 |
0.9992 |
0.6667 |
0.7 |
| H55916 |
0.001342 |
1.001 |
1.001 |
1.002 |
0.625 |
0.7417 |
| H06524 |
-0.001936 |
0.9977 |
0.9981 |
0.9984 |
0.7292 |
0.6208 |
| H11084 |
0.0007596 |
1.001 |
1.001 |
1.001 |
0.7 |
0.6667 |
| X12369 |
-0.0001579 |
0.9998 |
0.9998 |
0.9999 |
0.6667 |
0.6625 |
| H64489 |
-4.463e-05 |
0.9999 |
1 |
1 |
0.6958 |
0.7458 |
| T90350 |
0.001369 |
1.001 |
1.001 |
1.002 |
0.5958 |
0.7917 |
| U14631 |
-0.0002922 |
0.9996 |
0.9997 |
0.9998 |
0.6792 |
0.6875 |
| L12723 |
0.005501 |
1.004 |
1.006 |
1.007 |
0.6708 |
0.6292 |
| M59040 |
0.003246 |
1.003 |
1.003 |
1.004 |
0.6292 |
0.6417 |
| L07648 |
-0.0125 |
0.9862 |
0.9876 |
0.9889 |
0.6417 |
0.5667 |
| H20709 |
-0.0002639 |
0.9997 |
0.9997 |
0.9998 |
0.6417 |
0.6292 |
| D14812 |
0.001434 |
1.001 |
1.001 |
1.002 |
0.6208 |
0.7292 |
| D25217 |
-0.0002523 |
0.9996 |
0.9997 |
0.9998 |
0.6833 |
0.75 |
| H09719 |
0.03764 |
1.033 |
1.038 |
1.044 |
0.5667 |
0.6417 |
| R88740 |
-0.001422 |
0.9982 |
0.9986 |
0.9989 |
0.6292 |
0.6708 |
| T47383 |
-0.004492 |
0.9944 |
0.9955 |
0.9967 |
0.6583 |
0.7167 |
Table continues below
| H08393 |
0.9083 |
0.8 |
0.8083 |
0.9083 |
0.1657 |
0.65 |
| R36977 |
0.9333 |
0.8083 |
0.8333 |
0.9333 |
0.2888 |
1.117 |
| M22382 |
0.8958 |
0.8083 |
0.7583 |
0.8958 |
0.2701 |
1.35 |
| R87126 |
0.9083 |
0.8083 |
0.8 |
0.9083 |
0.2334 |
1.083 |
| M63391 |
0.9333 |
0.8333 |
0.8083 |
0.9333 |
0.2553 |
1.333 |
| X63629 |
0.9125 |
0.7958 |
0.7792 |
0.9125 |
0.3178 |
1.367 |
| H40095 |
0.8667 |
0.7625 |
0.6833 |
0.8667 |
0.1962 |
0.65 |
| M76378.2 |
0.8958 |
0.7583 |
0.8083 |
0.8958 |
0.2107 |
1.117 |
| T47377 |
0.8167 |
0.725 |
0.775 |
0.8167 |
0.2125 |
0.8333 |
| J02854 |
0.8583 |
0.7917 |
0.5958 |
0.8583 |
0.4804 |
1.383 |
| X12671 |
0.8167 |
0.75 |
0.6833 |
0.8167 |
0.3981 |
1.233 |
| M76378 |
0.8583 |
0.7583 |
0.725 |
0.8583 |
0.3779 |
1.217 |
| J05032 |
0.8875 |
0.6625 |
0.6667 |
0.8875 |
0.3514 |
1.167 |
| U09564 |
0.9083 |
0.7167 |
0.6583 |
0.9083 |
0.6029 |
1.65 |
| U30825 |
0.8875 |
0.6958 |
0.7333 |
0.8875 |
0.3623 |
1.183 |
| M76378.1 |
0.8167 |
0.775 |
0.725 |
0.8167 |
0.2283 |
1.133 |
| R84411 |
0.8833 |
0.6875 |
0.6792 |
0.8833 |
0.4443 |
1.367 |
| Z50753 |
0.9125 |
0.7792 |
0.7958 |
0.9125 |
0.3286 |
1.35 |
| T71025 |
0.8875 |
0.7333 |
0.6958 |
0.8875 |
0.3236 |
1.283 |
| R10066 |
0.85 |
0.7458 |
0.6958 |
0.85 |
0.2976 |
1.117 |
| T92451 |
0.8625 |
0.7417 |
0.625 |
0.8625 |
0.3462 |
1.267 |
| T62947 |
0.8583 |
0.725 |
0.7583 |
0.8583 |
0.2875 |
1.2 |
| H43887 |
0.8667 |
0.6833 |
0.7625 |
0.8667 |
0.1141 |
0.5167 |
| M36634 |
0.9 |
0.6667 |
0.7 |
0.9 |
0.3755 |
1.433 |
| H55916 |
0.8625 |
0.625 |
0.7417 |
0.8625 |
0.3094 |
0.9833 |
| H06524 |
0.8958 |
0.7292 |
0.6208 |
0.8958 |
0.5765 |
1.783 |
| H11084 |
0.9 |
0.7 |
0.6667 |
0.9 |
0.3538 |
1.317 |
| X12369 |
0.8875 |
0.6667 |
0.6625 |
0.8875 |
0.2874 |
1.317 |
| H64489 |
0.85 |
0.6958 |
0.7458 |
0.85 |
0.2767 |
1.183 |
| T90350 |
0.8583 |
0.5958 |
0.7917 |
0.8583 |
0.2377 |
1.383 |
| U14631 |
0.8833 |
0.6792 |
0.6875 |
0.8833 |
0.3135 |
1.133 |
| L12723 |
0.8417 |
0.6708 |
0.6292 |
0.8417 |
0.4916 |
1.367 |
| M59040 |
0.8708 |
0.6292 |
0.6417 |
0.8708 |
0.5457 |
1.367 |
| L07648 |
0.9542 |
0.6417 |
0.5667 |
0.9542 |
0.8099 |
1.767 |
| H20709 |
0.8708 |
0.6417 |
0.6292 |
0.8708 |
0.4972 |
1.433 |
| D14812 |
0.8958 |
0.6208 |
0.7292 |
0.8958 |
0.5032 |
1.5 |
| D25217 |
0.8167 |
0.6833 |
0.75 |
0.8167 |
0.2311 |
1.217 |
| H09719 |
0.9542 |
0.5667 |
0.6417 |
0.9542 |
0.7232 |
1.817 |
| R88740 |
0.8417 |
0.6292 |
0.6708 |
0.8417 |
0.4519 |
1.267 |
| T47383 |
0.9083 |
0.6583 |
0.7167 |
0.9083 |
0.4121 |
1.383 |
| H08393 |
3.863 |
3.108 |
| R36977 |
5.972 |
6.21 |
| M22382 |
5.67 |
8.486 |
| R87126 |
5 |
5.852 |
| M63391 |
5.335 |
8.405 |
| X63629 |
6.037 |
8.641 |
| H40095 |
4.539 |
3.298 |
| M76378.2 |
4.731 |
6.401 |
| T47377 |
4.824 |
4.338 |
| J02854 |
8.601 |
8.692 |
| X12671 |
7.285 |
7.177 |
| M76378 |
7.015 |
7.286 |
| J05032 |
6.788 |
6.496 |
| U09564 |
11.3 |
14.11 |
| U30825 |
7.036 |
6.65 |
| M76378.1 |
4.957 |
6.686 |
| R84411 |
8.188 |
8.473 |
| Z50753 |
6.15 |
8.403 |
| T71025 |
6.254 |
7.735 |
| R10066 |
5.706 |
6.32 |
| T92451 |
6.724 |
7.676 |
| T62947 |
5.634 |
7.15 |
| H43887 |
3.626 |
2.599 |
| M36634 |
7.729 |
9.517 |
| H55916 |
6.218 |
5.504 |
| H06524 |
10.29 |
17.79 |
| H11084 |
7.534 |
8.609 |
| X12369 |
5.994 |
8.189 |
| H64489 |
5.435 |
6.777 |
| T90350 |
5.089 |
8.675 |
| U14631 |
6.335 |
6.464 |
| L12723 |
8.801 |
8.413 |
| M59040 |
9.989 |
8.489 |
| L07648 |
18.1 |
16.95 |
| H20709 |
9.247 |
9.434 |
| D14812 |
8.866 |
10.29 |
| D25217 |
5.014 |
6.93 |
| H09719 |
14.13 |
19.93 |
| R88740 |
8.064 |
7.479 |
| T47383 |
7.661 |
9.4 |
B:SWiMS Heat Map Plots
# Remember the current graphics settings so they can be restored afterwards.
opg <- par(no.readonly = TRUE)
par(mfrow = c(1, 1))  # single-panel layout

# Heat map of the data for the features listed in the B:SWiMS bagging
# relative-frequency table, with hierarchical clustering and scaling enabled.
hm <- heatMaps(
  ColonModelBIN$BSWiMS.model$baggingAnalysis$RelativeFrequency,
  Outcome = "Class",
  data = Colon,
  hCluster = TRUE,
  Scale = TRUE,
  xlab = "Subject ID",
  transpose = TRUE,
  title = "B:SWIMS Features"
)
#> [1] 2

# Restore the graphics settings saved above.
par(opg)
ROC Plots
# ROC curves for the cross-validated test predictions of each classifier.
# After every plot, the accuracy and the balanced error derived from the
# prediction table are appended as one row to the accumulators below, so the
# row order is: LASSO, KNN, B:SWiMS, Ensemble B:SWiMS.
AccCITable <- NULL
BErrorCITable <- NULL

# --- LASSO ---
rp <- plotModels.ROC(
  ColonModelBIN$cvObject$LASSO.testPredictions,
  theCVfolds = Loops,
  main = "LASSO",
  cex = 0.90
)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable, ci$elements$diag.acc)
# Balanced error = 1 - mean(sensitivity, specificity).
# NOTE(review): if these elements carry lower/upper bounds, this transform
# turns the lower bounds into the upper bound of the error (and vice versa);
# confirm the downstream plot does not depend on the column order.
BErrorCITable <- rbind(
  BErrorCITable,
  1 - 0.5 * (ci$elements$sensitivity + ci$elements$specificity)
)

# --- KNN ---
rp <- plotModels.ROC(
  ColonModelBIN$cvObject$KNN.testPrediction,
  theCVfolds = Loops,
  main = "KNN",
  cex = 0.90
)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable, ci$elements$diag.acc)
BErrorCITable <- rbind(
  BErrorCITable,
  1 - 0.5 * (ci$elements$sensitivity + ci$elements$specificity)
)

# --- B:SWiMS (column "Prediction") ---
rp <- plotModels.ROC(
  ColonModelBIN$cvObject$Models.testPrediction,
  theCVfolds = Loops,
  predictor = "Prediction",
  main = "B:SWiMS",
  cex = 0.90
)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable, ci$elements$diag.acc)
BErrorCITable <- rbind(
  BErrorCITable,
  1 - 0.5 * (ci$elements$sensitivity + ci$elements$specificity)
)

# --- Ensemble B:SWiMS (column "Ensemble.B.SWiMS") ---
# Title spelling corrected (was "Ensembe B:SWiMS " with a trailing space).
rp <- plotModels.ROC(
  ColonModelBIN$cvObject$Models.testPrediction,
  theCVfolds = Loops,
  predictor = "Ensemble.B.SWiMS",
  main = "Ensemble B:SWiMS",
  cex = 0.90
)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable, ci$elements$diag.acc)
BErrorCITable <- rbind(
  BErrorCITable,
  1 - 0.5 * (ci$elements$sensitivity + ci$elements$specificity)
)
Support Vector Machine (SVM) Analysis
# Shift both user-fit (SVM) prediction columns down by 0.5 before plotting
# (presumably to centre them on the ROC helper's decision threshold -- confirm).
# NOTE: the shift overwrites the stored columns, so running this section a
# second time on the same object subtracts 0.5 again.
ColonModelBIN$cvObject$Models.testPrediction$usrFitFunction_Sel <-
  ColonModelBIN$cvObject$Models.testPrediction$usrFitFunction_Sel - 0.5
ColonModelBIN$cvObject$Models.testPrediction$usrFitFunction <-
  ColonModelBIN$cvObject$Models.testPrediction$usrFitFunction - 0.5

# --- SVM, column "usrFitFunction" ---
rp <- plotModels.ROC(
  ColonModelBIN$cvObject$Models.testPrediction,
  theCVfolds = Loops,
  predictor = "usrFitFunction",
  main = "Filtered:SVM",
  cex = 0.90
)

# Append this classifier's accuracy and balanced error
# (1 - mean of sensitivity and specificity) to the running tables.
ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable, ci$elements$diag.acc)
BErrorCITable <- rbind(
  BErrorCITable,
  1 - 0.5 * (ci$elements$sensitivity + ci$elements$specificity)
)

# --- SVM, column "usrFitFunction_Sel" ---
rp <- plotModels.ROC(
  ColonModelBIN$cvObject$Models.testPrediction,
  theCVfolds = Loops,
  predictor = "usrFitFunction_Sel",
  main = "B:SWiMS/SVM",
  cex = 0.90
)

ci <- epi.tests(rp$predictionTable)
AccCITable <- rbind(AccCITable, ci$elements$diag.acc)
BErrorCITable <- rbind(
  BErrorCITable,
  1 - 0.5 * (ci$elements$sensitivity + ci$elements$specificity)
)
Barplots of Accuracy and Balanced Error
# Labels for the rows of AccCITable / BErrorCITable, in the order the rows
# were appended. Spelling corrected: "SVM:Filterd" -> "SVM:Filtered".
CVthesets <- c(
  "LASSO",
  "KNN",
  "B:SWiMS",
  "B:SWiMS Ensemble",
  "SVM:Filtered",
  "SVM:BSWIMS"
)

# Each label must correspond to exactly one accumulated row; a mismatch
# (e.g. a section run twice, or skipped) would otherwise mislabel the bars.
stopifnot(
  NROW(AccCITable) == length(CVthesets),
  NROW(BErrorCITable) == length(CVthesets)
)

# Bar plot of the cross-validated accuracy per classifier.
bp <- barPlotCiError(
  as.matrix(AccCITable),
  metricname = "Accuracy",
  thesets = CVthesets,
  themethod = "CV",
  main = "Accuracy",
  args.legend = list(x = "bottomright")
)

# Bar plot of the cross-validated balanced error per classifier.
bp <- barPlotCiError(
  as.matrix(BErrorCITable),
  metricname = "Balanced Error",
  thesets = CVthesets,
  themethod = "CV",
  main = "Balanced Error",
  args.legend = list(x = "topright")
)


B:SWiMS Feature Plots
# Bag all B:SWiMS formulas collected during cross-validation into one
# logistic model of the Class outcome.
baggColonBSWiMS <- baggedModel(
  ColonModelBIN$cvObject$allBSWiMSFormulas.list,
  Colon,
  type = "LOGIT",
  Outcome = "Class"
)
#>
#> Num. Models: 946 To Test: 159 TopFreq: 50 Thrf: 1 Removed: 43
#> ..............................................................................................

# Number of collected formulas per cross-validation run (folds x repeats);
# used below to scale the formula network.
cf <- length(ColonModelBIN$cvObject$allBSWiMSFormulas.list) / (Loops * Repeats)

# Candidate names: every coefficient-evolution entry except the first two.
namestoShow <- names(baggColonBSWiMS$coefEvolution)[-c(1, 2)]

# Frequency threshold: a quarter of the total number of CV runs.
frac <- 0.25 * Loops * Repeats
namestoShow <- namestoShow[baggColonBSWiMS$frequencyTable[namestoShow] >= frac]

# The network graph is limited to at most 11 nodes.
fnshow <- min(11, length(namestoShow))

barplot(
  baggColonBSWiMS$frequencyTable[namestoShow],
  las = 2,
  cex.axis = 1.0,
  cex.names = 0.75,
  main = "B:SWiMS Feature Frequency"
)

# seq_len() instead of 1:fnshow, so that fnshow == 0 selects nothing rather
# than the index vector c(1, 0).
n <- network::network(
  cf * baggColonBSWiMS$formulaNetwork[seq_len(fnshow), seq_len(fnshow)],
  directed = FALSE,
  ignore.eval = FALSE,
  names.eval = "weights"
)
gplots::heatmap.2(
  cf * baggColonBSWiMS$formulaNetwork[namestoShow, namestoShow],
  trace = "none",
  mar = c(10, 10),
  main = "B:SWiMS Formula Network"
)

ggnet2(
  n,
  label = TRUE,
  size = "degree",
  size.cut = 3,
  size.min = 1,
  mode = "circle",
  edge.label = "weights",
  edge.label.size = 4
)

LASSO Feature Plots
# Bag the LASSO variable sets collected during cross-validation into one
# logistic model of the Class outcome.
baggColonLASSO <- baggedModel(
  ColonModelBIN$cvObject$LASSOVariables,
  Colon,
  type = "LOGIT",
  Outcome = "Class"
)
#>
#> Num. Models: 51 To Test: 35 TopFreq: 48 Thrf: 1 Removed: 9
#> .....

# Number of features at or above the frequency threshold `frac`. The leading
# `toshow` entries are the ones plotted, which assumes the frequency table
# (and the formula network) is ordered by decreasing frequency -- TODO confirm.
toshow <- sum(baggColonLASSO$frequencyTable >= frac)
# The network graph is limited to at most 11 nodes.
fnshow <- min(11, length(baggColonLASSO$frequencyTable))

# seq_len() instead of 1:toshow: with toshow == 0 the old index c(1, 0)
# silently plotted the first feature even though it missed the threshold.
if (toshow > 0) {
  barplot(
    baggColonLASSO$frequencyTable[seq_len(toshow)],
    las = 2,
    cex.axis = 1.0,
    cex.names = 0.75,
    main = "LASSO Feature Frequency"
  )
} else {
  message("No LASSO feature reached the frequency threshold; bar plot skipped.")
}

n <- network::network(
  baggColonLASSO$formulaNetwork[seq_len(fnshow), seq_len(fnshow)],
  directed = FALSE,
  ignore.eval = FALSE,
  names.eval = "weights"
)

# A heat map needs a matrix with at least two rows and two columns; a single
# selected feature would collapse the subset to a scalar.
if (toshow > 1) {
  gplots::heatmap.2(
    baggColonLASSO$formulaNetwork[seq_len(toshow), seq_len(toshow)],
    trace = "none",
    mar = c(10, 10),
    main = "LASSO Formula Network"
  )
} else {
  message("Fewer than two LASSO features reached the threshold; heat map skipped.")
}

ggnet2(
  n,
  label = TRUE,
  size = "degree",
  size.cut = 3,
  size.min = 1,
  mode = "circle",
  edge.label = "weights",
  edge.label.size = 4
)

Venn Diagrams
Here I explore which features are shared among the univariate, LASSO, and B:SWiMS selections.
# One-sided p-values from the univariate Z scores, adjusted with
# Benjamini-Hochberg. lower.tail = FALSE computes the upper tail directly and
# avoids the precision loss of 1 - pnorm(z) for large z.
pvalues <- p.adjust(
  pnorm(ColonModelBIN$univariateAnalysis$ZUni, lower.tail = FALSE),
  "BH"
)
# which() drops features whose p-value is NA, so they do not enter the set as
# an NA "feature".
topunivec <- as.character(
  ColonModelBIN$univariateAnalysis$Name[which(pvalues < 0.05)]
)

# Features selected more often than `frac` by each bagged model.
# NOTE(review): the comparison here is strict (>), whereas the feature plots
# use >= frac -- confirm which one is intended.
tob <- baggColonBSWiMS$frequencyTable > frac
topBSwims <- as.character(names(baggColonBSWiMS$frequencyTable[tob]))
tob <- baggColonLASSO$frequencyTable > frac
topLASSO <- as.character(names(baggColonLASSO$frequencyTable[tob]))

featurelist <- list(
  Univariate = topunivec,
  CVLASSO = topLASSO,
  BSWIMS = topBSwims
)
vend <- venn(featurelist)
vgroups <- attr(vend, "intersections")

# List the features common to all three sets in the centre of the diagram;
# skipped when that intersection is empty, since legend() needs some text.
if (length(vgroups$`Univariate:CVLASSO:BSWIMS`) > 0) {
  legend("center", vgroups$`Univariate:CVLASSO:BSWIMS`, cex = 0.75)
}

