library(DT) # For Data Tables
library(lattice) # The lattice add-on of Trellis graphics for R
library(knitr) # For Dynamic Report Generation in R
library(gplots) # Various R Programming Tools for Plotting Data
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library(ggplot2) # An Implementation of the Grammar of Graphics
library(ClustOfVar) # Clustering of variables
library(ape) # Analyses of Phylogenetics and Evolution (as.phylo)
library(Information) # Data Exploration with Information Theory (Weight-of-Evidence and Information Value)
##
## Attaching package: 'Information'
## The following object is masked from 'package:ape':
##
## is.binary
library(ROCR) # Model Performance and ROC curve
library(caret) # Classification and Regression Training - for any machine learning algorithms
library(rpart) # Recursive partitioning for classification, regression and survival trees
library(rpart.utils) # Tools for parsing and manipulating rpart objects, including generating machine readable rules
library(rpart.plot) # Plot 'rpart' Models: An Enhanced Version of 'plot.rpart'
library(randomForest)# Leo Breiman and Cutler's Random Forests for Classification and Regression
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
library(party) # A computational toolbox for recursive partitioning - Conditional inference Trees
## Loading required package: grid
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: strucchange
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
##
## Attaching package: 'party'
## The following object is masked from 'package:ape':
##
## where
library(bnlearn) # Bayesian Network Structure Learning, Parameter Learning and Inference
##
## Attaching package: 'bnlearn'
## The following object is masked from 'package:stats':
##
## sigma
library(DAAG) # Data Analysis and Graphics Data and Functions
library(vcd) # Visualizing Categorical Data
library(kernlab) # Support Vector Machine
##
## Attaching package: 'kernlab'
## The following object is masked from 'package:modeltools':
##
## prior
## The following object is masked from 'package:ggplot2':
##
## alpha
# The following libraries are loaded only for models 8 and 9:
#library(neuralnet) # Neural Network
#library(lars) # For Least Angle Regression, Lasso and Forward Stagewise
#library(glmnet) # Lasso and Elastic-Net Regularized Generalized Linear Models
library(nnet) # Feed-Forward Neural Networks and Multinomial Log-Linear Models
library(NeuralNetTools) # Visualization and Analysis Tools for Neural Networks
library(mlbench) # Machine Learning Benchmark Problems (source of the BreastCancer data)
data(BreastCancer)
str(BreastCancer)
## 'data.frame': 699 obs. of 11 variables:
## $ Id : chr "1000025" "1002945" "1015425" "1016277" ...
## $ Cl.thickness : Ord.factor w/ 10 levels "1"<"2"<"3"<"4"<..: 5 5 3 6 4 8 1 2 2 4 ...
## $ Cell.size : Ord.factor w/ 10 levels "1"<"2"<"3"<"4"<..: 1 4 1 8 1 10 1 1 1 2 ...
## $ Cell.shape : Ord.factor w/ 10 levels "1"<"2"<"3"<"4"<..: 1 4 1 8 1 10 1 2 1 1 ...
## $ Marg.adhesion : Ord.factor w/ 10 levels "1"<"2"<"3"<"4"<..: 1 5 1 1 3 8 1 1 1 1 ...
## $ Epith.c.size : Ord.factor w/ 10 levels "1"<"2"<"3"<"4"<..: 2 7 2 3 2 7 2 2 2 2 ...
## $ Bare.nuclei : Factor w/ 10 levels "1","2","3","4",..: 1 10 2 4 1 10 10 1 1 1 ...
## $ Bl.cromatin : Factor w/ 10 levels "1","2","3","4",..: 3 3 3 3 3 9 3 3 1 2 ...
## $ Normal.nucleoli: Factor w/ 10 levels "1","2","3","4",..: 1 2 1 7 1 7 1 1 1 1 ...
## $ Mitoses : Factor w/ 9 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 5 1 ...
## $ Class : Factor w/ 2 levels "benign","malignant": 1 1 1 1 1 2 1 1 1 1 ...
BreastCancer$Id <- NULL
BreastCancer$Cl.thickness <- as.numeric(BreastCancer$Cl.thickness)
BreastCancer$Cell.size <- as.numeric(BreastCancer$Cell.size)
BreastCancer$Cell.shape <- as.numeric(BreastCancer$Cell.shape)
BreastCancer$Marg.adhesion <- as.numeric(BreastCancer$Marg.adhesion)
BreastCancer$Epith.c.size <- as.numeric(BreastCancer$Epith.c.size)
BreastCancer$Bare.nuclei <- as.numeric(BreastCancer$Bare.nuclei)
BreastCancer$Bl.cromatin <- as.numeric(BreastCancer$Bl.cromatin)
BreastCancer$Normal.nucleoli <- as.numeric(BreastCancer$Normal.nucleoli)
BreastCancer$Mitoses <- as.numeric(BreastCancer$Mitoses)
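# Note: as.numeric() on a factor returns the level index, not the label value;
# the two coincide here only because each factor's levels are already in
# natural order ("1", "2", ..., "10"). A sketch of the more defensive idiom:
# BreastCancer$Bare.nuclei <- as.numeric(as.character(BreastCancer$Bare.nuclei))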
BreastCancer <- na.omit(BreastCancer) # drop the 16 rows with missing values
str(BreastCancer)
## 'data.frame': 683 obs. of 10 variables:
## $ Cl.thickness : num 5 5 3 6 4 8 1 2 2 4 ...
## $ Cell.size : num 1 4 1 8 1 10 1 1 1 2 ...
## $ Cell.shape : num 1 4 1 8 1 10 1 2 1 1 ...
## $ Marg.adhesion : num 1 5 1 1 3 8 1 1 1 1 ...
## $ Epith.c.size : num 2 7 2 3 2 7 2 2 2 2 ...
## $ Bare.nuclei : num 1 10 2 4 1 10 10 1 1 1 ...
## $ Bl.cromatin : num 3 3 3 3 3 9 3 3 1 2 ...
## $ Normal.nucleoli: num 1 2 1 7 1 7 1 1 1 1 ...
## $ Mitoses : num 1 1 1 1 1 1 1 1 5 1 ...
## $ Class : Factor w/ 2 levels "benign","malignant": 1 1 1 1 1 2 1 1 1 1 ...
## - attr(*, "na.action")=Class 'omit' Named int [1:16] 24 41 140 146 159 165 236 250 276 293 ...
## .. ..- attr(*, "names")= chr [1:16] "24" "41" "140" "146" ...
set.seed(123) # fix the random seed for reproducibility
ind <- sample(2, nrow(BreastCancer), replace=TRUE, prob=c(0.6, 0.4)) # 60/40 split
train <- BreastCancer[ind==1,]
test <- BreastCancer[ind==2,]
str(train)
## 'data.frame': 411 obs. of 10 variables:
## $ Cl.thickness : num 5 3 8 1 2 4 2 1 8 4 ...
## $ Cell.size : num 1 1 10 1 1 2 1 1 7 1 ...
## $ Cell.shape : num 1 1 10 1 1 1 1 1 5 1 ...
## $ Marg.adhesion : num 1 1 8 1 1 1 1 1 10 1 ...
## $ Epith.c.size : num 2 2 7 2 2 2 2 2 7 2 ...
## $ Bare.nuclei : num 1 2 10 10 1 1 1 3 9 1 ...
## $ Bl.cromatin : num 3 3 9 3 1 2 2 3 5 2 ...
## $ Normal.nucleoli: num 1 1 7 1 1 1 1 1 5 1 ...
## $ Mitoses : num 1 1 1 1 5 1 1 1 4 1 ...
## $ Class : Factor w/ 2 levels "benign","malignant": 1 1 2 1 1 1 1 1 2 1 ...
## - attr(*, "na.action")=Class 'omit' Named int [1:16] 24 41 140 146 159 165 236 250 276 293 ...
## .. ..- attr(*, "names")= chr [1:16] "24" "41" "140" "146" ...
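# Optional sanity check (a sketch, not run above): confirm the random split
# roughly preserves the benign/malignant mix in both partitions.
prop.table(table(train$Class))
prop.table(table(test$Class))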
library(stats)
# Model m1: Stepwise Logistic Regression
m1 <- glm(Class~.,data=train,family=binomial())
m1 <- step(m1)
## Start: AIC=87.55
## Class ~ Cl.thickness + Cell.size + Cell.shape + Marg.adhesion +
## Epith.c.size + Bare.nuclei + Bl.cromatin + Normal.nucleoli +
## Mitoses
##
## Df Deviance AIC
## - Cell.size 1 68.005 86.005
## - Bl.cromatin 1 68.378 86.378
## <none> 67.545 87.545
## - Normal.nucleoli 1 69.587 87.587
## - Cell.shape 1 69.845 87.845
## - Mitoses 1 70.495 88.495
## - Marg.adhesion 1 70.548 88.548
## - Epith.c.size 1 71.519 89.519
## - Cl.thickness 1 77.448 95.448
## - Bare.nuclei 1 77.965 95.965
##
## Step: AIC=86.01
## Class ~ Cl.thickness + Cell.shape + Marg.adhesion + Epith.c.size +
## Bare.nuclei + Bl.cromatin + Normal.nucleoli + Mitoses
##
## Df Deviance AIC
## - Bl.cromatin 1 68.800 84.800
## - Normal.nucleoli 1 69.615 85.615
## - Cell.shape 1 69.973 85.973
## <none> 68.005 86.005
## - Marg.adhesion 1 70.634 86.634
## - Mitoses 1 70.635 86.635
## - Epith.c.size 1 71.529 87.529
## - Cl.thickness 1 77.481 93.481
## - Bare.nuclei 1 78.064 94.064
##
## Step: AIC=84.8
## Class ~ Cl.thickness + Cell.shape + Marg.adhesion + Epith.c.size +
## Bare.nuclei + Normal.nucleoli + Mitoses
##
## Df Deviance AIC
## <none> 68.800 84.800
## - Normal.nucleoli 1 71.185 85.185
## - Cell.shape 1 71.193 85.193
## - Marg.adhesion 1 71.483 85.483
## - Mitoses 1 71.688 85.688
## - Epith.c.size 1 74.073 88.073
## - Cl.thickness 1 79.044 93.044
## - Bare.nuclei 1 82.885 96.885
summary(m1)
##
## Call:
## glm(formula = Class ~ Cl.thickness + Cell.shape + Marg.adhesion +
## Epith.c.size + Bare.nuclei + Normal.nucleoli + Mitoses, family = binomial(),
## data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.96097 -0.13932 -0.07928 0.03354 2.51104
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -9.2316 1.2422 -7.432 1.07e-13 ***
## Cl.thickness 0.4426 0.1500 2.951 0.003170 **
## Cell.shape 0.3250 0.2243 1.449 0.147273
## Marg.adhesion 0.2649 0.1679 1.577 0.114786
## Epith.c.size 0.4530 0.2046 2.214 0.026852 *
## Bare.nuclei 0.4159 0.1147 3.625 0.000288 ***
## Normal.nucleoli 0.2190 0.1469 1.491 0.135953
## Mitoses 0.4813 0.3025 1.591 0.111582
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 538.3 on 410 degrees of freedom
## Residual deviance: 68.8 on 403 degrees of freedom
## AIC: 84.8
##
## Number of Fisher Scoring iterations: 8
# List the variables significant at p-value < 0.01
significant.variables <- summary(m1)$coeff[-1,4] < 0.01
names(significant.variables)[significant.variables == TRUE]
## [1] "Cl.thickness" "Bare.nuclei"
prob <- predict(m1, type = "response")
res <- residuals(m1, type = "deviance")
#Plot Residuals
plot(predict(m1), res,
xlab="Fitted values", ylab = "Residuals",
ylim = max(abs(res)) * c(-1,1))

# Score the test data set
test$m1_score <- predict(m1, newdata = test, type = 'response')
m1_pred <- prediction(test$m1_score, test$Class)
m1_perf <- performance(m1_pred,"tpr","fpr")
#ROC
plot(m1_perf, lwd=2, colorize=TRUE, main="ROC m1: Logistic Regression Performance")
lines(x=c(0, 1), y=c(0, 1), col="red", lwd=1, lty=3);
lines(x=c(1, 0), y=c(0, 1), col="green", lwd=1, lty=4)

# Print, plot variable importance
print(varImp(m1, scale = FALSE))
## Overall
## Cl.thickness 2.950789
## Cell.shape 1.449232
## Marg.adhesion 1.577043
## Epith.c.size 2.213662
## Bare.nuclei 3.625448
## Normal.nucleoli 1.491033
## Mitoses 1.591125
# Plot precision/recall curve
m1_perf_precision <- performance(m1_pred, measure = "prec", x.measure = "rec")
plot(m1_perf_precision, main="m1 Logistic:Precision/recall curve")

# Plot accuracy as function of threshold
m1_perf_acc <- performance(m1_pred, measure = "acc")
plot(m1_perf_acc, main="m1 Logistic:Accuracy as function of threshold")

#KS, Gini & AUC m1
m1_KS <- round(max(attr(m1_perf,'y.values')[[1]]-attr(m1_perf,'x.values')[[1]])*100, 2)
m1_AUROC <- round(performance(m1_pred, measure = "auc")@y.values[[1]]*100, 2)
m1_Gini <- (2*m1_AUROC - 100)
cat("AUROC: ",m1_AUROC,"\tKS: ", m1_KS, "\tGini:", m1_Gini, "\n")
## AUROC: 99.53 KS: 95.05 Gini: 99.06
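# A minimal sketch (assuming the conventional 0.5 probability cutoff): turn
# the m1 scores into class labels and tabulate them against the truth with
# caret's confusionMatrix().
m1_class <- factor(ifelse(test$m1_score > 0.5, "malignant", "benign"),
                   levels = levels(test$Class))
confusionMatrix(m1_class, test$Class)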
library(rpart)
m2 <- rpart(Class~.,data=train)
# Print tree detail
printcp(m2)
##
## Classification tree:
## rpart(formula = Class ~ ., data = train)
##
## Variables actually used in tree construction:
## [1] Bare.nuclei Cell.size
##
## Root node error: 149/411 = 0.36253
##
## n= 411
##
## CP nsplit rel error xerror xstd
## 1 0.805369 0 1.00000 1.00000 0.065409
## 2 0.073826 1 0.19463 0.22819 0.037480
## 3 0.020134 2 0.12081 0.16107 0.031905
## 4 0.010000 3 0.10067 0.16107 0.031905
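# Optional sketch: prune m2 back to the complexity parameter with the lowest
# cross-validated error (xerror) in the cp table above; here that selects the
# two-split tree, which matches the minimum xerror at lower complexity.
m2_cp <- m2$cptable[which.min(m2$cptable[, "xerror"]), "CP"]
m2_pruned <- prune(m2, cp = m2_cp)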
# Tree plot
plot(m2, main="Tree:Recursive Partitioning")
text(m2)

# A clearer version of the tree plot
prp(m2,type=2,extra=1, main="Tree:Recursive Partitioning")

# Score the test data
test$m2_score <- predict(m2, newdata = test, type = 'prob')
m2_pred <- prediction(test$m2_score[,2],test$Class)
m2_perf <- performance(m2_pred,"tpr","fpr")
# Model performance plot
plot(m2_perf, lwd=2, colorize=TRUE, main="ROC m2: Traditional Recursive Partitioning")
lines(x=c(0, 1), y=c(0, 1), col="red", lwd=1, lty=3);
lines(x=c(1, 0), y=c(0, 1), col="green", lwd=1, lty=4)

# Plot precision/recall curve
m2_perf_precision <- performance(m2_pred, measure = "prec", x.measure = "rec")
plot(m2_perf_precision, main="m2 Recursive Partitioning:Precision/recall curve")

# Plot accuracy as function of threshold
m2_perf_acc <- performance(m2_pred, measure = "acc")
plot(m2_perf_acc, main="m2 Recursive Partitioning:Accuracy as function of threshold")

# KS, Gini & AUC m2
m2_AUROC <- round(performance(m2_pred, measure = "auc")@y.values[[1]]*100, 2)
m2_KS <- round(max(attr(m2_perf,'y.values')[[1]]-attr(m2_perf,'x.values')[[1]])*100, 2)
m2_Gini <- (2*m2_AUROC - 100)
cat("AUROC: ",m2_AUROC,"\tKS: ", m2_KS, "\tGini:", m2_Gini, "\n")
## AUROC: 95.65 KS: 90.05 Gini: 91.3
library(randomForest)
m3 <- randomForest(Class ~ ., data = train)
m3_fitForest <- predict(m3, newdata = test, type="prob")[,2]
m3_pred <- prediction( m3_fitForest, test$Class)
m3_perf <- performance(m3_pred, "tpr", "fpr")
#plot variable importance
varImpPlot(m3, main="Random Forest: Variable Importance")

# Model Performance plot
plot(m3_perf,colorize=TRUE, lwd=2, main = "m3 ROC: Random Forest", col = "blue")
lines(x=c(0, 1), y=c(0, 1), col="red", lwd=1, lty=3);
lines(x=c(1, 0), y=c(0, 1), col="green", lwd=1, lty=4)

# Plot precision/recall curve
m3_perf_precision <- performance(m3_pred, measure = "prec", x.measure = "rec")
plot(m3_perf_precision, main="m3 Random Forests:Precision/recall curve")

# Plot accuracy as function of threshold
m3_perf_acc <- performance(m3_pred, measure = "acc")
plot(m3_perf_acc, main="m3 Random Forests:Accuracy as function of threshold")

# KS, Gini & AUC m3
m3_AUROC <- round(performance(m3_pred, measure = "auc")@y.values[[1]]*100, 2)
m3_KS <- round(max(attr(m3_perf,'y.values')[[1]] - attr(m3_perf,'x.values')[[1]])*100, 2)
m3_Gini <- (2*m3_AUROC - 100)
cat("AUROC: ",m3_AUROC,"\tKS: ", m3_KS, "\tGini:", m3_Gini, "\n")
## AUROC: 99.62 KS: 94.51 Gini: 99.24
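# Optional sketch: the fitted forest also carries an internal out-of-bag (OOB)
# error estimate that needs no test set; plot(m3) shows it stabilizing as
# trees are added.
print(m3)
plot(m3, main = "m3 Random Forest: error vs. number of trees")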
library(party)
set.seed(123456742)
m3_1 <- cforest(Class~., control = cforest_unbiased(mtry = 2, ntree = 50), data = train)
# Variable Importance
kable(as.data.frame(varimp(m3_1)))
|                | varimp(m3_1)|
|:---------------|------------:|
|Cl.thickness    |    0.0140397|
|Cell.size       |    0.0585430|
|Cell.shape      |    0.0688742|
|Marg.adhesion   |    0.0083444|
|Epith.c.size    |    0.0072848|
|Bare.nuclei     |    0.0349669|
|Bl.cromatin     |    0.0263576|
|Normal.nucleoli |    0.0275497|
|Mitoses         |   -0.0007947|
# Model Summary
summary(m3_1)
## Length Class Mode
## 1 RandomForest S4
# Model Performance
m3_1_fitForest <- do.call("rbind", treeresponse(m3_1, newdata = test))[,2]
m3_1_pred <- prediction(m3_1_fitForest, test$Class)
m3_1_perf <- performance(m3_1_pred, "tpr", "fpr")
# Model Performance Plot
plot(m3_1_perf, colorize=TRUE, lwd=2, main = " m3_1 ROC: Conditional Random Forests")
lines(x=c(0, 1), y=c(0, 1), col="red", lwd=1, lty=3);
lines(x=c(1, 0), y=c(0, 1), col="green", lwd=1, lty=4)

# Plot precision/recall curve
m3_1_perf_precision <- performance(m3_1_pred, measure = "prec", x.measure = "rec")
plot(m3_1_perf_precision, main="m3_1 Conditional Random Forests:Precision/recall curve")

# Plot accuracy as function of threshold
m3_1_perf_acc <- performance(m3_1_pred, measure = "acc")
plot(m3_1_perf_acc, main="m3_1 Conditional Random Forests:Accuracy as function of threshold")

# KS, Gini & AUC m3_1
m3_1_AUROC <- round(performance(m3_1_pred, measure = "auc")@y.values[[1]]*100, 2)
m3_1_KS <- round(max(attr(m3_1_perf,'y.values')[[1]] - attr(m3_1_perf,'x.values')[[1]])*100, 2)
m3_1_Gini <- (2*m3_1_AUROC - 100)
cat("AUROC: ",m3_1_AUROC,"\tKS: ", m3_1_KS, "\tGini:", m3_1_Gini, "\n")
## AUROC: 99.62 KS: 94.51 Gini: 99.24
#library(party)
m4 <- ctree(Class~.,data=train)
plot(m4, main="m4: Conditional inference Tree",col="blue")

resultdfr <- as.data.frame(do.call("rbind", treeresponse(m4, newdata = test)))
test$m4_score <- resultdfr[,2]
m4_pred <- prediction(test$m4_score,test$Class)
m4_perf <- performance(m4_pred,"tpr","fpr")
# Model Performance
plot(m4_perf, colorize=TRUE, lwd=2, main="ROC m4: Conditional inference Tree")
lines(x=c(0, 1), y=c(0, 1), col="red", lwd=1, lty=3);
lines(x=c(1, 0), y=c(0, 1), col="green", lwd=1, lty=4)

# Plot precision/recall curve
m4_perf_precision <- performance(m4_pred, measure = "prec", x.measure = "rec")
plot(m4_perf_precision, main="m4 CIT:Plot precision/recall curve")

# Plot accuracy as function of threshold
m4_perf_acc <- performance(m4_pred, measure = "acc")
plot(m4_perf_acc, main="m4 CIT:Plot accuracy as function of threshold")

# KS, Gini & AUC m4
m4_AUROC <- round(performance(m4_pred, measure = "auc")@y.values[[1]]*100, 2)
m4_KS <- round(max(attr(m4_perf,'y.values')[[1]]-attr(m4_perf,'x.values')[[1]])*100, 2)
m4_Gini <- (2*m4_AUROC - 100)
cat("AUROC: ",m4_AUROC,"\tKS: ", m4_KS, "\tGini:", m4_Gini, "\n")
## AUROC: 97.66 KS: 91.71 Gini: 95.32
library(kernlab) #for SVM
# Basic Model
m7_1 <- ksvm(Class ~ ., data = train, kernel = "vanilladot")
## Setting default kernel parameters
m7_1_pred <- predict(m7_1, test[,1:9], type="response")
head(m7_1_pred)
## [1] malignant malignant benign benign benign benign
## Levels: benign malignant
#Model accuracy:
table(m7_1_pred, test$Class)
##
## m7_1_pred benign malignant
## benign 176 4
## malignant 6 86
# Agreement: count of correctly classified test cases
m7_1_accuracy <- (m7_1_pred == test$Class)
sum(m7_1_accuracy)
## [1] 262
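# The same agreement expressed as a rate:
mean(m7_1_accuracy) # 262 of 272 test cases, i.e. about 0.963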
# Compute the prediction scores
m7_1_score <- predict(m7_1,test, type="decision")
m7_1_pred <- prediction(m7_1_score, test$Class)
# Plot ROC curve
m7_1_perf <- performance(m7_1_pred, measure = "tpr", x.measure = "fpr")
plot(m7_1_perf, colorize=TRUE, lwd=2, main="m7_1 SVM:Plot ROC curve - Vanilladot")
lines(x=c(0, 1), y=c(0, 1), col="red", lwd=1, lty=3);
lines(x=c(1, 0), y=c(0, 1), col="green", lwd=1, lty=4)

# Plot precision/recall curve
m7_1_perf_precision <- performance(m7_1_pred, measure = "prec", x.measure = "rec")
plot(m7_1_perf_precision, main="m7_1 SVM:Plot precision/recall curve")

# Plot accuracy as function of threshold
m7_1_perf_acc <- performance(m7_1_pred, measure = "acc")
plot(m7_1_perf_acc, main="m7_1 SVM:Plot accuracy as function of threshold")

# Model Performance
# KS, Gini & AUC m7_1
m7_1_AUROC <- round(performance(m7_1_pred, measure = "auc")@y.values[[1]]*100, 2)
m7_1_KS <- round(max(attr(m7_1_perf,'y.values')[[1]]-attr(m7_1_perf,'x.values')[[1]])*100, 2)
m7_1_Gini <- (2*m7_1_AUROC - 100)
cat("AUROC: ",m7_1_AUROC,"\tKS: ", m7_1_KS, "\tGini:", m7_1_Gini, "\n")
## AUROC: 99.68 KS: 96.15 Gini: 99.36
library(kernlab)
# Model Improvement with Gaussian RBF kernel
m7_2 <- ksvm(Class ~ ., data = train, kernel = "rbfdot")
m7_2_pred <- predict(m7_2, test[,1:9], type="response")
head(m7_2_pred)
## [1] malignant malignant benign benign benign benign
## Levels: benign malignant
# Model accuracy:
table(m7_2_pred, test$Class)
##
## m7_2_pred benign malignant
## benign 169 2
## malignant 13 88
# Compute the prediction scores
m7_2_score <- predict(m7_2,test, type="decision")
m7_2_pred <- prediction(m7_2_score, test$Class)
# Plot ROC curve
m7_2_perf <- performance(m7_2_pred, measure = "tpr", x.measure = "fpr")
plot(m7_2_perf, colorize=TRUE, lwd=2, main="SVM:Plot ROC curve - RBF", col="blue")
lines(x=c(0, 1), y=c(0, 1), col="red", lwd=1, lty=3);
lines(x=c(1, 0), y=c(0, 1), col="green", lwd=1, lty=4)

# Plot precision/recall curve
m7_2_perf_precision <- performance(m7_2_pred, measure = "prec", x.measure = "rec")
plot(m7_2_perf_precision, main="m7_2 SVM:Plot precision/recall curve")

# Model Performance
# KS, Gini & AUC m7_2
m7_2_AUROC <- round(performance(m7_2_pred, measure = "auc")@y.values[[1]]*100, 2)
m7_2_KS <- round(max(attr(m7_2_perf,'y.values')[[1]]-attr(m7_2_perf,'x.values')[[1]])*100, 2)
m7_2_Gini <- (2*m7_2_AUROC - 100)
cat("AUROC: ",m7_2_AUROC,"\tKS: ", m7_2_KS, "\tGini:", m7_2_Gini, "\n")
## AUROC: 99.09 KS: 92.31 Gini: 98.18
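# Optional sketch: ksvm() can also report a k-fold cross-validation error on
# the training data via its 'cross' argument, which helps compare kernels
# without repeatedly scoring the test set.
m7_2_cv <- ksvm(Class ~ ., data = train, kernel = "rbfdot", cross = 5)
cross(m7_2_cv)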
# ROC Comparison
plot(m7_1_perf, col='blue', lty=1, main='SVM: Model Performance Comparison (m7 ROC)')
plot(m7_2_perf, col='green', lty=2, add=TRUE); # RBF kernel SVM
legend(0.5,0.4,
c("m7_1: SVM vanilladot", "m7_2: SVM RBF kernel"),
col=c('blue', 'green'),
lwd=3);
lines(x=c(0, 1), y=c(0, 1), col="red", lwd=1, lty=3);# random line

#Compare ROC Performance of Models
plot(m1_perf, col='blue', lty=1, main='ROCs: Model Performance Comparison') # logistic regression
plot(m2_perf, col='gold',lty=2, add=TRUE); # simple tree
plot(m3_perf, col='green',add=TRUE,lty=4); # random forest
plot(m4_perf, col='dark gray',add=TRUE,lty=5); # Conditional Inference Tree
plot(m7_2_perf, col='black', add=TRUE, lty=6); # Support Vector Machine (SVM)
legend(0.6,0.5,
c('m1: logistic regression','m2: recursive partitioning',
'm3: random forest', 'm4: conditional inference tree', 'm7_2: SVM'),
col=c('blue','gold','green','dark gray','black'),
lty=c(1,2,4,5,6),
lwd=3);
lines(c(0,1),c(0,1),col = "gray", lty = 4 ) # random line
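# Convenience sketch: gather the headline metrics computed above into a single
# comparison table.
models_compare <- data.frame(
  Model = c("m1: Logistic Regression", "m2: Recursive Partitioning",
            "m3: Random Forest", "m4: Conditional Inference Tree",
            "m7_1: SVM (linear)", "m7_2: SVM (RBF)"),
  AUROC = c(m1_AUROC, m2_AUROC, m3_AUROC, m4_AUROC, m7_1_AUROC, m7_2_AUROC),
  KS    = c(m1_KS, m2_KS, m3_KS, m4_KS, m7_1_KS, m7_2_KS),
  Gini  = c(m1_Gini, m2_Gini, m3_Gini, m4_Gini, m7_1_Gini, m7_2_Gini))
kable(models_compare)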
