# Decision Tree Classification on Breast cancer dataset
# Download the Wisconsin breast-cancer data file and load it.
fileURL <- "http://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data"
# method = "curl" would shell out to an external curl binary (which may not be
# installed) and does not follow redirects unless extra = "-L" is passed.
# The built-in "libcurl" method follows http -> https redirects; fall back to
# R's automatic choice on builds without libcurl support.
downloadMethod <- if (capabilities("libcurl")) "libcurl" else "auto"
downloadStatus <- download.file(fileURL,
                                destfile = "breast-cancer-wisconsin.data",
                                method = downloadMethod)
# download.file() returns 0 on success; stop early instead of parsing a
# missing or broken file below.
if (downloadStatus != 0) {
  stop("Download of ", fileURL, " failed with status ", downloadStatus,
       call. = FALSE)
}
# Read the comma-separated file; "?" entries are treated as missing values (NA)
data <- read.table("breast-cancer-wisconsin.data", na.strings = "?", sep = ",")
str(data)
## 'data.frame':    699 obs. of  11 variables:
##  $ V1 : int  1000025 1002945 1015425 1016277 1017023 1017122 1018099 1018561 1033078 1033078 ...
##  $ V2 : int  5 5 3 6 4 8 1 2 2 4 ...
##  $ V3 : int  1 4 1 8 1 10 1 1 1 2 ...
##  $ V4 : int  1 4 1 8 1 10 1 2 1 1 ...
##  $ V5 : int  1 5 1 1 3 8 1 1 1 1 ...
##  $ V6 : int  2 7 2 3 2 7 2 2 2 2 ...
##  $ V7 : int  1 10 2 4 1 10 10 1 1 1 ...
##  $ V8 : int  3 3 3 3 3 9 3 3 1 2 ...
##  $ V9 : int  1 2 1 7 1 7 1 1 1 1 ...
##  $ V10: int  1 1 1 1 1 1 1 1 5 1 ...
##  $ V11: int  2 2 2 2 2 4 2 2 2 2 ...
# Drop the first column (the sample ID)
data <- data[-1]

# Give the remaining columns descriptive names.
# The names appear in the plotted trees and make them easier to interpret.
colnames(data) <- c(
  "ClumpThickness",
  "UniformityCellSize",
  "UniformityCellShape",
  "MarginalAdhesion",
  "SingleEpithelialCellSize",
  "BareNuclei",
  "BlandChromatin",
  "NormalNucleoli",
  "Mitoses",
  "Class"
)

# Recode the numeric response (2 / 4) as a labelled factor
data[["Class"]] <- factor(
  data[["Class"]],
  levels = c(2, 4),
  labels = c("benign", "malignant")
)

print(summary(data))
##  ClumpThickness   UniformityCellSize UniformityCellShape MarginalAdhesion
##  Min.   : 1.000   Min.   : 1.000     Min.   : 1.000      Min.   : 1.000  
##  1st Qu.: 2.000   1st Qu.: 1.000     1st Qu.: 1.000      1st Qu.: 1.000  
##  Median : 4.000   Median : 1.000     Median : 1.000      Median : 1.000  
##  Mean   : 4.418   Mean   : 3.134     Mean   : 3.207      Mean   : 2.807  
##  3rd Qu.: 6.000   3rd Qu.: 5.000     3rd Qu.: 5.000      3rd Qu.: 4.000  
##  Max.   :10.000   Max.   :10.000     Max.   :10.000      Max.   :10.000  
##                                                                          
##  SingleEpithelialCellSize   BareNuclei     BlandChromatin   NormalNucleoli  
##  Min.   : 1.000           Min.   : 1.000   Min.   : 1.000   Min.   : 1.000  
##  1st Qu.: 2.000           1st Qu.: 1.000   1st Qu.: 2.000   1st Qu.: 1.000  
##  Median : 2.000           Median : 1.000   Median : 3.000   Median : 1.000  
##  Mean   : 3.216           Mean   : 3.545   Mean   : 3.438   Mean   : 2.867  
##  3rd Qu.: 4.000           3rd Qu.: 6.000   3rd Qu.: 5.000   3rd Qu.: 4.000  
##  Max.   :10.000           Max.   :10.000   Max.   :10.000   Max.   :10.000  
##                           NA's   :16                                        
##     Mitoses             Class    
##  Min.   : 1.000   benign   :458  
##  1st Qu.: 1.000   malignant:241  
##  Median : 1.000                  
##  Mean   : 1.589                  
##  3rd Qu.: 1.000                  
##  Max.   :10.000                  
## 
# Note that there are 16 missing values in BareNuclei.
# As you will see later, these missing values are not imputed.
# Investigate how decision trees handle missing values;
# read the rpart documentation on this.
#This link has some extra information: 
#https://stats.stackexchange.com/questions/96025/how-do-decision-tree-learning-algorithms-deal-with-missing-values-under-the-hoo


# Divide the dataset into training and validation sets. There are many ways to do this.
# An alternate method is also shown below.

# Fix the RNG seed so the random assignment is reproducible
set.seed(123)
# Draw a group label (1 or 2) for every row, with probabilities 0.7 / 0.3
ind <- sample.int(2, size = nrow(data), replace = TRUE, prob = c(0.7, 0.3))
# Rows labelled 1 form the training set, rows labelled 2 the validation set
trainData <- data[ind == 1, ]
validationData <- data[ind == 2, ]
# Class counts in the training set
table(trainData[["Class"]])
## 
##    benign malignant 
##       322       166
# Class proportions in the training set
prop.table(table(trainData[["Class"]]))
## 
##    benign malignant 
## 0.6598361 0.3401639
library(caTools)
## Warning: package 'caTools' was built under R version 3.6.2
# Alternate method: build a logical split mask with caTools::sample.split()
# on the class labels (TRUE rows are used for training further below)
set.seed(123)
split <- sample.split(data$Class, SplitRatio = 0.7)
print(split)
##   [1]  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE
##  [13]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
##  [25]  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE  TRUE  TRUE FALSE  TRUE FALSE
##  [37]  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
##  [49] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE
##  [61]  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [73] FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [85]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE
##  [97]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
## [109] FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [121] FALSE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE
## [133] FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [145]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE
## [157] FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
## [169] FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE
## [181] FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE  TRUE  TRUE  TRUE FALSE  TRUE
## [193]  TRUE  TRUE FALSE  TRUE  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE FALSE
## [205]  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE
## [217]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
## [229]  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE FALSE FALSE  TRUE  TRUE  TRUE
## [241]  TRUE FALSE  TRUE  TRUE FALSE FALSE  TRUE FALSE FALSE  TRUE  TRUE  TRUE
## [253]  TRUE FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE
## [265]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE
## [277]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE
## [289]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [301] FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
## [313]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
## [325]  TRUE FALSE FALSE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE
## [337]  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE
## [349] FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE
## [361] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE
## [373]  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [385] FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE  TRUE
## [397]  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [409]  TRUE FALSE  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
## [421] FALSE  TRUE  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
## [433]  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE  TRUE
## [445] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE
## [457] FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE
## [469]  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [481]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE
## [493] FALSE FALSE  TRUE  TRUE FALSE FALSE  TRUE FALSE FALSE  TRUE  TRUE  TRUE
## [505]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE FALSE  TRUE
## [517] FALSE  TRUE  TRUE  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE
## [529]  TRUE FALSE FALSE  TRUE  TRUE FALSE  TRUE  TRUE FALSE FALSE  TRUE  TRUE
## [541] FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [553] FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE
## [565]  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE
## [577]  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
## [589]  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE FALSE  TRUE  TRUE FALSE
## [601]  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE FALSE FALSE
## [613] FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE  TRUE
## [625]  TRUE  TRUE  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE
## [637]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
## [649]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
## [661]  TRUE FALSE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [673]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE FALSE FALSE
## [685]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE
## [697]  TRUE  TRUE  TRUE
# Partition the rows using the logical mask: TRUE -> training, FALSE -> testing
dataTrain <- data[which(split), ]
dataTest <- data[which(!split), ]

# install.packages("rpart")
# install.packages("rpart.plot")

library(rpart)
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 3.6.2
library(party)
## Warning: package 'party' was built under R version 3.6.2
## Loading required package: grid
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: strucchange
## Warning: package 'strucchange' was built under R version 3.6.2
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.6.2
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: sandwich
## Warning: package 'sandwich' was built under R version 3.6.2
# See ?rpart and ?rpart.control for the available options.
# rpart can generate different types of trees; the default splitting
# criterion is the Gini index.
tree <- rpart(formula = Class ~ ., data = trainData, method = "class")
print(tree)
## n= 488 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 488 166 benign (0.65983607 0.34016393)  
##   2) UniformityCellSize< 2.5 303   4 benign (0.98679868 0.01320132) *
##   3) UniformityCellSize>=2.5 185  23 malignant (0.12432432 0.87567568)  
##     6) UniformityCellShape< 2.5 13   3 benign (0.76923077 0.23076923) *
##     7) UniformityCellShape>=2.5 172  13 malignant (0.07558140 0.92441860) *
# Default tree plot
prp(tree)

# Same tree drawn with plot type 3
prp(tree, type = 3)

# Annotated plot; nn = TRUE adds the node numbers
rpart.plot(tree, extra = 104, nn = TRUE)

# Complexity-parameter (cp) plot for the fitted tree
plotcp(tree)

# Fit a second tree that splits on entropy (information) instead of the
# default Gini index, and plot it the same way as the first tree.
entTree <- rpart(Class ~ ., data = trainData, method = "class",
                 parms = list(split = "information"))
prp(entTree)

prp(entTree, type = 3)

rpart.plot(entTree, extra = 104, nn = TRUE)

library(rpart.plot)
# Complexity-parameter plot for the entropy-based tree (entTree), not for the
# Gini tree `tree`, whose cp plot has already been drawn above.
plotcp(entTree)

# Grow a tree on the training data with explicit control settings
# (see ?rpart.control):
#   minsplit  = 1   minimum node size for which a split is attempted
#   minbucket = 10  minimum number of observations in a terminal node
#   cp        = -1  negative complexity parameter
tree_with_params <- rpart(
  formula = Class ~ .,
  data = trainData,
  method = "class",
  minsplit = 1,
  minbucket = 10,
  cp = -1
)
prp(tree_with_params)

print(tree_with_params)
## n= 488 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
##  1) root 488 166 benign (0.65983607 0.34016393)  
##    2) UniformityCellSize< 2.5 303   4 benign (0.98679868 0.01320132)  
##      4) NormalNucleoli< 2.5 292   0 benign (1.00000000 0.00000000) *
##      5) NormalNucleoli>=2.5 11   4 benign (0.63636364 0.36363636) *
##    3) UniformityCellSize>=2.5 185  23 malignant (0.12432432 0.87567568)  
##      6) UniformityCellShape< 2.5 13   3 benign (0.76923077 0.23076923) *
##      7) UniformityCellShape>=2.5 172  13 malignant (0.07558140 0.92441860)  
##       14) UniformityCellSize< 4.5 44  11 malignant (0.25000000 0.75000000)  
##         28) BareNuclei< 3.5 16   7 benign (0.56250000 0.43750000) *
##         29) BareNuclei>=3.5 28   2 malignant (0.07142857 0.92857143)  
##           58) MarginalAdhesion>=4.5 13   2 malignant (0.15384615 0.84615385) *
##           59) MarginalAdhesion< 4.5 15   0 malignant (0.00000000 1.00000000) *
##       15) UniformityCellSize>=4.5 128   2 malignant (0.01562500 0.98437500)  
##         30) ClumpThickness< 6.5 49   2 malignant (0.04081633 0.95918367)  
##           60) ClumpThickness>=5.5 12   2 malignant (0.16666667 0.83333333) *
##           61) ClumpThickness< 5.5 37   0 malignant (0.00000000 1.00000000) *
##         31) ClumpThickness>=6.5 79   0 malignant (0.00000000 1.00000000) *
# Detailed report for the fitted tree: the CP table, variable importance and,
# for each node, the primary and surrogate splits.
summary(tree_with_params)
## Call:
## rpart(formula = Class ~ ., data = trainData, method = "class", 
##     minsplit = 1, minbucket = 10, cp = -1)
##   n= 488 
## 
##             CP nsplit rel error    xerror       xstd
## 1  0.837349398      0 1.0000000 1.0000000 0.06304694
## 2  0.042168675      1 0.1626506 0.1807229 0.03196505
## 3  0.006024096      2 0.1204819 0.1265060 0.02700537
## 4  0.000000000      4 0.1084337 0.1204819 0.02638274
## 5 -1.000000000      8 0.1084337 0.1204819 0.02638274
## 
## Variable importance
##       UniformityCellSize      UniformityCellShape SingleEpithelialCellSize 
##                       21                       18                       16 
##           NormalNucleoli           BlandChromatin               BareNuclei 
##                       15                       15                       14 
## 
## Node number 1: 488 observations,    complexity param=0.8373494
##   predicted class=benign     expected loss=0.3401639  P(node) =1
##     class counts:   322   166
##    probabilities: 0.660 0.340 
##   left son=2 (303 obs) right son=3 (185 obs)
##   Primary splits:
##       UniformityCellSize       < 2.5 to the left,  improve=170.8901, (0 missing)
##       UniformityCellShape      < 2.5 to the left,  improve=158.2395, (0 missing)
##       BareNuclei               < 3.5 to the left,  improve=143.0350, (9 missing)
##       SingleEpithelialCellSize < 2.5 to the left,  improve=136.7937, (0 missing)
##       BlandChromatin           < 3.5 to the left,  improve=135.7491, (0 missing)
##   Surrogate splits:
##       UniformityCellShape      < 2.5 to the left,  agree=0.924, adj=0.800, (0 split)
##       SingleEpithelialCellSize < 2.5 to the left,  agree=0.904, adj=0.746, (0 split)
##       NormalNucleoli           < 2.5 to the left,  agree=0.889, adj=0.708, (0 split)
##       BlandChromatin           < 3.5 to the left,  agree=0.879, adj=0.681, (0 split)
##       BareNuclei               < 2.5 to the left,  agree=0.871, adj=0.659, (0 split)
## 
## Node number 2: 303 observations,    complexity param=0
##   predicted class=benign     expected loss=0.01320132  P(node) =0.6209016
##     class counts:   299     4
##    probabilities: 0.987 0.013 
## 
## Node number 3: 185 observations,    complexity param=0.04216867
##   predicted class=malignant  expected loss=0.1243243  P(node) =0.3790984
##     class counts:    23   162
##    probabilities: 0.124 0.876 
##   left son=6 (13 obs) right son=7 (172 obs)
##   Primary splits:
##       UniformityCellShape < 2.5 to the left,  improve=11.630810, (0 missing)
##       BareNuclei          < 1.5 to the left,  improve=10.654530, (3 missing)
##       UniformityCellSize  < 4.5 to the left,  improve=10.093100, (0 missing)
##       BlandChromatin      < 2.5 to the left,  improve= 7.386963, (0 missing)
##       MarginalAdhesion    < 2.5 to the left,  improve= 5.247007, (0 missing)
##   Surrogate splits:
##       BlandChromatin < 1.5 to the left,  agree=0.941, adj=0.154, (0 split)
## 
## Node number 6: 13 observations
##   predicted class=benign     expected loss=0.2307692  P(node) =0.02663934
##     class counts:    10     3
##    probabilities: 0.769 0.231 
## 
## Node number 7: 172 observations,    complexity param=0.006024096
##   predicted class=malignant  expected loss=0.0755814  P(node) =0.352459
##     class counts:    13   159
##    probabilities: 0.076 0.924 
##   left son=14 (44 obs) right son=15 (128 obs)
##   Primary splits:
##       UniformityCellSize       < 4.5 to the left,  improve=3.597384, (0 missing)
##       BareNuclei               < 3.5 to the left,  improve=2.699434, (3 missing)
##       BlandChromatin           < 4.5 to the left,  improve=2.074169, (0 missing)
##       SingleEpithelialCellSize < 2.5 to the left,  improve=1.503064, (0 missing)
##       UniformityCellShape      < 3.5 to the left,  improve=1.376989, (0 missing)
##   Surrogate splits:
##       SingleEpithelialCellSize < 2.5 to the left,  agree=0.808, adj=0.250, (0 split)
##       UniformityCellShape      < 3.5 to the left,  agree=0.802, adj=0.227, (0 split)
##       MarginalAdhesion         < 1.5 to the left,  agree=0.762, adj=0.068, (0 split)
##       BlandChromatin           < 2.5 to the left,  agree=0.762, adj=0.068, (0 split)
## 
## Node number 14: 44 observations,    complexity param=0.006024096
##   predicted class=malignant  expected loss=0.25  P(node) =0.09016393
##     class counts:    11    33
##    probabilities: 0.250 0.750 
##   left son=28 (16 obs) right son=29 (28 obs)
##   Primary splits:
##       BareNuclei               < 3.5 to the left,  improve=4.1678850, (1 missing)
##       BlandChromatin           < 4.5 to the left,  improve=2.3034190, (0 missing)
##       ClumpThickness           < 8.5 to the left,  improve=2.0625000, (0 missing)
##       MarginalAdhesion         < 6   to the left,  improve=2.0625000, (0 missing)
##       SingleEpithelialCellSize < 4.5 to the left,  improve=0.3411911, (0 missing)
##   Surrogate splits:
##       MarginalAdhesion < 1.5 to the left,  agree=0.698, adj=0.133, (1 split)
##       ClumpThickness   < 5.5 to the left,  agree=0.674, adj=0.067, (0 split)
##       BlandChromatin   < 4.5 to the left,  agree=0.674, adj=0.067, (0 split)
##       NormalNucleoli   < 8.5 to the right, agree=0.674, adj=0.067, (0 split)
## 
## Node number 15: 128 observations,    complexity param=0
##   predicted class=malignant  expected loss=0.015625  P(node) =0.2622951
##     class counts:     2   126
##    probabilities: 0.016 0.984 
## 
## Node number 28: 16 observations
##   predicted class=benign     expected loss=0.4375  P(node) =0.03278689
##     class counts:     9     7
##    probabilities: 0.562 0.438 
## 
## Node number 29: 28 observations,    complexity param=0
##   predicted class=malignant  expected loss=0.07142857  P(node) =0.05737705
##     class counts:     2    26
##    probabilities: 0.071 0.929
# Draw the tree skeleton with base graphics, then add the split labels.
# text() annotates the plot produced by plot(), so the order matters.
plot(tree_with_params)
text(tree_with_params)

# Complexity-parameter (cp) plot for this tree
plotcp(tree_with_params)

# Evaluate the trained tree on the validation set.
# type = "prob" (the default for a classification tree) returns the class
# probabilities, one row per validation observation.
Predict <- predict(tree_with_params, newdata = validationData, type = "prob")
print(Predict)
##        benign malignant
## 2   0.1538462 0.8461538
## 4   0.1666667 0.8333333
## 5   1.0000000 0.0000000
## 8   1.0000000 0.0000000
## 11  1.0000000 0.0000000
## 16  0.5625000 0.4375000
## 20  1.0000000 0.0000000
## 21  0.7692308 0.2307692
## 24  0.5625000 0.4375000
## 26  0.6363636 0.3636364
## 31  1.0000000 0.0000000
## 32  1.0000000 0.0000000
## 34  1.0000000 0.0000000
## 37  0.0000000 1.0000000
## 50  0.0000000 1.0000000
## 53  0.0000000 1.0000000
## 58  0.6363636 0.3636364
## 59  1.0000000 0.0000000
## 65  1.0000000 0.0000000
## 67  1.0000000 0.0000000
## 68  0.0000000 1.0000000
## 69  0.0000000 1.0000000
## 71  1.0000000 0.0000000
## 73  0.5625000 0.4375000
## 84  1.0000000 0.0000000
## 87  0.0000000 1.0000000
## 88  0.0000000 1.0000000
## 89  1.0000000 0.0000000
## 97  1.0000000 0.0000000
## 104 1.0000000 0.0000000
## 106 0.5625000 0.4375000
## 107 0.0000000 1.0000000
## 111 0.7692308 0.2307692
## 114 0.0000000 1.0000000
## 115 0.7692308 0.2307692
## 118 0.0000000 1.0000000
## 126 1.0000000 0.0000000
## 132 1.0000000 0.0000000
## 134 1.0000000 0.0000000
## 137 1.0000000 0.0000000
## 138 1.0000000 0.0000000
## 139 1.0000000 0.0000000
## 145 1.0000000 0.0000000
## 150 0.0000000 1.0000000
## 151 1.0000000 0.0000000
## 167 0.0000000 1.0000000
## 173 1.0000000 0.0000000
## 174 0.1666667 0.8333333
## 179 1.0000000 0.0000000
## 181 1.0000000 0.0000000
## 183 1.0000000 0.0000000
## 189 0.0000000 1.0000000
## 190 1.0000000 0.0000000
## 193 1.0000000 0.0000000
## 195 1.0000000 0.0000000
## 202 0.0000000 1.0000000
## 206 0.0000000 1.0000000
## 216 0.0000000 1.0000000
## 219 0.1666667 0.8333333
## 220 1.0000000 0.0000000
## 222 0.0000000 1.0000000
## 223 1.0000000 0.0000000
## 230 0.0000000 1.0000000
## 238 0.0000000 1.0000000
## 240 0.0000000 1.0000000
## 246 1.0000000 0.0000000
## 248 0.0000000 1.0000000
## 249 0.6363636 0.3636364
## 250 1.0000000 0.0000000
## 256 0.0000000 1.0000000
## 260 0.0000000 1.0000000
## 261 0.0000000 1.0000000
## 262 0.0000000 1.0000000
## 264 0.1538462 0.8461538
## 271 0.0000000 1.0000000
## 275 1.0000000 0.0000000
## 276 1.0000000 0.0000000
## 277 1.0000000 0.0000000
## 281 1.0000000 0.0000000
## 294 0.1538462 0.8461538
## 295 1.0000000 0.0000000
## 296 0.0000000 1.0000000
## 297 0.0000000 1.0000000
## 300 0.6363636 0.3636364
## 301 0.1538462 0.8461538
## 303 0.0000000 1.0000000
## 304 1.0000000 0.0000000
## 313 0.0000000 1.0000000
## 316 0.0000000 1.0000000
## 317 0.0000000 1.0000000
## 320 0.0000000 1.0000000
## 321 0.0000000 1.0000000
## 324 0.1538462 0.8461538
## 327 0.6363636 0.3636364
## 330 0.0000000 1.0000000
## 333 1.0000000 0.0000000
## 334 0.1538462 0.8461538
## 340 0.0000000 1.0000000
## 347 0.6363636 0.3636364
## 352 1.0000000 0.0000000
## 356 1.0000000 0.0000000
## 360 0.0000000 1.0000000
## 363 1.0000000 0.0000000
## 366 1.0000000 0.0000000
## 373 1.0000000 0.0000000
## 376 1.0000000 0.0000000
## 377 1.0000000 0.0000000
## 380 0.5625000 0.4375000
## 382 0.0000000 1.0000000
## 384 1.0000000 0.0000000
## 386 0.7692308 0.2307692
## 391 1.0000000 0.0000000
## 393 1.0000000 0.0000000
## 394 1.0000000 0.0000000
## 400 1.0000000 0.0000000
## 401 0.0000000 1.0000000
## 403 0.5625000 0.4375000
## 407 1.0000000 0.0000000
## 412 1.0000000 0.0000000
## 417 0.0000000 1.0000000
## 425 1.0000000 0.0000000
## 430 1.0000000 0.0000000
## 431 0.7692308 0.2307692
## 434 1.0000000 0.0000000
## 443 1.0000000 0.0000000
## 445 1.0000000 0.0000000
## 446 1.0000000 0.0000000
## 447 1.0000000 0.0000000
## 456 1.0000000 0.0000000
## 457 0.0000000 1.0000000
## 458 0.0000000 1.0000000
## 461 1.0000000 0.0000000
## 470 1.0000000 0.0000000
## 472 1.0000000 0.0000000
## 474 1.0000000 0.0000000
## 480 0.0000000 1.0000000
## 482 0.7692308 0.2307692
## 485 1.0000000 0.0000000
## 490 0.7692308 0.2307692
## 491 1.0000000 0.0000000
## 494 0.0000000 1.0000000
## 496 1.0000000 0.0000000
## 500 1.0000000 0.0000000
## 509 1.0000000 0.0000000
## 513 1.0000000 0.0000000
## 515 0.1666667 0.8333333
## 518 1.0000000 0.0000000
## 520 0.0000000 1.0000000
## 527 1.0000000 0.0000000
## 529 1.0000000 0.0000000
## 531 0.0000000 1.0000000
## 534 1.0000000 0.0000000
## 536 1.0000000 0.0000000
## 538 1.0000000 0.0000000
## 541 1.0000000 0.0000000
## 543 0.7692308 0.2307692
## 545 1.0000000 0.0000000
## 546 1.0000000 0.0000000
## 548 1.0000000 0.0000000
## 549 1.0000000 0.0000000
## 554 0.7692308 0.2307692
## 562 1.0000000 0.0000000
## 563 1.0000000 0.0000000
## 568 1.0000000 0.0000000
## 570 0.0000000 1.0000000
## 572 0.0000000 1.0000000
## 575 0.0000000 1.0000000
## 576 1.0000000 0.0000000
## 581 1.0000000 0.0000000
## 582 0.0000000 1.0000000
## 583 0.1666667 0.8333333
## 585 1.0000000 0.0000000
## 589 0.0000000 1.0000000
## 593 0.1538462 0.8461538
## 596 1.0000000 0.0000000
## 599 1.0000000 0.0000000
## 606 0.0000000 1.0000000
## 608 1.0000000 0.0000000
## 614 0.7692308 0.2307692
## 616 1.0000000 0.0000000
## 617 1.0000000 0.0000000
## 618 1.0000000 0.0000000
## 619 1.0000000 0.0000000
## 621 1.0000000 0.0000000
## 622 0.5625000 0.4375000
## 623 1.0000000 0.0000000
## 628 1.0000000 0.0000000
## 631 1.0000000 0.0000000
## 632 1.0000000 0.0000000
## 634 0.0000000 1.0000000
## 637 0.0000000 1.0000000
## 638 1.0000000 0.0000000
## 639 1.0000000 0.0000000
## 642 1.0000000 0.0000000
## 643 1.0000000 0.0000000
## 647 1.0000000 0.0000000
## 651 1.0000000 0.0000000
## 652 1.0000000 0.0000000
## 655 1.0000000 0.0000000
## 656 1.0000000 0.0000000
## 657 1.0000000 0.0000000
## 661 1.0000000 0.0000000
## 662 1.0000000 0.0000000
## 664 1.0000000 0.0000000
## 667 1.0000000 0.0000000
## 677 1.0000000 0.0000000
## 681 0.0000000 1.0000000
## 682 0.0000000 1.0000000
## 683 1.0000000 0.0000000
## 684 1.0000000 0.0000000
## 685 1.0000000 0.0000000
""
## [1] ""
# pred <- predict(mymodel, dataset, type = 'prob')
# To get the predicted classes only, without the probabilities, use
# type = "class" as in the call below.
# By default a threshold of 0.5 is used to produce the classes.
""
## [1] ""
Predict <- predict(tree_with_params, newdata = validationData, type = "class")
print(Predict)
##         2         4         5         8        11        16        20        21 
## malignant malignant    benign    benign    benign    benign    benign    benign 
##        24        26        31        32        34        37        50        53 
##    benign    benign    benign    benign    benign malignant malignant malignant 
##        58        59        65        67        68        69        71        73 
##    benign    benign    benign    benign malignant malignant    benign    benign 
##        84        87        88        89        97       104       106       107 
##    benign malignant malignant    benign    benign    benign    benign malignant 
##       111       114       115       118       126       132       134       137 
##    benign malignant    benign malignant    benign    benign    benign    benign 
##       138       139       145       150       151       167       173       174 
##    benign    benign    benign malignant    benign malignant    benign malignant 
##       179       181       183       189       190       193       195       202 
##    benign    benign    benign malignant    benign    benign    benign malignant 
##       206       216       219       220       222       223       230       238 
## malignant malignant malignant    benign malignant    benign malignant malignant 
##       240       246       248       249       250       256       260       261 
## malignant    benign malignant    benign    benign malignant malignant malignant 
##       262       264       271       275       276       277       281       294 
## malignant malignant malignant    benign    benign    benign    benign malignant 
##       295       296       297       300       301       303       304       313 
##    benign malignant malignant    benign malignant malignant    benign malignant 
##       316       317       320       321       324       327       330       333 
## malignant malignant malignant malignant malignant    benign malignant    benign 
##       334       340       347       352       356       360       363       366 
## malignant malignant    benign    benign    benign malignant    benign    benign 
##       373       376       377       380       382       384       386       391 
##    benign    benign    benign    benign malignant    benign    benign    benign 
##       393       394       400       401       403       407       412       417 
##    benign    benign    benign malignant    benign    benign    benign malignant 
##       425       430       431       434       443       445       446       447 
##    benign    benign    benign    benign    benign    benign    benign    benign 
##       456       457       458       461       470       472       474       480 
##    benign malignant malignant    benign    benign    benign    benign malignant 
##       482       485       490       491       494       496       500       509 
##    benign    benign    benign    benign malignant    benign    benign    benign 
##       513       515       518       520       527       529       531       534 
##    benign malignant    benign malignant    benign    benign malignant    benign 
##       536       538       541       543       545       546       548       549 
##    benign    benign    benign    benign    benign    benign    benign    benign 
##       554       562       563       568       570       572       575       576 
##    benign    benign    benign    benign malignant malignant malignant    benign 
##       581       582       583       585       589       593       596       599 
##    benign malignant malignant    benign malignant malignant    benign    benign 
##       606       608       614       616       617       618       619       621 
## malignant    benign    benign    benign    benign    benign    benign    benign 
##       622       623       628       631       632       634       637       638 
##    benign    benign    benign    benign    benign malignant malignant    benign 
##       639       642       643       647       651       652       655       656 
##    benign    benign    benign    benign    benign    benign    benign    benign 
##       657       661       662       664       667       677       681       682 
##    benign    benign    benign    benign    benign    benign malignant malignant 
##       683       684       685 
##    benign    benign    benign 
## Levels: benign malignant
# Confusion matrix: predicted classes (rows) against the actual classes of
# the validation set (columns)
Confusion_matrix <- table(Predict, validationData$Class)
print(Confusion_matrix)
##            
## Predict     benign malignant
##   benign       130        13
##   malignant      6        62
# Accuracy = correctly classified cases (diagonal) / all validation cases.
# Computed from the confusion matrix so the value stays correct if the seed,
# the split or the model changes.
sum(diag(Confusion_matrix)) / sum(Confusion_matrix)
## [1] 0.9099526
# ROC curve
#install.packages("ROCR")
library(ROCR)
## Warning: package 'ROCR' was built under R version 3.6.2
## Loading required package: gplots
## Warning: package 'gplots' was built under R version 3.6.2
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
# install.packages("gplots")

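# To draw the ROC curve we need predicted class probabilities rather than
# class labels, so we run predict() again.
# Note that PredictROC holds the same predictions as Predict, but expressed as
# per-class probabilities (i.e. what type = "prob" returns).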

# Score the validation set with the fitted tree. No `type` argument is given,
# so predict() uses its default, which here produces the per-class probability
# matrix (columns "benign" and "malignant") shown in the output below.
PredictROC <- predict(tree_with_params, validationData)
print(PredictROC)
##        benign malignant
## 2   0.1538462 0.8461538
## 4   0.1666667 0.8333333
## 5   1.0000000 0.0000000
## 8   1.0000000 0.0000000
## 11  1.0000000 0.0000000
## 16  0.5625000 0.4375000
## 20  1.0000000 0.0000000
## 21  0.7692308 0.2307692
## 24  0.5625000 0.4375000
## 26  0.6363636 0.3636364
## 31  1.0000000 0.0000000
## 32  1.0000000 0.0000000
## 34  1.0000000 0.0000000
## 37  0.0000000 1.0000000
## 50  0.0000000 1.0000000
## 53  0.0000000 1.0000000
## 58  0.6363636 0.3636364
## 59  1.0000000 0.0000000
## 65  1.0000000 0.0000000
## 67  1.0000000 0.0000000
## 68  0.0000000 1.0000000
## 69  0.0000000 1.0000000
## 71  1.0000000 0.0000000
## 73  0.5625000 0.4375000
## 84  1.0000000 0.0000000
## 87  0.0000000 1.0000000
## 88  0.0000000 1.0000000
## 89  1.0000000 0.0000000
## 97  1.0000000 0.0000000
## 104 1.0000000 0.0000000
## 106 0.5625000 0.4375000
## 107 0.0000000 1.0000000
## 111 0.7692308 0.2307692
## 114 0.0000000 1.0000000
## 115 0.7692308 0.2307692
## 118 0.0000000 1.0000000
## 126 1.0000000 0.0000000
## 132 1.0000000 0.0000000
## 134 1.0000000 0.0000000
## 137 1.0000000 0.0000000
## 138 1.0000000 0.0000000
## 139 1.0000000 0.0000000
## 145 1.0000000 0.0000000
## 150 0.0000000 1.0000000
## 151 1.0000000 0.0000000
## 167 0.0000000 1.0000000
## 173 1.0000000 0.0000000
## 174 0.1666667 0.8333333
## 179 1.0000000 0.0000000
## 181 1.0000000 0.0000000
## 183 1.0000000 0.0000000
## 189 0.0000000 1.0000000
## 190 1.0000000 0.0000000
## 193 1.0000000 0.0000000
## 195 1.0000000 0.0000000
## 202 0.0000000 1.0000000
## 206 0.0000000 1.0000000
## 216 0.0000000 1.0000000
## 219 0.1666667 0.8333333
## 220 1.0000000 0.0000000
## 222 0.0000000 1.0000000
## 223 1.0000000 0.0000000
## 230 0.0000000 1.0000000
## 238 0.0000000 1.0000000
## 240 0.0000000 1.0000000
## 246 1.0000000 0.0000000
## 248 0.0000000 1.0000000
## 249 0.6363636 0.3636364
## 250 1.0000000 0.0000000
## 256 0.0000000 1.0000000
## 260 0.0000000 1.0000000
## 261 0.0000000 1.0000000
## 262 0.0000000 1.0000000
## 264 0.1538462 0.8461538
## 271 0.0000000 1.0000000
## 275 1.0000000 0.0000000
## 276 1.0000000 0.0000000
## 277 1.0000000 0.0000000
## 281 1.0000000 0.0000000
## 294 0.1538462 0.8461538
## 295 1.0000000 0.0000000
## 296 0.0000000 1.0000000
## 297 0.0000000 1.0000000
## 300 0.6363636 0.3636364
## 301 0.1538462 0.8461538
## 303 0.0000000 1.0000000
## 304 1.0000000 0.0000000
## 313 0.0000000 1.0000000
## 316 0.0000000 1.0000000
## 317 0.0000000 1.0000000
## 320 0.0000000 1.0000000
## 321 0.0000000 1.0000000
## 324 0.1538462 0.8461538
## 327 0.6363636 0.3636364
## 330 0.0000000 1.0000000
## 333 1.0000000 0.0000000
## 334 0.1538462 0.8461538
## 340 0.0000000 1.0000000
## 347 0.6363636 0.3636364
## 352 1.0000000 0.0000000
## 356 1.0000000 0.0000000
## 360 0.0000000 1.0000000
## 363 1.0000000 0.0000000
## 366 1.0000000 0.0000000
## 373 1.0000000 0.0000000
## 376 1.0000000 0.0000000
## 377 1.0000000 0.0000000
## 380 0.5625000 0.4375000
## 382 0.0000000 1.0000000
## 384 1.0000000 0.0000000
## 386 0.7692308 0.2307692
## 391 1.0000000 0.0000000
## 393 1.0000000 0.0000000
## 394 1.0000000 0.0000000
## 400 1.0000000 0.0000000
## 401 0.0000000 1.0000000
## 403 0.5625000 0.4375000
## 407 1.0000000 0.0000000
## 412 1.0000000 0.0000000
## 417 0.0000000 1.0000000
## 425 1.0000000 0.0000000
## 430 1.0000000 0.0000000
## 431 0.7692308 0.2307692
## 434 1.0000000 0.0000000
## 443 1.0000000 0.0000000
## 445 1.0000000 0.0000000
## 446 1.0000000 0.0000000
## 447 1.0000000 0.0000000
## 456 1.0000000 0.0000000
## 457 0.0000000 1.0000000
## 458 0.0000000 1.0000000
## 461 1.0000000 0.0000000
## 470 1.0000000 0.0000000
## 472 1.0000000 0.0000000
## 474 1.0000000 0.0000000
## 480 0.0000000 1.0000000
## 482 0.7692308 0.2307692
## 485 1.0000000 0.0000000
## 490 0.7692308 0.2307692
## 491 1.0000000 0.0000000
## 494 0.0000000 1.0000000
## 496 1.0000000 0.0000000
## 500 1.0000000 0.0000000
## 509 1.0000000 0.0000000
## 513 1.0000000 0.0000000
## 515 0.1666667 0.8333333
## 518 1.0000000 0.0000000
## 520 0.0000000 1.0000000
## 527 1.0000000 0.0000000
## 529 1.0000000 0.0000000
## 531 0.0000000 1.0000000
## 534 1.0000000 0.0000000
## 536 1.0000000 0.0000000
## 538 1.0000000 0.0000000
## 541 1.0000000 0.0000000
## 543 0.7692308 0.2307692
## 545 1.0000000 0.0000000
## 546 1.0000000 0.0000000
## 548 1.0000000 0.0000000
## 549 1.0000000 0.0000000
## 554 0.7692308 0.2307692
## 562 1.0000000 0.0000000
## 563 1.0000000 0.0000000
## 568 1.0000000 0.0000000
## 570 0.0000000 1.0000000
## 572 0.0000000 1.0000000
## 575 0.0000000 1.0000000
## 576 1.0000000 0.0000000
## 581 1.0000000 0.0000000
## 582 0.0000000 1.0000000
## 583 0.1666667 0.8333333
## 585 1.0000000 0.0000000
## 589 0.0000000 1.0000000
## 593 0.1538462 0.8461538
## 596 1.0000000 0.0000000
## 599 1.0000000 0.0000000
## 606 0.0000000 1.0000000
## 608 1.0000000 0.0000000
## 614 0.7692308 0.2307692
## 616 1.0000000 0.0000000
## 617 1.0000000 0.0000000
## 618 1.0000000 0.0000000
## 619 1.0000000 0.0000000
## 621 1.0000000 0.0000000
## 622 0.5625000 0.4375000
## 623 1.0000000 0.0000000
## 628 1.0000000 0.0000000
## 631 1.0000000 0.0000000
## 632 1.0000000 0.0000000
## 634 0.0000000 1.0000000
## 637 0.0000000 1.0000000
## 638 1.0000000 0.0000000
## 639 1.0000000 0.0000000
## 642 1.0000000 0.0000000
## 643 1.0000000 0.0000000
## 647 1.0000000 0.0000000
## 651 1.0000000 0.0000000
## 652 1.0000000 0.0000000
## 655 1.0000000 0.0000000
## 656 1.0000000 0.0000000
## 657 1.0000000 0.0000000
## 661 1.0000000 0.0000000
## 662 1.0000000 0.0000000
## 664 1.0000000 0.0000000
## 667 1.0000000 0.0000000
## 677 1.0000000 0.0000000
## 681 0.0000000 1.0000000
## 682 0.0000000 1.0000000
## 683 1.0000000 0.0000000
## 684 1.0000000 0.0000000
## 685 1.0000000 0.0000000
# Show only the second column of the probability matrix: the predicted
# probability of the "malignant" class for each validation row.
PredictROC[, "malignant"]
##         2         4         5         8        11        16        20        21 
## 0.8461538 0.8333333 0.0000000 0.0000000 0.0000000 0.4375000 0.0000000 0.2307692 
##        24        26        31        32        34        37        50        53 
## 0.4375000 0.3636364 0.0000000 0.0000000 0.0000000 1.0000000 1.0000000 1.0000000 
##        58        59        65        67        68        69        71        73 
## 0.3636364 0.0000000 0.0000000 0.0000000 1.0000000 1.0000000 0.0000000 0.4375000 
##        84        87        88        89        97       104       106       107 
## 0.0000000 1.0000000 1.0000000 0.0000000 0.0000000 0.0000000 0.4375000 1.0000000 
##       111       114       115       118       126       132       134       137 
## 0.2307692 1.0000000 0.2307692 1.0000000 0.0000000 0.0000000 0.0000000 0.0000000 
##       138       139       145       150       151       167       173       174 
## 0.0000000 0.0000000 0.0000000 1.0000000 0.0000000 1.0000000 0.0000000 0.8333333 
##       179       181       183       189       190       193       195       202 
## 0.0000000 0.0000000 0.0000000 1.0000000 0.0000000 0.0000000 0.0000000 1.0000000 
##       206       216       219       220       222       223       230       238 
## 1.0000000 1.0000000 0.8333333 0.0000000 1.0000000 0.0000000 1.0000000 1.0000000 
##       240       246       248       249       250       256       260       261 
## 1.0000000 0.0000000 1.0000000 0.3636364 0.0000000 1.0000000 1.0000000 1.0000000 
##       262       264       271       275       276       277       281       294 
## 1.0000000 0.8461538 1.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.8461538 
##       295       296       297       300       301       303       304       313 
## 0.0000000 1.0000000 1.0000000 0.3636364 0.8461538 1.0000000 0.0000000 1.0000000 
##       316       317       320       321       324       327       330       333 
## 1.0000000 1.0000000 1.0000000 1.0000000 0.8461538 0.3636364 1.0000000 0.0000000 
##       334       340       347       352       356       360       363       366 
## 0.8461538 1.0000000 0.3636364 0.0000000 0.0000000 1.0000000 0.0000000 0.0000000 
##       373       376       377       380       382       384       386       391 
## 0.0000000 0.0000000 0.0000000 0.4375000 1.0000000 0.0000000 0.2307692 0.0000000 
##       393       394       400       401       403       407       412       417 
## 0.0000000 0.0000000 0.0000000 1.0000000 0.4375000 0.0000000 0.0000000 1.0000000 
##       425       430       431       434       443       445       446       447 
## 0.0000000 0.0000000 0.2307692 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 
##       456       457       458       461       470       472       474       480 
## 0.0000000 1.0000000 1.0000000 0.0000000 0.0000000 0.0000000 0.0000000 1.0000000 
##       482       485       490       491       494       496       500       509 
## 0.2307692 0.0000000 0.2307692 0.0000000 1.0000000 0.0000000 0.0000000 0.0000000 
##       513       515       518       520       527       529       531       534 
## 0.0000000 0.8333333 0.0000000 1.0000000 0.0000000 0.0000000 1.0000000 0.0000000 
##       536       538       541       543       545       546       548       549 
## 0.0000000 0.0000000 0.0000000 0.2307692 0.0000000 0.0000000 0.0000000 0.0000000 
##       554       562       563       568       570       572       575       576 
## 0.2307692 0.0000000 0.0000000 0.0000000 1.0000000 1.0000000 1.0000000 0.0000000 
##       581       582       583       585       589       593       596       599 
## 0.0000000 1.0000000 0.8333333 0.0000000 1.0000000 0.8461538 0.0000000 0.0000000 
##       606       608       614       616       617       618       619       621 
## 1.0000000 0.0000000 0.2307692 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 
##       622       623       628       631       632       634       637       638 
## 0.4375000 0.0000000 0.0000000 0.0000000 0.0000000 1.0000000 1.0000000 0.0000000 
##       639       642       643       647       651       652       655       656 
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 
##       657       661       662       664       667       677       681       682 
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 1.0000000 1.0000000 
##       683       684       685 
## 0.0000000 0.0000000 0.0000000
# Pair the malignant-class probabilities with the true validation labels in a
# ROCR prediction object (its counts per cutoff are printed below).
pred <- prediction(
  predictions = PredictROC[, 2],
  labels = validationData$Class
)
# True positive rate against false positive rate at every cutoff: these are
# the coordinates of the ROC curve.
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
print(pred)
## An object of class "prediction"
## Slot "predictions":
## [[1]]
##         2         4         5         8        11        16        20        21 
## 0.8461538 0.8333333 0.0000000 0.0000000 0.0000000 0.4375000 0.0000000 0.2307692 
##        24        26        31        32        34        37        50        53 
## 0.4375000 0.3636364 0.0000000 0.0000000 0.0000000 1.0000000 1.0000000 1.0000000 
##        58        59        65        67        68        69        71        73 
## 0.3636364 0.0000000 0.0000000 0.0000000 1.0000000 1.0000000 0.0000000 0.4375000 
##        84        87        88        89        97       104       106       107 
## 0.0000000 1.0000000 1.0000000 0.0000000 0.0000000 0.0000000 0.4375000 1.0000000 
##       111       114       115       118       126       132       134       137 
## 0.2307692 1.0000000 0.2307692 1.0000000 0.0000000 0.0000000 0.0000000 0.0000000 
##       138       139       145       150       151       167       173       174 
## 0.0000000 0.0000000 0.0000000 1.0000000 0.0000000 1.0000000 0.0000000 0.8333333 
##       179       181       183       189       190       193       195       202 
## 0.0000000 0.0000000 0.0000000 1.0000000 0.0000000 0.0000000 0.0000000 1.0000000 
##       206       216       219       220       222       223       230       238 
## 1.0000000 1.0000000 0.8333333 0.0000000 1.0000000 0.0000000 1.0000000 1.0000000 
##       240       246       248       249       250       256       260       261 
## 1.0000000 0.0000000 1.0000000 0.3636364 0.0000000 1.0000000 1.0000000 1.0000000 
##       262       264       271       275       276       277       281       294 
## 1.0000000 0.8461538 1.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.8461538 
##       295       296       297       300       301       303       304       313 
## 0.0000000 1.0000000 1.0000000 0.3636364 0.8461538 1.0000000 0.0000000 1.0000000 
##       316       317       320       321       324       327       330       333 
## 1.0000000 1.0000000 1.0000000 1.0000000 0.8461538 0.3636364 1.0000000 0.0000000 
##       334       340       347       352       356       360       363       366 
## 0.8461538 1.0000000 0.3636364 0.0000000 0.0000000 1.0000000 0.0000000 0.0000000 
##       373       376       377       380       382       384       386       391 
## 0.0000000 0.0000000 0.0000000 0.4375000 1.0000000 0.0000000 0.2307692 0.0000000 
##       393       394       400       401       403       407       412       417 
## 0.0000000 0.0000000 0.0000000 1.0000000 0.4375000 0.0000000 0.0000000 1.0000000 
##       425       430       431       434       443       445       446       447 
## 0.0000000 0.0000000 0.2307692 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 
##       456       457       458       461       470       472       474       480 
## 0.0000000 1.0000000 1.0000000 0.0000000 0.0000000 0.0000000 0.0000000 1.0000000 
##       482       485       490       491       494       496       500       509 
## 0.2307692 0.0000000 0.2307692 0.0000000 1.0000000 0.0000000 0.0000000 0.0000000 
##       513       515       518       520       527       529       531       534 
## 0.0000000 0.8333333 0.0000000 1.0000000 0.0000000 0.0000000 1.0000000 0.0000000 
##       536       538       541       543       545       546       548       549 
## 0.0000000 0.0000000 0.0000000 0.2307692 0.0000000 0.0000000 0.0000000 0.0000000 
##       554       562       563       568       570       572       575       576 
## 0.2307692 0.0000000 0.0000000 0.0000000 1.0000000 1.0000000 1.0000000 0.0000000 
##       581       582       583       585       589       593       596       599 
## 0.0000000 1.0000000 0.8333333 0.0000000 1.0000000 0.8461538 0.0000000 0.0000000 
##       606       608       614       616       617       618       619       621 
## 1.0000000 0.0000000 0.2307692 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 
##       622       623       628       631       632       634       637       638 
## 0.4375000 0.0000000 0.0000000 0.0000000 0.0000000 1.0000000 1.0000000 0.0000000 
##       639       642       643       647       651       652       655       656 
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 
##       657       661       662       664       667       677       681       682 
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 1.0000000 1.0000000 
##       683       684       685 
## 0.0000000 0.0000000 0.0000000 
## 
## 
## Slot "labels":
## [[1]]
##   [1] benign    benign    benign    benign    benign    malignant benign   
##   [8] malignant malignant malignant benign    benign    benign    malignant
##  [15] malignant malignant malignant malignant benign    benign    malignant
##  [22] malignant benign    benign    benign    malignant malignant benign   
##  [29] benign    malignant malignant malignant benign    malignant benign   
##  [36] malignant benign    benign    benign    benign    benign    benign   
##  [43] benign    malignant benign    malignant benign    malignant benign   
##  [50] benign    benign    malignant benign    benign    benign    malignant
##  [57] malignant malignant malignant benign    malignant malignant malignant
##  [64] malignant malignant benign    malignant benign    benign    malignant
##  [71] benign    malignant malignant malignant malignant benign    benign   
##  [78] benign    benign    malignant benign    malignant benign    malignant
##  [85] malignant malignant benign    malignant benign    malignant benign   
##  [92] malignant malignant malignant malignant benign    malignant malignant
##  [99] benign    benign    benign    malignant benign    benign    benign   
## [106] benign    benign    benign    malignant benign    benign    benign   
## [113] benign    benign    benign    malignant benign    benign    benign   
## [120] malignant benign    benign    benign    benign    benign    benign   
## [127] benign    benign    malignant malignant malignant benign    benign   
## [134] benign    benign    malignant benign    benign    malignant benign   
## [141] malignant benign    benign    benign    benign    malignant benign   
## [148] malignant benign    benign    malignant benign    benign    benign   
## [155] benign    benign    benign    benign    benign    benign    benign   
## [162] benign    benign    benign    malignant malignant malignant benign   
## [169] benign    malignant malignant benign    malignant malignant benign   
## [176] benign    malignant benign    benign    benign    benign    benign   
## [183] benign    benign    benign    benign    benign    benign    benign   
## [190] malignant malignant benign    benign    benign    benign    benign   
## [197] benign    benign    benign    benign    benign    benign    benign   
## [204] benign    benign    benign    malignant malignant benign    benign   
## [211] benign   
## Levels: benign < malignant
## 
## 
## Slot "cutoffs":
## [[1]]
##                 682       593       583       622       347       614       685 
##       Inf 1.0000000 0.8461538 0.8333333 0.4375000 0.3636364 0.2307692 0.0000000 
## 
## 
## Slot "fp":
## [[1]]
## [1]   0   4   5   6  10  12  20 136
## 
## 
## Slot "tp":
## [[1]]
## [1]  0 52 58 62 65 69 71 75
## 
## 
## Slot "tn":
## [[1]]
## [1] 136 132 131 130 126 124 116   0
## 
## 
## Slot "fn":
## [[1]]
## [1] 75 23 17 13 10  6  4  0
## 
## 
## Slot "n.pos":
## [[1]]
## [1] 75
## 
## 
## Slot "n.neg":
## [[1]]
## [1] 136
## 
## 
## Slot "n.pos.pred":
## [[1]]
## [1]   0  56  63  68  75  81  91 211
## 
## 
## Slot "n.neg.pred":
## [[1]]
## [1] 211 155 148 143 136 130 120   0
# Display the performance object: the x (FPR), y (TPR) and cutoff values
# that make up the ROC curve.
print(perf)
## An object of class "performance"
## Slot "x.name":
## [1] "False positive rate"
## 
## Slot "y.name":
## [1] "True positive rate"
## 
## Slot "alpha.name":
## [1] "Cutoff"
## 
## Slot "x.values":
## [[1]]
## [1] 0.00000000 0.02941176 0.03676471 0.04411765 0.07352941 0.08823529 0.14705882
## [8] 1.00000000
## 
## 
## Slot "y.values":
## [[1]]
## [1] 0.0000000 0.6933333 0.7733333 0.8266667 0.8666667 0.9200000 0.9466667
## [8] 1.0000000
## 
## 
## Slot "alpha.values":
## [[1]]
## [1]       Inf 1.0000000 0.8461538 0.8333333 0.4375000 0.3636364 0.2307692
## [8] 0.0000000
# Quick look at the ROC curve; the line colour encodes the probability cutoff.
plot(perf, colorize = TRUE)

# Same curve with a title, axis labels and selected cutoffs printed on it.
# perf was built as "tpr" vs "fpr", so the x axis is the false positive rate,
# i.e. 1 - specificity (not specificity itself).
plot(perf,
     colorize = TRUE,
     main = "ROC curve",
     ylab = "Sensitivity",
     xlab = "1 - Specificity",
     # annotate the curve at cutoffs 0, 0.3, 0.6 and 0.9
     print.cutoffs.at = seq(0, 1, 0.3),
     # offset of the cutoff labels relative to their points
     text.adj = c(-0.2, 1.7))

# Area Under Curve

# Area under the ROC curve: ROCR returns it in the y.values slot (a list), so
# flatten it to a plain number and keep three decimal places.
auc <- round(
  unlist(performance(pred, measure = "auc")@y.values, use.names = FALSE),
  digits = 3
)
auc
## [1] 0.945