# Decision Tree Classification on Breast cancer dataset
# Downloading the file
# The data file is fetched only when no local copy exists, so re-running the
# script does not repeat the download.
fileURL <- "http://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data"
data_file <- "breast-cancer-wisconsin.data"
if (!file.exists(data_file)) {
  # No method is forced, so R picks one itself and an external `curl` binary
  # is not required.
  status <- download.file(fileURL, destfile = data_file)
  if (status != 0) {
    # Remove any partial file so the next run retries the download
    unlink(data_file)
    stop("Download of ", fileURL, " failed with status ", status, call. = FALSE)
  }
}
# read the data
# The file is comma-separated with no header row; "?" marks a missing value.
data <- read.table(data_file, na.strings = "?", sep = ",")
str(data)
## 'data.frame': 699 obs. of 11 variables:
## $ V1 : int 1000025 1002945 1015425 1016277 1017023 1017122 1018099 1018561 1033078 1033078 ...
## $ V2 : int 5 5 3 6 4 8 1 2 2 4 ...
## $ V3 : int 1 4 1 8 1 10 1 1 1 2 ...
## $ V4 : int 1 4 1 8 1 10 1 2 1 1 ...
## $ V5 : int 1 5 1 1 3 8 1 1 1 1 ...
## $ V6 : int 2 7 2 3 2 7 2 2 2 2 ...
## $ V7 : int 1 10 2 4 1 10 10 1 1 1 ...
## $ V8 : int 3 3 3 3 3 9 3 3 1 2 ...
## $ V9 : int 1 2 1 7 1 7 1 1 1 1 ...
## $ V10: int 1 1 1 1 1 1 1 1 5 1 ...
## $ V11: int 2 2 2 2 2 4 2 2 2 2 ...
# Drop the ID column (column 1) and give the remaining ten columns
# descriptive names in one step.
# These names are displayed in the tree to facilitate semantic interpretation
data <- setNames(
  data[-1],
  c(
    "ClumpThickness",
    "UniformityCellSize",
    "UniformityCellShape",
    "MarginalAdhesion",
    "SingleEpithelialCellSize",
    "BareNuclei",
    "BlandChromatin",
    "NormalNucleoli",
    "Mitoses",
    "Class"
  )
)
# The response is coded 2 / 4 in the raw file; turn it into a labelled factor
data$Class <- factor(
  data$Class,
  levels = c(2, 4),
  labels = c("benign", "malignant")
)
print(summary(data))
## ClumpThickness UniformityCellSize UniformityCellShape MarginalAdhesion
## Min. : 1.000 Min. : 1.000 Min. : 1.000 Min. : 1.000
## 1st Qu.: 2.000 1st Qu.: 1.000 1st Qu.: 1.000 1st Qu.: 1.000
## Median : 4.000 Median : 1.000 Median : 1.000 Median : 1.000
## Mean : 4.418 Mean : 3.134 Mean : 3.207 Mean : 2.807
## 3rd Qu.: 6.000 3rd Qu.: 5.000 3rd Qu.: 5.000 3rd Qu.: 4.000
## Max. :10.000 Max. :10.000 Max. :10.000 Max. :10.000
##
## SingleEpithelialCellSize BareNuclei BlandChromatin NormalNucleoli
## Min. : 1.000 Min. : 1.000 Min. : 1.000 Min. : 1.000
## 1st Qu.: 2.000 1st Qu.: 1.000 1st Qu.: 2.000 1st Qu.: 1.000
## Median : 2.000 Median : 1.000 Median : 3.000 Median : 1.000
## Mean : 3.216 Mean : 3.545 Mean : 3.438 Mean : 2.867
## 3rd Qu.: 4.000 3rd Qu.: 6.000 3rd Qu.: 5.000 3rd Qu.: 4.000
## Max. :10.000 Max. :10.000 Max. :10.000 Max. :10.000
## NA's :16
## Mitoses Class
## Min. : 1.000 benign :458
## 1st Qu.: 1.000 malignant:241
## Median : 1.000
## Mean : 1.589
## 3rd Qu.: 1.000
## Max. :10.000
##
#Note that there are 16 missing values in BareNuclei
#Later you will see that there is no imputation of these missing values.
#Investigate how decision trees handle missing values
#Read the rpart documentation to learn about this.
#This link has some extra information:
#https://stats.stackexchange.com/questions/96025/how-do-decision-tree-learning-algorithms-deal-with-missing-values-under-the-hoo
# Dividing the dataset into training and validation sets. There are many ways to do this.
# Alternate method is also listed here.
# Fix the seed so the random partition is reproducible
set.seed(123)
# Draw a label for every row: 1 with probability 0.7, 2 with probability 0.3
ind <- sample(1:2, size = nrow(data), replace = TRUE, prob = c(0.7, 0.3))
# Rows labelled 1 form the training set, rows labelled 2 the validation set
trainData <- data[which(ind == 1), ]
validationData <- data[which(ind == 2), ]
# Class counts in the training set
table(trainData$Class)
##
## benign malignant
## 322 166
# The same counts expressed as proportions
prop.table(table(trainData$Class))
##
## benign malignant
## 0.6598361 0.3401639
library(caTools)
## Warning: package 'caTools' was built under R version 3.6.2
# Alternate method: caTools::sample.split() returns one logical flag per row,
# with TRUE marking the rows selected at the requested SplitRatio.
set.seed(123)
split <- sample.split(data$Class, SplitRatio = 0.7)
# Show the resulting TRUE/FALSE flags
split
## [1] TRUE FALSE TRUE FALSE FALSE TRUE TRUE TRUE FALSE TRUE TRUE FALSE
## [13] TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE
## [25] TRUE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE FALSE TRUE FALSE
## [37] TRUE TRUE FALSE TRUE TRUE FALSE TRUE FALSE TRUE TRUE TRUE TRUE
## [49] FALSE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE FALSE TRUE TRUE
## [61] TRUE FALSE TRUE FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE
## [73] FALSE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [85] TRUE TRUE TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE FALSE TRUE
## [97] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
## [109] FALSE TRUE FALSE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] FALSE TRUE TRUE TRUE TRUE FALSE FALSE FALSE TRUE FALSE TRUE FALSE
## [133] FALSE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [145] TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE FALSE FALSE TRUE
## [157] FALSE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE
## [169] FALSE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE
## [181] FALSE TRUE FALSE FALSE TRUE FALSE FALSE TRUE TRUE TRUE FALSE TRUE
## [193] TRUE TRUE FALSE TRUE TRUE FALSE FALSE TRUE FALSE TRUE TRUE FALSE
## [205] TRUE FALSE FALSE TRUE TRUE TRUE TRUE FALSE TRUE FALSE TRUE TRUE
## [217] TRUE TRUE TRUE FALSE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE
## [229] TRUE TRUE TRUE TRUE FALSE FALSE TRUE FALSE FALSE TRUE TRUE TRUE
## [241] TRUE FALSE TRUE TRUE FALSE FALSE TRUE FALSE FALSE TRUE TRUE TRUE
## [253] TRUE FALSE TRUE FALSE TRUE TRUE FALSE TRUE TRUE TRUE FALSE TRUE
## [265] TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE FALSE FALSE FALSE FALSE
## [277] TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE FALSE TRUE
## [289] TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE
## [301] FALSE TRUE TRUE TRUE TRUE FALSE TRUE FALSE TRUE TRUE TRUE TRUE
## [313] TRUE TRUE TRUE FALSE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE
## [325] TRUE FALSE FALSE TRUE TRUE FALSE TRUE FALSE TRUE TRUE TRUE FALSE
## [337] TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE FALSE FALSE TRUE
## [349] FALSE FALSE TRUE FALSE TRUE FALSE FALSE TRUE FALSE TRUE FALSE FALSE
## [361] FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE
## [373] TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [385] FALSE TRUE FALSE TRUE FALSE FALSE TRUE TRUE FALSE FALSE FALSE TRUE
## [397] TRUE TRUE TRUE FALSE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE
## [409] TRUE FALSE TRUE FALSE FALSE TRUE FALSE TRUE TRUE TRUE TRUE TRUE
## [421] FALSE TRUE TRUE FALSE FALSE TRUE FALSE TRUE TRUE TRUE TRUE TRUE
## [433] TRUE FALSE TRUE FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE TRUE
## [445] FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE FALSE
## [457] FALSE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE TRUE TRUE FALSE
## [469] TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [481] TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE TRUE FALSE TRUE
## [493] FALSE FALSE TRUE TRUE FALSE FALSE TRUE FALSE FALSE TRUE TRUE TRUE
## [505] TRUE TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE FALSE FALSE TRUE
## [517] FALSE TRUE TRUE TRUE FALSE FALSE TRUE FALSE TRUE TRUE FALSE TRUE
## [529] TRUE FALSE FALSE TRUE TRUE FALSE TRUE TRUE FALSE FALSE TRUE TRUE
## [541] FALSE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [553] FALSE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE
## [565] TRUE TRUE FALSE TRUE TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE
## [577] TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
## [589] TRUE TRUE TRUE TRUE FALSE TRUE TRUE FALSE FALSE TRUE TRUE FALSE
## [601] TRUE FALSE TRUE TRUE TRUE TRUE FALSE TRUE TRUE FALSE FALSE FALSE
## [613] FALSE TRUE TRUE TRUE TRUE FALSE TRUE FALSE FALSE TRUE TRUE TRUE
## [625] TRUE TRUE TRUE FALSE FALSE TRUE FALSE TRUE TRUE TRUE TRUE FALSE
## [637] TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE FALSE TRUE
## [649] TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE
## [661] TRUE FALSE FALSE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE
## [673] TRUE TRUE TRUE TRUE TRUE FALSE TRUE FALSE TRUE TRUE FALSE FALSE
## [685] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE
## [697] TRUE TRUE TRUE
# Create training and testing sets from the logical `split` flags:
# TRUE rows go to the training set, FALSE rows to the test set.
dataTrain <- data[split, ]
dataTest <- data[!split, ]
# install.packages("rpart")
# install.packages("rpart.plot")
library(rpart)
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 3.6.2
library(party)
## Warning: package 'party' was built under R version 3.6.2
## Loading required package: grid
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: strucchange
## Warning: package 'strucchange' was built under R version 3.6.2
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.6.2
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Warning: package 'sandwich' was built under R version 3.6.2
# See ?rpart and ?rpart.control for the available tree-growing options.
# rpart can generate different types of trees; method = "class" requests a
# classification tree, and the Gini index is the default splitting criterion.
tree <- rpart(formula = Class ~ ., data = trainData, method = "class")
print(tree)
## n= 488
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 488 166 benign (0.65983607 0.34016393)
## 2) UniformityCellSize< 2.5 303 4 benign (0.98679868 0.01320132) *
## 3) UniformityCellSize>=2.5 185 23 malignant (0.12432432 0.87567568)
## 6) UniformityCellShape< 2.5 13 3 benign (0.76923077 0.23076923) *
## 7) UniformityCellShape>=2.5 172 13 malignant (0.07558140 0.92441860) *
# Basic plot of the fitted tree
prp(tree)

# type = 3 draws separate split labels for the left and right branches
prp(tree, type = 3)

# extra = 104 shows the per-class probabilities plus the percentage of
# observations in each node; nn = TRUE displays the node numbers
rpart.plot(tree, extra = 104, nn = TRUE)

# Visualise the cross-validation results stored in the fitted tree
plotcp(tree)

# Split with entropy information instead of the default Gini index
entTree <- rpart(
  Class ~ .,
  data = trainData,
  method = "class",
  parms = list(split = "information")
)
# Basic plot of the entropy-based tree
prp(entTree)

# Same tree with separate split labels for the left and right branches
prp(entTree, type = 3)

# Per-class probabilities, percentage of observations, and node numbers
rpart.plot(entTree, extra = 104, nn = TRUE)

# Cross-validation results of the entropy tree itself (not the Gini `tree`).
# rpart.plot is already attached earlier in the script, so it is not reloaded.
plotcp(entTree)

# Fit a tree on the training data with explicit control settings instead of
# the defaults. The extra arguments are forwarded to rpart.control():
#   minsplit  - minimum observations in a node for a split to be attempted
#   minbucket - minimum observations in any terminal node
#   cp        - complexity threshold; a negative value means no split is
#               rejected for improving the fit too little
tree_with_params <- rpart(
  Class ~ .,
  data = trainData,
  method = "class",
  minsplit = 1,
  minbucket = 10,
  cp = -1
)
prp(tree_with_params)

print(tree_with_params)
## n= 488
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 488 166 benign (0.65983607 0.34016393)
## 2) UniformityCellSize< 2.5 303 4 benign (0.98679868 0.01320132)
## 4) NormalNucleoli< 2.5 292 0 benign (1.00000000 0.00000000) *
## 5) NormalNucleoli>=2.5 11 4 benign (0.63636364 0.36363636) *
## 3) UniformityCellSize>=2.5 185 23 malignant (0.12432432 0.87567568)
## 6) UniformityCellShape< 2.5 13 3 benign (0.76923077 0.23076923) *
## 7) UniformityCellShape>=2.5 172 13 malignant (0.07558140 0.92441860)
## 14) UniformityCellSize< 4.5 44 11 malignant (0.25000000 0.75000000)
## 28) BareNuclei< 3.5 16 7 benign (0.56250000 0.43750000) *
## 29) BareNuclei>=3.5 28 2 malignant (0.07142857 0.92857143)
## 58) MarginalAdhesion>=4.5 13 2 malignant (0.15384615 0.84615385) *
## 59) MarginalAdhesion< 4.5 15 0 malignant (0.00000000 1.00000000) *
## 15) UniformityCellSize>=4.5 128 2 malignant (0.01562500 0.98437500)
## 30) ClumpThickness< 6.5 49 2 malignant (0.04081633 0.95918367)
## 60) ClumpThickness>=5.5 12 2 malignant (0.16666667 0.83333333) *
## 61) ClumpThickness< 5.5 37 0 malignant (0.00000000 1.00000000) *
## 31) ClumpThickness>=6.5 79 0 malignant (0.00000000 1.00000000) *
# Detailed report for the tuned tree: the CP table, variable importance, and
# the primary and surrogate splits considered at each node
summary(tree_with_params)
## Call:
## rpart(formula = Class ~ ., data = trainData, method = "class",
## minsplit = 1, minbucket = 10, cp = -1)
## n= 488
##
## CP nsplit rel error xerror xstd
## 1 0.837349398 0 1.0000000 1.0000000 0.06304694
## 2 0.042168675 1 0.1626506 0.1807229 0.03196505
## 3 0.006024096 2 0.1204819 0.1265060 0.02700537
## 4 0.000000000 4 0.1084337 0.1204819 0.02638274
## 5 -1.000000000 8 0.1084337 0.1204819 0.02638274
##
## Variable importance
## UniformityCellSize UniformityCellShape SingleEpithelialCellSize
## 21 18 16
## NormalNucleoli BlandChromatin BareNuclei
## 15 15 14
##
## Node number 1: 488 observations, complexity param=0.8373494
## predicted class=benign expected loss=0.3401639 P(node) =1
## class counts: 322 166
## probabilities: 0.660 0.340
## left son=2 (303 obs) right son=3 (185 obs)
## Primary splits:
## UniformityCellSize < 2.5 to the left, improve=170.8901, (0 missing)
## UniformityCellShape < 2.5 to the left, improve=158.2395, (0 missing)
## BareNuclei < 3.5 to the left, improve=143.0350, (9 missing)
## SingleEpithelialCellSize < 2.5 to the left, improve=136.7937, (0 missing)
## BlandChromatin < 3.5 to the left, improve=135.7491, (0 missing)
## Surrogate splits:
## UniformityCellShape < 2.5 to the left, agree=0.924, adj=0.800, (0 split)
## SingleEpithelialCellSize < 2.5 to the left, agree=0.904, adj=0.746, (0 split)
## NormalNucleoli < 2.5 to the left, agree=0.889, adj=0.708, (0 split)
## BlandChromatin < 3.5 to the left, agree=0.879, adj=0.681, (0 split)
## BareNuclei < 2.5 to the left, agree=0.871, adj=0.659, (0 split)
##
## Node number 2: 303 observations, complexity param=0
## predicted class=benign expected loss=0.01320132 P(node) =0.6209016
## class counts: 299 4
## probabilities: 0.987 0.013
##
## Node number 3: 185 observations, complexity param=0.04216867
## predicted class=malignant expected loss=0.1243243 P(node) =0.3790984
## class counts: 23 162
## probabilities: 0.124 0.876
## left son=6 (13 obs) right son=7 (172 obs)
## Primary splits:
## UniformityCellShape < 2.5 to the left, improve=11.630810, (0 missing)
## BareNuclei < 1.5 to the left, improve=10.654530, (3 missing)
## UniformityCellSize < 4.5 to the left, improve=10.093100, (0 missing)
## BlandChromatin < 2.5 to the left, improve= 7.386963, (0 missing)
## MarginalAdhesion < 2.5 to the left, improve= 5.247007, (0 missing)
## Surrogate splits:
## BlandChromatin < 1.5 to the left, agree=0.941, adj=0.154, (0 split)
##
## Node number 6: 13 observations
## predicted class=benign expected loss=0.2307692 P(node) =0.02663934
## class counts: 10 3
## probabilities: 0.769 0.231
##
## Node number 7: 172 observations, complexity param=0.006024096
## predicted class=malignant expected loss=0.0755814 P(node) =0.352459
## class counts: 13 159
## probabilities: 0.076 0.924
## left son=14 (44 obs) right son=15 (128 obs)
## Primary splits:
## UniformityCellSize < 4.5 to the left, improve=3.597384, (0 missing)
## BareNuclei < 3.5 to the left, improve=2.699434, (3 missing)
## BlandChromatin < 4.5 to the left, improve=2.074169, (0 missing)
## SingleEpithelialCellSize < 2.5 to the left, improve=1.503064, (0 missing)
## UniformityCellShape < 3.5 to the left, improve=1.376989, (0 missing)
## Surrogate splits:
## SingleEpithelialCellSize < 2.5 to the left, agree=0.808, adj=0.250, (0 split)
## UniformityCellShape < 3.5 to the left, agree=0.802, adj=0.227, (0 split)
## MarginalAdhesion < 1.5 to the left, agree=0.762, adj=0.068, (0 split)
## BlandChromatin < 2.5 to the left, agree=0.762, adj=0.068, (0 split)
##
## Node number 14: 44 observations, complexity param=0.006024096
## predicted class=malignant expected loss=0.25 P(node) =0.09016393
## class counts: 11 33
## probabilities: 0.250 0.750
## left son=28 (16 obs) right son=29 (28 obs)
## Primary splits:
## BareNuclei < 3.5 to the left, improve=4.1678850, (1 missing)
## BlandChromatin < 4.5 to the left, improve=2.3034190, (0 missing)
## ClumpThickness < 8.5 to the left, improve=2.0625000, (0 missing)
## MarginalAdhesion < 6 to the left, improve=2.0625000, (0 missing)
## SingleEpithelialCellSize < 4.5 to the left, improve=0.3411911, (0 missing)
## Surrogate splits:
## MarginalAdhesion < 1.5 to the left, agree=0.698, adj=0.133, (1 split)
## ClumpThickness < 5.5 to the left, agree=0.674, adj=0.067, (0 split)
## BlandChromatin < 4.5 to the left, agree=0.674, adj=0.067, (0 split)
## NormalNucleoli < 8.5 to the right, agree=0.674, adj=0.067, (0 split)
##
## Node number 15: 128 observations, complexity param=0
## predicted class=malignant expected loss=0.015625 P(node) =0.2622951
## class counts: 2 126
## probabilities: 0.016 0.984
##
## Node number 28: 16 observations
## predicted class=benign expected loss=0.4375 P(node) =0.03278689
## class counts: 9 7
## probabilities: 0.562 0.438
##
## Node number 29: 28 observations, complexity param=0
## predicted class=malignant expected loss=0.07142857 P(node) =0.05737705
## class counts: 2 26
## probabilities: 0.071 0.929
# Draw the tuned tree with base graphics; text() then labels the drawn tree
plot(tree_with_params)
text(tree_with_params)

# Visualise the cross-validation results of the tuned tree
plotcp(tree_with_params)

# Now we predict and evaluate the performance of the trained tree model.
# type = "prob" (what predict() already uses by default for a classification
# tree) returns one probability column per class.
Predict <- predict(tree_with_params, newdata = validationData, type = "prob")
# Examine the values of Predict: one row per validation observation, with the
# class probabilities in the columns
Predict
## benign malignant
## 2 0.1538462 0.8461538
## 4 0.1666667 0.8333333
## 5 1.0000000 0.0000000
## 8 1.0000000 0.0000000
## 11 1.0000000 0.0000000
## 16 0.5625000 0.4375000
## 20 1.0000000 0.0000000
## 21 0.7692308 0.2307692
## 24 0.5625000 0.4375000
## 26 0.6363636 0.3636364
## 31 1.0000000 0.0000000
## 32 1.0000000 0.0000000
## 34 1.0000000 0.0000000
## 37 0.0000000 1.0000000
## 50 0.0000000 1.0000000
## 53 0.0000000 1.0000000
## 58 0.6363636 0.3636364
## 59 1.0000000 0.0000000
## 65 1.0000000 0.0000000
## 67 1.0000000 0.0000000
## 68 0.0000000 1.0000000
## 69 0.0000000 1.0000000
## 71 1.0000000 0.0000000
## 73 0.5625000 0.4375000
## 84 1.0000000 0.0000000
## 87 0.0000000 1.0000000
## 88 0.0000000 1.0000000
## 89 1.0000000 0.0000000
## 97 1.0000000 0.0000000
## 104 1.0000000 0.0000000
## 106 0.5625000 0.4375000
## 107 0.0000000 1.0000000
## 111 0.7692308 0.2307692
## 114 0.0000000 1.0000000
## 115 0.7692308 0.2307692
## 118 0.0000000 1.0000000
## 126 1.0000000 0.0000000
## 132 1.0000000 0.0000000
## 134 1.0000000 0.0000000
## 137 1.0000000 0.0000000
## 138 1.0000000 0.0000000
## 139 1.0000000 0.0000000
## 145 1.0000000 0.0000000
## 150 0.0000000 1.0000000
## 151 1.0000000 0.0000000
## 167 0.0000000 1.0000000
## 173 1.0000000 0.0000000
## 174 0.1666667 0.8333333
## 179 1.0000000 0.0000000
## 181 1.0000000 0.0000000
## 183 1.0000000 0.0000000
## 189 0.0000000 1.0000000
## 190 1.0000000 0.0000000
## 193 1.0000000 0.0000000
## 195 1.0000000 0.0000000
## 202 0.0000000 1.0000000
## 206 0.0000000 1.0000000
## 216 0.0000000 1.0000000
## 219 0.1666667 0.8333333
## 220 1.0000000 0.0000000
## 222 0.0000000 1.0000000
## 223 1.0000000 0.0000000
## 230 0.0000000 1.0000000
## 238 0.0000000 1.0000000
## 240 0.0000000 1.0000000
## 246 1.0000000 0.0000000
## 248 0.0000000 1.0000000
## 249 0.6363636 0.3636364
## 250 1.0000000 0.0000000
## 256 0.0000000 1.0000000
## 260 0.0000000 1.0000000
## 261 0.0000000 1.0000000
## 262 0.0000000 1.0000000
## 264 0.1538462 0.8461538
## 271 0.0000000 1.0000000
## 275 1.0000000 0.0000000
## 276 1.0000000 0.0000000
## 277 1.0000000 0.0000000
## 281 1.0000000 0.0000000
## 294 0.1538462 0.8461538
## 295 1.0000000 0.0000000
## 296 0.0000000 1.0000000
## 297 0.0000000 1.0000000
## 300 0.6363636 0.3636364
## 301 0.1538462 0.8461538
## 303 0.0000000 1.0000000
## 304 1.0000000 0.0000000
## 313 0.0000000 1.0000000
## 316 0.0000000 1.0000000
## 317 0.0000000 1.0000000
## 320 0.0000000 1.0000000
## 321 0.0000000 1.0000000
## 324 0.1538462 0.8461538
## 327 0.6363636 0.3636364
## 330 0.0000000 1.0000000
## 333 1.0000000 0.0000000
## 334 0.1538462 0.8461538
## 340 0.0000000 1.0000000
## 347 0.6363636 0.3636364
## 352 1.0000000 0.0000000
## 356 1.0000000 0.0000000
## 360 0.0000000 1.0000000
## 363 1.0000000 0.0000000
## 366 1.0000000 0.0000000
## 373 1.0000000 0.0000000
## 376 1.0000000 0.0000000
## 377 1.0000000 0.0000000
## 380 0.5625000 0.4375000
## 382 0.0000000 1.0000000
## 384 1.0000000 0.0000000
## 386 0.7692308 0.2307692
## 391 1.0000000 0.0000000
## 393 1.0000000 0.0000000
## 394 1.0000000 0.0000000
## 400 1.0000000 0.0000000
## 401 0.0000000 1.0000000
## 403 0.5625000 0.4375000
## 407 1.0000000 0.0000000
## 412 1.0000000 0.0000000
## 417 0.0000000 1.0000000
## 425 1.0000000 0.0000000
## 430 1.0000000 0.0000000
## 431 0.7692308 0.2307692
## 434 1.0000000 0.0000000
## 443 1.0000000 0.0000000
## 445 1.0000000 0.0000000
## 446 1.0000000 0.0000000
## 447 1.0000000 0.0000000
## 456 1.0000000 0.0000000
## 457 0.0000000 1.0000000
## 458 0.0000000 1.0000000
## 461 1.0000000 0.0000000
## 470 1.0000000 0.0000000
## 472 1.0000000 0.0000000
## 474 1.0000000 0.0000000
## 480 0.0000000 1.0000000
## 482 0.7692308 0.2307692
## 485 1.0000000 0.0000000
## 490 0.7692308 0.2307692
## 491 1.0000000 0.0000000
## 494 0.0000000 1.0000000
## 496 1.0000000 0.0000000
## 500 1.0000000 0.0000000
## 509 1.0000000 0.0000000
## 513 1.0000000 0.0000000
## 515 0.1666667 0.8333333
## 518 1.0000000 0.0000000
## 520 0.0000000 1.0000000
## 527 1.0000000 0.0000000
## 529 1.0000000 0.0000000
## 531 0.0000000 1.0000000
## 534 1.0000000 0.0000000
## 536 1.0000000 0.0000000
## 538 1.0000000 0.0000000
## 541 1.0000000 0.0000000
## 543 0.7692308 0.2307692
## 545 1.0000000 0.0000000
## 546 1.0000000 0.0000000
## 548 1.0000000 0.0000000
## 549 1.0000000 0.0000000
## 554 0.7692308 0.2307692
## 562 1.0000000 0.0000000
## 563 1.0000000 0.0000000
## 568 1.0000000 0.0000000
## 570 0.0000000 1.0000000
## 572 0.0000000 1.0000000
## 575 0.0000000 1.0000000
## 576 1.0000000 0.0000000
## 581 1.0000000 0.0000000
## 582 0.0000000 1.0000000
## 583 0.1666667 0.8333333
## 585 1.0000000 0.0000000
## 589 0.0000000 1.0000000
## 593 0.1538462 0.8461538
## 596 1.0000000 0.0000000
## 599 1.0000000 0.0000000
## 606 0.0000000 1.0000000
## 608 1.0000000 0.0000000
## 614 0.7692308 0.2307692
## 616 1.0000000 0.0000000
## 617 1.0000000 0.0000000
## 618 1.0000000 0.0000000
## 619 1.0000000 0.0000000
## 621 1.0000000 0.0000000
## 622 0.5625000 0.4375000
## 623 1.0000000 0.0000000
## 628 1.0000000 0.0000000
## 631 1.0000000 0.0000000
## 632 1.0000000 0.0000000
## 634 0.0000000 1.0000000
## 637 0.0000000 1.0000000
## 638 1.0000000 0.0000000
## 639 1.0000000 0.0000000
## 642 1.0000000 0.0000000
## 643 1.0000000 0.0000000
## 647 1.0000000 0.0000000
## 651 1.0000000 0.0000000
## 652 1.0000000 0.0000000
## 655 1.0000000 0.0000000
## 656 1.0000000 0.0000000
## 657 1.0000000 0.0000000
## 661 1.0000000 0.0000000
## 662 1.0000000 0.0000000
## 664 1.0000000 0.0000000
## 667 1.0000000 0.0000000
## 677 1.0000000 0.0000000
## 681 0.0000000 1.0000000
## 682 0.0000000 1.0000000
## 683 1.0000000 0.0000000
## 684 1.0000000 0.0000000
## 685 1.0000000 0.0000000
# pred <- predict(mymodel, dataset, type = "prob")
# To produce classes only, without the probabilities, run the next command.
# By default the threshold is set at 0.5 to produce the classes.
Predict <- predict(tree_with_params, newdata = validationData, type = "class")
# Predict is now a factor of predicted labels, named by row
Predict
## 2 4 5 8 11 16 20 21
## malignant malignant benign benign benign benign benign benign
## 24 26 31 32 34 37 50 53
## benign benign benign benign benign malignant malignant malignant
## 58 59 65 67 68 69 71 73
## benign benign benign benign malignant malignant benign benign
## 84 87 88 89 97 104 106 107
## benign malignant malignant benign benign benign benign malignant
## 111 114 115 118 126 132 134 137
## benign malignant benign malignant benign benign benign benign
## 138 139 145 150 151 167 173 174
## benign benign benign malignant benign malignant benign malignant
## 179 181 183 189 190 193 195 202
## benign benign benign malignant benign benign benign malignant
## 206 216 219 220 222 223 230 238
## malignant malignant malignant benign malignant benign malignant malignant
## 240 246 248 249 250 256 260 261
## malignant benign malignant benign benign malignant malignant malignant
## 262 264 271 275 276 277 281 294
## malignant malignant malignant benign benign benign benign malignant
## 295 296 297 300 301 303 304 313
## benign malignant malignant benign malignant malignant benign malignant
## 316 317 320 321 324 327 330 333
## malignant malignant malignant malignant malignant benign malignant benign
## 334 340 347 352 356 360 363 366
## malignant malignant benign benign benign malignant benign benign
## 373 376 377 380 382 384 386 391
## benign benign benign benign malignant benign benign benign
## 393 394 400 401 403 407 412 417
## benign benign benign malignant benign benign benign malignant
## 425 430 431 434 443 445 446 447
## benign benign benign benign benign benign benign benign
## 456 457 458 461 470 472 474 480
## benign malignant malignant benign benign benign benign malignant
## 482 485 490 491 494 496 500 509
## benign benign benign benign malignant benign benign benign
## 513 515 518 520 527 529 531 534
## benign malignant benign malignant benign benign malignant benign
## 536 538 541 543 545 546 548 549
## benign benign benign benign benign benign benign benign
## 554 562 563 568 570 572 575 576
## benign benign benign benign malignant malignant malignant benign
## 581 582 583 585 589 593 596 599
## benign malignant malignant benign malignant malignant benign benign
## 606 608 614 616 617 618 619 621
## malignant benign benign benign benign benign benign benign
## 622 623 628 631 632 634 637 638
## benign benign benign benign benign malignant malignant benign
## 639 642 643 647 651 652 655 656
## benign benign benign benign benign benign benign benign
## 657 661 662 664 667 677 681 682
## benign benign benign benign benign benign malignant malignant
## 683 684 685
## benign benign benign
## Levels: benign malignant
# Producing confusion matrix
# Rows are the predicted classes, columns are the actual validation classes.
Confusion_matrix <- table(Predict, validationData$Class)
print(Confusion_matrix)
##
## Predict benign malignant
## benign 130 13
## malignant 6 62
# Accuracy = correctly classified cases (the diagonal) / all validation cases.
# It is computed from the matrix rather than typed in from the printed counts,
# so it stays correct if the split, seed, or model changes.
sum(diag(Confusion_matrix)) / sum(Confusion_matrix)
## [1] 0.9099526
# ROC curve
#install.packages("ROCR")
library(ROCR)
## Warning: package 'ROCR' was built under R version 3.6.2
## Loading required package: gplots
## Warning: package 'gplots' was built under R version 3.6.2
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
# install.packages("gplots")
# The ROC curve needs predicted probabilities rather than class labels, so
# predict is run once more with type = "prob" (the default for this tree).
PredictROC <- predict(tree_with_params, newdata = validationData, type = "prob")
# One row per validation observation, one probability column per class
PredictROC
## benign malignant
## 2 0.1538462 0.8461538
## 4 0.1666667 0.8333333
## 5 1.0000000 0.0000000
## 8 1.0000000 0.0000000
## 11 1.0000000 0.0000000
## 16 0.5625000 0.4375000
## 20 1.0000000 0.0000000
## 21 0.7692308 0.2307692
## 24 0.5625000 0.4375000
## 26 0.6363636 0.3636364
## 31 1.0000000 0.0000000
## 32 1.0000000 0.0000000
## 34 1.0000000 0.0000000
## 37 0.0000000 1.0000000
## 50 0.0000000 1.0000000
## 53 0.0000000 1.0000000
## 58 0.6363636 0.3636364
## 59 1.0000000 0.0000000
## 65 1.0000000 0.0000000
## 67 1.0000000 0.0000000
## 68 0.0000000 1.0000000
## 69 0.0000000 1.0000000
## 71 1.0000000 0.0000000
## 73 0.5625000 0.4375000
## 84 1.0000000 0.0000000
## 87 0.0000000 1.0000000
## 88 0.0000000 1.0000000
## 89 1.0000000 0.0000000
## 97 1.0000000 0.0000000
## 104 1.0000000 0.0000000
## 106 0.5625000 0.4375000
## 107 0.0000000 1.0000000
## 111 0.7692308 0.2307692
## 114 0.0000000 1.0000000
## 115 0.7692308 0.2307692
## 118 0.0000000 1.0000000
## 126 1.0000000 0.0000000
## 132 1.0000000 0.0000000
## 134 1.0000000 0.0000000
## 137 1.0000000 0.0000000
## 138 1.0000000 0.0000000
## 139 1.0000000 0.0000000
## 145 1.0000000 0.0000000
## 150 0.0000000 1.0000000
## 151 1.0000000 0.0000000
## 167 0.0000000 1.0000000
## 173 1.0000000 0.0000000
## 174 0.1666667 0.8333333
## 179 1.0000000 0.0000000
## 181 1.0000000 0.0000000
## 183 1.0000000 0.0000000
## 189 0.0000000 1.0000000
## 190 1.0000000 0.0000000
## 193 1.0000000 0.0000000
## 195 1.0000000 0.0000000
## 202 0.0000000 1.0000000
## 206 0.0000000 1.0000000
## 216 0.0000000 1.0000000
## 219 0.1666667 0.8333333
## 220 1.0000000 0.0000000
## 222 0.0000000 1.0000000
## 223 1.0000000 0.0000000
## 230 0.0000000 1.0000000
## 238 0.0000000 1.0000000
## 240 0.0000000 1.0000000
## 246 1.0000000 0.0000000
## 248 0.0000000 1.0000000
## 249 0.6363636 0.3636364
## 250 1.0000000 0.0000000
## 256 0.0000000 1.0000000
## 260 0.0000000 1.0000000
## 261 0.0000000 1.0000000
## 262 0.0000000 1.0000000
## 264 0.1538462 0.8461538
## 271 0.0000000 1.0000000
## 275 1.0000000 0.0000000
## 276 1.0000000 0.0000000
## 277 1.0000000 0.0000000
## 281 1.0000000 0.0000000
## 294 0.1538462 0.8461538
## 295 1.0000000 0.0000000
## 296 0.0000000 1.0000000
## 297 0.0000000 1.0000000
## 300 0.6363636 0.3636364
## 301 0.1538462 0.8461538
## 303 0.0000000 1.0000000
## 304 1.0000000 0.0000000
## 313 0.0000000 1.0000000
## 316 0.0000000 1.0000000
## 317 0.0000000 1.0000000
## 320 0.0000000 1.0000000
## 321 0.0000000 1.0000000
## 324 0.1538462 0.8461538
## 327 0.6363636 0.3636364
## 330 0.0000000 1.0000000
## 333 1.0000000 0.0000000
## 334 0.1538462 0.8461538
## 340 0.0000000 1.0000000
## 347 0.6363636 0.3636364
## 352 1.0000000 0.0000000
## 356 1.0000000 0.0000000
## 360 0.0000000 1.0000000
## 363 1.0000000 0.0000000
## 366 1.0000000 0.0000000
## 373 1.0000000 0.0000000
## 376 1.0000000 0.0000000
## 377 1.0000000 0.0000000
## 380 0.5625000 0.4375000
## 382 0.0000000 1.0000000
## 384 1.0000000 0.0000000
## 386 0.7692308 0.2307692
## 391 1.0000000 0.0000000
## 393 1.0000000 0.0000000
## 394 1.0000000 0.0000000
## 400 1.0000000 0.0000000
## 401 0.0000000 1.0000000
## 403 0.5625000 0.4375000
## 407 1.0000000 0.0000000
## 412 1.0000000 0.0000000
## 417 0.0000000 1.0000000
## 425 1.0000000 0.0000000
## 430 1.0000000 0.0000000
## 431 0.7692308 0.2307692
## 434 1.0000000 0.0000000
## 443 1.0000000 0.0000000
## 445 1.0000000 0.0000000
## 446 1.0000000 0.0000000
## 447 1.0000000 0.0000000
## 456 1.0000000 0.0000000
## 457 0.0000000 1.0000000
## 458 0.0000000 1.0000000
## 461 1.0000000 0.0000000
## 470 1.0000000 0.0000000
## 472 1.0000000 0.0000000
## 474 1.0000000 0.0000000
## 480 0.0000000 1.0000000
## 482 0.7692308 0.2307692
## 485 1.0000000 0.0000000
## 490 0.7692308 0.2307692
## 491 1.0000000 0.0000000
## 494 0.0000000 1.0000000
## 496 1.0000000 0.0000000
## 500 1.0000000 0.0000000
## 509 1.0000000 0.0000000
## 513 1.0000000 0.0000000
## 515 0.1666667 0.8333333
## 518 1.0000000 0.0000000
## 520 0.0000000 1.0000000
## 527 1.0000000 0.0000000
## 529 1.0000000 0.0000000
## 531 0.0000000 1.0000000
## 534 1.0000000 0.0000000
## 536 1.0000000 0.0000000
## 538 1.0000000 0.0000000
## 541 1.0000000 0.0000000
## 543 0.7692308 0.2307692
## 545 1.0000000 0.0000000
## 546 1.0000000 0.0000000
## 548 1.0000000 0.0000000
## 549 1.0000000 0.0000000
## 554 0.7692308 0.2307692
## 562 1.0000000 0.0000000
## 563 1.0000000 0.0000000
## 568 1.0000000 0.0000000
## 570 0.0000000 1.0000000
## 572 0.0000000 1.0000000
## 575 0.0000000 1.0000000
## 576 1.0000000 0.0000000
## 581 1.0000000 0.0000000
## 582 0.0000000 1.0000000
## 583 0.1666667 0.8333333
## 585 1.0000000 0.0000000
## 589 0.0000000 1.0000000
## 593 0.1538462 0.8461538
## 596 1.0000000 0.0000000
## 599 1.0000000 0.0000000
## 606 0.0000000 1.0000000
## 608 1.0000000 0.0000000
## 614 0.7692308 0.2307692
## 616 1.0000000 0.0000000
## 617 1.0000000 0.0000000
## 618 1.0000000 0.0000000
## 619 1.0000000 0.0000000
## 621 1.0000000 0.0000000
## 622 0.5625000 0.4375000
## 623 1.0000000 0.0000000
## 628 1.0000000 0.0000000
## 631 1.0000000 0.0000000
## 632 1.0000000 0.0000000
## 634 0.0000000 1.0000000
## 637 0.0000000 1.0000000
## 638 1.0000000 0.0000000
## 639 1.0000000 0.0000000
## 642 1.0000000 0.0000000
## 643 1.0000000 0.0000000
## 647 1.0000000 0.0000000
## 651 1.0000000 0.0000000
## 652 1.0000000 0.0000000
## 655 1.0000000 0.0000000
## 656 1.0000000 0.0000000
## 657 1.0000000 0.0000000
## 661 1.0000000 0.0000000
## 662 1.0000000 0.0000000
## 664 1.0000000 0.0000000
## 667 1.0000000 0.0000000
## 677 1.0000000 0.0000000
## 681 0.0000000 1.0000000
## 682 0.0000000 1.0000000
## 683 1.0000000 0.0000000
## 684 1.0000000 0.0000000
## 685 1.0000000 0.0000000
# Second column of the matrix: the predicted probability of "malignant"
PredictROC[, "malignant"]
## 2 4 5 8 11 16 20 21
## 0.8461538 0.8333333 0.0000000 0.0000000 0.0000000 0.4375000 0.0000000 0.2307692
## 24 26 31 32 34 37 50 53
## 0.4375000 0.3636364 0.0000000 0.0000000 0.0000000 1.0000000 1.0000000 1.0000000
## 58 59 65 67 68 69 71 73
## 0.3636364 0.0000000 0.0000000 0.0000000 1.0000000 1.0000000 0.0000000 0.4375000
## 84 87 88 89 97 104 106 107
## 0.0000000 1.0000000 1.0000000 0.0000000 0.0000000 0.0000000 0.4375000 1.0000000
## 111 114 115 118 126 132 134 137
## 0.2307692 1.0000000 0.2307692 1.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## 138 139 145 150 151 167 173 174
## 0.0000000 0.0000000 0.0000000 1.0000000 0.0000000 1.0000000 0.0000000 0.8333333
## 179 181 183 189 190 193 195 202
## 0.0000000 0.0000000 0.0000000 1.0000000 0.0000000 0.0000000 0.0000000 1.0000000
## 206 216 219 220 222 223 230 238
## 1.0000000 1.0000000 0.8333333 0.0000000 1.0000000 0.0000000 1.0000000 1.0000000
## 240 246 248 249 250 256 260 261
## 1.0000000 0.0000000 1.0000000 0.3636364 0.0000000 1.0000000 1.0000000 1.0000000
## 262 264 271 275 276 277 281 294
## 1.0000000 0.8461538 1.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.8461538
## 295 296 297 300 301 303 304 313
## 0.0000000 1.0000000 1.0000000 0.3636364 0.8461538 1.0000000 0.0000000 1.0000000
## 316 317 320 321 324 327 330 333
## 1.0000000 1.0000000 1.0000000 1.0000000 0.8461538 0.3636364 1.0000000 0.0000000
## 334 340 347 352 356 360 363 366
## 0.8461538 1.0000000 0.3636364 0.0000000 0.0000000 1.0000000 0.0000000 0.0000000
## 373 376 377 380 382 384 386 391
## 0.0000000 0.0000000 0.0000000 0.4375000 1.0000000 0.0000000 0.2307692 0.0000000
## 393 394 400 401 403 407 412 417
## 0.0000000 0.0000000 0.0000000 1.0000000 0.4375000 0.0000000 0.0000000 1.0000000
## 425 430 431 434 443 445 446 447
## 0.0000000 0.0000000 0.2307692 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## 456 457 458 461 470 472 474 480
## 0.0000000 1.0000000 1.0000000 0.0000000 0.0000000 0.0000000 0.0000000 1.0000000
## 482 485 490 491 494 496 500 509
## 0.2307692 0.0000000 0.2307692 0.0000000 1.0000000 0.0000000 0.0000000 0.0000000
## 513 515 518 520 527 529 531 534
## 0.0000000 0.8333333 0.0000000 1.0000000 0.0000000 0.0000000 1.0000000 0.0000000
## 536 538 541 543 545 546 548 549
## 0.0000000 0.0000000 0.0000000 0.2307692 0.0000000 0.0000000 0.0000000 0.0000000
## 554 562 563 568 570 572 575 576
## 0.2307692 0.0000000 0.0000000 0.0000000 1.0000000 1.0000000 1.0000000 0.0000000
## 581 582 583 585 589 593 596 599
## 0.0000000 1.0000000 0.8333333 0.0000000 1.0000000 0.8461538 0.0000000 0.0000000
## 606 608 614 616 617 618 619 621
## 1.0000000 0.0000000 0.2307692 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## 622 623 628 631 632 634 637 638
## 0.4375000 0.0000000 0.0000000 0.0000000 0.0000000 1.0000000 1.0000000 0.0000000
## 639 642 643 647 651 652 655 656
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## 657 661 662 664 667 677 681 682
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 1.0000000 1.0000000
## 683 684 685
## 0.0000000 0.0000000 0.0000000
# Pair the column-2 scores with the true validation labels in a ROCR
# prediction object, then derive the ROC coordinates from it
# (true positive rate on y against false positive rate on x).
pred <- prediction(
  predictions = PredictROC[, 2],
  labels = validationData$Class
)
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
# Show the prediction object (scores, labels, cutoffs, confusion counts).
pred
## An object of class "prediction"
## Slot "predictions":
## [[1]]
## 2 4 5 8 11 16 20 21
## 0.8461538 0.8333333 0.0000000 0.0000000 0.0000000 0.4375000 0.0000000 0.2307692
## 24 26 31 32 34 37 50 53
## 0.4375000 0.3636364 0.0000000 0.0000000 0.0000000 1.0000000 1.0000000 1.0000000
## 58 59 65 67 68 69 71 73
## 0.3636364 0.0000000 0.0000000 0.0000000 1.0000000 1.0000000 0.0000000 0.4375000
## 84 87 88 89 97 104 106 107
## 0.0000000 1.0000000 1.0000000 0.0000000 0.0000000 0.0000000 0.4375000 1.0000000
## 111 114 115 118 126 132 134 137
## 0.2307692 1.0000000 0.2307692 1.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## 138 139 145 150 151 167 173 174
## 0.0000000 0.0000000 0.0000000 1.0000000 0.0000000 1.0000000 0.0000000 0.8333333
## 179 181 183 189 190 193 195 202
## 0.0000000 0.0000000 0.0000000 1.0000000 0.0000000 0.0000000 0.0000000 1.0000000
## 206 216 219 220 222 223 230 238
## 1.0000000 1.0000000 0.8333333 0.0000000 1.0000000 0.0000000 1.0000000 1.0000000
## 240 246 248 249 250 256 260 261
## 1.0000000 0.0000000 1.0000000 0.3636364 0.0000000 1.0000000 1.0000000 1.0000000
## 262 264 271 275 276 277 281 294
## 1.0000000 0.8461538 1.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.8461538
## 295 296 297 300 301 303 304 313
## 0.0000000 1.0000000 1.0000000 0.3636364 0.8461538 1.0000000 0.0000000 1.0000000
## 316 317 320 321 324 327 330 333
## 1.0000000 1.0000000 1.0000000 1.0000000 0.8461538 0.3636364 1.0000000 0.0000000
## 334 340 347 352 356 360 363 366
## 0.8461538 1.0000000 0.3636364 0.0000000 0.0000000 1.0000000 0.0000000 0.0000000
## 373 376 377 380 382 384 386 391
## 0.0000000 0.0000000 0.0000000 0.4375000 1.0000000 0.0000000 0.2307692 0.0000000
## 393 394 400 401 403 407 412 417
## 0.0000000 0.0000000 0.0000000 1.0000000 0.4375000 0.0000000 0.0000000 1.0000000
## 425 430 431 434 443 445 446 447
## 0.0000000 0.0000000 0.2307692 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## 456 457 458 461 470 472 474 480
## 0.0000000 1.0000000 1.0000000 0.0000000 0.0000000 0.0000000 0.0000000 1.0000000
## 482 485 490 491 494 496 500 509
## 0.2307692 0.0000000 0.2307692 0.0000000 1.0000000 0.0000000 0.0000000 0.0000000
## 513 515 518 520 527 529 531 534
## 0.0000000 0.8333333 0.0000000 1.0000000 0.0000000 0.0000000 1.0000000 0.0000000
## 536 538 541 543 545 546 548 549
## 0.0000000 0.0000000 0.0000000 0.2307692 0.0000000 0.0000000 0.0000000 0.0000000
## 554 562 563 568 570 572 575 576
## 0.2307692 0.0000000 0.0000000 0.0000000 1.0000000 1.0000000 1.0000000 0.0000000
## 581 582 583 585 589 593 596 599
## 0.0000000 1.0000000 0.8333333 0.0000000 1.0000000 0.8461538 0.0000000 0.0000000
## 606 608 614 616 617 618 619 621
## 1.0000000 0.0000000 0.2307692 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## 622 623 628 631 632 634 637 638
## 0.4375000 0.0000000 0.0000000 0.0000000 0.0000000 1.0000000 1.0000000 0.0000000
## 639 642 643 647 651 652 655 656
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## 657 661 662 664 667 677 681 682
## 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 1.0000000 1.0000000
## 683 684 685
## 0.0000000 0.0000000 0.0000000
##
##
## Slot "labels":
## [[1]]
## [1] benign benign benign benign benign malignant benign
## [8] malignant malignant malignant benign benign benign malignant
## [15] malignant malignant malignant malignant benign benign malignant
## [22] malignant benign benign benign malignant malignant benign
## [29] benign malignant malignant malignant benign malignant benign
## [36] malignant benign benign benign benign benign benign
## [43] benign malignant benign malignant benign malignant benign
## [50] benign benign malignant benign benign benign malignant
## [57] malignant malignant malignant benign malignant malignant malignant
## [64] malignant malignant benign malignant benign benign malignant
## [71] benign malignant malignant malignant malignant benign benign
## [78] benign benign malignant benign malignant benign malignant
## [85] malignant malignant benign malignant benign malignant benign
## [92] malignant malignant malignant malignant benign malignant malignant
## [99] benign benign benign malignant benign benign benign
## [106] benign benign benign malignant benign benign benign
## [113] benign benign benign malignant benign benign benign
## [120] malignant benign benign benign benign benign benign
## [127] benign benign malignant malignant malignant benign benign
## [134] benign benign malignant benign benign malignant benign
## [141] malignant benign benign benign benign malignant benign
## [148] malignant benign benign malignant benign benign benign
## [155] benign benign benign benign benign benign benign
## [162] benign benign benign malignant malignant malignant benign
## [169] benign malignant malignant benign malignant malignant benign
## [176] benign malignant benign benign benign benign benign
## [183] benign benign benign benign benign benign benign
## [190] malignant malignant benign benign benign benign benign
## [197] benign benign benign benign benign benign benign
## [204] benign benign benign malignant malignant benign benign
## [211] benign
## Levels: benign < malignant
##
##
## Slot "cutoffs":
## [[1]]
## 682 593 583 622 347 614 685
## Inf 1.0000000 0.8461538 0.8333333 0.4375000 0.3636364 0.2307692 0.0000000
##
##
## Slot "fp":
## [[1]]
## [1] 0 4 5 6 10 12 20 136
##
##
## Slot "tp":
## [[1]]
## [1] 0 52 58 62 65 69 71 75
##
##
## Slot "tn":
## [[1]]
## [1] 136 132 131 130 126 124 116 0
##
##
## Slot "fn":
## [[1]]
## [1] 75 23 17 13 10 6 4 0
##
##
## Slot "n.pos":
## [[1]]
## [1] 75
##
##
## Slot "n.neg":
## [[1]]
## [1] 136
##
##
## Slot "n.pos.pred":
## [[1]]
## [1] 0 56 63 68 75 81 91 211
##
##
## Slot "n.neg.pred":
## [[1]]
## [1] 211 155 148 143 136 130 120 0
# Print the performance object built with measure "tpr" and x.measure "fpr":
# x.values hold the false positive rate, y.values the true positive rate,
# and alpha.values the score cutoffs.
perf
## An object of class "performance"
## Slot "x.name":
## [1] "False positive rate"
##
## Slot "y.name":
## [1] "True positive rate"
##
## Slot "alpha.name":
## [1] "Cutoff"
##
## Slot "x.values":
## [[1]]
## [1] 0.00000000 0.02941176 0.03676471 0.04411765 0.07352941 0.08823529 0.14705882
## [8] 1.00000000
##
##
## Slot "y.values":
## [[1]]
## [1] 0.0000000 0.6933333 0.7733333 0.8266667 0.8666667 0.9200000 0.9466667
## [8] 1.0000000
##
##
## Slot "alpha.values":
## [[1]]
## [1] Inf 1.0000000 0.8461538 0.8333333 0.4375000 0.3636364 0.2307692
## [8] 0.0000000
# Basic ROC plot with default axis labels, colorized along the curve.
# Spell out TRUE: T is an ordinary variable that can be reassigned, which
# would silently change the meaning of this call.
plot(perf, colorize = TRUE)

# Annotated ROC curve.
# perf was built as "tpr" vs "fpr", so the x axis is the false positive rate,
# i.e. 1 - specificity (not specificity itself), and the y axis is the true
# positive rate (sensitivity). Cutoff labels are requested at 0, 0.3, 0.6 and
# 0.9; text.adj shifts those labels relative to the curve.
plot(perf,
  colorize = TRUE,
  main = "ROC curve",
  ylab = "Sensitivity",
  xlab = "1 - Specificity",
  print.cutoffs.at = seq(0, 1, 0.3),
  text.adj = c(-0.2, 1.7)
)

# Area under the ROC curve, taken from the "auc" performance measure and
# rounded to three decimal places before being printed.
auc <- round(
  as.numeric(slot(performance(pred, measure = "auc"), "y.values")),
  digits = 3
)
auc
## [1] 0.945