# Compare three node-impurity measures for a two-class node as a function of
# p, the proportion of observations belonging to the first class.
p <- seq(0, 1, 0.01)
gini <- p * (1 - p) * 2
# x * log(x) evaluates to NaN at x = 0 (0 * -Inf), although its limit there is
# 0. Substituting 0 keeps the entropy defined at the endpoints p = 0 and p = 1
# instead of silently dropping them from the plot.
plogp <- function(x) ifelse(x > 0, x * log(x), 0)
ent <- -(plogp(p) + plogp(1 - p))
class.err <- 1 - pmax(p, 1 - p)
# One column per measure: Gini (red), entropy (green), class. error (blue).
matplot(p, cbind(gini, ent, class.err), col = c("red", "green", "blue"))
# NOTE(review): attach() is kept only because code outside this chunk may rely
# on OJ's columns being on the search path; nothing visible here needs it.
attach(OJ)

# Draw 800 row indices for training; every remaining row becomes test data.
set.seed(1)
row.number1 <- sample(seq_len(nrow(OJ)), size = 800)
OJ_train <- OJ[row.number1, ]
OJ_test <- OJ[-row.number1, ]

# Fit a classification tree for Purchase on all other columns and summarise it.
OJ.tree <- tree(Purchase ~ ., data = OJ_train)
print(summary(OJ.tree))
##
## Classification tree:
## tree(formula = Purchase ~ ., data = OJ_train)
## Variables actually used in tree construction:
## [1] "LoyalCH" "PriceDiff" "SpecialCH" "ListPriceDiff"
## [5] "PctDiscMM"
## Number of terminal nodes: 9
## Residual mean deviance: 0.7432 = 587.8 / 791
## Misclassification error rate: 0.1588 = 127 / 800
The tree has 9 terminal nodes and a training error rate of 0.1588 (127 / 800). It uses five variables: “LoyalCH”, “PriceDiff”, “SpecialCH”, “ListPriceDiff”, and “PctDiscMM”.
# Show every node of the fitted tree: split rule, n, deviance, class, probs.
print(OJ.tree)
## node), split, n, deviance, yval, (yprob)
## * denotes terminal node
##
## 1) root 800 1073.00 CH ( 0.60625 0.39375 )
## 2) LoyalCH < 0.5036 365 441.60 MM ( 0.29315 0.70685 )
## 4) LoyalCH < 0.280875 177 140.50 MM ( 0.13559 0.86441 )
## 8) LoyalCH < 0.0356415 59 10.14 MM ( 0.01695 0.98305 ) *
## 9) LoyalCH > 0.0356415 118 116.40 MM ( 0.19492 0.80508 ) *
## 5) LoyalCH > 0.280875 188 258.00 MM ( 0.44149 0.55851 )
## 10) PriceDiff < 0.05 79 84.79 MM ( 0.22785 0.77215 )
## 20) SpecialCH < 0.5 64 51.98 MM ( 0.14062 0.85938 ) *
## 21) SpecialCH > 0.5 15 20.19 CH ( 0.60000 0.40000 ) *
## 11) PriceDiff > 0.05 109 147.00 CH ( 0.59633 0.40367 ) *
## 3) LoyalCH > 0.5036 435 337.90 CH ( 0.86897 0.13103 )
## 6) LoyalCH < 0.764572 174 201.00 CH ( 0.73563 0.26437 )
## 12) ListPriceDiff < 0.235 72 99.81 MM ( 0.50000 0.50000 )
## 24) PctDiscMM < 0.196197 55 73.14 CH ( 0.61818 0.38182 ) *
## 25) PctDiscMM > 0.196197 17 12.32 MM ( 0.11765 0.88235 ) *
## 13) ListPriceDiff > 0.235 102 65.43 CH ( 0.90196 0.09804 ) *
## 7) LoyalCH > 0.764572 261 91.20 CH ( 0.95785 0.04215 ) *
Consider terminal node 11, which is reached when 0.280875 < LoyalCH < 0.5036 and PriceDiff > 0.05. This leaf contains 109 observations and has a deviance of 147. Its predicted class is CH, because 59.63% of the observations in the leaf take the CH value;
the remaining 40.37% take the MM value.
# Draw the fitted tree, then overlay the split and leaf labels on that plot.
plot(OJ.tree)
# pretty = 0 is passed through to the text method for the tree object.
text(OJ.tree, pretty = 0)
The most important predictor of Purchase is “LoyalCH”: the root split and both second-level splits (the top three internal nodes) are all on “LoyalCH”.
# Predict the class of each held-out observation with the unpruned tree, then
# cross-tabulate the predictions against the observed Purchase values.
set.seed(1)
OJ.preds1 <- predict(OJ.tree, newdata = OJ_test, type = "class")
caret::confusionMatrix(data = OJ.preds1, reference = OJ_test$Purchase)
## Confusion Matrix and Statistics
##
## Reference
## Prediction CH MM
## CH 160 38
## MM 8 64
##
## Accuracy : 0.8296
## 95% CI : (0.7794, 0.8725)
## No Information Rate : 0.6222
## P-Value [Acc > NIR] : 8.077e-14
##
## Kappa : 0.6154
##
## Mcnemar's Test P-Value : 1.904e-05
##
## Sensitivity : 0.9524
## Specificity : 0.6275
## Pos Pred Value : 0.8081
## Neg Pred Value : 0.8889
## Prevalence : 0.6222
## Detection Rate : 0.5926
## Detection Prevalence : 0.7333
## Balanced Accuracy : 0.7899
##
## 'Positive' Class : CH
##
# Test error rate of the unpruned tree: the share of held-out observations
# whose predicted class differs from the observed Purchase. Computing it from
# the predictions (rather than retyping the off-diagonal counts of the
# confusion matrix) keeps it correct if the split or the model changes.
OJ.te <- mean(OJ.preds1 != OJ_test$Purchase)
OJ.te
## [1] 0.1703704
The test error rate is 17.04% (46 of the 270 test observations are misclassified).
# Cross-validate the cost-complexity pruning sequence of the full tree, using
# misclassification (prune.misclass) as the pruning criterion, and print it.
OJ.tree.cv <- cv.tree(OJ.tree, FUN = prune.misclass)
print(OJ.tree.cv)
## $size
## [1] 9 8 7 4 2 1
##
## $dev
## [1] 145 145 146 146 167 315
##
## $k
## [1] -Inf 0.000000 3.000000 4.333333 10.500000 151.000000
##
## $method
## [1] "misclass"
##
## attr(,"class")
## [1] "prune" "tree.sequence"
# Plot the cross-validation result against tree size. Because cv.tree() was
# run with FUN = prune.misclass, $dev holds misclassification counts rather
# than deviance, so the axis is labelled accordingly.
plot(OJ.tree.cv$size, OJ.tree.cv$dev, type = "b",
     xlab = "Tree size", ylab = "CV misclassifications")
# Highlight the chosen size (4) at its own CV value so the marker lies on the
# curve; pairing size 4 with min(dev) would place it off the curve whenever
# the minimum occurs at a different size.
at.size4 <- OJ.tree.cv$size == 4
points(OJ.tree.cv$size[at.size4], OJ.tree.cv$dev[at.size4], col = "red")
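Chosen tree size = 4: its cross-validated error count (146) is within one misclassification of the minimum (145, at sizes 8 and 9), while using far fewer terminal nodes.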
# Prune the full tree back to the subtree with 4 terminal nodes.
OJ.prune <- prune.misclass(OJ.tree, best = 4)

# Draw the pruned tree with its labels, then report its training-set summary.
plot(OJ.prune)
text(OJ.prune, pretty = 0)
print(summary(OJ.prune))
##
## Classification tree:
## snip.tree(tree = OJ.tree, nodes = c(4L, 10L, 3L))
## Variables actually used in tree construction:
## [1] "LoyalCH" "PriceDiff"
## Number of terminal nodes: 4
## Residual mean deviance: 0.8922 = 710.2 / 796
## Misclassification error rate: 0.1788 = 143 / 800
# Summary of the unpruned tree again, for comparison with the pruned one.
print(summary(OJ.tree))
##
## Classification tree:
## tree(formula = Purchase ~ ., data = OJ_train)
## Variables actually used in tree construction:
## [1] "LoyalCH" "PriceDiff" "SpecialCH" "ListPriceDiff"
## [5] "PctDiscMM"
## Number of terminal nodes: 9
## Residual mean deviance: 0.7432 = 587.8 / 791
## Misclassification error rate: 0.1588 = 127 / 800
The training misclassification error rate of the pruned tree is 0.1788. This is higher than that of the original (unpruned) tree, 0.1588.
# Predict the held-out observations with the pruned tree and cross-tabulate
# the predictions against the observed Purchase values.
set.seed(1)
OJ.prune.preds6 <- predict(OJ.prune, newdata = OJ_test, type = "class")
caret::confusionMatrix(data = OJ.prune.preds6, reference = OJ_test$Purchase)
## Confusion Matrix and Statistics
##
## Reference
## Prediction CH MM
## CH 161 41
## MM 7 61
##
## Accuracy : 0.8222
## 95% CI : (0.7713, 0.8659)
## No Information Rate : 0.6222
## P-Value [Acc > NIR] : 6.769e-13
##
## Kappa : 0.5954
##
## Mcnemar's Test P-Value : 1.906e-06
##
## Sensitivity : 0.9583
## Specificity : 0.5980
## Pos Pred Value : 0.7970
## Neg Pred Value : 0.8971
## Prevalence : 0.6222
## Detection Rate : 0.5963
## Detection Prevalence : 0.7481
## Balanced Accuracy : 0.7782
##
## 'Positive' Class : CH
##
# Test error rate of the pruned tree: the share of held-out observations whose
# predicted class differs from the observed Purchase. Derived from the
# predictions so it cannot drift from the confusion matrix it summarises.
OJ.prune.te <- mean(OJ.prune.preds6 != OJ_test$Purchase)
OJ.prune.te
## [1] 0.1777778
# Unpruned-tree test error, shown again for side-by-side comparison.
print(OJ.te)
## [1] 0.1703704
The test error rate increased slightly with the pruned tree: it is 17.78%, whereas the unpruned tree had a test error rate of 17.04%.