# Compare three node-impurity measures for a two-class problem as a function
# of p, the proportion of observations belonging to one of the classes.
p <- seq(0, 1, 0.01)
gini.index <- 2 * p * (1 - p)
classification.error <- 1 - pmax(p, 1 - p)
entropy <- -(p * log(p) + (1 - p) * log(1 - p))
# 0 * log(0) evaluates to NaN in R; use the limiting value 0 at p = 0 and
# p = 1 so the entropy curve reaches both ends of the plot.
entropy[is.nan(entropy)] <- 0

# Empty canvas first, then one line per measure.
par(bg = '#006666')
plot(NA, NA, xlim = c(0, 1), ylim = c(0, 1), xlab = 'p', ylab = 'f')
lines(p, gini.index, type = 'l', col = '#FF33CC')
lines(p, classification.error, col = 'darkslategray1')
lines(p, entropy, col = 'chartreuse')
# This problem involves the OJ data set, which is part of the ISLR package.
library(ISLR)

# Draw a reproducible random sample of about 66% of the rows for training;
# the rows that were not sampled form the test set.
set.seed(2)
set.training.oj <- sample(nrow(OJ), floor(nrow(OJ) * 0.66))
training.oj <- OJ[set.training.oj, ]
test.oj <- OJ[-set.training.oj, ]
# Fit a tree to the training data, with Purchase as the response and the other
# variables as predictors. Use the summary() function to produce summary
# statistics about the tree, and describe the results obtained. What is
# the training error rate? How many terminal nodes does the tree have?
library(tree)

tree.oj <- tree(Purchase ~ ., data = training.oj)
summary(tree.oj)
##
## Classification tree:
## tree(formula = Purchase ~ ., data = training.oj)
## Variables actually used in tree construction:
## [1] "LoyalCH" "PriceDiff"
## Number of terminal nodes: 6
## Residual mean deviance: 0.7523 = 526.6 / 700
## Misclassification error rate: 0.1728 = 122 / 706
# Print the fitted tree node by node: node number, split rule, n, deviance,
# predicted class and class probabilities; terminal nodes are marked with *.
tree.oj
## node), split, n, deviance, yval, (yprob)
## * denotes terminal node
##
## 1) root 706 947.50 CH ( 0.60482 0.39518 )
## 2) LoyalCH < 0.5036 323 382.20 MM ( 0.27864 0.72136 )
## 4) LoyalCH < 0.280875 154 115.10 MM ( 0.12338 0.87662 ) *
## 5) LoyalCH > 0.280875 169 230.00 MM ( 0.42012 0.57988 )
## 10) PriceDiff < 0.05 64 64.60 MM ( 0.20312 0.79688 ) *
## 11) PriceDiff > 0.05 105 144.40 CH ( 0.55238 0.44762 ) *
## 3) LoyalCH > 0.5036 383 281.20 CH ( 0.87990 0.12010 )
## 6) LoyalCH < 0.737888 151 176.60 CH ( 0.72848 0.27152 )
## 12) PriceDiff < 0.015 47 64.96 MM ( 0.46809 0.53191 ) *
## 13) PriceDiff > 0.015 104 89.30 CH ( 0.84615 0.15385 ) *
## 7) LoyalCH > 0.737888 232 48.26 CH ( 0.97845 0.02155 ) *
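There are 6 terminal nodes, and the training error rate is 0.1728 (about 17%).
LoyalCH and PriceDiff are the only variables actually used in the tree.
There are 11 nodes in total (5 internal and 6 terminal); the node labels run up to 13 only because of the binary numbering scheme.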
# Draw the fitted tree, then add the text labels to the drawing.
plot(x = tree.oj)
text(x = tree.oj, pretty = 0)
The tree splits on only two variables: LoyalCH first, then PriceDiff.
# Predict a class label for every test observation and cross-tabulate the
# predictions against the observed purchases (rows = actual, columns = predicted).
pred.tree.oj <- predict(object = tree.oj, newdata = test.oj, type = 'class')
table(Actual = test.oj$Purchase, Predicted = pred.tree.oj)
## Predicted
## Actual CH MM
## CH 196 30
## MM 39 99
# Test error rate: the share of test observations whose predicted class
# differs from the observed Purchase. Despite the "mse" in the name, this is
# a misclassification rate, not a mean squared error.
mse.pred.oj <- mean(test.oj$Purchase != pred.tree.oj)
mse.pred.oj
## [1] 0.1895604
The test error rate (misclassification rate) is approximately 0.19, or 19%.
# Apply the cv.tree() function to the training set in order to determine the
# optimal tree size.
cv.tree.oj <- cv.tree(tree.oj, FUN = prune.tree)
cv.tree.oj
## $size
## [1] 6 5 4 3 2 1
##
## $dev
## [1] 590.2541 599.2843 598.8823 615.9539 668.4509 952.7208
##
## $k
## [1] -Inf 20.94305 22.33652 37.19361 56.35952 284.02935
##
## $method
## [1] "deviance"
##
## attr(,"class")
## [1] "prune" "tree.sequence"
The optimal tree size appears to be 6 terminal nodes, which gives the lowest cross-validated deviance (590.25).
# cv.tree() was run with FUN = prune.tree, so $dev holds the cross-validated
# deviance rather than a classification error; label the axis accordingly.
plot(cv.tree.oj$size, cv.tree.oj$dev, type = "b",
     xlab = "Tree Size", ylab = "Cross-Validated Deviance")
A tree with 6 terminal nodes has the lowest cross-validated deviance, but there is not much difference between the deviance at 4 and at 6 nodes.
# Prune the fitted tree back to a subtree with 5 terminal nodes.
prune.oj <- prune.tree(tree = tree.oj, best = 5)
Because cross-validation favors the full 6-node tree, pruning to the optimal size would leave the tree unchanged, so we prune to 5 terminal nodes for comparison.
# Summary statistics (variables used, terminal nodes, training error) for the
# pruned tree.
summary(object = prune.oj)
##
## Classification tree:
## snip.tree(tree = tree.oj, nodes = 5L)
## Variables actually used in tree construction:
## [1] "LoyalCH" "PriceDiff"
## Number of terminal nodes: 5
## Residual mean deviance: 0.7811 = 547.5 / 701
## Misclassification error rate: 0.1884 = 133 / 706
# Summary statistics for the original, unpruned tree, shown again for
# side-by-side comparison with the pruned tree.
summary(object = tree.oj)
##
## Classification tree:
## tree(formula = Purchase ~ ., data = training.oj)
## Variables actually used in tree construction:
## [1] "LoyalCH" "PriceDiff"
## Number of terminal nodes: 6
## Residual mean deviance: 0.7523 = 526.6 / 700
## Misclassification error rate: 0.1728 = 122 / 706
The training error increases from 0.1728 to 0.1884, or approximately from 17% to 19%.
# Compare the test error rates of the pruned and the unpruned tree on the
# held-out test set. Both quantities are misclassification rates (share of
# wrongly predicted classes), so they are labelled as such rather than as MSE.
pred.oj.prune <- predict(prune.oj, test.oj, type = 'class')
mse.pred.oj.prune <- mean(pred.oj.prune != test.oj$Purchase)
print("Test Error Rate of Pruned Tree")
## [1] "Test Error Rate of Pruned Tree"
mse.pred.oj.prune
## [1] 0.2225275
print("Test Error Rate of Original Tree")
## [1] "Test Error Rate of Original Tree"
mse.pred.oj
## [1] 0.1895604
After pruning, the test error rate increases from approximately 0.19 (19%) to approximately 0.22 (22%).
# Detach OJ only if it is actually on the search path; an unconditional
# detach() raises an error when the data frame was never attached.
if ("OJ" %in% search()) {
  detach(OJ)
}