# Gini index, cross-entropy, and classification error for a two-class node as functions of p
p = seq(0, 1, 0.01)
gini = p * (1 - p) * 2
entropy = -(p * log(p) + (1 - p) * log(1 - p))
class.err = 1 - pmax(p, 1 - p)
matplot(p, cbind(gini, entropy, class.err), type = "l", lty = 1,
    col = c("pink", "red", "purple"), xlab = "p", ylab = "Impurity measure")
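A legend makes the three curves easier to tell apart (a minimal sketch; colours and line type match the matplot call above):

legend("topright", legend = c("Gini index", "Cross-entropy", "Classification error"),
    col = c("pink", "red", "purple"), lty = 1)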
library(ISLR)
attach(Carseats)
set.seed(1)
train = sample(dim(Carseats)[1], dim(Carseats)[1]/2)
Carseats.train = Carseats[train, ]
Carseats.test = Carseats[-train, ]
library(tree)
tree.carseats = tree(Sales ~ ., data = Carseats.train)
summary(tree.carseats)
##
## Regression tree:
## tree(formula = Sales ~ ., data = Carseats.train)
## Variables actually used in tree construction:
## [1] "ShelveLoc" "Price" "Age" "Advertising" "Income"
## [6] "CompPrice"
## Number of terminal nodes: 18
## Residual mean deviance: 2.36 = 429.5 / 182
## Distribution of residuals:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
## -4.2570 -1.0360  0.1024  0.0000  0.9301  3.9130
plot(tree.carseats)
text(tree.carseats, pretty = 0)
pred.carseats = predict(tree.carseats, Carseats.test)
mean((Carseats.test$Sales - pred.carseats)^2)
cv.carseats = cv.tree(tree.carseats, FUN = prune.tree)
par(mfrow = c(1, 2))
plot(cv.carseats$size, cv.carseats$dev, type = "b")
plot(cv.carseats$k, cv.carseats$dev, type = "b")
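The tree size minimising the cross-validated deviance can be read off the left-hand plot or extracted directly (a minimal sketch using the cv.carseats object above):

cv.carseats$size[which.min(cv.carseats$dev)]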
pruned.carseats = prune.tree(tree.carseats, best = 9)
par(mfrow = c(1, 1))
plot(pruned.carseats)
text(pruned.carseats, pretty = 0)
pred.pruned = predict(pruned.carseats, Carseats.test)
mean((Carseats.test$Sales - pred.pruned)^2)
library(randomForest)
bag.carseats = randomForest(Sales ~ ., data = Carseats.train, mtry = 10, ntree = 500,
importance = T)
bag.pred = predict(bag.carseats, Carseats.test)
mean((Carseats.test$Sales - bag.pred)^2)
importance(bag.carseats)
##                %IncMSE IncNodePurity
## CompPrice   16.9874366    126.852848
## Income       3.8985402     78.314126
## Advertising 16.5698586    123.702901
## Population   0.6487058     62.328851
## Price       55.3976775    514.654890
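The same importance measures can be shown graphically with varImpPlot() from randomForest (a minimal sketch):

varImpPlot(bag.carseats)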
rf.carseats = randomForest(Sales ~ ., data = Carseats.train, mtry = 5, ntree = 500,
    importance = T)
rf.pred = predict(rf.carseats, Carseats.test)
mean((Carseats.test$Sales - rf.pred)^2)
importance(rf.carseats)
##              %IncMSE IncNodePurity
## Population  1.131119      82.24483
## Price      46.600559     451.70021
## ShelveLoc  37.352447     278.79756
## Age        19.992113     194.99430
## Education   1.945616      51.70741
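To see how the test MSE depends on the number of predictors tried at each split, the forest can be refit over a grid of mtry values (a minimal sketch; mse.by.mtry is a name introduced here for illustration):

mse.by.mtry = sapply(1:10, function(m) {
    fit = randomForest(Sales ~ ., data = Carseats.train, mtry = m, ntree = 500)
    mean((Carseats.test$Sales - predict(fit, Carseats.test))^2)
})
plot(1:10, mse.by.mtry, type = "b", xlab = "mtry", ylab = "Test MSE")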
library(ISLR)
attach(OJ)
set.seed(1013)
train = sample(dim(OJ)[1], 800)
OJ.train = OJ[train, ]
OJ.test = OJ[-train, ]
library(tree)
oj.tree = tree(Purchase ~ ., data = OJ.train)
summary(oj.tree)
##
## Classification tree:
## tree(formula = Purchase ~ ., data = OJ.train)
## Variables actually used in tree construction:
## [1] "LoyalCH"   "PriceDiff"
## Number of terminal nodes: 7
## Residual mean deviance: 0.7517 = 596.1 / 793
oj.tree
## node), split, n, deviance, yval, (yprob)
##       * denotes terminal node
##
##  1) root 800 1075.00 CH ( 0.60250 0.39750 )
##    2) LoyalCH < 0.5036 359  422.80 MM ( 0.27577 0.72423 )
##    3) LoyalCH > 0.5036 441  343.30 CH ( 0.86848 0.13152 )
##      6) LoyalCH < 0.764572 186  210.30 CH ( 0.74731 0.25269 )
##       12) PriceDiff < -0.165 29   34.16 MM ( 0.27586 0.72414 ) *
##       13) PriceDiff > -0.165 157  140.90 CH ( 0.83439 0.16561 )
##         26) PriceDiff < 0.265 82   95.37 CH ( 0.73171 0.26829 ) *
plot(oj.tree)
text(oj.tree, pretty = 0)
oj.pred = predict(oj.tree, OJ.test, type = "class")
table(OJ.test$Purchase, oj.pred)
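The overall test error rate of the unpruned tree follows directly from the predictions (a minimal sketch):

mean(oj.pred != OJ.test$Purchase)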
cv.oj = cv.tree(oj.tree, FUN = prune.tree)
plot(cv.oj$size, cv.oj$dev, type = "b", xlab = "Tree Size", ylab = "Deviance")
Which tree size corresponds to the lowest cross-validated classification error rate? A tree with 6 terminal nodes gives the lowest cross-validated deviance.
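The same answer can be extracted from the cv.oj object rather than read off the plot (a minimal sketch):

cv.oj$size[which.min(cv.oj$dev)]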
Produce a pruned tree corresponding to the optimal tree size obtained using cross-validation. If cross-validation does not lead to selection of a pruned tree, then create a pruned tree with five terminal nodes.
oj.pruned = prune.tree(oj.tree, best = 6)
summary(oj.tree)
## Classification tree:
## tree(formula = Purchase ~ ., data = OJ.train)
## Variables actually used in tree construction:
## Residual mean deviance: 0.7432 = 587.8 / 791
## Misclassification error rate: 0.1588 = 127 / 800
summary(oj.pruned)
##
## Classification tree:
## snip.tree(tree = oj.tree, nodes = c(10L, 4L))
## Number of terminal nodes: 6
## Residual mean deviance: 0.7748 = 614.4 / 793
treeOJ.pred = predict(oj.tree, newdata = OJ.test, type = "class")
table(treeOJ.pred, OJ.test$Purchase)
pruneOJ.pred = predict(oj.pruned, newdata = OJ.test, type = "class")
table(pruneOJ.pred, OJ.test$Purchase)
prunedOJvalerr = (36 + 8) / 270
prunedOJvalerr
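Equivalently, the pruned and unpruned test error rates can be computed directly from the predictions instead of reading counts off the confusion matrices (a minimal sketch):

mean(treeOJ.pred != OJ.test$Purchase)   # unpruned tree
mean(pruneOJ.pred != OJ.test$Purchase)  # pruned tree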