# Compare three node-impurity measures for a two-class node as a function of
# p, the proportion of observations belonging to one of the classes.
p <- seq(0, 1, 0.001)
gini.index <- 2 * p * (1 - p)
class.error <- 1 - pmax(p, 1 - p)
cross.entropy <- -(p * log(p) + (1 - p) * log(1 - p))
# At p = 0 and p = 1 the formula evaluates 0 * log(0) = 0 * -Inf = NaN.
# x * log(x) tends to 0 as x -> 0, so use that limit at the two endpoints
# instead of leaving NaN values in the curve.
cross.entropy[is.nan(cross.entropy)] <- 0
matplot(
  p,
  cbind(gini.index, class.error, cross.entropy),
  ylab = "gini.index, class.error, cross.entropy",
  col = c("red", "green", "blue")
)
library(ISLR)
library(randomForest)
library(caret)
# Work on a copy of the Carseats data and split it 80% / 20% into training
# and test rows. The seed fixes which rows are drawn.
b1 <- Carseats
set.seed(1)
# Sample 80% of the row indices without replacement. FALSE is spelled out
# because `F` is an ordinary variable that can be reassigned, whereas FALSE is
# a reserved constant.
tr_ind <- sample(nrow(b1), 0.8 * nrow(b1), replace = FALSE)
b1train <- b1[tr_ind, ]
b1test <- b1[-tr_ind, ]
# Grow a regression tree for Sales using every other column of the training
# set as a candidate predictor.
tree.seats <- tree::tree(Sales ~ ., data = b1train)
## Registered S3 method overwritten by 'tree':
## method from
## print.tree cli
summary(tree.seats)
##
## Regression tree:
## tree::tree(formula = Sales ~ ., data = b1train)
## Variables actually used in tree construction:
## [1] "ShelveLoc" "Price" "Age" "Income" "CompPrice"
## [6] "Advertising"
## Number of terminal nodes: 16
## Residual mean deviance: 2.572 = 781.9 / 304
## Distribution of residuals:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -4.45400 -1.07000 -0.05544 0.00000 1.14500 4.69600
# Draw the tree and label its splits.
plot(tree.seats)
text(tree.seats, pretty = 0)
# Mean squared error of the unpruned tree on the held-out test rows.
treeseat.pred <- predict(tree.seats, newdata = b1test)
mean((b1test$Sales - treeseat.pred)^2)
## [1] 4.936081
# Cross-validate the regression tree and plot the cross-validated deviance
# against tree size.
set.seed(1)
cv.seats <- tree::cv.tree(tree.seats)
plot(cv.seats$size, cv.seats$dev, type = "b")
# Prune back to 10 terminal nodes and draw the pruned tree.
prune.car <- tree::prune.tree(tree.seats, best = 10)
plot(prune.car)
text(prune.car, pretty = 0)
# Test MSE of the pruned tree. This reuses treeseat.pred, so the unpruned
# predictions are overwritten.
treeseat.pred <- predict(prune.car, newdata = b1test)
mean((b1test$Sales - treeseat.pred)^2)
## [1] 5.088731
In this case, pruning did not improve the test MSE: it rose from 4.94 for the unpruned tree to 5.09 for the pruned tree.
# Bagging: mtry = 10 makes all 10 predictors candidates at every split, so the
# trees differ only through the bootstrap samples. 551 trees are grown.
set.seed(1)
bag.seats <- randomForest(
  Sales ~ .,
  data = b1train,
  mtry = 10,
  ntree = 551,
  importance = TRUE
)
# Test-set mean squared error of the bagged model.
bagseat.pred <- predict(bag.seats, newdata = b1test)
mean((b1test$Sales - bagseat.pred)^2)
## [1] 2.94674
# Variable importance measures (%IncMSE and IncNodePurity).
importance(bag.seats)
## %IncMSE IncNodePurity
## CompPrice 40.223153 260.94508
## Income 12.662829 138.23658
## Advertising 23.611579 190.71765
## Population -2.386952 73.81902
## Price 80.660141 744.61378
## ShelveLoc 78.381654 692.52750
## Age 26.637401 232.40567
## Education 2.557387 62.04542
## Urban -2.563052 10.15531
## US 3.486152 12.13078
# Random forest for Sales.
# This call used to pass mtry = 10, which makes all 10 predictors candidates at
# every split. That is bagging, i.e. the same model as bag.seats apart from
# the number of trees. Leaving mtry unset lets randomForest() use its
# regression default of max(floor(p / 3), 1) = 3 candidate predictors per
# split, which is what distinguishes a random forest from bagging.
set.seed(1)
rando.seats <- randomForest(Sales ~ ., data = b1train, importance = TRUE)
# Test-set mean squared error of the random forest.
randseat.pred <- predict(rando.seats, newdata = b1test)
mean((randseat.pred - b1test$Sales)^2)
# NOTE(review): the output recorded below (test MSE and importance table) was
# produced by the earlier mtry = 10 run; re-run to refresh it.
## [1] 2.945423
importance(rando.seats)
## %IncMSE IncNodePurity
## CompPrice 38.143176 259.77221
## Income 11.839999 138.20846
## Advertising 22.964249 191.25839
## Population -2.309923 74.13451
## Price 76.903940 744.20064
## ShelveLoc 73.841154 692.64875
## Age 25.449768 231.66005
## Education 2.547928 61.58542
## Urban -2.600879 10.15212
## US 3.572899 12.28877
# Split OJ into 800 randomly chosen training rows and a test set made of the
# remaining rows. The seed fixes the draw.
set.seed(1)
train <- sample(seq_len(nrow(OJ)), 800)
OJtrain <- OJ[train, ]
OJtest <- OJ[-train, ]
The tree uses 5 variables and has 9 terminal nodes. Its training error rate is 0.1588.
# Fit a classification tree for Purchase using every other column of the
# training set as a candidate predictor, then summarise it.
tree.OJ <- tree::tree(Purchase ~ ., data = OJtrain)
summary(tree.OJ)
##
## Classification tree:
## tree::tree(formula = Purchase ~ ., data = OJtrain)
## Variables actually used in tree construction:
## [1] "LoyalCH" "PriceDiff" "SpecialCH" "ListPriceDiff"
## [5] "PctDiscMM"
## Number of terminal nodes: 9
## Residual mean deviance: 0.7432 = 587.8 / 791
## Misclassification error rate: 0.1588 = 127 / 800
# Draw the tree and label its splits.
plot(tree.OJ)
text(tree.OJ, pretty = 0)
# Text listing of every node: split, n, deviance, predicted class, class
# probabilities.
print(tree.OJ)
## node), split, n, deviance, yval, (yprob)
## * denotes terminal node
##
## 1) root 800 1073.00 CH ( 0.60625 0.39375 )
## 2) LoyalCH < 0.5036 365 441.60 MM ( 0.29315 0.70685 )
## 4) LoyalCH < 0.280875 177 140.50 MM ( 0.13559 0.86441 )
## 8) LoyalCH < 0.0356415 59 10.14 MM ( 0.01695 0.98305 ) *
## 9) LoyalCH > 0.0356415 118 116.40 MM ( 0.19492 0.80508 ) *
## 5) LoyalCH > 0.280875 188 258.00 MM ( 0.44149 0.55851 )
## 10) PriceDiff < 0.05 79 84.79 MM ( 0.22785 0.77215 )
## 20) SpecialCH < 0.5 64 51.98 MM ( 0.14062 0.85938 ) *
## 21) SpecialCH > 0.5 15 20.19 CH ( 0.60000 0.40000 ) *
## 11) PriceDiff > 0.05 109 147.00 CH ( 0.59633 0.40367 ) *
## 3) LoyalCH > 0.5036 435 337.90 CH ( 0.86897 0.13103 )
## 6) LoyalCH < 0.764572 174 201.00 CH ( 0.73563 0.26437 )
## 12) ListPriceDiff < 0.235 72 99.81 MM ( 0.50000 0.50000 )
## 24) PctDiscMM < 0.196196 55 73.14 CH ( 0.61818 0.38182 ) *
## 25) PctDiscMM > 0.196196 17 12.32 MM ( 0.11765 0.88235 ) *
## 13) ListPriceDiff > 0.235 102 65.43 CH ( 0.90196 0.09804 ) *
## 7) LoyalCH > 0.764572 261 91.20 CH ( 0.95785 0.04215 ) *
Node 10 contains 79 observations. About 77.2% of them are MM and 22.8% are CH, so the node's predicted class is MM.
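The variables used in the tree are LoyalCH, SpecialCH, PriceDiff, PctDiscMM, and ListPriceDiff. LoyalCH is the most important of them: it defines the root split and every split in the two levels below it.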
# Redraw the tree, this time with text()'s default labelling.
plot(tree.OJ)
text(tree.OJ)
# Predict the class of each test row and cross-tabulate predictions (rows)
# against the true Purchase values (columns).
treeOJ.pred <- predict(tree.OJ, newdata = OJtest, type = "class")
table(treeOJ.pred, OJtest$Purchase)
##
## treeOJ.pred CH MM
## CH 160 38
## MM 8 64
# Test error rate: the share of test rows whose prediction differs from the
# truth. It is computed from the predictions rather than from counts copied by
# hand out of the table, so it stays correct if the split or the tree changes.
mean(treeOJ.pred != OJtest$Purchase)
## [1] 0.1703704
The test error rate is 0.1704 (46 of the 270 test observations are misclassified).
# Cross-validate the OJ tree, pruning by misclassification count.
# prune.misclass is qualified with tree:: because the package is only ever
# reached through tree:: in this script; an unqualified name would fail with
# "object 'prune.misclass' not found" unless tree happens to be attached.
# NOTE(review): no seed is set immediately before this call, so the fold
# assignment depends on the RNG state left by the preceding code.
OJcv <- tree::cv.tree(tree.OJ, FUN = tree::prune.misclass)
OJcv
## $size
## [1] 9 8 7 4 2 1
##
## $dev
## [1] 150 150 149 158 172 315
##
## $k
## [1] -Inf 0.000000 3.000000 4.333333 10.500000 151.000000
##
## $method
## [1] "misclass"
##
## attr(,"class")
## [1] "prune" "tree.sequence"
# Plot the cross-validated $dev against tree size.
plot(
  OJcv$size,
  OJcv$dev,
  type = "b",
  xlab = "Tree Size",
  ylab = "cv classification error rate"
)
The tree with 7 terminal nodes has the lowest cross-validated error (149 misclassifications).
# Prune the OJ tree back to 7 terminal nodes.
# NOTE(review): the cross-validation used prune.misclass, while prune.tree()
# is called here without a method argument; confirm the default pruning
# criterion is the one intended.
prune.OJ <- tree::prune.tree(tree.OJ, best = 7)
The pruned tree has a higher training error rate (0.1625) than the unpruned tree (0.1588).
# Training-set summary of the unpruned tree.
print(summary(tree.OJ))
##
## Classification tree:
## tree::tree(formula = Purchase ~ ., data = OJtrain)
## Variables actually used in tree construction:
## [1] "LoyalCH" "PriceDiff" "SpecialCH" "ListPriceDiff"
## [5] "PctDiscMM"
## Number of terminal nodes: 9
## Residual mean deviance: 0.7432 = 587.8 / 791
## Misclassification error rate: 0.1588 = 127 / 800
# Training-set summary of the pruned tree, for comparison.
print(summary(prune.OJ))
##
## Classification tree:
## snip.tree(tree = tree.OJ, nodes = c(10L, 4L))
## Variables actually used in tree construction:
## [1] "LoyalCH" "PriceDiff" "ListPriceDiff" "PctDiscMM"
## Number of terminal nodes: 7
## Residual mean deviance: 0.7748 = 614.4 / 793
## Misclassification error rate: 0.1625 = 130 / 800
# Test error of the unpruned tree: predict classes for the test rows and
# cross-tabulate predictions (rows) against the true Purchase values (columns).
treeOJ.pred <- predict(tree.OJ, newdata = OJtest, type = "class")
table(treeOJ.pred, OJtest$Purchase)
##
## treeOJ.pred CH MM
## CH 160 38
## MM 8 64
# Share of misclassified test rows. It is computed from the predictions
# instead of from counts copied by hand out of the table, so it cannot drift
# out of sync with the model.
unprunedOJvalerr <- mean(treeOJ.pred != OJtest$Purchase)
unprunedOJvalerr
## [1] 0.1703704
# Same calculation for the pruned tree.
pruneOJ.pred <- predict(prune.OJ, newdata = OJtest, type = "class")
table(pruneOJ.pred, OJtest$Purchase)
##
## pruneOJ.pred CH MM
## CH 160 36
## MM 8 66
prunedOJvalerr <- mean(pruneOJ.pred != OJtest$Purchase)
prunedOJvalerr
## [1] 0.162963