Chapter 08 (page 332): 3, 8, 9

(3)
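
For a two-class node with p = p̂_m1 (the proportion of class-1 observations in the node), the three impurity measures plotted below are: classification error E = 1 - max(p, 1 - p), Gini index G = 2 p (1 - p), and (base-2) cross-entropy D = -[ p log2(p) + (1 - p) log2(1 - p) ].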

p = seq(0.0, 1.0, 0.01)    # p-hat_m1: proportion of class 1 in the node
q = 1 - p

class.error.fun = function(a){
  # classification error: 1 - max(p, 1 - p)
  1 - max(a, 1 - a)
}

class.error = sapply(p, class.error.fun)
gini = ( p * (1 - p) ) + ( q * (1 - q) )          # Gini index: 2 p (1 - p)
entropy = - ( ( p * log2(p) ) + ( q * log2(q) ) ) # NaN at p = 0 and 1 is simply not drawn

plot(p, entropy, type = "l", xlab = "", ylab = "")
lines(p, gini, col = "blue")
lines(p, class.error, col = "red")
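
A legend makes the three curves easier to tell apart (a small optional addition; it assumes the plot above is still the active graphics device):

legend("topright", legend = c("Entropy", "Gini index", "Classification error"),
       col = c("black", "blue", "red"), lty = 1)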

(8)

(a) Train and test dataset

library(ISLR)
set.seed(42)

train = sample(nrow(Carseats), 0.8 * nrow(Carseats))   # 320 of the 400 rows as training indices
car.train = Carseats[train, ]
car.test = Carseats[-train, ]

(b) Tree model

library(tree)
set.seed(42)

car.tree = tree(Sales ~., car.train)
plot(car.tree)
text(car.tree,pretty=0)

tree.pred = predict(car.tree, newdata = car.test)

mse = mean((tree.pred - car.test$Sales)^2)

mse
## [1] 3.906465

The test MSE of the unpruned tree is 3.906.

(c) Tree Model with pruning

cv.car = cv.tree(car.tree)

plot(cv.car$size, cv.car$dev, type = "b")
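
Instead of hard-coding best = 5, the subtree size with the lowest cross-validated deviance can be read off the cv.tree object (a sketch using the cv.car object above; the name best.size is illustrative, and best = 5 is kept below to match the reported results):

best.size = cv.car$size[which.min(cv.car$dev)]   # size with the smallest CV deviance
best.size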

prune.car = prune.tree(car.tree, best = 5)
plot(prune.car)
text(prune.car, pretty = 0)

tree.pred = predict(prune.car, newdata = car.test)

mse1 = mean((tree.pred - car.test$Sales)^2)

mse1
## [1] 5.084432

We observe that pruning the tree increases the test MSE (from 3.906 to 5.084).

(d) Bagging

library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
set.seed(42)

bag.car = randomForest(Sales ~., car.train, importance = TRUE, ntree = 500, mtry = 10)

tree.pred = predict(bag.car, newdata = car.test)

mse2 = mean((tree.pred - car.test$Sales)^2)

mse2
## [1] 2.247194
importance(bag.car)
##                %IncMSE IncNodePurity
## CompPrice   37.2655918    272.684989
## Income      11.3481020    139.099550
## Advertising 24.7312015    187.974015
## Population  -0.8309134     84.799869
## Price       74.6565917    742.426277
## ShelveLoc   76.1027278    688.088937
## Age         22.3794426    239.876755
## Education    2.8933345     64.433523
## Urban        0.3376574      9.737118
## US           1.6649153     10.587431

For bagging we set m = p (mtry = 10, so all predictors are considered at each split). The test MSE is 2.2472, lower than both previous values. According to the importance measures, the most influential variables are Price, ShelveLoc, and CompPrice.
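
These importance measures can also be visualized directly (a quick optional sketch using randomForest's varImpPlot on the bag.car object above):

varImpPlot(bag.car, main = "Variable importance (bagging, m = p)")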

(e) Random Forest

rf.car = randomForest(Sales ~., car.train, importance = TRUE, ntree = 500, mtry = 3)

tree.pred = predict(rf.car, newdata = car.test)

mse3 = mean((tree.pred - car.test$Sales)^2)
mse3
## [1] 2.835609
importance(rf.car)
##                %IncMSE IncNodePurity
## CompPrice   16.5827316     225.67910
## Income       6.3862954     193.16698
## Advertising 17.5873572     218.29447
## Population   1.4117218     155.91267
## Price       44.9860497     581.88181
## ShelveLoc   49.9640240     538.27366
## Age         15.2400202     271.29274
## Education    0.8508437     111.00186
## Urban       -2.1433634      22.38776
## US           3.8134550      34.28175

In this random forest model we use m ≈ sqrt(p) (mtry = 3). The test MSE of 2.8356 is lower than those of the single tree in (b) and the pruned tree in (c), but higher than that of bagging in (d). Price and ShelveLoc are again the most important variables.
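
For reference, the four test MSEs computed above can be collected side by side (this only reuses the objects mse, mse1, mse2 and mse3 from the chunks above):

c(tree = mse, pruned = mse1, bagging = mse2, random.forest = mse3)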

(9)

(a)

OJ.train = OJ[-train, ]   # training set: reuses the index vector `train` from 8(a), giving 750 observations
OJ.test = OJ[train, ]     # test set: the remaining 320 observations
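
The exercise itself asks for a training set of 800 randomly sampled observations. A sketch of that split is shown below for reference (the names train.800, OJ.train.800 and OJ.test.800 are illustrative; this split was not used here, and using it would change the numbers reported in the rest of this question):

set.seed(42)
train.800 = sample(nrow(OJ), 800)   # 800 of OJ's 1070 rows as training indices
OJ.train.800 = OJ[train.800, ]
OJ.test.800 = OJ[-train.800, ]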

(b)

set.seed(42)
OJ.tree = tree(Purchase ~., data = OJ.train)
summary(OJ.tree)
## 
## Classification tree:
## tree(formula = Purchase ~ ., data = OJ.train)
## Variables actually used in tree construction:
## [1] "LoyalCH"     "SalePriceMM" "PriceDiff"  
## Number of terminal nodes:  8 
## Residual mean deviance:  0.7681 = 569.9 / 742 
## Misclassification error rate: 0.1627 = 122 / 750

The classification tree has 8 terminal nodes and a training misclassification error rate of 16.27%. Only three variables (LoyalCH, SalePriceMM, and PriceDiff) are used in its construction.

(c)

OJ.tree
## node), split, n, deviance, yval, (yprob)
##       * denotes terminal node
## 
##  1) root 750 1025.00 CH ( 0.57067 0.42933 )  
##    2) LoyalCH < 0.48285 310  331.20 MM ( 0.22581 0.77419 )  
##      4) LoyalCH < 0.276142 180  125.60 MM ( 0.11111 0.88889 )  
##        8) LoyalCH < 0.0356415 60   10.17 MM ( 0.01667 0.98333 ) *
##        9) LoyalCH > 0.0356415 120  104.90 MM ( 0.15833 0.84167 ) *
##      5) LoyalCH > 0.276142 130  173.20 MM ( 0.38462 0.61538 )  
##       10) SalePriceMM < 2.04 75   80.28 MM ( 0.22667 0.77333 ) *
##       11) SalePriceMM > 2.04 55   74.03 CH ( 0.60000 0.40000 ) *
##    3) LoyalCH > 0.48285 440  423.20 CH ( 0.81364 0.18636 )  
##      6) LoyalCH < 0.764572 245  301.80 CH ( 0.69388 0.30612 )  
##       12) PriceDiff < 0.265 143  197.40 CH ( 0.53846 0.46154 )  
##         24) PriceDiff < -0.165 37   41.05 MM ( 0.24324 0.75676 ) *
##         25) PriceDiff > -0.165 106  138.30 CH ( 0.64151 0.35849 ) *
##       13) PriceDiff > 0.265 102   60.88 CH ( 0.91176 0.08824 ) *
##      7) LoyalCH > 0.764572 195   60.32 CH ( 0.96410 0.03590 ) *
plot(OJ.tree)
text(OJ.tree,pretty=0)

Consider terminal node 25. Its splitting criterion is PriceDiff > -0.165, and it contains 106 observations with a deviance of 138.30. Any observation reaching this node is predicted as CH, and 64.15% of the observations in the node (roughly 68 of the 106) are in fact CH.

(d)

plot(OJ.tree)
text(OJ.tree,pretty=0)

The first two splits of the tree are on LoyalCH, which suggests it is the most important variable in this model: observations with high brand loyalty (LoyalCH > 0.76) are almost always classified as CH, while those with low loyalty (LoyalCH < 0.28) are almost always classified as MM.

(e)

set.seed(42)
OJ.pred = predict(OJ.tree, newdata = OJ.test, type = "class")
table(OJ.test$Purchase, OJ.pred)
##     OJ.pred
##       CH  MM
##   CH 209  16
##   MM  36  59
error = 1 - (209+59)/320
error
## [1] 0.1625

The test misclassification error rate is 16.25%.
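
The same error rate can be computed directly from the predictions, without reading the counts off the table (a one-line equivalent using the objects above):

mean(OJ.pred != OJ.test$Purchase)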

(f)

set.seed(42)
cv.OJ = cv.tree(OJ.tree, FUN = prune.misclass)
cv.OJ
## $size
## [1] 8 7 5 2 1
## 
## $dev
## [1] 151 149 149 154 322
## 
## $k
## [1]       -Inf   0.000000   5.500000   6.333333 170.000000
## 
## $method
## [1] "misclass"
## 
## attr(,"class")
## [1] "prune"         "tree.sequence"

(g)

plot(cv.OJ$size, cv.OJ$dev, type = "b", xlab = "Tree size", ylab = "Deviance")

bestsize = cv.OJ$size[which.min(cv.OJ$dev)]
bestsize
## [1] 7

(h)

The best size according to which.min is 7; however, the CV output shows that a tree of size 5 attains the same deviance (149), so we select the smaller tree of size 5.
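
This choice can also be made programmatically by taking the smallest size that attains the minimum cross-validated deviance (a sketch using the cv.OJ object from (f)):

min(cv.OJ$size[cv.OJ$dev == min(cv.OJ$dev)])   # smallest size with the lowest CV error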

(i)

prune.OJ = prune.misclass(OJ.tree, best = 5)

plot(prune.OJ)
text(prune.OJ, pretty = 0)

(j)

prune.pred = predict(prune.OJ, newdata = OJ.train, type = "class")
table(OJ.train$Purchase, prune.pred)
##     prune.pred
##       CH  MM
##   CH 349  79
##   MM  54 268
error.prune1 = 1 - (349+268)/750

OJ.pred = predict(OJ.tree, newdata = OJ.train, type = "class")
table(OJ.train$Purchase, OJ.pred)
##     OJ.pred
##       CH  MM
##   CH 382  46
##   MM  76 246
error1 = 1 - (382+246)/750

error.prune1
## [1] 0.1773333
error1
## [1] 0.1626667

The training misclassification rate is higher for the pruned tree (17.73%) than for the unpruned tree (16.27%), as expected, since pruning reduces the tree's fit to the training data.

(k)

prune.pred = predict(prune.OJ, newdata = OJ.test, type = "class")
table(OJ.test$Purchase, prune.pred)
##     prune.pred
##       CH  MM
##   CH 198  27
##   MM  22  73
error.prune = 1 - (198+73)/320
error.prune
## [1] 0.153125
error
## [1] 0.1625

The test error rate of the pruned tree (15.31%) is lower than that of the unpruned tree in (e) (16.25%), so pruning improves test performance here.
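
For a side-by-side comparison, the two test error rates computed above can be printed together (only reusing the error and error.prune objects):

c(unpruned = error, pruned = error.prune)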