3

# Grid of values for p, the proportion of observations in class 1 within a node
err = seq(0.0, 1.0, .01)
m = 1 - err

# Classification error: 1 - max(p, 1 - p)
err.fun = function(a) {
  b = 1 - a
  return(1 - max(a, b))
}

class.error = sapply(err, err.fun)
gini = (err * (1 - err)) + (m * (1 - m))
# Cross-entropy; 0 * log2(0) gives NaN at p = 0 and p = 1, which the plot simply skips
entropy = -((err * log2(err)) + (m * log2(m)))

# Entropy in black, Gini index in blue, classification error in red
plot(err, entropy, type = "l", xlab = "", ylab = "")
lines(err, gini, col = "blue")
lines(err, class.error, col = "red")
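
To make the three curves easier to tell apart, a legend can be added after the lines() calls above; a minimal sketch:

# Label the three impurity measures drawn above
legend("topright", legend = c("Entropy", "Gini index", "Classification error"),
       col = c("black", "blue", "red"), lty = 1, bty = "n")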

8

a)

library(ISLR)
set.seed(42)

# 80/20 train-test split of the Carseats data
train = sample(nrow(Carseats), 0.8 * nrow(Carseats))
car.train = Carseats[train, ]
car.test = Carseats[-train, ]

b)

library(tree)
set.seed(42)

# Fit a regression tree for Sales using all remaining variables as predictors
car.tree = tree(Sales ~ ., data = car.train)
plot(car.tree)
text(car.tree, pretty = 0)

tree.pred = predict(car.tree, newdata = car.test)

mse1 = mean((tree.pred - car.test$Sales)^2)
mse1
## [1] 3.906465

The test MSE of the unpruned tree is 3.9065.
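
Since Sales in Carseats is recorded in thousands of units, the square root of this MSE puts the error on a more interpretable scale; a quick check:

# RMSE: typical prediction error, in thousands of unit sales
sqrt(mse1)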

c)

# Cross-validation to see how deviance changes with tree size
cv.car = cv.tree(car.tree)

plot(cv.car$size, cv.car$dev, type = "b")

# Prune the tree down to 5 terminal nodes
prune.car = prune.tree(car.tree, best = 5)
plot(prune.car)
text(prune.car, pretty = 0)

tree.pred = predict(prune.car, newdata = car.test)

mse2 = mean((tree.pred - car.test$Sales)^2)
mse2
## [1] 5.084432

Pruning the tree to 5 terminal nodes increases the test MSE from 3.9065 to 5.0844 in this scenario.
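
The value best = 5 above was fixed by hand; a short sketch (not part of the original analysis) of pruning at the size that minimizes the cross-validated deviance instead:

# Prune at the CV-selected size rather than a hand-picked value
best.size = cv.car$size[which.min(cv.car$dev)]
prune.cv = prune.tree(car.tree, best = best.size)
mean((predict(prune.cv, newdata = car.test) - car.test$Sales)^2)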

d)

library(randomForest)
set.seed(42)

# Bagging: mtry = p = 10, so every predictor is considered at each split
bag.car = randomForest(Sales ~ ., data = car.train, importance = TRUE, ntree = 500, mtry = 10)

tree.pred = predict(bag.car, newdata = car.test)

mse3 = mean((tree.pred - car.test$Sales)^2)
mse3
## [1] 2.247194
importance(bag.car)
##                %IncMSE IncNodePurity
## CompPrice   37.2655918    272.684989
## Income      11.3481020    139.099550
## Advertising 24.7312015    187.974015
## Population  -0.8309134     84.799869
## Price       74.6565917    742.426277
## ShelveLoc   76.1027278    688.088937
## Age         22.3794426    239.876755
## Education    2.8933345     64.433523
## Urban        0.3376574      9.737118
## US           1.6649153     10.587431

In order to use the bagging method, we set m = p (mtry = 10). The test MSE achieved by the model was 2.2472, well below what was seen previously. Judging from the importance output, the most important variables were ShelveLoc, Price and CompPrice.
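
The importance() table above can also be shown graphically; a small sketch using varImpPlot() from the randomForest package:

# Dot charts of %IncMSE and IncNodePurity for the bagged model
varImpPlot(bag.car)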

e)

# Random forest: mtry = 3, roughly sqrt(p) for p = 10 predictors
rf.car = randomForest(Sales ~ ., data = car.train, importance = TRUE, ntree = 500, mtry = 3)

tree.pred = predict(rf.car, newdata = car.test)

mse4 = mean((tree.pred - car.test$Sales)^2)
mse4
## [1] 2.835609
importance(rf.car)
##                %IncMSE IncNodePurity
## CompPrice   16.5827316     225.67910
## Income       6.3862954     193.16698
## Advertising 17.5873572     218.29447
## Population   1.4117218     155.91267
## Price       44.9860497     581.88181
## ShelveLoc   49.9640240     538.27366
## Age         15.2400202     271.29274
## Education    0.8508437     111.00186
## Urban       -2.1433634      22.38776
## US           3.8134550      34.28175

For this random forest model we set m approximately equal to sqrt(p), i.e. mtry = 3. The test MSE improved relative to parts b) and c), but was not as low as with the bagging method.
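
As a further illustration (an addition of ours, not required by the exercise), the test MSE can be traced over a grid of mtry values to see where between sqrt(p) and p the best trade-off lies:

# Test MSE for several values of mtry, holding ntree fixed at 500
set.seed(42)
mtry.grid = c(3, 5, 7, 10)
sapply(mtry.grid, function(m) {
  fit = randomForest(Sales ~ ., data = car.train, ntree = 500, mtry = m)
  mean((predict(fit, newdata = car.test) - car.test$Sales)^2)
})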

9

a)

# Note: this reuses the index vector `train` sampled in 8a); the 750 OJ
# observations outside that index serve as the training set and the
# remaining 320 as the test set
OJ.train = OJ[-train, ]
OJ.test = OJ[train, ]

b)

set.seed(42)
# Fit a classification tree for Purchase on the training data
OJ.tree = tree(Purchase ~ ., data = OJ.train)
summary(OJ.tree)
## 
## Classification tree:
## tree(formula = Purchase ~ ., data = OJ.train)
## Variables actually used in tree construction:
## [1] "LoyalCH"     "SalePriceMM" "PriceDiff"  
## Number of terminal nodes:  8 
## Residual mean deviance:  0.7681 = 569.9 / 742 
## Misclassification error rate: 0.1627 = 122 / 750

The tree has 8 terminal nodes and the training misclassification error rate is 16.27%.

c)

OJ.tree
## node), split, n, deviance, yval, (yprob)
##       * denotes terminal node
## 
##  1) root 750 1025.00 CH ( 0.57067 0.42933 )  
##    2) LoyalCH < 0.48285 310  331.20 MM ( 0.22581 0.77419 )  
##      4) LoyalCH < 0.276142 180  125.60 MM ( 0.11111 0.88889 )  
##        8) LoyalCH < 0.0356415 60   10.17 MM ( 0.01667 0.98333 ) *
##        9) LoyalCH > 0.0356415 120  104.90 MM ( 0.15833 0.84167 ) *
##      5) LoyalCH > 0.276142 130  173.20 MM ( 0.38462 0.61538 )  
##       10) SalePriceMM < 2.04 75   80.28 MM ( 0.22667 0.77333 ) *
##       11) SalePriceMM > 2.04 55   74.03 CH ( 0.60000 0.40000 ) *
##    3) LoyalCH > 0.48285 440  423.20 CH ( 0.81364 0.18636 )  
##      6) LoyalCH < 0.764572 245  301.80 CH ( 0.69388 0.30612 )  
##       12) PriceDiff < 0.265 143  197.40 CH ( 0.53846 0.46154 )  
##         24) PriceDiff < -0.165 37   41.05 MM ( 0.24324 0.75676 ) *
##         25) PriceDiff > -0.165 106  138.30 CH ( 0.64151 0.35849 ) *
##       13) PriceDiff > 0.265 102   60.88 CH ( 0.91176 0.08824 ) *
##      7) LoyalCH > 0.764572 195   60.32 CH ( 0.96410 0.03590 ) *
plot(OJ.tree)
text(OJ.tree,pretty=0)

We will analyze the terminal node labelled 25. The splitting criterion that leads to this node is PriceDiff > -0.165. The node contains 106 observations with a deviance of 138.30. Any observation that reaches this node is predicted to be CH; 64.15% of the observations in the node are CH.
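
The same figures can be read directly out of the fitted tree object; a small sketch, relying on the frame component of a tree object, which stores one row per node indexed by node number:

# Row for node 25: splitting variable, number of observations, deviance, predicted class
OJ.tree$frame["25", c("var", "n", "dev", "yval")]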

d)

plot(OJ.tree)
text(OJ.tree,pretty=0)

LoyalCH is the variable used for both the first and second splits of the tree, which points to the high importance of this variable in the model.

e)

set.seed(42)
OJ.pred = predict(OJ.tree, newdata = OJ.test, type = "class")
table(OJ.test$Purchase, OJ.pred)
##     OJ.pred
##       CH  MM
##   CH 209  16
##   MM  36  59
error = 1 - (209+59)/320
error
## [1] 0.1625

The misclassification error rate on the test set is 16.25%.
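
The same error rate can be computed without reading counts off the confusion matrix; a one-line alternative:

# Proportion of test observations that are misclassified
mean(OJ.pred != OJ.test$Purchase)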

f)

set.seed(42)
# Cross-validate the tree, using the misclassification rate to guide pruning;
# $dev below reports the number of cross-validated misclassifications
cv.OJ = cv.tree(OJ.tree, FUN = prune.misclass)
cv.OJ
## $size
## [1] 8 7 5 2 1
## 
## $dev
## [1] 151 149 149 154 322
## 
## $k
## [1]       -Inf   0.000000   5.500000   6.333333 170.000000
## 
## $method
## [1] "misclass"
## 
## attr(,"class")
## [1] "prune"         "tree.sequence"

g)

plot(cv.OJ$size, cv.OJ$dev, type = "b", xlab = "Tree size", ylab = "CV misclassifications")

bestsize = cv.OJ$size[which.min(cv.OJ$dev)]
bestsize
## [1] 7

h)

The best size according to the cross-validation output is 7, but as we can observe in the plot, a tree of size 5 attains the same cross-validated error (149 misclassifications), so we will move forward with size 5.
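
That rule can also be expressed in code; given the cv.OJ values printed above, this sketch returns 5, the smallest size among those attaining the minimum error:

# Smallest tree size that achieves the minimum cross-validated error
min(cv.OJ$size[cv.OJ$dev == min(cv.OJ$dev)])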

i)

prune.OJ = prune.misclass(OJ.tree, best = 5)

plot(prune.OJ)
text(prune.OJ, pretty = 0)

j)

prune.pred = predict(prune.OJ, newdata = OJ.train, type = "class")
table(OJ.train$Purchase, prune.pred)
##     prune.pred
##       CH  MM
##   CH 349  79
##   MM  54 268
error.prune2 = 1 - (349+268)/750

OJ.pred = predict(OJ.tree, newdata = OJ.train, type = "class")
table(OJ.train$Purchase, OJ.pred)
##     OJ.pred
##       CH  MM
##   CH 382  46
##   MM  76 246
error2 = 1 - (382+246)/750

error.prune2
## [1] 0.1773333
error2
## [1] 0.1626667

The training misclassification rate is higher for the pruned tree (17.73% vs. 16.27%).

k)

prune.pred = predict(prune.OJ, newdata = OJ.test, type = "class")
table(OJ.test$Purchase, prune.pred)
##     prune.pred
##       CH  MM
##   CH 198  27
##   MM  22  73
error.prune = 1 - (198+73)/320
error.prune
## [1] 0.153125
error
## [1] 0.1625

The test error rate of the pruned tree (15.31%) is lower than that of the unpruned tree (16.25%).