library(ISLR)
## Warning: package 'ISLR' was built under R version 4.2.2
library(tree)
## Warning: package 'tree' was built under R version 4.2.2
library(rpart)
library(caret)
## Warning: package 'caret' was built under R version 4.2.2
## Loading required package: ggplot2
## Loading required package: lattice
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.2.2
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
library(ISLR)

3

# Compare three node-impurity measures for a two-class node as a function of
# p, the proportion of observations in the first class.
p <- seq(0, 1, by = 0.001)

# x * log(x) with its limiting value 0 at x = 0. Evaluated directly,
# 0 * log(0) is NaN, which would drop the end points of the entropy curve.
xlogx <- function(x) ifelse(x > 0, x * log(x), 0)

gindex <- 2 * p * (1 - p)
clerror <- 1 - pmax(p, 1 - p)
crent <- -(xlogx(p) + xlogx(1 - p))

# Draw the curves as lines and reuse the same colours and line type in the
# legend so the key matches what is plotted (matplot's default point symbols
# are the characters "1", "2", "3", which a pch = 15/17/19 legend does not
# reproduce).
curve.cols <- c("green", "blue", "red")
matplot(p, cbind(gindex, clerror, crent),
        type = "l", lty = 1, lwd = 2,
        ylab = "gindex, clerror, crent", col = curve.cols)
legend("bottom", inset = 0.01,
       legend = c("gini index", "class error", "cross entropy"),
       col = curve.cols, lty = 1, lwd = 2)

8

A

# Split Carseats at random into two halves: `train` holds the row indices of
# the training half, the remaining rows form the test set.
set.seed(1)
train <- sample(seq_len(nrow(Carseats)), size = nrow(Carseats) / 2)
testing <- Carseats[-train, ]
training <- Carseats[train, ]

B

# Grow a regression tree for Sales on all other columns of the training half
# and print its summary.
tree.carseats <- tree(formula = Sales ~ ., data = training)
summary(object = tree.carseats)
## 
## Regression tree:
## tree(formula = Sales ~ ., data = training)
## Variables actually used in tree construction:
## [1] "ShelveLoc"   "Price"       "Age"         "Advertising" "CompPrice"  
## [6] "US"         
## Number of terminal nodes:  18 
## Residual mean deviance:  2.167 = 394.3 / 182 
## Distribution of residuals:
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -3.88200 -0.88200 -0.08712  0.00000  0.89590  4.09900
# Draw the tree, then overlay the split and node labels.
plot(x = tree.carseats)
text(x = tree.carseats, pretty = 0)

# Test-set mean squared error of the unpruned tree.
treecarseat.pred <- predict(object = tree.carseats, newdata = testing)
mean((testing$Sales - treecarseat.pred)^2)
## [1] 4.922039

The MSE is 4.922039

C

# Cross-validate the regression tree and plot the cross-validated deviance
# against the number of terminal nodes.
set.seed(1)
cv.carseats <- cv.tree(object = tree.carseats)
plot(x = cv.carseats$size, y = cv.carseats$dev, type = "b")

The optimal tree size is 12 terminal nodes.

# Prune the regression tree back to 12 terminal nodes and draw it.
prune.carseats <- prune.tree(tree = tree.carseats, best = 12)
plot(x = prune.carseats)
text(x = prune.carseats, pretty = 0)

# Test-set mean squared error of the pruned tree (overwrites the unpruned
# tree's predictions stored in treecarseat.pred).
treecarseat.pred <- predict(object = prune.carseats, newdata = testing)
mean((testing$Sales - treecarseat.pred)^2)
## [1] 4.966929

The test MSE moved from 4.922039 to 4.966929, so pruning did not improve it.

D

# Bagging: mtry = 10 offers every predictor at each split (the importance
# table lists 10 predictors), so no random feature subsetting takes place.
set.seed(1)
bag.carseats <- randomForest(
  Sales ~ .,
  data = training,
  mtry = 10,
  ntree = 551,
  importance = TRUE
)

# Test-set mean squared error of the bagged model.
bagcarseat.pred <- predict(object = bag.carseats, newdata = testing)
mean((testing$Sales - bagcarseat.pred)^2)
## [1] 2.599099

The MSE is 2.599099!

# Print the variable-importance table of the bagged model: one row per
# predictor, with the %IncMSE and IncNodePurity columns.
importance(bag.carseats)
##                 %IncMSE IncNodePurity
## CompPrice   26.18616309    170.781666
## Income       5.25063979     90.717958
## Advertising 13.25673204     97.498810
## Population  -2.14346969     58.289311
## Price       60.58241525    503.478806
## ShelveLoc   50.77308639    380.258594
## Age         19.03720001    158.282846
## Education    1.24264920     44.834257
## Urban       -0.08461165      9.883299
## US           4.71515903     17.907727

Price and ShelveLoc are the most important variables.

# Random forest: only a random subset of the predictors is considered at each
# split. The previous mtry = 10 offered all 10 predictors, which is bagging
# again and differed from bag.carseats only in the number of trees. mtry = 3
# is floor(p / 3) for p = 10, randomForest's default for regression.
# NOTE: the recorded output and MSE quoted below were produced with mtry = 10
# and must be regenerated by re-running this document.
set.seed(1)
rando.carseats <- randomForest(Sales ~ ., data = training, mtry = 3, importance = TRUE)

# Test-set mean squared error of the random forest.
randcarseat.pred <- predict(rando.carseats, newdata = testing)
mean((randcarseat.pred - testing$Sales)^2)
## [1] 2.605253

The MSE is 2.605253

# Print the variable-importance table of rando.carseats: one row per
# predictor, with the %IncMSE and IncNodePurity columns.
importance(rando.carseats)
##                %IncMSE IncNodePurity
## CompPrice   24.8888481    170.182937
## Income       4.7121131     91.264880
## Advertising 12.7692401     97.164338
## Population  -1.8074075     58.244596
## Price       56.3326252    502.903407
## ShelveLoc   48.8886689    380.032715
## Age         17.7275460    157.846774
## Education    0.5962186     44.598731
## Urban        0.1728373      9.822082
## US           4.2172102     18.073863

The most important are the same as above.

9 A

# Draw 800 rows of OJ at random for training; every other row goes to the
# test set. Note that this overwrites the earlier `train` index vector.
set.seed(1)
train <- sample(seq_len(nrow(OJ)), size = 800)
ojtesting <- OJ[-train, ]
ojtraining <- OJ[train, ]

B

# Fit a classification tree for Purchase on all other columns of the OJ
# training set and print its summary.
ojtree <- tree(formula = Purchase ~ ., data = ojtraining)
summary(object = ojtree)
## 
## Classification tree:
## tree(formula = Purchase ~ ., data = ojtraining)
## Variables actually used in tree construction:
## [1] "LoyalCH"       "PriceDiff"     "SpecialCH"     "ListPriceDiff"
## [5] "PctDiscMM"    
## Number of terminal nodes:  9 
## Residual mean deviance:  0.7432 = 587.8 / 791 
## Misclassification error rate: 0.1588 = 127 / 800

The tree has 9 terminal nodes and a training error rate of 0.1588.

C

# Print the fitted tree as text: one row per node giving the split, the number
# of observations, the deviance, the predicted class and the class
# probabilities; terminal nodes are marked with "*".
ojtree
## node), split, n, deviance, yval, (yprob)
##       * denotes terminal node
## 
##  1) root 800 1073.00 CH ( 0.60625 0.39375 )  
##    2) LoyalCH < 0.5036 365  441.60 MM ( 0.29315 0.70685 )  
##      4) LoyalCH < 0.280875 177  140.50 MM ( 0.13559 0.86441 )  
##        8) LoyalCH < 0.0356415 59   10.14 MM ( 0.01695 0.98305 ) *
##        9) LoyalCH > 0.0356415 118  116.40 MM ( 0.19492 0.80508 ) *
##      5) LoyalCH > 0.280875 188  258.00 MM ( 0.44149 0.55851 )  
##       10) PriceDiff < 0.05 79   84.79 MM ( 0.22785 0.77215 )  
##         20) SpecialCH < 0.5 64   51.98 MM ( 0.14062 0.85938 ) *
##         21) SpecialCH > 0.5 15   20.19 CH ( 0.60000 0.40000 ) *
##       11) PriceDiff > 0.05 109  147.00 CH ( 0.59633 0.40367 ) *
##    3) LoyalCH > 0.5036 435  337.90 CH ( 0.86897 0.13103 )  
##      6) LoyalCH < 0.764572 174  201.00 CH ( 0.73563 0.26437 )  
##       12) ListPriceDiff < 0.235 72   99.81 MM ( 0.50000 0.50000 )  
##         24) PctDiscMM < 0.196196 55   73.14 CH ( 0.61818 0.38182 ) *
##         25) PctDiscMM > 0.196196 17   12.32 MM ( 0.11765 0.88235 ) *
##       13) ListPriceDiff > 0.235 102   65.43 CH ( 0.90196 0.09804 ) *
##      7) LoyalCH > 0.764572 261   91.20 CH ( 0.95785 0.04215 ) *

For terminal node 11, the split criterion is PriceDiff > 0.05; the node contains 109 observations with a deviance of 147.0, and its prediction is CH (59.6% of its observations are CH).

D

# Draw the classification tree, then overlay the split and node labels.
plot(x = ojtree)
text(x = ojtree, pretty = 0)

E

# Predict the class of each test observation and cross-tabulate predictions
# (rows) against the true Purchase values (columns).
tree.pred <- predict(ojtree, ojtesting, type = "class")
table(tree.pred, ojtesting$Purchase)
##          
## tree.pred  CH  MM
##        CH 160  38
##        MM   8  64
# Test error rate computed from the predictions themselves rather than from
# counts copied by hand out of the table, so it stays correct if the split or
# the model changes.
mean(tree.pred != ojtesting$Purchase)
## [1] 0.1703704

The test error rate is 17.04%

F

# Cross-validate the classification tree, using misclassification-based
# pruning to build the sequence of subtrees, and print the result.
cv.oj <- cv.tree(object = ojtree, FUN = prune.misclass)
print(cv.oj)
## $size
## [1] 9 8 7 4 2 1
## 
## $dev
## [1] 150 150 149 158 172 315
## 
## $k
## [1]       -Inf   0.000000   3.000000   4.333333  10.500000 151.000000
## 
## $method
## [1] "misclass"
## 
## attr(,"class")
## [1] "prune"         "tree.sequence"

G

# With FUN = prune.misclass the `dev` component holds the cross-validated
# number of misclassified observations rather than a deviance, so the y-axis
# is labelled accordingly.
plot(cv.oj$size, cv.oj$dev, type = "b", xlab = "Tree size", ylab = "CV misclassifications")

H

The 7 node tree is the smallest with the lowest error rate.

I

# Prune the classification tree to 7 terminal nodes and draw the result.
prune.oj <- prune.misclass(tree = ojtree, best = 7)
plot(x = prune.oj)
text(x = prune.oj, pretty = 0)

J

# Summary of the pruned tree; the misclassification error rate it reports is
# computed on the 800 training observations.
summary(prune.oj)
## 
## Classification tree:
## snip.tree(tree = ojtree, nodes = c(4L, 10L))
## Variables actually used in tree construction:
## [1] "LoyalCH"       "PriceDiff"     "ListPriceDiff" "PctDiscMM"    
## Number of terminal nodes:  7 
## Residual mean deviance:  0.7748 = 614.4 / 793 
## Misclassification error rate: 0.1625 = 130 / 800
# Summary of the unpruned tree, printed again for a side-by-side comparison.
summary(ojtree)
## 
## Classification tree:
## tree(formula = Purchase ~ ., data = ojtraining)
## Variables actually used in tree construction:
## [1] "LoyalCH"       "PriceDiff"     "SpecialCH"     "ListPriceDiff"
## [5] "PctDiscMM"    
## Number of terminal nodes:  9 
## Residual mean deviance:  0.7432 = 587.8 / 791 
## Misclassification error rate: 0.1588 = 127 / 800

The unpruned tree has a slightly lower training misclassification rate (15.88% vs 16.25%)

K

# Predict the test set with the pruned tree and cross-tabulate predictions
# (rows) against the true Purchase values (columns).
prune.pred <- predict(prune.oj, ojtesting, type = "class")
table(prune.pred, ojtesting$Purchase)
##           
## prune.pred  CH  MM
##         CH 160  36
##         MM   8  66
# Test error rate computed from the predictions themselves rather than from
# counts copied by hand out of the table.
mean(prune.pred != ojtesting$Purchase)
## [1] 0.162963

The test error rate has dropped to 16.3% for the pruned tree, compared with 17.04% for the unpruned tree.