Chapter 8, Exercises 3, 8, and 9

library(ISLR2)
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(rpart)
library(rattle)
## Loading required package: tibble
## Loading required package: bitops
## Rattle: A free graphical interface for data science with R.
## Version 5.5.1 Copyright (c) 2006-2021 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
library(BART)
## Loading required package: nlme
## Loading required package: survival
## 
## Attaching package: 'survival'
## The following object is masked from 'package:caret':
## 
##     cluster
library(tree)

3

Consider the Gini index, classification error, and entropy in a simple classification setting with two classes. Create a single plot that displays each of these quantities as a function of \(\hat{p}_{m1}\). The x-axis should display \(\hat{p}_{m1}\), ranging from 0 to 1, and the y-axis should display the value of the Gini index, classification error, and entropy.

Hint: In a setting with two classes, \(\hat{p}_{m1} = 1 - \hat{p}_{m2}\). You could make this plot by hand, but it will be much easier to make in R.

# Define a sequence of values for p_hat
p <- seq(0, 1, by = 0.01)

# Calculate Gini index
gini <- 2 * p * (1 - p)

# Calculate classification error
class_error <- 1 - pmax(p, 1 - p)

# Calculate entropy, here in bits (base-2); ISLR defines it with the
# natural log, which only rescales the curve by a constant factor
entropy <- -p * log2(p) - (1 - p) * log2(1 - p)
entropy[is.nan(entropy)] <- 0  # 0 * log(0) is taken as 0 (at p = 0 or 1)

# Plot all three measures
plot(p, gini, type = "l", col = "blue", lwd = 2,
     ylab = "Impurity Measure",
     xlab = expression(hat(p)[m1]),
     main = "Gini Index, Classification Error, and Entropy")
lines(p, class_error, col = "red", lwd = 2, lty = 2)
lines(p, entropy, col = "darkgreen", lwd = 2, lty = 3)
legend("top", legend = c("Gini Index", "Classification Error", "Entropy"),
       col = c("blue", "red", "darkgreen"), lty = 1:3, lwd = 2)
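As a quick numerical sanity check (a small sketch using the vectors defined above), all three measures peak at \(\hat{p}_{m1} = 0.5\), where the node is least pure, and on [0, 1] they are ordered classification error <= Gini <= entropy:

# Each impurity measure is maximized when the two classes are evenly mixed
p[which.max(gini)]         # 0.5
p[which.max(class_error)]  # 0.5
p[which.max(entropy)]      # 0.5

# Pointwise ordering of the three curves
all(class_error <= gini) && all(gini <= entropy)  # TRUE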

8

In the lab, a classification tree was applied to the Carseats data set after converting Sales into a qualitative response variable. Now we will seek to predict Sales using regression trees and related approaches, treating the response as a quantitative variable.

data("Carseats")

(a) Split the data set into a training set and a test set.

set.seed(1)
train_index = createDataPartition(Carseats$Sales, p = 0.5, list = FALSE)

train = Carseats[train_index, ]
test = Carseats[-train_index, ]

(b) Fit a regression tree to the training set. Plot the tree, and interpret the results. What test MSE do you obtain?

tree_model = rpart(Sales ~ ., data = train, method = "anova", control = rpart.control(minsplit = 15, cp = 0))
fancyRpartPlot(tree_model, sub = "", cex = 0.6)

tree_preds = predict(tree_model, newdata = test)
mean((test$Sales - tree_preds)^2)
## [1] 4.319363

The test MSE is 4.319. The tree is grown deep (cp = 0, minsplit = 15) and splits on predictors including Price, ShelveLoc, CompPrice, Advertising, and Age; see the printcp() output in part (c) for the full list.
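To support the interpretation, rpart also stores an overall importance score per predictor, aggregated over primary and surrogate splits. A quick sketch (output not shown):

# rpart's built-in variable importance for the fitted tree
tree_model$variable.importance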

(c) Use cross-validation in order to determine the optimal level of tree complexity. Does pruning the tree improve the test MSE?

printcp(tree_model)
## 
## Regression tree:
## rpart(formula = Sales ~ ., data = train, method = "anova", control = rpart.control(minsplit = 15, 
##     cp = 0))
## 
## Variables actually used in tree construction:
## [1] Advertising Age         CompPrice   Income      Population  Price      
## [7] ShelveLoc   Urban      
## 
## Root node error: 1476.1/201 = 7.3439
## 
## n= 201 
## 
##           CP nsplit rel error  xerror     xstd
## 1  0.2084483      0   1.00000 1.00879 0.087596
## 2  0.1138324      1   0.79155 0.81341 0.077576
## 3  0.0636946      2   0.67772 0.82897 0.079899
## 4  0.0528339      3   0.61402 0.84903 0.081825
## 5  0.0490121      4   0.56119 0.84508 0.081696
## 6  0.0325569      5   0.51218 0.84964 0.082104
## 7  0.0315159      6   0.47962 0.81428 0.078891
## 8  0.0311924      7   0.44811 0.81657 0.078878
## 9  0.0282239      8   0.41691 0.79175 0.078147
## 10 0.0206688      9   0.38869 0.72616 0.070345
## 11 0.0192129     10   0.36802 0.71050 0.067181
## 12 0.0172196     11   0.34881 0.69697 0.067441
## 13 0.0171463     12   0.33159 0.69277 0.068072
## 14 0.0113740     13   0.31444 0.66972 0.066233
## 15 0.0110334     15   0.29169 0.67344 0.064341
## 16 0.0100634     16   0.28066 0.67963 0.064373
## 17 0.0092325     17   0.27060 0.66379 0.062633
## 18 0.0084827     18   0.26136 0.66739 0.063314
## 19 0.0071732     19   0.25288 0.66970 0.063334
## 20 0.0054995     21   0.23854 0.65064 0.061569
## 21 0.0054015     22   0.23304 0.64926 0.061590
## 22 0.0000000     23   0.22763 0.64257 0.060180
plotcp(tree_model)

prune_model = prune(tree_model, cp = tree_model$cptable[which.min(tree_model$cptable[,"xerror"]), "CP"])

fancyRpartPlot(prune_model, cex = 0.6)

prune_preds = predict(prune_model, newdata = test)
mean((test$Sales - prune_preds)^2)
## [1] 4.319363

Cross-validation selects cp = 0: the minimum xerror (0.64257) occurs at the full tree, so "pruning" returns the original tree and the test MSE is unchanged at 4.319. Pruning does not improve the test MSE here.
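An alternative worth sketching is the 1-SE rule: choose the smallest subtree whose cross-validated error is within one standard error of the minimum. A minimal sketch using the cptable above (results not shown):

# 1-SE rule: smallest subtree with xerror within one SE of the minimum
cp_tab = tree_model$cptable
cutoff = min(cp_tab[, "xerror"]) + cp_tab[which.min(cp_tab[, "xerror"]), "xstd"]
cp_1se = cp_tab[cp_tab[, "xerror"] <= cutoff, , drop = FALSE][1, "CP"]  # rows are ordered by decreasing CP

prune_1se = prune(tree_model, cp = cp_1se)
mean((test$Sales - predict(prune_1se, newdata = test))^2)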

(d) Use the bagging approach in order to analyze this data. What test MSE do you obtain? Use the importance() function to determine which variables are most important.

set.seed(1)

train_control = trainControl("cv", number = 10)

p = ncol(train) - 1

bag_model = train(Sales ~ ., data = train, method = "rf", trControl = train_control, tuneGrid = data.frame(mtry = p), importance = T)

bag_preds = predict(bag_model, newdata = test)

mean((test$Sales - bag_preds)^2)
## [1] 2.648432
varImp(bag_model)
## rf variable importance
## 
##                   Overall
## Price           100.00000
## ShelveLocGood    86.57993
## ShelveLocMedium  44.76368
## CompPrice        44.56485
## Advertising      36.63108
## Age              34.25622
## Income           20.61102
## USYes            12.16029
## Education         4.97453
## UrbanYes          0.01901
## Population        0.00000

The bagging test MSE is 2.648, a substantial improvement over the single regression tree (4.319). Price and ShelveLoc are by far the most important variables. (One caveat: with the formula interface, caret dummy-encodes the factors, giving 11 predictor columns, so mtry = 10 is very nearly, but not exactly, full bagging.)
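The exercise asks specifically for importance(); since caret's "rf" method wraps randomForest, the fitted forest is stored in bag_model$finalModel and importance() can be called on it directly. A minimal sketch, assuming the randomForest package is installed (output not shown):

# randomForest's own importance measures for the caret-trained fit
randomForest::importance(bag_model$finalModel)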

(e) Use random forests to analyze this data. What test MSE do you obtain? Use the importance() function to determine which variables are most important. Describe the effect of m, the number of variables considered at each split, on the error rate obtained.

set.seed(1)

train_control = trainControl("cv", number = 10)

rf_model = train(Sales ~ ., data = train, method = "rf", trControl = train_control, importance = T)

rf_preds = predict(rf_model, newdata = test)

mean((test$Sales - rf_preds)^2)
## [1] 2.646104
varImp(rf_model)
## rf variable importance
## 
##                 Overall
## Price           100.000
## ShelveLocGood    84.578
## CompPrice        46.822
## ShelveLocMedium  38.684
## Age              34.358
## Advertising      32.218
## Income           23.379
## USYes             8.078
## Education         2.773
## UrbanYes          1.873
## Population        0.000

The random-forest test MSE is 2.646, essentially the same as bagging on this split. Price and ShelveLoc remain the most important variables.
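The exercise also asks about the effect of m (mtry, the number of predictors considered at each split). caret tuned mtry over a small default grid, and the cross-validated error for each value is stored in the fitted object; typically the error falls quickly as m increases from very small values and then levels off, with the best m below p because a smaller m decorrelates the trees. A sketch for inspecting this (output not shown):

# CV error as a function of m = mtry
rf_model$results[, c("mtry", "RMSE")]
plot(rf_model)  # RMSE vs. mtry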

(f) Now analyze the data using BART, and report your results.

set.seed(1)

bart_model = gbart(x.train = train[, 2:11], y.train = train[, "Sales"], x.test = test[, 2:11]) 
## *****Calling gbart: type=1
## *****Data:
## data:n,p,np: 201, 14, 199
## y1,yn: 2.007711, -1.552289
## x1,x[n*p]: 138.000000, 1.000000
## xp1,xp[np*p]: 113.000000, 1.000000
## *****Number of Trees: 200
## *****Number of Cut Points: 64 ... 1
## *****burn,nd,thin: 100,1000,1
## *****Prior:beta,alpha,tau,nu,lambda,offset: 2,0.95,0.2512,3,0.22362,7.49229
## *****sigma: 1.071447
## *****w (weights): 1.000000 ... 1.000000
## *****Dirichlet:sparse,theta,omega,a,b,rho,augment: 0,0,1,0.5,1,14,0
## *****printevery: 100
## 
## MCMC
## done 0 (out of 1100)
## done 100 (out of 1100)
## done 200 (out of 1100)
## done 300 (out of 1100)
## done 400 (out of 1100)
## done 500 (out of 1100)
## done 600 (out of 1100)
## done 700 (out of 1100)
## done 800 (out of 1100)
## done 900 (out of 1100)
## done 1000 (out of 1100)
## time: 4s
## trcnt,tecnt: 1000,1000
yhat_bart = bart_model$yhat.test.mean
mean((test[, "Sales"] - yhat_bart)^2)
## [1] 1.540608
ord = order(bart_model$varcount.mean, decreasing = T)
bart_model$varcount.mean[ord]
##       Price   CompPrice         Age  ShelveLoc1  ShelveLoc2  ShelveLoc3 
##      24.042      18.028      17.555      17.025      17.025      16.589 
##         US1      Urban2      Income         US2   Education  Population 
##      16.516      16.361      16.299      16.210      15.774      15.619 
##      Urban1 Advertising 
##      15.010      14.139

The test MSE is 1.541, the lowest of the methods tried here.

The varcount.mean output above shows how often each variable appeared in the ensemble's splitting rules; Price was used most often.
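For reference, gbart() also returns posterior-mean fitted values for the training set, so the in-sample MSE can be computed the same way (a quick sketch; output not shown):

# In-sample fit from the posterior mean over training-set draws
yhat_train = bart_model$yhat.train.mean
mean((train$Sales - yhat_train)^2)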

9

This problem involves the OJ data set which is part of the ISLR2 package.

data(OJ)

(a) Create a training set containing a random sample of 800 observations, and a test set containing the remaining observations.

set.seed(1)

oj_index = createDataPartition(OJ$Purchase, p = 0.7, list = FALSE)

oj_train = OJ[oj_index, ]
oj_test = OJ[-oj_index, ]

Note: the exercise asks for a training set of exactly 800 observations; createDataPartition() with p = 0.7 yields 750 training and 320 test observations here (OJ has 1,070 rows), and all results below reflect that split.

(b) Fit a tree to the training data, with Purchase as the response and the other variables as predictors. Use the summary() function to produce summary statistics about the tree, and describe the results obtained. What is the training error rate? How many terminal nodes does the tree have?

tree_oj = rpart(Purchase ~ ., data = oj_train, method = "class", control = rpart.control(minsplit = 15, cp = 0))

summary(tree_oj)
## Call:
## rpart(formula = Purchase ~ ., data = oj_train, method = "class", 
##     control = rpart.control(minsplit = 15, cp = 0))
##   n= 750 
## 
##              CP nsplit rel error    xerror       xstd
## 1  0.4828767123      0 1.0000000 1.0000000 0.04573100
## 2  0.0256849315      1 0.5171233 0.5479452 0.03842134
## 3  0.0239726027      3 0.4657534 0.5650685 0.03885138
## 4  0.0205479452      4 0.4417808 0.5513699 0.03850854
## 5  0.0119863014      5 0.4212329 0.5308219 0.03797614
## 6  0.0102739726      7 0.3972603 0.5136986 0.03751524
## 7  0.0085616438      8 0.3869863 0.5136986 0.03751524
## 8  0.0068493151     10 0.3698630 0.5136986 0.03751524
## 9  0.0057077626     11 0.3630137 0.4931507 0.03694061
## 10 0.0034246575     18 0.3184932 0.4828767 0.03664416
## 11 0.0008561644     22 0.3047945 0.4863014 0.03674367
## 12 0.0000000000     26 0.3013699 0.4863014 0.03674367
## 
## Variable importance
##        LoyalCH      PriceDiff    SalePriceMM        StoreID        PriceMM 
##             39             10              8              7              6 
##  ListPriceDiff         DiscMM      PctDiscMM        PriceCH WeekofPurchase 
##              5              5              5              4              4 
##    SalePriceCH          STORE      SpecialCH         Store7 
##              3              3              1              1 
## 
## Node number 1: 750 observations,    complexity param=0.4828767
##   predicted class=CH  expected loss=0.3893333  P(node) =1
##     class counts:   458   292
##    probabilities: 0.611 0.389 
##   left son=2 (425 obs) right son=3 (325 obs)
##   Primary splits:
##       LoyalCH     < 0.5036    to the right, improve=123.09670, (0 missing)
##       StoreID     < 3.5       to the right, improve= 32.56409, (0 missing)
##       PriceDiff   < 0.31      to the right, improve= 19.87539, (0 missing)
##       SalePriceMM < 2.04      to the right, improve= 17.34553, (0 missing)
##       DiscCH      < 0.255     to the right, improve= 15.60231, (0 missing)
##   Surrogate splits:
##       StoreID        < 3.5       to the right, agree=0.648, adj=0.188, (0 split)
##       PriceMM        < 1.89      to the right, agree=0.596, adj=0.068, (0 split)
##       WeekofPurchase < 246.5     to the right, agree=0.595, adj=0.065, (0 split)
##       ListPriceDiff  < 0.035     to the right, agree=0.591, adj=0.055, (0 split)
##       PriceCH        < 1.72      to the right, agree=0.588, adj=0.049, (0 split)
## 
## Node number 2: 425 observations,    complexity param=0.0239726
##   predicted class=CH  expected loss=0.1388235  P(node) =0.5666667
##     class counts:   366    59
##    probabilities: 0.861 0.139 
##   left son=4 (404 obs) right son=5 (21 obs)
##   Primary splits:
##       PriceDiff   < -0.39     to the right, improve=12.310240, (0 missing)
##       LoyalCH     < 0.7645725 to the right, improve= 9.114571, (0 missing)
##       DiscMM      < 0.57      to the left,  improve= 8.216284, (0 missing)
##       PctDiscMM   < 0.264375  to the left,  improve= 8.216284, (0 missing)
##       SalePriceMM < 1.585     to the right, improve= 7.859655, (0 missing)
##   Surrogate splits:
##       DiscMM      < 0.72      to the left,  agree=0.976, adj=0.524, (0 split)
##       SalePriceMM < 1.435     to the right, agree=0.976, adj=0.524, (0 split)
##       PctDiscMM   < 0.3342595 to the left,  agree=0.976, adj=0.524, (0 split)
##       SalePriceCH < 2.075     to the left,  agree=0.958, adj=0.143, (0 split)
## 
## Node number 3: 325 observations,    complexity param=0.02568493
##   predicted class=MM  expected loss=0.2830769  P(node) =0.4333333
##     class counts:    92   233
##    probabilities: 0.283 0.717 
##   left son=6 (181 obs) right son=7 (144 obs)
##   Primary splits:
##       LoyalCH   < 0.282272  to the right, improve=14.082430, (0 missing)
##       PriceDiff < 0.31      to the right, improve= 8.237351, (0 missing)
##       SpecialCH < 0.5       to the right, improve= 5.851637, (0 missing)
##       DiscCH    < 0.255     to the right, improve= 5.607657, (0 missing)
##       PctDiscCH < 0.132882  to the right, improve= 5.607657, (0 missing)
##   Surrogate splits:
##       STORE       < 2.5       to the left,  agree=0.609, adj=0.118, (0 split)
##       PriceCH     < 1.875     to the left,  agree=0.594, adj=0.083, (0 split)
##       SalePriceCH < 1.875     to the left,  agree=0.594, adj=0.083, (0 split)
##       StoreID     < 3.5       to the right, agree=0.569, adj=0.028, (0 split)
##       PriceMM     < 2.205     to the left,  agree=0.566, adj=0.021, (0 split)
## 
## Node number 4: 404 observations,    complexity param=0.005707763
##   predicted class=CH  expected loss=0.1113861  P(node) =0.5386667
##     class counts:   359    45
##    probabilities: 0.889 0.111 
##   left son=8 (281 obs) right son=9 (123 obs)
##   Primary splits:
##       LoyalCH     < 0.7053255 to the right, improve=6.210817, (0 missing)
##       SalePriceMM < 2.125     to the right, improve=3.358289, (0 missing)
##       PriceDiff   < 0.31      to the right, improve=3.035729, (0 missing)
##       PriceMM     < 2.11      to the right, improve=2.730502, (0 missing)
##       DiscCH      < 0.115     to the right, improve=2.360569, (0 missing)
##   Surrogate splits:
##       WeekofPurchase < 236.5     to the right, agree=0.705, adj=0.033, (0 split)
##       PriceCH        < 1.72      to the right, agree=0.703, adj=0.024, (0 split)
##       SalePriceMM    < 1.585     to the right, agree=0.698, adj=0.008, (0 split)
## 
## Node number 5: 21 observations,    complexity param=0.01027397
##   predicted class=MM  expected loss=0.3333333  P(node) =0.028
##     class counts:     7    14
##    probabilities: 0.333 0.667 
##   left son=10 (9 obs) right son=11 (12 obs)
##   Primary splits:
##       LoyalCH       < 0.742157  to the right, improve=3.5000000, (0 missing)
##       STORE         < 1.5       to the right, improve=3.0476190, (0 missing)
##       SpecialCH     < 0.5       to the left,  improve=1.8666670, (0 missing)
##       PriceMM       < 2.11      to the left,  improve=0.9333333, (0 missing)
##       ListPriceDiff < 0.135     to the left,  improve=0.9333333, (0 missing)
##   Surrogate splits:
##       PriceCH     < 2.04      to the right, agree=0.714, adj=0.333, (0 split)
##       DiscMM      < 0.47      to the left,  agree=0.714, adj=0.333, (0 split)
##       SalePriceMM < 1.64      to the right, agree=0.714, adj=0.333, (0 split)
##       SalePriceCH < 2.04      to the right, agree=0.714, adj=0.333, (0 split)
##       PctDiscMM   < 0.2224545 to the left,  agree=0.714, adj=0.333, (0 split)
## 
## Node number 6: 181 observations,    complexity param=0.02568493
##   predicted class=MM  expected loss=0.4143646  P(node) =0.2413333
##     class counts:    75   106
##    probabilities: 0.414 0.586 
##   left son=12 (107 obs) right son=13 (74 obs)
##   Primary splits:
##       PriceDiff     < 0.05      to the right, improve=12.694000, (0 missing)
##       SalePriceMM   < 1.94      to the right, improve=10.544100, (0 missing)
##       DiscMM        < 0.22      to the left,  improve= 6.766872, (0 missing)
##       PctDiscMM     < 0.0729725 to the left,  improve= 5.775128, (0 missing)
##       ListPriceDiff < 0.235     to the right, improve= 4.614858, (0 missing)
##   Surrogate splits:
##       SalePriceMM   < 1.94      to the right, agree=0.950, adj=0.878, (0 split)
##       DiscMM        < 0.08      to the left,  agree=0.834, adj=0.595, (0 split)
##       PctDiscMM     < 0.038887  to the left,  agree=0.834, adj=0.595, (0 split)
##       ListPriceDiff < 0.135     to the right, agree=0.779, adj=0.459, (0 split)
##       PriceMM       < 2.04      to the right, agree=0.773, adj=0.446, (0 split)
## 
## Node number 7: 144 observations,    complexity param=0.0008561644
##   predicted class=MM  expected loss=0.1180556  P(node) =0.192
##     class counts:    17   127
##    probabilities: 0.118 0.882 
##   left son=14 (94 obs) right son=15 (50 obs)
##   Primary splits:
##       LoyalCH        < 0.0356415 to the right, improve=1.4729200, (0 missing)
##       PriceCH        < 1.72      to the left,  improve=0.7395995, (0 missing)
##       PriceMM        < 1.74      to the left,  improve=0.7114842, (0 missing)
##       WeekofPurchase < 273.5     to the left,  improve=0.5734127, (0 missing)
##       PctDiscMM      < 0.1961965 to the left,  improve=0.4667313, (0 missing)
##   Surrogate splits:
##       STORE       < 2.5       to the left,  agree=0.771, adj=0.34, (0 split)
##       PriceCH     < 1.975     to the left,  agree=0.729, adj=0.22, (0 split)
##       SalePriceMM < 2.205     to the left,  agree=0.722, adj=0.20, (0 split)
##       PriceMM     < 2.205     to the left,  agree=0.715, adj=0.18, (0 split)
##       SalePriceCH < 1.875     to the left,  agree=0.694, adj=0.12, (0 split)
## 
## Node number 8: 281 observations
##   predicted class=CH  expected loss=0.05338078  P(node) =0.3746667
##     class counts:   266    15
##    probabilities: 0.947 0.053 
## 
## Node number 9: 123 observations,    complexity param=0.005707763
##   predicted class=CH  expected loss=0.2439024  P(node) =0.164
##     class counts:    93    30
##    probabilities: 0.756 0.244 
##   left son=18 (60 obs) right son=19 (63 obs)
##   Primary splits:
##       PriceDiff     < 0.265     to the right, improve=7.359504, (0 missing)
##       LoyalCH       < 0.6919315 to the left,  improve=5.959074, (0 missing)
##       ListPriceDiff < 0.135     to the right, improve=5.841506, (0 missing)
##       SalePriceMM   < 2.125     to the right, improve=5.646196, (0 missing)
##       PriceMM       < 2.11      to the right, improve=3.845598, (0 missing)
##   Surrogate splits:
##       ListPriceDiff < 0.255     to the right, agree=0.837, adj=0.667, (0 split)
##       SalePriceMM   < 1.94      to the right, agree=0.813, adj=0.617, (0 split)
##       DiscMM        < 0.03      to the left,  agree=0.780, adj=0.550, (0 split)
##       PctDiscMM     < 0.0137615 to the left,  agree=0.780, adj=0.550, (0 split)
##       PriceMM       < 2.04      to the right, agree=0.691, adj=0.367, (0 split)
## 
## Node number 10: 9 observations
##   predicted class=CH  expected loss=0.3333333  P(node) =0.012
##     class counts:     6     3
##    probabilities: 0.667 0.333 
## 
## Node number 11: 12 observations
##   predicted class=MM  expected loss=0.08333333  P(node) =0.016
##     class counts:     1    11
##    probabilities: 0.083 0.917 
## 
## Node number 12: 107 observations,    complexity param=0.02054795
##   predicted class=CH  expected loss=0.4299065  P(node) =0.1426667
##     class counts:    61    46
##    probabilities: 0.570 0.430 
##   left son=24 (53 obs) right son=25 (54 obs)
##   Primary splits:
##       STORE       < 1.5       to the left,  improve=3.442309, (0 missing)
##       LoyalCH     < 0.3084325 to the left,  improve=2.349588, (0 missing)
##       PriceDiff   < 0.49      to the right, improve=1.997351, (0 missing)
##       PriceMM     < 2.205     to the left,  improve=1.879220, (0 missing)
##       SalePriceMM < 2.205     to the left,  improve=1.879220, (0 missing)
##   Surrogate splits:
##       StoreID     < 5.5       to the right, agree=0.794, adj=0.585, (0 split)
##       Store7      splits as  RL, agree=0.794, adj=0.585, (0 split)
##       SalePriceCH < 1.775     to the left,  agree=0.748, adj=0.491, (0 split)
##       PriceMM     < 2.155     to the left,  agree=0.720, adj=0.434, (0 split)
##       PriceCH     < 1.875     to the left,  agree=0.710, adj=0.415, (0 split)
## 
## Node number 13: 74 observations,    complexity param=0.003424658
##   predicted class=MM  expected loss=0.1891892  P(node) =0.09866667
##     class counts:    14    60
##    probabilities: 0.189 0.811 
##   left son=26 (11 obs) right son=27 (63 obs)
##   Primary splits:
##       SpecialCH < 0.5       to the right, improve=3.279903, (0 missing)
##       LoyalCH   < 0.37888   to the right, improve=1.829975, (0 missing)
##       PriceDiff < -0.24     to the right, improve=1.579896, (0 missing)
##       DiscMM    < 0.47      to the left,  improve=1.128932, (0 missing)
##       PctDiscMM < 0.227263  to the left,  improve=1.128932, (0 missing)
## 
## Node number 14: 94 observations,    complexity param=0.0008561644
##   predicted class=MM  expected loss=0.1702128  P(node) =0.1253333
##     class counts:    16    78
##    probabilities: 0.170 0.830 
##   left son=28 (6 obs) right son=29 (88 obs)
##   Primary splits:
##       SalePriceMM    < 2.205     to the right, improve=1.3941010, (0 missing)
##       PriceMM        < 2.205     to the right, improve=1.0096780, (0 missing)
##       WeekofPurchase < 273.5     to the left,  improve=0.9531915, (0 missing)
##       ListPriceDiff  < 0.135     to the left,  improve=0.9508105, (0 missing)
##       PctDiscMM      < 0.1961965 to the left,  improve=0.8741791, (0 missing)
##   Surrogate splits:
##       PriceMM       < 2.205     to the right, agree=0.989, adj=0.833, (0 split)
##       ListPriceDiff < 0.43      to the right, agree=0.957, adj=0.333, (0 split)
##       PriceDiff     < 0.43      to the right, agree=0.947, adj=0.167, (0 split)
## 
## Node number 15: 50 observations
##   predicted class=MM  expected loss=0.02  P(node) =0.06666667
##     class counts:     1    49
##    probabilities: 0.020 0.980 
## 
## Node number 18: 60 observations
##   predicted class=CH  expected loss=0.06666667  P(node) =0.08
##     class counts:    56     4
##    probabilities: 0.933 0.067 
## 
## Node number 19: 63 observations,    complexity param=0.005707763
##   predicted class=CH  expected loss=0.4126984  P(node) =0.084
##     class counts:    37    26
##    probabilities: 0.587 0.413 
##   left son=38 (58 obs) right son=39 (5 obs)
##   Primary splits:
##       LoyalCH     < 0.6919315 to the left,  improve=3.746579, (0 missing)
##       StoreID     < 5.5       to the right, improve=3.050794, (0 missing)
##       Store7      splits as  RL, improve=3.050794, (0 missing)
##       STORE       < 0.5       to the left,  improve=3.050794, (0 missing)
##       SalePriceMM < 2.155     to the right, improve=1.850027, (0 missing)
## 
## Node number 24: 53 observations,    complexity param=0.008561644
##   predicted class=CH  expected loss=0.3018868  P(node) =0.07066667
##     class counts:    37    16
##    probabilities: 0.698 0.302 
##   left son=48 (32 obs) right son=49 (21 obs)
##   Primary splits:
##       PriceDiff      < 0.31      to the right, improve=3.4259320, (0 missing)
##       ListPriceDiff  < 0.235     to the right, improve=2.1396230, (0 missing)
##       WeekofPurchase < 249       to the right, improve=1.2176710, (0 missing)
##       PriceMM        < 2.155     to the right, improve=0.9623333, (0 missing)
##       SalePriceMM    < 2.155     to the right, improve=0.9623333, (0 missing)
##   Surrogate splits:
##       SalePriceMM    < 2.04      to the right, agree=0.868, adj=0.667, (0 split)
##       WeekofPurchase < 249       to the right, agree=0.830, adj=0.571, (0 split)
##       PriceMM        < 2.04      to the right, agree=0.792, adj=0.476, (0 split)
##       PriceCH        < 1.755     to the right, agree=0.755, adj=0.381, (0 split)
##       DiscMM         < 0.05      to the left,  agree=0.736, adj=0.333, (0 split)
## 
## Node number 25: 54 observations,    complexity param=0.0119863
##   predicted class=MM  expected loss=0.4444444  P(node) =0.072
##     class counts:    24    30
##    probabilities: 0.444 0.556 
##   left son=50 (48 obs) right son=51 (6 obs)
##   Primary splits:
##       PriceDiff   < 0.37      to the left,  improve=2.666667, (0 missing)
##       DiscCH      < 0.115     to the left,  improve=2.176871, (0 missing)
##       PctDiscCH   < 0.059517  to the left,  improve=2.176871, (0 missing)
##       LoyalCH     < 0.49      to the right, improve=1.500000, (0 missing)
##       SalePriceCH < 1.775     to the right, improve=1.463019, (0 missing)
##   Surrogate splits:
##       ListPriceDiff < 0.38      to the left,  agree=0.944, adj=0.500, (0 split)
##       DiscCH        < 0.115     to the left,  agree=0.907, adj=0.167, (0 split)
##       PctDiscCH     < 0.059517  to the left,  agree=0.907, adj=0.167, (0 split)
## 
## Node number 26: 11 observations
##   predicted class=CH  expected loss=0.4545455  P(node) =0.01466667
##     class counts:     6     5
##    probabilities: 0.545 0.455 
## 
## Node number 27: 63 observations
##   predicted class=MM  expected loss=0.1269841  P(node) =0.084
##     class counts:     8    55
##    probabilities: 0.127 0.873 
## 
## Node number 28: 6 observations
##   predicted class=CH  expected loss=0.5  P(node) =0.008
##     class counts:     3     3
##    probabilities: 0.500 0.500 
## 
## Node number 29: 88 observations,    complexity param=0.0008561644
##   predicted class=MM  expected loss=0.1477273  P(node) =0.1173333
##     class counts:    13    75
##    probabilities: 0.148 0.852 
##   left son=58 (24 obs) right son=59 (64 obs)
##   Primary splits:
##       ListPriceDiff  < 0.135     to the left,  improve=1.3674240, (0 missing)
##       LoyalCH        < 0.203377  to the left,  improve=0.7305195, (0 missing)
##       WeekofPurchase < 273.5     to the left,  improve=0.7266585, (0 missing)
##       PctDiscMM      < 0.1961965 to the left,  improve=0.6657576, (0 missing)
##       PriceDiff      < 0.31      to the right, improve=0.6339944, (0 missing)
##   Surrogate splits:
##       PriceMM        < 1.89      to the left,  agree=0.886, adj=0.583, (0 split)
##       WeekofPurchase < 229.5     to the left,  agree=0.852, adj=0.458, (0 split)
##       PriceCH        < 1.72      to the left,  agree=0.784, adj=0.208, (0 split)
##       PriceDiff      < 0.015     to the left,  agree=0.773, adj=0.167, (0 split)
##       SalePriceMM    < 1.84      to the left,  agree=0.761, adj=0.125, (0 split)
## 
## Node number 38: 58 observations,    complexity param=0.005707763
##   predicted class=CH  expected loss=0.362069  P(node) =0.07733333
##     class counts:    37    21
##    probabilities: 0.638 0.362 
##   left son=76 (17 obs) right son=77 (41 obs)
##   Primary splits:
##       StoreID       < 5.5       to the right, improve=2.873448, (0 missing)
##       Store7        splits as  RL, improve=2.873448, (0 missing)
##       STORE         < 0.5       to the left,  improve=2.873448, (0 missing)
##       ListPriceDiff < 0.235     to the right, improve=2.364532, (0 missing)
##       SalePriceCH   < 1.755     to the left,  improve=1.595588, (0 missing)
##   Surrogate splits:
##       STORE       < 0.5       to the left,  agree=1.000, adj=1.000, (0 split)
##       SalePriceMM < 1.64      to the left,  agree=0.845, adj=0.471, (0 split)
##       PctDiscMM   < 0.1961965 to the right, agree=0.845, adj=0.471, (0 split)
##       SpecialCH   < 0.5       to the right, agree=0.793, adj=0.294, (0 split)
##       DiscCH      < 0.335     to the right, agree=0.759, adj=0.176, (0 split)
## 
## Node number 39: 5 observations
##   predicted class=MM  expected loss=0  P(node) =0.006666667
##     class counts:     0     5
##    probabilities: 0.000 1.000 
## 
## Node number 48: 32 observations
##   predicted class=CH  expected loss=0.15625  P(node) =0.04266667
##     class counts:    27     5
##    probabilities: 0.844 0.156 
## 
## Node number 49: 21 observations,    complexity param=0.008561644
##   predicted class=MM  expected loss=0.4761905  P(node) =0.028
##     class counts:    10    11
##    probabilities: 0.476 0.524 
##   left son=98 (14 obs) right son=99 (7 obs)
##   Primary splits:
##       SalePriceMM    < 2.04      to the left,  improve=2.333333, (0 missing)
##       SalePriceCH    < 1.81      to the left,  improve=2.333333, (0 missing)
##       WeekofPurchase < 239       to the left,  improve=1.937729, (0 missing)
##       PriceCH        < 1.755     to the left,  improve=1.937729, (0 missing)
##       PriceMM        < 2.04      to the left,  improve=1.912554, (0 missing)
##   Surrogate splits:
##       SalePriceCH    < 1.81      to the left,  agree=1.000, adj=1.000, (0 split)
##       PriceCH        < 1.81      to the left,  agree=0.810, adj=0.429, (0 split)
##       PriceMM        < 2.04      to the left,  agree=0.810, adj=0.429, (0 split)
##       WeekofPurchase < 275       to the left,  agree=0.762, adj=0.286, (0 split)
##       ListPriceDiff  < 0.235     to the right, agree=0.762, adj=0.286, (0 split)
## 
## Node number 50: 48 observations,    complexity param=0.0119863
##   predicted class=CH  expected loss=0.5  P(node) =0.064
##     class counts:    24    24
##    probabilities: 0.500 0.500 
##   left son=100 (15 obs) right son=101 (33 obs)
##   Primary splits:
##       LoyalCH        < 0.49      to the right, improve=2.3757580, (0 missing)
##       SpecialMM      < 0.5       to the left,  improve=0.7526132, (0 missing)
##       WeekofPurchase < 269.5     to the right, improve=0.6153846, (0 missing)
##       SalePriceMM    < 2.15      to the left,  improve=0.3920145, (0 missing)
##       StoreID        < 3.5       to the right, improve=0.3809524, (0 missing)
##   Surrogate splits:
##       StoreID < 3.5       to the right, agree=0.729, adj=0.133, (0 split)
##       STORE   < 3.5       to the right, agree=0.729, adj=0.133, (0 split)
## 
## Node number 51: 6 observations
##   predicted class=MM  expected loss=0  P(node) =0.008
##     class counts:     0     6
##    probabilities: 0.000 1.000 
## 
## Node number 58: 24 observations,    complexity param=0.0008561644
##   predicted class=MM  expected loss=0.2916667  P(node) =0.032
##     class counts:     7    17
##    probabilities: 0.292 0.708 
##   left son=116 (9 obs) right son=117 (15 obs)
##   Primary splits:
##       STORE     < 1.5       to the left,  improve=2.005556, (0 missing)
##       PriceDiff < -0.12     to the left,  improve=1.546919, (0 missing)
##       PctDiscMM < 0.109423  to the right, improve=1.546919, (0 missing)
##       StoreID   < 5         to the right, improve=1.200877, (0 missing)
##       DiscMM    < 0.25      to the right, improve=1.200877, (0 missing)
##   Surrogate splits:
##       StoreID   < 5         to the right, agree=0.833, adj=0.556, (0 split)
##       SpecialMM < 0.5       to the right, agree=0.833, adj=0.556, (0 split)
##       Store7    splits as  RL, agree=0.833, adj=0.556, (0 split)
##       DiscMM    < 0.1       to the right, agree=0.792, adj=0.444, (0 split)
##       PriceDiff < -0.035    to the left,  agree=0.792, adj=0.444, (0 split)
## 
## Node number 59: 64 observations
##   predicted class=MM  expected loss=0.09375  P(node) =0.08533333
##     class counts:     6    58
##    probabilities: 0.094 0.906 
## 
## Node number 76: 17 observations
##   predicted class=CH  expected loss=0.1176471  P(node) =0.02266667
##     class counts:    15     2
##    probabilities: 0.882 0.118 
## 
## Node number 77: 41 observations,    complexity param=0.005707763
##   predicted class=CH  expected loss=0.4634146  P(node) =0.05466667
##     class counts:    22    19
##    probabilities: 0.537 0.463 
##   left son=154 (5 obs) right son=155 (36 obs)
##   Primary splits:
##       SalePriceMM    < 2.15      to the right, improve=2.445799, (0 missing)
##       SalePriceCH    < 1.94      to the right, improve=2.276608, (0 missing)
##       WeekofPurchase < 236.5     to the right, improve=1.480566, (0 missing)
##       PriceDiff      < -0.065    to the right, improve=1.401738, (0 missing)
##       PriceCH        < 1.875     to the right, improve=1.342625, (0 missing)
##   Surrogate splits:
##       PriceMM        < 2.205     to the right, agree=0.951, adj=0.6, (0 split)
##       SalePriceCH    < 1.94      to the right, agree=0.927, adj=0.4, (0 split)
##       WeekofPurchase < 275       to the right, agree=0.902, adj=0.2, (0 split)
## 
## Node number 98: 14 observations
##   predicted class=CH  expected loss=0.3571429  P(node) =0.01866667
##     class counts:     9     5
##    probabilities: 0.643 0.357 
## 
## Node number 99: 7 observations
##   predicted class=MM  expected loss=0.1428571  P(node) =0.009333333
##     class counts:     1     6
##    probabilities: 0.143 0.857 
## 
## Node number 100: 15 observations
##   predicted class=CH  expected loss=0.2666667  P(node) =0.02
##     class counts:    11     4
##    probabilities: 0.733 0.267 
## 
## Node number 101: 33 observations,    complexity param=0.006849315
##   predicted class=MM  expected loss=0.3939394  P(node) =0.044
##     class counts:    13    20
##    probabilities: 0.394 0.606 
##   left son=202 (8 obs) right son=203 (25 obs)
##   Primary splits:
##       WeekofPurchase < 269.5     to the right, improve=1.1275760, (0 missing)
##       SpecialMM      < 0.5       to the left,  improve=0.7575758, (0 missing)
##       PriceCH        < 1.825     to the right, improve=0.4432900, (0 missing)
##       SalePriceCH    < 1.825     to the right, improve=0.4432900, (0 missing)
##       LoyalCH        < 0.3618285 to the right, improve=0.3191142, (0 missing)
##   Surrogate splits:
##       DiscMM    < 0.03      to the right, agree=0.818, adj=0.250, (0 split)
##       PctDiscMM < 0.0137615 to the right, agree=0.818, adj=0.250, (0 split)
##       LoyalCH   < 0.3136    to the left,  agree=0.788, adj=0.125, (0 split)
## 
## Node number 116: 9 observations
##   predicted class=CH  expected loss=0.4444444  P(node) =0.012
##     class counts:     5     4
##    probabilities: 0.556 0.444 
## 
## Node number 117: 15 observations
##   predicted class=MM  expected loss=0.1333333  P(node) =0.02
##     class counts:     2    13
##    probabilities: 0.133 0.867 
## 
## Node number 154: 5 observations
##   predicted class=CH  expected loss=0  P(node) =0.006666667
##     class counts:     5     0
##    probabilities: 1.000 0.000 
## 
## Node number 155: 36 observations,    complexity param=0.005707763
##   predicted class=MM  expected loss=0.4722222  P(node) =0.048
##     class counts:    17    19
##    probabilities: 0.472 0.528 
##   left son=310 (5 obs) right son=311 (31 obs)
##   Primary splits:
##       LoyalCH        < 0.54608   to the left,  improve=1.2476700, (0 missing)
##       WeekofPurchase < 236.5     to the right, improve=0.8213675, (0 missing)
##       PriceDiff      < -0.065    to the right, improve=0.6944444, (0 missing)
##       SpecialMM      < 0.5       to the left,  improve=0.4629630, (0 missing)
##       PriceMM        < 2.04      to the right, improve=0.1920635, (0 missing)
## 
## Node number 202: 8 observations
##   predicted class=CH  expected loss=0.375  P(node) =0.01066667
##     class counts:     5     3
##    probabilities: 0.625 0.375 
## 
## Node number 203: 25 observations,    complexity param=0.003424658
##   predicted class=MM  expected loss=0.32  P(node) =0.03333333
##     class counts:     8    17
##    probabilities: 0.320 0.680 
##   left son=406 (17 obs) right son=407 (8 obs)
##   Primary splits:
##       LoyalCH        < 0.3618285 to the right, improve=0.8947059, (0 missing)
##       WeekofPurchase < 243       to the left,  improve=0.2292063, (0 missing)
##       StoreID        < 2.5       to the right, improve=0.2261538, (0 missing)
##       PriceCH        < 1.875     to the right, improve=0.2261538, (0 missing)
##       SalePriceCH    < 1.875     to the right, improve=0.2261538, (0 missing)
##   Surrogate splits:
##       PriceMM     < 2.26      to the left,  agree=0.72, adj=0.125, (0 split)
##       SalePriceMM < 2.26      to the left,  agree=0.72, adj=0.125, (0 split)
## 
## Node number 310: 5 observations
##   predicted class=CH  expected loss=0.2  P(node) =0.006666667
##     class counts:     4     1
##    probabilities: 0.800 0.200 
## 
## Node number 311: 31 observations,    complexity param=0.005707763
##   predicted class=MM  expected loss=0.4193548  P(node) =0.04133333
##     class counts:    13    18
##    probabilities: 0.419 0.581 
##   left son=622 (13 obs) right son=623 (18 obs)
##   Primary splits:
##       LoyalCH        < 0.62608   to the right, improve=1.7207060, (0 missing)
##       WeekofPurchase < 261       to the left,  improve=1.1230900, (0 missing)
##       PriceDiff      < 0.235     to the left,  improve=0.5736973, (0 missing)
##       SpecialMM      < 0.5       to the left,  improve=0.1876833, (0 missing)
##       PriceCH        < 1.875     to the right, improve=0.1402525, (0 missing)
##   Surrogate splits:
##       WeekofPurchase < 265       to the right, agree=0.710, adj=0.308, (0 split)
##       PriceCH        < 1.94      to the right, agree=0.677, adj=0.231, (0 split)
##       PriceMM        < 2.04      to the right, agree=0.677, adj=0.231, (0 split)
##       SpecialCH      < 0.5       to the right, agree=0.677, adj=0.231, (0 split)
##       StoreID        < 2.5       to the right, agree=0.645, adj=0.154, (0 split)
## 
## Node number 406: 17 observations,    complexity param=0.003424658
##   predicted class=MM  expected loss=0.4117647  P(node) =0.02266667
##     class counts:     7    10
##    probabilities: 0.412 0.588 
##   left son=812 (10 obs) right son=813 (7 obs)
##   Primary splits:
##       LoyalCH        < 0.420437  to the left,  improve=1.7210080, (0 missing)
##       PriceCH        < 1.875     to the right, improve=0.7908497, (0 missing)
##       SalePriceCH    < 1.875     to the right, improve=0.7908497, (0 missing)
##       WeekofPurchase < 254.5     to the right, improve=0.6067227, (0 missing)
##       StoreID        < 2.5       to the right, improve=0.2352941, (0 missing)
##   Surrogate splits:
##       WeekofPurchase < 238.5     to the right, agree=0.706, adj=0.286, (0 split)
##       PriceCH        < 1.825     to the right, agree=0.647, adj=0.143, (0 split)
##       SalePriceCH    < 1.825     to the right, agree=0.647, adj=0.143, (0 split)
## 
## Node number 407: 8 observations
##   predicted class=MM  expected loss=0.125  P(node) =0.01066667
##     class counts:     1     7
##    probabilities: 0.125 0.875 
## 
## Node number 622: 13 observations
##   predicted class=CH  expected loss=0.3846154  P(node) =0.01733333
##     class counts:     8     5
##    probabilities: 0.615 0.385 
## 
## Node number 623: 18 observations,    complexity param=0.003424658
##   predicted class=MM  expected loss=0.2777778  P(node) =0.024
##     class counts:     5    13
##    probabilities: 0.278 0.722 
##   left son=1246 (5 obs) right son=1247 (13 obs)
##   Primary splits:
##       PriceCH        < 1.755     to the left,  improve=1.4376070, (0 missing)
##       SalePriceCH    < 1.755     to the left,  improve=1.4376070, (0 missing)
##       WeekofPurchase < 261       to the left,  improve=1.3888890, (0 missing)
##       PriceDiff      < -0.065    to the right, improve=1.0683760, (0 missing)
##       ListPriceDiff  < 0.165     to the right, improve=0.4170274, (0 missing)
##   Surrogate splits:
##       SalePriceCH    < 1.755     to the left,  agree=1.000, adj=1.0, (0 split)
##       WeekofPurchase < 240.5     to the left,  agree=0.889, adj=0.6, (0 split)
##       PriceDiff      < 0.22      to the right, agree=0.833, adj=0.4, (0 split)
##       ListPriceDiff  < 0.235     to the right, agree=0.833, adj=0.4, (0 split)
## 
## Node number 812: 10 observations
##   predicted class=CH  expected loss=0.4  P(node) =0.01333333
##     class counts:     6     4
##    probabilities: 0.600 0.400 
## 
## Node number 813: 7 observations
##   predicted class=MM  expected loss=0.1428571  P(node) =0.009333333
##     class counts:     1     6
##    probabilities: 0.143 0.857 
## 
## Node number 1246: 5 observations
##   predicted class=CH  expected loss=0.4  P(node) =0.006666667
##     class counts:     3     2
##    probabilities: 0.600 0.400 
## 
## Node number 1247: 13 observations
##   predicted class=MM  expected loss=0.1538462  P(node) =0.01733333
##     class counts:     2    11
##    probabilities: 0.154 0.846
sum(tree_oj$frame$var == "<leaf>")
## [1] 27

There are 27 terminal nodes.

oj_preds = predict(tree_oj, newdata = oj_train, type = "class")
mean(oj_preds != oj_train$Purchase)
## [1] 0.1173333

The training error rate for the rpart tree is 11.73%.

# For comparison, fit the same tree with tree() from the tree package (as in the lab)
tree_oj_2 = tree::tree(Purchase ~ ., data = oj_train)

oj_preds_2 = predict(tree_oj_2, newdata = oj_train, type = "class")
mean(oj_preds_2 != oj_train$Purchase)
## [1] 0.1586667
summary(tree_oj_2)
## 
## Classification tree:
## tree::tree(formula = Purchase ~ ., data = oj_train)
## Variables actually used in tree construction:
## [1] "LoyalCH"       "PriceDiff"     "ListPriceDiff" "DiscMM"       
## [5] "SalePriceMM"  
## Number of terminal nodes:  11 
## Residual mean deviance:  0.7306 = 539.9 / 739 
## Misclassification error rate: 0.1587 = 119 / 750

Using tree::tree(), the number of terminal nodes is 11 and the misclassification error rate is 15.87%.
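The size difference comes from the stopping rules: the rpart() call above grows a full tree (cp = 0, minsplit = 15), while tree() stops early by default (mindev = 0.01). A deeper tree() fit could be grown by relaxing tree.control(), for example (an illustrative sketch; not run here):

# Grow a deeper tree by relaxing tree()'s default stopping rules
deep_tree = tree::tree(Purchase ~ ., data = oj_train,
                       control = tree.control(nobs = nrow(oj_train),
                                              mindev = 0, minsize = 15))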

(c) Type in the name of the tree object in order to get a detailed text output. Pick one of the terminal nodes, and interpret the information displayed.

tree_oj
## n= 750 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
##    1) root 750 292 CH (0.61066667 0.38933333)  
##      2) LoyalCH>=0.5036 425  59 CH (0.86117647 0.13882353)  
##        4) PriceDiff>=-0.39 404  45 CH (0.88861386 0.11138614)  
##          8) LoyalCH>=0.7053255 281  15 CH (0.94661922 0.05338078) *
##          9) LoyalCH< 0.7053255 123  30 CH (0.75609756 0.24390244)  
##           18) PriceDiff>=0.265 60   4 CH (0.93333333 0.06666667) *
##           19) PriceDiff< 0.265 63  26 CH (0.58730159 0.41269841)  
##             38) LoyalCH< 0.6919315 58  21 CH (0.63793103 0.36206897)  
##               76) StoreID>=5.5 17   2 CH (0.88235294 0.11764706) *
##               77) StoreID< 5.5 41  19 CH (0.53658537 0.46341463)  
##                154) SalePriceMM>=2.15 5   0 CH (1.00000000 0.00000000) *
##                155) SalePriceMM< 2.15 36  17 MM (0.47222222 0.52777778)  
##                  310) LoyalCH< 0.54608 5   1 CH (0.80000000 0.20000000) *
##                  311) LoyalCH>=0.54608 31  13 MM (0.41935484 0.58064516)  
##                    622) LoyalCH>=0.62608 13   5 CH (0.61538462 0.38461538) *
##                    623) LoyalCH< 0.62608 18   5 MM (0.27777778 0.72222222)  
##                     1246) PriceCH< 1.755 5   2 CH (0.60000000 0.40000000) *
##                     1247) PriceCH>=1.755 13   2 MM (0.15384615 0.84615385) *
##             39) LoyalCH>=0.6919315 5   0 MM (0.00000000 1.00000000) *
##        5) PriceDiff< -0.39 21   7 MM (0.33333333 0.66666667)  
##         10) LoyalCH>=0.742157 9   3 CH (0.66666667 0.33333333) *
##         11) LoyalCH< 0.742157 12   1 MM (0.08333333 0.91666667) *
##      3) LoyalCH< 0.5036 325  92 MM (0.28307692 0.71692308)  
##        6) LoyalCH>=0.282272 181  75 MM (0.41436464 0.58563536)  
##         12) PriceDiff>=0.05 107  46 CH (0.57009346 0.42990654)  
##           24) STORE< 1.5 53  16 CH (0.69811321 0.30188679)  
##             48) PriceDiff>=0.31 32   5 CH (0.84375000 0.15625000) *
##             49) PriceDiff< 0.31 21  10 MM (0.47619048 0.52380952)  
##               98) SalePriceMM< 2.04 14   5 CH (0.64285714 0.35714286) *
##               99) SalePriceMM>=2.04 7   1 MM (0.14285714 0.85714286) *
##           25) STORE>=1.5 54  24 MM (0.44444444 0.55555556)  
##             50) PriceDiff< 0.37 48  24 CH (0.50000000 0.50000000)  
##              100) LoyalCH>=0.49 15   4 CH (0.73333333 0.26666667) *
##              101) LoyalCH< 0.49 33  13 MM (0.39393939 0.60606061)  
##                202) WeekofPurchase>=269.5 8   3 CH (0.62500000 0.37500000) *
##                203) WeekofPurchase< 269.5 25   8 MM (0.32000000 0.68000000)  
##                  406) LoyalCH>=0.3618285 17   7 MM (0.41176471 0.58823529)  
##                    812) LoyalCH< 0.420437 10   4 CH (0.60000000 0.40000000) *
##                    813) LoyalCH>=0.420437 7   1 MM (0.14285714 0.85714286) *
##                  407) LoyalCH< 0.3618285 8   1 MM (0.12500000 0.87500000) *
##             51) PriceDiff>=0.37 6   0 MM (0.00000000 1.00000000) *
##         13) PriceDiff< 0.05 74  14 MM (0.18918919 0.81081081)  
##           26) SpecialCH>=0.5 11   5 CH (0.54545455 0.45454545) *
##           27) SpecialCH< 0.5 63   8 MM (0.12698413 0.87301587) *
##        7) LoyalCH< 0.282272 144  17 MM (0.11805556 0.88194444)  
##         14) LoyalCH>=0.0356415 94  16 MM (0.17021277 0.82978723)  
##           28) SalePriceMM>=2.205 6   3 CH (0.50000000 0.50000000) *
##           29) SalePriceMM< 2.205 88  13 MM (0.14772727 0.85227273)  
##             58) ListPriceDiff< 0.135 24   7 MM (0.29166667 0.70833333)  
##              116) STORE< 1.5 9   4 CH (0.55555556 0.44444444) *
##              117) STORE>=1.5 15   2 MM (0.13333333 0.86666667) *
##             59) ListPriceDiff>=0.135 64   6 MM (0.09375000 0.90625000) *
##         15) LoyalCH< 0.0356415 50   1 MM (0.02000000 0.98000000) *

Consider terminal node 8, reached by LoyalCH >= 0.5036, then PriceDiff >= -0.39, then LoyalCH >= 0.7053255:

8) LoyalCH>=0.7053255 281 15 CH (0.94661922 0.05338078) *

This node contains 281 training observations, of which 15 are misclassified (the loss). The predicted class is CH, with estimated class probabilities of 94.66% for CH and 5.34% for MM: highly CH-loyal customers who do not face a steep price disadvantage almost always buy Citrus Hill.

tree_oj_2
## node), split, n, deviance, yval, (yprob)
##       * denotes terminal node
## 
##   1) root 750 1003.000 CH ( 0.61067 0.38933 )  
##     2) LoyalCH < 0.5036 325  387.300 MM ( 0.28308 0.71692 )  
##       4) LoyalCH < 0.282272 144  104.600 MM ( 0.11806 0.88194 ) *
##       5) LoyalCH > 0.282272 181  245.600 MM ( 0.41436 0.58564 )  
##        10) PriceDiff < 0.05 74   71.790 MM ( 0.18919 0.81081 ) *
##        11) PriceDiff > 0.05 107  146.200 CH ( 0.57009 0.42991 ) *
##     3) LoyalCH > 0.5036 425  342.400 CH ( 0.86118 0.13882 )  
##       6) LoyalCH < 0.764572 169  195.900 CH ( 0.73373 0.26627 )  
##        12) PriceDiff < 0.265 96  130.400 CH ( 0.58333 0.41667 )  
##          24) PriceDiff < -0.165 23   26.400 MM ( 0.26087 0.73913 )  
##            48) ListPriceDiff < 0.115 7    8.376 CH ( 0.71429 0.28571 ) *
##            49) ListPriceDiff > 0.115 16    7.481 MM ( 0.06250 0.93750 ) *
##          25) PriceDiff > -0.165 73   90.970 CH ( 0.68493 0.31507 )  
##            50) DiscMM < 0.22 49   66.920 CH ( 0.57143 0.42857 )  
##             100) SalePriceMM < 2.155 39   53.830 MM ( 0.46154 0.53846 ) *
##             101) SalePriceMM > 2.155 10    0.000 CH ( 1.00000 0.00000 ) *
##            51) DiscMM > 0.22 24   13.770 CH ( 0.91667 0.08333 ) *
##        13) PriceDiff > 0.265 73   36.460 CH ( 0.93151 0.06849 ) *
##       7) LoyalCH > 0.764572 256  108.600 CH ( 0.94531 0.05469 )  
##        14) PriceDiff < 0.31 174   97.400 CH ( 0.91954 0.08046 ) *
##        15) PriceDiff > 0.31 82    0.000 CH ( 1.00000 0.00000 ) *

(d) Create a plot of the tree, and interpret the results.

fancyRpartPlot(tree_oj, cex = 0.6)

plot(tree_oj_2)
text(tree_oj_2, pretty = 0)

(e) Predict the response on the test data, and produce a confusion matrix comparing the test labels to the predicted test labels. What is the test error rate?

oj_preds = predict(tree_oj, newdata = oj_test, type = "class")
confusionMatrix(oj_preds, oj_test$Purchase)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  CH  MM
##         CH 160  33
##         MM  35  92
##                                          
##                Accuracy : 0.7875         
##                  95% CI : (0.7385, 0.831)
##     No Information Rate : 0.6094         
##     P-Value [Acc > NIR] : 7.64e-12       
##                                          
##                   Kappa : 0.5549         
##                                          
##  Mcnemar's Test P-Value : 0.9035         
##                                          
##             Sensitivity : 0.8205         
##             Specificity : 0.7360         
##          Pos Pred Value : 0.8290         
##          Neg Pred Value : 0.7244         
##              Prevalence : 0.6094         
##          Detection Rate : 0.5000         
##    Detection Prevalence : 0.6031         
##       Balanced Accuracy : 0.7783         
##                                          
##        'Positive' Class : CH             
## 
mean(oj_preds != oj_test$Purchase)
## [1] 0.2125

The test error rate for the rpart tree is 21.25%.

# Same evaluation with the model from tree::tree()
oj_preds_2 = predict(tree_oj_2, newdata = oj_test, type = "class")
confusionMatrix(oj_preds_2, oj_test$Purchase)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  CH  MM
##         CH 158  24
##         MM  37 101
##                                          
##                Accuracy : 0.8094         
##                  95% CI : (0.762, 0.8509)
##     No Information Rate : 0.6094         
##     P-Value [Acc > NIR] : 1.07e-14       
##                                          
##                   Kappa : 0.6069         
##                                          
##  Mcnemar's Test P-Value : 0.1244         
##                                          
##             Sensitivity : 0.8103         
##             Specificity : 0.8080         
##          Pos Pred Value : 0.8681         
##          Neg Pred Value : 0.7319         
##              Prevalence : 0.6094         
##          Detection Rate : 0.4938         
##    Detection Prevalence : 0.5687         
##       Balanced Accuracy : 0.8091         
##                                          
##        'Positive' Class : CH             
## 
mean(oj_preds_2 != oj_test$Purchase)
## [1] 0.190625

The test error rate for the tree() model is 19.06%.

(f) Apply the cv.tree() function to the training set in order to determine the optimal tree size.

set.seed(1)
# With FUN = prune.misclass, cv.tree() reports $dev as the CV misclassification count
cv_oj = tree::cv.tree(tree_oj_2, FUN = prune.misclass)
cv_oj
## $size
## [1] 11 10  8  7  4  2  1
## 
## $dev
## [1] 155 155 155 159 163 169 292
## 
## $k
## [1]       -Inf   0.000000   1.500000   3.000000   3.666667   7.500000 141.000000
## 
## $method
## [1] "misclass"
## 
## attr(,"class")
## [1] "prune"         "tree.sequence"
best_size = cv_oj$size[which.min(cv_oj$dev)]
best_size
## [1] 11
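Note that which.min() returns the first minimum: sizes 11, 10, and 8 all tie at 155 misclassifications. A common tie-break is to take the smallest tree among those tied (a one-line sketch):

# Smallest tree size among those tied at the minimum CV error
min(cv_oj$size[cv_oj$dev == min(cv_oj$dev)])  # 8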

(g) Produce a plot with tree size on the x-axis and cross-validated classification error rate on the y-axis.

plot(cv_oj$size, cv_oj$dev / nrow(oj_train), type = "b",
     xlab = "Tree Size (Number of Terminal Nodes)",
     ylab = "Cross-Validated Classification Error Rate",
     main = "CV Error vs. Tree Size")

(h) Which tree size corresponds to the lowest cross-validated classification error rate? Sizes 11, 10, and 8 all tie at the minimum (155 misclassifications); which.min() reports 11, but size 8 is the most parsimonious of the tied trees.

(i) Produce a pruned tree corresponding to the optimal tree size obtained using cross-validation. If cross-validation does not lead to selection of a pruned tree, then create a pruned tree with five terminal nodes.

Cross-validation did not select a pruned tree (the full 11-node tree is among the minimizers), so, as instructed, we prune to five terminal nodes.

pruned_tree = prune.misclass(tree_oj_2, best = 5)
plot(pruned_tree)
text(pruned_tree, pretty = 0)

(j) Compare the training error rates between the pruned and unpruned trees. Which is higher?

prune_preds = predict(pruned_tree, newdata = oj_train, type = "class")
mean(prune_preds != oj_train$Purchase)
## [1] 0.1666667

Pruned training error: 16.67%

Unpruned training error: 15.87% (from the tree() fit)

The pruned tree has the higher training error, as expected: removing splits can only worsen (or leave unchanged) the fit to the training data.

(k) Compare the test error rates between the pruned and unpruned trees. Which is higher?

prune_preds = predict(pruned_tree, newdata = oj_test, type = "class")
mean(prune_preds != oj_test$Purchase)
## [1] 0.159375

Pruned test error: 15.94%

Unpruned test error: 19.06% (from the tree() fit)

The unpruned tree has the higher test error: pruning to five terminal nodes improves test performance, suggesting the unpruned tree was overfitting.
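Finally, a small sketch collecting the test error rates computed above into one table (using the prediction objects already in the workspace):

# Side-by-side test error rates for the three trees fit to the OJ data
data.frame(
  model      = c("rpart (unpruned)", "tree (unpruned)", "tree (pruned to 5)"),
  test_error = c(mean(oj_preds    != oj_test$Purchase),
                 mean(oj_preds_2  != oj_test$Purchase),
                 mean(prune_preds != oj_test$Purchase))
)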