# Greeshma Ganji
# ISTE 780
# Summer 2023
# Lab 5
# PART I
# 1) Creating a training set containing a random sample of 800 observations, and a test set containing the remaining observations
library(ISLR)
library(tree)
data(OJ)
set.seed(1)
train <- sample(1:nrow(OJ), 800)
OJ.train <- OJ[train, ]
OJ.test <- OJ[-train, ]
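# Sanity check (optional): OJ in ISLR has 1070 observations, so this split
# should leave 800 rows for training and 270 for testing.
nrow(OJ.train)  # 800
nrow(OJ.test)   # 270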
# 2) Fitting a tree to the training data, with Purchase as the response and all other variables as predictors
tree.oj <- tree(Purchase ~ ., data = OJ.train)
summary(tree.oj)
##
## Classification tree:
## tree(formula = Purchase ~ ., data = OJ.train)
## Variables actually used in tree construction:
## [1] "LoyalCH" "PriceDiff" "SpecialCH" "ListPriceDiff"
## [5] "PctDiscMM"
## Number of terminal nodes: 9
## Residual mean deviance: 0.7432 = 587.8 / 791
## Misclassification error rate: 0.1588 = 127 / 800
# The fitted tree has 9 terminal nodes, uses 5 of the predictors (LoyalCH, PriceDiff, SpecialCH, ListPriceDiff, PctDiscMM), and has a training error rate of 0.1588 (127/800)
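# The training error can also be recovered programmatically: for a
# classification tree, summary.tree stores a (misclassified, total) pair
# in its misclass component.
misclass <- summary(tree.oj)$misclass
misclass[1] / misclass[2]  # 127 / 800 = 0.15875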
# 3) Examining the fitted tree object in text form
tree.oj
## node), split, n, deviance, yval, (yprob)
## * denotes terminal node
##
## 1) root 800 1073.00 CH ( 0.60625 0.39375 )
## 2) LoyalCH < 0.5036 365 441.60 MM ( 0.29315 0.70685 )
## 4) LoyalCH < 0.280875 177 140.50 MM ( 0.13559 0.86441 )
## 8) LoyalCH < 0.0356415 59 10.14 MM ( 0.01695 0.98305 ) *
## 9) LoyalCH > 0.0356415 118 116.40 MM ( 0.19492 0.80508 ) *
## 5) LoyalCH > 0.280875 188 258.00 MM ( 0.44149 0.55851 )
## 10) PriceDiff < 0.05 79 84.79 MM ( 0.22785 0.77215 )
## 20) SpecialCH < 0.5 64 51.98 MM ( 0.14062 0.85938 ) *
## 21) SpecialCH > 0.5 15 20.19 CH ( 0.60000 0.40000 ) *
## 11) PriceDiff > 0.05 109 147.00 CH ( 0.59633 0.40367 ) *
## 3) LoyalCH > 0.5036 435 337.90 CH ( 0.86897 0.13103 )
## 6) LoyalCH < 0.764572 174 201.00 CH ( 0.73563 0.26437 )
## 12) ListPriceDiff < 0.235 72 99.81 MM ( 0.50000 0.50000 )
## 24) PctDiscMM < 0.196196 55 73.14 CH ( 0.61818 0.38182 ) *
## 25) PctDiscMM > 0.196196 17 12.32 MM ( 0.11765 0.88235 ) *
## 13) ListPriceDiff > 0.235 102 65.43 CH ( 0.90196 0.09804 ) *
## 7) LoyalCH > 0.764572 261 91.20 CH ( 0.95785 0.04215 ) *
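# Reading one terminal node as an example: node 8 holds the 59 training
# customers with LoyalCH < 0.0356; 98.3% of them bought MM, so the tree
# predicts MM there with high confidence. Very low CH loyalty is a strong
# signal for an MM purchase.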
# 4) Plotting the tree and interpreting the results
plot(tree.oj)
text(tree.oj, pretty = 0)
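# Interpretation: LoyalCH (loyalty to Citrus Hill) appears in the top three
# levels of splits, making it by far the most important predictor. The price
# variables (PriceDiff, ListPriceDiff, PctDiscMM) and SpecialCH only refine
# the prediction further down the tree.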

# 5) Predicting the response on the test data
tree.pred <- predict(tree.oj, OJ.test, type = "class")
confusion <- table(tree.pred, OJ.test$Purchase)
confusion
##
## tree.pred CH MM
## CH 160 38
## MM 8 64
test_error_rate <- (confusion[1, 2] + confusion[2, 1]) / sum(confusion)
test_error_rate
## [1] 0.1703704
# The test error rate is about 17% (46 of 270 test observations misclassified)
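# Equivalent one-liner as a cross-check:
mean(tree.pred != OJ.test$Purchase)
## [1] 0.1703704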
# 6) Determining the optimal tree size by cross-validation
cv.oj <- cv.tree(tree.oj, FUN = prune.misclass)
cv.oj
## $size
## [1] 9 8 7 4 2 1
##
## $dev
## [1] 150 150 149 158 172 315
##
## $k
## [1] -Inf 0.000000 3.000000 4.333333 10.500000 151.000000
##
## $method
## [1] "misclass"
##
## attr(,"class")
## [1] "prune" "tree.sequence"
# 7) Producing a plot with tree size on the x-axis and cross-validated classification error rate on the y-axis.
plot(cv.oj$size, cv.oj$dev / nrow(OJ.train), type = "b",
     xlab = "Tree size", ylab = "CV classification error rate")

# 8) Which tree size corresponds to the lowest cross-validated classification error rate?
# The 7-node tree has the lowest cross-validated error: 149 misclassifications
# out of 800, an error rate of about 18.6%.
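# The same answer programmatically:
cv.oj$size[which.min(cv.oj$dev)]
## [1] 7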
# 9) Producing a pruned tree with 7 terminal nodes (the size chosen by cross-validation)
prune.oj <- prune.misclass(tree.oj, best = 7)
plot(prune.oj)
text(prune.oj, pretty = 0)

# 10) Comparing the training error rates between the pruned and un-pruned trees.
summary(tree.oj)
##
## Classification tree:
## tree(formula = Purchase ~ ., data = OJ.train)
## Variables actually used in tree construction:
## [1] "LoyalCH" "PriceDiff" "SpecialCH" "ListPriceDiff"
## [5] "PctDiscMM"
## Number of terminal nodes: 9
## Residual mean deviance: 0.7432 = 587.8 / 791
## Misclassification error rate: 0.1588 = 127 / 800
summary(prune.oj)
##
## Classification tree:
## snip.tree(tree = tree.oj, nodes = c(4L, 10L))
## Variables actually used in tree construction:
## [1] "LoyalCH" "PriceDiff" "ListPriceDiff" "PctDiscMM"
## Number of terminal nodes: 7
## Residual mean deviance: 0.7748 = 614.4 / 793
## Misclassification error rate: 0.1625 = 130 / 800
# The training misclassification error rate is slightly higher for the pruned tree (0.1625 vs 0.1588 for the un-pruned tree).
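# Cross-check by re-predicting on the training set:
mean(predict(tree.oj, OJ.train, type = "class") != OJ.train$Purchase)   # 0.15875
mean(predict(prune.oj, OJ.train, type = "class") != OJ.train$Purchase)  # 0.1625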
# 11) Comparing the test error rates between the pruned and un-pruned trees
prune.pred <- predict(prune.oj, OJ.test, type = "class")
confusion_prune <- table(prune.pred, OJ.test$Purchase)
confusion_prune
##
## prune.pred CH MM
## CH 160 36
## MM 8 66
test_error_rate_prune <- (confusion_prune[1, 2] + confusion_prune[2, 1]) / sum(confusion_prune)
test_error_rate_prune
## [1] 0.162963
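# The pruned tree's test error rate (~16.3%, 44/270) is slightly lower than
# the un-pruned tree's (~17.0%, 46/270), so pruning to 7 terminal nodes yields
# a simpler tree that also generalizes marginally better.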