Problem 3

Consider the Gini index, classification error, and entropy in a simple classification setting with two classes. Create a single plot that displays each of these quantities as a function of p-hat_m1.
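With two classes and p = p-hat_m1, the three quantities being plotted are the classification error E = 1 - max(p, 1 - p), the Gini index G = 2p(1 - p), and the entropy D = -[p log(p) + (1 - p) log(1 - p)].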

p <- seq(0, 1, 0.01)
gini.index <- 2 * p * (1 - p)
class.error <- 1 - pmax(p, 1 - p)
entropy <- -(p * log(p) + (1 - p) * log(1 - p))  # NaN at p = 0 and p = 1
matplot(p, cbind(class.error, gini.index, entropy),
        col = c("purple", "blue", "pink"), pch = 16,
        main = "classification tree measures",
        xlab = "p-hat_m1 values", ylab = "splitting criterion")
legend("bottom", pch = 16, title = "measures",
       col = c("purple", "blue", "pink"),
       legend = c("classification error", "Gini index", "entropy"),
       box.lty = 1)

Problem 8

Use the Carseats data set to predict Sales using regression trees and related approaches.

Part A

Split the data into training and testing sets.

library(ISLR)
library(tree)
library(randomForest)

attach(Carseats)

set.seed(1)
train <- sample(1:nrow(Carseats), nrow(Carseats)/2)
Carseats.train <- Carseats[train, ]
Carseats.test <- Carseats[-train, ]

Part B

Fit a regression tree to the training set and plot the tree. What is the test MSE?

Carseats.tree <- tree(Sales ~ ., data = Carseats.train)
summary(Carseats.tree)
## 
## Regression tree:
## tree(formula = Sales ~ ., data = Carseats.train)
## Variables actually used in tree construction:
## [1] "ShelveLoc"   "Price"       "Age"         "Advertising" "CompPrice"  
## [6] "US"         
## Number of terminal nodes:  18 
## Residual mean deviance:  2.167 = 394.3 / 182 
## Distribution of residuals:
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -3.88200 -0.88200 -0.08712  0.00000  0.89590  4.09900

Variables used in the tree: ShelveLoc, Price, Age, Advertising, CompPrice, US.

plot(Carseats.tree)
text(Carseats.tree, pretty = 0)

pred <- predict(Carseats.tree, newdata = Carseats.test)
mean((pred - Carseats.test$Sales)^2)
## [1] 4.922039

Test MSE = 4.922

Part C

Use cross-validation to determine the optimal level of tree complexity, and check whether pruning the tree improves the test MSE.

set.seed(1)
cv <- cv.tree(Carseats.tree)
plot(cv$size, cv$dev, type = "b")

tree.min <- which.min(cv$dev)  # index of the minimum CV deviance, not a tree size
tree.min
## [1] 1

cv.tree() lists sizes in decreasing order, so index 1 corresponds to the unpruned 18-node tree: cross-validation does not favor pruning on this split. We prune to 5 terminal nodes anyway to check the effect on the test MSE.

Carseats.prune <- prune.tree(Carseats.tree, best = 5)
plot(Carseats.prune)
text(Carseats.prune, pretty = 0)

pred2 <- predict(Carseats.prune, newdata = Carseats.test)
mean((pred2 - Carseats.test$Sales)^2)
## [1] 5.186482

Test MSE = 5.186, greater than the unpruned tree's 4.922, so pruning does not improve the test MSE here.
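To prune at the CV-selected size programmatically instead of hard-coding best = 5, a minimal sketch (best.size and Carseats.prune2 are illustrative names):

best.size <- cv$size[which.min(cv$dev)]  # tree size with the lowest CV deviance
Carseats.prune2 <- prune.tree(Carseats.tree, best = best.size)

With the CV results above, best.size is the full 18-node tree, so this sketch simply returns the unpruned tree.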

Part D

Use the bagging approach to analyze the data.

# mtry = 10 uses all 10 predictors at every split, making random forest equivalent to bagging
Carseats.bag <- randomForest(Sales ~ ., data = Carseats.train, mtry = 10, ntree = 500, importance = TRUE)
yhat.bag <- predict(Carseats.bag, newdata = Carseats.test)
mean((yhat.bag - Carseats.test$Sales)^2)
## [1] 2.588149

Test MSE = 2.588, substantially lower than the test MSEs from both regression tree models above.

importance(Carseats.bag)
##                %IncMSE IncNodePurity
## CompPrice   25.6142224    170.664874
## Income       4.5915046     91.047400
## Advertising 12.9597786     98.653166
## Population  -0.7823257     57.653647
## Price       55.2302897    510.311641
## ShelveLoc   48.5480002    381.630885
## Age         16.7267043    158.261715
## Education    0.9105144     43.460189
## Urban        0.2260420      9.347446
## US           5.8455044     18.261630

Most important variables: Price and ShelveLoc.
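The same importance information can be plotted directly; varImpPlot() from the randomForest package displays both the %IncMSE and IncNodePurity measures:

varImpPlot(Carseats.bag, main = "variable importance (bagging)")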

Part E

Use Random Forest to analyze the data.

set.seed(1)
# mtry = 3 is about p/3 of the 10 predictors per split, the usual default for regression forests
Carseats.rf <- randomForest(Sales ~ ., data = Carseats.train, mtry = 3, ntree = 500, importance = TRUE)
Carseats.rf
## 
## Call:
##  randomForest(formula = Sales ~ ., data = Carseats.train, mtry = 3,      ntree = 500, importance = TRUE) 
##                Type of random forest: regression
##                      Number of trees: 500
## No. of variables tried at each split: 3
## 
##           Mean of squared residuals: 3.363781
##                     % Var explained: 57.22
yhat.rf <- predict(Carseats.rf, newdata = Carseats.test)
mean((yhat.rf - Carseats.test$Sales)^2)
## [1] 2.960559

Test MSE = 2.961, slightly higher than bagging on this particular split.

importance(Carseats.rf)
##                %IncMSE IncNodePurity
## CompPrice   14.8840765     158.82956
## Income       4.3293950     125.64850
## Advertising  8.2215192     107.51700
## Population  -0.9488134      97.06024
## Price       34.9793386     385.93142
## ShelveLoc   34.9248499     298.54210
## Age         14.3055912     178.42061
## Education    1.3117842      70.49202
## Urban       -1.2680807      17.39986
## US           6.1139696      33.98963
As with bagging, Price and ShelveLoc are by far the most important variables.

detach(Carseats)
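A natural follow-up is to tune mtry rather than fixing it, by looping over candidate values and recording the test MSE. A minimal sketch (mtry.vals and test.mse are illustrative names):

set.seed(1)
mtry.vals <- 1:10
test.mse <- sapply(mtry.vals, function(m) {
  rf <- randomForest(Sales ~ ., data = Carseats.train, mtry = m, ntree = 500)
  mean((predict(rf, newdata = Carseats.test) - Carseats.test$Sales)^2)
})
plot(mtry.vals, test.mse, type = "b", xlab = "mtry", ylab = "test MSE")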

Problem 9

Use the OJ data set in the ISLR package.

attach(OJ)

Part A

Create a training set containing a random sample of 800 observations, and a test set containing the remaining observations.

set.seed(1)
train <- sample(1:nrow(OJ), 800)
oj.train <- OJ[train, ]
oj.test <- OJ[-train, ]

Part B

Fit a tree to the training data with Purchase as the response.

# Purchase is a factor, so tree() fits a classification tree; the method = "class"
# argument is an rpart-style option that tree() does not use
oj.tree <- tree(Purchase ~ ., data = oj.train, method = "class")
summary(oj.tree)
## 
## Classification tree:
## tree(formula = Purchase ~ ., data = oj.train, method = "class")
## Variables actually used in tree construction:
## [1] "LoyalCH"       "PriceDiff"     "SpecialCH"     "ListPriceDiff"
## [5] "PctDiscMM"    
## Number of terminal nodes:  9 
## Residual mean deviance:  0.7432 = 587.8 / 791 
## Misclassification error rate: 0.1588 = 127 / 800

Terminal nodes: 9. Training error rate: 0.1588.

Part C

Type in the name of the tree object in order to get a detailed text output.

oj.tree
## node), split, n, deviance, yval, (yprob)
##       * denotes terminal node
## 
##  1) root 800 1073.00 CH ( 0.60625 0.39375 )  
##    2) LoyalCH < 0.5036 365  441.60 MM ( 0.29315 0.70685 )  
##      4) LoyalCH < 0.280875 177  140.50 MM ( 0.13559 0.86441 )  
##        8) LoyalCH < 0.0356415 59   10.14 MM ( 0.01695 0.98305 ) *
##        9) LoyalCH > 0.0356415 118  116.40 MM ( 0.19492 0.80508 ) *
##      5) LoyalCH > 0.280875 188  258.00 MM ( 0.44149 0.55851 )  
##       10) PriceDiff < 0.05 79   84.79 MM ( 0.22785 0.77215 )  
##         20) SpecialCH < 0.5 64   51.98 MM ( 0.14062 0.85938 ) *
##         21) SpecialCH > 0.5 15   20.19 CH ( 0.60000 0.40000 ) *
##       11) PriceDiff > 0.05 109  147.00 CH ( 0.59633 0.40367 ) *
##    3) LoyalCH > 0.5036 435  337.90 CH ( 0.86897 0.13103 )  
##      6) LoyalCH < 0.764572 174  201.00 CH ( 0.73563 0.26437 )  
##       12) ListPriceDiff < 0.235 72   99.81 MM ( 0.50000 0.50000 )  
##         24) PctDiscMM < 0.196197 55   73.14 CH ( 0.61818 0.38182 ) *
##         25) PctDiscMM > 0.196197 17   12.32 MM ( 0.11765 0.88235 ) *
##       13) ListPriceDiff > 0.235 102   65.43 CH ( 0.90196 0.09804 ) *
##      7) LoyalCH > 0.764572 261   91.20 CH ( 0.95785 0.04215 ) *
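Interpreting one terminal node: node 8) contains the 59 training customers with LoyalCH < 0.0356415. Its deviance is 10.14, the predicted class is MM, and about 98.3% of these customers purchased Minute Maid; customers with very low Citrus Hill loyalty almost always buy MM.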

Part D

Create a plot of the tree.

plot(oj.tree)
text(oj.tree, pretty = 0)

Part E

Predict the response on the test data. Produce a confusion matrix.

tree.pred <- predict(oj.tree, oj.test, type = "class")
table(tree.pred, oj.test$Purchase)
##          
## tree.pred  CH  MM
##        CH 160  38
##        MM   8  64
(8+38)/(160+38+8+64)
## [1] 0.1703704

Test Error Rate: 0.17
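Equivalently, the test error rate can be computed directly from the predicted and observed classes:

mean(tree.pred != oj.test$Purchase)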

Part F

Apply cv.tree() to the training set to determine the optimal tree size.

cv.oj <- cv.tree(oj.tree, FUN = prune.misclass)
cv.oj
## $size
## [1] 9 8 7 4 2 1
## 
## $dev
## [1] 150 150 149 158 172 315
## 
## $k
## [1]       -Inf   0.000000   3.000000   4.333333  10.500000 151.000000
## 
## $method
## [1] "misclass"
## 
## attr(,"class")
## [1] "prune"         "tree.sequence"

Part G

Produce a plot with tree size on the x-axis and the cross-validated classification error on the y-axis.

# with FUN = prune.misclass, cv.oj$dev counts CV misclassifications rather than reporting a rate
plot(cv.oj$size, cv.oj$dev, type = "b", xlab = "tree size", ylab = "CV misclassifications")

Part H

Which tree size corresponds to the lowest cross-validated classification error?

Tree size 7 has the lowest cross-validated classification error (149 misclassified, versus 150 for sizes 8 and 9).
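The same answer can be read off programmatically:

cv.oj$size[which.min(cv.oj$dev)]  # size 7 for the CV results above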

Part I

Produce a pruned tree corresponding to the optimal tree size obtained using cross validation.

oj.prune <- prune.misclass(oj.tree, best = 7)
plot(oj.prune)
text(oj.prune, pretty = 0)

Part J

Compare the training error rates between the pruned and unpruned trees.

summary(oj.tree)
## 
## Classification tree:
## tree(formula = Purchase ~ ., data = oj.train, method = "class")
## Variables actually used in tree construction:
## [1] "LoyalCH"       "PriceDiff"     "SpecialCH"     "ListPriceDiff"
## [5] "PctDiscMM"    
## Number of terminal nodes:  9 
## Residual mean deviance:  0.7432 = 587.8 / 791 
## Misclassification error rate: 0.1588 = 127 / 800
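For the pruned tree, the same summary call reports its training error rate (output not shown here):

summary(oj.prune)

Because the 7-node tree is a subtree of the 9-node tree, its training misclassification rate should be at least the unpruned tree's 0.1588: pruning merges leaves and can only fit the training data more coarsely.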

Part K

Compare the test error rates between the pruned and unpruned trees.

prune.pred <- predict(oj.prune, oj.test, type = "class")
table(prune.pred, oj.test$Purchase)
##           
## prune.pred  CH  MM
##         CH 160  36
##         MM   8  66
(8+36)/(160+36+8+66)
## [1] 0.162963

Test Error Rate: 0.163 (slightly lower than the unpruned tree's 0.170, so the pruned tree is both simpler and at least as accurate on the test set)