Problem 3:

p <- seq(0, 1, 0.01)
class_error <- 1 - pmax(p, 1 - p)
gini_index <- 2 * p * (1 - p)
entropy <- -(p * log(p + 1e-6) + (1 - p) * log(1 - p + 1e-6))

plot(p, gini_index, type = "l", col = "green", lwd = 2,
     ylab = "Impurity Index", xlab = expression(hat(p)[m1]), 
     ylim = c(0, 0.8), main = "Impurity Measures as a Function of p")

lines(p, class_error, col = "red", lwd = 2)
lines(p, entropy, col = "blue", lwd = 2)

legend("topright", legend = c("Gini Index", "Classification Error", "Entropy"),
       col = c("green", "red", "blue"), lwd = 2)

Problem 8:
(a) Split the data set into a training set and a test set:

library(ISLR2)
library(tree)
library(randomForest)
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
library(BART)
## Warning: package 'BART' was built under R version 4.4.3
## Loading required package: nlme
## Loading required package: survival
set.seed(1)
train <- sample(1:nrow(Carseats), nrow(Carseats) / 2)

Carseats.train <- Carseats[train, ]
Carseats.test <- Carseats[-train, ]


(b) Fit a regression tree:

tree.carseats <- tree(Sales ~ ., data = Carseats, subset = train)

plot(tree.carseats)
text(tree.carseats, pretty = 0, cex = 0.8)

yhat <- predict(tree.carseats, newdata = Carseats.test)
sales.test <- Carseats.test$Sales
mean((yhat - sales.test)^2)
## [1] 4.922039


(c) Cross-validation and Pruning:

set.seed(1)
cv.carseats <- cv.tree(tree.carseats)

plot(cv.carseats$size, cv.carseats$dev, type = "b", 
     xlab = "Tree Size", ylab = "CV Error")

prune.carseats <- prune.tree(tree.carseats, best = 9)
plot(prune.carseats)
text(prune.carseats, pretty = 0)

yhat.prune <- predict(prune.carseats, newdata = Carseats.test)
mean((yhat.prune - sales.test)^2)
## [1] 4.918134


Pruning does not improve the test MSE


(d) Bagging:

set.seed(1)
bag.carseats <- randomForest(Sales ~ ., data = Carseats, subset = train, 
                             mtry = 10, importance = TRUE)

yhat.bag <- predict(bag.carseats, newdata = Carseats.test)
mean((yhat.bag - sales.test)^2)
## [1] 2.605253
importance(bag.carseats)
##                %IncMSE IncNodePurity
## CompPrice   24.8888481    170.182937
## Income       4.7121131     91.264880
## Advertising 12.7692401     97.164338
## Population  -1.8074075     58.244596
## Price       56.3326252    502.903407
## ShelveLoc   48.8886689    380.032715
## Age         17.7275460    157.846774
## Education    0.5962186     44.598731
## Urban        0.1728373      9.822082
## US           4.2172102     18.073863
varImpPlot(bag.carseats)


(e) Random Forests:

set.seed(1)
rf.carseats <- randomForest(Sales ~ ., data = Carseats, subset = train, 
                            mtry = 3, importance = TRUE)

yhat.rf <- predict(rf.carseats, newdata = Carseats.test)
mean((yhat.rf - sales.test)^2)
## [1] 2.960559
importance(rf.carseats)
##                %IncMSE IncNodePurity
## CompPrice   14.8840765     158.82956
## Income       4.3293950     125.64850
## Advertising  8.2215192     107.51700
## Population  -0.9488134      97.06024
## Price       34.9793386     385.93142
## ShelveLoc   34.9248499     298.54210
## Age         14.3055912     178.42061
## Education    1.3117842      70.49202
## Urban       -1.2680807      17.39986
## US           6.1139696      33.98963
varImpPlot(rf.carseats)


Effect of m: Random Forests force the trees to look at predictors other than the dominating Price and ShelveLoc. This decorrelates the trees.


(f) BART:

set.seed(1)
x <- Carseats[, -1]
y <- Carseats$Sales 

x.train <- x[train, ]
y.train <- y[train]
x.test <- x[-train, ]

bart.fit <- gbart(x.train, y.train, x.test = x.test)
## *****Calling gbart: type=1
## *****Data:
## data:n,p,np: 200, 14, 200
## y1,yn: 2.781850, 1.091850
## x1,x[n*p]: 107.000000, 1.000000
## xp1,xp[np*p]: 111.000000, 1.000000
## *****Number of Trees: 200
## *****Number of Cut Points: 63 ... 1
## *****burn,nd,thin: 100,1000,1
## *****Prior:beta,alpha,tau,nu,lambda,offset: 2,0.95,0.273474,3,0.23074,7.57815
## *****sigma: 1.088371
## *****w (weights): 1.000000 ... 1.000000
## *****Dirichlet:sparse,theta,omega,a,b,rho,augment: 0,0,1,0.5,1,14,0
## *****printevery: 100
## 
## MCMC
## done 0 (out of 1100)
## done 100 (out of 1100)
## done 200 (out of 1100)
## done 300 (out of 1100)
## done 400 (out of 1100)
## done 500 (out of 1100)
## done 600 (out of 1100)
## done 700 (out of 1100)
## done 800 (out of 1100)
## done 900 (out of 1100)
## done 1000 (out of 1100)
## time: 2s
## trcnt,tecnt: 1000,1000
yhat.bart <- bart.fit$yhat.test.mean
mean((yhat.bart - sales.test)^2)
## [1] 1.450842


Results: The test MSE should drop to approximately 1.4 to 1.6, making it the most highly accurate predictive model out of all the methods tested in this exercise.