# Gini index, entropy and classification error for a two-class node, as a function of p
p = seq(0, 1, 0.001)
gini = 2 * p * (1 - p)
entropy = -(p * log(p) + (1 - p) * log(1 - p)) # NaN at p = 0 and p = 1; those points are simply not drawn
class.err = 1 - pmax(p, 1 - p)
matplot(p, cbind(gini, entropy, class.err), type = "l", lty = 1,
        col = c("red", "black", "navy"), ylab = "Impurity measure")
legend("top", legend = c("Gini index", "Entropy", "Classification error"),
       col = c("red", "black", "navy"), lty = 1)
library(ISLR2)
library(tree)
attach(Carseats)
set.seed(1)
train=sample(nrow(Carseats),nrow(Carseats)/2)
test=Carseats[-train,"Sales"]
tree.carseats=tree(Sales~.,data=Carseats,subset=train)
summary.carseats=summary(tree.carseats)
summary.carseats
##
## Regression tree:
## tree(formula = Sales ~ ., data = Carseats, subset = train)
## Variables actually used in tree construction:
## [1] "ShelveLoc" "Price" "Age" "Advertising" "CompPrice"
## [6] "US"
## Number of terminal nodes: 18
## Residual mean deviance: 2.167 = 394.3 / 182
## Distribution of residuals:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.88200 -0.88200 -0.08712 0.00000 0.89590 4.09900
plot(tree.carseats)
text(tree.carseats,
pretty = 0,
cex = 0.5)
tree.pred=predict(tree.carseats,newdata=Carseats[-train,])
mean((tree.pred-test)^2)
## [1] 4.922039
The test MSE for the unpruned regression tree is about 4.92.
cv.carseats=cv.tree(tree.carseats,FUN=prune.tree)
tree.min=which.min(cv.carseats$dev)
best.node.no=cv.carseats$size[tree.min]
prune.carseats=prune.tree(tree.carseats,best=best.node.no)
prune.pred=predict(prune.carseats, Carseats[-train,])
mean((prune.pred-test)^2)
## [1] 4.922039
plot(cv.carseats$size, cv.carseats$dev, type = "b")
points(best.node.no, cv.carseats$dev[tree.min], col = "navy", cex = 2, pch = 20)
plot(prune.carseats)
text(prune.carseats, pretty = 0)
Using cross-validation to choose the tree complexity and pruning accordingly did not improve the test MSE; the selected tree gives the same result (4.92) as the unpruned tree.
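To make that concrete, a short sketch (not part of the original output) recomputes the test MSE for every candidate size returned by cv.tree():
# Sketch: test MSE for each candidate tree size from cv.tree();
# the trivial single-node size is skipped.
sizes = cv.carseats$size[cv.carseats$size > 1]
size.mse = sapply(sizes, function(s) {
  pruned = prune.tree(tree.carseats, best = s)
  mean((predict(pruned, Carseats[-train, ]) - test)^2)
})
cbind(size = sizes, test.mse = round(size.mse, 3))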
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
set.seed(1)
bag.carseats=randomForest(Sales~.,data=Carseats,mtry=10,subset=train,importance=T) # mtry = 10 uses all 10 predictors, i.e. bagging
bag.carseats
##
## Call:
## randomForest(formula = Sales ~ ., data = Carseats, mtry = 10, importance = T, subset = train)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 10
##
## Mean of squared residuals: 2.889221
## % Var explained: 63.26
yhat.bag=predict(bag.carseats,newdata=Carseats[-train,])
mean((yhat.bag-test)^2)
## [1] 2.605253
importance(bag.carseats)
## %IncMSE IncNodePurity
## CompPrice 24.8888481 170.182937
## Income 4.7121131 91.264880
## Advertising 12.7692401 97.164338
## Population -1.8074075 58.244596
## Price 56.3326252 502.903407
## ShelveLoc 48.8886689 380.032715
## Age 17.7275460 157.846774
## Education 0.5962186 44.598731
## Urban 0.1728373 9.822082
## US 4.2172102 18.073863
varImpPlot(bag.carseats)
Bagging produces a test MSE of 2.61, roughly half that of the single regression tree (4.92). The most important variables are Price and ShelveLoc.
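As a quick check (not in the original output), the %IncMSE column can be sorted to make the importance ordering explicit:
# Sketch: rank predictors by %IncMSE from the bagged model.
sort(importance(bag.carseats)[, "%IncMSE"], decreasing = TRUE)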
set.seed(1)
rf5.carseats=randomForest(Sales~.,data=Carseats,mtry=5,subset=train,importance=T)
rf3.carseats=randomForest(Sales~.,data=Carseats,mtry=3,subset=train,importance=T)
rf5.carseats
##
## Call:
## randomForest(formula = Sales ~ ., data = Carseats, mtry = 5, importance = T, subset = train)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 5
##
## Mean of squared residuals: 3.060785
## % Var explained: 61.08
rf3.carseats
##
## Call:
## randomForest(formula = Sales ~ ., data = Carseats, mtry = 3, importance = T, subset = train)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 3
##
## Mean of squared residuals: 3.387281
## % Var explained: 56.92
yhat5.rf=predict(rf5.carseats,newdata=Carseats[-train,])
yhat3.rf=predict(rf3.carseats,newdata=Carseats[-train,])
mean((yhat5.rf-test)^2)
## [1] 2.714168
mean((yhat3.rf-test)^2)
## [1] 3.009147
par(mfrow=c(2,2))
varImpPlot(rf5.carseats)
varImpPlot(rf3.carseats)
The random forest test MSE is 2.71 with mtry = 5 and 3.01 with mtry = 3. Both are slightly worse than bagging on this split, and the error grows as fewer predictors are considered at each split.
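A small sketch (the grid of mtry values is chosen here for illustration and was not part of the original analysis) makes this pattern explicit:
# Sketch: test MSE as a function of mtry (number of predictors tried per split).
mtry.grid = c(3, 5, 7, 10)
rf.mse = sapply(mtry.grid, function(m) {
  set.seed(1)
  fit = randomForest(Sales ~ ., data = Carseats, subset = train, mtry = m)
  mean((predict(fit, newdata = Carseats[-train, ]) - test)^2)
})
data.frame(mtry = mtry.grid, test.mse = round(rf.mse, 3))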
library(BART)
## Loading required package: nlme
## Loading required package: nnet
## Loading required package: survival
As shown below, BART attains the lowest test MSE of the methods considered here, about 1.47.
x = Carseats[, 2:11]
y = Carseats[, "Sales"]
xtrain = x[train,]
ytrain = y[train]
xtest = x[-train, ]
ytest = y[-train]
set.seed(1)
bartfit = gbart(xtrain,
ytrain,
x.test = xtest)
## *****Calling gbart: type=1
## *****Data:
## data:n,p,np: 200, 14, 196
## y1,yn: 2.781850, 1.091850
## x1,x[n*p]: 107.000000, 1.000000
## xp1,xp[np*p]: 121.000000, 0.000000
## *****Number of Trees: 200
## *****Number of Cut Points: 63 ... 1
## *****burn,nd,thin: 100,1000,1
## *****Prior:beta,alpha,tau,nu,lambda,offset: 2,0.95,0.273474,3,0.23074,7.57815
## *****sigma: 1.088371
## *****w (weights): 1.000000 ... 1.000000
## *****Dirichlet:sparse,theta,omega,a,b,rho,augment: 0,0,1,0.5,1,14,0
## *****printevery: 100
##
## MCMC
## done 0 (out of 1100)
## done 100 (out of 1100)
## done 200 (out of 1100)
## done 300 (out of 1100)
## done 400 (out of 1100)
## done 500 (out of 1100)
## done 600 (out of 1100)
## done 700 (out of 1100)
## done 800 (out of 1100)
## done 900 (out of 1100)
## done 1000 (out of 1100)
## time: 4s
## trcnt,tecnt: 1000,1000
yhat.bart = bartfit$yhat.test.mean
mean((ytest - yhat.bart)^2)
## [1] 1.46948
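This confirms the test MSE of about 1.47 quoted above. As a follow-up sketch (not part of the original output), the average number of times each predictor appears in the BART ensemble can be examined:
# Sketch: mean variable-inclusion counts across posterior samples.
ord = order(bartfit$varcount.mean, decreasing = TRUE)
bartfit$varcount.mean[ord]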
oj = OJ
set.seed(1)
train=sample(1:nrow(OJ),800)
oj.train=OJ[train, ]
oj.test=OJ[-train, ]
oj.tree=tree(Purchase~.,OJ,subset=train)
summary(oj.tree)
##
## Classification tree:
## tree(formula = Purchase ~ ., data = OJ, subset = train)
## Variables actually used in tree construction:
## [1] "LoyalCH" "PriceDiff" "SpecialCH" "ListPriceDiff"
## [5] "PctDiscMM"
## Number of terminal nodes: 9
## Residual mean deviance: 0.7432 = 587.8 / 791
## Misclassification error rate: 0.1588 = 127 / 800
oj.tree
## node), split, n, deviance, yval, (yprob)
## * denotes terminal node
##
## 1) root 800 1073.00 CH ( 0.60625 0.39375 )
## 2) LoyalCH < 0.5036 365 441.60 MM ( 0.29315 0.70685 )
## 4) LoyalCH < 0.280875 177 140.50 MM ( 0.13559 0.86441 )
## 8) LoyalCH < 0.0356415 59 10.14 MM ( 0.01695 0.98305 ) *
## 9) LoyalCH > 0.0356415 118 116.40 MM ( 0.19492 0.80508 ) *
## 5) LoyalCH > 0.280875 188 258.00 MM ( 0.44149 0.55851 )
## 10) PriceDiff < 0.05 79 84.79 MM ( 0.22785 0.77215 )
## 20) SpecialCH < 0.5 64 51.98 MM ( 0.14062 0.85938 ) *
## 21) SpecialCH > 0.5 15 20.19 CH ( 0.60000 0.40000 ) *
## 11) PriceDiff > 0.05 109 147.00 CH ( 0.59633 0.40367 ) *
## 3) LoyalCH > 0.5036 435 337.90 CH ( 0.86897 0.13103 )
## 6) LoyalCH < 0.764572 174 201.00 CH ( 0.73563 0.26437 )
## 12) ListPriceDiff < 0.235 72 99.81 MM ( 0.50000 0.50000 )
## 24) PctDiscMM < 0.196196 55 73.14 CH ( 0.61818 0.38182 ) *
## 25) PctDiscMM > 0.196196 17 12.32 MM ( 0.11765 0.88235 ) *
## 13) ListPriceDiff > 0.235 102 65.43 CH ( 0.90196 0.09804 ) *
## 7) LoyalCH > 0.764572 261 91.20 CH ( 0.95785 0.04215 ) *
plot(oj.tree)
text(oj.tree,
pretty = 0,
cex = 0.5)
oj.pred=predict(oj.tree,oj.test,type='class')
1-mean(oj.pred==oj.test$Purchase)
## [1] 0.1703704
The test error rate for the unpruned classification tree is 0.170, i.e. about 17% of the test observations are misclassified.
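A confusion matrix (a sketch, not part of the original output) breaks that error rate down by class:
# Sketch: test-set confusion matrix for the unpruned OJ tree.
table(predicted = oj.pred, actual = oj.test$Purchase)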
oj.cv=cv.tree(oj.tree,FUN=prune.misclass)
oj.cv
## $size
## [1] 9 8 7 4 2 1
##
## $dev
## [1] 150 150 149 158 172 315
##
## $k
## [1] -Inf 0.000000 3.000000 4.333333 10.500000 151.000000
##
## $method
## [1] "misclass"
##
## attr(,"class")
## [1] "prune" "tree.sequence"
plot(oj.cv$size,
oj.cv$dev,
type = "b",
xlab = "Tree Size",
ylab = "CV Error Rate")
A tree with 7 terminal nodes has the lowest cross-validation error (149 misclassified observations).
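For comparison, a short sketch (not part of the original run) prunes to the CV-selected size and computes its test error rate; the tree built next with best = 2 is a much smaller tree than this CV optimum.
# Sketch: prune to the size with the lowest CV error and check its test error.
best.size = oj.cv$size[which.min(oj.cv$dev)]
oj.pruned.cv = prune.misclass(oj.tree, best = best.size)
mean(predict(oj.pruned.cv, oj.test, type = "class") != oj.test$Purchase)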
oj.pruned=prune.misclass(oj.tree,best=2)
oj.pruned
## node), split, n, deviance, yval, (yprob)
## * denotes terminal node
##
## 1) root 800 1073.0 CH ( 0.6062 0.3937 )
## 2) LoyalCH < 0.5036 365 441.6 MM ( 0.2932 0.7068 ) *
## 3) LoyalCH > 0.5036 435 337.9 CH ( 0.8690 0.1310 ) *
plot(oj.pruned)
text(oj.pruned)
summary(oj.pruned)
##
## Classification tree:
## snip.tree(tree = oj.tree, nodes = 3:2)
## Variables actually used in tree construction:
## [1] "LoyalCH"
## Number of terminal nodes: 2
## Residual mean deviance: 0.9768 = 779.5 / 798
## Misclassification error rate: 0.205 = 164 / 800
The pruned tree's training error rate (0.205) is higher than that of the unpruned tree (0.159), as expected since pruning reduces the tree's flexibility.
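The same comparison can be verified directly (a sketch, not part of the original output):
# Sketch: training error rates of the unpruned and pruned trees.
c(unpruned = mean(predict(oj.tree, oj.train, type = "class") != oj.train$Purchase),
  pruned = mean(predict(oj.pruned, oj.train, type = "class") != oj.train$Purchase))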
pred.unpruned = predict(oj.tree,
oj.test,
type="class")
misclass.unpruned = sum(oj.test$Purchase != pred.unpruned)
misclass.unpruned / length(pred.unpruned)
## [1] 0.1703704
pred.pruned = predict(oj.pruned,
oj.test,
type="class")
misclass.pruned = sum(oj.test$Purchase != pred.pruned)
misclass.pruned / length(pred.pruned)
## [1] 0.1851852
The pruned tree's test error rate (0.185) is higher than the unpruned tree's (0.170) by about 0.015, so pruning to two terminal nodes does not improve test performance here.
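Collected side by side, using the objects computed above (a sketch, not part of the original output):
# Sketch: test error rates of the unpruned vs. pruned OJ trees.
c(unpruned = misclass.unpruned / length(pred.unpruned),
  pruned = misclass.pruned / length(pred.pruned))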