7

library(ISLR)
set.seed(1)
train = sample(1:nrow(Carseats), nrow(Carseats) / 2)
Car.train = Carseats[train, ]
Car.test = Carseats[-train,]
library(tree)
reg.tree = tree(Sales~.,data = Carseats, subset=train)
reg.tree = tree(Sales~.,data = Car.train)

summary(reg.tree)

## 
## Regression tree:
## tree(formula = Sales ~ ., data = Car.train)
## Variables actually used in tree construction:
## [1] "ShelveLoc"   "Price"       "Age"         "Advertising" "CompPrice"  
## [6] "US"         
## Number of terminal nodes:  18 
## Residual mean deviance:  2.167 = 394.3 / 182 
## Distribution of residuals:
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -3.88200 -0.88200 -0.08712  0.00000  0.89590  4.09900

plot(reg.tree)
text(reg.tree ,pretty =0)

yhat = predict(reg.tree,newdata = Car.test)
mean((yhat - Car.test$Sales)^2)

## [1] 4.922039

set.seed(1)
cv.car = cv.tree(reg.tree)
plot(cv.car$size, cv.car$dev, type = "b")

prune.car = prune.tree(reg.tree, best = 8)
plot(prune.car)
text(prune.car,pretty=0)

yhat=predict(prune.car, newdata= Car.test)
mean((yhat-Car.test$Sales)^2)

## [1] 5.113254

library(randomForest)

## randomForest 4.6-14

## Type rfNews() to see new features/changes/bug fixes.

set.seed(1)
bag.car = randomForest(Sales~.,data=Car.train,mtry = 10, importance = TRUE)
yhat.bag = predict(bag.car,newdata=Car.test)
mean((yhat.bag-Car.test$Sales)^2)

## [1] 2.605253

importance(bag.car)

##                %IncMSE IncNodePurity
## CompPrice   24.8888481    170.182937
## Income       4.7121131     91.264880
## Advertising 12.7692401     97.164338
## Population  -1.8074075     58.244596
## Price       56.3326252    502.903407
## ShelveLoc   48.8886689    380.032715
## Age         17.7275460    157.846774
## Education    0.5962186     44.598731
## Urban        0.1728373      9.822082
## US           4.2172102     18.073863

varImpPlot(bag.car)

library(randomForest)
set.seed(1)
rf.car = randomForest(Sales~.,data=Car.train,mtry = 3, importance = TRUE)
yhat.rf = predict(rf.car,newdata=Car.test)
mean((yhat.rf-Car.test$Sales)^2)

## [1] 2.960559

library(ISLR)
set.seed(1)
train = sample(dim(OJ)[1],800)
OJ.train = OJ[train,]
OJ.test = OJ[-train,]
OJ.tree = tree(Purchase~., data=OJ.train)
summary(OJ.tree)

## 
## Classification tree:
## tree(formula = Purchase ~ ., data = OJ.train)
## Variables actually used in tree construction:
## [1] "LoyalCH"       "PriceDiff"     "SpecialCH"     "ListPriceDiff"
## [5] "PctDiscMM"    
## Number of terminal nodes:  9 
## Residual mean deviance:  0.7432 = 587.8 / 791 
## Misclassification error rate: 0.1588 = 127 / 800

OJ.tree

## node), split, n, deviance, yval, (yprob)
##       * denotes terminal node
## 
##  1) root 800 1073.00 CH ( 0.60625 0.39375 )  
##    2) LoyalCH < 0.5036 365  441.60 MM ( 0.29315 0.70685 )  
##      4) LoyalCH < 0.280875 177  140.50 MM ( 0.13559 0.86441 )  
##        8) LoyalCH < 0.0356415 59   10.14 MM ( 0.01695 0.98305 ) *
##        9) LoyalCH > 0.0356415 118  116.40 MM ( 0.19492 0.80508 ) *
##      5) LoyalCH > 0.280875 188  258.00 MM ( 0.44149 0.55851 )  
##       10) PriceDiff < 0.05 79   84.79 MM ( 0.22785 0.77215 )  
##         20) SpecialCH < 0.5 64   51.98 MM ( 0.14062 0.85938 ) *
##         21) SpecialCH > 0.5 15   20.19 CH ( 0.60000 0.40000 ) *
##       11) PriceDiff > 0.05 109  147.00 CH ( 0.59633 0.40367 ) *
##    3) LoyalCH > 0.5036 435  337.90 CH ( 0.86897 0.13103 )  
##      6) LoyalCH < 0.764572 174  201.00 CH ( 0.73563 0.26437 )  
##       12) ListPriceDiff < 0.235 72   99.81 MM ( 0.50000 0.50000 )  
##         24) PctDiscMM < 0.196196 55   73.14 CH ( 0.61818 0.38182 ) *
##         25) PctDiscMM > 0.196196 17   12.32 MM ( 0.11765 0.88235 ) *
##       13) ListPriceDiff > 0.235 102   65.43 CH ( 0.90196 0.09804 ) *
##      7) LoyalCH > 0.764572 261   91.20 CH ( 0.95785 0.04215 ) *

plot(OJ.tree)
text(OJ.tree,pretty=TRUE)

tree.pred = predict(OJ.tree, newdata = OJ.test, type = "class")
table(tree.pred,OJ.test$Purchase)

##          
## tree.pred  CH  MM
##        CH 160  38
##        MM   8  64

cv.OJ = cv.tree(OJ.tree, FUN = prune.misclass)
cv.OJ

## $size
## [1] 9 8 7 4 2 1
## 
## $dev
## [1] 150 150 149 158 172 315
## 
## $k
## [1]       -Inf   0.000000   3.000000   4.333333  10.500000 151.000000
## 
## $method
## [1] "misclass"
## 
## attr(,"class")
## [1] "prune"         "tree.sequence"

plot(cv.OJ$size,cv.OJ$dev,type='b', xlab = "Tree size", ylab = "Deviance")

prune.OJ = prune.misclass(OJ.tree, best=5)
plot(prune.OJ)
text(prune.OJ,pretty=0)

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

summary(cars)

##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

7

Scott Schier

11/28/2021

R Markdown

Including Plots