library(tree)
library(ISLR)
# Build a binary response: "YES" when Sales exceeds 8, "NO" otherwise.
# Sales is read through the data frame rather than via attach(), so nothing is
# added to the search path. The result is stored as a factor explicitly:
# since R 4.0, data.frame() no longer converts strings to factors, and tree()
# needs a factor response to grow a classification tree.
High <- factor(
  ifelse(Carseats$Sales <= 8, "NO", "YES"),
  levels = c("NO", "YES")
)
Carseats <- data.frame(Carseats, High)
# Fit on every predictor except Sales, because High is derived from Sales.
# The outer parentheses print the fitted tree.
(tree.Carseats <- tree(High ~ . - Sales, data = Carseats))
## node), split, n, deviance, yval, (yprob)
##       * denotes terminal node
## 
##   1) root 400 541.500 NO ( 0.59000 0.41000 )  
##     2) ShelveLoc: Bad,Medium 315 390.600 NO ( 0.68889 0.31111 )  
##       4) Price < 92.5 46  56.530 YES ( 0.30435 0.69565 )  
##         8) Income < 57 10  12.220 NO ( 0.70000 0.30000 )  
##          16) CompPrice < 110.5 5   0.000 NO ( 1.00000 0.00000 ) *
##          17) CompPrice > 110.5 5   6.730 YES ( 0.40000 0.60000 ) *
##         9) Income > 57 36  35.470 YES ( 0.19444 0.80556 )  
##          18) Population < 207.5 16  21.170 YES ( 0.37500 0.62500 ) *
##          19) Population > 207.5 20   7.941 YES ( 0.05000 0.95000 ) *
##       5) Price > 92.5 269 299.800 NO ( 0.75465 0.24535 )  
##        10) Advertising < 13.5 224 213.200 NO ( 0.81696 0.18304 )  
##          20) CompPrice < 124.5 96  44.890 NO ( 0.93750 0.06250 )  
##            40) Price < 106.5 38  33.150 NO ( 0.84211 0.15789 )  
##              80) Population < 177 12  16.300 NO ( 0.58333 0.41667 )  
##               160) Income < 60.5 6   0.000 NO ( 1.00000 0.00000 ) *
##               161) Income > 60.5 6   5.407 YES ( 0.16667 0.83333 ) *
##              81) Population > 177 26   8.477 NO ( 0.96154 0.03846 ) *
##            41) Price > 106.5 58   0.000 NO ( 1.00000 0.00000 ) *
##          21) CompPrice > 124.5 128 150.200 NO ( 0.72656 0.27344 )  
##            42) Price < 122.5 51  70.680 YES ( 0.49020 0.50980 )  
##              84) ShelveLoc: Bad 11   6.702 NO ( 0.90909 0.09091 ) *
##              85) ShelveLoc: Medium 40  52.930 YES ( 0.37500 0.62500 )  
##               170) Price < 109.5 16   7.481 YES ( 0.06250 0.93750 ) *
##               171) Price > 109.5 24  32.600 NO ( 0.58333 0.41667 )  
##                 342) Age < 49.5 13  16.050 YES ( 0.30769 0.69231 ) *
##                 343) Age > 49.5 11   6.702 NO ( 0.90909 0.09091 ) *
##            43) Price > 122.5 77  55.540 NO ( 0.88312 0.11688 )  
##              86) CompPrice < 147.5 58  17.400 NO ( 0.96552 0.03448 ) *
##              87) CompPrice > 147.5 19  25.010 NO ( 0.63158 0.36842 )  
##               174) Price < 147 12  16.300 YES ( 0.41667 0.58333 )  
##                 348) CompPrice < 152.5 7   5.742 YES ( 0.14286 0.85714 ) *
##                 349) CompPrice > 152.5 5   5.004 NO ( 0.80000 0.20000 ) *
##               175) Price > 147 7   0.000 NO ( 1.00000 0.00000 ) *
##        11) Advertising > 13.5 45  61.830 YES ( 0.44444 0.55556 )  
##          22) Age < 54.5 25  25.020 YES ( 0.20000 0.80000 )  
##            44) CompPrice < 130.5 14  18.250 YES ( 0.35714 0.64286 )  
##              88) Income < 100 9  12.370 NO ( 0.55556 0.44444 ) *
##              89) Income > 100 5   0.000 YES ( 0.00000 1.00000 ) *
##            45) CompPrice > 130.5 11   0.000 YES ( 0.00000 1.00000 ) *
##          23) Age > 54.5 20  22.490 NO ( 0.75000 0.25000 )  
##            46) CompPrice < 122.5 10   0.000 NO ( 1.00000 0.00000 ) *
##            47) CompPrice > 122.5 10  13.860 NO ( 0.50000 0.50000 )  
##              94) Price < 125 5   0.000 YES ( 0.00000 1.00000 ) *
##              95) Price > 125 5   0.000 NO ( 1.00000 0.00000 ) *
##     3) ShelveLoc: Good 85  90.330 YES ( 0.22353 0.77647 )  
##       6) Price < 135 68  49.260 YES ( 0.11765 0.88235 )  
##        12) US: No 17  22.070 YES ( 0.35294 0.64706 )  
##          24) Price < 109 8   0.000 YES ( 0.00000 1.00000 ) *
##          25) Price > 109 9  11.460 NO ( 0.66667 0.33333 ) *
##        13) US: Yes 51  16.880 YES ( 0.03922 0.96078 ) *
##       7) Price > 135 17  22.070 NO ( 0.64706 0.35294 )  
##        14) Income < 46 6   0.000 NO ( 1.00000 0.00000 ) *
##        15) Income > 46 11  15.160 YES ( 0.45455 0.54545 ) *
# Summarise the full-data classification tree: variables used, number of
# terminal nodes, residual mean deviance and misclassification error rate.
summary(tree.Carseats)
## 
## Classification tree:
## tree(formula = High ~ . - Sales, data = Carseats)
## Variables actually used in tree construction:
## [1] "ShelveLoc"   "Price"       "Income"      "CompPrice"   "Population" 
## [6] "Advertising" "Age"         "US"         
## Number of terminal nodes:  27 
## Residual mean deviance:  0.4575 = 170.7 / 373 
## Misclassification error rate: 0.09 = 36 / 400
# Draw the tree, then label its nodes. pretty=0 labels the splits with the
# full factor level names.
plot(tree.Carseats)
text(tree.Carseats, pretty=0)

# Print the node-by-node text representation of the tree.
tree.Carseats
## node), split, n, deviance, yval, (yprob)
##       * denotes terminal node
## 
##   1) root 400 541.500 NO ( 0.59000 0.41000 )  
##     2) ShelveLoc: Bad,Medium 315 390.600 NO ( 0.68889 0.31111 )  
##       4) Price < 92.5 46  56.530 YES ( 0.30435 0.69565 )  
##         8) Income < 57 10  12.220 NO ( 0.70000 0.30000 )  
##          16) CompPrice < 110.5 5   0.000 NO ( 1.00000 0.00000 ) *
##          17) CompPrice > 110.5 5   6.730 YES ( 0.40000 0.60000 ) *
##         9) Income > 57 36  35.470 YES ( 0.19444 0.80556 )  
##          18) Population < 207.5 16  21.170 YES ( 0.37500 0.62500 ) *
##          19) Population > 207.5 20   7.941 YES ( 0.05000 0.95000 ) *
##       5) Price > 92.5 269 299.800 NO ( 0.75465 0.24535 )  
##        10) Advertising < 13.5 224 213.200 NO ( 0.81696 0.18304 )  
##          20) CompPrice < 124.5 96  44.890 NO ( 0.93750 0.06250 )  
##            40) Price < 106.5 38  33.150 NO ( 0.84211 0.15789 )  
##              80) Population < 177 12  16.300 NO ( 0.58333 0.41667 )  
##               160) Income < 60.5 6   0.000 NO ( 1.00000 0.00000 ) *
##               161) Income > 60.5 6   5.407 YES ( 0.16667 0.83333 ) *
##              81) Population > 177 26   8.477 NO ( 0.96154 0.03846 ) *
##            41) Price > 106.5 58   0.000 NO ( 1.00000 0.00000 ) *
##          21) CompPrice > 124.5 128 150.200 NO ( 0.72656 0.27344 )  
##            42) Price < 122.5 51  70.680 YES ( 0.49020 0.50980 )  
##              84) ShelveLoc: Bad 11   6.702 NO ( 0.90909 0.09091 ) *
##              85) ShelveLoc: Medium 40  52.930 YES ( 0.37500 0.62500 )  
##               170) Price < 109.5 16   7.481 YES ( 0.06250 0.93750 ) *
##               171) Price > 109.5 24  32.600 NO ( 0.58333 0.41667 )  
##                 342) Age < 49.5 13  16.050 YES ( 0.30769 0.69231 ) *
##                 343) Age > 49.5 11   6.702 NO ( 0.90909 0.09091 ) *
##            43) Price > 122.5 77  55.540 NO ( 0.88312 0.11688 )  
##              86) CompPrice < 147.5 58  17.400 NO ( 0.96552 0.03448 ) *
##              87) CompPrice > 147.5 19  25.010 NO ( 0.63158 0.36842 )  
##               174) Price < 147 12  16.300 YES ( 0.41667 0.58333 )  
##                 348) CompPrice < 152.5 7   5.742 YES ( 0.14286 0.85714 ) *
##                 349) CompPrice > 152.5 5   5.004 NO ( 0.80000 0.20000 ) *
##               175) Price > 147 7   0.000 NO ( 1.00000 0.00000 ) *
##        11) Advertising > 13.5 45  61.830 YES ( 0.44444 0.55556 )  
##          22) Age < 54.5 25  25.020 YES ( 0.20000 0.80000 )  
##            44) CompPrice < 130.5 14  18.250 YES ( 0.35714 0.64286 )  
##              88) Income < 100 9  12.370 NO ( 0.55556 0.44444 ) *
##              89) Income > 100 5   0.000 YES ( 0.00000 1.00000 ) *
##            45) CompPrice > 130.5 11   0.000 YES ( 0.00000 1.00000 ) *
##          23) Age > 54.5 20  22.490 NO ( 0.75000 0.25000 )  
##            46) CompPrice < 122.5 10   0.000 NO ( 1.00000 0.00000 ) *
##            47) CompPrice > 122.5 10  13.860 NO ( 0.50000 0.50000 )  
##              94) Price < 125 5   0.000 YES ( 0.00000 1.00000 ) *
##              95) Price > 125 5   0.000 NO ( 1.00000 0.00000 ) *
##     3) ShelveLoc: Good 85  90.330 YES ( 0.22353 0.77647 )  
##       6) Price < 135 68  49.260 YES ( 0.11765 0.88235 )  
##        12) US: No 17  22.070 YES ( 0.35294 0.64706 )  
##          24) Price < 109 8   0.000 YES ( 0.00000 1.00000 ) *
##          25) Price > 109 9  11.460 NO ( 0.66667 0.33333 ) *
##        13) US: Yes 51  16.880 YES ( 0.03922 0.96078 ) *
##       7) Price > 135 17  22.070 NO ( 0.64706 0.35294 )  
##        14) Income < 46 6   0.000 NO ( 1.00000 0.00000 ) *
##        15) Income > 46 11  15.160 YES ( 0.45455 0.54545 ) *
# Hold out half of the rows as a test set; the seed makes the split repeatable.
set.seed(3)
# seq_len() is safe for a zero-row frame, where 1:nrow() would give c(1, 0).
train <- sample(seq_len(nrow(Carseats)), nrow(Carseats) %/% 2)
test <- -train
Carseats.test <- Carseats[test, ]
# Take the true labels from the test frame itself so they are guaranteed to
# line up with the rows passed to predict().
High.test <- Carseats.test$High
# Refit on the training rows only. This overwrites the tree.Carseats object
# that was fitted on the full data earlier in the script.
tree.Carseats <- tree(High ~ . - Sales, data = Carseats, subset = train)
tree.pred <- predict(tree.Carseats, Carseats.test, type = "class")
# Confusion matrix: rows are predicted classes, columns are actual classes.
table(tree.pred, High.test)
##          High.test
## tree.pred NO YES
##       NO  82  23
##       YES 44  51
# Inspect the column types of Carseats, including the derived High column.
str(Carseats)
## 'data.frame':    400 obs. of  12 variables:
##  $ Sales      : num  9.5 11.22 10.06 7.4 4.15 ...
##  $ CompPrice  : num  138 111 113 117 141 124 115 136 132 132 ...
##  $ Income     : num  73 48 35 100 64 113 105 81 110 113 ...
##  $ Advertising: num  11 16 10 4 3 13 0 15 0 0 ...
##  $ Population : num  276 260 269 466 340 501 45 425 108 131 ...
##  $ Price      : num  120 83 80 97 128 72 108 120 124 124 ...
##  $ ShelveLoc  : Factor w/ 3 levels "Bad","Good","Medium": 1 2 3 3 1 1 3 2 3 3 ...
##  $ Age        : num  42 65 59 55 38 78 71 67 76 76 ...
##  $ Education  : num  17 10 12 14 13 16 15 10 10 17 ...
##  $ Urban      : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 1 2 2 1 1 ...
##  $ US         : Factor w/ 2 levels "No","Yes": 2 2 2 2 1 2 1 2 1 2 ...
##  $ High       : Factor w/ 2 levels "NO","YES": 2 2 2 1 1 2 1 2 1 1 ...
library(MASS)
# Regression tree for medv on the Boston data, fitted on a random half of the
# rows. The seed makes the split repeatable.
set.seed(1)
# seq_len() is safe for a zero-row frame, where 1:nrow() would give c(1, 0).
# Note that this reuses (overwrites) the name `train` from the Carseats split.
train <- sample(seq_len(nrow(Boston)), nrow(Boston) %/% 2)
tree.Boston <- tree(medv ~ ., data = Boston, subset = train)
summary(tree.Boston)
## 
## Regression tree:
## tree(formula = medv ~ ., data = Boston, subset = train)
## Variables actually used in tree construction:
## [1] "lstat" "rm"    "dis"  
## Number of terminal nodes:  8 
## Residual mean deviance:  12.65 = 3099 / 245 
## Distribution of residuals:
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -14.10000  -2.04200  -0.05357   0.00000   1.96000  12.60000
# Draw the unpruned regression tree and label its nodes.
plot(tree.Boston)
text(tree.Boston, pretty=0)

# Cross-validate the tree and plot deviance against tree size to see whether
# pruning would help. Fold assignment is random, so the result depends on the
# RNG state left by set.seed(1) and the sample() call above.
cv.Boston<-cv.tree(tree.Boston)
plot(cv.Boston$size, cv.Boston$dev, type='b')

# Prune the tree back to a requested size of 5 terminal nodes (best = 5) and
# plot the pruned tree with node labels.
prune.Boston<-prune.tree(tree.Boston, best = 5)
plot(prune.Boston)
text(prune.Boston, pretty=0)

# Predict medv for the held-out rows (those not in `train`).
# NOTE(review): predictions come from the unpruned tree.Boston, not from
# prune.Boston -- presumably intentional; confirm.
yhat<-predict(tree.Boston, newdata=Boston[-train,])
# Observed medv values for the same held-out rows.
Boston.test<-Boston[-train, "medv"]
# Predicted vs. observed, with the line y = x as a reference for a perfect fit.
plot(yhat, Boston.test)
abline(0,1)

# Test-set mean squared error.
mean((yhat-Boston.test)^2)
## [1] 25.04559