library(MASS)
library(ISLR)
library(car)
library(dplyr)
library(ggplot2)
library(caret)
library(mlbench)
library(caTools)
library (randomForest)
library (tree)
library (gbm)

Fitting Classification Trees

attach (Carseats )
The following object is masked _by_ .GlobalEnv:

    High

The following objects are masked from Carseats (pos = 4):

    Advertising, Age, CompPrice, Education, High, High.1, High.10, High.11, High.12, High.13,
    High.14, High.15, High.16, High.17, High.18, High.19, High.2, High.20, High.3, High.4, High.5,
    High.6, High.7, High.8, High.9, Income, Population, Price, Sales, ShelveLoc, Urban, US

The following objects are masked from Carseats (pos = 5):

    Advertising, Age, CompPrice, Education, High, High.1, High.10, High.11, High.12, High.13,
    High.14, High.15, High.16, High.17, High.18, High.19, High.2, High.3, High.4, High.5, High.6,
    High.7, High.8, High.9, Income, Population, Price, Sales, ShelveLoc, Urban, US

The following objects are masked from Carseats (pos = 6):

    Advertising, Age, CompPrice, Education, High, High.1, High.10, High.11, High.12, High.13,
    High.14, High.15, High.16, High.17, High.18, High.2, High.3, High.4, High.5, High.6, High.7,
    High.8, High.9, Income, Population, Price, Sales, ShelveLoc, Urban, US

The following objects are masked from Carseats (pos = 7):

    Advertising, Age, CompPrice, Education, High, High.1, High.10, High.11, High.12, High.13,
    High.14, High.15, High.16, High.17, High.2, High.3, High.4, High.5, High.6, High.7, High.8,
    High.9, Income, Population, Price, Sales, ShelveLoc, Urban, US

The following objects are masked from Carseats (pos = 12):

    Advertising, Age, CompPrice, Education, High, High.1, High.10, High.11, High.12, High.13,
    High.14, High.15, High.16, High.2, High.3, High.4, High.5, High.6, High.7, High.8, High.9,
    Income, Population, Price, Sales, ShelveLoc, Urban, US

The following objects are masked from Carseats (pos = 13):

    Advertising, Age, CompPrice, Education, High, High.1, High.10, High.11, High.12, High.13,
    High.14, High.15, High.2, High.3, High.4, High.5, High.6, High.7, High.8, High.9, Income,
    Population, Price, Sales, ShelveLoc, Urban, US

The following objects are masked from Carseats (pos = 14):

    Advertising, Age, CompPrice, Education, High, High.1, High.10, High.11, High.12, High.13,
    High.14, High.2, High.3, High.4, High.5, High.6, High.7, High.8, High.9, Income, Population,
    Price, Sales, ShelveLoc, Urban, US

The following objects are masked from Carseats (pos = 16):

    Advertising, Age, CompPrice, Education, High, High.1, High.10, High.11, High.12, High.13,
    High.2, High.3, High.4, High.5, High.6, High.7, High.8, High.9, Income, Population, Price,
    Sales, ShelveLoc, Urban, US

The following objects are masked from Carseats (pos = 17):

    Advertising, Age, CompPrice, Education, High, High.1, High.10, High.11, High.12, High.2,
    High.3, High.4, High.5, High.6, High.7, High.8, High.9, Income, Population, Price, Sales,
    ShelveLoc, Urban, US

The following objects are masked from Carseats (pos = 18):

    Advertising, Age, CompPrice, Education, High, High.1, High.10, High.11, High.2, High.3,
    High.4, High.5, High.6, High.7, High.8, High.9, Income, Population, Price, Sales, ShelveLoc,
    Urban, US

The following objects are masked from Carseats (pos = 19):

    Advertising, Age, CompPrice, Education, High, High.1, High.10, High.2, High.3, High.4, High.5,
    High.6, High.7, High.8, High.9, Income, Population, Price, Sales, ShelveLoc, Urban, US

The following objects are masked from Carseats (pos = 20):

    Advertising, Age, CompPrice, Education, High, High.1, High.2, High.3, High.4, High.5, High.6,
    High.7, High.8, High.9, Income, Population, Price, Sales, ShelveLoc, Urban, US

The following objects are masked from Carseats (pos = 21):

    Advertising, Age, CompPrice, Education, High, High.1, High.2, High.3, High.4, High.5, High.6,
    High.7, High.8, Income, Population, Price, Sales, ShelveLoc, Urban, US

The following objects are masked from Carseats (pos = 22):

    Advertising, Age, CompPrice, Education, High, High.1, High.2, High.3, High.4, High.5, High.6,
    High.7, Income, Population, Price, Sales, ShelveLoc, Urban, US

The following objects are masked from Carseats (pos = 23):

    Advertising, Age, CompPrice, Education, High, High.1, High.2, High.3, High.4, High.5, High.6,
    Income, Population, Price, Sales, ShelveLoc, Urban, US

The following objects are masked from Carseats (pos = 24):

    Advertising, Age, CompPrice, Education, High, High.1, High.2, High.3, High.4, High.5, Income,
    Population, Price, Sales, ShelveLoc, Urban, US

The following objects are masked from Carseats (pos = 25):

    Advertising, Age, CompPrice, Education, High, High.1, High.2, High.3, High.4, Income,
    Population, Price, Sales, ShelveLoc, Urban, US

The following objects are masked from Carseats (pos = 26):

    Advertising, Age, CompPrice, Education, High, High.1, High.2, High.3, Income, Population,
    Price, Sales, ShelveLoc, Urban, US

The following objects are masked from Carseats (pos = 27):

    Advertising, Age, CompPrice, Education, High, High.1, High.2, Income, Population, Price,
    Sales, ShelveLoc, Urban, US

The following objects are masked from Carseats (pos = 28):

    Advertising, Age, CompPrice, Education, High, High.1, Income, Population, Price, Sales,
    ShelveLoc, Urban, US

The following objects are masked from Carseats (pos = 29):

    Advertising, Age, CompPrice, Education, High, Income, Population, Price, Sales, ShelveLoc,
    Urban, US

The following objects are masked from Carseats (pos = 30):

    Advertising, Age, CompPrice, Education, Income, Population, Price, Sales, ShelveLoc, Urban, US
High=ifelse (Sales <=8," No"," Yes ")
Carseats =data.frame(Carseats ,High)
tree.carseats =tree(High~.-Sales ,Carseats )
summary (tree.carseats )

Classification tree:
tree(formula = High ~ . - Sales, data = Carseats)
Variables actually used in tree construction:
[1] "High.1"
Number of terminal nodes:  2 
Residual mean deviance:  0 = 0 / 398 
Misclassification error rate: 0 = 0 / 400 
plot(tree.carseats )
text(tree.carseats ,pretty =0)
tree.carseats
node), split, n, deviance, yval, (yprob)
      * denotes terminal node

1) root 400 541.5  No ( 0.59 0.41 )  
  2) High.1:  No 236   0.0  No ( 1.00 0.00 ) *
  3) High.1:  Yes  164   0.0  Yes  ( 0.00 1.00 ) *
set.seed (2)
train=sample (1: nrow(Carseats ), 200)
Carseats.test=Carseats[-train ,]
High.test=High[-train ]
tree.carseats =tree(High~.-Sales ,Carseats ,subset =train )
tree.pred=predict(tree.carseats ,Carseats.test ,type ="class")
table(tree.pred ,High.test)
         High.test
tree.pred  No  Yes 
     No   116     0
     Yes    0    84
set.seed (3)
cv.carseats =cv.tree(tree.carseats ,FUN=prune.misclass )
names(cv.carseats )
[1] "size"   "dev"    "k"      "method"
cv.carseats
$size
[1] 2 1

$dev
[1]  0 80

$k
[1] -Inf   80

$method
[1] "misclass"

attr(,"class")
[1] "prune"         "tree.sequence"
par(mfrow =c(1,2))

plot(cv.carseats$size ,cv.carseats$dev ,type="b")
plot(cv.carseats$k ,cv.carseats$dev ,type="b")

prune.carseats =prune.misclass (tree.carseats ,best =9)
best is bigger than tree size
plot(prune.carseats )
text(prune.carseats ,pretty =0)
tree.pred=predict(prune.carseats , Carseats.test ,type="class")
table(tree.pred ,High.test)
         High.test
tree.pred  No  Yes 
     No   116     0
     Yes    0    84
(94+60) /200
[1] 0.77
prune.carseats =prune.misclass (tree.carseats ,best =15)
best is bigger than tree size
plot(prune.carseats )
text(prune.carseats ,pretty =0)

tree.pred=predict(prune.carseats , Carseats.test ,type="class")
table(tree.pred ,High.test)
         High.test
tree.pred  No  Yes 
     No   116     0
     Yes    0    84

Fitting Regression Trees

set.seed (1)
train = sample (1: nrow(Boston ), nrow(Boston )/2)
tree.boston =tree(medv~.,Boston ,subset =train)
summary (tree.boston )

Regression tree:
tree(formula = medv ~ ., data = Boston, subset = train)
Variables actually used in tree construction:
[1] "lstat" "rm"    "dis"  
Number of terminal nodes:  8 
Residual mean deviance:  12.65 = 3099 / 245 
Distribution of residuals:
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
-14.10000  -2.04200  -0.05357   0.00000   1.96000  12.60000 
plot(tree.boston )
text(tree.boston ,pretty =0)

cv.boston =cv.tree(tree.boston )
plot(cv.boston$size ,cv.boston$dev ,type = "b")

prune.boston =prune.tree(tree.boston ,best =5)
plot(prune.boston )
text(prune.boston ,pretty =0)

yhat=predict (tree.boston ,newdata =Boston [-train ,])
boston.test=Boston[-train ," medv"]
plot(yhat ,boston.test)
abline (0,1)

mean((yhat -boston.test)^2)
[1] NaN

Bagging and Random Forests

set.seed (1)
bag.boston =randomForest(medv~.,data=Boston ,subset =train ,mtry=13, importance =TRUE)
bag.boston

Call:
 randomForest(formula = medv ~ ., data = Boston, mtry = 13, importance = TRUE,      subset = train) 
               Type of random forest: regression
                     Number of trees: 500
No. of variables tried at each split: 13

          Mean of squared residuals: 10.93548
                    % Var explained: 86.76
bag.boston =randomForest(medv~.,data=Boston ,subset =train ,mtry=13, ntree =25)
yhat.bag = predict (bag.boston ,newdata =Boston [-train ,])
mean(( yhat.bag -boston.test)^2)
[1] NaN
set.seed(1)
rf.boston =randomForest(medv~.,data=Boston ,subset =train ,
mtry=6, importance =TRUE)
yhat.rf = predict(rf.boston ,newdata =Boston [-train ,])
mean(( yhat.rf -boston.test)^2)
[1] NaN
importance(rf.boston)
          %IncMSE IncNodePurity
crim    12.695306    1122.75895
zn       2.529138      56.27423
indus    9.284108    1065.85954
chas     2.424056      42.83957
nox     13.010749    1155.04063
rm      30.709575    6392.99954
age     11.692832     491.46183
dis     16.638661    1356.61363
rad      3.626382      90.11853
tax      7.587347     372.52726
ptratio 12.816740     953.96323
black    6.247228     345.12155
lstat   29.315419    7124.24707
#varImpPlot(rf.boston)

Boosting

set.seed (1)
boost.boston =gbm(medv~.,data=Boston [train ,], distribution="gaussian",n.trees =5000 , interaction.depth =4)
summary (boost.boston )
par(mfrow =c(1,2))

plot(boost.boston ,i="rm")
plot(boost.boston ,i="lstat")

yhat.boost=predict (boost.boston ,newdata =Boston [-train ,],n.trees =5000)
mean(( yhat.boost -boston.test)^2)
[1] NaN
boost.boston =gbm(medv~.,data=Boston [train ,], distribution="gaussian",n.trees =5000 , interaction.depth =4, shrinkage =0.2,verbose =F)
yhat.boost=predict (boost.boston ,newdata =Boston [-train ,],n.trees =5000)
mean(( yhat.boost -boston.test)^2)
[1] NaN
LS0tDQp0aXRsZTogIkxhYm9yYXRvcmlvIDgiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7cn0NCmxpYnJhcnkoTUFTUykNCmxpYnJhcnkoSVNMUikNCmxpYnJhcnkoY2FyKQ0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkoZ2dwbG90MikNCmxpYnJhcnkoY2FyZXQpDQpsaWJyYXJ5KG1sYmVuY2gpDQpsaWJyYXJ5KGNhVG9vbHMpDQpsaWJyYXJ5IChyYW5kb21Gb3Jlc3QpDQpsaWJyYXJ5ICh0cmVlKQ0KbGlicmFyeSAoZ2JtKQ0KYGBgDQojI0ZpdHRpbmcgQ2xhc3NpZmljYXRpb24gVHJlZXMNCmBgYHtyfQ0KYXR0YWNoIChDYXJzZWF0cyApDQpIaWdoPWlmZWxzZSAoU2FsZXMgPD04LCIgTm8iLCIgWWVzICIpDQpDYXJzZWF0cyA9ZGF0YS5mcmFtZShDYXJzZWF0cyAsSGlnaCkNCnRyZWUuY2Fyc2VhdHMgPXRyZWUoSGlnaH4uLVNhbGVzICxDYXJzZWF0cyApDQpzdW1tYXJ5ICh0cmVlLmNhcnNlYXRzICkNCnBsb3QodHJlZS5jYXJzZWF0cyApDQp0ZXh0KHRyZWUuY2Fyc2VhdHMgLHByZXR0eSA9MCkNCnRyZWUuY2Fyc2VhdHMNCnNldC5zZWVkICgyKQ0KdHJhaW49c2FtcGxlICgxOiBucm93KENhcnNlYXRzICksIDIwMCkNCkNhcnNlYXRzLnRlc3Q9Q2Fyc2VhdHNbLXRyYWluICxdDQpIaWdoLnRlc3Q9SGlnaFstdHJhaW4gXQ0KdHJlZS5jYXJzZWF0cyA9dHJlZShIaWdofi4tU2FsZXMgLENhcnNlYXRzICxzdWJzZXQgPXRyYWluICkNCnRyZWUucHJlZD1wcmVkaWN0KHRyZWUuY2Fyc2VhdHMgLENhcnNlYXRzLnRlc3QgLHR5cGUgPSJjbGFzcyIpDQp0YWJsZSh0cmVlLnByZWQgLEhpZ2gudGVzdCkNCnNldC5zZWVkICgzKQ0KY3YuY2Fyc2VhdHMgPWN2LnRyZWUodHJlZS5jYXJzZWF0cyAsRlVOPXBydW5lLm1pc2NsYXNzICkNCm5hbWVzKGN2LmNhcnNlYXRzICkNCmN2LmNhcnNlYXRzDQpwYXIobWZyb3cgPWMoMSwyKSkNCnBsb3QoY3YuY2Fyc2VhdHMkc2l6ZSAsY3YuY2Fyc2VhdHMkZGV2ICx0eXBlPSJiIikNCnBsb3QoY3YuY2Fyc2VhdHMkayAsY3YuY2Fyc2VhdHMkZGV2ICx0eXBlPSJiIikNCnBydW5lLmNhcnNlYXRzID1wcnVuZS5taXNjbGFzcyAodHJlZS5jYXJzZWF0cyAsYmVzdCA9OSkNCnBsb3QocHJ1bmUuY2Fyc2VhdHMgKQ0KdGV4dChwcnVuZS5jYXJzZWF0cyAscHJldHR5ID0wKQ0KdHJlZS5wcmVkPXByZWRpY3QocHJ1bmUuY2Fyc2VhdHMgLCBDYXJzZWF0cy50ZXN0ICx0eXBlPSJjbGFzcyIpDQp0YWJsZSh0cmVlLnByZWQgLEhpZ2gudGVzdCkNCig5NCs2MCkgLzIwMA0KcHJ1bmUuY2Fyc2VhdHMgPXBydW5lLm1pc2NsYXNzICh0cmVlLmNhcnNlYXRzICxiZXN0ID0xNSkNCnBsb3QocHJ1bmUuY2Fyc2VhdHMgKQ0KdGV4dChwcnVuZS5jYXJzZWF0cyAscHJldHR5ID0wKQ0KdHJlZS5wcmVkPXByZWRpY3QocHJ1bmUuY2Fyc2VhdHMgLCBDYXJzZWF0cy50ZXN0ICx0eXBlPSJjbGFzcyIpDQp0YWJsZSh0cmVlLnByZWQgLEhpZ2gudGVzdCkNCmBgYA0KIyNGaXR0aW5nIFJlZ3Jlc3Npb24gVHJlZXMNCmBgYHtyfQ0Kc2V0LnNlZWQgKDEpDQp0cmFpbiA9IHNhbXBsZSAoMTogbnJvdyhCb3N0b24gKSwgbnJvdyhCb3N0b24gKS8yKQ0KdHJlZS5ib3N0b24gPXRyZWUobWVkdn4uLEJvc3RvbiAsc3Vic2V0ID10cmFpbikNCnN1bW1hcnkgKHRyZWUuYm9zdG9uICkNCnBsb3QodHJlZS5ib3N0b24gKQ0KdGV4dCh0cmVlLmJvc3RvbiAscHJldHR5ID0wKQ0KY3YuYm9zdG9uID1jdi50cmVlKHRyZWUuYm9zdG9uICkNCnBsb3QoY3YuYm9zdG9uJHNpemUgLGN2LmJvc3RvbiRkZXYgLHR5cGUgPSAiYiIpDQpwcnVuZS5ib3N0b24gPXBydW5lLnRyZWUodHJlZS5ib3N0b24gLGJlc3QgPTUpDQpwbG90KHBydW5lLmJvc3RvbiApDQp0ZXh0KHBydW5lLmJvc3RvbiAscHJldHR5ID0wKQ0KeWhhdD1wcmVkaWN0ICh0cmVlLmJvc3RvbiAsbmV3ZGF0YSA9Qm9zdG9uIFstdHJhaW4gLF0pDQpib3N0b24udGVzdD1Cb3N0b25bLXRyYWluICwiIG1lZHYiXQ0KcGxvdCh5aGF0ICxib3N0b24udGVzdCkNCmFibGluZSAoMCwxKQ0KbWVhbigoeWhhdCAtYm9zdG9uLnRlc3QpXjIpDQpgYGANCiMjQmFnZ2luZyBhbmQgUmFuZG9tIEZvcmVzdHMNCmBgYHtyfQ0Kc2V0LnNlZWQgKDEpDQpiYWcuYm9zdG9uID1yYW5kb21Gb3Jlc3QobWVkdn4uLGRhdGE9Qm9zdG9uICxzdWJzZXQgPXRyYWluICxtdHJ5PTEzLCBpbXBvcnRhbmNlID1UUlVFKQ0KYmFnLmJvc3Rvbg0KYmFnLmJvc3RvbiA9cmFuZG9tRm9yZXN0KG1lZHZ+LixkYXRhPUJvc3RvbiAsc3Vic2V0ID10cmFpbiAsbXRyeT0xMywgbnRyZWUgPTI1KQ0KeWhhdC5iYWcgPSBwcmVkaWN0IChiYWcuYm9zdG9uICxuZXdkYXRhID1Cb3N0b24gWy10cmFpbiAsXSkNCm1lYW4oKCB5aGF0LmJhZyAtYm9zdG9uLnRlc3QpXjIpDQpzZXQuc2VlZCgxKQ0KcmYuYm9zdG9uID1yYW5kb21Gb3Jlc3QobWVkdn4uLGRhdGE9Qm9zdG9uICxzdWJzZXQgPXRyYWluICwNCm10cnk9NiwgaW1wb3J0YW5jZSA9VFJVRSkNCnloYXQucmYgPSBwcmVkaWN0KHJmLmJvc3RvbiAsbmV3ZGF0YSA9Qm9zdG9uIFstdHJhaW4gLF0pDQptZWFuKCggeWhhdC5yZiAtYm9zdG9uLnRlc3QpXjIpDQppbXBvcnRhbmNlKHJmLmJvc3RvbikNCiN2YXJJbXBQbG90KHJmLmJvc3RvbikNCmBgYA0KIyNCb29zdGluZw0KYGBge3J9DQpzZXQuc2VlZCAoMSkNCmJvb3N0LmJvc3RvbiA9Z2JtKG1lZHZ+LixkYXRhPUJvc3RvbiBbdHJhaW4gLF0sIGRpc3RyaWJ1dGlvbj0iZ2F1c3NpYW4iLG4udHJlZXMgPTUwMDAgLCBpbnRlcmFjdGlvbi5kZXB0aCA9NCkNCnN1bW1hcnkgKGJvb3N0LmJvc3RvbiApDQpwYXIobWZyb3cgPWMoMSwyKSkNCnBsb3QoYm9vc3QuYm9zdG9uICxpPSJybSIpDQpwbG90KGJvb3N0LmJvc3RvbiAsaT0ibHN0YXQiKQ0KeWhhdC5ib29zdD1wcmVkaWN0IChib29zdC5ib3N0b24gLG5ld2RhdGEgPUJvc3RvbiBbLXRyYWluICxdLG4udHJlZXMgPTUwMDApDQptZWFuKCggeWhhdC5ib29zdCAtYm9zdG9uLnRlc3QpXjIpDQpib29zdC5ib3N0b24gPWdibShtZWR2fi4sZGF0YT1Cb3N0b24gW3RyYWluICxdLCBkaXN0cmlidXRpb249ImdhdXNzaWFuIixuLnRyZWVzID01MDAwICwgaW50ZXJhY3Rpb24uZGVwdGggPTQsIHNocmlua2FnZSA9MC4yLHZlcmJvc2UgPUYpDQp5aGF0LmJvb3N0PXByZWRpY3QgKGJvb3N0LmJvc3RvbiAsbmV3ZGF0YSA9Qm9zdG9uIFstdHJhaW4gLF0sbi50cmVlcyA9NTAwMCkNCm1lYW4oKCB5aGF0LmJvb3N0IC1ib3N0b24udGVzdCleMikNCmBgYA==