## Registered S3 method overwritten by 'tree':
##   method     from
##   print.tree cli
## Загрузка требуемого пакета: ggplot2
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
## 
## Присоединяю пакет: 'randomForest'
## Следующий объект скрыт от 'package:ggplot2':
## 
##     margin
## Loaded gbm 2.1.8.1
## 'data.frame':    3000 obs. of  11 variables:
##  $ year      : int  2006 2004 2003 2003 2005 2008 2009 2008 2006 2004 ...
##  $ age       : int  18 24 45 43 50 54 44 30 41 52 ...
##  $ maritl    : Factor w/ 5 levels "1. Never Married",..: 1 1 2 2 4 2 2 1 1 2 ...
##  $ race      : Factor w/ 4 levels "1. White","2. Black",..: 1 1 1 3 1 1 4 3 2 1 ...
##  $ education : Factor w/ 5 levels "1. < HS Grad",..: 1 4 3 4 2 4 3 3 3 2 ...
##  $ region    : Factor w/ 9 levels "1. New England",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ jobclass  : Factor w/ 2 levels "1. Industrial",..: 1 2 1 2 2 2 1 2 2 2 ...
##  $ health    : Factor w/ 2 levels "1. <=Good","2. >=Very Good": 1 2 1 2 1 2 2 1 2 2 ...
##  $ health_ins: Factor w/ 2 levels "1. Yes","2. No": 2 2 1 1 1 1 1 1 1 1 ...
##  $ logwage   : num  4.32 4.26 4.88 5.04 4.32 ...
##  $ wage      : num  75 70.5 131 154.7 75 ...
##  [1] "year"       "age"        "maritl"     "race"       "education" 
##  [6] "region"     "jobclass"   "health"     "health_ins" "logwage"   
## [11] "wage"
## [1] "year"       "age"        "maritl"     "race"       "education" 
## [6] "jobclass"   "health"     "health_ins" "wage"
##  [1] "year"       "age"        "maritl"     "race"       "education" 
##  [6] "jobclass"   "health"     "health_ins" "wage"       "High"

## 
## Classification tree:
## tree(formula = High ~ . - wage, data = data)
## Variables actually used in tree construction:
## [1] "education"  "maritl"     "age"        "health_ins"
## Number of terminal nodes:  6 
## Residual mean deviance:  0.9079 = 2718 / 2994 
## Misclassification error rate: 0.209 = 627 / 3000

## node), split, n, deviance, yval, (yprob)
##       * denotes terminal node
## 
##  1) root 3000 3428.0 0 ( 0.74167 0.25833 )  
##    2) education: 1. < HS Grad,2. HS Grad,3. Some College 1889 1427.0 0 ( 0.87454 0.12546 )  
##      4) maritl: 1. Never Married,3. Widowed,5. Separated 502  105.8 0 ( 0.97809 0.02191 ) *
##      5) maritl: 2. Married,4. Divorced 1387 1233.0 0 ( 0.83706 0.16294 )  
##       10) education: 1. < HS Grad,2. HS Grad 914  660.0 0 ( 0.88293 0.11707 ) *
##       11) education: 3. Some College 473  533.6 0 ( 0.74841 0.25159 ) *
##    3) education: 4. College Grad,5. Advanced Degree 1111 1539.0 0 ( 0.51575 0.48425 )  
##      6) age < 33.5 227  246.7 0 ( 0.76652 0.23348 ) *
##      7) age > 33.5 884 1217.0 1 ( 0.45136 0.54864 )  
##       14) health_ins: 1. Yes 708  950.3 1 ( 0.39548 0.60452 ) *
##       15) health_ins: 2. No 176  221.7 0 ( 0.67614 0.32386 ) *
## 
## Classification tree:
## tree(formula = High ~ . - wage, data = data, subset = train)
## Variables actually used in tree construction:
## [1] "education"  "age"        "health"     "jobclass"   "maritl"    
## [6] "race"       "health_ins"
## Number of terminal nodes:  11 
## Residual mean deviance:  0.6745 = 127.5 / 189 
## Misclassification error rate: 0.19 = 38 / 200
##          High.test
## tree.pred    0    1
##         0 1997  664
##         1   73   66
## Wage.class.tree.all 
##           0.7367857
## [1] "size"   "dev"    "k"      "method"
## $size
## [1] 11  4  1
## 
## $dev
## [1] 60 58 52
## 
## $k
## [1]     -Inf 0.000000 2.333333
## 
## $method
## [1] "misclass"
## 
## attr(,"class")
## [1] "prune"         "tree.sequence"

##    0    1 
## 2669  131
##          High.test
## tree.pred    0    1
##         0 2003  666
##         1   67   64
## Wage.class.tree.all   Wage.class.tree.5 
##           0.7367857           0.7382143
## 
## Call:
##  randomForest(formula = wage ~ ., data = data, mtry = 8, importance = TRUE,      ntree = 500, subset = train) 
##                Type of random forest: regression
##                      Number of trees: 500
## No. of variables tried at each split: 8
## 
##           Mean of squared residuals: 669.8182
##                     % Var explained: 54.77

## Wage.bag.8 
##   802.3113
## 
## Call:
##  randomForest(formula = wage ~ ., data = data, mtry = 9, importance = TRUE,      ntree = 1000, subset = train) 
##                Type of random forest: regression
##                      Number of trees: 1000
## No. of variables tried at each split: 9
## 
##           Mean of squared residuals: 670.4685
##                     % Var explained: 54.72
##        Wage.bag.8 Wage.bag.9.tr1000 
##          802.3113          824.8688
## 
## Call:
##  randomForest(formula = wage ~ ., data = data, mtry = 2, importance = TRUE,      ntree = 50, subset = train) 
##                Type of random forest: regression
##                      Number of trees: 50
## No. of variables tried at each split: 2
## 
##           Mean of squared residuals: 656.3471
##                     % Var explained: 55.68

##        Wage.bag.8 Wage.bag.9.tr1000   Wage.bag.2.tr50 
##          802.3113          824.8688          752.1208