## Registered S3 method overwritten by 'tree':
## method from
## print.tree cli
## Загрузка требуемого пакета: ggplot2
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
##
## Присоединяю пакет: 'randomForest'
## Следующий объект скрыт от 'package:ggplot2':
##
## margin
## Loaded gbm 2.1.8.1
## 'data.frame': 3000 obs. of 11 variables:
## $ year : int 2006 2004 2003 2003 2005 2008 2009 2008 2006 2004 ...
## $ age : int 18 24 45 43 50 54 44 30 41 52 ...
## $ maritl : Factor w/ 5 levels "1. Never Married",..: 1 1 2 2 4 2 2 1 1 2 ...
## $ race : Factor w/ 4 levels "1. White","2. Black",..: 1 1 1 3 1 1 4 3 2 1 ...
## $ education : Factor w/ 5 levels "1. < HS Grad",..: 1 4 3 4 2 4 3 3 3 2 ...
## $ region : Factor w/ 9 levels "1. New England",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ jobclass : Factor w/ 2 levels "1. Industrial",..: 1 2 1 2 2 2 1 2 2 2 ...
## $ health : Factor w/ 2 levels "1. <=Good","2. >=Very Good": 1 2 1 2 1 2 2 1 2 2 ...
## $ health_ins: Factor w/ 2 levels "1. Yes","2. No": 2 2 1 1 1 1 1 1 1 1 ...
## $ logwage : num 4.32 4.26 4.88 5.04 4.32 ...
## $ wage : num 75 70.5 131 154.7 75 ...
## [1] "year" "age" "maritl" "race" "education"
## [6] "region" "jobclass" "health" "health_ins" "logwage"
## [11] "wage"
## [1] "year" "age" "maritl" "race" "education"
## [6] "jobclass" "health" "health_ins" "wage"
## [1] "year" "age" "maritl" "race" "education"
## [6] "jobclass" "health" "health_ins" "wage" "High"


##
## Classification tree:
## tree(formula = High ~ . - wage, data = data)
## Variables actually used in tree construction:
## [1] "education" "maritl" "age" "health_ins"
## Number of terminal nodes: 6
## Residual mean deviance: 0.9079 = 2718 / 2994
## Misclassification error rate: 0.209 = 627 / 3000

## node), split, n, deviance, yval, (yprob)
## * denotes terminal node
##
## 1) root 3000 3428.0 0 ( 0.74167 0.25833 )
## 2) education: 1. < HS Grad,2. HS Grad,3. Some College 1889 1427.0 0 ( 0.87454 0.12546 )
## 4) maritl: 1. Never Married,3. Widowed,5. Separated 502 105.8 0 ( 0.97809 0.02191 ) *
## 5) maritl: 2. Married,4. Divorced 1387 1233.0 0 ( 0.83706 0.16294 )
## 10) education: 1. < HS Grad,2. HS Grad 914 660.0 0 ( 0.88293 0.11707 ) *
## 11) education: 3. Some College 473 533.6 0 ( 0.74841 0.25159 ) *
## 3) education: 4. College Grad,5. Advanced Degree 1111 1539.0 0 ( 0.51575 0.48425 )
## 6) age < 33.5 227 246.7 0 ( 0.76652 0.23348 ) *
## 7) age > 33.5 884 1217.0 1 ( 0.45136 0.54864 )
## 14) health_ins: 1. Yes 708 950.3 1 ( 0.39548 0.60452 ) *
## 15) health_ins: 2. No 176 221.7 0 ( 0.67614 0.32386 ) *
##
## Classification tree:
## tree(formula = High ~ . - wage, data = data, subset = train)
## Variables actually used in tree construction:
## [1] "education" "age" "health" "jobclass" "maritl"
## [6] "race" "health_ins"
## Number of terminal nodes: 11
## Residual mean deviance: 0.6745 = 127.5 / 189
## Misclassification error rate: 0.19 = 38 / 200
## High.test
## tree.pred 0 1
## 0 1997 664
## 1 73 66
## Wage.class.tree.all
## 0.7367857
## [1] "size" "dev" "k" "method"
## $size
## [1] 11 4 1
##
## $dev
## [1] 60 58 52
##
## $k
## [1] -Inf 0.000000 2.333333
##
## $method
## [1] "misclass"
##
## attr(,"class")
## [1] "prune" "tree.sequence"

## 0 1
## 2669 131
## High.test
## tree.pred 0 1
## 0 2003 666
## 1 67 64
## Wage.class.tree.all Wage.class.tree.5
## 0.7367857 0.7382143
##
## Call:
## randomForest(formula = wage ~ ., data = data, mtry = 8, importance = TRUE, ntree = 500, subset = train)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 8
##
## Mean of squared residuals: 669.8182
## % Var explained: 54.77

## Wage.bag.8
## 802.3113
##
## Call:
## randomForest(formula = wage ~ ., data = data, mtry = 9, importance = TRUE, ntree = 1000, subset = train)
## Type of random forest: regression
## Number of trees: 1000
## No. of variables tried at each split: 9
##
## Mean of squared residuals: 670.4685
## % Var explained: 54.72
## Wage.bag.8 Wage.bag.9.tr1000
## 802.3113 824.8688
##
## Call:
## randomForest(formula = wage ~ ., data = data, mtry = 2, importance = TRUE, ntree = 50, subset = train)
## Type of random forest: regression
## Number of trees: 50
## No. of variables tried at each split: 2
##
## Mean of squared residuals: 656.3471
## % Var explained: 55.68

## Wage.bag.8 Wage.bag.9.tr1000 Wage.bag.2.tr50
## 802.3113 824.8688 752.1208