Read data

# Load the census extract and show its structure.
# stringsAsFactors = TRUE is passed explicitly because read.csv() defaults to
# FALSE from R 4.0.0 onward; the str() output below and the classification
# models fitted later expect the text columns (including the outcome over50k)
# to be factors.
Census <- read.csv("census.csv", stringsAsFactors = TRUE)
str(Census)
## 'data.frame':    31978 obs. of  13 variables:
##  $ age          : int  39 50 38 53 28 37 49 52 31 42 ...
##  $ workclass    : Factor w/ 9 levels " ?"," Federal-gov",..: 8 7 5 5 5 5 5 7 5 5 ...
##  $ education    : Factor w/ 16 levels " 10th"," 11th",..: 10 10 12 2 10 13 7 12 13 10 ...
##  $ maritalstatus: Factor w/ 7 levels " Divorced"," Married-AF-spouse",..: 5 3 1 3 3 3 4 3 5 3 ...
##  $ occupation   : Factor w/ 15 levels " ?"," Adm-clerical",..: 2 5 7 7 11 5 9 5 11 5 ...
##  $ relationship : Factor w/ 6 levels " Husband"," Not-in-family",..: 2 1 2 1 6 6 2 1 2 1 ...
##  $ race         : Factor w/ 5 levels " Amer-Indian-Eskimo",..: 5 5 5 3 3 5 3 5 5 5 ...
##  $ sex          : Factor w/ 2 levels " Female"," Male": 2 2 2 2 1 1 1 2 1 2 ...
##  $ capitalgain  : int  2174 0 0 0 0 0 0 0 14084 5178 ...
##  $ capitalloss  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ hoursperweek : int  40 13 40 40 40 40 16 45 50 40 ...
##  $ nativecountry: Factor w/ 41 levels " Cambodia"," Canada",..: 39 39 39 39 5 39 23 39 39 39 ...
##  $ over50k      : Factor w/ 2 levels " <=50K"," >50K": 1 1 1 1 1 1 1 2 2 2 ...

Create test and training sets

library(caTools)
## Warning: package 'caTools' was built under R version 3.1.3
# Seed the RNG so the partition below is reproducible.
set.seed(2000)
# Logical mask from sample.split(), built from the outcome column with a
# split ratio of 0.6: TRUE rows go to the training set, FALSE rows to the
# test set.
Spl <- sample.split(Census$over50k, SplitRatio = 0.6)
Train <- Census[Spl, ]
Test <- Census[!Spl, ]

Build logistic regression model using all other variables

# Logistic regression of over50k on every other column of Train.
CensusLog <- glm(over50k ~ ., family = binomial, data = Train)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Coefficient table, deviances and AIC for the fitted model.
summary(CensusLog)
## 
## Call:
## glm(formula = over50k ~ ., family = binomial, data = Train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -5.1065  -0.5037  -0.1804  -0.0008   3.3383  
## 
## Coefficients: (1 not defined because of singularities)
##                                            Estimate Std. Error z value
## (Intercept)                              -8.658e+00  1.379e+00  -6.279
## age                                       2.548e-02  2.139e-03  11.916
## workclass Federal-gov                     1.105e+00  2.014e-01   5.489
## workclass Local-gov                       3.675e-01  1.821e-01   2.018
## workclass Never-worked                   -1.283e+01  8.453e+02  -0.015
## workclass Private                         6.012e-01  1.626e-01   3.698
## workclass Self-emp-inc                    7.575e-01  1.950e-01   3.884
## workclass Self-emp-not-inc                1.855e-01  1.774e-01   1.046
## workclass State-gov                       4.012e-01  1.961e-01   2.046
## workclass Without-pay                    -1.395e+01  6.597e+02  -0.021
## education 11th                            2.225e-01  2.867e-01   0.776
## education 12th                            6.380e-01  3.597e-01   1.774
## education 1st-4th                        -7.075e-01  7.760e-01  -0.912
## education 5th-6th                        -3.170e-01  4.880e-01  -0.650
## education 7th-8th                        -3.498e-01  3.126e-01  -1.119
## education 9th                            -1.258e-01  3.539e-01  -0.355
## education Assoc-acdm                      1.602e+00  2.427e-01   6.601
## education Assoc-voc                       1.541e+00  2.368e-01   6.506
## education Bachelors                       2.177e+00  2.218e-01   9.817
## education Doctorate                       2.761e+00  2.893e-01   9.544
## education HS-grad                         1.006e+00  2.169e-01   4.638
## education Masters                         2.421e+00  2.353e-01  10.289
## education Preschool                      -2.237e+01  6.864e+02  -0.033
## education Prof-school                     2.938e+00  2.753e-01  10.672
## education Some-college                    1.365e+00  2.195e-01   6.219
## maritalstatus Married-AF-spouse           2.540e+00  7.145e-01   3.555
## maritalstatus Married-civ-spouse          2.458e+00  3.573e-01   6.880
## maritalstatus Married-spouse-absent      -9.486e-02  3.204e-01  -0.296
## maritalstatus Never-married              -4.515e-01  1.139e-01  -3.962
## maritalstatus Separated                   3.609e-02  1.984e-01   0.182
## maritalstatus Widowed                     1.858e-01  1.962e-01   0.947
## occupation Adm-clerical                   9.470e-02  1.288e-01   0.735
## occupation Armed-Forces                  -1.008e+00  1.487e+00  -0.677
## occupation Craft-repair                   2.174e-01  1.109e-01   1.960
## occupation Exec-managerial                9.400e-01  1.138e-01   8.257
## occupation Farming-fishing               -1.068e+00  1.908e-01  -5.599
## occupation Handlers-cleaners             -6.237e-01  1.946e-01  -3.204
## occupation Machine-op-inspct             -1.862e-01  1.376e-01  -1.353
## occupation Other-service                 -8.183e-01  1.641e-01  -4.987
## occupation Priv-house-serv               -1.297e+01  2.267e+02  -0.057
## occupation Prof-specialty                 6.331e-01  1.222e-01   5.180
## occupation Protective-serv                6.267e-01  1.710e-01   3.664
## occupation Sales                          3.276e-01  1.175e-01   2.789
## occupation Tech-support                   6.173e-01  1.533e-01   4.028
## occupation Transport-moving                      NA         NA      NA
## relationship Not-in-family                7.881e-01  3.530e-01   2.233
## relationship Other-relative              -2.194e-01  3.137e-01  -0.699
## relationship Own-child                   -7.489e-01  3.507e-01  -2.136
## relationship Unmarried                    7.041e-01  3.720e-01   1.893
## relationship Wife                         1.324e+00  1.331e-01   9.942
## race Asian-Pac-Islander                   4.830e-01  3.548e-01   1.361
## race Black                                3.644e-01  2.882e-01   1.265
## race Other                                2.204e-01  4.513e-01   0.488
## race White                                4.108e-01  2.737e-01   1.501
## sex Male                                  7.729e-01  1.024e-01   7.545
## capitalgain                               3.280e-04  1.372e-05  23.904
## capitalloss                               6.445e-04  4.854e-05  13.277
## hoursperweek                              2.897e-02  2.101e-03  13.791
## nativecountry Canada                      2.593e-01  1.308e+00   0.198
## nativecountry China                      -9.695e-01  1.327e+00  -0.730
## nativecountry Columbia                   -1.954e+00  1.526e+00  -1.280
## nativecountry Cuba                        5.735e-02  1.323e+00   0.043
## nativecountry Dominican-Republic         -1.435e+01  3.092e+02  -0.046
## nativecountry Ecuador                    -3.550e-02  1.477e+00  -0.024
## nativecountry El-Salvador                -6.095e-01  1.395e+00  -0.437
## nativecountry England                    -6.707e-02  1.327e+00  -0.051
## nativecountry France                      5.301e-01  1.419e+00   0.374
## nativecountry Germany                     5.474e-02  1.306e+00   0.042
## nativecountry Greece                     -2.646e+00  1.714e+00  -1.544
## nativecountry Guatemala                  -1.293e+01  3.345e+02  -0.039
## nativecountry Haiti                      -9.221e-01  1.615e+00  -0.571
## nativecountry Holand-Netherlands         -1.282e+01  2.400e+03  -0.005
## nativecountry Honduras                   -9.584e-01  3.412e+00  -0.281
## nativecountry Hong                       -2.362e-01  1.492e+00  -0.158
## nativecountry Hungary                     1.412e-01  1.555e+00   0.091
## nativecountry India                      -8.218e-01  1.314e+00  -0.625
## nativecountry Iran                       -3.299e-02  1.366e+00  -0.024
## nativecountry Ireland                     1.579e-01  1.473e+00   0.107
## nativecountry Italy                       6.100e-01  1.333e+00   0.458
## nativecountry Jamaica                    -2.279e-01  1.387e+00  -0.164
## nativecountry Japan                       5.072e-01  1.375e+00   0.369
## nativecountry Laos                       -6.831e-01  1.661e+00  -0.411
## nativecountry Mexico                     -9.182e-01  1.303e+00  -0.705
## nativecountry Nicaragua                  -1.987e-01  1.507e+00  -0.132
## nativecountry Outlying-US(Guam-USVI-etc) -1.373e+01  8.502e+02  -0.016
## nativecountry Peru                       -9.660e-01  1.678e+00  -0.576
## nativecountry Philippines                 4.393e-02  1.281e+00   0.034
## nativecountry Poland                      2.410e-01  1.383e+00   0.174
## nativecountry Portugal                    7.276e-01  1.477e+00   0.493
## nativecountry Puerto-Rico                -5.769e-01  1.357e+00  -0.425
## nativecountry Scotland                   -1.188e+00  1.719e+00  -0.691
## nativecountry South                      -8.183e-01  1.341e+00  -0.610
## nativecountry Taiwan                     -2.590e-01  1.350e+00  -0.192
## nativecountry Thailand                   -1.693e+00  1.737e+00  -0.975
## nativecountry Trinadad&Tobago            -1.346e+00  1.721e+00  -0.782
## nativecountry United-States              -8.594e-02  1.269e+00  -0.068
## nativecountry Vietnam                    -1.008e+00  1.523e+00  -0.662
## nativecountry Yugoslavia                  1.402e+00  1.648e+00   0.851
##                                          Pr(>|z|)    
## (Intercept)                              3.41e-10 ***
## age                                       < 2e-16 ***
## workclass Federal-gov                    4.03e-08 ***
## workclass Local-gov                      0.043641 *  
## workclass Never-worked                   0.987885    
## workclass Private                        0.000218 ***
## workclass Self-emp-inc                   0.000103 ***
## workclass Self-emp-not-inc               0.295646    
## workclass State-gov                      0.040728 *  
## workclass Without-pay                    0.983134    
## education 11th                           0.437738    
## education 12th                           0.076064 .  
## education 1st-4th                        0.361897    
## education 5th-6th                        0.516008    
## education 7th-8th                        0.263152    
## education 9th                            0.722228    
## education Assoc-acdm                     4.10e-11 ***
## education Assoc-voc                      7.74e-11 ***
## education Bachelors                       < 2e-16 ***
## education Doctorate                       < 2e-16 ***
## education HS-grad                        3.52e-06 ***
## education Masters                         < 2e-16 ***
## education Preschool                      0.973996    
## education Prof-school                     < 2e-16 ***
## education Some-college                   5.00e-10 ***
## maritalstatus Married-AF-spouse          0.000378 ***
## maritalstatus Married-civ-spouse         6.00e-12 ***
## maritalstatus Married-spouse-absent      0.767155    
## maritalstatus Never-married              7.42e-05 ***
## maritalstatus Separated                  0.855672    
## maritalstatus Widowed                    0.343449    
## occupation Adm-clerical                  0.462064    
## occupation Armed-Forces                  0.498170    
## occupation Craft-repair                  0.049972 *  
## occupation Exec-managerial                < 2e-16 ***
## occupation Farming-fishing               2.15e-08 ***
## occupation Handlers-cleaners             0.001353 ** 
## occupation Machine-op-inspct             0.176061    
## occupation Other-service                 6.14e-07 ***
## occupation Priv-house-serv               0.954385    
## occupation Prof-specialty                2.22e-07 ***
## occupation Protective-serv               0.000248 ***
## occupation Sales                         0.005282 ** 
## occupation Tech-support                  5.63e-05 ***
## occupation Transport-moving                    NA    
## relationship Not-in-family               0.025562 *  
## relationship Other-relative              0.484263    
## relationship Own-child                   0.032716 *  
## relationship Unmarried                   0.058392 .  
## relationship Wife                         < 2e-16 ***
## race Asian-Pac-Islander                  0.173504    
## race Black                               0.206001    
## race Other                               0.625263    
## race White                               0.133356    
## sex Male                                 4.52e-14 ***
## capitalgain                               < 2e-16 ***
## capitalloss                               < 2e-16 ***
## hoursperweek                              < 2e-16 ***
## nativecountry Canada                     0.842879    
## nativecountry China                      0.465157    
## nativecountry Columbia                   0.200470    
## nativecountry Cuba                       0.965432    
## nativecountry Dominican-Republic         0.962972    
## nativecountry Ecuador                    0.980829    
## nativecountry El-Salvador                0.662181    
## nativecountry England                    0.959686    
## nativecountry France                     0.708642    
## nativecountry Germany                    0.966572    
## nativecountry Greece                     0.122527    
## nativecountry Guatemala                  0.969180    
## nativecountry Haiti                      0.568105    
## nativecountry Holand-Netherlands         0.995736    
## nativecountry Honduras                   0.778775    
## nativecountry Hong                       0.874155    
## nativecountry Hungary                    0.927653    
## nativecountry India                      0.531661    
## nativecountry Iran                       0.980736    
## nativecountry Ireland                    0.914628    
## nativecountry Italy                      0.647194    
## nativecountry Jamaica                    0.869467    
## nativecountry Japan                      0.712179    
## nativecountry Laos                       0.680866    
## nativecountry Mexico                     0.481103    
## nativecountry Nicaragua                  0.895132    
## nativecountry Outlying-US(Guam-USVI-etc) 0.987115    
## nativecountry Peru                       0.564797    
## nativecountry Philippines                0.972640    
## nativecountry Poland                     0.861624    
## nativecountry Portugal                   0.622327    
## nativecountry Puerto-Rico                0.670837    
## nativecountry Scotland                   0.489616    
## nativecountry South                      0.541809    
## nativecountry Taiwan                     0.847878    
## nativecountry Thailand                   0.329678    
## nativecountry Trinadad&Tobago            0.434105    
## nativecountry United-States              0.946020    
## nativecountry Vietnam                    0.507799    
## nativecountry Yugoslavia                 0.394874    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 21175  on 19186  degrees of freedom
## Residual deviance: 12104  on 19090  degrees of freedom
## AIC: 12298
## 
## Number of Fisher Scoring iterations: 15

Accuracy of model using threshold of 0.5

# Predicted probabilities from the logistic model for each held-out row.
PredictLog <- predict(CensusLog, newdata = Test, type = "response")
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
# Confusion matrix: actual class (rows) against "probability above 0.5"
# (columns).
confmat <- table(Test$over50k, PredictLog > 0.5)
confmat
##         
##          FALSE TRUE
##    <=50K  9051  662
##    >50K   1190 1888
# Accuracy = correctly classified rows (the diagonal) over all test rows.
N <- nrow(Test)
sum(diag(confmat)) / N
## [1] 0.8552107

Baseline accuracy:

# The baseline always predicts the most frequent class of the training set.
(table(Train$over50k))
## 
##  <=50K   >50K 
##  14570   4617
# Baseline accuracy on the test set: the share of test rows that belong to
# that majority class. The count is derived from the data instead of being
# typed in by hand (it was the literal 9713), so it stays correct if the seed,
# split ratio or input file changes.
BaselineClass <- names(which.max(table(Train$over50k)))
sum(Test$over50k == BaselineClass) / nrow(Test)
## [1] 0.7593621

AUC for test set

library(ROCR)
## Warning: package 'ROCR' was built under R version 3.1.3
## Loading required package: gplots
## Warning: package 'gplots' was built under R version 3.1.3
## 
## Attaching package: 'gplots'
## 
## The following object is masked from 'package:stats':
## 
##     lowess
# ROCR prediction object pairing the logistic scores with the true test labels.
pred <- prediction(PredictLog, Test$over50k)
# True-positive rate against false-positive rate (the ROC curve coordinates).
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
# Area under the ROC curve, pulled out of the y.values slot as a plain number.
as.numeric(performance(pred, measure = "auc")@y.values)
## [1] 0.9061598

Build CART model using all defaults

library(rpart)
## Warning: package 'rpart' was built under R version 3.1.3
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 3.1.3
# Classification tree on all predictors; no control parameters are overridden.
CensusCART <- rpart(formula = over50k ~ ., data = Train, method = "class")
# Draw the fitted tree.
prp(CensusCART)

Accuracy using a threshold of 0.5 (two ways to do the same thing):

# Way 1: let predict() return the predicted class label directly.
PredictCART <- predict(CensusCART, newdata = Test, type = "class")
confmat <- table(Test$over50k, PredictCART)
sum(diag(confmat)) / N
## [1] 0.8473927
# Way 2: take the second column of the class-probability matrix and apply the
# 0.5 cutoff by hand.
PredictCART <- predict(CensusCART, newdata = Test, type = "prob")[, 2]
confmat <- table(Test$over50k, PredictCART > 0.5)
confmat
##         
##          FALSE TRUE
##    <=50K  9243  470
##    >50K   1482 1596
sum(diag(confmat)) / N
## [1] 0.8473927

Generate ROC curve

library(ROCR)
# Predictions of the default CART model on the test set (one column per class).
PredictROC <- predict(CensusCART, newdata = Test)
# Score each row with column 2 of that matrix and pair it with the true label.
pred <- prediction(PredictROC[, 2], Test$over50k)
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
# ROC curve: true-positive rate against false-positive rate.
plot(perf)

Compute AUC

# Area under the ROC curve for the current `pred` object, as a plain number.
as.numeric(performance(pred, measure = "auc")@y.values)
## [1] 0.8470256

Make a smaller training set before running random forest because a large training set will require a lot of memory:

# Fix the seed, then keep 2000 training rows drawn without replacement.
set.seed(1)
TrainSmall <- Train[sample.int(nrow(Train), size = 2000), ]

Run random forest using all independent variables

library(randomForest)
## Warning: package 'randomForest' was built under R version 3.1.3
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
# Re-seed immediately before fitting so the forest can be reproduced.
set.seed(1)
# Random forest of over50k on every other column of the 2000-row TrainSmall
# sample, with all randomForest() settings left at their defaults.
CensusForest <- randomForest(over50k ~ ., data=TrainSmall)

Accuracy of random forest, with threshold 0.5.

# Predicted class label for each test row.
PredictForest <- predict(CensusForest, newdata = Test)
# Confusion matrix: actual class (rows) against predicted class (columns).
confmat <- table(Test$over50k, PredictForest)
confmat
##         PredictForest
##           <=50K  >50K
##    <=50K   9586   127
##    >50K    1985  1093
# Accuracy = diagonal of the confusion matrix over the number of test rows.
sum(diag(confmat)) / nrow(Test)
## [1] 0.8348839

Find out the number of times, aggregated over all of the trees in the random forest model, that a certain variable is selected for a split:

# Per-variable usage counts from the forest (count = TRUE).
vu <- varUsed(CensusForest, count = TRUE)
# Ascending sort; index.return = TRUE keeps the permutation ($ix) alongside
# the sorted counts ($x).
vusorted <- sort(vu, index.return = TRUE)
# Label each dot with the predictor name, taken from the forest's xlevels and
# reordered with the same permutation as the counts.
dotchart(vusorted$x, labels = names(CensusForest$forest$xlevels)[vusorted$ix])

Impurity – how homogeneous each bucket or leaf of the tree is. Plot the variable importance of the random forest:

# Variable-importance plot for the fitted forest.
# NOTE(review): the forest was fitted without importance = TRUE, so this
# presumably shows only the impurity-based measure; confirm.
varImpPlot(CensusForest)

Select cp parameter of our CART model using k-fold cross validation, with k=10 folds and cp values from 0.002 to 0.1 in 0.002 increments.

# Seed the RNG ahead of the cross-validated fit below.
set.seed(2)
library(caret)
## Warning: package 'caret' was built under R version 3.1.3
## Loading required package: lattice
## Loading required package: ggplot2
library(e1071)
## Warning: package 'e1071' was built under R version 3.1.3
# Resampling scheme: k-fold cross-validation with k = 10.
kfolds <- 10
numFolds <- trainControl(method = "cv", number = kfolds)
# Candidate complexity parameters: 0.002, 0.004, ..., 0.1.
cpGrid <- expand.grid(.cp = seq(from = 0.002, to = 0.1, by = 0.002))
# Fit an rpart tree for every candidate cp under that resampling scheme, then
# print the resampling results.
cpRes <- train(over50k ~ ., data = Train, method = "rpart",
               trControl = numFolds, tuneGrid = cpGrid)
cpRes
## CART 
## 
## 19187 samples
##    12 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## 
## Summary of sample sizes: 17268, 17269, 17268, 17268, 17268, 17269, ... 
## 
## Resampling results across tuning parameters:
## 
##   cp     Accuracy   Kappa       Accuracy SD   Kappa SD  
##   0.002  0.8508375  0.55418892  0.0079783134  0.02572748
##   0.004  0.8483880  0.55337769  0.0090978011  0.02868258
##   0.006  0.8445315  0.54123823  0.0096617586  0.03233330
##   0.008  0.8435933  0.54042644  0.0094655587  0.03147254
##   0.010  0.8435933  0.54140269  0.0094655587  0.03136220
##   0.012  0.8435933  0.54140269  0.0094655587  0.03136220
##   0.014  0.8435933  0.54140269  0.0094655587  0.03136220
##   0.016  0.8428116  0.53681906  0.0103168751  0.03646328
##   0.018  0.8409353  0.52587747  0.0099473396  0.04023190
##   0.020  0.8401011  0.52059748  0.0082541575  0.03181982
##   0.022  0.8385896  0.50703191  0.0074531758  0.01922360
##   0.024  0.8385896  0.50703191  0.0074531758  0.01922360
##   0.026  0.8389023  0.50505134  0.0069620513  0.02144327
##   0.028  0.8389023  0.50505134  0.0069620513  0.02144327
##   0.030  0.8389023  0.50505134  0.0069620513  0.02144327
##   0.032  0.8362438  0.48904498  0.0058432854  0.01998146
##   0.034  0.8344199  0.47659522  0.0064123113  0.02443506
##   0.036  0.8330126  0.46868826  0.0056838776  0.02310071
##   0.038  0.8246742  0.43210364  0.0072908778  0.02753755
##   0.040  0.8240485  0.42930363  0.0065649913  0.02486563
##   0.042  0.8240485  0.42930363  0.0065649913  0.02486563
##   0.044  0.8240485  0.42930363  0.0065649913  0.02486563
##   0.046  0.8240485  0.42930363  0.0065649913  0.02486563
##   0.048  0.8240485  0.42930363  0.0065649913  0.02486563
##   0.050  0.8203998  0.40161015  0.0055555482  0.04997157
##   0.052  0.8161259  0.36716691  0.0070277942  0.06002374
##   0.054  0.8123214  0.32392652  0.0055354181  0.04550869
##   0.056  0.8123214  0.32392652  0.0055354181  0.04550869
##   0.058  0.8118524  0.30757892  0.0053132172  0.02549082
##   0.060  0.8118524  0.30757892  0.0053132172  0.02549082
##   0.062  0.8118524  0.30757892  0.0053132172  0.02549082
##   0.064  0.8095062  0.29517106  0.0063359730  0.03570646
##   0.066  0.8075771  0.28451429  0.0056423962  0.03432258
##   0.068  0.7981443  0.22868458  0.0055663233  0.03381141
##   0.070  0.7958514  0.21468972  0.0029555869  0.01620357
##   0.072  0.7958514  0.21468972  0.0029555869  0.01620357
##   0.074  0.7958514  0.21468972  0.0029555869  0.01620357
##   0.076  0.7728666  0.07956634  0.0174372406  0.10286117
##   0.078  0.7593684  0.00000000  0.0001912161  0.00000000
##   0.080  0.7593684  0.00000000  0.0001912161  0.00000000
##   0.082  0.7593684  0.00000000  0.0001912161  0.00000000
##   0.084  0.7593684  0.00000000  0.0001912161  0.00000000
##   0.086  0.7593684  0.00000000  0.0001912161  0.00000000
##   0.088  0.7593684  0.00000000  0.0001912161  0.00000000
##   0.090  0.7593684  0.00000000  0.0001912161  0.00000000
##   0.092  0.7593684  0.00000000  0.0001912161  0.00000000
##   0.094  0.7593684  0.00000000  0.0001912161  0.00000000
##   0.096  0.7593684  0.00000000  0.0001912161  0.00000000
##   0.098  0.7593684  0.00000000  0.0001912161  0.00000000
##   0.100  0.7593684  0.00000000  0.0001912161  0.00000000
## 
## Accuracy was used to select the optimal model using  the largest value.
## The final value used for the model was cp = 0.002.
# Tuning-grid row selected by cross-validation; printed below as a one-row,
# one-column data frame holding cp.
cpRes$bestTune
##      cp
## 1 0.002

Fit a CART model to the training data using this value of cp. What is the prediction accuracy on the test set?

# Refit the tree on the full training set with the cross-validated cp.
# cpRes$bestTune is a one-row data frame, so the numeric cp column is
# extracted with $cp instead of handing the whole data frame to rpart's cp
# argument.
CensusCV <- rpart(over50k ~ ., data = Train, method = "class",
                  cp = cpRes$bestTune$cp)
# Predicted class label for each test row.
PredictCV <- predict(CensusCV, newdata = Test, type = "class")
# Confusion matrix: actual class (rows) against predicted class (columns).
(confmat <- table(Test$over50k, PredictCV))
##         PredictCV
##           <=50K  >50K
##    <=50K   9178   535
##    >50K    1240  1838
# Accuracy = diagonal of the confusion matrix over the number of test rows.
sum(diag(confmat)) / nrow(Test)
## [1] 0.8612306

Plot CART tree for new model

# Print the cp table, variable importance and node-by-node details of the
# cross-validated tree.
# NOTE(review): the outer parentheses also print whatever summary() returns;
# if that value is returned invisibly the tree is printed a second time --
# confirm this is intended.
(summary(CensusCV))
## Call:
## rpart(formula = over50k ~ ., data = Train, method = "class", 
##     cp = cpRes$bestTune)
##   n= 19187 
## 
##             CP nsplit rel error    xerror       xstd
## 1  0.121832359      0 1.0000000 1.0000000 0.01282467
## 2  0.065627031      2 0.7563353 0.7686810 0.01164877
## 3  0.037470219      3 0.6907083 0.7115010 0.01130135
## 4  0.007580680      4 0.6532380 0.6560537 0.01093906
## 5  0.005956249      8 0.6229153 0.6419753 0.01084280
## 6  0.004331817     10 0.6110028 0.6343946 0.01079021
## 7  0.004223522     11 0.6066710 0.6235651 0.01071415
## 8  0.003465454     13 0.5982240 0.6166342 0.01066489
## 9  0.003248863     16 0.5869612 0.6116526 0.01062920
## 10 0.002165909     17 0.5837124 0.6086203 0.01060735
## 11 0.002000000     18 0.5815465 0.6045051 0.01057756
## 
## Variable importance
##  relationship maritalstatus   capitalgain     education    occupation 
##            24            24            12            10            10 
##           sex           age  hoursperweek   capitalloss nativecountry 
##             8             6             3             2             1 
##     workclass 
##             1 
## 
## Node number 1: 19187 observations,    complexity param=0.1218324
##   predicted class= <=50K  expected loss=0.2406317  P(node) =1
##     class counts: 14570  4617
##    probabilities: 0.759 0.241 
##   left son=2 (10478 obs) right son=3 (8709 obs)
##   Primary splits:
##       relationship  splits as  RLLLLR, improve=1391.9300, (0 missing)
##       maritalstatus splits as  LRRLLLL, improve=1375.6510, (0 missing)
##       capitalgain   < 5119   to the left,  improve= 979.0779, (0 missing)
##       education     splits as  LLLLLLLLLRRLRLRL, improve= 749.1199, (0 missing)
##       occupation    splits as  LLLLRLLLLLRLLLL, improve= 694.4727, (0 missing)
##   Surrogate splits:
##       maritalstatus splits as  LRRLLLL, agree=0.993, adj=0.984, (0 split)
##       sex           splits as  LR, agree=0.685, adj=0.307, (0 split)
##       age           < 34.5   to the left,  agree=0.647, adj=0.223, (0 split)
##       occupation    splits as  LLLRRRLLLLRRLLR, agree=0.621, adj=0.165, (0 split)
##       hoursperweek  < 43.5   to the left,  agree=0.604, adj=0.127, (0 split)
## 
## Node number 2: 10478 observations,    complexity param=0.03747022
##   predicted class= <=50K  expected loss=0.06699752  P(node) =0.5460989
##     class counts:  9776   702
##    probabilities: 0.933 0.067 
##   left son=4 (10297 obs) right son=5 (181 obs)
##   Primary splits:
##       capitalgain  < 7565.5 to the left,  improve=305.64730, (0 missing)
##       education    splits as  LLLLLLLLLRRLRLRL, improve= 99.69222, (0 missing)
##       occupation   splits as  LLLLRLLLLLRLLLL, improve= 80.12740, (0 missing)
##       hoursperweek < 44.5   to the left,  improve= 73.50345, (0 missing)
##       age          < 33.5   to the left,  improve= 45.95414, (0 missing)
## 
## Node number 3: 8709 observations,    complexity param=0.1218324
##   predicted class= <=50K  expected loss=0.449535  P(node) =0.4539011
##     class counts:  4794  3915
##    probabilities: 0.550 0.450 
##   left son=6 (6158 obs) right son=7 (2551 obs)
##   Primary splits:
##       education   splits as  LLLLLLLLLRRLRLRL, improve=529.7869, (0 missing)
##       occupation  splits as  LRRLRLLLLLRRRRL, improve=524.6148, (0 missing)
##       capitalgain < 5095.5 to the left,  improve=452.5539, (0 missing)
##       capitalloss < 1782.5 to the left,  improve=151.2895, (0 missing)
##       age         < 29.5   to the left,  improve=132.6677, (0 missing)
##   Surrogate splits:
##       occupation    splits as  LLLLRLLLLLRLLLL, agree=0.792, adj=0.290, (0 split)
##       capitalgain   < 7493   to the left,  agree=0.724, adj=0.057, (0 split)
##       nativecountry splits as  LLRLLLLLRLLLLL-RLLRRLLLRLLLLLRLLLLRRLLLLL, agree=0.715, adj=0.027, (0 split)
##       capitalloss   < 1894.5 to the left,  agree=0.712, adj=0.018, (0 split)
##       race          splits as  LRLLL, agree=0.709, adj=0.007, (0 split)
## 
## Node number 4: 10297 observations
##   predicted class= <=50K  expected loss=0.05098572  P(node) =0.5366655
##     class counts:  9772   525
##    probabilities: 0.949 0.051 
## 
## Node number 5: 181 observations
##   predicted class= >50K   expected loss=0.02209945  P(node) =0.009433471
##     class counts:     4   177
##    probabilities: 0.022 0.978 
## 
## Node number 6: 6158 observations,    complexity param=0.06562703
##   predicted class= <=50K  expected loss=0.3372848  P(node) =0.3209465
##     class counts:  4081  2077
##    probabilities: 0.663 0.337 
##   left son=12 (5847 obs) right son=13 (311 obs)
##   Primary splits:
##       capitalgain < 5095.5 to the left,  improve=276.64820, (0 missing)
##       occupation  splits as  LRLLRLLLLLRRRRL, improve=152.12600, (0 missing)
##       education   splits as  LLLLLLLRR--R-L-R, improve=110.43240, (0 missing)
##       age         < 33.5   to the left,  improve= 74.69885, (0 missing)
##       capitalloss < 1846   to the left,  improve= 71.75490, (0 missing)
## 
## Node number 7: 2551 observations,    complexity param=0.005956249
##   predicted class= >50K   expected loss=0.2794982  P(node) =0.1329546
##     class counts:   713  1838
##    probabilities: 0.279 0.721 
##   left son=14 (2151 obs) right son=15 (400 obs)
##   Primary splits:
##       capitalgain  < 5095.5 to the left,  improve=70.19274, (0 missing)
##       occupation   splits as  LLRLRLLLL-RRRRL, improve=60.75980, (0 missing)
##       hoursperweek < 31     to the left,  improve=27.87522, (0 missing)
##       capitalloss  < 1782.5 to the left,  improve=27.80474, (0 missing)
##       age          < 28.5   to the left,  improve=22.51366, (0 missing)
##   Surrogate splits:
##       nativecountry splits as  -LLLLLLLLLLR-L-LL-LLLLLL-L---LL-L-LLL-LLL, agree=0.844, adj=0.002, (0 split)
## 
## Node number 12: 5847 observations,    complexity param=0.00758068
##   predicted class= <=50K  expected loss=0.3027193  P(node) =0.3047376
##     class counts:  4077  1770
##    probabilities: 0.697 0.303 
##   left son=24 (3590 obs) right son=25 (2257 obs)
##   Primary splits:
##       occupation  splits as  LRLLRLLLLLRRRRL, improve=125.39530, (0 missing)
##       education   splits as  LLLLLLLRR--R-L-R, improve= 94.31411, (0 missing)
##       capitalloss < 1846   to the left,  improve= 84.51002, (0 missing)
##       age         < 33.5   to the left,  improve= 58.81017, (0 missing)
##       workclass   splits as  LRLLLRLLL, improve= 42.38332, (0 missing)
##   Surrogate splits:
##       education    splits as  LLLLLLLRL--L-L-R, agree=0.657, adj=0.112, (0 split)
##       workclass    splits as  LRRLLRLRL, agree=0.656, adj=0.109, (0 split)
##       sex          splits as  RL, agree=0.637, adj=0.059, (0 split)
##       relationship splits as  L----R, agree=0.637, adj=0.058, (0 split)
##       capitalloss  < 1867.5 to the left,  agree=0.617, adj=0.008, (0 split)
## 
## Node number 13: 311 observations
##   predicted class= >50K   expected loss=0.01286174  P(node) =0.01620889
##     class counts:     4   307
##    probabilities: 0.013 0.987 
## 
## Node number 14: 2151 observations,    complexity param=0.005956249
##   predicted class= >50K   expected loss=0.330079  P(node) =0.1121072
##     class counts:   710  1441
##    probabilities: 0.330 0.670 
##   left son=28 (407 obs) right son=29 (1744 obs)
##   Primary splits:
##       occupation   splits as  LLRLRLLLL-RRRRL, improve=56.62440, (0 missing)
##       capitalloss  < 1782.5 to the left,  improve=41.78535, (0 missing)
##       hoursperweek < 31     to the left,  improve=26.08142, (0 missing)
##       capitalgain  < 3120   to the right, improve=17.16168, (0 missing)
##       age          < 61.5   to the right, improve=17.04793, (0 missing)
##   Surrogate splits:
##       workclass     splits as  LRR-RRRR-, agree=0.845, adj=0.182, (0 split)
##       nativecountry splits as  -RRRRRRRRRR--R-RR-RRRRRR-L---LR-R-RRL-RLL, agree=0.815, adj=0.020, (0 split)
##       hoursperweek  < 14.5   to the left,  agree=0.814, adj=0.015, (0 split)
## 
## Node number 15: 400 observations
##   predicted class= >50K   expected loss=0.0075  P(node) =0.02084745
##     class counts:     3   397
##    probabilities: 0.007 0.992 
## 
## Node number 24: 3590 observations,    complexity param=0.003465454
##   predicted class= <=50K  expected loss=0.2206128  P(node) =0.1871059
##     class counts:  2798   792
##    probabilities: 0.779 0.221 
##   left son=48 (809 obs) right son=49 (2781 obs)
##   Primary splits:
##       education     splits as  LLLLLLLRR--R-L-R, improve=41.09426, (0 missing)
##       capitalloss   < 1782.5 to the left,  improve=37.68295, (0 missing)
##       occupation    splits as  L-LR-LLRLL----R, improve=28.21818, (0 missing)
##       age           < 30.5   to the left,  improve=21.23068, (0 missing)
##       nativecountry splits as  -RLLRLLLRLRLLL--LRLRRLRRRLLLLLRLLLRLLLRRR, improve=17.57917, (0 missing)
##   Surrogate splits:
##       nativecountry splits as  -RRRRLRRRRRRRR--LRLRRLRRRLRLRRRLRRRRRRRRR, agree=0.794, adj=0.088, (0 split)
##       occupation    splits as  R-LR-RRRRR----R, agree=0.775, adj=0.001, (0 split)
## 
## Node number 25: 2257 observations,    complexity param=0.00758068
##   predicted class= <=50K  expected loss=0.4333186  P(node) =0.1176317
##     class counts:  1279   978
##    probabilities: 0.567 0.433 
##   left son=50 (561 obs) right son=51 (1696 obs)
##   Primary splits:
##       age          < 33.5   to the left,  improve=40.23592, (0 missing)
##       capitalloss  < 1846   to the left,  improve=36.93343, (0 missing)
##       occupation   splits as  -L--R-----RLLR-, improve=20.55118, (0 missing)
##       education    splits as  LLRLLLLRR--L---R, improve=20.18098, (0 missing)
##       hoursperweek < 33.5   to the left,  improve=17.71678, (0 missing)
##   Surrogate splits:
##       nativecountry splits as  RRRLRLRRRRRRL---LRRRLRRLLRL-LRRRRRLLRRRRR, agree=0.758, adj=0.025, (0 split)
##       maritalstatus splits as  -LR----, agree=0.752, adj=0.004, (0 split)
##       race          splits as  RRRLR, agree=0.752, adj=0.004, (0 split)
## 
## Node number 28: 407 observations,    complexity param=0.004331817
##   predicted class= <=50K  expected loss=0.4324324  P(node) =0.02121228
##     class counts:   231   176
##    probabilities: 0.568 0.432 
##   left son=56 (379 obs) right son=57 (28 obs)
##   Primary splits:
##       capitalloss   < 1794   to the left,  improve=10.847490, (0 missing)
##       hoursperweek  < 32.5   to the left,  improve= 6.976672, (0 missing)
##       nativecountry splits as  -LL-L--LL-R--R----LR-R---L---RR-L-LRL-LLR, improve= 6.064786, (0 missing)
##       age           < 57.5   to the right, improve= 5.129288, (0 missing)
##       occupation    splits as  LR-R-LLRL-----L, improve= 4.227508, (0 missing)
## 
## Node number 29: 1744 observations
##   predicted class= >50K   expected loss=0.274656  P(node) =0.09089488
##     class counts:   479  1265
##    probabilities: 0.275 0.725 
## 
## Node number 48: 809 observations
##   predicted class= <=50K  expected loss=0.08034611  P(node) =0.04216397
##     class counts:   744    65
##    probabilities: 0.920 0.080 
## 
## Node number 49: 2781 observations,    complexity param=0.003465454
##   predicted class= <=50K  expected loss=0.2614168  P(node) =0.1449419
##     class counts:  2054   727
##    probabilities: 0.739 0.261 
##   left son=98 (2695 obs) right son=99 (86 obs)
##   Primary splits:
##       capitalloss   < 1782.5 to the left,  improve=32.003060, (0 missing)
##       age           < 29.5   to the left,  improve=24.700110, (0 missing)
##       occupation    splits as  R--R-LLRLL----R, improve=23.614270, (0 missing)
##       hoursperweek  < 31     to the left,  improve=17.134710, (0 missing)
##       nativecountry splits as  -RLLRLLLRLRLLL--LRLRRRRRLLL-LRRRLLRLLLRRR, improve= 9.412996, (0 missing)
## 
## Node number 50: 561 observations,    complexity param=0.002165909
##   predicted class= <=50K  expected loss=0.2691622  P(node) =0.02923855
##     class counts:   410   151
##    probabilities: 0.731 0.269 
##   left son=100 (549 obs) right son=101 (12 obs)
##   Primary splits:
##       nativecountry splits as  RRRLRL-LRRL-L---L-R-LL-RLLR-L---L-LLL-LL-, improve=10.282230, (0 missing)
##       age           < 27.5   to the left,  improve= 9.426169, (0 missing)
##       capitalloss   < 1794   to the left,  improve= 4.540000, (0 missing)
##       workclass     splits as  -RR-LRLL-, improve= 4.531931, (0 missing)
##       occupation    splits as  -L--R-----RLLR-, improve= 4.529803, (0 missing)
## 
## Node number 51: 1696 observations,    complexity param=0.00758068
##   predicted class= <=50K  expected loss=0.4876179  P(node) =0.08839318
##     class counts:   869   827
##    probabilities: 0.512 0.488 
##   left son=102 (1607 obs) right son=103 (89 obs)
##   Primary splits:
##       capitalloss  < 1846   to the left,  improve=30.06068, (0 missing)
##       education    splits as  LLLLLLLRR--L---R, improve=21.06198, (0 missing)
##       age          < 61.5   to the right, improve=20.21799, (0 missing)
##       hoursperweek < 33.5   to the left,  improve=19.16285, (0 missing)
##       occupation   splits as  -L--R-----RLLR-, improve=13.03722, (0 missing)
## 
## Node number 56: 379 observations
##   predicted class= <=50K  expected loss=0.4010554  P(node) =0.01975296
##     class counts:   227   152
##    probabilities: 0.599 0.401 
## 
## Node number 57: 28 observations
##   predicted class= >50K   expected loss=0.1428571  P(node) =0.001459321
##     class counts:     4    24
##    probabilities: 0.143 0.857 
## 
## Node number 98: 2695 observations
##   predicted class= <=50K  expected loss=0.2478664  P(node) =0.1404597
##     class counts:  2027   668
##    probabilities: 0.752 0.248 
## 
## Node number 99: 86 observations,    complexity param=0.003465454
##   predicted class= >50K   expected loss=0.3139535  P(node) =0.004482201
##     class counts:    27    59
##    probabilities: 0.314 0.686 
##   left son=198 (28 obs) right son=199 (58 obs)
##   Primary splits:
##       capitalloss  < 1989.5 to the right, improve=24.499710, (0 missing)
##       hoursperweek < 46.5   to the left,  improve= 2.651758, (0 missing)
##       workclass    splits as  RRL-RRR--, improve= 2.442533, (0 missing)
##       education    splits as  -------RR--L---R, improve= 1.405456, (0 missing)
##       age          < 60     to the right, improve= 1.173496, (0 missing)
##   Surrogate splits:
##       hoursperweek < 27.5   to the left,  agree=0.721, adj=0.143, (0 split)
##       age          < 60     to the right, agree=0.709, adj=0.107, (0 split)
##       workclass    splits as  LRL-RRR--, agree=0.698, adj=0.071, (0 split)
##       occupation   splits as  L--R-LRRR-----R, agree=0.698, adj=0.071, (0 split)
## 
## Node number 100: 549 observations
##   predicted class= <=50K  expected loss=0.2550091  P(node) =0.02861312
##     class counts:   409   140
##    probabilities: 0.745 0.255 
## 
## Node number 101: 12 observations
##   predicted class= >50K   expected loss=0.08333333  P(node) =0.0006254235
##     class counts:     1    11
##    probabilities: 0.083 0.917 
## 
## Node number 102: 1607 observations,    complexity param=0.00758068
##   predicted class= <=50K  expected loss=0.4654636  P(node) =0.08375463
##     class counts:   859   748
##    probabilities: 0.535 0.465 
##   left son=204 (758 obs) right son=205 (849 obs)
##   Primary splits:
##       education     splits as  LLLLLLLRR--L---R, improve=20.98485, (0 missing)
##       age           < 61.5   to the right, improve=19.96612, (0 missing)
##       hoursperweek  < 33.5   to the left,  improve=18.93180, (0 missing)
##       nativecountry splits as  LRLLLLLLRRRL-----LLR-RL--L---LLRLLL-RLLLR, improve=11.64503, (0 missing)
##       occupation    splits as  -L--R-----RLLR-, improve=11.62255, (0 missing)
##   Surrogate splits:
##       age           < 52.5   to the right, agree=0.582, adj=0.115, (0 split)
##       occupation    splits as  -L--R-----RLLR-, agree=0.569, adj=0.086, (0 split)
##       hoursperweek  < 33.5   to the left,  agree=0.554, adj=0.055, (0 split)
##       workclass     splits as  -RR-RRLLL, agree=0.546, adj=0.038, (0 split)
##       nativecountry splits as  LRLRLRLLLRRL-----LRL-LR--L---RRLRLL-RRRLR, agree=0.544, adj=0.034, (0 split)
## 
## Node number 103: 89 observations
##   predicted class= >50K   expected loss=0.1123596  P(node) =0.004638557
##     class counts:    10    79
##    probabilities: 0.112 0.888 
## 
## Node number 198: 28 observations
##   predicted class= <=50K  expected loss=0.1428571  P(node) =0.001459321
##     class counts:    24     4
##    probabilities: 0.857 0.143 
## 
## Node number 199: 58 observations
##   predicted class= >50K   expected loss=0.05172414  P(node) =0.00302288
##     class counts:     3    55
##    probabilities: 0.052 0.948 
## 
## Node number 204: 758 observations
##   predicted class= <=50K  expected loss=0.3799472  P(node) =0.03950592
##     class counts:   470   288
##    probabilities: 0.620 0.380 
## 
## Node number 205: 849 observations,    complexity param=0.004223522
##   predicted class= >50K   expected loss=0.4581861  P(node) =0.04424871
##     class counts:   389   460
##    probabilities: 0.458 0.542 
##   left son=410 (15 obs) right son=411 (834 obs)
##   Primary splits:
##       capitalloss   < 1512   to the right, improve=8.965266, (0 missing)
##       age           < 63.5   to the right, improve=8.663766, (0 missing)
##       hoursperweek  < 33.5   to the left,  improve=8.001880, (0 missing)
##       workclass     splits as  -RR-RRLL-, improve=5.670179, (0 missing)
##       nativecountry splits as  -R-LRL-R-RR-------L--RL--L---LL-L---RLL-R, improve=5.106393, (0 missing)
## 
## Node number 410: 15 observations
##   predicted class= <=50K  expected loss=0  P(node) =0.0007817793
##     class counts:    15     0
##    probabilities: 1.000 0.000 
## 
## Node number 411: 834 observations,    complexity param=0.004223522
##   predicted class= >50K   expected loss=0.4484412  P(node) =0.04346693
##     class counts:   374   460
##    probabilities: 0.448 0.552 
##   left son=822 (60 obs) right son=823 (774 obs)
##   Primary splits:
##       hoursperweek  < 33.5   to the left,  improve=8.182485, (0 missing)
##       age           < 63.5   to the right, improve=7.641514, (0 missing)
##       workclass     splits as  -RR-RRLL-, improve=5.132825, (0 missing)
##       nativecountry splits as  -R-LRL-R-RR-------L--RL--L---LL-L---RLL-R, improve=4.859651, (0 missing)
##       capitalgain   < 3120   to the right, improve=4.278247, (0 missing)
##   Surrogate splits:
##       nativecountry splits as  -R-RLR-R-RR-------R--RR--L---RR-R---RRR-R, agree=0.932, adj=0.05, (0 split)
## 
## Node number 822: 60 observations
##   predicted class= <=50K  expected loss=0.3  P(node) =0.003127117
##     class counts:    42    18
##    probabilities: 0.700 0.300 
## 
## Node number 823: 774 observations,    complexity param=0.003248863
##   predicted class= >50K   expected loss=0.4289406  P(node) =0.04033981
##     class counts:   332   442
##    probabilities: 0.429 0.571 
##   left son=1646 (101 obs) right son=1647 (673 obs)
##   Primary splits:
##       workclass     splits as  -RR-RRLL-, improve=4.905793, (0 missing)
##       age           < 63.5   to the right, improve=4.901891, (0 missing)
##       nativecountry splits as  -R-L-L-R-RR-------L--RL--L---LL-L---RLL-R, improve=3.644741, (0 missing)
##       capitalgain   < 3120   to the right, improve=3.372428, (0 missing)
##       race          splits as  LRRRR, improve=3.216752, (0 missing)
##   Surrogate splits:
##       capitalgain   < 4699.5 to the right, agree=0.871, adj=0.01, (0 split)
##       hoursperweek  < 91.5   to the right, agree=0.871, adj=0.01, (0 split)
##       nativecountry splits as  -R-R-R-R-RR-------R--RR--R---RR-R---RLR-R, agree=0.871, adj=0.01, (0 split)
## 
## Node number 1646: 101 observations
##   predicted class= <=50K  expected loss=0.4257426  P(node) =0.005263981
##     class counts:    58    43
##    probabilities: 0.574 0.426 
## 
## Node number 1647: 673 observations
##   predicted class= >50K   expected loss=0.4071322  P(node) =0.03507583
##     class counts:   274   399
##    probabilities: 0.407 0.593
## n= 19187 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
##    1) root 19187 4617  <=50K (0.75936832 0.24063168)  
##      2) relationship= Not-in-family, Other-relative, Own-child, Unmarried 10478  702  <=50K (0.93300248 0.06699752)  
##        4) capitalgain< 7565.5 10297  525  <=50K (0.94901428 0.05098572) *
##        5) capitalgain>=7565.5 181    4  >50K (0.02209945 0.97790055) *
##      3) relationship= Husband, Wife 8709 3915  <=50K (0.55046504 0.44953496)  
##        6) education= 10th, 11th, 12th, 1st-4th, 5th-6th, 7th-8th, 9th, Assoc-acdm, Assoc-voc, HS-grad, Preschool, Some-college 6158 2077  <=50K (0.66271517 0.33728483)  
##         12) capitalgain< 5095.5 5847 1770  <=50K (0.69728066 0.30271934)  
##           24) occupation= ?, Armed-Forces, Craft-repair, Farming-fishing, Handlers-cleaners, Machine-op-inspct, Other-service, Priv-house-serv, Transport-moving 3590  792  <=50K (0.77938719 0.22061281)  
##             48) education= 10th, 11th, 12th, 1st-4th, 5th-6th, 7th-8th, 9th, Preschool 809   65  <=50K (0.91965389 0.08034611) *
##             49) education= Assoc-acdm, Assoc-voc, HS-grad, Some-college 2781  727  <=50K (0.73858324 0.26141676)  
##               98) capitalloss< 1782.5 2695  668  <=50K (0.75213358 0.24786642) *
##               99) capitalloss>=1782.5 86   27  >50K (0.31395349 0.68604651)  
##                198) capitalloss>=1989.5 28    4  <=50K (0.85714286 0.14285714) *
##                199) capitalloss< 1989.5 58    3  >50K (0.05172414 0.94827586) *
##           25) occupation= Adm-clerical, Exec-managerial, Prof-specialty, Protective-serv, Sales, Tech-support 2257  978  <=50K (0.56668144 0.43331856)  
##             50) age< 33.5 561  151  <=50K (0.73083779 0.26916221)  
##              100) nativecountry= Columbia, Dominican-Republic, El-Salvador, Germany, Guatemala, Hong, Ireland, Italy, Laos, Mexico, Peru, Puerto-Rico, South, Taiwan, Thailand, United-States, Vietnam 549  140  <=50K (0.74499089 0.25500911) *
##              101) nativecountry= Cambodia, Canada, China, Cuba, England, France, India, Japan, Nicaragua 12    1  >50K (0.08333333 0.91666667) *
##             51) age>=33.5 1696  827  <=50K (0.51238208 0.48761792)  
##              102) capitalloss< 1846 1607  748  <=50K (0.53453640 0.46546360)  
##                204) education= 10th, 11th, 12th, 1st-4th, 5th-6th, 7th-8th, 9th, HS-grad 758  288  <=50K (0.62005277 0.37994723) *
##                205) education= Assoc-acdm, Assoc-voc, Some-college 849  389  >50K (0.45818610 0.54181390)  
##                  410) capitalloss>=1512 15    0  <=50K (1.00000000 0.00000000) *
##                  411) capitalloss< 1512 834  374  >50K (0.44844125 0.55155875)  
##                    822) hoursperweek< 33.5 60   18  <=50K (0.70000000 0.30000000) *
##                    823) hoursperweek>=33.5 774  332  >50K (0.42894057 0.57105943)  
##                     1646) workclass= Self-emp-not-inc, State-gov 101   43  <=50K (0.57425743 0.42574257) *
##                     1647) workclass= Federal-gov, Local-gov, Private, Self-emp-inc 673  274  >50K (0.40713224 0.59286776) *
##              103) capitalloss>=1846 89   10  >50K (0.11235955 0.88764045) *
##         13) capitalgain>=5095.5 311    4  >50K (0.01286174 0.98713826) *
##        7) education= Bachelors, Doctorate, Masters, Prof-school 2551  713  >50K (0.27949824 0.72050176)  
##         14) capitalgain< 5095.5 2151  710  >50K (0.33007903 0.66992097)  
##           28) occupation= ?, Adm-clerical, Craft-repair, Farming-fishing, Handlers-cleaners, Machine-op-inspct, Other-service, Transport-moving 407  176  <=50K (0.56756757 0.43243243)  
##             56) capitalloss< 1794 379  152  <=50K (0.59894459 0.40105541) *
##             57) capitalloss>=1794 28    4  >50K (0.14285714 0.85714286) *
##           29) occupation= Armed-Forces, Exec-managerial, Prof-specialty, Protective-serv, Sales, Tech-support 1744  479  >50K (0.27465596 0.72534404) *
##         15) capitalgain>=5095.5 400    3  >50K (0.00750000 0.99250000) *
# Pass the object CensusCV to prp().
# NOTE(review): neither CensusCV nor prp() is defined in this part of the
# file. Presumably CensusCV is the classification tree whose node listing
# (n= 19187) is printed just above, and prp() is the tree-plotting function
# from the rpart.plot package -- confirm that the model is fitted and
# library(rpart.plot) is loaded earlier in the document.
prp(CensusCV)