Data Extraction

library(ggplot2)
#Data Extraction
data<-read.csv("/home/heru/Desktop/HR_comma_sep.csv", header=TRUE, sep=",")
alldata<-data
str(alldata)
## 'data.frame':    14999 obs. of  10 variables:
##  $ satisfaction_level   : num  0.38 0.8 0.11 0.72 0.37 0.41 0.1 0.92 0.89 0.42 ...
##  $ last_evaluation      : num  0.53 0.86 0.88 0.87 0.52 0.5 0.77 0.85 1 0.53 ...
##  $ number_project       : int  2 5 7 5 2 2 6 5 5 2 ...
##  $ average_montly_hours : int  157 262 272 223 159 153 247 259 224 142 ...
##  $ time_spend_company   : int  3 6 4 5 3 3 4 5 5 3 ...
##  $ Work_accident        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ left                 : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ promotion_last_5years: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ sales                : Factor w/ 10 levels "accounting","hr",..: 8 8 8 8 8 8 8 8 8 8 ...
##  $ salary               : Factor w/ 3 levels "high","low","medium": 2 3 3 2 2 2 2 2 2 2 ...
head(alldata)
##   satisfaction_level last_evaluation number_project average_montly_hours
## 1               0.38            0.53              2                  157
## 2               0.80            0.86              5                  262
## 3               0.11            0.88              7                  272
## 4               0.72            0.87              5                  223
## 5               0.37            0.52              2                  159
## 6               0.41            0.50              2                  153
##   time_spend_company Work_accident left promotion_last_5years sales salary
## 1                  3             0    1                     0 sales    low
## 2                  6             0    1                     0 sales medium
## 3                  4             0    1                     0 sales medium
## 4                  5             0    1                     0 sales    low
## 5                  3             0    1                     0 sales    low
## 6                  3             0    1                     0 sales    low
dim(alldata)
## [1] 14999    10
alldata1<-alldata

Code: Convert some ints to factors

alldata$last_evaluation <- as.factor(alldata$last_evaluation)
alldata$number_project <- as.factor(alldata$number_project)
alldata$time_spend_company <- as.factor(alldata$time_spend_company)
alldata$Work_accident <- as.factor(alldata$Work_accident)
alldata$left <- as.factor(alldata$left)
alldata$promotion_last_5years <- as.factor(alldata$promotion_last_5years)
alldata$sales <- as.factor(alldata$sales)
alldata$salary <- as.factor(alldata$salary)
str(alldata)
## 'data.frame':    14999 obs. of  10 variables:
##  $ satisfaction_level   : num  0.38 0.8 0.11 0.72 0.37 0.41 0.1 0.92 0.89 0.42 ...
##  $ last_evaluation      : Factor w/ 65 levels "0.36","0.37",..: 18 51 53 52 17 15 42 50 65 18 ...
##  $ number_project       : Factor w/ 6 levels "2","3","4","5",..: 1 4 6 4 1 1 5 4 4 1 ...
##  $ average_montly_hours : int  157 262 272 223 159 153 247 259 224 142 ...
##  $ time_spend_company   : Factor w/ 8 levels "2","3","4","5",..: 2 5 3 4 2 2 3 4 4 2 ...
##  $ Work_accident        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ left                 : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
##  $ promotion_last_5years: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ sales                : Factor w/ 10 levels "accounting","hr",..: 8 8 8 8 8 8 8 8 8 8 ...
##  $ salary               : Factor w/ 3 levels "high","low","medium": 2 3 3 2 2 2 2 2 2 2 ...

Split Train and Testing data

## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## [1] 4500   10
## [1] 4500   10

Part 1 : Descriptive Analysis

Code: Hypothesis: Two Variables Left Analysis

Left Vs Salary

Left Vs Salary (as percentage)

ggplot(train, aes(x = salary)) + geom_bar(aes(fill=left), position ="fill")+ scale_y_continuous(labels=percent_format())

Left Vs Department

ggplot(train, aes(x = sales)) + geom_bar(aes(fill=left), position ="fill")+ scale_y_continuous(labels = percent)

Left Vs Work Accident

ggplot(train, aes(x = Work_accident)) + geom_bar(aes(fill=left), position ="fill")+ scale_y_continuous(labels = percent)

Left Vs Promotion last 5 years

ggplot(train, aes(x = promotion_last_5years)) + geom_bar(aes(fill=left), position ="fill")+ scale_y_continuous(labels = percent)

Left Vs Time Spend Company

ggplot(train, aes(x = time_spend_company)) + geom_bar(aes(fill=left), position ="fill")+ scale_y_continuous(labels = percent)

Left Vs Number Project

ggplot(train, aes(x = number_project)) + geom_bar(aes(fill=left), position ="fill")+ scale_y_continuous(labels=percent_format())

Code: Hypothesis: Three Variables Left Analysis

Three Variables Left Vs (salary, promotion)

d<-ggplot(train, aes(x = promotion_last_5years)) + geom_bar(aes(fill=left), position ="fill")+ scale_y_continuous(labels = percent)
d + facet_wrap(~ salary)

#### Three Variables Left Vs (time-spend, number of project)

f<-ggplot(train, aes(x = time_spend_company)) + geom_bar(aes(fill=left), position ="fill")+ scale_y_continuous(labels = percent)
f + facet_wrap(~ number_project)

Part 2 : Predictive Analytics and Validity

Using Decision Tree (classic C4.50)

library(rpart)
library(rattle)
## Rattle: A free graphical interface for data mining with R.
## Version 4.1.0 Copyright (c) 2006-2015 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
decTree <- rpart(left~satisfaction_level + last_evaluation + number_project + average_montly_hours + time_spend_company  + Work_accident + promotion_last_5years + sales + salary,data=train)
print(decTree)
## n= 10499 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
##   1) root 10499 2522 0 (0.75978665 0.24021335)  
##     2) number_project=3,4,5 7815  778 0 (0.90044786 0.09955214)  
##       4) time_spend_company=2,3,4,7,8,10 6579  126 0 (0.98084815 0.01915185)  
##         8) satisfaction_level>=0.115 6552   99 0 (0.98489011 0.01510989) *
##         9) satisfaction_level< 0.115 27    0 1 (0.00000000 1.00000000) *
##       5) time_spend_company=5,6 1236  584 1 (0.47249191 0.52750809)  
##        10) satisfaction_level< 0.715 452   25 0 (0.94469027 0.05530973) *
##        11) satisfaction_level>=0.715 784  157 1 (0.20025510 0.79974490)  
##          22) last_evaluation=0.38,0.4,0.41,0.42,0.43,0.44,0.45,0.46,0.47,0.48,0.49,0.51,0.52,0.54,0.55,0.56,0.58,0.59,0.6,0.61,0.62,0.64,0.66,0.67,0.68,0.69,0.72,0.73,0.74,0.75,0.76,0.77,0.79,0.8 107    3 0 (0.97196262 0.02803738) *
##          23) last_evaluation=0.53,0.63,0.65,0.71,0.78,0.81,0.82,0.83,0.84,0.85,0.86,0.87,0.88,0.89,0.9,0.91,0.92,0.93,0.94,0.95,0.96,0.97,0.98,0.99,1 677   53 1 (0.07828656 0.92171344)  
##            46) average_montly_hours< 213 37    5 0 (0.86486486 0.13513514) *
##            47) average_montly_hours>=213 640   21 1 (0.03281250 0.96718750) *
##     3) number_project=2,6,7 2684  940 1 (0.35022355 0.64977645)  
##       6) satisfaction_level>=0.465 620   40 0 (0.93548387 0.06451613) *
##       7) satisfaction_level< 0.465 2064  360 1 (0.17441860 0.82558140)  
##        14) last_evaluation=0.36,0.37,0.38,0.39,0.4,0.41,0.42,0.43,0.44,0.58,0.59,0.6,0.61,0.62,0.63,0.64,0.65,0.66,0.67,0.68,0.69,0.7,0.71,0.72,0.73,0.74,0.75,0.76,0.99,1 197   15 0 (0.92385787 0.07614213) *
##        15) last_evaluation=0.45,0.46,0.47,0.48,0.49,0.5,0.51,0.52,0.53,0.54,0.55,0.56,0.57,0.77,0.78,0.79,0.8,0.81,0.82,0.83,0.84,0.85,0.86,0.87,0.88,0.89,0.9,0.91,0.92,0.93,0.94,0.95,0.96,0.97,0.98 1867  178 1 (0.09534012 0.90465988)  
##          30) time_spend_company=2,6,7,8 63    4 0 (0.93650794 0.06349206) *
##          31) time_spend_company=3,4,5 1804  119 1 (0.06596452 0.93403548)  
##            62) average_montly_hours< 125.5 14    0 0 (1.00000000 0.00000000) *
##            63) average_montly_hours>=125.5 1790  105 1 (0.05865922 0.94134078)  
##             126) average_montly_hours>=162.5 699   84 1 (0.12017167 0.87982833)  
##               252) satisfaction_level>=0.115 96   12 0 (0.87500000 0.12500000) *
##               253) satisfaction_level< 0.115 603    0 1 (0.00000000 1.00000000) *
##             127) average_montly_hours< 162.5 1091   21 1 (0.01924840 0.98075160) *
fancyRpartPlot(decTree)

Using Random Forest

library(randomForest)
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:ggplot2':
## 
##     margin
library(caret)
## Loading required package: lattice
randomForestModel <- randomForest(left~satisfaction_level + last_evaluation + number_project + average_montly_hours + time_spend_company  + Work_accident + promotion_last_5years + sales + salary, data=train1,ntree=100,mtry=5, importance=TRUE)
## Warning in randomForest.default(m, y, ...): The response has five or fewer
## unique values. Are you sure you want to do regression?
print(randomForestModel)
## 
## Call:
##  randomForest(formula = left ~ satisfaction_level + last_evaluation +      number_project + average_montly_hours + time_spend_company +      Work_accident + promotion_last_5years + sales + salary, data = train1,      ntree = 100, mtry = 5, importance = TRUE) 
##                Type of random forest: regression
##                      Number of trees: 100
## No. of variables tried at each split: 5
## 
##           Mean of squared residuals: 0.0120259
##                     % Var explained: 93.35
varImpPlot(randomForestModel,type=2)

Using C.50

# Code : C50
alldataC50 <- alldata1[ sample( nrow(alldata) ), ]
C50x <- alldataC50[,-c(7)] # independent
C50y <- alldataC50[,c(7)] # dependent

## Code: Create train and test sets for x and y
C50xtrain <- C50x[1:13500, ]
C50xtest <- C50x[13501:14999, ]
C50ytrain <- C50y[1:13500]
C50ytest <- C50y[13501:14999]

##Code: Install and load C50
library(C50)
C50ytrain<-as.factor(C50ytrain)
c50model <- C50::C5.0( C50xtrain, C50ytrain )
summary( c50model )
## 
## Call:
## C5.0.default(x = C50xtrain, y = C50ytrain)
## 
## 
## C5.0 [Release 2.07 GPL Edition]      Tue Jan 10 09:07:11 2017
## -------------------------------
## 
## Class specified by attribute `outcome'
## 
## Read 13500 cases (10 attributes) from undefined.data
## 
## Decision tree:
## 
## average_montly_hours > 287: 1 (292)
## average_montly_hours <= 287:
## :...satisfaction_level <= 0.46:
##     :...number_project > 2:
##     :   :...satisfaction_level <= 0.11: 1 (533)
##     :   :   satisfaction_level > 0.11:
##     :   :   :...last_evaluation <= 0.98:
##     :   :       :...number_project <= 5: 0 (1021/45)
##     :   :       :   number_project > 5:
##     :   :       :   :...number_project <= 6: 0 (281/26)
##     :   :       :       number_project > 6: 1 (14)
##     :   :       last_evaluation > 0.98:
##     :   :       :...time_spend_company > 4: 0 (11)
##     :   :           time_spend_company <= 4:
##     :   :           :...number_project > 4: 0 (11/1)
##     :   :               number_project <= 4:
##     :   :               :...average_montly_hours <= 188: 0 (3)
##     :   :                   average_montly_hours > 188: 1 (12/1)
##     :   number_project <= 2:
##     :   :...last_evaluation > 0.57: 0 (118/6)
##     :       last_evaluation <= 0.57:
##     :       :...last_evaluation <= 0.44: 0 (34)
##     :           last_evaluation > 0.44:
##     :           :...average_montly_hours > 162:
##     :               :...satisfaction_level <= 0.12: 1 (3)
##     :               :   satisfaction_level > 0.12: 0 (22/2)
##     :               average_montly_hours <= 162:
##     :               :...average_montly_hours <= 127:
##     :                   :...average_montly_hours <= 125: 0 (15)
##     :                   :   average_montly_hours > 125: 1 (62)
##     :                   average_montly_hours > 127:
##     :                   :...satisfaction_level > 0.36: 1 (1222/4)
##     :                       satisfaction_level <= 0.36:
##     :                       :...satisfaction_level <= 0.31: 0 (8/1)
##     :                           satisfaction_level > 0.31: 1 (85/2)
##     satisfaction_level > 0.46:
##     :...time_spend_company <= 4: 0 (7974/108)
##         time_spend_company > 4:
##         :...last_evaluation <= 0.8: 0 (674/28)
##             last_evaluation > 0.8:
##             :...time_spend_company > 6: 0 (147)
##                 time_spend_company <= 6:
##                 :...average_montly_hours <= 214:
##                     :...number_project > 3: 0 (60/1)
##                     :   number_project <= 3:
##                     :   :...time_spend_company > 5: 0 (15)
##                     :       time_spend_company <= 5:
##                     :       :...average_montly_hours <= 152: 0 (11)
##                     :           average_montly_hours > 152: 1 (15/3)
##                     average_montly_hours > 214:
##                     :...number_project <= 3:
##                         :...average_montly_hours <= 221: 1 (3)
##                         :   average_montly_hours > 221: 0 (33/3)
##                         number_project > 3:
##                         :...satisfaction_level <= 0.72:
##                             :...satisfaction_level <= 0.71: 0 (21/4)
##                             :   satisfaction_level > 0.71: 1 (23)
##                             satisfaction_level > 0.72:
##                             :...satisfaction_level <= 0.91: 1 (752/16)
##                                 satisfaction_level > 0.91:
##                                 :...satisfaction_level <= 0.92: 1 (20/1)
##                                     satisfaction_level > 0.92: 0 (5)
## 
## 
## Evaluation on training data (13500 cases):
## 
##      Decision Tree   
##    ----------------  
##    Size      Errors  
## 
##      32  252( 1.9%)   <<
## 
## 
##     (a)   (b)    <-classified as
##    ----  ----
##   10239    27    (a): class 0
##     225  3009    (b): class 1
## 
## 
##  Attribute usage:
## 
##  100.00% average_montly_hours
##   97.84% satisfaction_level
##   72.52% time_spend_company
##   34.82% last_evaluation
##   32.69% number_project
## 
## 
## Time: 0.1 secs
## Code: Try Boosting with 10 trials
c50model <- C50::C5.0( C50xtrain, C50ytrain, trials=10 )
summary( c50model )
## 
## Call:
## C5.0.default(x = C50xtrain, y = C50ytrain, trials = 10)
## 
## 
## C5.0 [Release 2.07 GPL Edition]      Tue Jan 10 09:07:11 2017
## -------------------------------
## 
## Class specified by attribute `outcome'
## 
## Read 13500 cases (10 attributes) from undefined.data
## 
## -----  Trial 0:  -----
## 
## Decision tree:
## 
## average_montly_hours > 287: 1 (292)
## average_montly_hours <= 287:
## :...satisfaction_level <= 0.46:
##     :...number_project > 2:
##     :   :...satisfaction_level <= 0.11: 1 (533)
##     :   :   satisfaction_level > 0.11:
##     :   :   :...last_evaluation <= 0.98:
##     :   :       :...number_project <= 5: 0 (1021/45)
##     :   :       :   number_project > 5:
##     :   :       :   :...number_project <= 6: 0 (281/26)
##     :   :       :       number_project > 6: 1 (14)
##     :   :       last_evaluation > 0.98:
##     :   :       :...time_spend_company > 4: 0 (11)
##     :   :           time_spend_company <= 4:
##     :   :           :...number_project > 4: 0 (11/1)
##     :   :               number_project <= 4:
##     :   :               :...average_montly_hours <= 188: 0 (3)
##     :   :                   average_montly_hours > 188: 1 (12/1)
##     :   number_project <= 2:
##     :   :...last_evaluation > 0.57: 0 (118/6)
##     :       last_evaluation <= 0.57:
##     :       :...last_evaluation <= 0.44: 0 (34)
##     :           last_evaluation > 0.44:
##     :           :...average_montly_hours > 162:
##     :               :...satisfaction_level <= 0.12: 1 (3)
##     :               :   satisfaction_level > 0.12: 0 (22/2)
##     :               average_montly_hours <= 162:
##     :               :...average_montly_hours <= 127:
##     :                   :...average_montly_hours <= 125: 0 (15)
##     :                   :   average_montly_hours > 125: 1 (62)
##     :                   average_montly_hours > 127:
##     :                   :...satisfaction_level > 0.36: 1 (1222/4)
##     :                       satisfaction_level <= 0.36:
##     :                       :...satisfaction_level <= 0.31: 0 (8/1)
##     :                           satisfaction_level > 0.31: 1 (85/2)
##     satisfaction_level > 0.46:
##     :...time_spend_company <= 4: 0 (7974/108)
##         time_spend_company > 4:
##         :...last_evaluation <= 0.8: 0 (674/28)
##             last_evaluation > 0.8:
##             :...time_spend_company > 6: 0 (147)
##                 time_spend_company <= 6:
##                 :...average_montly_hours <= 214:
##                     :...number_project > 3: 0 (60/1)
##                     :   number_project <= 3:
##                     :   :...time_spend_company > 5: 0 (15)
##                     :       time_spend_company <= 5:
##                     :       :...average_montly_hours <= 152: 0 (11)
##                     :           average_montly_hours > 152: 1 (15/3)
##                     average_montly_hours > 214:
##                     :...number_project <= 3:
##                         :...average_montly_hours <= 221: 1 (3)
##                         :   average_montly_hours > 221: 0 (33/3)
##                         number_project > 3:
##                         :...satisfaction_level <= 0.72:
##                             :...satisfaction_level <= 0.71: 0 (21/4)
##                             :   satisfaction_level > 0.71: 1 (23)
##                             satisfaction_level > 0.72:
##                             :...satisfaction_level <= 0.91: 1 (752/16)
##                                 satisfaction_level > 0.91:
##                                 :...satisfaction_level <= 0.92: 1 (20/1)
##                                     satisfaction_level > 0.92: 0 (5)
## 
## -----  Trial 1:  -----
## 
## Decision tree:
## 
## satisfaction_level <= 0.48:
## :...average_montly_hours <= 125: 0 (136.6)
## :   average_montly_hours > 125:
## :   :...last_evaluation <= 0.44: 0 (107.2)
## :       last_evaluation > 0.44:
## :       :...time_spend_company > 5: 0 (246.5/57.8)
## :           time_spend_company <= 5:
## :           :...salary = high: 0 (197.3/68.5)
## :               salary in {low,medium}:
## :               :...satisfaction_level <= 0.11: 1 (611.7)
## :                   satisfaction_level > 0.11:
## :                   :...satisfaction_level > 0.3:
## :                       :...average_montly_hours <= 160: 1 (1384.2/113.1)
## :                       :   average_montly_hours > 160:
## :                       :   :...sales in {accounting,management,marketing,
## :                       :       :         product_mng,sales,support,
## :                       :       :         technical}: 1 (682/172.8)
## :                       :       sales in {hr,IT,RandD}: 0 (52.8/4.5)
## :                       satisfaction_level <= 0.3:
## :                       :...last_evaluation > 0.9: 0 (90.1/5.3)
## :                           last_evaluation <= 0.9:
## :                           :...sales in {management,marketing,
## :                               :         product_mng}: 0 (50.6/1.5)
## :                               sales in {accounting,hr,IT,RandD,sales,support,
## :                               :         technical}:
## :                               :...last_evaluation <= 0.46: 1 (90.9/5.3)
## :                                   last_evaluation > 0.46:
## :                                   :...satisfaction_level <= 0.17: 1 (365.9/100.4)
## :                                       satisfaction_level > 0.17: 0 (234.4/62.4)
## satisfaction_level > 0.48:
## :...time_spend_company <= 3:
##     :...number_project <= 5: 0 (5535.2/642.1)
##     :   number_project > 5: 1 (194/54.3)
##     time_spend_company > 3:
##     :...time_spend_company > 6: 0 (309.4)
##         time_spend_company <= 6:
##         :...satisfaction_level > 0.92: 0 (189.4)
##             satisfaction_level <= 0.92:
##             :...last_evaluation <= 0.45: 0 (73.2)
##                 last_evaluation > 0.45:
##                 :...promotion_last_5years > 0: 0 (25.7/0.8)
##                     promotion_last_5years <= 0:
##                     :...average_montly_hours <= 149: 0 (211.9/27.8)
##                         average_montly_hours > 149:
##                         :...last_evaluation > 0.99: 1 (144/10.6)
##                             last_evaluation <= 0.99:
##                             :...Work_accident > 0: 0 (217.4/64.3)
##                                 Work_accident <= 0:
##                                 :...time_spend_company > 4:
##                                     :...number_project <= 5: 1 (1188.4/301.4)
##                                     :   number_project > 5: 0 (134.5/42.4)
##                                     time_spend_company <= 4:
##                                     :...salary = high: 0 (53.6)
##                                         salary in {low,medium}:
##                                         :...last_evaluation > 0.95: 0 (28.7)
##                                             last_evaluation <= 0.95: [S1]
## 
## SubTree [S1]
## 
## sales in {accounting,IT,management,marketing,product_mng,RandD,sales,
## :         support}: 0 (511.9/180.6)
## sales in {hr,technical}: 1 (432.3/96.6)
## 
## -----  Trial 2:  -----
## 
## Decision tree:
## 
## satisfaction_level <= 0.11: 1 (524.3)
## satisfaction_level > 0.11:
## :...satisfaction_level > 0.92: 0 (761.9)
##     satisfaction_level <= 0.92:
##     :...time_spend_company > 6: 0 (260.6)
##         time_spend_company <= 6:
##         :...last_evaluation <= 0.44: 0 (228.1)
##             last_evaluation > 0.44:
##             :...number_project <= 2:
##                 :...satisfaction_level <= 0.35: 0 (139.6/12.8)
##                 :   satisfaction_level > 0.35:
##                 :   :...Work_accident > 0: 0 (122.9/38.9)
##                 :       Work_accident <= 0:
##                 :       :...last_evaluation <= 0.57:
##                 :           :...satisfaction_level > 0.46: 1 (180.4/83.7)
##                 :           :   satisfaction_level <= 0.46:
##                 :           :   :...average_montly_hours <= 166: 1 (946.8/59.1)
##                 :           :       average_montly_hours > 166: 0 (33.7/11)
##                 :           last_evaluation > 0.57:
##                 :           :...salary = high: 0 (22.2)
##                 :               salary in {low,medium}:
##                 :               :...sales in {accounting,hr,management,
##                 :                   :         product_mng}: 0 (62.1/3.6)
##                 :                   sales in {IT,marketing,RandD,sales,support,
##                 :                             technical}: 1 (529.8/205)
##                 number_project > 2:
##                 :...time_spend_company <= 3:
##                     :...satisfaction_level > 0.36: 0 (4446/698.8)
##                     :   satisfaction_level <= 0.36:
##                     :   :...Work_accident > 0: 0 (52.5)
##                     :       Work_accident <= 0:
##                     :       :...number_project <= 3: 0 (101.4/8.9)
##                     :           number_project > 3: 1 (538.2/225)
##                     time_spend_company > 3:
##                     :...average_montly_hours <= 130: 0 (106)
##                         average_montly_hours > 130:
##                         :...average_montly_hours <= 139: 1 (254/62.2)
##                             average_montly_hours > 139:
##                             :...number_project > 6: 1 (54.6)
##                                 number_project <= 6:
##                                 :...average_montly_hours <= 148: 0 (102.8)
##                                     average_montly_hours > 148:
##                                     :...satisfaction_level <= 0.24:
##                                         :...average_montly_hours <= 276: 0 (513.7/94.2)
##                                         :   average_montly_hours > 276: 1 (91/32.2)
##                                         satisfaction_level > 0.24:
##                                         :...number_project > 4: [S1]
##                                             number_project <= 4:
##                                             :...average_montly_hours > 244: [S2]
##                                                 average_montly_hours <= 244:
##                                                 :...salary = high: 0 (55)
##                                                     salary in {low,medium}: [S3]
## 
## SubTree [S1]
## 
## average_montly_hours <= 162: 1 (213.3/33.5)
## average_montly_hours > 162:
## :...average_montly_hours <= 194: 0 (169.2/27.1)
##     average_montly_hours > 194:
##     :...average_montly_hours > 271: 0 (79.7/18)
##         average_montly_hours <= 271:
##         :...sales in {accounting,IT,marketing,product_mng,RandD,
##             :         support}: 1 (435.1/78)
##             sales in {hr,management,sales,technical}: 0 (550.3/267.2)
## 
## SubTree [S2]
## 
## average_montly_hours > 281: 0 (35.2/3)
## average_montly_hours <= 281:
## :...time_spend_company <= 4: 0 (243.9/77.2)
##     time_spend_company > 4: 1 (423.5/138)
## 
## SubTree [S3]
## 
## sales in {IT,marketing,RandD,sales}: 0 (351.6/64.2)
## sales in {accounting,hr,management,product_mng,support,technical}:
## :...average_montly_hours > 235: 0 (80.9/9.6)
##     average_montly_hours <= 235:
##     :...average_montly_hours > 232: 1 (38.8/2.4)
##         average_montly_hours <= 232:
##         :...sales = support: 0 (113.8/18.8)
##             sales in {accounting,hr,management,product_mng,technical}:
##             :...satisfaction_level <= 0.44: 1 (108.8/31.6)
##                 satisfaction_level > 0.44: 0 (528.4/212.9)
## 
## -----  Trial 3:  -----
## 
## Decision tree:
## 
## satisfaction_level <= 0.11: 1 (423.2)
## satisfaction_level > 0.11:
## :...satisfaction_level > 0.92: 0 (615.1)
##     satisfaction_level <= 0.92:
##     :...average_montly_hours <= 125: 0 (305.5)
##         average_montly_hours > 125:
##         :...time_spend_company > 6: 0 (199.3)
##             time_spend_company <= 6:
##             :...last_evaluation <= 0.44: 0 (157.1)
##                 last_evaluation > 0.44:
##                 :...time_spend_company > 4:
##                     :...number_project <= 3: 0 (696.2/183.7)
##                     :   number_project > 3:
##                     :   :...satisfaction_level > 0.71:
##                     :       :...last_evaluation <= 0.62: 0 (62.2)
##                     :       :   last_evaluation > 0.62:
##                     :       :   :...number_project <= 5: 1 (1329/195.6)
##                     :       :       number_project > 5: 0 (63.7/4.6)
##                     :       satisfaction_level <= 0.71:
##                     :       :...average_montly_hours <= 139: 1 (156.2/36.3)
##                     :           average_montly_hours > 139:
##                     :           :...last_evaluation <= 0.54: 0 (83.8/1.5)
##                     :               last_evaluation > 0.54:
##                     :               :...last_evaluation <= 0.58: 1 (115.8/34.8)
##                     :                   last_evaluation > 0.58: 0 (672/226.9)
##                     time_spend_company <= 4:
##                     :...number_project <= 2:
##                         :...satisfaction_level > 0.46:
##                         :   :...average_montly_hours <= 273: 0 (749.3/191.7)
##                         :   :   average_montly_hours > 273: 1 (86.4/16.8)
##                         :   satisfaction_level <= 0.46:
##                         :   :...satisfaction_level <= 0.35: 0 (97.1/12.6)
##                         :       satisfaction_level > 0.35:
##                         :       :...average_montly_hours <= 162: 1 (1000.9/115.3)
##                         :           average_montly_hours > 162: 0 (94.7/21.5)
##                         number_project > 2:
##                         :...number_project > 6: 1 (53.8)
##                             number_project <= 6:
##                             :...average_montly_hours > 290: 1 (45.6)
##                                 average_montly_hours <= 290:
##                                 :...satisfaction_level > 0.9: 0 (156.9)
##                                     satisfaction_level <= 0.9:
##                                     :...sales in {accounting,IT,marketing,
##                                         :         product_mng,
##                                         :         RandD}: 0 (1593.9/243.1)
##                                         sales in {hr,management,sales,support,
##                                         :         technical}:
##                                         :...last_evaluation <= 0.49: 0 (211/53.2)
##                                             last_evaluation > 0.49:
##                                             :...Work_accident > 0: 0 (572.1/106.8)
##                                                 Work_accident <= 0: [S1]
## 
## SubTree [S1]
## 
## promotion_last_5years > 0: 1 (94.2/41.1)
## promotion_last_5years <= 0:
## :...salary = high: 0 (234.7/30.7)
##     salary = medium:
##     :...satisfaction_level <= 0.3: 0 (154/8.9)
##     :   satisfaction_level > 0.3:
##     :   :...last_evaluation > 0.94: 0 (108.6/8.8)
##     :       last_evaluation <= 0.94:
##     :       :...average_montly_hours > 259: 1 (225.9/101.8)
##     :           average_montly_hours <= 259:
##     :           :...average_montly_hours > 249: 0 (64.9)
##     :               average_montly_hours <= 249:
##     :               :...average_montly_hours <= 248: 0 (1002.3/291)
##     :                   average_montly_hours > 248: 1 (44.1/6.3)
##     salary = low:
##     :...satisfaction_level > 0.89: 1 (68/12.1)
##         satisfaction_level <= 0.89:
##         :...average_montly_hours > 271: 0 (59.3)
##             average_montly_hours <= 271:
##             :...number_project <= 3: 0 (534.7/146.9)
##                 number_project > 3:
##                 :...satisfaction_level > 0.74: 0 (216.2)
##                     satisfaction_level <= 0.74:
##                     :...satisfaction_level > 0.72: 1 (107.4/29)
##                         satisfaction_level <= 0.72:
##                         :...average_montly_hours > 231: 1 (358/120.3)
##                             average_montly_hours <= 231:
##                             :...average_montly_hours <= 184: 1 (453.5/197.5)
##                                 average_montly_hours > 184: 0 (233/25.2)
## 
## -----  Trial 4:  -----
## 
## Decision tree:
## 
## satisfaction_level <= 0.11: 1 (341)
## satisfaction_level > 0.11:
## :...satisfaction_level > 0.92: 0 (495.6)
##     satisfaction_level <= 0.92:
##     :...average_montly_hours <= 125: 0 (246.2)
##         average_montly_hours > 125:
##         :...average_montly_hours > 289: 1 (65.4)
##             average_montly_hours <= 289:
##             :...last_evaluation > 0.99: 1 (392.7/93.5)
##                 last_evaluation <= 0.99:
##                 :...number_project <= 2:
##                     :...last_evaluation <= 0.57:
##                     :   :...last_evaluation <= 0.44: 0 (27.7)
##                     :   :   last_evaluation > 0.44:
##                     :   :   :...satisfaction_level <= 0.46: 1 (860.5/125.1)
##                     :   :       satisfaction_level > 0.46: 0 (257.7/77.4)
##                     :   last_evaluation > 0.57:
##                     :   :...Work_accident > 0: 0 (52.8)
##                     :       Work_accident <= 0:
##                     :       :...sales in {accounting,hr,management,
##                     :           :         product_mng}: 0 (65.1)
##                     :           sales in {IT,marketing,RandD,sales,support,
##                     :           :         technical}:
##                     :           :...time_spend_company > 5: 0 (32.1)
##                     :               time_spend_company <= 5:
##                     :               :...average_montly_hours <= 150: 0 (82.2)
##                     :                   average_montly_hours > 150:
##                     :                   :...last_evaluation <= 0.77: 0 (310.1/79.6)
##                     :                       last_evaluation > 0.77: 1 (347.2/135.8)
##                     number_project > 2:
##                     :...time_spend_company <= 3: 0 (5163.3/931.4)
##                         time_spend_company > 3:
##                         :...number_project > 6: 1 (51.1)
##                             number_project <= 6:
##                             :...time_spend_company > 6: 0 (148.9)
##                                 time_spend_company <= 6:
##                                 :...promotion_last_5years > 0: 0 (50.4)
##                                     promotion_last_5years <= 0:
##                                     :...average_montly_hours > 281: 0 (60.9)
##                                         average_montly_hours <= 281:
##                                         :...last_evaluation > 0.82: [S1]
##                                             last_evaluation <= 0.82: [S2]
## 
## SubTree [S1]
## 
## satisfaction_level <= 0.29: 0 (117.1/6.9)
## satisfaction_level > 0.29:
## :...number_project <= 3: 0 (318.6/123.3)
##     number_project > 3:
##     :...average_montly_hours <= 204: 0 (334.1/136.8)
##         average_montly_hours > 204: 1 (1250/358.6)
## 
## SubTree [S2]
## 
## satisfaction_level <= 0.13: 0 (56.9)
## satisfaction_level > 0.13:
## :...average_montly_hours > 269: 1 (290.4/106.1)
##     average_montly_hours <= 269:
##     :...average_montly_hours > 255: 0 (188.5/12.7)
##         average_montly_hours <= 255:
##         :...average_montly_hours <= 131: 0 (46.2)
##             average_montly_hours > 131:
##             :...time_spend_company > 5: 0 (264.5/46.4)
##                 time_spend_company <= 5:
##                 :...sales in {hr,product_mng,RandD,sales}: 0 (484.7/138.3)
##                     sales in {accounting,IT,management,marketing,support,
##                     :         technical}:
##                     :...number_project <= 3: 1 (342.3/118.4)
##                         number_project > 3:
##                         :...average_montly_hours > 234: 0 (72.6/0.8)
##                             average_montly_hours <= 234:
##                             :...sales in {management,marketing}: 0 (52.7)
##                                 sales in {accounting,IT,support,technical}:
##                                 :...satisfaction_level <= 0.31: 1 (227.2/65.2)
##                                     satisfaction_level > 0.31: 0 (403.6/154.6)
## 
## -----  Trial 5:  -----
## 
## Decision tree:
## 
## satisfaction_level <= 0.11: 1 (277.2)
## satisfaction_level > 0.11:
## :...number_project > 6: 1 (81.9)
##     number_project <= 6:
##     :...satisfaction_level > 0.92: 0 (402.9)
##         satisfaction_level <= 0.92:
##         :...average_montly_hours <= 125: 0 (200.1)
##             average_montly_hours > 125:
##             :...last_evaluation <= 0.44: 0 (155.4)
##                 last_evaluation > 0.44:
##                 :...last_evaluation <= 0.47: 1 (485.9/178.4)
##                     last_evaluation > 0.47:
##                     :...time_spend_company > 4:
##                         :...time_spend_company > 6: 0 (137.2)
##                         :   time_spend_company <= 6:
##                         :   :...satisfaction_level <= 0.29: 0 (469.6/76.3)
##                         :       satisfaction_level > 0.29:
##                         :       :...last_evaluation > 0.87:
##                         :           :...average_montly_hours <= 215: 0 (192/62.3)
##                         :           :   average_montly_hours > 215: 1 (828.4/174)
##                         :           last_evaluation <= 0.87:
##                         :           :...average_montly_hours <= 131: 0 (28)
##                         :               average_montly_hours > 131:
##                         :               :...average_montly_hours <= 135: 1 (76.2/5.1)
##                         :                   average_montly_hours > 135: [S1]
##                         time_spend_company <= 4:
##                         :...satisfaction_level <= 0.47:
##                             :...average_montly_hours <= 161: 1 (1078/395.2)
##                             :   average_montly_hours > 161:
##                             :   :...average_montly_hours <= 167: 0 (62.6)
##                             :       average_montly_hours > 167:
##                             :       :...number_project <= 2: 0 (138.5/9.1)
##                             :           number_project > 2:
##                             :           :...satisfaction_level > 0.3: 1 (526.9/239.7)
##                             :               satisfaction_level <= 0.3:
##                             :               :...last_evaluation <= 0.98: 0 (541.7/87.5)
##                             :                   last_evaluation > 0.98: 1 (53.1/15.3)
##                             satisfaction_level > 0.47:
##                             :...number_project > 5: 1 (331.9/142.7)
##                                 number_project <= 5:
##                                 :...salary = high: 0 (336.4)
##                                     salary in {low,medium}:
##                                     :...last_evaluation <= 0.52: 0 (335.8)
##                                         last_evaluation > 0.52:
##                                         :...satisfaction_level > 0.9: 0 (151.3)
##                                             satisfaction_level <= 0.9:
##                                             :...sales in {hr,management,
##                                                 :         marketing}: 0 (773/266.8)
##                                                 sales in {accounting,IT,
##                                                 :         product_mng,RandD,
##                                                 :         sales,support,
##                                                 :         technical}: [S2]
## 
## SubTree [S1]
## 
## satisfaction_level <= 0.37: 0 (48)
## satisfaction_level > 0.37:
## :...last_evaluation <= 0.55: 0 (178.4/42.5)
##     last_evaluation > 0.55:
##     :...average_montly_hours > 280: 1 (65.9/11.3)
##         average_montly_hours <= 280:
##         :...sales in {hr,IT,management,product_mng}: 0 (164.8/37.6)
##             sales in {accounting,marketing,RandD,sales,support,technical}:
##             :...number_project <= 4: 0 (466.8/187.2)
##                 number_project > 4: 1 (494.5/173.7)
## 
## SubTree [S2]
## 
## promotion_last_5years > 0: 0 (107.4)
## promotion_last_5years <= 0:
## :...last_evaluation > 0.94: 0 (556.6/41.6)
##     last_evaluation <= 0.94:
##     :...average_montly_hours <= 148: 0 (343.3/40.6)
##         average_montly_hours > 148:
##         :...sales in {accounting,IT,product_mng,RandD}: 0 (553.7/72.3)
##             sales in {sales,support,technical}:
##             :...satisfaction_level > 0.77:
##                 :...average_montly_hours > 268: 0 (70.8)
##                 :   average_montly_hours <= 268:
##                 :   :...last_evaluation <= 0.57: 0 (57.4)
##                 :       last_evaluation > 0.57:
##                 :       :...last_evaluation > 0.83: 0 (155.3/25.6)
##                 :           last_evaluation <= 0.83:
##                 :           :...satisfaction_level <= 0.86: 1 (459.3/163.8)
##                 :               satisfaction_level > 0.86: 0 (137.5/48.3)
##                 satisfaction_level <= 0.77:
##                 :...last_evaluation <= 0.54: 1 (172.1/72.5)
##                     last_evaluation > 0.54:
##                     :...satisfaction_level > 0.66: 0 (558.3/19.7)
##                         satisfaction_level <= 0.66:
##                         :...average_montly_hours <= 163: 0 (95.6)
##                             average_montly_hours > 163:
##                             :...average_montly_hours <= 164: 1 (54.3/3.5)
##                                 average_montly_hours > 164:
##                                 :...average_montly_hours <= 178: 0 (110.8)
##                                     average_montly_hours > 178:
##                                     :...average_montly_hours <= 181: 1 (84.9/25.6)
##                                         average_montly_hours > 181:
##                                         :...average_montly_hours > 270: 0 (76.6)
##                                             average_montly_hours <= 270: [S3]
## 
## SubTree [S3]
## 
## average_montly_hours <= 197: 0 (69)
## average_montly_hours > 197:
## :...number_project <= 2: 1 (111.1/33.9)
##     number_project > 2: 0 (643.8/169)
## 
## -----  Trial 6:  -----
## 
## Decision tree:
## 
## satisfaction_level <= 0.11: 1 (228.4)
## satisfaction_level > 0.11:
## :...number_project > 6: 1 (67.5)
##     number_project <= 6:
##     :...average_montly_hours > 289: 1 (70.3)
##         average_montly_hours <= 289:
##         :...satisfaction_level > 0.92: 0 (331.9)
##             satisfaction_level <= 0.92:
##             :...average_montly_hours <= 125: 0 (164.9)
##                 average_montly_hours > 125:
##                 :...time_spend_company > 4:
##                     :...time_spend_company > 6: 0 (124.2)
##                     :   time_spend_company <= 6:
##                     :   :...satisfaction_level <= 0.69:
##                     :       :...last_evaluation <= 0.49: 0 (121.1)
##                     :       :   last_evaluation > 0.49:
##                     :       :   :...average_montly_hours <= 166: 1 (415.7/199.4)
##                     :       :       average_montly_hours > 166:
##                     :       :       :...average_montly_hours <= 218: 0 (318.7/24.9)
##                     :       :           average_montly_hours > 218:
##                     :       :           :...average_montly_hours <= 220: 1 (46.9/0.8)
##                     :       :               average_montly_hours > 220: 0 (661.6/240.9)
##                     :       satisfaction_level > 0.69:
##                     :       :...number_project > 5: 0 (77.6)
##                     :           number_project <= 5:
##                     :           :...average_montly_hours > 277: 0 (43.1)
##                     :               average_montly_hours <= 277:
##                     :               :...number_project <= 2: 0 (56.7/12.1)
##                     :                   number_project > 2:
##                     :                   :...average_montly_hours <= 177: 0 (99.1/32.1)
##                     :                       average_montly_hours > 177:
##                     :                       :...last_evaluation <= 0.62: 0 (103.7/36.1)
##                     :                           last_evaluation > 0.62: [S1]
##                     time_spend_company <= 4:
##                     :...number_project <= 2:
##                         :...last_evaluation > 0.57: 0 (850.1/199.6)
##                         :   last_evaluation <= 0.57:
##                         :   :...time_spend_company <= 2: 0 (107.2/10)
##                         :       time_spend_company > 2:
##                         :       :...satisfaction_level <= 0.35: 0 (67.5/10)
##                         :           satisfaction_level > 0.35: 1 (704.5/164)
##                         number_project > 2:
##                         :...sales in {hr,management}:
##                             :...time_spend_company <= 2: 0 (123.7)
##                             :   time_spend_company > 2:
##                             :   :...last_evaluation <= 0.58: 0 (69.1)
##                             :       last_evaluation > 0.58:
##                             :       :...last_evaluation <= 0.62: 1 (148.3/22.9)
##                             :           last_evaluation > 0.62: 0 (432.6/191.5)
##                             sales in {accounting,IT,marketing,product_mng,
##                             :         RandD,sales,support,technical}:
##                             :...last_evaluation <= 0.44: 0 (73.6)
##                                 last_evaluation > 0.44:
##                                 :...satisfaction_level > 0.55:
##                                     :...average_montly_hours <= 131: 1 (52.1/13.4)
##                                     :   average_montly_hours > 131:
##                                     :   :...sales in {marketing,product_mng,
##                                     :       :         RandD}: 0 (388.3)
##                                     :       sales in {accounting,IT,sales,
##                                     :       :         support,technical}:
##                                     :       :...number_project <= 4: 0 (2422.4/203.6)
##                                     :           number_project > 4:
##                                     :           :...Work_accident > 0: 0 (105.3)
##                                     :               Work_accident <= 0: [S2]
##                                     satisfaction_level <= 0.55:
##                                     :...sales in {IT,
##                                         :         product_mng}: 0 (260.4/11.4)
##                                         sales in {accounting,marketing,RandD,
##                                         :         sales,support,technical}:
##                                         :...average_montly_hours <= 132: 0 (54.5)
##                                             average_montly_hours > 132: [S3]
## 
## SubTree [S1]
## 
## sales in {IT,management,marketing,product_mng,RandD}: 1 (238.8/4.5)
## sales in {accounting,hr,sales,support,technical}:
## :...last_evaluation > 0.99: 1 (100.8)
##     last_evaluation <= 0.99:
##     :...satisfaction_level <= 0.72: 1 (48.8)
##         satisfaction_level > 0.72:
##         :...number_project <= 3: 0 (42.7)
##             number_project > 3:
##             :...last_evaluation > 0.95: 0 (179.6/84.2)
##                 last_evaluation <= 0.95:
##                 :...satisfaction_level <= 0.73: 0 (43.6/13.3)
##                     satisfaction_level > 0.73: 1 (568.8/111.2)
## 
## SubTree [S2]
## 
## average_montly_hours > 263: 0 (108.4)
## average_montly_hours <= 263:
## :...sales in {accounting,support}: 0 (153.8/14.5)
##     sales in {IT,sales,technical}:
##     :...average_montly_hours <= 262: 0 (636.4/217.3)
##         average_montly_hours > 262: 1 (53.7/11.8)
## 
## SubTree [S3]
## 
## satisfaction_level <= 0.14: 1 (183.5/84.4)
## satisfaction_level > 0.14:
## :...Work_accident > 0: 0 (244.4/22.3)
##     Work_accident <= 0:
##     :...salary = high: 0 (91.3/10.3)
##         salary in {low,medium}:
##         :...satisfaction_level <= 0.16: 0 (66.6)
##             satisfaction_level > 0.16:
##             :...satisfaction_level <= 0.17: 1 (75.5/26.3)
##                 satisfaction_level > 0.17:
##                 :...satisfaction_level <= 0.18: 0 (34.1)
##                     satisfaction_level > 0.18:
##                     :...time_spend_company <= 3: 0 (1210.3/315.2)
##                         time_spend_company > 3:
##                         :...last_evaluation <= 0.5: 1 (73.9/4.6)
##                             last_evaluation > 0.5: 0 (544/203.1)
## 
## -----  Trial 7:  -----
## 
## Decision tree:
## 
## satisfaction_level <= 0.11: 1 (188.2)
## satisfaction_level > 0.11:
## :...number_project > 6: 1 (55.6)
##     number_project <= 6:
##     :...satisfaction_level > 0.92: 0 (273.4)
##         satisfaction_level <= 0.92:
##         :...time_spend_company > 4:
##             :...average_montly_hours <= 215:
##             :   :...satisfaction_level <= 0.85: 0 (1497.3/260.8)
##             :   :   satisfaction_level > 0.85: 1 (126.2/37.6)
##             :   average_montly_hours > 215:
##             :   :...time_spend_company > 6: 0 (44.8)
##             :       time_spend_company <= 6:
##             :       :...last_evaluation <= 0.67: 0 (341.2/95.8)
##             :           last_evaluation > 0.67:
##             :           :...number_project > 5: 0 (162.8/67)
##             :               number_project <= 5:
##             :               :...satisfaction_level <= 0.56: 0 (227.4/95.4)
##             :                   satisfaction_level > 0.56:
##             :                   :...average_montly_hours <= 277: 1 (1610.2/242.3)
##             :                       average_montly_hours > 277: 0 (41.5/6.9)
##             time_spend_company <= 4:
##             :...last_evaluation <= 0.44: 0 (188)
##                 last_evaluation > 0.44:
##                 :...satisfaction_level > 0.48:
##                     :...average_montly_hours > 286: 1 (26.2/1.5)
##                     :   average_montly_hours <= 286:
##                     :   :...number_project > 5: 0 (464/213.3)
##                     :       number_project <= 5:
##                     :       :...sales in {marketing,
##                     :           :         product_mng}: 0 (276.7)
##                     :           sales in {accounting,hr,IT,management,RandD,
##                     :           :         sales,support,technical}:
##                     :           :...satisfaction_level > 0.9: 0 (128.8)
##                     :               satisfaction_level <= 0.9:
##                     :               :...salary in {high,
##                     :                   :          medium}: 0 (2120.8/208.4)
##                     :                   salary = low:
##                     :                   :...promotion_last_5years > 0: 1 (62.6/15.8)
##                     :                       promotion_last_5years <= 0:
##                     :                       :...last_evaluation <= 0.47: 1 (86.2/34.2)
##                     :                           last_evaluation > 0.47: 0 (2360.3/533.5)
##                     satisfaction_level <= 0.48:
##                     :...number_project <= 2:
##                         :...satisfaction_level <= 0.35: 0 (131.7/14.1)
##                         :   satisfaction_level > 0.35:
##                         :   :...average_montly_hours <= 161: 1 (545.6/137.4)
##                         :       average_montly_hours > 161: 0 (176.1/64)
##                         number_project > 2:
##                         :...sales = IT: 0 (87.3)
##                             sales in {accounting,hr,management,marketing,
##                             :         product_mng,RandD,sales,support,
##                             :         technical}:
##                             :...average_montly_hours <= 132: 0 (74.9)
##                                 average_montly_hours > 132:
##                                 :...promotion_last_5years > 0: 0 (32.1)
##                                     promotion_last_5years <= 0:
##                                     :...last_evaluation > 0.98: 1 (138.5/50.7)
##                                         last_evaluation <= 0.98:
##                                         :...average_montly_hours <= 141: 1 (186.4/76.2)
##                                             average_montly_hours > 141: [S1]
## 
## SubTree [S1]
## 
## average_montly_hours <= 148: 0 (74.6)
## average_montly_hours > 148:
## :...satisfaction_level <= 0.26:
##     :...last_evaluation > 0.86: 0 (101.7)
##     :   last_evaluation <= 0.86:
##     :   :...average_montly_hours <= 168: 1 (101.4/38.8)
##     :       average_montly_hours > 168: 0 (499.9/87.8)
##     satisfaction_level > 0.26:
##     :...sales in {hr,RandD}: 0 (48.5)
##         sales in {accounting,management,marketing,product_mng,sales,support,
##         :         technical}:
##         :...time_spend_company > 3: 1 (350.6/120.8)
##             time_spend_company <= 3:
##             :...satisfaction_level <= 0.32: 1 (215.1/79.9)
##                 satisfaction_level > 0.32: 0 (419.7/61.9)
## 
## -----  Trial 8:  -----
## 
## Decision tree:
## 
## time_spend_company > 4:
## :...time_spend_company > 6: 0 (298.5)
## :   time_spend_company <= 6:
## :   :...average_montly_hours <= 215:
## :       :...satisfaction_level <= 0.3: 0 (412.8/1.3)
## :       :   satisfaction_level > 0.3:
## :       :   :...average_montly_hours <= 131: 0 (134.2)
## :       :       average_montly_hours > 131:
## :       :       :...satisfaction_level <= 0.34: 1 (143.1/23.3)
## :       :           satisfaction_level > 0.34: 0 (880/278.9)
## :       average_montly_hours > 215:
## :       :...average_montly_hours > 289: 1 (67.1)
## :           average_montly_hours <= 289:
## :           :...average_montly_hours > 282: 0 (40.9/1.7)
## :               average_montly_hours <= 282:
## :               :...last_evaluation <= 0.81:
## :                   :...average_montly_hours <= 272: 0 (475.3/104.3)
## :                   :   average_montly_hours > 272: 1 (319.5/78.5)
## :                   last_evaluation > 0.81:
## :                   :...average_montly_hours > 277: 0 (28.5/0.9)
## :                       average_montly_hours <= 277:
## :                       :...number_project <= 3: 0 (228.5/69.4)
## :                           number_project > 3:
## :                           :...satisfaction_level <= 0.71: 0 (288.5/137.4)
## :                               satisfaction_level > 0.71: 1 (1009.5/27.8)
## time_spend_company <= 4:
## :...average_montly_hours > 287: 1 (115.8)
##     average_montly_hours <= 287:
##     :...number_project > 6: 1 (52.3)
##         number_project <= 6:
##         :...average_montly_hours <= 126: 0 (281.1/1.7)
##             average_montly_hours > 126:
##             :...promotion_last_5years > 0: 0 (274.9/4.1)
##                 promotion_last_5years <= 0:
##                 :...sales in {hr,IT,product_mng,RandD,support}:
##                     :...satisfaction_level <= 0.15: 1 (168.6/67.1)
##                     :   satisfaction_level > 0.15: 0 (2458.1/203.3)
##                     sales in {accounting,management,marketing,sales,technical}:
##                     :...satisfaction_level > 0.7: 0 (1462.2/156.2)
##                         satisfaction_level <= 0.7:
##                         :...satisfaction_level > 0.69: 1 (169.2/82.5)
##                             satisfaction_level <= 0.69:
##                             :...satisfaction_level > 0.64: 0 (250.3)
##                                 satisfaction_level <= 0.64:
##                                 :...average_montly_hours <= 197: 0 (2005.1/451)
##                                     average_montly_hours > 197:
##                                     :...Work_accident > 0: 0 (167/2)
##                                         Work_accident <= 0:
##                                         :...satisfaction_level <= 0.16: 0 (158.1/45.1)
##                                             satisfaction_level > 0.16: [S1]
## 
## SubTree [S1]
## 
## average_montly_hours <= 205: 1 (180.2/58.4)
## average_montly_hours > 205:
## :...average_montly_hours <= 213: 0 (100.8)
##     average_montly_hours > 213:
##     :...sales = marketing: 0 (59.3)
##         sales in {accounting,management,sales,technical}:
##         :...last_evaluation > 0.84: 0 (308.2/76.4)
##             last_evaluation <= 0.84:
##             :...salary = high: 0 (41.7)
##                 salary in {low,medium}:
##                 :...average_montly_hours <= 218: 1 (68/6.5)
##                     average_montly_hours > 218:
##                     :...average_montly_hours <= 231: 0 (90.5)
##                         average_montly_hours > 231:
##                         :...last_evaluation <= 0.82: 0 (534.6/234.7)
##                             last_evaluation > 0.82: 1 (126.8/11.5)
## 
## -----  Trial 9:  -----
## 
## Decision tree:
## 
## number_project <= 2:
## :...satisfaction_level > 0.46: 0 (967.4/199.1)
## :   satisfaction_level <= 0.46:
## :   :...average_montly_hours <= 125: 0 (103.9)
## :       average_montly_hours > 125:
## :       :...last_evaluation > 0.57: 0 (312.1/73)
## :           last_evaluation <= 0.57:
## :           :...average_montly_hours <= 163: 1 (2215.2/43.4)
## :               average_montly_hours > 163: 0 (87.8/4.6)
## number_project > 2:
## :...satisfaction_level <= 0.11: 1 (518.8)
##     satisfaction_level > 0.11:
##     :...time_spend_company > 4:
##         :...average_montly_hours <= 215: 0 (1253.9/166.8)
##         :   average_montly_hours > 215:
##         :   :...number_project <= 3: 0 (281.3/58.8)
##         :       number_project > 3:
##         :       :...last_evaluation <= 0.8: 0 (460.8/145.8)
##         :           last_evaluation > 0.8:
##         :           :...satisfaction_level <= 0.7: 0 (241/104.6)
##         :               satisfaction_level > 0.7:
##         :               :...number_project <= 5: 1 (886.7/18.5)
##         :                   number_project > 5: 0 (54.4/8.6)
##         time_spend_company <= 4:
##         :...average_montly_hours > 290: 1 (54.2)
##             average_montly_hours <= 290:
##             :...number_project > 6: 1 (27.9)
##                 number_project <= 6:
##                 :...satisfaction_level > 0.53: 0 (3017.3/32.3)
##                     satisfaction_level <= 0.53:
##                     :...average_montly_hours <= 135: 0 (140.6)
##                         average_montly_hours > 135:
##                         :...average_montly_hours > 278: 0 (102.3)
##                             average_montly_hours <= 278:
##                             :...sales in {IT,product_mng,
##                                 :         support}: 0 (541.2/33.7)
##                                 sales in {accounting,hr,management,marketing,
##                                 :         RandD,sales,technical}:
##                                 :...salary = high: 0 (109.4)
##                                     salary in {low,medium}:
##                                     :...Work_accident > 0: 0 (267.8/26.4)
##                                         Work_accident <= 0:
##                                         :...last_evaluation <= 0.82: [S1]
##                                             last_evaluation > 0.82:
##                                             :...last_evaluation <= 0.84: 1 (116.4/20.6)
##                                                 last_evaluation > 0.84: 0 (496.7/159.1)
## 
## SubTree [S1]
## 
## average_montly_hours <= 269: 0 (1007.9/177.3)
## average_montly_hours > 269: 1 (84.9/27.8)
## 
## 
## Evaluation on training data (13500 cases):
## 
## Trial        Decision Tree   
## -----      ----------------  
##    Size      Errors  
## 
##    0     32  252( 1.9%)
##    1     28 1065( 7.9%)
##    2     37 1331( 9.9%)
##    3     40  931( 6.9%)
##    4     34  759( 5.6%)
##    5     48 1192( 8.8%)
##    6     51  632( 4.7%)
##    7     36  491( 3.6%)
##    8     34 2135(15.8%)
##    9     24  342( 2.5%)
## boost            214( 1.6%)   <<
## 
## 
##     (a)   (b)    <-classified as
##    ----  ----
##   10246    20    (a): class 0
##     194  3040    (b): class 1
## 
## 
##  Attribute usage:
## 
##  100.00% satisfaction_level
##  100.00% number_project
##  100.00% average_montly_hours
##  100.00% time_spend_company
##   92.02% last_evaluation
##   89.96% promotion_last_5years
##   88.65% sales
##   75.96% salary
##   63.03% Work_accident
## 
## 
## Time: 0.8 secs
## Code: Evaluate the model using the Test Set (y)
p <- predict( c50model, C50xtest, type="class")
sum( p == C50ytest ) / length( p )
## [1] 0.9833222
# Code: Probability of Left
pprob <- predict( c50model, C50xtest, type="prob" )

Evaluation Model

Decision Tree

library(rpart)
library(rattle)
decTree <- rpart(left~satisfaction_level + last_evaluation + number_project + average_montly_hours + time_spend_company  + Work_accident + promotion_last_5years + sales + salary,data=train)
data$left <- factor(data$left)
model <- rpart(left ~ ., alldata)
decTreeResult1 <- predict(model, train, type=c("class"))

library(ROCR)
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
pr <- predict(model, test, type="prob")
pred <- prediction(pr[,2], labels=test$left)

perf <- performance(pred, measure="tpr", x.measure="fpr")
plot(perf, main="ROC curve", col="darkmagenta", lwd=3)
abline(a=0, b=1, lwd=2, lty=2)
perf_auc <- performance(pred, measure="auc")
auc<-unlist(perf_auc@y.values)
buc<-signif(auc,digits=4)
legend(0.7,0.4,(c(buc)),border="white",cex=1.0,box.col = "white")
legend(0.55,0.4,c("AUC="),border="white",cex=1.0,box.col = "white")

Random Forest

randomForestResult <- predict(randomForestModel, train1, type=c("class"))
FRR <- predict(randomForestModel, train1, type=c("class"))
## test$PAtt <– predict(randomForestModel, test, type=”response”)
## prediction <- function(t) ifelse(randomForestResult > t, 1,0)
pred <- prediction(predictions = FRR, labels=train1$left)
perf <- performance(pred, measure="tpr", x.measure="fpr")
plot(perf, main="ROC curve", col="darkmagenta", lwd=3)
abline(a=0, b=1, lwd=2, lty=2)
erf_auc <- performance(pred, measure="auc")
auc<-unlist(perf_auc@y.values)
buc<-signif(auc,digits=4)
legend(0.7,0.4,(c(buc)),border="white",cex=1.0,box.col = "white")
legend(0.55,0.4,c("AUC="),border="white",cex=1.0,box.col = "white")

C.50

pr <- predict(c50model, C50xtest, type="prob")
pred <- prediction(predictions = pr[,2], labels=C50ytest)
perf <- performance(pred, measure="tpr", x.measure="fpr")
plot(perf, main="ROC curve", col="darkmagenta", lwd=3)
abline(a=0, b=1, lwd=2, lty=2)
perf_auc <- performance(pred, measure="auc")
str(perf_auc)
## Formal class 'performance' [package "ROCR"] with 6 slots
##   ..@ x.name      : chr "None"
##   ..@ y.name      : chr "Area under the ROC curve"
##   ..@ alpha.name  : chr "none"
##   ..@ x.values    : list()
##   ..@ y.values    :List of 1
##   .. ..$ : num 0.991
##   ..@ alpha.values: list()
auc<-unlist(perf_auc@y.values)
buc<-signif(auc,digits=4)
legend(0.7,0.4,(c(buc)),border="white",cex=1.0,box.col = "white")
legend(0.55,0.4,c("AUC="),border="white",cex=1.0,box.col = "white")

Part 3 : Survival Analysis

Summary Statistics and Parameters

# Code: Load Survival package
library(survival)
## 
## Attaching package: 'survival'
## The following object is masked from 'package:caret':
## 
##     cluster
# Code: Load and attach data
attach(alldata)
# Code: Define variables
library(survival)
time <- time_spend_company
time<-as.numeric(time_spend_company)
event1<-as.numeric(left)
event <- event1

x <- cbind(satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,promotion_last_5years)
group <- left

# Code: Summary Statistics
summary(time)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   2.000   2.000   2.484   3.000   8.000
summary(event)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.000   1.000   1.238   1.000   2.000
summary(x)
##  satisfaction_level last_evaluation number_project  average_montly_hours
##  Min.   :0.0900     Min.   : 1.00   Min.   :1.000   Min.   : 96.0       
##  1st Qu.:0.4400     1st Qu.:21.00   1st Qu.:2.000   1st Qu.:156.0       
##  Median :0.6400     Median :37.00   Median :3.000   Median :200.0       
##  Mean   :0.6128     Mean   :36.61   Mean   :2.803   Mean   :201.1       
##  3rd Qu.:0.8200     3rd Qu.:52.00   3rd Qu.:4.000   3rd Qu.:245.0       
##  Max.   :1.0000     Max.   :65.00   Max.   :6.000   Max.   :310.0       
##  time_spend_company Work_accident   promotion_last_5years
##  Min.   :1.000      Min.   :1.000   Min.   :1.000        
##  1st Qu.:2.000      1st Qu.:1.000   1st Qu.:1.000        
##  Median :2.000      Median :1.000   Median :1.000        
##  Mean   :2.484      Mean   :1.145   Mean   :1.021        
##  3rd Qu.:3.000      3rd Qu.:1.000   3rd Qu.:1.000        
##  Max.   :8.000      Max.   :2.000   Max.   :2.000
summary(group)
##     0     1 
## 11428  3571
# Code: Create Surv object
survival <- Surv(time,event)

Kaplan-Meier non-parametric analysis by group

# Code: Create Surv object
survival <- Surv(time,event)
# Code: # Kaplan-Meier non-parametric analysis by group
kmsurvival <- survfit(Surv(time,event) ~group)
plot(kmsurvival, conf.int=FALSE, col=c("blue","red"), xlab="Trial", ylab = "Proportion Surviving")
legend("topright", c("No","Yes"), col=c("blue","red"), lty = 1)

Cox Proportional Hazards Regression Model

coxph <- coxph(survival ~ satisfaction_level + last_evaluation + number_project + average_montly_hours, alldata)
## Warning in fitter(X, Y, strats, offset, init, control,
## weights = weights, : Loglik converged before variable
## 2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65 ;
## beta may be infinite.
plot(survfit(coxph), ylim=c(0.7, 1)) 
title(main="Survival at Average Predictors")

#Code : Compute covariates are overtime, age, monthly income, and distance from home     
cox.zph(coxph)
##                            rho    chisq     p
## satisfaction_level    0.468603 4.54e+02 0.000
## last_evaluation0.37  -0.577923 1.84e-08 1.000
## last_evaluation0.38  -0.391412 1.84e-08 1.000
## last_evaluation0.39  -0.450620 3.22e-08 1.000
## last_evaluation0.4   -0.501672 9.23e-09 1.000
## last_evaluation0.41  -0.379129 1.08e-08 1.000
## last_evaluation0.42  -0.405588 2.68e-08 1.000
## last_evaluation0.43  -0.137196 2.22e-09 1.000
## last_evaluation0.44  -0.566318 4.63e-08 1.000
## last_evaluation0.45  -0.017929 5.38e-09 1.000
## last_evaluation0.46  -0.072242 3.56e-08 1.000
## last_evaluation0.47  -0.107427 1.07e-07 1.000
## last_evaluation0.48  -0.138600 1.51e-07 1.000
## last_evaluation0.49  -0.167345 2.66e-07 1.000
## last_evaluation0.5   -0.072836 4.14e-08 1.000
## last_evaluation0.51  -0.083624 5.64e-08 1.000
## last_evaluation0.52  -0.127672 1.54e-07 1.000
## last_evaluation0.53  -0.093622 7.71e-08 1.000
## last_evaluation0.54  -0.051802 1.99e-08 1.000
## last_evaluation0.55  -0.122476 1.45e-07 1.000
## last_evaluation0.56  -0.116299 1.16e-07 1.000
## last_evaluation0.57  -0.091192 7.05e-08 1.000
## last_evaluation0.58  -0.008494 9.86e-09 1.000
## last_evaluation0.59  -0.054818 5.45e-07 0.999
## last_evaluation0.6    0.010538 1.74e-08 1.000
## last_evaluation0.61   0.012576 8.63e-08 1.000
## last_evaluation0.62  -0.038629 9.50e-08 1.000
## last_evaluation0.63  -0.029842 3.25e-07 1.000
## last_evaluation0.64  -0.690724 3.81e-08 1.000
## last_evaluation0.65   0.045889 3.28e-07 1.000
## last_evaluation0.66  -0.031739 5.51e-07 0.999
## last_evaluation0.67  -0.054341 5.38e-07 0.999
## last_evaluation0.68  -0.029231 1.33e-07 1.000
## last_evaluation0.69   0.175391 6.71e-09 1.000
## last_evaluation0.7    0.017701 3.16e-08 1.000
## last_evaluation0.71  -0.011124 1.68e-08 1.000
## last_evaluation0.72   0.065290 1.54e-06 0.999
## last_evaluation0.73  -0.020437 1.52e-07 1.000
## last_evaluation0.74  -0.045472 2.52e-07 1.000
## last_evaluation0.75  -0.044953 2.45e-07 1.000
## last_evaluation0.76   0.018430 1.85e-07 1.000
## last_evaluation0.77  -0.015457 5.74e-09 1.000
## last_evaluation0.78  -0.042220 5.32e-08 1.000
## last_evaluation0.79   0.016414 6.49e-09 1.000
## last_evaluation0.8    0.001574 5.13e-11 1.000
## last_evaluation0.81   0.000576 7.36e-12 1.000
## last_evaluation0.82   0.031289 2.34e-08 1.000
## last_evaluation0.83   0.037196 1.75e-08 1.000
## last_evaluation0.84   0.039173 1.65e-08 1.000
## last_evaluation0.85   0.058363 3.78e-08 1.000
## last_evaluation0.86   0.030758 1.29e-08 1.000
## last_evaluation0.87   0.036152 1.12e-08 1.000
## last_evaluation0.88   0.034248 2.11e-08 1.000
## last_evaluation0.89   0.033208 1.41e-08 1.000
## last_evaluation0.9    0.006677 5.97e-10 1.000
## last_evaluation0.91   0.075331 8.66e-08 1.000
## last_evaluation0.92   0.039565 1.76e-08 1.000
## last_evaluation0.93   0.021123 5.03e-09 1.000
## last_evaluation0.94   0.038513 2.07e-08 1.000
## last_evaluation0.95   0.051930 3.56e-08 1.000
## last_evaluation0.96   0.044371 3.26e-08 1.000
## last_evaluation0.97   0.043470 2.51e-08 1.000
## last_evaluation0.98   0.084003 1.16e-07 1.000
## last_evaluation0.99   0.039223 2.46e-08 1.000
## last_evaluation1      0.021282 2.93e-09 1.000
## number_project3       0.025375 2.20e+00 0.138
## number_project4       0.317451 2.21e+02 0.000
## number_project5       0.393844 2.70e+02 0.000
## number_project6       0.247959 8.46e+01 0.000
## number_project7       0.176536 8.34e+01 0.000
## average_montly_hours  0.369786 2.22e+02 0.000
## GLOBAL                      NA 2.97e+03 0.000
plot(cox.zph(coxph))