library(ggplot2)
#Data Extraction
data<-read.csv("/home/heru/Desktop/HR_comma_sep.csv", header=TRUE, sep=",")
alldata<-data
str(alldata)
## 'data.frame': 14999 obs. of 10 variables:
## $ satisfaction_level : num 0.38 0.8 0.11 0.72 0.37 0.41 0.1 0.92 0.89 0.42 ...
## $ last_evaluation : num 0.53 0.86 0.88 0.87 0.52 0.5 0.77 0.85 1 0.53 ...
## $ number_project : int 2 5 7 5 2 2 6 5 5 2 ...
## $ average_montly_hours : int 157 262 272 223 159 153 247 259 224 142 ...
## $ time_spend_company : int 3 6 4 5 3 3 4 5 5 3 ...
## $ Work_accident : int 0 0 0 0 0 0 0 0 0 0 ...
## $ left : int 1 1 1 1 1 1 1 1 1 1 ...
## $ promotion_last_5years: int 0 0 0 0 0 0 0 0 0 0 ...
## $ sales : Factor w/ 10 levels "accounting","hr",..: 8 8 8 8 8 8 8 8 8 8 ...
## $ salary : Factor w/ 3 levels "high","low","medium": 2 3 3 2 2 2 2 2 2 2 ...
head(alldata)
## satisfaction_level last_evaluation number_project average_montly_hours
## 1 0.38 0.53 2 157
## 2 0.80 0.86 5 262
## 3 0.11 0.88 7 272
## 4 0.72 0.87 5 223
## 5 0.37 0.52 2 159
## 6 0.41 0.50 2 153
## time_spend_company Work_accident left promotion_last_5years sales salary
## 1 3 0 1 0 sales low
## 2 6 0 1 0 sales medium
## 3 4 0 1 0 sales medium
## 4 5 0 1 0 sales low
## 5 3 0 1 0 sales low
## 6 3 0 1 0 sales low
dim(alldata)
## [1] 14999 10
alldata1<-alldata
alldata$last_evaluation <- as.factor(alldata$last_evaluation)
alldata$number_project <- as.factor(alldata$number_project)
alldata$time_spend_company <- as.factor(alldata$time_spend_company)
alldata$Work_accident <- as.factor(alldata$Work_accident)
alldata$left <- as.factor(alldata$left)
alldata$promotion_last_5years <- as.factor(alldata$promotion_last_5years)
alldata$sales <- as.factor(alldata$sales)
alldata$salary <- as.factor(alldata$salary)
str(alldata)
## 'data.frame': 14999 obs. of 10 variables:
## $ satisfaction_level : num 0.38 0.8 0.11 0.72 0.37 0.41 0.1 0.92 0.89 0.42 ...
## $ last_evaluation : Factor w/ 65 levels "0.36","0.37",..: 18 51 53 52 17 15 42 50 65 18 ...
## $ number_project : Factor w/ 6 levels "2","3","4","5",..: 1 4 6 4 1 1 5 4 4 1 ...
## $ average_montly_hours : int 157 262 272 223 159 153 247 259 224 142 ...
## $ time_spend_company : Factor w/ 8 levels "2","3","4","5",..: 2 5 3 4 2 2 3 4 4 2 ...
## $ Work_accident : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ left : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
## $ promotion_last_5years: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ sales : Factor w/ 10 levels "accounting","hr",..: 8 8 8 8 8 8 8 8 8 8 ...
## $ salary : Factor w/ 3 levels "high","low","medium": 2 3 3 2 2 2 2 2 2 2 ...
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## [1] 4500 10
## [1] 4500 10
ggplot(train, aes(x = salary)) + geom_bar(aes(fill=left), position ="fill")+ scale_y_continuous(labels=percent_format())
ggplot(train, aes(x = sales)) + geom_bar(aes(fill=left), position ="fill")+ scale_y_continuous(labels = percent)
ggplot(train, aes(x = Work_accident)) + geom_bar(aes(fill=left), position ="fill")+ scale_y_continuous(labels = percent)
ggplot(train, aes(x = promotion_last_5years)) + geom_bar(aes(fill=left), position ="fill")+ scale_y_continuous(labels = percent)
ggplot(train, aes(x = time_spend_company)) + geom_bar(aes(fill=left), position ="fill")+ scale_y_continuous(labels = percent)
ggplot(train, aes(x = number_project)) + geom_bar(aes(fill=left), position ="fill")+ scale_y_continuous(labels=percent_format())
d<-ggplot(train, aes(x = promotion_last_5years)) + geom_bar(aes(fill=left), position ="fill")+ scale_y_continuous(labels = percent)
d + facet_wrap(~ salary)
#### Three Variables Left Vs (time-spend, number of project)
f<-ggplot(train, aes(x = time_spend_company)) + geom_bar(aes(fill=left), position ="fill")+ scale_y_continuous(labels = percent)
f + facet_wrap(~ number_project)
library(rpart)
library(rattle)
## Rattle: A free graphical interface for data mining with R.
## Version 4.1.0 Copyright (c) 2006-2015 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
decTree <- rpart(left~satisfaction_level + last_evaluation + number_project + average_montly_hours + time_spend_company + Work_accident + promotion_last_5years + sales + salary,data=train)
print(decTree)
## n= 10499
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 10499 2522 0 (0.75978665 0.24021335)
## 2) number_project=3,4,5 7815 778 0 (0.90044786 0.09955214)
## 4) time_spend_company=2,3,4,7,8,10 6579 126 0 (0.98084815 0.01915185)
## 8) satisfaction_level>=0.115 6552 99 0 (0.98489011 0.01510989) *
## 9) satisfaction_level< 0.115 27 0 1 (0.00000000 1.00000000) *
## 5) time_spend_company=5,6 1236 584 1 (0.47249191 0.52750809)
## 10) satisfaction_level< 0.715 452 25 0 (0.94469027 0.05530973) *
## 11) satisfaction_level>=0.715 784 157 1 (0.20025510 0.79974490)
## 22) last_evaluation=0.38,0.4,0.41,0.42,0.43,0.44,0.45,0.46,0.47,0.48,0.49,0.51,0.52,0.54,0.55,0.56,0.58,0.59,0.6,0.61,0.62,0.64,0.66,0.67,0.68,0.69,0.72,0.73,0.74,0.75,0.76,0.77,0.79,0.8 107 3 0 (0.97196262 0.02803738) *
## 23) last_evaluation=0.53,0.63,0.65,0.71,0.78,0.81,0.82,0.83,0.84,0.85,0.86,0.87,0.88,0.89,0.9,0.91,0.92,0.93,0.94,0.95,0.96,0.97,0.98,0.99,1 677 53 1 (0.07828656 0.92171344)
## 46) average_montly_hours< 213 37 5 0 (0.86486486 0.13513514) *
## 47) average_montly_hours>=213 640 21 1 (0.03281250 0.96718750) *
## 3) number_project=2,6,7 2684 940 1 (0.35022355 0.64977645)
## 6) satisfaction_level>=0.465 620 40 0 (0.93548387 0.06451613) *
## 7) satisfaction_level< 0.465 2064 360 1 (0.17441860 0.82558140)
## 14) last_evaluation=0.36,0.37,0.38,0.39,0.4,0.41,0.42,0.43,0.44,0.58,0.59,0.6,0.61,0.62,0.63,0.64,0.65,0.66,0.67,0.68,0.69,0.7,0.71,0.72,0.73,0.74,0.75,0.76,0.99,1 197 15 0 (0.92385787 0.07614213) *
## 15) last_evaluation=0.45,0.46,0.47,0.48,0.49,0.5,0.51,0.52,0.53,0.54,0.55,0.56,0.57,0.77,0.78,0.79,0.8,0.81,0.82,0.83,0.84,0.85,0.86,0.87,0.88,0.89,0.9,0.91,0.92,0.93,0.94,0.95,0.96,0.97,0.98 1867 178 1 (0.09534012 0.90465988)
## 30) time_spend_company=2,6,7,8 63 4 0 (0.93650794 0.06349206) *
## 31) time_spend_company=3,4,5 1804 119 1 (0.06596452 0.93403548)
## 62) average_montly_hours< 125.5 14 0 0 (1.00000000 0.00000000) *
## 63) average_montly_hours>=125.5 1790 105 1 (0.05865922 0.94134078)
## 126) average_montly_hours>=162.5 699 84 1 (0.12017167 0.87982833)
## 252) satisfaction_level>=0.115 96 12 0 (0.87500000 0.12500000) *
## 253) satisfaction_level< 0.115 603 0 1 (0.00000000 1.00000000) *
## 127) average_montly_hours< 162.5 1091 21 1 (0.01924840 0.98075160) *
fancyRpartPlot(decTree)
library(randomForest)
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:ggplot2':
##
## margin
library(caret)
## Loading required package: lattice
randomForestModel <- randomForest(left~satisfaction_level + last_evaluation + number_project + average_montly_hours + time_spend_company + Work_accident + promotion_last_5years + sales + salary, data=train1,ntree=100,mtry=5, importance=TRUE)
## Warning in randomForest.default(m, y, ...): The response has five or fewer
## unique values. Are you sure you want to do regression?
print(randomForestModel)
##
## Call:
## randomForest(formula = left ~ satisfaction_level + last_evaluation + number_project + average_montly_hours + time_spend_company + Work_accident + promotion_last_5years + sales + salary, data = train1, ntree = 100, mtry = 5, importance = TRUE)
## Type of random forest: regression
## Number of trees: 100
## No. of variables tried at each split: 5
##
## Mean of squared residuals: 0.0120259
## % Var explained: 93.35
varImpPlot(randomForestModel,type=2)
# Code : C50
alldataC50 <- alldata1[ sample( nrow(alldata) ), ]
C50x <- alldataC50[,-c(7)] # independent
C50y <- alldataC50[,c(7)] # dependent
## Code: Create train and test sets for x and y
C50xtrain <- C50x[1:13500, ]
C50xtest <- C50x[13501:14999, ]
C50ytrain <- C50y[1:13500]
C50ytest <- C50y[13501:14999]
##Code: Install and load C50
library(C50)
C50ytrain<-as.factor(C50ytrain)
c50model <- C50::C5.0( C50xtrain, C50ytrain )
summary( c50model )
##
## Call:
## C5.0.default(x = C50xtrain, y = C50ytrain)
##
##
## C5.0 [Release 2.07 GPL Edition] Tue Jan 10 09:07:11 2017
## -------------------------------
##
## Class specified by attribute `outcome'
##
## Read 13500 cases (10 attributes) from undefined.data
##
## Decision tree:
##
## average_montly_hours > 287: 1 (292)
## average_montly_hours <= 287:
## :...satisfaction_level <= 0.46:
## :...number_project > 2:
## : :...satisfaction_level <= 0.11: 1 (533)
## : : satisfaction_level > 0.11:
## : : :...last_evaluation <= 0.98:
## : : :...number_project <= 5: 0 (1021/45)
## : : : number_project > 5:
## : : : :...number_project <= 6: 0 (281/26)
## : : : number_project > 6: 1 (14)
## : : last_evaluation > 0.98:
## : : :...time_spend_company > 4: 0 (11)
## : : time_spend_company <= 4:
## : : :...number_project > 4: 0 (11/1)
## : : number_project <= 4:
## : : :...average_montly_hours <= 188: 0 (3)
## : : average_montly_hours > 188: 1 (12/1)
## : number_project <= 2:
## : :...last_evaluation > 0.57: 0 (118/6)
## : last_evaluation <= 0.57:
## : :...last_evaluation <= 0.44: 0 (34)
## : last_evaluation > 0.44:
## : :...average_montly_hours > 162:
## : :...satisfaction_level <= 0.12: 1 (3)
## : : satisfaction_level > 0.12: 0 (22/2)
## : average_montly_hours <= 162:
## : :...average_montly_hours <= 127:
## : :...average_montly_hours <= 125: 0 (15)
## : : average_montly_hours > 125: 1 (62)
## : average_montly_hours > 127:
## : :...satisfaction_level > 0.36: 1 (1222/4)
## : satisfaction_level <= 0.36:
## : :...satisfaction_level <= 0.31: 0 (8/1)
## : satisfaction_level > 0.31: 1 (85/2)
## satisfaction_level > 0.46:
## :...time_spend_company <= 4: 0 (7974/108)
## time_spend_company > 4:
## :...last_evaluation <= 0.8: 0 (674/28)
## last_evaluation > 0.8:
## :...time_spend_company > 6: 0 (147)
## time_spend_company <= 6:
## :...average_montly_hours <= 214:
## :...number_project > 3: 0 (60/1)
## : number_project <= 3:
## : :...time_spend_company > 5: 0 (15)
## : time_spend_company <= 5:
## : :...average_montly_hours <= 152: 0 (11)
## : average_montly_hours > 152: 1 (15/3)
## average_montly_hours > 214:
## :...number_project <= 3:
## :...average_montly_hours <= 221: 1 (3)
## : average_montly_hours > 221: 0 (33/3)
## number_project > 3:
## :...satisfaction_level <= 0.72:
## :...satisfaction_level <= 0.71: 0 (21/4)
## : satisfaction_level > 0.71: 1 (23)
## satisfaction_level > 0.72:
## :...satisfaction_level <= 0.91: 1 (752/16)
## satisfaction_level > 0.91:
## :...satisfaction_level <= 0.92: 1 (20/1)
## satisfaction_level > 0.92: 0 (5)
##
##
## Evaluation on training data (13500 cases):
##
## Decision Tree
## ----------------
## Size Errors
##
## 32 252( 1.9%) <<
##
##
## (a) (b) <-classified as
## ---- ----
## 10239 27 (a): class 0
## 225 3009 (b): class 1
##
##
## Attribute usage:
##
## 100.00% average_montly_hours
## 97.84% satisfaction_level
## 72.52% time_spend_company
## 34.82% last_evaluation
## 32.69% number_project
##
##
## Time: 0.1 secs
## Code: Try Boosting with 10 trials
c50model <- C50::C5.0( C50xtrain, C50ytrain, trials=10 )
summary( c50model )
##
## Call:
## C5.0.default(x = C50xtrain, y = C50ytrain, trials = 10)
##
##
## C5.0 [Release 2.07 GPL Edition] Tue Jan 10 09:07:11 2017
## -------------------------------
##
## Class specified by attribute `outcome'
##
## Read 13500 cases (10 attributes) from undefined.data
##
## ----- Trial 0: -----
##
## Decision tree:
##
## average_montly_hours > 287: 1 (292)
## average_montly_hours <= 287:
## :...satisfaction_level <= 0.46:
## :...number_project > 2:
## : :...satisfaction_level <= 0.11: 1 (533)
## : : satisfaction_level > 0.11:
## : : :...last_evaluation <= 0.98:
## : : :...number_project <= 5: 0 (1021/45)
## : : : number_project > 5:
## : : : :...number_project <= 6: 0 (281/26)
## : : : number_project > 6: 1 (14)
## : : last_evaluation > 0.98:
## : : :...time_spend_company > 4: 0 (11)
## : : time_spend_company <= 4:
## : : :...number_project > 4: 0 (11/1)
## : : number_project <= 4:
## : : :...average_montly_hours <= 188: 0 (3)
## : : average_montly_hours > 188: 1 (12/1)
## : number_project <= 2:
## : :...last_evaluation > 0.57: 0 (118/6)
## : last_evaluation <= 0.57:
## : :...last_evaluation <= 0.44: 0 (34)
## : last_evaluation > 0.44:
## : :...average_montly_hours > 162:
## : :...satisfaction_level <= 0.12: 1 (3)
## : : satisfaction_level > 0.12: 0 (22/2)
## : average_montly_hours <= 162:
## : :...average_montly_hours <= 127:
## : :...average_montly_hours <= 125: 0 (15)
## : : average_montly_hours > 125: 1 (62)
## : average_montly_hours > 127:
## : :...satisfaction_level > 0.36: 1 (1222/4)
## : satisfaction_level <= 0.36:
## : :...satisfaction_level <= 0.31: 0 (8/1)
## : satisfaction_level > 0.31: 1 (85/2)
## satisfaction_level > 0.46:
## :...time_spend_company <= 4: 0 (7974/108)
## time_spend_company > 4:
## :...last_evaluation <= 0.8: 0 (674/28)
## last_evaluation > 0.8:
## :...time_spend_company > 6: 0 (147)
## time_spend_company <= 6:
## :...average_montly_hours <= 214:
## :...number_project > 3: 0 (60/1)
## : number_project <= 3:
## : :...time_spend_company > 5: 0 (15)
## : time_spend_company <= 5:
## : :...average_montly_hours <= 152: 0 (11)
## : average_montly_hours > 152: 1 (15/3)
## average_montly_hours > 214:
## :...number_project <= 3:
## :...average_montly_hours <= 221: 1 (3)
## : average_montly_hours > 221: 0 (33/3)
## number_project > 3:
## :...satisfaction_level <= 0.72:
## :...satisfaction_level <= 0.71: 0 (21/4)
## : satisfaction_level > 0.71: 1 (23)
## satisfaction_level > 0.72:
## :...satisfaction_level <= 0.91: 1 (752/16)
## satisfaction_level > 0.91:
## :...satisfaction_level <= 0.92: 1 (20/1)
## satisfaction_level > 0.92: 0 (5)
##
## ----- Trial 1: -----
##
## Decision tree:
##
## satisfaction_level <= 0.48:
## :...average_montly_hours <= 125: 0 (136.6)
## : average_montly_hours > 125:
## : :...last_evaluation <= 0.44: 0 (107.2)
## : last_evaluation > 0.44:
## : :...time_spend_company > 5: 0 (246.5/57.8)
## : time_spend_company <= 5:
## : :...salary = high: 0 (197.3/68.5)
## : salary in {low,medium}:
## : :...satisfaction_level <= 0.11: 1 (611.7)
## : satisfaction_level > 0.11:
## : :...satisfaction_level > 0.3:
## : :...average_montly_hours <= 160: 1 (1384.2/113.1)
## : : average_montly_hours > 160:
## : : :...sales in {accounting,management,marketing,
## : : : product_mng,sales,support,
## : : : technical}: 1 (682/172.8)
## : : sales in {hr,IT,RandD}: 0 (52.8/4.5)
## : satisfaction_level <= 0.3:
## : :...last_evaluation > 0.9: 0 (90.1/5.3)
## : last_evaluation <= 0.9:
## : :...sales in {management,marketing,
## : : product_mng}: 0 (50.6/1.5)
## : sales in {accounting,hr,IT,RandD,sales,support,
## : : technical}:
## : :...last_evaluation <= 0.46: 1 (90.9/5.3)
## : last_evaluation > 0.46:
## : :...satisfaction_level <= 0.17: 1 (365.9/100.4)
## : satisfaction_level > 0.17: 0 (234.4/62.4)
## satisfaction_level > 0.48:
## :...time_spend_company <= 3:
## :...number_project <= 5: 0 (5535.2/642.1)
## : number_project > 5: 1 (194/54.3)
## time_spend_company > 3:
## :...time_spend_company > 6: 0 (309.4)
## time_spend_company <= 6:
## :...satisfaction_level > 0.92: 0 (189.4)
## satisfaction_level <= 0.92:
## :...last_evaluation <= 0.45: 0 (73.2)
## last_evaluation > 0.45:
## :...promotion_last_5years > 0: 0 (25.7/0.8)
## promotion_last_5years <= 0:
## :...average_montly_hours <= 149: 0 (211.9/27.8)
## average_montly_hours > 149:
## :...last_evaluation > 0.99: 1 (144/10.6)
## last_evaluation <= 0.99:
## :...Work_accident > 0: 0 (217.4/64.3)
## Work_accident <= 0:
## :...time_spend_company > 4:
## :...number_project <= 5: 1 (1188.4/301.4)
## : number_project > 5: 0 (134.5/42.4)
## time_spend_company <= 4:
## :...salary = high: 0 (53.6)
## salary in {low,medium}:
## :...last_evaluation > 0.95: 0 (28.7)
## last_evaluation <= 0.95: [S1]
##
## SubTree [S1]
##
## sales in {accounting,IT,management,marketing,product_mng,RandD,sales,
## : support}: 0 (511.9/180.6)
## sales in {hr,technical}: 1 (432.3/96.6)
##
## ----- Trial 2: -----
##
## Decision tree:
##
## satisfaction_level <= 0.11: 1 (524.3)
## satisfaction_level > 0.11:
## :...satisfaction_level > 0.92: 0 (761.9)
## satisfaction_level <= 0.92:
## :...time_spend_company > 6: 0 (260.6)
## time_spend_company <= 6:
## :...last_evaluation <= 0.44: 0 (228.1)
## last_evaluation > 0.44:
## :...number_project <= 2:
## :...satisfaction_level <= 0.35: 0 (139.6/12.8)
## : satisfaction_level > 0.35:
## : :...Work_accident > 0: 0 (122.9/38.9)
## : Work_accident <= 0:
## : :...last_evaluation <= 0.57:
## : :...satisfaction_level > 0.46: 1 (180.4/83.7)
## : : satisfaction_level <= 0.46:
## : : :...average_montly_hours <= 166: 1 (946.8/59.1)
## : : average_montly_hours > 166: 0 (33.7/11)
## : last_evaluation > 0.57:
## : :...salary = high: 0 (22.2)
## : salary in {low,medium}:
## : :...sales in {accounting,hr,management,
## : : product_mng}: 0 (62.1/3.6)
## : sales in {IT,marketing,RandD,sales,support,
## : technical}: 1 (529.8/205)
## number_project > 2:
## :...time_spend_company <= 3:
## :...satisfaction_level > 0.36: 0 (4446/698.8)
## : satisfaction_level <= 0.36:
## : :...Work_accident > 0: 0 (52.5)
## : Work_accident <= 0:
## : :...number_project <= 3: 0 (101.4/8.9)
## : number_project > 3: 1 (538.2/225)
## time_spend_company > 3:
## :...average_montly_hours <= 130: 0 (106)
## average_montly_hours > 130:
## :...average_montly_hours <= 139: 1 (254/62.2)
## average_montly_hours > 139:
## :...number_project > 6: 1 (54.6)
## number_project <= 6:
## :...average_montly_hours <= 148: 0 (102.8)
## average_montly_hours > 148:
## :...satisfaction_level <= 0.24:
## :...average_montly_hours <= 276: 0 (513.7/94.2)
## : average_montly_hours > 276: 1 (91/32.2)
## satisfaction_level > 0.24:
## :...number_project > 4: [S1]
## number_project <= 4:
## :...average_montly_hours > 244: [S2]
## average_montly_hours <= 244:
## :...salary = high: 0 (55)
## salary in {low,medium}: [S3]
##
## SubTree [S1]
##
## average_montly_hours <= 162: 1 (213.3/33.5)
## average_montly_hours > 162:
## :...average_montly_hours <= 194: 0 (169.2/27.1)
## average_montly_hours > 194:
## :...average_montly_hours > 271: 0 (79.7/18)
## average_montly_hours <= 271:
## :...sales in {accounting,IT,marketing,product_mng,RandD,
## : support}: 1 (435.1/78)
## sales in {hr,management,sales,technical}: 0 (550.3/267.2)
##
## SubTree [S2]
##
## average_montly_hours > 281: 0 (35.2/3)
## average_montly_hours <= 281:
## :...time_spend_company <= 4: 0 (243.9/77.2)
## time_spend_company > 4: 1 (423.5/138)
##
## SubTree [S3]
##
## sales in {IT,marketing,RandD,sales}: 0 (351.6/64.2)
## sales in {accounting,hr,management,product_mng,support,technical}:
## :...average_montly_hours > 235: 0 (80.9/9.6)
## average_montly_hours <= 235:
## :...average_montly_hours > 232: 1 (38.8/2.4)
## average_montly_hours <= 232:
## :...sales = support: 0 (113.8/18.8)
## sales in {accounting,hr,management,product_mng,technical}:
## :...satisfaction_level <= 0.44: 1 (108.8/31.6)
## satisfaction_level > 0.44: 0 (528.4/212.9)
##
## ----- Trial 3: -----
##
## Decision tree:
##
## satisfaction_level <= 0.11: 1 (423.2)
## satisfaction_level > 0.11:
## :...satisfaction_level > 0.92: 0 (615.1)
## satisfaction_level <= 0.92:
## :...average_montly_hours <= 125: 0 (305.5)
## average_montly_hours > 125:
## :...time_spend_company > 6: 0 (199.3)
## time_spend_company <= 6:
## :...last_evaluation <= 0.44: 0 (157.1)
## last_evaluation > 0.44:
## :...time_spend_company > 4:
## :...number_project <= 3: 0 (696.2/183.7)
## : number_project > 3:
## : :...satisfaction_level > 0.71:
## : :...last_evaluation <= 0.62: 0 (62.2)
## : : last_evaluation > 0.62:
## : : :...number_project <= 5: 1 (1329/195.6)
## : : number_project > 5: 0 (63.7/4.6)
## : satisfaction_level <= 0.71:
## : :...average_montly_hours <= 139: 1 (156.2/36.3)
## : average_montly_hours > 139:
## : :...last_evaluation <= 0.54: 0 (83.8/1.5)
## : last_evaluation > 0.54:
## : :...last_evaluation <= 0.58: 1 (115.8/34.8)
## : last_evaluation > 0.58: 0 (672/226.9)
## time_spend_company <= 4:
## :...number_project <= 2:
## :...satisfaction_level > 0.46:
## : :...average_montly_hours <= 273: 0 (749.3/191.7)
## : : average_montly_hours > 273: 1 (86.4/16.8)
## : satisfaction_level <= 0.46:
## : :...satisfaction_level <= 0.35: 0 (97.1/12.6)
## : satisfaction_level > 0.35:
## : :...average_montly_hours <= 162: 1 (1000.9/115.3)
## : average_montly_hours > 162: 0 (94.7/21.5)
## number_project > 2:
## :...number_project > 6: 1 (53.8)
## number_project <= 6:
## :...average_montly_hours > 290: 1 (45.6)
## average_montly_hours <= 290:
## :...satisfaction_level > 0.9: 0 (156.9)
## satisfaction_level <= 0.9:
## :...sales in {accounting,IT,marketing,
## : product_mng,
## : RandD}: 0 (1593.9/243.1)
## sales in {hr,management,sales,support,
## : technical}:
## :...last_evaluation <= 0.49: 0 (211/53.2)
## last_evaluation > 0.49:
## :...Work_accident > 0: 0 (572.1/106.8)
## Work_accident <= 0: [S1]
##
## SubTree [S1]
##
## promotion_last_5years > 0: 1 (94.2/41.1)
## promotion_last_5years <= 0:
## :...salary = high: 0 (234.7/30.7)
## salary = medium:
## :...satisfaction_level <= 0.3: 0 (154/8.9)
## : satisfaction_level > 0.3:
## : :...last_evaluation > 0.94: 0 (108.6/8.8)
## : last_evaluation <= 0.94:
## : :...average_montly_hours > 259: 1 (225.9/101.8)
## : average_montly_hours <= 259:
## : :...average_montly_hours > 249: 0 (64.9)
## : average_montly_hours <= 249:
## : :...average_montly_hours <= 248: 0 (1002.3/291)
## : average_montly_hours > 248: 1 (44.1/6.3)
## salary = low:
## :...satisfaction_level > 0.89: 1 (68/12.1)
## satisfaction_level <= 0.89:
## :...average_montly_hours > 271: 0 (59.3)
## average_montly_hours <= 271:
## :...number_project <= 3: 0 (534.7/146.9)
## number_project > 3:
## :...satisfaction_level > 0.74: 0 (216.2)
## satisfaction_level <= 0.74:
## :...satisfaction_level > 0.72: 1 (107.4/29)
## satisfaction_level <= 0.72:
## :...average_montly_hours > 231: 1 (358/120.3)
## average_montly_hours <= 231:
## :...average_montly_hours <= 184: 1 (453.5/197.5)
## average_montly_hours > 184: 0 (233/25.2)
##
## ----- Trial 4: -----
##
## Decision tree:
##
## satisfaction_level <= 0.11: 1 (341)
## satisfaction_level > 0.11:
## :...satisfaction_level > 0.92: 0 (495.6)
## satisfaction_level <= 0.92:
## :...average_montly_hours <= 125: 0 (246.2)
## average_montly_hours > 125:
## :...average_montly_hours > 289: 1 (65.4)
## average_montly_hours <= 289:
## :...last_evaluation > 0.99: 1 (392.7/93.5)
## last_evaluation <= 0.99:
## :...number_project <= 2:
## :...last_evaluation <= 0.57:
## : :...last_evaluation <= 0.44: 0 (27.7)
## : : last_evaluation > 0.44:
## : : :...satisfaction_level <= 0.46: 1 (860.5/125.1)
## : : satisfaction_level > 0.46: 0 (257.7/77.4)
## : last_evaluation > 0.57:
## : :...Work_accident > 0: 0 (52.8)
## : Work_accident <= 0:
## : :...sales in {accounting,hr,management,
## : : product_mng}: 0 (65.1)
## : sales in {IT,marketing,RandD,sales,support,
## : : technical}:
## : :...time_spend_company > 5: 0 (32.1)
## : time_spend_company <= 5:
## : :...average_montly_hours <= 150: 0 (82.2)
## : average_montly_hours > 150:
## : :...last_evaluation <= 0.77: 0 (310.1/79.6)
## : last_evaluation > 0.77: 1 (347.2/135.8)
## number_project > 2:
## :...time_spend_company <= 3: 0 (5163.3/931.4)
## time_spend_company > 3:
## :...number_project > 6: 1 (51.1)
## number_project <= 6:
## :...time_spend_company > 6: 0 (148.9)
## time_spend_company <= 6:
## :...promotion_last_5years > 0: 0 (50.4)
## promotion_last_5years <= 0:
## :...average_montly_hours > 281: 0 (60.9)
## average_montly_hours <= 281:
## :...last_evaluation > 0.82: [S1]
## last_evaluation <= 0.82: [S2]
##
## SubTree [S1]
##
## satisfaction_level <= 0.29: 0 (117.1/6.9)
## satisfaction_level > 0.29:
## :...number_project <= 3: 0 (318.6/123.3)
## number_project > 3:
## :...average_montly_hours <= 204: 0 (334.1/136.8)
## average_montly_hours > 204: 1 (1250/358.6)
##
## SubTree [S2]
##
## satisfaction_level <= 0.13: 0 (56.9)
## satisfaction_level > 0.13:
## :...average_montly_hours > 269: 1 (290.4/106.1)
## average_montly_hours <= 269:
## :...average_montly_hours > 255: 0 (188.5/12.7)
## average_montly_hours <= 255:
## :...average_montly_hours <= 131: 0 (46.2)
## average_montly_hours > 131:
## :...time_spend_company > 5: 0 (264.5/46.4)
## time_spend_company <= 5:
## :...sales in {hr,product_mng,RandD,sales}: 0 (484.7/138.3)
## sales in {accounting,IT,management,marketing,support,
## : technical}:
## :...number_project <= 3: 1 (342.3/118.4)
## number_project > 3:
## :...average_montly_hours > 234: 0 (72.6/0.8)
## average_montly_hours <= 234:
## :...sales in {management,marketing}: 0 (52.7)
## sales in {accounting,IT,support,technical}:
## :...satisfaction_level <= 0.31: 1 (227.2/65.2)
## satisfaction_level > 0.31: 0 (403.6/154.6)
##
## ----- Trial 5: -----
##
## Decision tree:
##
## satisfaction_level <= 0.11: 1 (277.2)
## satisfaction_level > 0.11:
## :...number_project > 6: 1 (81.9)
## number_project <= 6:
## :...satisfaction_level > 0.92: 0 (402.9)
## satisfaction_level <= 0.92:
## :...average_montly_hours <= 125: 0 (200.1)
## average_montly_hours > 125:
## :...last_evaluation <= 0.44: 0 (155.4)
## last_evaluation > 0.44:
## :...last_evaluation <= 0.47: 1 (485.9/178.4)
## last_evaluation > 0.47:
## :...time_spend_company > 4:
## :...time_spend_company > 6: 0 (137.2)
## : time_spend_company <= 6:
## : :...satisfaction_level <= 0.29: 0 (469.6/76.3)
## : satisfaction_level > 0.29:
## : :...last_evaluation > 0.87:
## : :...average_montly_hours <= 215: 0 (192/62.3)
## : : average_montly_hours > 215: 1 (828.4/174)
## : last_evaluation <= 0.87:
## : :...average_montly_hours <= 131: 0 (28)
## : average_montly_hours > 131:
## : :...average_montly_hours <= 135: 1 (76.2/5.1)
## : average_montly_hours > 135: [S1]
## time_spend_company <= 4:
## :...satisfaction_level <= 0.47:
## :...average_montly_hours <= 161: 1 (1078/395.2)
## : average_montly_hours > 161:
## : :...average_montly_hours <= 167: 0 (62.6)
## : average_montly_hours > 167:
## : :...number_project <= 2: 0 (138.5/9.1)
## : number_project > 2:
## : :...satisfaction_level > 0.3: 1 (526.9/239.7)
## : satisfaction_level <= 0.3:
## : :...last_evaluation <= 0.98: 0 (541.7/87.5)
## : last_evaluation > 0.98: 1 (53.1/15.3)
## satisfaction_level > 0.47:
## :...number_project > 5: 1 (331.9/142.7)
## number_project <= 5:
## :...salary = high: 0 (336.4)
## salary in {low,medium}:
## :...last_evaluation <= 0.52: 0 (335.8)
## last_evaluation > 0.52:
## :...satisfaction_level > 0.9: 0 (151.3)
## satisfaction_level <= 0.9:
## :...sales in {hr,management,
## : marketing}: 0 (773/266.8)
## sales in {accounting,IT,
## : product_mng,RandD,
## : sales,support,
## : technical}: [S2]
##
## SubTree [S1]
##
## satisfaction_level <= 0.37: 0 (48)
## satisfaction_level > 0.37:
## :...last_evaluation <= 0.55: 0 (178.4/42.5)
## last_evaluation > 0.55:
## :...average_montly_hours > 280: 1 (65.9/11.3)
## average_montly_hours <= 280:
## :...sales in {hr,IT,management,product_mng}: 0 (164.8/37.6)
## sales in {accounting,marketing,RandD,sales,support,technical}:
## :...number_project <= 4: 0 (466.8/187.2)
## number_project > 4: 1 (494.5/173.7)
##
## SubTree [S2]
##
## promotion_last_5years > 0: 0 (107.4)
## promotion_last_5years <= 0:
## :...last_evaluation > 0.94: 0 (556.6/41.6)
## last_evaluation <= 0.94:
## :...average_montly_hours <= 148: 0 (343.3/40.6)
## average_montly_hours > 148:
## :...sales in {accounting,IT,product_mng,RandD}: 0 (553.7/72.3)
## sales in {sales,support,technical}:
## :...satisfaction_level > 0.77:
## :...average_montly_hours > 268: 0 (70.8)
## : average_montly_hours <= 268:
## : :...last_evaluation <= 0.57: 0 (57.4)
## : last_evaluation > 0.57:
## : :...last_evaluation > 0.83: 0 (155.3/25.6)
## : last_evaluation <= 0.83:
## : :...satisfaction_level <= 0.86: 1 (459.3/163.8)
## : satisfaction_level > 0.86: 0 (137.5/48.3)
## satisfaction_level <= 0.77:
## :...last_evaluation <= 0.54: 1 (172.1/72.5)
## last_evaluation > 0.54:
## :...satisfaction_level > 0.66: 0 (558.3/19.7)
## satisfaction_level <= 0.66:
## :...average_montly_hours <= 163: 0 (95.6)
## average_montly_hours > 163:
## :...average_montly_hours <= 164: 1 (54.3/3.5)
## average_montly_hours > 164:
## :...average_montly_hours <= 178: 0 (110.8)
## average_montly_hours > 178:
## :...average_montly_hours <= 181: 1 (84.9/25.6)
## average_montly_hours > 181:
## :...average_montly_hours > 270: 0 (76.6)
## average_montly_hours <= 270: [S3]
##
## SubTree [S3]
##
## average_montly_hours <= 197: 0 (69)
## average_montly_hours > 197:
## :...number_project <= 2: 1 (111.1/33.9)
## number_project > 2: 0 (643.8/169)
##
## ----- Trial 6: -----
##
## Decision tree:
##
## satisfaction_level <= 0.11: 1 (228.4)
## satisfaction_level > 0.11:
## :...number_project > 6: 1 (67.5)
## number_project <= 6:
## :...average_montly_hours > 289: 1 (70.3)
## average_montly_hours <= 289:
## :...satisfaction_level > 0.92: 0 (331.9)
## satisfaction_level <= 0.92:
## :...average_montly_hours <= 125: 0 (164.9)
## average_montly_hours > 125:
## :...time_spend_company > 4:
## :...time_spend_company > 6: 0 (124.2)
## : time_spend_company <= 6:
## : :...satisfaction_level <= 0.69:
## : :...last_evaluation <= 0.49: 0 (121.1)
## : : last_evaluation > 0.49:
## : : :...average_montly_hours <= 166: 1 (415.7/199.4)
## : : average_montly_hours > 166:
## : : :...average_montly_hours <= 218: 0 (318.7/24.9)
## : : average_montly_hours > 218:
## : : :...average_montly_hours <= 220: 1 (46.9/0.8)
## : : average_montly_hours > 220: 0 (661.6/240.9)
## : satisfaction_level > 0.69:
## : :...number_project > 5: 0 (77.6)
## : number_project <= 5:
## : :...average_montly_hours > 277: 0 (43.1)
## : average_montly_hours <= 277:
## : :...number_project <= 2: 0 (56.7/12.1)
## : number_project > 2:
## : :...average_montly_hours <= 177: 0 (99.1/32.1)
## : average_montly_hours > 177:
## : :...last_evaluation <= 0.62: 0 (103.7/36.1)
## : last_evaluation > 0.62: [S1]
## time_spend_company <= 4:
## :...number_project <= 2:
## :...last_evaluation > 0.57: 0 (850.1/199.6)
## : last_evaluation <= 0.57:
## : :...time_spend_company <= 2: 0 (107.2/10)
## : time_spend_company > 2:
## : :...satisfaction_level <= 0.35: 0 (67.5/10)
## : satisfaction_level > 0.35: 1 (704.5/164)
## number_project > 2:
## :...sales in {hr,management}:
## :...time_spend_company <= 2: 0 (123.7)
## : time_spend_company > 2:
## : :...last_evaluation <= 0.58: 0 (69.1)
## : last_evaluation > 0.58:
## : :...last_evaluation <= 0.62: 1 (148.3/22.9)
## : last_evaluation > 0.62: 0 (432.6/191.5)
## sales in {accounting,IT,marketing,product_mng,
## : RandD,sales,support,technical}:
## :...last_evaluation <= 0.44: 0 (73.6)
## last_evaluation > 0.44:
## :...satisfaction_level > 0.55:
## :...average_montly_hours <= 131: 1 (52.1/13.4)
## : average_montly_hours > 131:
## : :...sales in {marketing,product_mng,
## : : RandD}: 0 (388.3)
## : sales in {accounting,IT,sales,
## : : support,technical}:
## : :...number_project <= 4: 0 (2422.4/203.6)
## : number_project > 4:
## : :...Work_accident > 0: 0 (105.3)
## : Work_accident <= 0: [S2]
## satisfaction_level <= 0.55:
## :...sales in {IT,
## : product_mng}: 0 (260.4/11.4)
## sales in {accounting,marketing,RandD,
## : sales,support,technical}:
## :...average_montly_hours <= 132: 0 (54.5)
## average_montly_hours > 132: [S3]
##
## SubTree [S1]
##
## sales in {IT,management,marketing,product_mng,RandD}: 1 (238.8/4.5)
## sales in {accounting,hr,sales,support,technical}:
## :...last_evaluation > 0.99: 1 (100.8)
## last_evaluation <= 0.99:
## :...satisfaction_level <= 0.72: 1 (48.8)
## satisfaction_level > 0.72:
## :...number_project <= 3: 0 (42.7)
## number_project > 3:
## :...last_evaluation > 0.95: 0 (179.6/84.2)
## last_evaluation <= 0.95:
## :...satisfaction_level <= 0.73: 0 (43.6/13.3)
## satisfaction_level > 0.73: 1 (568.8/111.2)
##
## SubTree [S2]
##
## average_montly_hours > 263: 0 (108.4)
## average_montly_hours <= 263:
## :...sales in {accounting,support}: 0 (153.8/14.5)
## sales in {IT,sales,technical}:
## :...average_montly_hours <= 262: 0 (636.4/217.3)
## average_montly_hours > 262: 1 (53.7/11.8)
##
## SubTree [S3]
##
## satisfaction_level <= 0.14: 1 (183.5/84.4)
## satisfaction_level > 0.14:
## :...Work_accident > 0: 0 (244.4/22.3)
## Work_accident <= 0:
## :...salary = high: 0 (91.3/10.3)
## salary in {low,medium}:
## :...satisfaction_level <= 0.16: 0 (66.6)
## satisfaction_level > 0.16:
## :...satisfaction_level <= 0.17: 1 (75.5/26.3)
## satisfaction_level > 0.17:
## :...satisfaction_level <= 0.18: 0 (34.1)
## satisfaction_level > 0.18:
## :...time_spend_company <= 3: 0 (1210.3/315.2)
## time_spend_company > 3:
## :...last_evaluation <= 0.5: 1 (73.9/4.6)
## last_evaluation > 0.5: 0 (544/203.1)
##
## ----- Trial 7: -----
##
## Decision tree:
##
## satisfaction_level <= 0.11: 1 (188.2)
## satisfaction_level > 0.11:
## :...number_project > 6: 1 (55.6)
## number_project <= 6:
## :...satisfaction_level > 0.92: 0 (273.4)
## satisfaction_level <= 0.92:
## :...time_spend_company > 4:
## :...average_montly_hours <= 215:
## : :...satisfaction_level <= 0.85: 0 (1497.3/260.8)
## : : satisfaction_level > 0.85: 1 (126.2/37.6)
## : average_montly_hours > 215:
## : :...time_spend_company > 6: 0 (44.8)
## : time_spend_company <= 6:
## : :...last_evaluation <= 0.67: 0 (341.2/95.8)
## : last_evaluation > 0.67:
## : :...number_project > 5: 0 (162.8/67)
## : number_project <= 5:
## : :...satisfaction_level <= 0.56: 0 (227.4/95.4)
## : satisfaction_level > 0.56:
## : :...average_montly_hours <= 277: 1 (1610.2/242.3)
## : average_montly_hours > 277: 0 (41.5/6.9)
## time_spend_company <= 4:
## :...last_evaluation <= 0.44: 0 (188)
## last_evaluation > 0.44:
## :...satisfaction_level > 0.48:
## :...average_montly_hours > 286: 1 (26.2/1.5)
## : average_montly_hours <= 286:
## : :...number_project > 5: 0 (464/213.3)
## : number_project <= 5:
## : :...sales in {marketing,
## : : product_mng}: 0 (276.7)
## : sales in {accounting,hr,IT,management,RandD,
## : : sales,support,technical}:
## : :...satisfaction_level > 0.9: 0 (128.8)
## : satisfaction_level <= 0.9:
## : :...salary in {high,
## : : medium}: 0 (2120.8/208.4)
## : salary = low:
## : :...promotion_last_5years > 0: 1 (62.6/15.8)
## : promotion_last_5years <= 0:
## : :...last_evaluation <= 0.47: 1 (86.2/34.2)
## : last_evaluation > 0.47: 0 (2360.3/533.5)
## satisfaction_level <= 0.48:
## :...number_project <= 2:
## :...satisfaction_level <= 0.35: 0 (131.7/14.1)
## : satisfaction_level > 0.35:
## : :...average_montly_hours <= 161: 1 (545.6/137.4)
## : average_montly_hours > 161: 0 (176.1/64)
## number_project > 2:
## :...sales = IT: 0 (87.3)
## sales in {accounting,hr,management,marketing,
## : product_mng,RandD,sales,support,
## : technical}:
## :...average_montly_hours <= 132: 0 (74.9)
## average_montly_hours > 132:
## :...promotion_last_5years > 0: 0 (32.1)
## promotion_last_5years <= 0:
## :...last_evaluation > 0.98: 1 (138.5/50.7)
## last_evaluation <= 0.98:
## :...average_montly_hours <= 141: 1 (186.4/76.2)
## average_montly_hours > 141: [S1]
##
## SubTree [S1]
##
## average_montly_hours <= 148: 0 (74.6)
## average_montly_hours > 148:
## :...satisfaction_level <= 0.26:
## :...last_evaluation > 0.86: 0 (101.7)
## : last_evaluation <= 0.86:
## : :...average_montly_hours <= 168: 1 (101.4/38.8)
## : average_montly_hours > 168: 0 (499.9/87.8)
## satisfaction_level > 0.26:
## :...sales in {hr,RandD}: 0 (48.5)
## sales in {accounting,management,marketing,product_mng,sales,support,
## : technical}:
## :...time_spend_company > 3: 1 (350.6/120.8)
## time_spend_company <= 3:
## :...satisfaction_level <= 0.32: 1 (215.1/79.9)
## satisfaction_level > 0.32: 0 (419.7/61.9)
##
## ----- Trial 8: -----
##
## Decision tree:
##
## time_spend_company > 4:
## :...time_spend_company > 6: 0 (298.5)
## : time_spend_company <= 6:
## : :...average_montly_hours <= 215:
## : :...satisfaction_level <= 0.3: 0 (412.8/1.3)
## : : satisfaction_level > 0.3:
## : : :...average_montly_hours <= 131: 0 (134.2)
## : : average_montly_hours > 131:
## : : :...satisfaction_level <= 0.34: 1 (143.1/23.3)
## : : satisfaction_level > 0.34: 0 (880/278.9)
## : average_montly_hours > 215:
## : :...average_montly_hours > 289: 1 (67.1)
## : average_montly_hours <= 289:
## : :...average_montly_hours > 282: 0 (40.9/1.7)
## : average_montly_hours <= 282:
## : :...last_evaluation <= 0.81:
## : :...average_montly_hours <= 272: 0 (475.3/104.3)
## : : average_montly_hours > 272: 1 (319.5/78.5)
## : last_evaluation > 0.81:
## : :...average_montly_hours > 277: 0 (28.5/0.9)
## : average_montly_hours <= 277:
## : :...number_project <= 3: 0 (228.5/69.4)
## : number_project > 3:
## : :...satisfaction_level <= 0.71: 0 (288.5/137.4)
## : satisfaction_level > 0.71: 1 (1009.5/27.8)
## time_spend_company <= 4:
## :...average_montly_hours > 287: 1 (115.8)
## average_montly_hours <= 287:
## :...number_project > 6: 1 (52.3)
## number_project <= 6:
## :...average_montly_hours <= 126: 0 (281.1/1.7)
## average_montly_hours > 126:
## :...promotion_last_5years > 0: 0 (274.9/4.1)
## promotion_last_5years <= 0:
## :...sales in {hr,IT,product_mng,RandD,support}:
## :...satisfaction_level <= 0.15: 1 (168.6/67.1)
## : satisfaction_level > 0.15: 0 (2458.1/203.3)
## sales in {accounting,management,marketing,sales,technical}:
## :...satisfaction_level > 0.7: 0 (1462.2/156.2)
## satisfaction_level <= 0.7:
## :...satisfaction_level > 0.69: 1 (169.2/82.5)
## satisfaction_level <= 0.69:
## :...satisfaction_level > 0.64: 0 (250.3)
## satisfaction_level <= 0.64:
## :...average_montly_hours <= 197: 0 (2005.1/451)
## average_montly_hours > 197:
## :...Work_accident > 0: 0 (167/2)
## Work_accident <= 0:
## :...satisfaction_level <= 0.16: 0 (158.1/45.1)
## satisfaction_level > 0.16: [S1]
##
## SubTree [S1]
##
## average_montly_hours <= 205: 1 (180.2/58.4)
## average_montly_hours > 205:
## :...average_montly_hours <= 213: 0 (100.8)
## average_montly_hours > 213:
## :...sales = marketing: 0 (59.3)
## sales in {accounting,management,sales,technical}:
## :...last_evaluation > 0.84: 0 (308.2/76.4)
## last_evaluation <= 0.84:
## :...salary = high: 0 (41.7)
## salary in {low,medium}:
## :...average_montly_hours <= 218: 1 (68/6.5)
## average_montly_hours > 218:
## :...average_montly_hours <= 231: 0 (90.5)
## average_montly_hours > 231:
## :...last_evaluation <= 0.82: 0 (534.6/234.7)
## last_evaluation > 0.82: 1 (126.8/11.5)
##
## ----- Trial 9: -----
##
## Decision tree:
##
## number_project <= 2:
## :...satisfaction_level > 0.46: 0 (967.4/199.1)
## : satisfaction_level <= 0.46:
## : :...average_montly_hours <= 125: 0 (103.9)
## : average_montly_hours > 125:
## : :...last_evaluation > 0.57: 0 (312.1/73)
## : last_evaluation <= 0.57:
## : :...average_montly_hours <= 163: 1 (2215.2/43.4)
## : average_montly_hours > 163: 0 (87.8/4.6)
## number_project > 2:
## :...satisfaction_level <= 0.11: 1 (518.8)
## satisfaction_level > 0.11:
## :...time_spend_company > 4:
## :...average_montly_hours <= 215: 0 (1253.9/166.8)
## : average_montly_hours > 215:
## : :...number_project <= 3: 0 (281.3/58.8)
## : number_project > 3:
## : :...last_evaluation <= 0.8: 0 (460.8/145.8)
## : last_evaluation > 0.8:
## : :...satisfaction_level <= 0.7: 0 (241/104.6)
## : satisfaction_level > 0.7:
## : :...number_project <= 5: 1 (886.7/18.5)
## : number_project > 5: 0 (54.4/8.6)
## time_spend_company <= 4:
## :...average_montly_hours > 290: 1 (54.2)
## average_montly_hours <= 290:
## :...number_project > 6: 1 (27.9)
## number_project <= 6:
## :...satisfaction_level > 0.53: 0 (3017.3/32.3)
## satisfaction_level <= 0.53:
## :...average_montly_hours <= 135: 0 (140.6)
## average_montly_hours > 135:
## :...average_montly_hours > 278: 0 (102.3)
## average_montly_hours <= 278:
## :...sales in {IT,product_mng,
## : support}: 0 (541.2/33.7)
## sales in {accounting,hr,management,marketing,
## : RandD,sales,technical}:
## :...salary = high: 0 (109.4)
## salary in {low,medium}:
## :...Work_accident > 0: 0 (267.8/26.4)
## Work_accident <= 0:
## :...last_evaluation <= 0.82: [S1]
## last_evaluation > 0.82:
## :...last_evaluation <= 0.84: 1 (116.4/20.6)
## last_evaluation > 0.84: 0 (496.7/159.1)
##
## SubTree [S1]
##
## average_montly_hours <= 269: 0 (1007.9/177.3)
## average_montly_hours > 269: 1 (84.9/27.8)
##
##
## Evaluation on training data (13500 cases):
##
## Trial Decision Tree
## ----- ----------------
## Size Errors
##
## 0 32 252( 1.9%)
## 1 28 1065( 7.9%)
## 2 37 1331( 9.9%)
## 3 40 931( 6.9%)
## 4 34 759( 5.6%)
## 5 48 1192( 8.8%)
## 6 51 632( 4.7%)
## 7 36 491( 3.6%)
## 8 34 2135(15.8%)
## 9 24 342( 2.5%)
## boost 214( 1.6%) <<
##
##
## (a) (b) <-classified as
## ---- ----
## 10246 20 (a): class 0
## 194 3040 (b): class 1
##
##
## Attribute usage:
##
## 100.00% satisfaction_level
## 100.00% number_project
## 100.00% average_montly_hours
## 100.00% time_spend_company
## 92.02% last_evaluation
## 89.96% promotion_last_5years
## 88.65% sales
## 75.96% salary
## 63.03% Work_accident
##
##
## Time: 0.8 secs
## Code: Evaluate the model using the Test Set (y)
p <- predict( c50model, C50xtest, type="class")
sum( p == C50ytest ) / length( p )
## [1] 0.9833222
# Code: Probability of Left
pprob <- predict( c50model, C50xtest, type="prob" )
library(rpart)
library(rattle)
decTree <- rpart(left~satisfaction_level + last_evaluation + number_project + average_montly_hours + time_spend_company + Work_accident + promotion_last_5years + sales + salary,data=train)
data$left <- factor(data$left)
model <- rpart(left ~ ., alldata)
decTreeResult1 <- predict(model, train, type=c("class"))
library(ROCR)
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
pr <- predict(model, test, type="prob")
pred <- prediction(pr[,2], labels=test$left)
perf <- performance(pred, measure="tpr", x.measure="fpr")
plot(perf, main="ROC curve", col="darkmagenta", lwd=3)
abline(a=0, b=1, lwd=2, lty=2)
perf_auc <- performance(pred, measure="auc")
auc<-unlist(perf_auc@y.values)
buc<-signif(auc,digits=4)
legend(0.7,0.4,(c(buc)),border="white",cex=1.0,box.col = "white")
legend(0.55,0.4,c("AUC="),border="white",cex=1.0,box.col = "white")
randomForestResult <- predict(randomForestModel, train1, type=c("class"))
FRR <- predict(randomForestModel, train1, type=c("class"))
## test$PAtt <– predict(randomForestModel, test, type=”response”)
## prediction <- function(t) ifelse(randomForestResult > t, 1,0)
pred <- prediction(predictions = FRR, labels=train1$left)
perf <- performance(pred, measure="tpr", x.measure="fpr")
plot(perf, main="ROC curve", col="darkmagenta", lwd=3)
abline(a=0, b=1, lwd=2, lty=2)
erf_auc <- performance(pred, measure="auc")
auc<-unlist(perf_auc@y.values)
buc<-signif(auc,digits=4)
legend(0.7,0.4,(c(buc)),border="white",cex=1.0,box.col = "white")
legend(0.55,0.4,c("AUC="),border="white",cex=1.0,box.col = "white")
pr <- predict(c50model, C50xtest, type="prob")
pred <- prediction(predictions = pr[,2], labels=C50ytest)
perf <- performance(pred, measure="tpr", x.measure="fpr")
plot(perf, main="ROC curve", col="darkmagenta", lwd=3)
abline(a=0, b=1, lwd=2, lty=2)
perf_auc <- performance(pred, measure="auc")
str(perf_auc)
## Formal class 'performance' [package "ROCR"] with 6 slots
## ..@ x.name : chr "None"
## ..@ y.name : chr "Area under the ROC curve"
## ..@ alpha.name : chr "none"
## ..@ x.values : list()
## ..@ y.values :List of 1
## .. ..$ : num 0.991
## ..@ alpha.values: list()
auc<-unlist(perf_auc@y.values)
buc<-signif(auc,digits=4)
legend(0.7,0.4,(c(buc)),border="white",cex=1.0,box.col = "white")
legend(0.55,0.4,c("AUC="),border="white",cex=1.0,box.col = "white")
# Code: Load Survival package
library(survival)
##
## Attaching package: 'survival'
## The following object is masked from 'package:caret':
##
## cluster
# Code: Load and attach data
attach(alldata)
# Code: Define variables
library(survival)
time <- time_spend_company
time<-as.numeric(time_spend_company)
event1<-as.numeric(left)
event <- event1
x <- cbind(satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,promotion_last_5years)
group <- left
# Code: Summary Statistics
summary(time)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 2.000 2.484 3.000 8.000
summary(event)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.000 1.000 1.238 1.000 2.000
summary(x)
## satisfaction_level last_evaluation number_project average_montly_hours
## Min. :0.0900 Min. : 1.00 Min. :1.000 Min. : 96.0
## 1st Qu.:0.4400 1st Qu.:21.00 1st Qu.:2.000 1st Qu.:156.0
## Median :0.6400 Median :37.00 Median :3.000 Median :200.0
## Mean :0.6128 Mean :36.61 Mean :2.803 Mean :201.1
## 3rd Qu.:0.8200 3rd Qu.:52.00 3rd Qu.:4.000 3rd Qu.:245.0
## Max. :1.0000 Max. :65.00 Max. :6.000 Max. :310.0
## time_spend_company Work_accident promotion_last_5years
## Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:1.000
## Median :2.000 Median :1.000 Median :1.000
## Mean :2.484 Mean :1.145 Mean :1.021
## 3rd Qu.:3.000 3rd Qu.:1.000 3rd Qu.:1.000
## Max. :8.000 Max. :2.000 Max. :2.000
summary(group)
## 0 1
## 11428 3571
# Code: Create Surv object
survival <- Surv(time,event)
# Code: Create Surv object
survival <- Surv(time,event)
# Code: # Kaplan-Meier non-parametric analysis by group
kmsurvival <- survfit(Surv(time,event) ~group)
plot(kmsurvival, conf.int=FALSE, col=c("blue","red"), xlab="Trial", ylab = "Proportion Surviving")
legend("topright", c("No","Yes"), col=c("blue","red"), lty = 1)
coxph <- coxph(survival ~ satisfaction_level + last_evaluation + number_project + average_montly_hours, alldata)
## Warning in fitter(X, Y, strats, offset, init, control,
## weights = weights, : Loglik converged before variable
## 2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65 ;
## beta may be infinite.
plot(survfit(coxph), ylim=c(0.7, 1))
title(main="Survival at Average Predictors")
#Code : Compute covariates are overtime, age, monthly income, and distance from home
cox.zph(coxph)
## rho chisq p
## satisfaction_level 0.468603 4.54e+02 0.000
## last_evaluation0.37 -0.577923 1.84e-08 1.000
## last_evaluation0.38 -0.391412 1.84e-08 1.000
## last_evaluation0.39 -0.450620 3.22e-08 1.000
## last_evaluation0.4 -0.501672 9.23e-09 1.000
## last_evaluation0.41 -0.379129 1.08e-08 1.000
## last_evaluation0.42 -0.405588 2.68e-08 1.000
## last_evaluation0.43 -0.137196 2.22e-09 1.000
## last_evaluation0.44 -0.566318 4.63e-08 1.000
## last_evaluation0.45 -0.017929 5.38e-09 1.000
## last_evaluation0.46 -0.072242 3.56e-08 1.000
## last_evaluation0.47 -0.107427 1.07e-07 1.000
## last_evaluation0.48 -0.138600 1.51e-07 1.000
## last_evaluation0.49 -0.167345 2.66e-07 1.000
## last_evaluation0.5 -0.072836 4.14e-08 1.000
## last_evaluation0.51 -0.083624 5.64e-08 1.000
## last_evaluation0.52 -0.127672 1.54e-07 1.000
## last_evaluation0.53 -0.093622 7.71e-08 1.000
## last_evaluation0.54 -0.051802 1.99e-08 1.000
## last_evaluation0.55 -0.122476 1.45e-07 1.000
## last_evaluation0.56 -0.116299 1.16e-07 1.000
## last_evaluation0.57 -0.091192 7.05e-08 1.000
## last_evaluation0.58 -0.008494 9.86e-09 1.000
## last_evaluation0.59 -0.054818 5.45e-07 0.999
## last_evaluation0.6 0.010538 1.74e-08 1.000
## last_evaluation0.61 0.012576 8.63e-08 1.000
## last_evaluation0.62 -0.038629 9.50e-08 1.000
## last_evaluation0.63 -0.029842 3.25e-07 1.000
## last_evaluation0.64 -0.690724 3.81e-08 1.000
## last_evaluation0.65 0.045889 3.28e-07 1.000
## last_evaluation0.66 -0.031739 5.51e-07 0.999
## last_evaluation0.67 -0.054341 5.38e-07 0.999
## last_evaluation0.68 -0.029231 1.33e-07 1.000
## last_evaluation0.69 0.175391 6.71e-09 1.000
## last_evaluation0.7 0.017701 3.16e-08 1.000
## last_evaluation0.71 -0.011124 1.68e-08 1.000
## last_evaluation0.72 0.065290 1.54e-06 0.999
## last_evaluation0.73 -0.020437 1.52e-07 1.000
## last_evaluation0.74 -0.045472 2.52e-07 1.000
## last_evaluation0.75 -0.044953 2.45e-07 1.000
## last_evaluation0.76 0.018430 1.85e-07 1.000
## last_evaluation0.77 -0.015457 5.74e-09 1.000
## last_evaluation0.78 -0.042220 5.32e-08 1.000
## last_evaluation0.79 0.016414 6.49e-09 1.000
## last_evaluation0.8 0.001574 5.13e-11 1.000
## last_evaluation0.81 0.000576 7.36e-12 1.000
## last_evaluation0.82 0.031289 2.34e-08 1.000
## last_evaluation0.83 0.037196 1.75e-08 1.000
## last_evaluation0.84 0.039173 1.65e-08 1.000
## last_evaluation0.85 0.058363 3.78e-08 1.000
## last_evaluation0.86 0.030758 1.29e-08 1.000
## last_evaluation0.87 0.036152 1.12e-08 1.000
## last_evaluation0.88 0.034248 2.11e-08 1.000
## last_evaluation0.89 0.033208 1.41e-08 1.000
## last_evaluation0.9 0.006677 5.97e-10 1.000
## last_evaluation0.91 0.075331 8.66e-08 1.000
## last_evaluation0.92 0.039565 1.76e-08 1.000
## last_evaluation0.93 0.021123 5.03e-09 1.000
## last_evaluation0.94 0.038513 2.07e-08 1.000
## last_evaluation0.95 0.051930 3.56e-08 1.000
## last_evaluation0.96 0.044371 3.26e-08 1.000
## last_evaluation0.97 0.043470 2.51e-08 1.000
## last_evaluation0.98 0.084003 1.16e-07 1.000
## last_evaluation0.99 0.039223 2.46e-08 1.000
## last_evaluation1 0.021282 2.93e-09 1.000
## number_project3 0.025375 2.20e+00 0.138
## number_project4 0.317451 2.21e+02 0.000
## number_project5 0.393844 2.70e+02 0.000
## number_project6 0.247959 8.46e+01 0.000
## number_project7 0.176536 8.34e+01 0.000
## average_montly_hours 0.369786 2.22e+02 0.000
## GLOBAL NA 2.97e+03 0.000
plot(cox.zph(coxph))