Load Libraries

library(caret)  # Model Building
library(pROC)   # ROC Curves
library(corrplot)
library(Hmisc)
library(dplyr)

Step 1: Data Exploration

Read Data

# Load the HR attrition data set. stringsAsFactors = TRUE is set explicitly
# because R >= 4.0 no longer converts strings to factors by default; without
# it `sales` and `salary` would be read as character columns.
data <- read.csv("HR data.csv", stringsAsFactors = TRUE)
# Inspect the column types and the first few values of each variable.
str(data)
## 'data.frame':    14999 obs. of  10 variables:
##  $ satisfaction_level   : num  0.38 0.8 0.11 0.72 0.37 0.41 0.1 0.92 0.89 0.42 ...
##  $ last_evaluation      : num  0.53 0.86 0.88 0.87 0.52 0.5 0.77 0.85 1 0.53 ...
##  $ number_project       : int  2 5 7 5 2 2 6 5 5 2 ...
##  $ average_montly_hours : int  157 262 272 223 159 153 247 259 224 142 ...
##  $ time_spend_company   : int  3 6 4 5 3 3 4 5 5 3 ...
##  $ Work_accident        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ left                 : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ promotion_last_5years: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ sales                : Factor w/ 10 levels "accounting","hr",..: 8 8 8 8 8 8 8 8 8 8 ...
##  $ salary               : Factor w/ 3 levels "high","low","medium": 2 3 3 2 2 2 2 2 2 2 ...

Define Data Types

# Salary is ordinal: encode it as an ordered factor with low < medium < high.
data$salary <- factor(
  data$salary,
  levels = c("low", "medium", "high"),
  ordered = TRUE
)
# Relabel the 0/1 outcome as "No"/"Yes" so the class levels are readable names.
data$left <- factor(data$left, levels = c(0, 1), labels = c("No", "Yes"))
# The remaining 0/1 indicators are categorical, not numeric.
data[c("Work_accident", "promotion_last_5years")] <- lapply(
  data[c("Work_accident", "promotion_last_5years")],
  factor
)

Data Summary

# Per-variable summary (counts, missing values, quantiles or frequency tables).
Hmisc::describe(data)
## data 
## 
##  10  Variables      14999  Observations
## ---------------------------------------------------------------------------
## satisfaction_level 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##    14999        0       92        1   0.6128   0.2823     0.11     0.21 
##      .25      .50      .75      .90      .95 
##     0.44     0.64     0.82     0.92     0.96 
## 
## lowest : 0.09 0.10 0.11 0.12 0.13, highest: 0.96 0.97 0.98 0.99 1.00
## ---------------------------------------------------------------------------
## last_evaluation 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##    14999        0       65        1   0.7161   0.1973     0.46     0.49 
##      .25      .50      .75      .90      .95 
##     0.56     0.72     0.87     0.95     0.98 
## 
## lowest : 0.36 0.37 0.38 0.39 0.40, highest: 0.96 0.97 0.98 0.99 1.00
## ---------------------------------------------------------------------------
## number_project 
##        n  missing distinct     Info     Mean      Gmd 
##    14999        0        6    0.945    3.803    1.367 
##                                               
## Value          2     3     4     5     6     7
## Frequency   2388  4055  4365  2761  1174   256
## Proportion 0.159 0.270 0.291 0.184 0.078 0.017
## ---------------------------------------------------------------------------
## average_montly_hours 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##    14999        0      215        1    201.1    57.48      130      137 
##      .25      .50      .75      .90      .95 
##      156      200      245      267      275 
## 
## lowest :  96  97  98  99 100, highest: 306 307 308 309 310
## ---------------------------------------------------------------------------
## time_spend_company 
##        n  missing distinct     Info     Mean      Gmd 
##    14999        0        8    0.905    3.498     1.43 
##                                                           
## Value          2     3     4     5     6     7     8    10
## Frequency   3244  6443  2557  1473   718   188   162   214
## Proportion 0.216 0.430 0.170 0.098 0.048 0.013 0.011 0.014
## ---------------------------------------------------------------------------
## Work_accident 
##        n  missing distinct 
##    14999        0        2 
##                       
## Value          0     1
## Frequency  12830  2169
## Proportion 0.855 0.145
## ---------------------------------------------------------------------------
## left 
##        n  missing distinct 
##    14999        0        2 
##                       
## Value         No   Yes
## Frequency  11428  3571
## Proportion 0.762 0.238
## ---------------------------------------------------------------------------
## promotion_last_5years 
##        n  missing distinct 
##    14999        0        2 
##                       
## Value          0     1
## Frequency  14680   319
## Proportion 0.979 0.021
## ---------------------------------------------------------------------------
## sales 
##        n  missing distinct 
##    14999        0       10 
##                                                                       
## Value       accounting          hr          IT  management   marketing
## Frequency          767         739        1227         630         858
## Proportion       0.051       0.049       0.082       0.042       0.057
##                                                                       
## Value      product_mng       RandD       sales     support   technical
## Frequency          902         787        4140        2229        2720
## Proportion       0.060       0.052       0.276       0.149       0.181
## ---------------------------------------------------------------------------
## salary 
##        n  missing distinct 
##    14999        0        3 
##                                
## Value         low medium   high
## Frequency    7316   6446   1237
## Proportion  0.488  0.430  0.082
## ---------------------------------------------------------------------------

Correlation amongst variables

# Pairwise correlations between the five numeric predictors, selected by name
# (these are the first five columns of `data`).
correlationMatrix <- cor(
  data[, c("satisfaction_level", "last_evaluation", "number_project",
           "average_montly_hours", "time_spend_company")]
)
print(correlationMatrix)
##                      satisfaction_level last_evaluation number_project
## satisfaction_level           1.00000000       0.1050212     -0.1429696
## last_evaluation              0.10502121       1.0000000      0.3493326
## number_project              -0.14296959       0.3493326      1.0000000
## average_montly_hours        -0.02004811       0.3397418      0.4172106
## time_spend_company          -0.10086607       0.1315907      0.1967859
##                      average_montly_hours time_spend_company
## satisfaction_level            -0.02004811         -0.1008661
## last_evaluation                0.33974180          0.1315907
## number_project                 0.41721063          0.1967859
## average_montly_hours           1.00000000          0.1277549
## time_spend_company             0.12775491          1.0000000
# Visualise the correlation matrix, one circle per pair of variables.
corrplot::corrplot(correlationMatrix, method = "circle")

Check for Absolute Correlation > 0.75

# Names of predictors caret would drop to keep every pairwise absolute
# correlation below the 0.75 cutoff.
highlyCorrelated <- findCorrelation(
  x = correlationMatrix,
  cutoff = 0.75,
  names = TRUE
)
print(highlyCorrelated)
## character(0)

Number of Employees Who Left

# Keep only the employees whose outcome is "Yes" (they left), then count them.
left.emps <- dplyr::filter(data, left == "Yes")
nrow(left.emps)
## [1] 3571

Valuable Employees

# "Valuable" leavers: those with a last evaluation score of at least 0.5.
val.emps <- dplyr::filter(left.emps, last_evaluation >= 0.5)
nrow(val.emps)
## [1] 2982

Out of the 3571 employees who left, 2982 ARE VALUABLE!

If low performers are not our concern, we can perform the analysis on only the valuable employees.

Objective : Only Prediction of Employees who are leaving

Create Data Partition

# Fix the RNG seed so the split below is reproducible.
set.seed(1)
# Draw one 60% training sample based on the outcome `left`; list = FALSE
# returns the row indices as a matrix rather than a list.
splitIndex <- createDataPartition(
  y = data$left,
  p = 0.6,
  times = 1,
  list = FALSE
)
# NOTE: `train` shares its name with caret::train(). A later call train(...)
# still resolves to the function, because R skips non-function objects when
# looking up the name being called.
train <- data[splitIndex, ]
test <- data[-splitIndex, ]

# Confirm the class proportions of the outcome in the training partition.
Hmisc::describe(train$left)
## train$left 
##        n  missing distinct 
##     9000        0        2 
##                       
## Value         No   Yes
## Frequency   6857  2143
## Proportion 0.762 0.238
# Same proportion check for the held-out test partition.
Hmisc::describe(test$left)
## test$left 
##        n  missing distinct 
##     5999        0        2 
##                       
## Value         No   Yes
## Frequency   4571  1428
## Proportion 0.762 0.238

Step 2 : Evaluate Algorithms

  1. Create Multiple Models
  2. Compare using metric ROC and pick the best algorithm

Set Control Parameters

# Resampling setup passed to train(): 5-fold cross-validation scored with
# twoClassSummary, which needs class probabilities (classProbs = TRUE).
objControl <- trainControl(
  method = "cv",
  number = 5,
  classProbs = TRUE,
  summaryFunction = twoClassSummary,
  savePredictions = TRUE
)
# Metric passed to train() for model selection; "ROC" is one of the values
# reported by twoClassSummary.
eval.metric <- "ROC"

Stepwise Logistic Regression

# Reset the seed so the cross-validation folds are reproducible.
set.seed(1)
# Logistic regression with stepwise AIC selection (caret method "glmStepAIC"),
# resampled according to objControl. The namespace prefix makes it explicit
# that the caret function is called, since `train` is also the training data.
fit.lr <- caret::train(
  left ~ .,
  data = train,
  method = "glmStepAIC",
  trControl = objControl,
  metric = eval.metric
)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Start:  AIC=6196.14
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesIT + salesmanagement + 
##     salesmarketing + salesproduct_mng + salesRandD + salessales + 
##     salessupport + salestechnical + salary.L + salary.Q
## 
##                          Df Deviance    AIC
## - salessales              1   6158.2 6194.2
## - salesIT                 1   6158.3 6194.3
## - salesproduct_mng        1   6158.3 6194.3
## - salessupport            1   6158.6 6194.6
## - salesmarketing          1   6159.0 6195.0
## <none>                        6158.1 6196.1
## - salesmanagement         1   6160.3 6196.3
## - saleshr                 1   6160.6 6196.6
## - salestechnical          1   6161.0 6197.0
## - salesRandD              1   6166.6 6202.6
## - last_evaluation         1   6168.8 6204.8
## - promotion_last_5years1  1   6174.0 6210.0
## - salary.Q                1   6182.7 6218.7
## - average_montly_hours    1   6192.4 6228.4
## - number_project          1   6258.5 6294.5
## - time_spend_company      1   6272.5 6308.5
## - salary.L                1   6310.1 6346.1
## - Work_accident1          1   6351.5 6387.5
## - satisfaction_level      1   7239.8 7275.8
## 
## Step:  AIC=6194.16
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesIT + salesmanagement + 
##     salesmarketing + salesproduct_mng + salesRandD + salessupport + 
##     salestechnical + salary.L + salary.Q
## 
##                          Df Deviance    AIC
## - salesproduct_mng        1   6158.6 6192.6
## - salesIT                 1   6158.6 6192.6
## - salessupport            1   6159.0 6193.0
## - salesmarketing          1   6159.5 6193.5
## <none>                        6158.2 6194.2
## - salesmanagement         1   6161.6 6195.6
## - saleshr                 1   6161.9 6195.9
## - salestechnical          1   6165.0 6199.0
## - last_evaluation         1   6168.8 6202.8
## - salesRandD              1   6172.4 6206.4
## - promotion_last_5years1  1   6174.0 6208.0
## - salary.Q                1   6182.7 6216.7
## - average_montly_hours    1   6192.5 6226.5
## - number_project          1   6258.5 6292.5
## - time_spend_company      1   6272.5 6306.5
## - salary.L                1   6310.2 6344.2
## - Work_accident1          1   6351.6 6385.6
## - satisfaction_level      1   7240.1 7274.1
## 
## Step:  AIC=6192.58
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesIT + salesmanagement + 
##     salesmarketing + salesRandD + salessupport + salestechnical + 
##     salary.L + salary.Q
## 
##                          Df Deviance    AIC
## - salesIT                 1   6158.9 6190.9
## - salessupport            1   6159.8 6191.8
## - salesmarketing          1   6160.1 6192.1
## <none>                        6158.6 6192.6
## - salesmanagement         1   6161.8 6193.8
## - saleshr                 1   6162.7 6194.7
## - salestechnical          1   6166.7 6198.7
## - last_evaluation         1   6169.2 6201.2
## - salesRandD              1   6172.5 6204.5
## - promotion_last_5years1  1   6174.3 6206.3
## - salary.Q                1   6183.2 6215.2
## - average_montly_hours    1   6193.0 6225.0
## - number_project          1   6259.2 6291.2
## - time_spend_company      1   6273.1 6305.1
## - salary.L                1   6311.2 6343.2
## - Work_accident1          1   6352.1 6384.1
## - satisfaction_level      1   7241.1 7273.1
## 
## Step:  AIC=6190.9
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesmanagement + salesmarketing + 
##     salesRandD + salessupport + salestechnical + salary.L + salary.Q
## 
##                          Df Deviance    AIC
## - salessupport            1   6160.4 6190.4
## - salesmarketing          1   6160.7 6190.7
## <none>                        6158.9 6190.9
## - salesmanagement         1   6161.9 6191.9
## - saleshr                 1   6163.5 6193.5
## - salestechnical          1   6168.3 6198.3
## - last_evaluation         1   6169.5 6199.5
## - salesRandD              1   6172.5 6202.5
## - promotion_last_5years1  1   6174.6 6204.6
## - salary.Q                1   6183.5 6213.5
## - average_montly_hours    1   6193.4 6223.4
## - number_project          1   6259.7 6289.7
## - time_spend_company      1   6273.5 6303.5
## - salary.L                1   6311.6 6341.6
## - Work_accident1          1   6352.5 6382.5
## - satisfaction_level      1   7242.1 7272.1
## 
## Step:  AIC=6190.45
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesmanagement + salesmarketing + 
##     salesRandD + salestechnical + salary.L + salary.Q
## 
##                          Df Deviance    AIC
## - salesmarketing          1   6161.8 6189.8
## <none>                        6160.4 6190.4
## - salesmanagement         1   6164.0 6192.0
## - saleshr                 1   6164.3 6192.3
## - salestechnical          1   6168.5 6196.5
## - last_evaluation         1   6171.1 6199.1
## - salesRandD              1   6175.7 6203.7
## - promotion_last_5years1  1   6176.2 6204.2
## - salary.Q                1   6184.8 6212.8
## - average_montly_hours    1   6194.9 6222.9
## - number_project          1   6261.3 6289.3
## - time_spend_company      1   6274.5 6302.5
## - salary.L                1   6313.2 6341.2
## - Work_accident1          1   6353.2 6381.2
## - satisfaction_level      1   7244.0 7272.0
## 
## Step:  AIC=6189.8
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesmanagement + salesRandD + 
##     salestechnical + salary.L + salary.Q
## 
##                          Df Deviance    AIC
## <none>                        6161.8 6189.8
## - saleshr                 1   6165.3 6191.3
## - salesmanagement         1   6165.7 6191.7
## - salestechnical          1   6169.1 6195.1
## - last_evaluation         1   6172.5 6198.5
## - promotion_last_5years1  1   6177.3 6203.3
## - salesRandD              1   6177.8 6203.8
## - salary.Q                1   6186.1 6212.1
## - average_montly_hours    1   6196.4 6222.4
## - number_project          1   6263.1 6289.1
## - time_spend_company      1   6276.3 6302.3
## - salary.L                1   6314.2 6340.2
## - Work_accident1          1   6354.4 6380.4
## - satisfaction_level      1   7244.3 7270.3
## Start:  AIC=6264.01
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesIT + salesmanagement + 
##     salesmarketing + salesproduct_mng + salesRandD + salessales + 
##     salessupport + salestechnical + salary.L + salary.Q
## 
##                          Df Deviance    AIC
## - salessales              1   6226.0 6262.0
## - salesproduct_mng        1   6226.1 6262.1
## - salesIT                 1   6226.1 6262.1
## - salestechnical          1   6226.8 6262.8
## - salesmarketing          1   6226.8 6262.8
## - salessupport            1   6227.5 6263.5
## <none>                        6226.0 6264.0
## - saleshr                 1   6228.5 6264.5
## - salesmanagement         1   6228.9 6264.9
## - salesRandD              1   6229.7 6265.7
## - last_evaluation         1   6238.0 6274.0
## - salary.Q                1   6244.3 6280.3
## - average_montly_hours    1   6254.7 6290.7
## - promotion_last_5years1  1   6255.1 6291.1
## - number_project          1   6315.9 6351.9
## - salary.L                1   6355.0 6391.0
## - time_spend_company      1   6360.3 6396.3
## - Work_accident1          1   6408.0 6444.0
## - satisfaction_level      1   7259.4 7295.4
## 
## Step:  AIC=6262.02
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesIT + salesmanagement + 
##     salesmarketing + salesproduct_mng + salesRandD + salessupport + 
##     salestechnical + salary.L + salary.Q
## 
##                          Df Deviance    AIC
## - salesproduct_mng        1   6226.2 6260.2
## - salesIT                 1   6226.2 6260.2
## - salesmarketing          1   6227.4 6261.4
## <none>                        6226.0 6262.0
## - salestechnical          1   6228.4 6262.4
## - salesmanagement         1   6229.9 6263.9
## - salessupport            1   6230.2 6264.2
## - saleshr                 1   6230.5 6264.5
## - salesRandD              1   6231.8 6265.8
## - last_evaluation         1   6238.0 6272.0
## - salary.Q                1   6244.3 6278.3
## - average_montly_hours    1   6254.7 6288.7
## - promotion_last_5years1  1   6255.1 6289.1
## - number_project          1   6315.9 6349.9
## - salary.L                1   6355.0 6389.0
## - time_spend_company      1   6360.3 6394.3
## - Work_accident1          1   6408.0 6442.0
## - satisfaction_level      1   7260.1 7294.1
## 
## Step:  AIC=6260.19
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesIT + salesmanagement + 
##     salesmarketing + salesRandD + salessupport + salestechnical + 
##     salary.L + salary.Q
## 
##                          Df Deviance    AIC
## - salesIT                 1   6226.4 6258.4
## - salesmarketing          1   6227.5 6259.5
## <none>                        6226.2 6260.2
## - salestechnical          1   6228.4 6260.4
## - salessupport            1   6230.2 6262.2
## - salesmanagement         1   6230.3 6262.3
## - saleshr                 1   6230.5 6262.5
## - salesRandD              1   6232.3 6264.3
## - last_evaluation         1   6238.2 6270.2
## - salary.Q                1   6244.5 6276.5
## - average_montly_hours    1   6254.8 6286.8
## - promotion_last_5years1  1   6255.4 6287.4
## - number_project          1   6315.9 6347.9
## - salary.L                1   6355.1 6387.1
## - time_spend_company      1   6360.3 6392.3
## - Work_accident1          1   6408.1 6440.1
## - satisfaction_level      1   7260.3 7292.3
## 
## Step:  AIC=6258.45
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesmanagement + salesmarketing + 
##     salesRandD + salessupport + salestechnical + salary.L + salary.Q
## 
##                          Df Deviance    AIC
## - salesmarketing          1   6228.0 6258.0
## <none>                        6226.4 6258.4
## - salestechnical          1   6229.1 6259.1
## - salesmanagement         1   6230.4 6260.4
## - salessupport            1   6231.2 6261.2
## - saleshr                 1   6231.2 6261.2
## - salesRandD              1   6232.3 6262.3
## - last_evaluation         1   6238.4 6268.4
## - salary.Q                1   6244.7 6274.7
## - average_montly_hours    1   6255.1 6285.1
## - promotion_last_5years1  1   6255.6 6285.6
## - number_project          1   6316.3 6346.3
## - salary.L                1   6355.3 6385.3
## - time_spend_company      1   6360.8 6390.8
## - Work_accident1          1   6408.4 6438.4
## - satisfaction_level      1   7260.3 7290.3
## 
## Step:  AIC=6257.96
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesmanagement + salesRandD + 
##     salessupport + salestechnical + salary.L + salary.Q
## 
##                          Df Deviance    AIC
## <none>                        6228.0 6258.0
## - salestechnical          1   6230.1 6258.1
## - salessupport            1   6232.0 6260.0
## - saleshr                 1   6232.2 6260.2
## - salesmanagement         1   6232.4 6260.4
## - salesRandD              1   6234.5 6262.5
## - last_evaluation         1   6240.0 6268.0
## - salary.Q                1   6246.2 6274.2
## - promotion_last_5years1  1   6256.5 6284.5
## - average_montly_hours    1   6256.7 6284.7
## - number_project          1   6318.0 6346.0
## - salary.L                1   6356.7 6384.7
## - time_spend_company      1   6362.7 6390.7
## - Work_accident1          1   6409.7 6437.7
## - satisfaction_level      1   7260.6 7288.6
## Start:  AIC=6247.98
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesIT + salesmanagement + 
##     salesmarketing + salesproduct_mng + salesRandD + salessales + 
##     salessupport + salestechnical + salary.L + salary.Q
## 
##                          Df Deviance    AIC
## - salesIT                 1   6210.0 6246.0
## - salesproduct_mng        1   6210.0 6246.0
## - salessales              1   6210.2 6246.2
## - salessupport            1   6210.8 6246.8
## - salesmanagement         1   6211.5 6247.5
## - salesRandD              1   6211.6 6247.6
## <none>                        6210.0 6248.0
## - salestechnical          1   6212.1 6248.1
## - salesmarketing          1   6212.2 6248.2
## - saleshr                 1   6212.7 6248.7
## - last_evaluation         1   6219.7 6255.7
## - salary.Q                1   6231.1 6267.1
## - promotion_last_5years1  1   6239.1 6275.1
## - average_montly_hours    1   6247.2 6283.2
## - number_project          1   6303.2 6339.2
## - time_spend_company      1   6325.1 6361.1
## - salary.L                1   6352.4 6388.4
## - Work_accident1          1   6378.6 6414.6
## - satisfaction_level      1   7276.9 7312.9
## 
## Step:  AIC=6245.99
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesmanagement + salesmarketing + 
##     salesproduct_mng + salesRandD + salessales + salessupport + 
##     salestechnical + salary.L + salary.Q
## 
##                          Df Deviance    AIC
## - salesproduct_mng        1   6210.1 6244.1
## - salessales              1   6210.5 6244.5
## - salessupport            1   6211.6 6245.6
## - salesmanagement         1   6211.8 6245.8
## <none>                        6210.0 6246.0
## - salesRandD              1   6212.0 6246.0
## - salesmarketing          1   6213.4 6247.4
## - saleshr                 1   6214.1 6248.1
## - salestechnical          1   6214.2 6248.2
## - last_evaluation         1   6219.7 6253.7
## - salary.Q                1   6231.1 6265.1
## - promotion_last_5years1  1   6239.1 6273.1
## - average_montly_hours    1   6247.2 6281.2
## - number_project          1   6303.2 6337.2
## - time_spend_company      1   6325.1 6359.1
## - salary.L                1   6352.4 6386.4
## - Work_accident1          1   6378.6 6412.6
## - satisfaction_level      1   7277.4 7311.4
## 
## Step:  AIC=6244.05
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesmanagement + salesmarketing + 
##     salesRandD + salessales + salessupport + salestechnical + 
##     salary.L + salary.Q
## 
##                          Df Deviance    AIC
## - salessales              1   6210.9 6242.9
## - salesmanagement         1   6211.8 6243.8
## <none>                        6210.1 6244.1
## - salesRandD              1   6212.1 6244.1
## - salessupport            1   6212.2 6244.2
## - salesmarketing          1   6214.1 6246.1
## - saleshr                 1   6215.0 6247.0
## - salestechnical          1   6215.6 6247.6
## - last_evaluation         1   6219.8 6251.8
## - salary.Q                1   6231.2 6263.2
## - promotion_last_5years1  1   6239.2 6271.2
## - average_montly_hours    1   6247.4 6279.4
## - number_project          1   6303.3 6335.3
## - time_spend_company      1   6325.3 6357.3
## - salary.L                1   6352.5 6384.5
## - Work_accident1          1   6378.6 6410.6
## - satisfaction_level      1   7277.6 7309.6
## 
## Step:  AIC=6242.95
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesmanagement + salesmarketing + 
##     salesRandD + salessupport + salestechnical + salary.L + salary.Q
## 
##                          Df Deviance    AIC
## - salessupport            1   6212.3 6242.3
## <none>                        6210.9 6242.9
## - salesmanagement         1   6213.7 6243.7
## - salesmarketing          1   6214.1 6244.1
## - salesRandD              1   6214.4 6244.4
## - saleshr                 1   6215.0 6245.0
## - salestechnical          1   6215.8 6245.8
## - last_evaluation         1   6220.6 6250.6
## - salary.Q                1   6232.0 6262.0
## - promotion_last_5years1  1   6239.9 6269.9
## - average_montly_hours    1   6248.1 6278.1
## - number_project          1   6304.3 6334.3
## - time_spend_company      1   6326.8 6356.8
## - salary.L                1   6353.8 6383.8
## - Work_accident1          1   6379.6 6409.6
## - satisfaction_level      1   7277.9 7307.9
## 
## Step:  AIC=6242.3
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesmanagement + salesmarketing + 
##     salesRandD + salestechnical + salary.L + salary.Q
## 
##                          Df Deviance    AIC
## <none>                        6212.3 6242.3
## - salesmarketing          1   6214.9 6242.9
## - salesmanagement         1   6215.6 6243.6
## - saleshr                 1   6215.7 6243.7
## - salestechnical          1   6216.2 6244.2
## - salesRandD              1   6216.5 6244.5
## - last_evaluation         1   6222.1 6250.1
## - salary.Q                1   6233.2 6261.2
## - promotion_last_5years1  1   6241.4 6269.4
## - average_montly_hours    1   6249.3 6277.3
## - number_project          1   6306.0 6334.0
## - time_spend_company      1   6327.6 6355.6
## - salary.L                1   6355.0 6383.0
## - Work_accident1          1   6380.4 6408.4
## - satisfaction_level      1   7279.0 7307.0
## Start:  AIC=6151.75
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesIT + salesmanagement + 
##     salesmarketing + salesproduct_mng + salesRandD + salessales + 
##     salessupport + salestechnical + salary.L + salary.Q
## 
##                          Df Deviance    AIC
## - salesproduct_mng        1   6113.8 6149.8
## - salesIT                 1   6113.8 6149.8
## - salessales              1   6114.1 6150.1
## - salesmanagement         1   6115.1 6151.1
## - salesRandD              1   6115.4 6151.4
## - salessupport            1   6115.5 6151.5
## - saleshr                 1   6115.5 6151.5
## <none>                        6113.8 6151.8
## - salestechnical          1   6116.7 6152.7
## - salesmarketing          1   6118.9 6154.9
## - salary.Q                1   6124.2 6160.2
## - last_evaluation         1   6125.8 6161.8
## - average_montly_hours    1   6142.9 6178.9
## - promotion_last_5years1  1   6145.3 6181.3
## - number_project          1   6204.9 6240.9
## - time_spend_company      1   6242.4 6278.4
## - salary.L                1   6249.5 6285.5
## - Work_accident1          1   6311.2 6347.2
## - satisfaction_level      1   7242.8 7278.8
## 
## Step:  AIC=6149.81
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesIT + salesmanagement + 
##     salesmarketing + salesRandD + salessales + salessupport + 
##     salestechnical + salary.L + salary.Q
## 
##                          Df Deviance    AIC
## - salesIT                 1   6113.8 6147.8
## - salessales              1   6114.9 6148.9
## - salesmanagement         1   6115.2 6149.2
## - salesRandD              1   6115.6 6149.6
## <none>                        6113.8 6149.8
## - saleshr                 1   6116.7 6150.7
## - salessupport            1   6117.2 6151.2
## - salestechnical          1   6119.6 6153.6
## - salesmarketing          1   6121.5 6155.5
## - salary.Q                1   6124.3 6158.3
## - last_evaluation         1   6125.9 6159.9
## - average_montly_hours    1   6143.1 6177.1
## - promotion_last_5years1  1   6145.4 6179.4
## - number_project          1   6205.2 6239.2
## - time_spend_company      1   6242.5 6276.5
## - salary.L                1   6249.6 6283.6
## - Work_accident1          1   6311.2 6345.2
## - satisfaction_level      1   7244.7 7278.7
## 
## Step:  AIC=6147.84
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesmanagement + salesmarketing + 
##     salesRandD + salessales + salessupport + salestechnical + 
##     salary.L + salary.Q
## 
##                          Df Deviance    AIC
## - salesmanagement         1   6115.2 6147.2
## - salessales              1   6115.7 6147.7
## - salesRandD              1   6115.7 6147.7
## <none>                        6113.8 6147.8
## - saleshr                 1   6117.4 6149.4
## - salessupport            1   6118.8 6150.8
## - salestechnical          1   6122.3 6154.3
## - salesmarketing          1   6123.3 6155.3
## - salary.Q                1   6124.3 6156.3
## - last_evaluation         1   6125.9 6157.9
## - average_montly_hours    1   6143.1 6175.1
## - promotion_last_5years1  1   6145.4 6177.4
## - number_project          1   6205.2 6237.2
## - time_spend_company      1   6242.5 6274.5
## - salary.L                1   6249.6 6281.6
## - Work_accident1          1   6311.3 6343.3
## - satisfaction_level      1   7244.7 7276.7
## 
## Step:  AIC=6147.21
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesmarketing + salesRandD + 
##     salessales + salessupport + salestechnical + salary.L + salary.Q
## 
##                          Df Deviance    AIC
## - salesRandD              1   6116.6 6146.6
## <none>                        6115.2 6147.2
## - salessales              1   6118.3 6148.3
## - saleshr                 1   6119.7 6149.7
## - salessupport            1   6121.9 6151.9
## - salestechnical          1   6126.3 6156.3
## - salesmarketing          1   6126.3 6156.3
## - salary.Q                1   6126.6 6156.6
## - last_evaluation         1   6127.2 6157.2
## - average_montly_hours    1   6144.6 6174.6
## - promotion_last_5years1  1   6148.4 6178.4
## - number_project          1   6206.3 6236.3
## - time_spend_company      1   6242.8 6272.8
## - salary.L                1   6259.8 6289.8
## - Work_accident1          1   6312.6 6342.6
## - satisfaction_level      1   7246.2 7276.2
## 
## Step:  AIC=6146.61
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesmarketing + salessales + 
##     salessupport + salestechnical + salary.L + salary.Q
## 
##                          Df Deviance    AIC
## <none>                        6116.6 6146.6
## - salessales              1   6121.6 6149.6
## - saleshr                 1   6122.2 6150.2
## - salessupport            1   6125.5 6153.5
## - salary.Q                1   6127.9 6155.9
## - last_evaluation         1   6128.7 6156.7
## - salesmarketing          1   6129.7 6157.7
## - salestechnical          1   6131.1 6159.1
## - average_montly_hours    1   6145.9 6173.9
## - promotion_last_5years1  1   6149.8 6177.8
## - number_project          1   6207.7 6235.7
## - time_spend_company      1   6244.6 6272.6
## - salary.L                1   6260.7 6288.7
## - Work_accident1          1   6315.1 6343.1
## - satisfaction_level      1   7250.3 7278.3
## Start:  AIC=6170.87
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesIT + salesmanagement + 
##     salesmarketing + salesproduct_mng + salesRandD + salessales + 
##     salessupport + salestechnical + salary.L + salary.Q
## 
##                          Df Deviance    AIC
## - salesproduct_mng        1   6132.9 6168.9
## - salesIT                 1   6133.5 6169.5
## - salesmarketing          1   6133.7 6169.7
## - salessales              1   6134.1 6170.1
## - salessupport            1   6134.5 6170.5
## - salesRandD              1   6134.5 6170.5
## <none>                        6132.9 6170.9
## - salesmanagement         1   6135.9 6171.9
## - salestechnical          1   6137.4 6173.4
## - saleshr                 1   6138.0 6174.0
## - salary.Q                1   6144.4 6180.4
## - last_evaluation         1   6147.8 6183.8
## - promotion_last_5years1  1   6151.6 6187.6
## - average_montly_hours    1   6163.8 6199.8
## - number_project          1   6236.6 6272.6
## - salary.L                1   6255.4 6291.4
## - time_spend_company      1   6272.8 6308.8
## - Work_accident1          1   6336.6 6372.6
## - satisfaction_level      1   7238.1 7274.1
## 
## Step:  AIC=6168.94
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesIT + salesmanagement + 
##     salesmarketing + salesRandD + salessales + salessupport + 
##     salestechnical + salary.L + salary.Q
## 
##                          Df Deviance    AIC
## - salesIT                 1   6133.6 6167.6
## - salesmarketing          1   6133.7 6167.7
## - salessales              1   6134.5 6168.5
## - salessupport            1   6134.9 6168.9
## <none>                        6132.9 6168.9
## - salesRandD              1   6135.7 6169.7
## - salesmanagement         1   6137.4 6171.4
## - saleshr                 1   6139.2 6173.2
## - salestechnical          1   6139.5 6173.5
## - salary.Q                1   6144.4 6178.4
## - last_evaluation         1   6147.9 6181.9
## - promotion_last_5years1  1   6151.8 6185.8
## - average_montly_hours    1   6163.8 6197.8
## - number_project          1   6236.6 6270.6
## - salary.L                1   6255.5 6289.5
## - time_spend_company      1   6272.9 6306.9
## - Work_accident1          1   6336.7 6370.7
## - satisfaction_level      1   7238.4 7272.4
## 
## Step:  AIC=6167.6
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesmanagement + salesmarketing + 
##     salesRandD + salessales + salessupport + salestechnical + 
##     salary.L + salary.Q
## 
##                          Df Deviance    AIC
## - salesmarketing          1   6134.0 6166.0
## - salessales              1   6134.5 6166.5
## - salessupport            1   6134.9 6166.9
## <none>                        6133.6 6167.6
## - salesRandD              1   6138.0 6170.0
## - saleshr                 1   6139.2 6171.2
## - salesmanagement         1   6139.7 6171.7
## - salestechnical          1   6139.8 6171.8
## - salary.Q                1   6145.2 6177.2
## - last_evaluation         1   6148.5 6180.5
## - promotion_last_5years1  1   6152.5 6184.5
## - average_montly_hours    1   6164.5 6196.5
## - number_project          1   6237.3 6269.3
## - salary.L                1   6256.5 6288.5
## - time_spend_company      1   6273.6 6305.6
## - Work_accident1          1   6337.4 6369.4
## - satisfaction_level      1   7238.6 7270.6
## 
## Step:  AIC=6165.98
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesmanagement + salesRandD + 
##     salessales + salessupport + salestechnical + salary.L + salary.Q
## 
##                          Df Deviance    AIC
## - salessales              1   6134.6 6164.6
## - salessupport            1   6135.0 6165.0
## <none>                        6134.0 6166.0
## - salesRandD              1   6139.1 6169.1
## - saleshr                 1   6139.2 6169.2
## - salestechnical          1   6139.9 6169.9
## - salesmanagement         1   6140.9 6170.9
## - salary.Q                1   6145.5 6175.5
## - last_evaluation         1   6148.9 6178.9
## - promotion_last_5years1  1   6152.7 6182.7
## - average_montly_hours    1   6165.0 6195.0
## - number_project          1   6238.2 6268.2
## - salary.L                1   6256.7 6286.7
## - time_spend_company      1   6274.4 6304.4
## - Work_accident1          1   6337.6 6367.6
## - satisfaction_level      1   7238.8 7268.8
## 
## Step:  AIC=6164.62
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesmanagement + salesRandD + 
##     salessupport + salestechnical + salary.L + salary.Q
## 
##                          Df Deviance    AIC
## - salessupport            1   6135.2 6163.2
## <none>                        6134.6 6164.6
## - saleshr                 1   6139.2 6167.2
## - salestechnical          1   6140.0 6168.0
## - salesRandD              1   6141.5 6169.5
## - salesmanagement         1   6143.1 6171.1
## - salary.Q                1   6146.2 6174.2
## - last_evaluation         1   6149.5 6177.5
## - promotion_last_5years1  1   6153.4 6181.4
## - average_montly_hours    1   6165.6 6193.6
## - number_project          1   6239.0 6267.0
## - salary.L                1   6258.2 6286.2
## - time_spend_company      1   6275.3 6303.3
## - Work_accident1          1   6338.5 6366.5
## - satisfaction_level      1   7239.3 7267.3
## 
## Step:  AIC=6163.15
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesmanagement + salesRandD + 
##     salestechnical + salary.L + salary.Q
## 
##                          Df Deviance    AIC
## <none>                        6135.2 6163.2
## - saleshr                 1   6139.4 6165.4
## - salestechnical          1   6140.1 6166.1
## - salesRandD              1   6142.7 6168.7
## - salesmanagement         1   6144.1 6170.1
## - salary.Q                1   6146.7 6172.7
## - last_evaluation         1   6150.2 6176.2
## - promotion_last_5years1  1   6154.0 6180.0
## - average_montly_hours    1   6166.1 6192.1
## - number_project          1   6239.6 6265.6
## - salary.L                1   6258.8 6284.8
## - time_spend_company      1   6275.5 6301.5
## - Work_accident1          1   6338.9 6364.9
## - satisfaction_level      1   7240.0 7266.0
## Start:  AIC=7753.99
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesIT + salesmanagement + 
##     salesmarketing + salesproduct_mng + salesRandD + salessales + 
##     salessupport + salestechnical + salary.L + salary.Q
## 
##                          Df Deviance    AIC
## - salesIT                 1   7716.0 7752.0
## - salesproduct_mng        1   7716.0 7752.0
## - salessales              1   7716.2 7752.2
## - salessupport            1   7717.4 7753.4
## <none>                        7716.0 7754.0
## - salesmarketing          1   7718.1 7754.1
## - salesmanagement         1   7718.7 7754.7
## - salestechnical          1   7719.0 7755.0
## - saleshr                 1   7719.5 7755.5
## - salesRandD              1   7719.7 7755.7
## - last_evaluation         1   7730.8 7766.8
## - salary.Q                1   7736.7 7772.7
## - promotion_last_5years1  1   7746.7 7782.7
## - average_montly_hours    1   7755.9 7791.9
## - number_project          1   7835.3 7871.3
## - time_spend_company      1   7873.4 7909.4
## - salary.L                1   7885.6 7921.6
## - Work_accident1          1   7951.7 7987.7
## - satisfaction_level      1   9068.4 9104.4
## 
## Step:  AIC=7752
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesmanagement + salesmarketing + 
##     salesproduct_mng + salesRandD + salessales + salessupport + 
##     salestechnical + salary.L + salary.Q
## 
##                          Df Deviance    AIC
## - salesproduct_mng        1   7716.0 7750.0
## - salessales              1   7716.6 7750.6
## <none>                        7716.0 7752.0
## - salessupport            1   7718.7 7752.7
## - salesmarketing          1   7719.1 7753.1
## - salesmanagement         1   7719.3 7753.3
## - salesRandD              1   7721.0 7755.0
## - saleshr                 1   7721.3 7755.3
## - salestechnical          1   7722.0 7756.0
## - last_evaluation         1   7730.8 7764.8
## - salary.Q                1   7736.7 7770.7
## - promotion_last_5years1  1   7746.7 7780.7
## - average_montly_hours    1   7755.9 7789.9
## - number_project          1   7835.3 7869.3
## - time_spend_company      1   7873.5 7907.5
## - salary.L                1   7885.6 7919.6
## - Work_accident1          1   7951.7 7985.7
## - satisfaction_level      1   9068.9 9102.9
## 
## Step:  AIC=7750
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesmanagement + salesmarketing + 
##     salesRandD + salessales + salessupport + salestechnical + 
##     salary.L + salary.Q
## 
##                          Df Deviance    AIC
## - salessales              1   7716.8 7748.8
## <none>                        7716.0 7750.0
## - salessupport            1   7719.4 7751.4
## - salesmanagement         1   7719.5 7751.5
## - salesmarketing          1   7719.5 7751.5
## - salesRandD              1   7721.3 7753.3
## - saleshr                 1   7721.9 7753.9
## - salestechnical          1   7723.4 7755.4
## - last_evaluation         1   7730.8 7762.8
## - salary.Q                1   7736.7 7768.7
## - promotion_last_5years1  1   7746.7 7778.7
## - average_montly_hours    1   7755.9 7787.9
## - number_project          1   7835.4 7867.4
## - time_spend_company      1   7873.5 7905.5
## - salary.L                1   7885.7 7917.7
## - Work_accident1          1   7951.7 7983.7
## - satisfaction_level      1   9069.5 9101.5
## 
## Step:  AIC=7748.82
## .outcome ~ satisfaction_level + last_evaluation + number_project + 
##     average_montly_hours + time_spend_company + Work_accident1 + 
##     promotion_last_5years1 + saleshr + salesmanagement + salesmarketing + 
##     salesRandD + salessupport + salestechnical + salary.L + salary.Q
## 
##                          Df Deviance    AIC
## <none>                        7716.8 7748.8
## - salessupport            1   7719.4 7749.4
## - salesmarketing          1   7719.6 7749.6
## - salesmanagement         1   7721.7 7751.7
## - saleshr                 1   7721.9 7751.9
## - salestechnical          1   7723.8 7753.8
## - salesRandD              1   7724.5 7754.5
## - last_evaluation         1   7731.5 7761.5
## - salary.Q                1   7737.5 7767.5
## - promotion_last_5years1  1   7747.4 7777.4
## - average_montly_hours    1   7756.7 7786.7
## - number_project          1   7836.4 7866.4
## - time_spend_company      1   7874.8 7904.8
## - salary.L                1   7887.1 7917.1
## - Work_accident1          1   7952.6 7982.6
## - satisfaction_level      1   9069.8 9099.8
# Coefficient table, deviances and AIC for the model held in fit.lr
summary(fit.lr)
## 
## Call:
## NULL
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1875  -0.6660  -0.4034  -0.1198   3.1421  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            -0.3635173  0.1643413  -2.212 0.026969 *  
## satisfaction_level     -4.2133765  0.1272171 -33.120  < 2e-16 ***
## last_evaluation         0.7322465  0.1914385   3.825 0.000131 ***
## number_project         -0.2924439  0.0272724 -10.723  < 2e-16 ***
## average_montly_hours    0.0041632  0.0006624   6.285 3.29e-10 ***
## time_spend_company      0.2584489  0.0203247  12.716  < 2e-16 ***
## Work_accident1         -1.5560119  0.1165317 -13.353  < 2e-16 ***
## promotion_last_5years1 -1.5387365  0.3252853  -4.730 2.24e-06 ***
## saleshr                 0.2845959  0.1252536   2.272 0.023077 *  
## salesmanagement        -0.3735172  0.1721299  -2.170 0.030009 *  
## salesmarketing          0.2087527  0.1248078   1.673 0.094407 .  
## salesRandD             -0.3910256  0.1441249  -2.713 0.006666 ** 
## salessupport            0.1349733  0.0842858   1.601 0.109294    
## salestechnical          0.2048893  0.0773908   2.647 0.008110 ** 
## salary.L               -1.2652476  0.1136659 -11.131  < 2e-16 ***
## salary.Q               -0.3257378  0.0745541  -4.369 1.25e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 9880.1  on 8999  degrees of freedom
## Residual deviance: 7716.8  on 8984  degrees of freedom
## AIC: 7748.8
## 
## Number of Fisher Scoring iterations: 5

Regularized Logistic Regression

# Regularised logistic regression through caret's "glmnet" method, using
# every remaining column of `train` as a predictor of `left`.
# The seed is fixed immediately before fitting so the run is reproducible.
set.seed(1)
fit.glmnet <- train(
  left ~ .,
  data = train,
  method = "glmnet",
  trControl = objControl,
  metric = eval.metric
)
## Loading required package: glmnet
## Loading required package: Matrix
## Loading required package: foreach
## Loaded glmnet 2.0-5
## 
## Attaching package: 'glmnet'
## The following object is masked from 'package:pROC':
## 
##     auc

Classification and Regression Tree

# Single classification tree (caret method "rpart") on all predictors,
# resampled and scored with the shared control object and metric.
set.seed(1)
fit.cart <- train(
  left ~ .,
  data = train,
  method = "rpart",
  trControl = objControl,
  metric = eval.metric
)
## Loading required package: rpart

k Nearest Neighbours

# k-nearest-neighbours classifier on all predictors.
# NOTE(review): no preProcess is supplied, so distances are computed on the
# raw predictor scales (monthly hours in the hundreds vs. satisfaction in
# [0, 1]); consider centring/scaling -- this would change the reported results.
set.seed(1)
fit.knn <- train(
  left ~ .,
  data = train,
  method = "knn",
  trControl = objControl,
  metric = eval.metric
)

Random Forest

# Random forest (caret method "rf") on all predictors, evaluated with the
# same resampling control and selection metric as the other candidates.
set.seed(1)
fit.rf <- train(
  left ~ .,
  data = train,
  method = "rf",
  trControl = objControl,
  metric = eval.metric
)
## Loading required package: randomForest
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:Hmisc':
## 
##     combine
## The following object is masked from 'package:ggplot2':
## 
##     margin

Gradient Boosting Machine

# Gradient boosting machine (caret method "gbm") on all predictors.
# verbose = FALSE is handed on to the underlying gbm fit to keep its
# progress log out of the report.
set.seed(1)
fit.gbm <- train(
  left ~ .,
  data = train,
  method = "gbm",
  trControl = objControl,
  metric = eval.metric,
  verbose = FALSE
)
## Loading required package: gbm
## Loading required package: splines
## Loading required package: parallel
## Loaded gbm 2.1.1
## Loading required package: plyr
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:Hmisc':
## 
##     is.discrete, summarize

Collect Resamples

# Gather the six fitted models under the labels used in the comparison
# tables and plots.
models <- list(
  STEPLR       = fit.lr,
  PENALIZED.LR = fit.glmnet,
  CART         = fit.cart,
  KNN          = fit.knn,
  RF           = fit.rf,
  GBM          = fit.gbm
)

# Pool the per-fold resampling results of all models and tabulate the
# distribution of ROC, Sens and Spec for each one.
results <- resamples(models)
summary(results)
## 
## Call:
## summary.resamples(object = results)
## 
## Models: STEPLR, PENALIZED.LR, CART, KNN, RF, GBM 
## Number of resamples: 5 
## 
## ROC 
##                Min. 1st Qu. Median   Mean 3rd Qu.   Max. NA's
## STEPLR       0.7995  0.8063 0.8118 0.8181  0.8340 0.8386    0
## PENALIZED.LR 0.8003  0.8063 0.8126 0.8183  0.8340 0.8386    0
## CART         0.8076  0.8355 0.9532 0.9039  0.9555 0.9676    0
## KNN          0.9558  0.9595 0.9640 0.9647  0.9698 0.9745    0
## RF           0.9821  0.9879 0.9896 0.9894  0.9910 0.9962    0
## GBM          0.9778  0.9835 0.9848 0.9851  0.9878 0.9919    0
## 
## Sens 
##                Min. 1st Qu. Median   Mean 3rd Qu.   Max. NA's
## STEPLR       0.9190  0.9242 0.9256 0.9290  0.9315 0.9446    0
## PENALIZED.LR 0.9212  0.9249 0.9271 0.9300  0.9300 0.9468    0
## CART         0.9438  0.9461 0.9519 0.9602  0.9781 0.9810    0
## KNN          0.9285  0.9373 0.9387 0.9370  0.9388 0.9416    0
## RF           0.9964  0.9971 0.9985 0.9978  0.9985 0.9985    0
## GBM          0.9876  0.9883 0.9891 0.9895  0.9898 0.9927    0
## 
## Spec 
##                Min. 1st Qu. Median   Mean 3rd Qu.   Max. NA's
## STEPLR       0.3124  0.3201 0.3497 0.3402  0.3566 0.3621    0
## PENALIZED.LR 0.3077  0.3271 0.3403 0.3346  0.3473 0.3505    0
## CART         0.6410  0.6752 0.9089 0.8189  0.9254 0.9441    0
## KNN          0.9114  0.9136 0.9184 0.9202  0.9229 0.9347    0
## RF           0.9394  0.9486 0.9556 0.9557  0.9604 0.9744    0
## GBM          0.9044  0.9089 0.9138 0.9165  0.9159 0.9394    0

Graphs

# Box-and-whisker plot of the resampled metrics for each model
bwplot(results)

# Dot plot of the same resampled metrics, one row per model
dotplot(results)

Based on ROC, we will choose Random Forest.

Important Variables as per Random Forest

# Variable importance of the full random forest; printing it lists the
# predictors in decreasing order on a 0-100 scale
rfImp <- varImp(fit.rf)
rfImp
## rf variable importance
## 
##                          Overall
## satisfaction_level     100.00000
## time_spend_company      40.72710
## number_project          37.78372
## average_montly_hours    30.95402
## last_evaluation         26.92210
## salary.L                 1.07552
## salestechnical           0.82107
## salessupport             0.54383
## salessales               0.53846
## salary.Q                 0.51994
## Work_accident1           0.49381
## salesIT                  0.20821
## salesmanagement          0.19807
## saleshr                  0.14748
## salesmarketing           0.13394
## salesRandD               0.12028
## promotion_last_5years1   0.02603
## salesproduct_mng         0.00000
# Plot only the five highest-ranked predictors
plot(rfImp, top=5)

Random Forest with only Important Variables

# Refit the random forest on a reduced data frame holding only the five
# top-ranked predictors plus the outcome column `left`.
set.seed(1)
fit.imprf <- train(
  left ~ .,
  data = train[, c(
    "satisfaction_level",
    "time_spend_company",
    "number_project",
    "average_montly_hours",
    "last_evaluation",
    "left"
  )],
  method = "rf",
  trControl = objControl,
  metric = eval.metric
)

Comparison of Random Forest Models

# Pair the full random forest with the reduced-predictor forest so their
# resampling results can be compared side by side.
rfmodels <- list(
  RF     = fit.rf,
  RF.IMP = fit.imprf
)

# Per-fold ROC, Sens and Spec for both forests
rfresults <- resamples(rfmodels)
summary(rfresults)
## 
## Call:
## summary.resamples(object = rfresults)
## 
## Models: RF, RF.IMP 
## Number of resamples: 5 
## 
## ROC 
##          Min. 1st Qu. Median   Mean 3rd Qu.   Max. NA's
## RF     0.9821  0.9879 0.9896 0.9894  0.9910 0.9962    0
## RF.IMP 0.9820  0.9875 0.9880 0.9888  0.9892 0.9974    0
## 
## Sens 
##          Min. 1st Qu. Median   Mean 3rd Qu.   Max. NA's
## RF     0.9964  0.9971 0.9985 0.9978  0.9985 0.9985    0
## RF.IMP 0.9964  0.9978 0.9978 0.9978  0.9985 0.9985    0
## 
## Spec 
##          Min. 1st Qu. Median   Mean 3rd Qu.   Max. NA's
## RF     0.9394  0.9486 0.9556 0.9557  0.9604 0.9744    0
## RF.IMP 0.9394  0.9486 0.9556 0.9547  0.9557 0.9744    0
# Box-and-whisker comparison of the two random forests' resampled metrics
bwplot(rfresults)

# Dot-plot view of the same comparison
dotplot(rfresults)

As there is no significant decrease in ROC, we shall use the Random Forest model with only the important variables.

# Print the resampling results for each mtry value tried and the one selected
fit.imprf
## Random Forest 
## 
## 9000 samples
##    5 predictor
##    2 classes: 'No', 'Yes' 
## 
## No pre-processing
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 7200, 7199, 7201, 7200, 7200 
## Resampling results across tuning parameters:
## 
##   mtry  ROC        Sens       Spec     
##   2     0.9888145  0.9978123  0.9547339
##   3     0.9886353  0.9972292  0.9542677
##   5     0.9866702  0.9944584  0.9556685
## 
## ROC was used to select the optimal model using  the largest value.
## The final value used for the model was mtry = 2.
# Variable importance within the reduced five-predictor forest (0-100 scale)
rf1.Imp <- varImp(fit.imprf)
rf1.Imp
## rf variable importance
## 
##                      Overall
## satisfaction_level    100.00
## number_project         25.41
## time_spend_company     23.63
## average_montly_hours   12.99
## last_evaluation         0.00
# Plot the importance ranking of the five retained predictors
plot(rf1.Imp)

Tuning Random Forest Model by adjusting mtry

# 5-fold cross-validation over an explicit tuning grid. Class probabilities
# are switched on because twoClassSummary needs them to compute ROC, and the
# hold-out predictions are retained.
rfgridControl <- trainControl(
  method = "cv",
  number = 5,
  search = "grid",
  summaryFunction = twoClassSummary,
  savePredictions = TRUE,
  classProbs = TRUE
)

# Candidate mtry values: 1 through 4
rfGrid <- expand.grid(.mtry = 1:4)

# Tune the reduced-predictor random forest over the grid above.
set.seed(1)
fit.tunedrf <- train(
  left ~ .,
  data = train[, c(
    "satisfaction_level",
    "time_spend_company",
    "number_project",
    "average_montly_hours",
    "left",
    "last_evaluation"
  )],
  method = "rf",
  metric = eval.metric,
  trControl = rfgridControl,
  tuneGrid = rfGrid
)

# Show the resampled performance for every mtry and the value chosen
fit.tunedrf
## Random Forest 
## 
## 9000 samples
##    5 predictor
##    2 classes: 'No', 'Yes' 
## 
## No pre-processing
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 7200, 7199, 7201, 7200, 7200 
## Resampling results across tuning parameters:
## 
##   mtry  ROC        Sens       Spec     
##   1     0.9879859  0.9969371  0.9197340
##   2     0.9891889  0.9978123  0.9547339
##   3     0.9886951  0.9972290  0.9542677
##   4     0.9879759  0.9959168  0.9556674
## 
## ROC was used to select the optimal model using  the largest value.
## The final value used for the model was mtry = 2.

Prediction using Random Forest on Test Data

# Hard class labels ("No"/"Yes") for the test set from the tuned forest
rf.predict <- predict(fit.tunedrf, newdata = test, type = "raw")

# Cross-tabulate predictions against the observed outcome. No `positive`
# level is supplied, so the first factor level ("No") is treated as the
# positive class in the sensitivity/specificity figures.
confusionMatrix(data = rf.predict, reference = test$left)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   No  Yes
##        No  4561   49
##        Yes   10 1379
##                                           
##                Accuracy : 0.9902          
##                  95% CI : (0.9873, 0.9925)
##     No Information Rate : 0.762           
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9726          
##  Mcnemar's Test P-Value : 7.53e-07        
##                                           
##             Sensitivity : 0.9978          
##             Specificity : 0.9657          
##          Pos Pred Value : 0.9894          
##          Neg Pred Value : 0.9928          
##              Prevalence : 0.7620          
##          Detection Rate : 0.7603          
##    Detection Prevalence : 0.7685          
##       Balanced Accuracy : 0.9817          
##                                           
##        'Positive' Class : No              
## 

ROC Curve on Test Data

# A ROC curve has to rank observations by a continuous score. Passing the
# hard class labels (as the numeric codes of rf.predict) collapses the curve
# to a single operating point and understates the AUC, so score the test set
# with the predicted probability of leaving ("Yes") instead. Probabilities are
# available because the tuning control was built with classProbs = TRUE.
rf.prob <- predict(fit.tunedrf, newdata = test, type = "prob")[, "Yes"]

# levels = c(control, case) and direction = "<" pin the orientation
# (higher probability => "Yes") rather than relying on auto-detection.
rf.auc <- roc(test$left, rf.prob,
              levels = c("No", "Yes"), direction = "<", ci = TRUE)

# Plot the curve, mark the best threshold and show the AUC in the title.
plot(rf.auc, ylim = c(0, 1), print.thres = TRUE,
     main = paste("Test Data - AUC - Using Random Forest:",
                  round(rf.auc$auc[[1]], 3)), col = "blue")