library(caret) # Model Building
library(pROC) # ROC Curves
library(corrplot)
library(Hmisc)
library(dplyr)
# Load the HR attrition data set from the working directory.
# stringsAsFactors = TRUE is spelled out on purpose: since R 4.0.0 read.csv()
# keeps text columns as character by default, whereas the structure printed
# below shows `sales` and `salary` read in as factors.
data <- read.csv("HR data.csv", stringsAsFactors = TRUE)
# Inspect the column types and the first few values of every variable.
str(data)
## 'data.frame': 14999 obs. of 10 variables:
## $ satisfaction_level : num 0.38 0.8 0.11 0.72 0.37 0.41 0.1 0.92 0.89 0.42 ...
## $ last_evaluation : num 0.53 0.86 0.88 0.87 0.52 0.5 0.77 0.85 1 0.53 ...
## $ number_project : int 2 5 7 5 2 2 6 5 5 2 ...
## $ average_montly_hours : int 157 262 272 223 159 153 247 259 224 142 ...
## $ time_spend_company : int 3 6 4 5 3 3 4 5 5 3 ...
## $ Work_accident : int 0 0 0 0 0 0 0 0 0 0 ...
## $ left : int 1 1 1 1 1 1 1 1 1 1 ...
## $ promotion_last_5years: int 0 0 0 0 0 0 0 0 0 0 ...
## $ sales : Factor w/ 10 levels "accounting","hr",..: 8 8 8 8 8 8 8 8 8 8 ...
## $ salary : Factor w/ 3 levels "high","low","medium": 2 3 3 2 2 2 2 2 2 2 ...
# Recode the categorical columns before summarising the data.
# salary becomes an ordered factor running low < medium < high.
data$salary <- factor(
  data$salary,
  levels = c("low", "medium", "high"),
  ordered = TRUE
)
# left is the 0/1 outcome flag; relabel it as "No"/"Yes".
data$left <- factor(data$left, levels = 0:1, labels = c("No", "Yes"))
# The two remaining 0/1 indicator columns are turned into plain factors.
data[c("Work_accident", "promotion_last_5years")] <-
  lapply(data[c("Work_accident", "promotion_last_5years")], factor)
# Per-variable summary (counts, missing values, distribution) of the result.
describe(data)
## data
##
## 10 Variables 14999 Observations
## ---------------------------------------------------------------------------
## satisfaction_level
## n missing distinct Info Mean Gmd .05 .10
## 14999 0 92 1 0.6128 0.2823 0.11 0.21
## .25 .50 .75 .90 .95
## 0.44 0.64 0.82 0.92 0.96
##
## lowest : 0.09 0.10 0.11 0.12 0.13, highest: 0.96 0.97 0.98 0.99 1.00
## ---------------------------------------------------------------------------
## last_evaluation
## n missing distinct Info Mean Gmd .05 .10
## 14999 0 65 1 0.7161 0.1973 0.46 0.49
## .25 .50 .75 .90 .95
## 0.56 0.72 0.87 0.95 0.98
##
## lowest : 0.36 0.37 0.38 0.39 0.40, highest: 0.96 0.97 0.98 0.99 1.00
## ---------------------------------------------------------------------------
## number_project
## n missing distinct Info Mean Gmd
## 14999 0 6 0.945 3.803 1.367
##
## Value 2 3 4 5 6 7
## Frequency 2388 4055 4365 2761 1174 256
## Proportion 0.159 0.270 0.291 0.184 0.078 0.017
## ---------------------------------------------------------------------------
## average_montly_hours
## n missing distinct Info Mean Gmd .05 .10
## 14999 0 215 1 201.1 57.48 130 137
## .25 .50 .75 .90 .95
## 156 200 245 267 275
##
## lowest : 96 97 98 99 100, highest: 306 307 308 309 310
## ---------------------------------------------------------------------------
## time_spend_company
## n missing distinct Info Mean Gmd
## 14999 0 8 0.905 3.498 1.43
##
## Value 2 3 4 5 6 7 8 10
## Frequency 3244 6443 2557 1473 718 188 162 214
## Proportion 0.216 0.430 0.170 0.098 0.048 0.013 0.011 0.014
## ---------------------------------------------------------------------------
## Work_accident
## n missing distinct
## 14999 0 2
##
## Value 0 1
## Frequency 12830 2169
## Proportion 0.855 0.145
## ---------------------------------------------------------------------------
## left
## n missing distinct
## 14999 0 2
##
## Value No Yes
## Frequency 11428 3571
## Proportion 0.762 0.238
## ---------------------------------------------------------------------------
## promotion_last_5years
## n missing distinct
## 14999 0 2
##
## Value 0 1
## Frequency 14680 319
## Proportion 0.979 0.021
## ---------------------------------------------------------------------------
## sales
## n missing distinct
## 14999 0 10
##
## Value accounting hr IT management marketing
## Frequency 767 739 1227 630 858
## Proportion 0.051 0.049 0.082 0.042 0.057
##
## Value product_mng RandD sales support technical
## Frequency 902 787 4140 2229 2720
## Proportion 0.060 0.052 0.276 0.149 0.181
## ---------------------------------------------------------------------------
## salary
## n missing distinct
## 14999 0 3
##
## Value low medium high
## Frequency 7316 6446 1237
## Proportion 0.488 0.430 0.082
## ---------------------------------------------------------------------------
# Pairwise correlations between the five numeric columns (the first five
# columns of the data), selected here by name rather than by position.
correlationMatrix <- cor(data[, c(
  "satisfaction_level", "last_evaluation", "number_project",
  "average_montly_hours", "time_spend_company"
)])
print(correlationMatrix)
## satisfaction_level last_evaluation number_project
## satisfaction_level 1.00000000 0.1050212 -0.1429696
## last_evaluation 0.10502121 1.0000000 0.3493326
## number_project -0.14296959 0.3493326 1.0000000
## average_montly_hours -0.02004811 0.3397418 0.4172106
## time_spend_company -0.10086607 0.1315907 0.1967859
## average_montly_hours time_spend_company
## satisfaction_level -0.02004811 -0.1008661
## last_evaluation 0.33974180 0.1315907
## number_project 0.41721063 0.1967859
## average_montly_hours 1.00000000 0.1277549
## time_spend_company 0.12775491 1.0000000
# Visualise the correlation matrix with one circle per pair of variables.
corrplot(corr = correlationMatrix, method = "circle")
# Ask caret for the names of any predictors it flags at a 0.75 correlation
# cutoff; an empty character vector means nothing was flagged.
highlyCorrelated <- findCorrelation(
  x = correlationMatrix,
  cutoff = 0.75,
  names = TRUE
)
print(highlyCorrelated)
## character(0)
# Keep only the employees who have left the company, then count them.
left.emps <- filter(data, left == "Yes")
nrow(left.emps)
## [1] 3571
# Among the leavers, treat those with a last evaluation of at least 0.5 as
# "valuable" and count them.
val.emps <- filter(left.emps, last_evaluation >= 0.5)
nrow(val.emps)
## [1] 2982
Out of the 3571 employees who left, 2982 (those with last_evaluation >= 0.5) are valuable!
If low performers are not our concern, we can perform the analysis on only the valuable employees.
# Fix the RNG seed so the same partition is drawn on every run.
set.seed(1)
# Draw a single partition on the outcome, sending 60% of the rows to training;
# list = FALSE returns the selected row indices as a matrix instead of a list.
splitIndex <- createDataPartition(
  y = data$left,
  p = 0.6,
  list = FALSE,
  times = 1
)
# Selected rows form the training set; every remaining row goes to the test set.
train <- data[splitIndex, ]
test <- data[-splitIndex, ]
# Check for proportion
# Summarise the outcome in the training split so its No/Yes proportions can be
# compared with those of the full data set.
describe(train$left)
## train$left
## n missing distinct
## 9000 0 2
##
## Value No Yes
## Frequency 6857 2143
## Proportion 0.762 0.238
# Summarise the outcome in the held-out test split (No/Yes counts and
# proportions).
describe(test$left)
## test$left
## n missing distinct
## 5999 0 2
##
## Value No Yes
## Frequency 4571 1428
## Proportion 0.762 0.238
- Create Multiple Models
- Compare the models using the ROC metric and pick the best algorithm
# Resampling configuration for model fitting: 5-fold cross-validation,
# class probabilities enabled, hold-out predictions kept, and performance
# summarised with twoClassSummary.
objControl <- trainControl(
  method = "cv",
  number = 5,
  classProbs = TRUE,
  savePredictions = TRUE,
  summaryFunction = twoClassSummary
)
# Name of the performance measure passed to train() as `metric`.
eval.metric <- "ROC"
# Reset the seed before fitting.
set.seed(1)
# Fit `left` against all other columns of the training set using caret's
# "glmStepAIC" method, with the resampling setup and metric defined above.
fit.lr <- train(
  left ~ .,
  data = train,
  method = "glmStepAIC",
  metric = eval.metric,
  trControl = objControl
)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Start: AIC=6196.14
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesIT + salesmanagement +
## salesmarketing + salesproduct_mng + salesRandD + salessales +
## salessupport + salestechnical + salary.L + salary.Q
##
## Df Deviance AIC
## - salessales 1 6158.2 6194.2
## - salesIT 1 6158.3 6194.3
## - salesproduct_mng 1 6158.3 6194.3
## - salessupport 1 6158.6 6194.6
## - salesmarketing 1 6159.0 6195.0
## <none> 6158.1 6196.1
## - salesmanagement 1 6160.3 6196.3
## - saleshr 1 6160.6 6196.6
## - salestechnical 1 6161.0 6197.0
## - salesRandD 1 6166.6 6202.6
## - last_evaluation 1 6168.8 6204.8
## - promotion_last_5years1 1 6174.0 6210.0
## - salary.Q 1 6182.7 6218.7
## - average_montly_hours 1 6192.4 6228.4
## - number_project 1 6258.5 6294.5
## - time_spend_company 1 6272.5 6308.5
## - salary.L 1 6310.1 6346.1
## - Work_accident1 1 6351.5 6387.5
## - satisfaction_level 1 7239.8 7275.8
##
## Step: AIC=6194.16
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesIT + salesmanagement +
## salesmarketing + salesproduct_mng + salesRandD + salessupport +
## salestechnical + salary.L + salary.Q
##
## Df Deviance AIC
## - salesproduct_mng 1 6158.6 6192.6
## - salesIT 1 6158.6 6192.6
## - salessupport 1 6159.0 6193.0
## - salesmarketing 1 6159.5 6193.5
## <none> 6158.2 6194.2
## - salesmanagement 1 6161.6 6195.6
## - saleshr 1 6161.9 6195.9
## - salestechnical 1 6165.0 6199.0
## - last_evaluation 1 6168.8 6202.8
## - salesRandD 1 6172.4 6206.4
## - promotion_last_5years1 1 6174.0 6208.0
## - salary.Q 1 6182.7 6216.7
## - average_montly_hours 1 6192.5 6226.5
## - number_project 1 6258.5 6292.5
## - time_spend_company 1 6272.5 6306.5
## - salary.L 1 6310.2 6344.2
## - Work_accident1 1 6351.6 6385.6
## - satisfaction_level 1 7240.1 7274.1
##
## Step: AIC=6192.58
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesIT + salesmanagement +
## salesmarketing + salesRandD + salessupport + salestechnical +
## salary.L + salary.Q
##
## Df Deviance AIC
## - salesIT 1 6158.9 6190.9
## - salessupport 1 6159.8 6191.8
## - salesmarketing 1 6160.1 6192.1
## <none> 6158.6 6192.6
## - salesmanagement 1 6161.8 6193.8
## - saleshr 1 6162.7 6194.7
## - salestechnical 1 6166.7 6198.7
## - last_evaluation 1 6169.2 6201.2
## - salesRandD 1 6172.5 6204.5
## - promotion_last_5years1 1 6174.3 6206.3
## - salary.Q 1 6183.2 6215.2
## - average_montly_hours 1 6193.0 6225.0
## - number_project 1 6259.2 6291.2
## - time_spend_company 1 6273.1 6305.1
## - salary.L 1 6311.2 6343.2
## - Work_accident1 1 6352.1 6384.1
## - satisfaction_level 1 7241.1 7273.1
##
## Step: AIC=6190.9
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesmanagement + salesmarketing +
## salesRandD + salessupport + salestechnical + salary.L + salary.Q
##
## Df Deviance AIC
## - salessupport 1 6160.4 6190.4
## - salesmarketing 1 6160.7 6190.7
## <none> 6158.9 6190.9
## - salesmanagement 1 6161.9 6191.9
## - saleshr 1 6163.5 6193.5
## - salestechnical 1 6168.3 6198.3
## - last_evaluation 1 6169.5 6199.5
## - salesRandD 1 6172.5 6202.5
## - promotion_last_5years1 1 6174.6 6204.6
## - salary.Q 1 6183.5 6213.5
## - average_montly_hours 1 6193.4 6223.4
## - number_project 1 6259.7 6289.7
## - time_spend_company 1 6273.5 6303.5
## - salary.L 1 6311.6 6341.6
## - Work_accident1 1 6352.5 6382.5
## - satisfaction_level 1 7242.1 7272.1
##
## Step: AIC=6190.45
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesmanagement + salesmarketing +
## salesRandD + salestechnical + salary.L + salary.Q
##
## Df Deviance AIC
## - salesmarketing 1 6161.8 6189.8
## <none> 6160.4 6190.4
## - salesmanagement 1 6164.0 6192.0
## - saleshr 1 6164.3 6192.3
## - salestechnical 1 6168.5 6196.5
## - last_evaluation 1 6171.1 6199.1
## - salesRandD 1 6175.7 6203.7
## - promotion_last_5years1 1 6176.2 6204.2
## - salary.Q 1 6184.8 6212.8
## - average_montly_hours 1 6194.9 6222.9
## - number_project 1 6261.3 6289.3
## - time_spend_company 1 6274.5 6302.5
## - salary.L 1 6313.2 6341.2
## - Work_accident1 1 6353.2 6381.2
## - satisfaction_level 1 7244.0 7272.0
##
## Step: AIC=6189.8
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesmanagement + salesRandD +
## salestechnical + salary.L + salary.Q
##
## Df Deviance AIC
## <none> 6161.8 6189.8
## - saleshr 1 6165.3 6191.3
## - salesmanagement 1 6165.7 6191.7
## - salestechnical 1 6169.1 6195.1
## - last_evaluation 1 6172.5 6198.5
## - promotion_last_5years1 1 6177.3 6203.3
## - salesRandD 1 6177.8 6203.8
## - salary.Q 1 6186.1 6212.1
## - average_montly_hours 1 6196.4 6222.4
## - number_project 1 6263.1 6289.1
## - time_spend_company 1 6276.3 6302.3
## - salary.L 1 6314.2 6340.2
## - Work_accident1 1 6354.4 6380.4
## - satisfaction_level 1 7244.3 7270.3
## Start: AIC=6264.01
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesIT + salesmanagement +
## salesmarketing + salesproduct_mng + salesRandD + salessales +
## salessupport + salestechnical + salary.L + salary.Q
##
## Df Deviance AIC
## - salessales 1 6226.0 6262.0
## - salesproduct_mng 1 6226.1 6262.1
## - salesIT 1 6226.1 6262.1
## - salestechnical 1 6226.8 6262.8
## - salesmarketing 1 6226.8 6262.8
## - salessupport 1 6227.5 6263.5
## <none> 6226.0 6264.0
## - saleshr 1 6228.5 6264.5
## - salesmanagement 1 6228.9 6264.9
## - salesRandD 1 6229.7 6265.7
## - last_evaluation 1 6238.0 6274.0
## - salary.Q 1 6244.3 6280.3
## - average_montly_hours 1 6254.7 6290.7
## - promotion_last_5years1 1 6255.1 6291.1
## - number_project 1 6315.9 6351.9
## - salary.L 1 6355.0 6391.0
## - time_spend_company 1 6360.3 6396.3
## - Work_accident1 1 6408.0 6444.0
## - satisfaction_level 1 7259.4 7295.4
##
## Step: AIC=6262.02
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesIT + salesmanagement +
## salesmarketing + salesproduct_mng + salesRandD + salessupport +
## salestechnical + salary.L + salary.Q
##
## Df Deviance AIC
## - salesproduct_mng 1 6226.2 6260.2
## - salesIT 1 6226.2 6260.2
## - salesmarketing 1 6227.4 6261.4
## <none> 6226.0 6262.0
## - salestechnical 1 6228.4 6262.4
## - salesmanagement 1 6229.9 6263.9
## - salessupport 1 6230.2 6264.2
## - saleshr 1 6230.5 6264.5
## - salesRandD 1 6231.8 6265.8
## - last_evaluation 1 6238.0 6272.0
## - salary.Q 1 6244.3 6278.3
## - average_montly_hours 1 6254.7 6288.7
## - promotion_last_5years1 1 6255.1 6289.1
## - number_project 1 6315.9 6349.9
## - salary.L 1 6355.0 6389.0
## - time_spend_company 1 6360.3 6394.3
## - Work_accident1 1 6408.0 6442.0
## - satisfaction_level 1 7260.1 7294.1
##
## Step: AIC=6260.19
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesIT + salesmanagement +
## salesmarketing + salesRandD + salessupport + salestechnical +
## salary.L + salary.Q
##
## Df Deviance AIC
## - salesIT 1 6226.4 6258.4
## - salesmarketing 1 6227.5 6259.5
## <none> 6226.2 6260.2
## - salestechnical 1 6228.4 6260.4
## - salessupport 1 6230.2 6262.2
## - salesmanagement 1 6230.3 6262.3
## - saleshr 1 6230.5 6262.5
## - salesRandD 1 6232.3 6264.3
## - last_evaluation 1 6238.2 6270.2
## - salary.Q 1 6244.5 6276.5
## - average_montly_hours 1 6254.8 6286.8
## - promotion_last_5years1 1 6255.4 6287.4
## - number_project 1 6315.9 6347.9
## - salary.L 1 6355.1 6387.1
## - time_spend_company 1 6360.3 6392.3
## - Work_accident1 1 6408.1 6440.1
## - satisfaction_level 1 7260.3 7292.3
##
## Step: AIC=6258.45
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesmanagement + salesmarketing +
## salesRandD + salessupport + salestechnical + salary.L + salary.Q
##
## Df Deviance AIC
## - salesmarketing 1 6228.0 6258.0
## <none> 6226.4 6258.4
## - salestechnical 1 6229.1 6259.1
## - salesmanagement 1 6230.4 6260.4
## - salessupport 1 6231.2 6261.2
## - saleshr 1 6231.2 6261.2
## - salesRandD 1 6232.3 6262.3
## - last_evaluation 1 6238.4 6268.4
## - salary.Q 1 6244.7 6274.7
## - average_montly_hours 1 6255.1 6285.1
## - promotion_last_5years1 1 6255.6 6285.6
## - number_project 1 6316.3 6346.3
## - salary.L 1 6355.3 6385.3
## - time_spend_company 1 6360.8 6390.8
## - Work_accident1 1 6408.4 6438.4
## - satisfaction_level 1 7260.3 7290.3
##
## Step: AIC=6257.96
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesmanagement + salesRandD +
## salessupport + salestechnical + salary.L + salary.Q
##
## Df Deviance AIC
## <none> 6228.0 6258.0
## - salestechnical 1 6230.1 6258.1
## - salessupport 1 6232.0 6260.0
## - saleshr 1 6232.2 6260.2
## - salesmanagement 1 6232.4 6260.4
## - salesRandD 1 6234.5 6262.5
## - last_evaluation 1 6240.0 6268.0
## - salary.Q 1 6246.2 6274.2
## - promotion_last_5years1 1 6256.5 6284.5
## - average_montly_hours 1 6256.7 6284.7
## - number_project 1 6318.0 6346.0
## - salary.L 1 6356.7 6384.7
## - time_spend_company 1 6362.7 6390.7
## - Work_accident1 1 6409.7 6437.7
## - satisfaction_level 1 7260.6 7288.6
## Start: AIC=6247.98
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesIT + salesmanagement +
## salesmarketing + salesproduct_mng + salesRandD + salessales +
## salessupport + salestechnical + salary.L + salary.Q
##
## Df Deviance AIC
## - salesIT 1 6210.0 6246.0
## - salesproduct_mng 1 6210.0 6246.0
## - salessales 1 6210.2 6246.2
## - salessupport 1 6210.8 6246.8
## - salesmanagement 1 6211.5 6247.5
## - salesRandD 1 6211.6 6247.6
## <none> 6210.0 6248.0
## - salestechnical 1 6212.1 6248.1
## - salesmarketing 1 6212.2 6248.2
## - saleshr 1 6212.7 6248.7
## - last_evaluation 1 6219.7 6255.7
## - salary.Q 1 6231.1 6267.1
## - promotion_last_5years1 1 6239.1 6275.1
## - average_montly_hours 1 6247.2 6283.2
## - number_project 1 6303.2 6339.2
## - time_spend_company 1 6325.1 6361.1
## - salary.L 1 6352.4 6388.4
## - Work_accident1 1 6378.6 6414.6
## - satisfaction_level 1 7276.9 7312.9
##
## Step: AIC=6245.99
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesmanagement + salesmarketing +
## salesproduct_mng + salesRandD + salessales + salessupport +
## salestechnical + salary.L + salary.Q
##
## Df Deviance AIC
## - salesproduct_mng 1 6210.1 6244.1
## - salessales 1 6210.5 6244.5
## - salessupport 1 6211.6 6245.6
## - salesmanagement 1 6211.8 6245.8
## <none> 6210.0 6246.0
## - salesRandD 1 6212.0 6246.0
## - salesmarketing 1 6213.4 6247.4
## - saleshr 1 6214.1 6248.1
## - salestechnical 1 6214.2 6248.2
## - last_evaluation 1 6219.7 6253.7
## - salary.Q 1 6231.1 6265.1
## - promotion_last_5years1 1 6239.1 6273.1
## - average_montly_hours 1 6247.2 6281.2
## - number_project 1 6303.2 6337.2
## - time_spend_company 1 6325.1 6359.1
## - salary.L 1 6352.4 6386.4
## - Work_accident1 1 6378.6 6412.6
## - satisfaction_level 1 7277.4 7311.4
##
## Step: AIC=6244.05
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesmanagement + salesmarketing +
## salesRandD + salessales + salessupport + salestechnical +
## salary.L + salary.Q
##
## Df Deviance AIC
## - salessales 1 6210.9 6242.9
## - salesmanagement 1 6211.8 6243.8
## <none> 6210.1 6244.1
## - salesRandD 1 6212.1 6244.1
## - salessupport 1 6212.2 6244.2
## - salesmarketing 1 6214.1 6246.1
## - saleshr 1 6215.0 6247.0
## - salestechnical 1 6215.6 6247.6
## - last_evaluation 1 6219.8 6251.8
## - salary.Q 1 6231.2 6263.2
## - promotion_last_5years1 1 6239.2 6271.2
## - average_montly_hours 1 6247.4 6279.4
## - number_project 1 6303.3 6335.3
## - time_spend_company 1 6325.3 6357.3
## - salary.L 1 6352.5 6384.5
## - Work_accident1 1 6378.6 6410.6
## - satisfaction_level 1 7277.6 7309.6
##
## Step: AIC=6242.95
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesmanagement + salesmarketing +
## salesRandD + salessupport + salestechnical + salary.L + salary.Q
##
## Df Deviance AIC
## - salessupport 1 6212.3 6242.3
## <none> 6210.9 6242.9
## - salesmanagement 1 6213.7 6243.7
## - salesmarketing 1 6214.1 6244.1
## - salesRandD 1 6214.4 6244.4
## - saleshr 1 6215.0 6245.0
## - salestechnical 1 6215.8 6245.8
## - last_evaluation 1 6220.6 6250.6
## - salary.Q 1 6232.0 6262.0
## - promotion_last_5years1 1 6239.9 6269.9
## - average_montly_hours 1 6248.1 6278.1
## - number_project 1 6304.3 6334.3
## - time_spend_company 1 6326.8 6356.8
## - salary.L 1 6353.8 6383.8
## - Work_accident1 1 6379.6 6409.6
## - satisfaction_level 1 7277.9 7307.9
##
## Step: AIC=6242.3
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesmanagement + salesmarketing +
## salesRandD + salestechnical + salary.L + salary.Q
##
## Df Deviance AIC
## <none> 6212.3 6242.3
## - salesmarketing 1 6214.9 6242.9
## - salesmanagement 1 6215.6 6243.6
## - saleshr 1 6215.7 6243.7
## - salestechnical 1 6216.2 6244.2
## - salesRandD 1 6216.5 6244.5
## - last_evaluation 1 6222.1 6250.1
## - salary.Q 1 6233.2 6261.2
## - promotion_last_5years1 1 6241.4 6269.4
## - average_montly_hours 1 6249.3 6277.3
## - number_project 1 6306.0 6334.0
## - time_spend_company 1 6327.6 6355.6
## - salary.L 1 6355.0 6383.0
## - Work_accident1 1 6380.4 6408.4
## - satisfaction_level 1 7279.0 7307.0
## Start: AIC=6151.75
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesIT + salesmanagement +
## salesmarketing + salesproduct_mng + salesRandD + salessales +
## salessupport + salestechnical + salary.L + salary.Q
##
## Df Deviance AIC
## - salesproduct_mng 1 6113.8 6149.8
## - salesIT 1 6113.8 6149.8
## - salessales 1 6114.1 6150.1
## - salesmanagement 1 6115.1 6151.1
## - salesRandD 1 6115.4 6151.4
## - salessupport 1 6115.5 6151.5
## - saleshr 1 6115.5 6151.5
## <none> 6113.8 6151.8
## - salestechnical 1 6116.7 6152.7
## - salesmarketing 1 6118.9 6154.9
## - salary.Q 1 6124.2 6160.2
## - last_evaluation 1 6125.8 6161.8
## - average_montly_hours 1 6142.9 6178.9
## - promotion_last_5years1 1 6145.3 6181.3
## - number_project 1 6204.9 6240.9
## - time_spend_company 1 6242.4 6278.4
## - salary.L 1 6249.5 6285.5
## - Work_accident1 1 6311.2 6347.2
## - satisfaction_level 1 7242.8 7278.8
##
## Step: AIC=6149.81
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesIT + salesmanagement +
## salesmarketing + salesRandD + salessales + salessupport +
## salestechnical + salary.L + salary.Q
##
## Df Deviance AIC
## - salesIT 1 6113.8 6147.8
## - salessales 1 6114.9 6148.9
## - salesmanagement 1 6115.2 6149.2
## - salesRandD 1 6115.6 6149.6
## <none> 6113.8 6149.8
## - saleshr 1 6116.7 6150.7
## - salessupport 1 6117.2 6151.2
## - salestechnical 1 6119.6 6153.6
## - salesmarketing 1 6121.5 6155.5
## - salary.Q 1 6124.3 6158.3
## - last_evaluation 1 6125.9 6159.9
## - average_montly_hours 1 6143.1 6177.1
## - promotion_last_5years1 1 6145.4 6179.4
## - number_project 1 6205.2 6239.2
## - time_spend_company 1 6242.5 6276.5
## - salary.L 1 6249.6 6283.6
## - Work_accident1 1 6311.2 6345.2
## - satisfaction_level 1 7244.7 7278.7
##
## Step: AIC=6147.84
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesmanagement + salesmarketing +
## salesRandD + salessales + salessupport + salestechnical +
## salary.L + salary.Q
##
## Df Deviance AIC
## - salesmanagement 1 6115.2 6147.2
## - salessales 1 6115.7 6147.7
## - salesRandD 1 6115.7 6147.7
## <none> 6113.8 6147.8
## - saleshr 1 6117.4 6149.4
## - salessupport 1 6118.8 6150.8
## - salestechnical 1 6122.3 6154.3
## - salesmarketing 1 6123.3 6155.3
## - salary.Q 1 6124.3 6156.3
## - last_evaluation 1 6125.9 6157.9
## - average_montly_hours 1 6143.1 6175.1
## - promotion_last_5years1 1 6145.4 6177.4
## - number_project 1 6205.2 6237.2
## - time_spend_company 1 6242.5 6274.5
## - salary.L 1 6249.6 6281.6
## - Work_accident1 1 6311.3 6343.3
## - satisfaction_level 1 7244.7 7276.7
##
## Step: AIC=6147.21
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesmarketing + salesRandD +
## salessales + salessupport + salestechnical + salary.L + salary.Q
##
## Df Deviance AIC
## - salesRandD 1 6116.6 6146.6
## <none> 6115.2 6147.2
## - salessales 1 6118.3 6148.3
## - saleshr 1 6119.7 6149.7
## - salessupport 1 6121.9 6151.9
## - salestechnical 1 6126.3 6156.3
## - salesmarketing 1 6126.3 6156.3
## - salary.Q 1 6126.6 6156.6
## - last_evaluation 1 6127.2 6157.2
## - average_montly_hours 1 6144.6 6174.6
## - promotion_last_5years1 1 6148.4 6178.4
## - number_project 1 6206.3 6236.3
## - time_spend_company 1 6242.8 6272.8
## - salary.L 1 6259.8 6289.8
## - Work_accident1 1 6312.6 6342.6
## - satisfaction_level 1 7246.2 7276.2
##
## Step: AIC=6146.61
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesmarketing + salessales +
## salessupport + salestechnical + salary.L + salary.Q
##
## Df Deviance AIC
## <none> 6116.6 6146.6
## - salessales 1 6121.6 6149.6
## - saleshr 1 6122.2 6150.2
## - salessupport 1 6125.5 6153.5
## - salary.Q 1 6127.9 6155.9
## - last_evaluation 1 6128.7 6156.7
## - salesmarketing 1 6129.7 6157.7
## - salestechnical 1 6131.1 6159.1
## - average_montly_hours 1 6145.9 6173.9
## - promotion_last_5years1 1 6149.8 6177.8
## - number_project 1 6207.7 6235.7
## - time_spend_company 1 6244.6 6272.6
## - salary.L 1 6260.7 6288.7
## - Work_accident1 1 6315.1 6343.1
## - satisfaction_level 1 7250.3 7278.3
## Start: AIC=6170.87
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesIT + salesmanagement +
## salesmarketing + salesproduct_mng + salesRandD + salessales +
## salessupport + salestechnical + salary.L + salary.Q
##
## Df Deviance AIC
## - salesproduct_mng 1 6132.9 6168.9
## - salesIT 1 6133.5 6169.5
## - salesmarketing 1 6133.7 6169.7
## - salessales 1 6134.1 6170.1
## - salessupport 1 6134.5 6170.5
## - salesRandD 1 6134.5 6170.5
## <none> 6132.9 6170.9
## - salesmanagement 1 6135.9 6171.9
## - salestechnical 1 6137.4 6173.4
## - saleshr 1 6138.0 6174.0
## - salary.Q 1 6144.4 6180.4
## - last_evaluation 1 6147.8 6183.8
## - promotion_last_5years1 1 6151.6 6187.6
## - average_montly_hours 1 6163.8 6199.8
## - number_project 1 6236.6 6272.6
## - salary.L 1 6255.4 6291.4
## - time_spend_company 1 6272.8 6308.8
## - Work_accident1 1 6336.6 6372.6
## - satisfaction_level 1 7238.1 7274.1
##
## Step: AIC=6168.94
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesIT + salesmanagement +
## salesmarketing + salesRandD + salessales + salessupport +
## salestechnical + salary.L + salary.Q
##
## Df Deviance AIC
## - salesIT 1 6133.6 6167.6
## - salesmarketing 1 6133.7 6167.7
## - salessales 1 6134.5 6168.5
## - salessupport 1 6134.9 6168.9
## <none> 6132.9 6168.9
## - salesRandD 1 6135.7 6169.7
## - salesmanagement 1 6137.4 6171.4
## - saleshr 1 6139.2 6173.2
## - salestechnical 1 6139.5 6173.5
## - salary.Q 1 6144.4 6178.4
## - last_evaluation 1 6147.9 6181.9
## - promotion_last_5years1 1 6151.8 6185.8
## - average_montly_hours 1 6163.8 6197.8
## - number_project 1 6236.6 6270.6
## - salary.L 1 6255.5 6289.5
## - time_spend_company 1 6272.9 6306.9
## - Work_accident1 1 6336.7 6370.7
## - satisfaction_level 1 7238.4 7272.4
##
## Step: AIC=6167.6
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesmanagement + salesmarketing +
## salesRandD + salessales + salessupport + salestechnical +
## salary.L + salary.Q
##
## Df Deviance AIC
## - salesmarketing 1 6134.0 6166.0
## - salessales 1 6134.5 6166.5
## - salessupport 1 6134.9 6166.9
## <none> 6133.6 6167.6
## - salesRandD 1 6138.0 6170.0
## - saleshr 1 6139.2 6171.2
## - salesmanagement 1 6139.7 6171.7
## - salestechnical 1 6139.8 6171.8
## - salary.Q 1 6145.2 6177.2
## - last_evaluation 1 6148.5 6180.5
## - promotion_last_5years1 1 6152.5 6184.5
## - average_montly_hours 1 6164.5 6196.5
## - number_project 1 6237.3 6269.3
## - salary.L 1 6256.5 6288.5
## - time_spend_company 1 6273.6 6305.6
## - Work_accident1 1 6337.4 6369.4
## - satisfaction_level 1 7238.6 7270.6
##
## Step: AIC=6165.98
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesmanagement + salesRandD +
## salessales + salessupport + salestechnical + salary.L + salary.Q
##
## Df Deviance AIC
## - salessales 1 6134.6 6164.6
## - salessupport 1 6135.0 6165.0
## <none> 6134.0 6166.0
## - salesRandD 1 6139.1 6169.1
## - saleshr 1 6139.2 6169.2
## - salestechnical 1 6139.9 6169.9
## - salesmanagement 1 6140.9 6170.9
## - salary.Q 1 6145.5 6175.5
## - last_evaluation 1 6148.9 6178.9
## - promotion_last_5years1 1 6152.7 6182.7
## - average_montly_hours 1 6165.0 6195.0
## - number_project 1 6238.2 6268.2
## - salary.L 1 6256.7 6286.7
## - time_spend_company 1 6274.4 6304.4
## - Work_accident1 1 6337.6 6367.6
## - satisfaction_level 1 7238.8 7268.8
##
## Step: AIC=6164.62
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesmanagement + salesRandD +
## salessupport + salestechnical + salary.L + salary.Q
##
## Df Deviance AIC
## - salessupport 1 6135.2 6163.2
## <none> 6134.6 6164.6
## - saleshr 1 6139.2 6167.2
## - salestechnical 1 6140.0 6168.0
## - salesRandD 1 6141.5 6169.5
## - salesmanagement 1 6143.1 6171.1
## - salary.Q 1 6146.2 6174.2
## - last_evaluation 1 6149.5 6177.5
## - promotion_last_5years1 1 6153.4 6181.4
## - average_montly_hours 1 6165.6 6193.6
## - number_project 1 6239.0 6267.0
## - salary.L 1 6258.2 6286.2
## - time_spend_company 1 6275.3 6303.3
## - Work_accident1 1 6338.5 6366.5
## - satisfaction_level 1 7239.3 7267.3
##
## Step: AIC=6163.15
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesmanagement + salesRandD +
## salestechnical + salary.L + salary.Q
##
## Df Deviance AIC
## <none> 6135.2 6163.2
## - saleshr 1 6139.4 6165.4
## - salestechnical 1 6140.1 6166.1
## - salesRandD 1 6142.7 6168.7
## - salesmanagement 1 6144.1 6170.1
## - salary.Q 1 6146.7 6172.7
## - last_evaluation 1 6150.2 6176.2
## - promotion_last_5years1 1 6154.0 6180.0
## - average_montly_hours 1 6166.1 6192.1
## - number_project 1 6239.6 6265.6
## - salary.L 1 6258.8 6284.8
## - time_spend_company 1 6275.5 6301.5
## - Work_accident1 1 6338.9 6364.9
## - satisfaction_level 1 7240.0 7266.0
## Start: AIC=7753.99
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesIT + salesmanagement +
## salesmarketing + salesproduct_mng + salesRandD + salessales +
## salessupport + salestechnical + salary.L + salary.Q
##
## Df Deviance AIC
## - salesIT 1 7716.0 7752.0
## - salesproduct_mng 1 7716.0 7752.0
## - salessales 1 7716.2 7752.2
## - salessupport 1 7717.4 7753.4
## <none> 7716.0 7754.0
## - salesmarketing 1 7718.1 7754.1
## - salesmanagement 1 7718.7 7754.7
## - salestechnical 1 7719.0 7755.0
## - saleshr 1 7719.5 7755.5
## - salesRandD 1 7719.7 7755.7
## - last_evaluation 1 7730.8 7766.8
## - salary.Q 1 7736.7 7772.7
## - promotion_last_5years1 1 7746.7 7782.7
## - average_montly_hours 1 7755.9 7791.9
## - number_project 1 7835.3 7871.3
## - time_spend_company 1 7873.4 7909.4
## - salary.L 1 7885.6 7921.6
## - Work_accident1 1 7951.7 7987.7
## - satisfaction_level 1 9068.4 9104.4
##
## Step: AIC=7752
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesmanagement + salesmarketing +
## salesproduct_mng + salesRandD + salessales + salessupport +
## salestechnical + salary.L + salary.Q
##
## Df Deviance AIC
## - salesproduct_mng 1 7716.0 7750.0
## - salessales 1 7716.6 7750.6
## <none> 7716.0 7752.0
## - salessupport 1 7718.7 7752.7
## - salesmarketing 1 7719.1 7753.1
## - salesmanagement 1 7719.3 7753.3
## - salesRandD 1 7721.0 7755.0
## - saleshr 1 7721.3 7755.3
## - salestechnical 1 7722.0 7756.0
## - last_evaluation 1 7730.8 7764.8
## - salary.Q 1 7736.7 7770.7
## - promotion_last_5years1 1 7746.7 7780.7
## - average_montly_hours 1 7755.9 7789.9
## - number_project 1 7835.3 7869.3
## - time_spend_company 1 7873.5 7907.5
## - salary.L 1 7885.6 7919.6
## - Work_accident1 1 7951.7 7985.7
## - satisfaction_level 1 9068.9 9102.9
##
## Step: AIC=7750
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesmanagement + salesmarketing +
## salesRandD + salessales + salessupport + salestechnical +
## salary.L + salary.Q
##
## Df Deviance AIC
## - salessales 1 7716.8 7748.8
## <none> 7716.0 7750.0
## - salessupport 1 7719.4 7751.4
## - salesmanagement 1 7719.5 7751.5
## - salesmarketing 1 7719.5 7751.5
## - salesRandD 1 7721.3 7753.3
## - saleshr 1 7721.9 7753.9
## - salestechnical 1 7723.4 7755.4
## - last_evaluation 1 7730.8 7762.8
## - salary.Q 1 7736.7 7768.7
## - promotion_last_5years1 1 7746.7 7778.7
## - average_montly_hours 1 7755.9 7787.9
## - number_project 1 7835.4 7867.4
## - time_spend_company 1 7873.5 7905.5
## - salary.L 1 7885.7 7917.7
## - Work_accident1 1 7951.7 7983.7
## - satisfaction_level 1 9069.5 9101.5
##
## Step: AIC=7748.82
## .outcome ~ satisfaction_level + last_evaluation + number_project +
## average_montly_hours + time_spend_company + Work_accident1 +
## promotion_last_5years1 + saleshr + salesmanagement + salesmarketing +
## salesRandD + salessupport + salestechnical + salary.L + salary.Q
##
## Df Deviance AIC
## <none> 7716.8 7748.8
## - salessupport 1 7719.4 7749.4
## - salesmarketing 1 7719.6 7749.6
## - salesmanagement 1 7721.7 7751.7
## - saleshr 1 7721.9 7751.9
## - salestechnical 1 7723.8 7753.8
## - salesRandD 1 7724.5 7754.5
## - last_evaluation 1 7731.5 7761.5
## - salary.Q 1 7737.5 7767.5
## - promotion_last_5years1 1 7747.4 7777.4
## - average_montly_hours 1 7756.7 7786.7
## - number_project 1 7836.4 7866.4
## - time_spend_company 1 7874.8 7904.8
## - salary.L 1 7887.1 7917.1
## - Work_accident1 1 7952.6 7982.6
## - satisfaction_level 1 9069.8 9099.8
# Coefficient table and deviance/AIC summary of the stepwise logistic
# regression fit stored in fit.lr.
summary(fit.lr)
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1875 -0.6660 -0.4034 -0.1198 3.1421
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.3635173 0.1643413 -2.212 0.026969 *
## satisfaction_level -4.2133765 0.1272171 -33.120 < 2e-16 ***
## last_evaluation 0.7322465 0.1914385 3.825 0.000131 ***
## number_project -0.2924439 0.0272724 -10.723 < 2e-16 ***
## average_montly_hours 0.0041632 0.0006624 6.285 3.29e-10 ***
## time_spend_company 0.2584489 0.0203247 12.716 < 2e-16 ***
## Work_accident1 -1.5560119 0.1165317 -13.353 < 2e-16 ***
## promotion_last_5years1 -1.5387365 0.3252853 -4.730 2.24e-06 ***
## saleshr 0.2845959 0.1252536 2.272 0.023077 *
## salesmanagement -0.3735172 0.1721299 -2.170 0.030009 *
## salesmarketing 0.2087527 0.1248078 1.673 0.094407 .
## salesRandD -0.3910256 0.1441249 -2.713 0.006666 **
## salessupport 0.1349733 0.0842858 1.601 0.109294
## salestechnical 0.2048893 0.0773908 2.647 0.008110 **
## salary.L -1.2652476 0.1136659 -11.131 < 2e-16 ***
## salary.Q -0.3257378 0.0745541 -4.369 1.25e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 9880.1 on 8999 degrees of freedom
## Residual deviance: 7716.8 on 8984 degrees of freedom
## AIC: 7748.8
##
## Number of Fisher Scoring iterations: 5
# Penalized logistic regression (glmnet) fitted through caret on the
# training set. The seed is reset before every fit, presumably so each
# model is resampled on the same folds -- confirm against objControl.
set.seed(1)
fit.glmnet <- train(
  left ~ .,
  data = train,
  method = "glmnet",
  metric = eval.metric,
  trControl = objControl
)
## Loading required package: glmnet
## Loading required package: Matrix
## Loading required package: foreach
## Loaded glmnet 2.0-5
##
## Attaching package: 'glmnet'
## The following object is masked from 'package:pROC':
##
## auc
# Classification tree (rpart) fitted through caret with the shared
# resampling control and evaluation metric.
set.seed(1)
fit.cart <- train(
  left ~ .,
  data = train,
  method = "rpart",
  metric = eval.metric,
  trControl = objControl
)
## Loading required package: rpart
# k-nearest-neighbours classifier fitted through caret with the shared
# resampling control and evaluation metric.
set.seed(1)
fit.knn <- train(
  left ~ .,
  data = train,
  method = "knn",
  metric = eval.metric,
  trControl = objControl
)
# Random forest on all predictors, fitted through caret with the shared
# resampling control and evaluation metric.
set.seed(1)
fit.rf <- train(
  left ~ .,
  data = train,
  method = "rf",
  metric = eval.metric,
  trControl = objControl
)
## Loading required package: randomForest
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:Hmisc':
##
## combine
## The following object is masked from 'package:ggplot2':
##
## margin
# Gradient boosting machine fitted through caret. verbose = FALSE is not a
# train() argument of its own; presumably it is forwarded to the underlying
# gbm fit to silence its per-iteration log.
set.seed(1)
fit.gbm <- train(
  left ~ .,
  data = train,
  method = "gbm",
  metric = eval.metric,
  trControl = objControl,
  verbose = FALSE
)
## Loading required package: gbm
## Loading required package: splines
## Loading required package: parallel
## Loaded gbm 2.1.1
## Loading required package: plyr
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:Hmisc':
##
## is.discrete, summarize
# Collect the six fitted caret models under display names, pool their
# resampling results, and print the per-metric summary.
models <- list(
  STEPLR       = fit.lr,
  PENALIZED.LR = fit.glmnet,
  CART         = fit.cart,
  KNN          = fit.knn,
  RF           = fit.rf,
  GBM          = fit.gbm
)
results <- resamples(models)
summary(results)
##
## Call:
## summary.resamples(object = results)
##
## Models: STEPLR, PENALIZED.LR, CART, KNN, RF, GBM
## Number of resamples: 5
##
## ROC
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## STEPLR 0.7995 0.8063 0.8118 0.8181 0.8340 0.8386 0
## PENALIZED.LR 0.8003 0.8063 0.8126 0.8183 0.8340 0.8386 0
## CART 0.8076 0.8355 0.9532 0.9039 0.9555 0.9676 0
## KNN 0.9558 0.9595 0.9640 0.9647 0.9698 0.9745 0
## RF 0.9821 0.9879 0.9896 0.9894 0.9910 0.9962 0
## GBM 0.9778 0.9835 0.9848 0.9851 0.9878 0.9919 0
##
## Sens
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## STEPLR 0.9190 0.9242 0.9256 0.9290 0.9315 0.9446 0
## PENALIZED.LR 0.9212 0.9249 0.9271 0.9300 0.9300 0.9468 0
## CART 0.9438 0.9461 0.9519 0.9602 0.9781 0.9810 0
## KNN 0.9285 0.9373 0.9387 0.9370 0.9388 0.9416 0
## RF 0.9964 0.9971 0.9985 0.9978 0.9985 0.9985 0
## GBM 0.9876 0.9883 0.9891 0.9895 0.9898 0.9927 0
##
## Spec
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## STEPLR 0.3124 0.3201 0.3497 0.3402 0.3566 0.3621 0
## PENALIZED.LR 0.3077 0.3271 0.3403 0.3346 0.3473 0.3505 0
## CART 0.6410 0.6752 0.9089 0.8189 0.9254 0.9441 0
## KNN 0.9114 0.9136 0.9184 0.9202 0.9229 0.9347 0
## RF 0.9394 0.9486 0.9556 0.9557 0.9604 0.9744 0
## GBM 0.9044 0.9089 0.9138 0.9165 0.9159 0.9394 0
# Box plot of the resampled metrics (ROC, Sens, Spec) for the six models
bwplot(results)
# Dot plot of the same resampled metrics for the six models
dotplot(results)
Based on the cross-validated ROC (Random Forest has the highest mean, 0.9894), we will choose Random Forest.
# Variable importance of the full random forest; printing it lists every
# predictor with its score (top predictor shown as 100, lowest as 0).
rfImp <- varImp(fit.rf)
rfImp
## rf variable importance
##
## Overall
## satisfaction_level 100.00000
## time_spend_company 40.72710
## number_project 37.78372
## average_montly_hours 30.95402
## last_evaluation 26.92210
## salary.L 1.07552
## salestechnical 0.82107
## salessupport 0.54383
## salessales 0.53846
## salary.Q 0.51994
## Work_accident1 0.49381
## salesIT 0.20821
## salesmanagement 0.19807
## saleshr 0.14748
## salesmarketing 0.13394
## salesRandD 0.12028
## promotion_last_5years1 0.02603
## salesproduct_mng 0.00000
# Plot only the five highest-ranked predictors of the full random forest.
plot(rfImp, top = 5)

# Refit the random forest on those five predictors (plus the outcome) with
# the same seed, resampling control and metric as the full model.
set.seed(1)
fit.imprf <- train(
  left ~ .,
  data = train[, c(
    "satisfaction_level", "time_spend_company", "number_project",
    "average_montly_hours", "last_evaluation", "left"
  )],
  method = "rf",
  metric = eval.metric,
  trControl = objControl
)

# Compare the full and the reduced random forest on their resampling results.
rfmodels <- list(
  RF     = fit.rf,
  RF.IMP = fit.imprf
)
rfresults <- resamples(rfmodels)
summary(rfresults)
##
## Call:
## summary.resamples(object = rfresults)
##
## Models: RF, RF.IMP
## Number of resamples: 5
##
## ROC
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## RF 0.9821 0.9879 0.9896 0.9894 0.9910 0.9962 0
## RF.IMP 0.9820 0.9875 0.9880 0.9888 0.9892 0.9974 0
##
## Sens
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## RF 0.9964 0.9971 0.9985 0.9978 0.9985 0.9985 0
## RF.IMP 0.9964 0.9978 0.9978 0.9978 0.9985 0.9985 0
##
## Spec
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## RF 0.9394 0.9486 0.9556 0.9557 0.9604 0.9744 0
## RF.IMP 0.9394 0.9486 0.9556 0.9547 0.9557 0.9744 0
# Box plot, then dot plot, of the resampled metrics for the full (RF) and
# reduced (RF.IMP) random forests.
bwplot(rfresults)
dotplot(rfresults)
As there is no significant decrease in ROC, we shall use the Random Forest model with the important variables only.
# Print the reduced random forest: resampling setup, metrics per mtry value,
# and the mtry selected for the final model.
fit.imprf
## Random Forest
##
## 9000 samples
## 5 predictor
## 2 classes: 'No', 'Yes'
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 7200, 7199, 7201, 7200, 7200
## Resampling results across tuning parameters:
##
## mtry ROC Sens Spec
## 2 0.9888145 0.9978123 0.9547339
## 3 0.9886353 0.9972292 0.9542677
## 5 0.9866702 0.9944584 0.9556685
##
## ROC was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
# Variable importance of the reduced (five-predictor) random forest; printing
# it lists the predictors with their scores (top shown as 100, lowest as 0).
rf1.Imp <- varImp(fit.imprf)
rf1.Imp
## rf variable importance
##
## Overall
## satisfaction_level 100.00
## number_project 25.41
## time_spend_company 23.63
## average_montly_hours 12.99
## last_evaluation 0.00
# Importance ranking of the five predictors in the reduced random forest.
plot(rf1.Imp)

# 5-fold cross-validation over an explicit grid. Class probabilities are
# enabled and twoClassSummary is the summary function (the printed results
# report ROC, Sens and Spec); hold-out predictions are saved.
rfgridControl <- trainControl(
  method = "cv",
  number = 5,
  search = "grid",
  classProbs = TRUE,
  summaryFunction = twoClassSummary,
  savePredictions = TRUE
)

# Candidate mtry values: 1 through 4.
rfGrid <- expand.grid(.mtry = 1:4)

# Tune the reduced random forest over that grid.
set.seed(1)
fit.tunedrf <- train(
  left ~ .,
  data = train[, c(
    "satisfaction_level", "time_spend_company", "number_project",
    "average_montly_hours", "left", "last_evaluation"
  )],
  method = "rf",
  metric = eval.metric,
  trControl = rfgridControl,
  tuneGrid = rfGrid
)
fit.tunedrf
## Random Forest
##
## 9000 samples
## 5 predictor
## 2 classes: 'No', 'Yes'
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 7200, 7199, 7201, 7200, 7200
## Resampling results across tuning parameters:
##
## mtry ROC Sens Spec
## 1 0.9879859 0.9969371 0.9197340
## 2 0.9891889 0.9978123 0.9547339
## 3 0.9886951 0.9972290 0.9542677
## 4 0.9879759 0.9959168 0.9556674
##
## ROC was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
# Predicted class labels for the hold-out set from the tuned random forest,
# cross-tabulated against the observed outcome.
# NOTE: the report treats "No" as the positive class, so the Sensitivity it
# prints refers to employees who stayed, not to those who left.
rf.predict <- predict(fit.tunedrf, newdata = test, type = "raw")
confusionMatrix(data = rf.predict, reference = test$left)
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 4561 49
## Yes 10 1379
##
## Accuracy : 0.9902
## 95% CI : (0.9873, 0.9925)
## No Information Rate : 0.762
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9726
## Mcnemar's Test P-Value : 7.53e-07
##
## Sensitivity : 0.9978
## Specificity : 0.9657
## Pos Pred Value : 0.9894
## Neg Pred Value : 0.9928
## Prevalence : 0.7620
## Detection Rate : 0.7603
## Detection Prevalence : 0.7685
## Balanced Accuracy : 0.9817
##
## 'Positive' Class : No
##
# Test-set ROC curve and AUC for the tuned random forest.
#
# The curve is built from the predicted probability of the "Yes" (left)
# class. Feeding roc() the hard class labels instead collapses the curve to a
# single operating point, and the resulting "AUC" is just balanced accuracy
# rather than a measure of how well the model ranks employees.
rf.prob <- predict(fit.tunedrf, newdata = test, type = "prob")[, "Yes"]

# levels/direction are fixed explicitly: "No" is the control, "Yes" the case,
# and higher probabilities indicate the case.
rf.auc <- roc(
  response = test$left,
  predictor = rf.prob,
  levels = c("No", "Yes"),
  direction = "<",
  ci = TRUE
)

plot(rf.auc, ylim = c(0, 1), print.thres = TRUE,
     main = paste("Test Data - AUC - Using Random Forest:",
                  round(rf.auc$auc[[1]], 3)),
     col = "blue")