# Load the HR attrition data set that the logistic regression is fitted on.
data_final <- read.csv("HR Analytics_Logit Model_Final.csv")

# Convert the outcome and the categorical predictors to factors so glm()
# dummy-codes them instead of treating their codes as numbers.
# ("Perfrormance.Rating" is spelled exactly as the column is referenced
# throughout this script.)
categorical_cols <- c(
  "Attrition", "Gender", "Perfrormance.Rating", "Marital.Status"
)
data_final[categorical_cols] <- lapply(data_final[categorical_cols], factor)

# Fit a binomial GLM of Attrition on the six predictors and show the
# coefficient table.
mylogit <- glm(
  formula = Attrition ~ Salary + Gender + Number.of.Years.of.Experience +
    Perfrormance.Rating + Marital.Status + Education.in.Number.of.Years,
  family = "binomial",
  data = data_final
)
summary(mylogit)
##
## Call:
## glm(formula = Attrition ~ Salary + Gender + Number.of.Years.of.Experience +
## Perfrormance.Rating + Marital.Status + Education.in.Number.of.Years,
## family = "binomial", data = data_final)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9473 -0.9895 0.5064 1.1397 1.6069
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.612e-01 8.582e-01 0.654 0.513
## Salary 1.202e-06 1.271e-06 0.946 0.344
## Gender2 -1.652e+00 2.293e-01 -7.204 5.85e-13 ***
## Number.of.Years.of.Experience -2.333e-02 2.142e-02 -1.089 0.276
## Perfrormance.Rating2 4.973e-01 3.035e-01 1.639 0.101
## Perfrormance.Rating3 -2.668e-01 2.966e-01 -0.900 0.368
## Perfrormance.Rating4 8.926e-02 3.008e-01 0.297 0.767
## Perfrormance.Rating5 -1.440e-01 3.043e-01 -0.473 0.636
## Marital.Status2 2.286e-01 1.931e-01 1.184 0.236
## Education.in.Number.of.Years 2.987e-02 4.264e-02 0.700 0.484
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 693.12 on 499 degrees of freedom
## Residual deviance: 623.10 on 490 degrees of freedom
## AIC: 643.1
##
## Number of Fisher Scoring iterations: 4
# Exponentiate the fitted log-odds coefficients to express them as odds ratios.
log_odds_coefs <- coef(mylogit)
exp(log_odds_coefs)
## (Intercept) Salary
## 1.7527010 1.0000012
## Gender2 Number.of.Years.of.Experience
## 0.1917382 0.9769396
## Perfrormance.Rating2 Perfrormance.Rating3
## 1.6443321 0.7658333
## Perfrormance.Rating4 Perfrormance.Rating5
## 1.0933694 0.8658683
## Marital.Status2 Education.in.Number.of.Years
## 1.2568403 1.0303170
# Put the coefficients next to their confidence limits (both on the log-odds
# scale), then exponentiate the whole table to get odds ratios with intervals.
log_scale_table <- cbind(OR = coef(mylogit), confint(mylogit))
exp(log_scale_table)
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 1.7527010 0.3267087 9.4916977
## Salary 1.0000012 0.9999987 1.0000037
## Gender2 0.1917382 0.1208944 0.2975612
## Number.of.Years.of.Experience 0.9769396 0.9365263 1.0186987
## Perfrormance.Rating2 1.6443321 0.9095399 2.9950697
## Perfrormance.Rating3 0.7658333 0.4269830 1.3683851
## Perfrormance.Rating4 1.0933694 0.6060339 1.9747156
## Perfrormance.Rating5 0.8658683 0.4758616 1.5717977
## Marital.Status2 1.2568403 0.8611915 1.8370597
## Education.in.Number.of.Years 1.0303170 0.9477446 1.1204413
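## An employee with Performance Rating 2 has about 1.64 times the odds of the modelled outcome (read here as retention) compared with the baseline Performance Rating 1; the figure does not apply to ratings "higher than 2".
## Note this is an odds ratio, not a ratio of probabilities, and its 95% CI (0.91 to 3.00) includes 1 (p = 0.101), so the effect is not statistically significant.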
# Hypothetical employee profile to score: average salary, Gender level "1",
# 15 years of experience, Performance Rating level "1", Marital.Status
# level "2" and 15 years of education.
newdata1 <- data.frame(
  Salary = mean(data_final$Salary),
  Gender = "1",
  Number.of.Years.of.Experience = 15,
  Perfrormance.Rating = "1",
  Marital.Status = "2",
  Education.in.Number.of.Years = 15
)
newdata1
## Salary Gender Number.of.Years.of.Experience Perfrormance.Rating
## 1 149713.7 1 15 1
## Marital.Status Education.in.Number.of.Years
## 1 2 15
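## Using the profile above, predict() with type = "response" gives the predicted probability of the modelled outcome, interpreted here as retention (this assumes the second level of Attrition codes retention; verify against the data coding).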
# Score the profile with the fitted model; type = "response" requests the
# prediction on the probability scale. The result is stored in a column
# named Attrition and the augmented row is displayed.
fitted_prob <- predict(mylogit, newdata = newdata1, type = "response")
newdata1$Attrition <- fitted_prob
newdata1
## Salary Gender Number.of.Years.of.Experience Perfrormance.Rating
## 1 149713.7 1 15 1
## Marital.Status Education.in.Number.of.Years Attrition
## 1 2 15 0.7441839
## The model's predicted probability of retention for this individual is about 74% (0.744).