INTRO

How does the relationship between a promotion, avg monthly hours worked and time spent with a company effect salary ?

library(Zelig)
library(ZeligChoice)
library(faraway)
library(dplyr)
library(tidyr)
library(survival)

Data Upload - Human Resource Data

library(radiant.data)
library(readr)
  jquit <- read_csv("/Users/Cruz/Desktop/jquit.csv", col_names = TRUE)
Parsed with column specification:
cols(
  satisfaction_level = col_double(),
  last_evaluation = col_double(),
  number_project = col_integer(),
  average_montly_hours = col_integer(),
  time_spend_company = col_integer(),
  Work_accident = col_integer(),
  left = col_integer(),
  promotion_last_5years = col_integer(),
  sales = col_character(),
  salary = col_character()
)
 head(jquit)

Cleaning data set

jquit<-na.omit(jquit)
head(jquit)

Running a sample regression to get an idea of some of the variables in the data set.

glm.model <- glm(left ~ ., data = jquit, family = "binomial")
summary(glm.model)

Call:
glm(formula = left ~ ., family = "binomial", data = jquit)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.2248  -0.6645  -0.4026  -0.1177   3.0688  

Coefficients:
                        Estimate Std. Error z value             Pr(>|z|)    
(Intercept)           -1.4762862  0.1938373  -7.616   0.0000000000000261 ***
satisfaction_level    -4.1356889  0.0980538 -42.178 < 0.0000000000000002 ***
last_evaluation        0.7309032  0.1491787   4.900   0.0000009607392419 ***
number_project        -0.3150787  0.0213248 -14.775 < 0.0000000000000002 ***
average_montly_hours   0.0044603  0.0005161   8.643 < 0.0000000000000002 ***
time_spend_company     0.2677537  0.0155736  17.193 < 0.0000000000000002 ***
Work_accident         -1.5298283  0.0895473 -17.084 < 0.0000000000000002 ***
promotion_last_5years -1.4301364  0.2574958  -5.554   0.0000000279174879 ***
saleshr                0.2323779  0.1313084   1.770              0.07678 .  
salesIT               -0.1807179  0.1221276  -1.480              0.13894    
salesmanagement       -0.4484236  0.1598254  -2.806              0.00502 ** 
salesmarketing        -0.0120882  0.1319304  -0.092              0.92700    
salesproduct_mng      -0.1532529  0.1301538  -1.177              0.23901    
salesRandD            -0.5823659  0.1448848  -4.020   0.0000583195125819 ***
salessales            -0.0387859  0.1024006  -0.379              0.70486    
salessupport           0.0500251  0.1092834   0.458              0.64713    
salestechnical         0.0701464  0.1065379   0.658              0.51027    
salarylow              1.9440627  0.1286272  15.114 < 0.0000000000000002 ***
salarymedium           1.4132244  0.1293534  10.925 < 0.0000000000000002 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 16465  on 14998  degrees of freedom
Residual deviance: 12850  on 14980  degrees of freedom
AIC: 12888

Number of Fisher Scoring iterations: 5
xtabs(~time_spend_company, jquit)
time_spend_company
   2    3    4    5    6    7    8   10 
3244 6443 2557 1473  718  188  162  214 
xtabs(~promotion_last_5years, jquit)
promotion_last_5years
    0     1 
14680   319 
xtabs(~salary, jquit)
salary
  high    low medium 
  1237   7316   6446 
xtabs(~left, jquit)
left
    0     1 
11428  3571 

Preping data for ologit statistical model.

jquit$salary <- factor(jquit$salary, ordered = TRUE,
                         levels = c("low", "medium", "high"))
jquit <- jquit%>%
  mutate(promotion_last_5years = as.factor(promotion_last_5years))

Statistical Model, ologit

z.ord <- zelig(salary ~ promotion_last_5years + time_spend_company + average_montly_hours, model = "ologit",
               data = jquit, cite = F)
summary(z.ord)
Model: 
Call:
z5$zelig(formula = salary ~ promotion_last_5years + time_spend_company + 
    average_montly_hours, data = jquit)

Coefficients:
                            Value Std. Error t value
promotion_last_5years1  1.2365287  0.1087756 11.3677
time_spend_company      0.0583458  0.0110104  5.2991
average_montly_hours   -0.0002667  0.0003201 -0.8332

Intercepts:
            Value   Std. Error t value
low|medium   0.1245  0.0722     1.7232
medium|high  2.6071  0.0770    33.8654

Residual Deviance: 27402.43 
AIC: 27412.43 
Next step: Use 'setx' method
x.no <- setx(z.ord, promotion_last_5years = 0)
x.yes <- setx(z.ord, promotion_last_5years = 1)
s.ord <- sim(z.ord, x = x.no, x1 = x.yes)
summary(s.ord)

 sim x :
 -----
ev
             mean          sd        50%      2.5%     97.5%
low    0.49357425 0.004222154 0.49363490 0.4855003 0.5015480
medium 0.42599681 0.014192172 0.42670095 0.3961678 0.4519043
high   0.08042894 0.014225950 0.07984794 0.0544646 0.1098004
pv
     mean        sd 50% 2.5% 97.5%
[1,] 1.59 0.6326929   2    1     3

 sim x1 :
 -----
ev
            mean         sd       50%      2.5%     97.5%
low    0.2215416 0.01899078 0.2210196 0.1848516 0.2598762
medium 0.5478459 0.03385728 0.5474324 0.4800402 0.6105912
high   0.2306125 0.03880620 0.2284118 0.1580320 0.3108975
pv
      mean        sd 50% 2.5% 97.5%
[1,] 2.032 0.6718835   2    1     3
fd
             mean         sd        50%        2.5%      97.5%
low    -0.2720326 0.01909066 -0.2726565 -0.30695112 -0.2331672
medium  0.1218491 0.02029567  0.1211630  0.08295922  0.1608032
high    0.1501836 0.02748536  0.1484966  0.10036369  0.2066461
graphics.off()
 par("mar")
[1] 5.1 4.1 2.1 2.1
 par(mar=c(1,1,1,1))
plot(s.ord)

c1x <- setx(z.ord, promotion_last_5years = "0", jquit )
c1x1 <- setx(z.ord, promotion_last_5years = "1", jquit)
c1s <- sim(z.ord, x = c1x, x1 = c1x1)
graphics.off()
 par("mar")
[1] 5.1 4.1 2.1 2.1
 par(mar=c(1,1,1,1))
plot(c1s)

d1 <- c1s$get_qi(xvalue="x1", qi="fd")
dfd <- as.data.frame(cbind(d1))
head(dfd)

Difference between those who did and did not receive a promo in last 5 months in Likelihood of being in Low income group = mean = (-0.268) units

library(ggplot2)
ggplot(dfd)+
  geom_histogram(aes(x=low),color = "black", fill = "aqua marine1")

Difference between those who did and did not receive a promo in last 5 months in Likelihood of being in Medium income group = mean = (0.126)

ggplot(dfd)+
  geom_histogram(aes(x=medium),color = "black", fill = "aqua marine1")

Difference between those who did and did not receive a promo in last 5 months in Likelihood of being in High income group = mean = (0.156)

ggplot(dfd)+
  geom_histogram(aes(x=high),color = "black", fill = "aqua marine1") 

LS0tCnRpdGxlOiAiQ1JVWl9NQTcxMl9IVzdIUkFfU1RBVElTVElDQUxNT0RFTFNfT0xPR0lUIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCiNJTlRSTwoKIyBIb3cgZG9lcyB0aGUgcmVsYXRpb25zaGlwIGJldHdlZW4gYSBwcm9tb3Rpb24sIGF2ZyBtb250aGx5IGhvdXJzIHdvcmtlZCBhbmQgdGltZSBzcGVudCB3aXRoIGEgY29tcGFueSBlZmZlY3Qgc2FsYXJ5ID8KCmBgYHtyfQpsaWJyYXJ5KFplbGlnKQpsaWJyYXJ5KFplbGlnQ2hvaWNlKQpsaWJyYXJ5KGZhcmF3YXkpCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkodGlkeXIpCmxpYnJhcnkoc3Vydml2YWwpCmBgYAojRGF0YSBVcGxvYWQgLSBIdW1hbiBSZXNvdXJjZSBEYXRhCmBgYHtyfQpsaWJyYXJ5KHJhZGlhbnQuZGF0YSkKbGlicmFyeShyZWFkcikKICBqcXVpdCA8LSByZWFkX2NzdigiL1VzZXJzL0NydXovRGVza3RvcC9qcXVpdC5jc3YiLCBjb2xfbmFtZXMgPSBUUlVFKQogaGVhZChqcXVpdCkKYGBgCiNDbGVhbmluZyBkYXRhIHNldApgYGB7cn0KanF1aXQ8LW5hLm9taXQoanF1aXQpCmhlYWQoanF1aXQpCmBgYAojUnVubmluZyBhIHNhbXBsZSByZWdyZXNzaW9uIHRvIGdldCBhbiBpZGVhIG9mIHNvbWUgb2YgdGhlIHZhcmlhYmxlcyBpbiB0aGUgZGF0YSBzZXQuCmBgYHtyfQpnbG0ubW9kZWwgPC0gZ2xtKGxlZnQgfiAuLCBkYXRhID0ganF1aXQsIGZhbWlseSA9ICJiaW5vbWlhbCIpCnN1bW1hcnkoZ2xtLm1vZGVsKQpgYGAKCgpgYGB7cn0KeHRhYnMofnRpbWVfc3BlbmRfY29tcGFueSwganF1aXQpCgp4dGFicyh+cHJvbW90aW9uX2xhc3RfNXllYXJzLCBqcXVpdCkKCnh0YWJzKH5zYWxhcnksIGpxdWl0KQoKeHRhYnMofmxlZnQsIGpxdWl0KQoKYGBgCiNQcmVwaW5nIGRhdGEgZm9yIG9sb2dpdCBzdGF0aXN0aWNhbCBtb2RlbC4KYGBge3J9CmpxdWl0JHNhbGFyeSA8LSBmYWN0b3IoanF1aXQkc2FsYXJ5LCBvcmRlcmVkID0gVFJVRSwKICAgICAgICAgICAgICAgICAgICAgICAgIGxldmVscyA9IGMoImxvdyIsICJtZWRpdW0iLCAiaGlnaCIpKQoKanF1aXQgPC0ganF1aXQlPiUKICBtdXRhdGUocHJvbW90aW9uX2xhc3RfNXllYXJzID0gYXMuZmFjdG9yKHByb21vdGlvbl9sYXN0XzV5ZWFycykpCmBgYAojU3RhdGlzdGljYWwgTW9kZWwsIG9sb2dpdApgYGB7cn0Kei5vcmQgPC0gemVsaWcoc2FsYXJ5IH4gcHJvbW90aW9uX2xhc3RfNXllYXJzICsgdGltZV9zcGVuZF9jb21wYW55ICsgYXZlcmFnZV9tb250bHlfaG91cnMsIG1vZGVsID0gIm9sb2dpdCIsCiAgICAgICAgICAgICAgIGRhdGEgPSBqcXVpdCwgY2l0ZSA9IEYpCnN1bW1hcnkoei5vcmQpCmBgYAoKYGBge3J9Cngubm8gPC0gc2V0eCh6Lm9yZCwgcHJvbW90aW9uX2xhc3RfNXllYXJzID0gMCkKeC55ZXMgPC0gc2V0eCh6Lm9yZCwgcHJvbW90aW9uX2xhc3RfNXllYXJzID0gMSkKYGBgCgpgYGB7cn0Kcy5vcmQgPC0gc2ltKHoub3JkLCB4ID0geC5ubywgeDEgPSB4LnllcykKc3VtbWFyeShzLm9yZCkKYGBgCgpgYGB7cn0KZ3JhcGhpY3Mub2ZmKCkKIHBhcigibWFyIikKIHBhcihtYXI9YygxLDEsMSwxKSkKcGxvdChzLm9yZCkKYGBgCgoKYGBge3J9CmMxeCA8LSBzZXR4KHoub3JkLCBwcm9tb3Rpb25fbGFzdF81eWVhcnMgPSAiMCIsIGpxdWl0ICkKYzF4MSA8LSBzZXR4KHoub3JkLCBwcm9tb3Rpb25fbGFzdF81eWVhcnMgPSAiMSIsIGpxdWl0KQpjMXMgPC0gc2ltKHoub3JkLCB4ID0gYzF4LCB4MSA9IGMxeDEpCmdyYXBoaWNzLm9mZigpCiBwYXIoIm1hciIpCmBgYAoKCmBgYHtyfQogcGFyKG1hcj1jKDEsMSwxLDEpKQpwbG90KGMxcykKYGBgCgoKCgpgYGB7cn0KZDEgPC0gYzFzJGdldF9xaSh4dmFsdWU9IngxIiwgcWk9ImZkIikKZGZkIDwtIGFzLmRhdGEuZnJhbWUoY2JpbmQoZDEpKQpoZWFkKGRmZCkKCmBgYAoKCgojRGlmZmVyZW5jZSBiZXR3ZWVuIHRob3NlIHdobyBkaWQgYW5kIGRpZCBub3QgcmVjZWl2ZSBhIHByb21vIGluIGxhc3QgNSBtb250aHMgaW4gTGlrZWxpaG9vZCBvZiBiZWluZyBpbiBMb3cgaW5jb21lIGdyb3VwID0gbWVhbiA9ICgtMC4yNjgpIHVuaXRzCgpgYGB7cn0KbGlicmFyeShnZ3Bsb3QyKQoKZ2dwbG90KGRmZCkrCiAgZ2VvbV9oaXN0b2dyYW0oYWVzKHg9bG93KSxjb2xvciA9ICJibGFjayIsIGZpbGwgPSAiYXF1YSBtYXJpbmUxIikKCmBgYAoKCiNEaWZmZXJlbmNlIGJldHdlZW4gdGhvc2Ugd2hvIGRpZCBhbmQgZGlkIG5vdCByZWNlaXZlIGEgcHJvbW8gaW4gbGFzdCA1IG1vbnRocyBpbiBMaWtlbGlob29kIG9mIGJlaW5nIGluIE1lZGl1bSBpbmNvbWUgZ3JvdXAgPSBtZWFuID0gKDAuMTI2KQpgYGB7cn0KZ2dwbG90KGRmZCkrCiAgZ2VvbV9oaXN0b2dyYW0oYWVzKHg9bWVkaXVtKSxjb2xvciA9ICJibGFjayIsIGZpbGwgPSAiYXF1YSBtYXJpbmUxIikKYGBgCgojRGlmZmVyZW5jZSBiZXR3ZWVuIHRob3NlIHdobyBkaWQgYW5kIGRpZCBub3QgcmVjZWl2ZSBhIHByb21vIGluIGxhc3QgNSBtb250aHMgaW4gTGlrZWxpaG9vZCBvZiBiZWluZyBpbiBIaWdoIGluY29tZSBncm91cCA9IG1lYW4gPSAoMC4xNTYpCmBgYHtyfQpnZ3Bsb3QoZGZkKSsKICBnZW9tX2hpc3RvZ3JhbShhZXMoeD1oaWdoKSxjb2xvciA9ICJibGFjayIsIGZpbGwgPSAiYXF1YSBtYXJpbmUxIikgCmBgYAoKCgoK