library(Zelig)
library(ZeligChoice)
library(faraway)
library(dplyr)
library(tidyr)
library(survival)
library(radiant.data)
library(readr)
jquit <- read_csv("/Users/Cruz/Desktop/jquit.csv", col_names = TRUE)
Parsed with column specification:
cols(
satisfaction_level = col_double(),
last_evaluation = col_double(),
number_project = col_integer(),
average_montly_hours = col_integer(),
time_spend_company = col_integer(),
Work_accident = col_integer(),
left = col_integer(),
promotion_last_5years = col_integer(),
sales = col_character(),
salary = col_character()
)
head(jquit)
jquit<-na.omit(jquit)
head(jquit)
jquit <- jquit%>%
mutate(salary = as.factor(salary))
(l <- sapply(jquit, function(x) is.factor(x)))
satisfaction_level last_evaluation number_project
FALSE FALSE FALSE
average_montly_hours time_spend_company Work_accident
FALSE FALSE FALSE
left promotion_last_5years sales
FALSE FALSE FALSE
salary
TRUE
xtabs(~left, jquit)
left
0 1
11428 3571
glm.model <- glm(left ~ ., data = jquit, family = "binomial")
summary(glm.model)
Call:
glm(formula = left ~ ., family = "binomial", data = jquit)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.2248 -0.6645 -0.4026 -0.1177 3.0688
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.4677765 0.1495486 3.128 0.00176 **
satisfaction_level -4.1356889 0.0980538 -42.178 < 0.0000000000000002 ***
last_evaluation 0.7309032 0.1491787 4.900 0.0000009607 ***
number_project -0.3150787 0.0213248 -14.775 < 0.0000000000000002 ***
average_montly_hours 0.0044603 0.0005161 8.643 < 0.0000000000000002 ***
time_spend_company 0.2677537 0.0155736 17.193 < 0.0000000000000002 ***
Work_accident -1.5298283 0.0895473 -17.084 < 0.0000000000000002 ***
promotion_last_5years -1.4301364 0.2574958 -5.554 0.0000000279 ***
saleshr 0.2323779 0.1313084 1.770 0.07678 .
salesIT -0.1807179 0.1221276 -1.480 0.13894
salesmanagement -0.4484236 0.1598254 -2.806 0.00502 **
salesmarketing -0.0120882 0.1319304 -0.092 0.92700
salesproduct_mng -0.1532529 0.1301538 -1.177 0.23901
salesRandD -0.5823659 0.1448848 -4.020 0.0000583195 ***
salessales -0.0387859 0.1024006 -0.379 0.70486
salessupport 0.0500251 0.1092834 0.458 0.64713
salestechnical 0.0701464 0.1065379 0.658 0.51027
salary2 -0.5308384 0.0456886 -11.619 < 0.0000000000000002 ***
salary3 -1.9440627 0.1286272 -15.114 < 0.0000000000000002 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 16465 on 14998 degrees of freedom
Residual deviance: 12850 on 14980 degrees of freedom
AIC: 12888
Number of Fisher Scoring iterations: 5
library(sjmisc)
jquit$salary <- recode(jquit$salary, "low" =1, "medium" =2, "high" =3)
head(jquit)
z.multi <- zelig(as.factor(left) ~ salary + satisfaction_level + average_montly_hours, model = "mlogit", data = jquit, cite = F)
summary(z.multi)
Model:
Call:
z5$zelig(formula = as.factor(left) ~ salary + satisfaction_level +
average_montly_hours, data = jquit)
Pearson residuals:
Min 1Q Median 3Q Max
log(mu[,1]/mu[,2]) -6.884 0.14 0.3496 0.5258 1.663
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.7939062 0.0997076 -7.962 0.00000000000000169
salary2 0.4985851 0.0438951 11.359 < 0.0000000000000002
salary3 1.8287732 0.1218011 15.014 < 0.0000000000000002
satisfaction_level 3.8058604 0.0887482 42.884 < 0.0000000000000002
average_montly_hours -0.0023689 0.0004092 -5.790 0.00000000705791948
Number of linear predictors: 1
Name of linear predictor: log(mu[,1]/mu[,2])
Residual deviance: 13777.33 on 14994 degrees of freedom
Log-likelihood: -6888.664 on 14994 degrees of freedom
Number of iterations: 6
Reference group is level 2 of the response
Next step: Use 'setx' method
x.low <- setx(z.multi, salary = "1")
x.high <- setx(z.multi, salary = "3")
s.multi <- sim(z.multi, x = x.high, x1 = x.low)
summary(s.multi)
sim x :
-----
ev
mean sd 50% 2.5% 97.5%
Pr(Y=0) 0.94668994 0.005853851 0.94691403 0.93417394 0.95801524
Pr(Y=1) 0.05331006 0.005853851 0.05308597 0.04198476 0.06582606
pv
1 2
[1,] 0.945 0.055
sim x1 :
-----
ev
mean sd 50% 2.5% 97.5%
Pr(Y=0) 0.7432765 0.005565909 0.7433088 0.7322855 0.7539784
Pr(Y=1) 0.2567235 0.005565909 0.2566912 0.2460216 0.2677145
pv
1 2
[1,] 0.76 0.24
fd
mean sd 50% 2.5% 97.5%
Pr(Y=0) -0.2034134 0.007834403 -0.2036135 -0.2178998 -0.1879264
Pr(Y=1) 0.2034134 0.007834403 0.2036135 0.1879264 0.2178998
graphics.off()
par("mar")
[1] 5.1 4.1 2.1 2.1
par(mar=c(1,1,1,1))
plot(s.multi)

ggplot(jquit, aes(x = salary, y = satisfaction_level)) + geom_jitter(alpha = 0.5)

ggplot(jquit, aes(x = average_montly_hours, y = satisfaction_level)) + geom_jitter(alpha = 0.5)

ggplot(jquit, aes(x = satisfaction_level, y = left)) + geom_jitter(alpha = 0.5)

ggplot(jquit, aes(x = average_montly_hours, y = left)) + geom_jitter(alpha = 0.5)

ggplot(jquit, aes(x =left , y = salary , color= "orange" )) + geom_jitter(alpha = 0.5)

LS0tCnRpdGxlOiAiQ3J1el9NQTcxMl9IVzdfSFJBIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpgYGB7cn0KbGlicmFyeShaZWxpZykKbGlicmFyeShaZWxpZ0Nob2ljZSkKbGlicmFyeShmYXJhd2F5KQpsaWJyYXJ5KGRwbHlyKQpsaWJyYXJ5KHRpZHlyKQpsaWJyYXJ5KHN1cnZpdmFsKQpgYGAKCmBgYHtyfQpsaWJyYXJ5KHJhZGlhbnQuZGF0YSkKbGlicmFyeShyZWFkcikKICBqcXVpdCA8LSByZWFkX2NzdigiL1VzZXJzL0NydXovRGVza3RvcC9qcXVpdC5jc3YiLCBjb2xfbmFtZXMgPSBUUlVFKQogaGVhZChqcXVpdCkKYGBgCgoKYGBge3J9CmpxdWl0PC1uYS5vbWl0KGpxdWl0KQpoZWFkKGpxdWl0KQpgYGAKCmBgYHtyfQoKanF1aXQgPC0ganF1aXQlPiUKICBtdXRhdGUoc2FsYXJ5ID0gYXMuZmFjdG9yKHNhbGFyeSkpCgoobCA8LSBzYXBwbHkoanF1aXQsIGZ1bmN0aW9uKHgpIGlzLmZhY3Rvcih4KSkpCmBgYApgYGB7cn0KeHRhYnMofmxlZnQsIGpxdWl0KQpgYGAKCgpgYGB7cn0KZ2xtLm1vZGVsIDwtIGdsbShsZWZ0IH4gLiwgZGF0YSA9IGpxdWl0LCBmYW1pbHkgPSAiYmlub21pYWwiKQpzdW1tYXJ5KGdsbS5tb2RlbCkKCmBgYAoKCmBgYHtyfQpsaWJyYXJ5KHNqbWlzYykKanF1aXQkc2FsYXJ5IDwtIHJlY29kZShqcXVpdCRzYWxhcnksICJsb3ciID0xLCAibWVkaXVtIiA9MiwgImhpZ2giID0zKQoKaGVhZChqcXVpdCkKYGBgCgoKCmBgYHtyfQp6Lm11bHRpIDwtIHplbGlnKGFzLmZhY3RvcihsZWZ0KSB+IHNhbGFyeSArIHNhdGlzZmFjdGlvbl9sZXZlbCArIGF2ZXJhZ2VfbW9udGx5X2hvdXJzLCBtb2RlbCA9ICJtbG9naXQiLCBkYXRhID0ganF1aXQsIGNpdGUgPSBGKQpzdW1tYXJ5KHoubXVsdGkpCmBgYAoKCmBgYHtyfQp4LmxvdyA8LSBzZXR4KHoubXVsdGksIHNhbGFyeSA9ICIxIikKeC5oaWdoIDwtIHNldHgoei5tdWx0aSwgc2FsYXJ5ID0gIjMiKQpgYGAKCmBgYHtyfQpzLm11bHRpIDwtIHNpbSh6Lm11bHRpLCB4ID0geC5oaWdoLCB4MSA9IHgubG93KQpzdW1tYXJ5KHMubXVsdGkpCmBgYAoKCmBgYHtyfQpncmFwaGljcy5vZmYoKQogcGFyKCJtYXIiKQogcGFyKG1hcj1jKDEsMSwxLDEpKQpwbG90KHMubXVsdGkpCmBgYAoKCgpgYGB7cn0KZ2dwbG90KGpxdWl0LCBhZXMoeCA9IHNhbGFyeSwgeSA9IHNhdGlzZmFjdGlvbl9sZXZlbCkpICsgZ2VvbV9qaXR0ZXIoYWxwaGEgPSAwLjUpCmBgYAoKYGBge3J9CmdncGxvdChqcXVpdCwgYWVzKHggPSBhdmVyYWdlX21vbnRseV9ob3VycywgeSA9IHNhdGlzZmFjdGlvbl9sZXZlbCkpICsgZ2VvbV9qaXR0ZXIoYWxwaGEgPSAwLjUpCmBgYApgYGB7cn0KZ2dwbG90KGpxdWl0LCBhZXMoeCA9IHNhdGlzZmFjdGlvbl9sZXZlbCwgeSA9IGxlZnQpKSArIGdlb21faml0dGVyKGFscGhhID0gMC41KQpgYGAKCmBgYHtyfQpnZ3Bsb3QoanF1aXQsIGFlcyh4ID0gYXZlcmFnZV9tb250bHlfaG91cnMsIHkgPSBsZWZ0KSkgKyBnZW9tX2ppdHRlcihhbHBoYSA9IDAuNSkKYGBgCgoKYGBge3J9CmdncGxvdChqcXVpdCwgYWVzKHggPWxlZnQgLCB5ID0gc2FsYXJ5ICwgY29sb3I9ICJvcmFuZ2UiICkpICsgZ2VvbV9qaXR0ZXIoYWxwaGEgPSAwLjUpCmBgYAoK