# Welch two-sample t-test: satisfaction level of employees who stayed vs. left.
# `left` is recoded to readable labels. The explicit level order keeps
# "Stayed" (originally coded 0) as the first group, so the direction of the
# estimated difference is the same as when testing on the raw 0/1 codes.
hr1 <- hr %>%
  mutate(left = factor(ifelse(left == 1, "Left", "Stayed"),
                       levels = c("Stayed", "Left")))
# Test on the labelled data (hr1) so the output names the groups explicitly.
t.test(satisfaction_level ~ left,
       data = hr1,
       conf.level = 0.999)
##
## Welch Two Sample t-test
##
## data: satisfaction_level by left
## t = 46.636, df = 5167, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group Stayed and group Left is not equal to 0
## 99.9 percent confidence interval:
## 0.2107063 0.2427168
## sample estimates:
## mean in group Stayed mean in group Left
## 0.6668096 0.4400980
# Box plot of satisfaction level by employee status (stayed vs. left).
# boxmean = TRUE draws the group mean on each box, so the plot actually shows
# the "on average" comparison stated in the title.
plot_ly(hr %>%
          mutate(Status = ifelse(left == 0, "Stayed", "Left")),
        x = ~Status,
        y = ~satisfaction_level,
        type = "box",
        boxmean = TRUE) %>%
  layout(title = "Employees who stayed are on average more satisfied",
         xaxis = list(title = "Employee Status"),
         yaxis = list(title = "Satisfaction Level"))
# Welch two-sample t-test: average monthly hours of employees who stayed vs.
# left. `left` is recoded to readable labels. The explicit level order keeps
# "Stayed" (originally coded 0) as the first group, so the direction of the
# estimated difference is the same as when testing on the raw 0/1 codes.
hr2 <- hr %>%
  mutate(left = factor(ifelse(left == 1, "Left", "Stayed"),
                       levels = c("Stayed", "Left")))
# Test on the labelled data (hr2) so the output names the groups explicitly.
# Note: `average_montly_hours` is the column's actual (misspelled) name.
t.test(average_montly_hours ~ left,
       data = hr2,
       conf.level = 0.999)
##
## Welch Two Sample t-test
##
## data: average_montly_hours by left
## t = -7.5323, df = 4875.1, p-value = 5.907e-14
## alternative hypothesis: true difference in means between group Stayed and group Left is not equal to 0
## 99.9 percent confidence interval:
## -12.012907 -4.705107
## sample estimates:
## mean in group Stayed mean in group Left
## 199.0602 207.4192
# Box plot of average monthly hours by employee status, with the group mean
# marked on each box (boxmean = TRUE).
hr %>%
  mutate(Status = ifelse(left == 0, "Stayed", "Left")) %>%
  plot_ly(
    x = ~Status,
    y = ~average_montly_hours,
    type = "box",
    boxmean = TRUE
  ) %>%
  layout(
    title = "Employees who left worked more hours on average",
    xaxis = list(title = "Employee Status"),
    yaxis = list(title = "Average Monthly Hours")
  )
### There is a significant difference in average monthly hours between employees who left and those who stayed: employees who left worked more hours on average.
# Compare the number of projects between salary groups.
# Step 1: drop the "medium" salary rows so only low vs. high is compared.
hr3 <- filter(hr, salary != "medium")
# Step 2: add a display label; every remaining salary value other than "low"
# is labelled "High Salary".
hr3 <- mutate(hr3, Salary = ifelse(salary == "low", "Low Salary", "High Salary"))
# Welch two-sample t-test of number_project by the two salary labels.
t.test(number_project ~ Salary, data = hr3, conf.level = 0.999)
##
## Welch Two Sample t-test
##
## data: number_project by Salary
## t = -0.95118, df = 1866, p-value = 0.3416
## alternative hypothesis: true difference in means between group High Salary and group Low Salary is not equal to 0
## 99.9 percent confidence interval:
## -0.14605586 0.08063187
## sample estimates:
## mean in group High Salary mean in group Low Salary
## 3.767179 3.799891
# Box plot of number of projects by salary group (low vs. high), with the
# group mean marked on each box. The title matches the accompanying t-test
# result (p = 0.3416; the 99.9% confidence interval spans 0): the small gap
# between the group means is not statistically significant.
plot_ly(hr3,
        x = ~Salary,
        y = ~number_project,
        type = "box",
        boxmean = TRUE) %>%
  layout(title = "No significant difference in projects handled between low and high salaries",
         xaxis = list(title = "Salary Level"),
         yaxis = list(title = "Number of Projects"))
# Welch two-sample t-test: time spent at the company for employees who were
# vs. were not promoted in the last five years.
# `promotion` is a readable label for the 0/1 promotion flag. The labels sort
# as "Not Promoted", "Promoted", which matches the original 0, 1 group order.
hr4 <- hr %>%
  mutate(promotion = ifelse(promotion_last_5years == 1, "Promoted", "Not Promoted"))
# Test on the labelled data (hr4) so the output names the groups explicitly.
t.test(time_spend_company ~ promotion,
       data = hr4,
       conf.level = 0.999)
##
## Welch Two Sample t-test
##
## data: time_spend_company by promotion
## t = -5.6111, df = 324.14, p-value = 4.316e-08
## alternative hypothesis: true difference in means between group Not Promoted and group Promoted is not equal to 0
## 99.9 percent confidence interval:
## -1.0863008 -0.2785489
## sample estimates:
## mean in group Not Promoted mean in group Promoted
## 3.483719 4.166144
# Box plot of time spent at the company by promotion status, with the group
# mean marked on each box and only outlying points drawn individually.
hr %>%
  mutate(Status = ifelse(promotion_last_5years == 1, "Promoted", "Not Promoted")) %>%
  plot_ly(
    x = ~Status,
    y = ~time_spend_company,
    type = "box",
    boxmean = TRUE,
    boxpoints = "outliers"
  ) %>%
  layout(
    title = "Employees who were promoted stayed longer at the company",
    xaxis = list(title = "Promotion Status"),
    yaxis = list(title = "Time Spent at Company")
  )