# Build the analysis data set by adding two label columns to `hr`:
#   Employee_Status - "Stayed" where `left` equals 0, otherwise "Left"
#   WorkAccidents   - "none" where `Work_accident` equals 0, otherwise "yes"
hr1 <- mutate(
  hr,
  Employee_Status = ifelse(left != 0, "Left", "Stayed"),
  WorkAccidents = ifelse(Work_accident != 0, "yes", "none")
)
1a.
# Two-sample t-test (Welch, per the printed result) comparing average monthly
# hours between the two Employee_Status groups ("Left" vs "Stayed").
# The formula is written with `hr1$` prefixes; the printed `data:` line is
# derived from these exact terms.
t.test(hr1$average_montly_hours ~ hr1$Employee_Status)
##
## Welch Two Sample t-test
##
## data: hr1$average_montly_hours by hr1$Employee_Status
## t = 7.5323, df = 4875.1, p-value = 5.907e-14
## alternative hypothesis: true difference in means between group Left and group Stayed is not equal to 0
## 95 percent confidence interval:
## 6.183384 10.534631
## sample estimates:
## mean in group Left mean in group Stayed
## 207.4192 199.0602
1b.
1c.
1d.
# Box plot of average monthly hours, one box per Employee_Status group.
# The y axis is pinned to 0-350 so both boxes share the same fixed scale.
hr1 %>%
  plot_ly(
    x = ~Employee_Status,
    y = ~average_montly_hours,
    color = ~Employee_Status,
    colors = c("#29a21a", "blue"),
    type = "box"
  ) %>%
  layout(
    title = "employees that left on average, work more hours, at least 3% more",
    xaxis = list(title = "Employee Status"),
    yaxis = list(title = "Average Monthly Hours", range = c(0, 350))
  )
2a.
# Two-sample t-test (Welch, per the printed result) comparing average monthly
# hours between the promotion_last_5years groups (reported as group 0 and
# group 1 in the output).
t.test(hr1$average_montly_hours ~ hr1$promotion_last_5years)
##
## Welch Two Sample t-test
##
## data: hr1$average_montly_hours by hr1$promotion_last_5years
## t = 0.44937, df = 333.03, p-value = 0.6535
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## -4.143788 6.597589
## sample estimates:
## mean in group 0 mean in group 1
## 201.0764 199.8495
2b.
2c.
2d.
# Box plot of average monthly hours, one box per promotion_last_5years group.
# The grouping variable is a numeric code, so it is converted to a factor for
# BOTH the x position and the colour mapping. The original passed the raw
# numeric column as x, which presumably yields a continuous axis (verify) —
# misleading for a two-group comparison and inconsistent with the factor
# already used for colour.
plot_ly(
  hr1,
  x = ~as.factor(promotion_last_5years),
  y = ~average_montly_hours,
  type = 'box',
  color = ~as.factor(promotion_last_5years),
  colors = c('#ff7c00', '#00e2ca')
) %>%
  layout(
    title = 'Working more hours per month does not increase the chances of receiving a promotion ',
    yaxis = list(title = 'Average Monthly Hours', range = c(0, 350)),
    xaxis = list(title = 'Promotion in Last 5 Years')
  )
3a.
# Keep only the rows for the two departments being compared.
dept_data <- filter(hr1, Department %in% c("marketing", "IT"))

# Two-sample t-test of satisfaction level between the two retained departments.
t.test(satisfaction_level ~ Department, data = dept_data)
##
## Welch Two Sample t-test
##
## data: satisfaction_level by Department
## t = -0.041877, df = 1870.2, p-value = 0.9666
## alternative hypothesis: true difference in means between group IT and group marketing is not equal to 0
## 95 percent confidence interval:
## -0.02198374 0.02106456
## sample estimates:
## mean in group IT mean in group marketing
## 0.6181418 0.6186014
3b.
3c.
3d.
# Box plot of satisfaction level for the departments kept in `dept_data`,
# one box per department. The y axis is fixed to 0-1.2.
plot_ly(
  dept_data,
  x = ~Department,
  y = ~satisfaction_level,
  type = 'box',
  color = ~Department,
  colors = c('purple', 'green')
) %>%
  layout(
    # Title typo fixed: "will result is same" -> "will result in the same".
    title = 'Working in IT or Marketing will result in the same satisfaction level',
    yaxis = list(title = 'Satisfaction Level', range = c(0, 1.2)),
    xaxis = list(title = 'Department')
  )
4a.
# Two-sample t-test (Welch, per the printed result) comparing time spent at
# the company between the Work_accident groups (reported as group 0 and
# group 1 in the output). Uses the raw Work_accident code rather than the
# derived WorkAccidents label column.
t.test( hr1$time_spend_company ~ hr1$Work_accident)
##
## Welch Two Sample t-test
##
## data: hr1$time_spend_company by hr1$Work_accident
## t = -0.23359, df = 2738.4, p-value = 0.8153
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## -0.08269677 0.06509122
## sample estimates:
## mean in group 0 mean in group 1
## 3.496960 3.505763
4b.
4c.
4d.
# Box plot of time spent at the company, one box per WorkAccidents label.
# No y-axis range is set here, so plotly chooses the scale.
hr1 %>%
  plot_ly(
    x = ~WorkAccidents,
    y = ~time_spend_company,
    color = ~WorkAccidents,
    colors = c("red", "blue"),
    type = "box"
  ) %>%
  layout(
    title = "Time at the company isn’t affected by work accidents",
    xaxis = list(title = "Work Accidents"),
    yaxis = list(title = "Time Spent at Company")
  )