library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Read the HR CSV straight from its raw GitHub URL into a tibble.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Add a readable attrition label: `left == 0` becomes "Stayed", any other
# non-missing value becomes "Left".
hr1 <- mutate(
  hr,
  Employee_Status = ifelse(left == 0, "Stayed", "Left")
)

# Two-sample t-test of average monthly hours, split by Employee_Status.
t.test(hr1$average_montly_hours ~ hr1$Employee_Status)
##
## Welch Two Sample t-test
##
## data: hr1$average_montly_hours by hr1$Employee_Status
## t = 7.5323, df = 4875.1, p-value = 5.907e-14
## alternative hypothesis: true difference in means between group Left and group Stayed is not equal to 0
## 95 percent confidence interval:
## 6.183384 10.534631
## sample estimates:
## mean in group Left mean in group Stayed
## 207.4192 199.0602
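There is a statistically significant difference between the group means
(p < 0.001): employees who left worked about 8.4 more hours per month on
average, and at least 6 hours more (lower bound of the 95% confidence interval).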
Descriptive: Employees who left worked more hours on average,
at least 3% more than those who stayed.
Prescriptive: To reduce employee attrition, consider reducing average
monthly hours by about 3% for employees who work longer hours.
# Box plot of average monthly hours with one box per Employee_Status group;
# the same grouping variable drives both the x axis and the box colour.
hr1 %>%
  plot_ly(
    x = ~Employee_Status,
    y = ~average_montly_hours,
    color = ~Employee_Status,
    colors = c("#1e9b20", "blue"),
    type = "box"
  ) %>%
  layout(
    title = "employees that left, on average, work more hours, at least 3% more"
  )
# `hr1` (hr plus the Employee_Status label) was already built for the
# monthly-hours test earlier in this script; reuse it here rather than
# repeating the identical mutate().
# Two-sample t-test of satisfaction level, split by Employee_Status.
t.test(hr1$satisfaction_level ~ hr1$Employee_Status)
##
## Welch Two Sample t-test
##
## data: hr1$satisfaction_level by hr1$Employee_Status
## t = -46.636, df = 5167, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group Left and group Stayed is not equal to 0
## 95 percent confidence interval:
## -0.2362417 -0.2171815
## sample estimates:
## mean in group Left mean in group Stayed
## 0.4400980 0.6668096
# Box plot of satisfaction level with one box per Employee_Status group;
# the same grouping variable drives both the x axis and the box colour.
hr1 %>%
  plot_ly(
    x = ~Employee_Status,
    y = ~satisfaction_level,
    color = ~Employee_Status,
    colors = c("#1e9b20", "blue"),
    type = "box"
  ) %>%
  layout(
    title = "Employees that left, on average, have lower satisfaction levels"
  )
Employees who left the company had significantly lower satisfaction
levels than those who stayed (mean 0.44 vs. 0.67, p < 2.2e-16), and
higher last evaluation scores (the evaluation comparison is not tested
in this section).
Descriptive: Employees who were less satisfied or
those with high performance scores were more likely to leave.
Prescriptive: To reduce turnover, focus on
improving job satisfaction for all employees.
Employees who left the company had significantly different last
evaluation scores compared to those who stayed.
Descriptive: Employees who left generally had different evaluation
scores, indicating that performance may have influenced their decision
to leave.
Employees who left the company had significantly different numbers
of projects compared to those who stayed.
Descriptive: Employees who left, on average, had a different number
of projects compared to employees who stayed.
Prescriptive: To reduce turnover, consider managing the workload by
adjusting the number of projects for employees, particularly for those
who may be overwhelmed.