Load necessary libraries and datasets
library(readr)
library(dplyr)
library(plotly)
# Read the HR CSV from the aiplanethub Datasets repository on GitHub into `hr`.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
Question 1
# Label each employee "Stayed" when `left` is 0 and "Left" otherwise.
hr1 <- mutate(
  hr,
  Employee_Status = ifelse(left == 0, "Stayed", "Left")
)
# Welch two-sample t-test of average monthly hours by employee status.
# `average_montly_hours` is the column name as spelled in the dataset.
t.test(hr1$average_montly_hours ~ hr1$Employee_Status)
##
## Welch Two Sample t-test
##
## data: hr1$average_montly_hours by hr1$Employee_Status
## t = 7.5323, df = 4875.1, p-value = 5.907e-14
## alternative hypothesis: true difference in means between group Left and group Stayed is not equal to 0
## 95 percent confidence interval:
## 6.183384 10.534631
## sample estimates:
## mean in group Left mean in group Stayed
## 207.4192 199.0602
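# Technical interpretation: The very small p-value indicates a significant difference in average monthly hours; employees who left worked at least 6 more hours per month (95% CI lower bound: 6.18).
# Non-technical interpretation: Employees who left worked more hours on average, at least 3% more, than those who stayed.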
Plot 1:
# Plot 1: Employee Status vs Average Monthly Hours
# Box plot with one box per employee status, coloured by status.
hr1 %>%
  plot_ly(
    type = "box",
    x = ~Employee_Status,
    y = ~average_montly_hours,
    color = ~Employee_Status,
    colors = c("maroon", "turquoise")
  ) %>%
  layout(
    title = "Employees that left on average, work more hours, at least 6 hours more"
  )
Question 2
# Label each employee "Stayed" when `left` is 0 and "Left" otherwise.
hr2 <- hr %>%
  mutate(Employee_Status = ifelse(left == 0, "Stayed", "Left"))
# Welch two-sample t-test of last evaluation score by employee status.
# Both sides of the formula come from hr2, so this section does not depend on
# hr1 from Question 1 having been created first.
t.test(hr2$last_evaluation ~ hr2$Employee_Status)
##
## Welch Two Sample t-test
##
## data: hr2$last_evaluation by hr2$Employee_Status
## t = 0.72534, df = 5154.9, p-value = 0.4683
## alternative hypothesis: true difference in means between group Left and group Stayed is not equal to 0
## 95 percent confidence interval:
## -0.004493874 0.009772224
## sample estimates:
## mean in group Left mean in group Stayed
## 0.7181126 0.7154734
# Technical interpretation: The high p-value (0.47) tells us that the difference in mean evaluation scores is not significant.
# Non-technical interpretation: Employees who left had essentially the same evaluation scores as those who stayed.
Plot 2:
# Plot 2: Employee Status vs Last Evaluation
# Box plot with one box per employee status, coloured by status.
plot_ly(
  hr2,
  x = ~Employee_Status,
  y = ~last_evaluation,
  type = "box",
  color = ~Employee_Status,
  colors = c("gold", "grey")
) %>%
  # The title was previously empty; it now states the finding, like the
  # titles of the other plots.
  layout(
    title = "Employees who left and stayed had similar evaluation scores"
  )
Question 3
# Label each employee "Stayed" when `left` is 0 and "Left" otherwise.
hr3 <- hr %>%
  mutate(Employee_Status = ifelse(left == 0, "Stayed", "Left"))
# Welch two-sample t-test of number of projects by employee status.
# Both sides of the formula come from hr3, so this section does not depend on
# hr1 from Question 1 having been created first.
t.test(hr3$number_project ~ hr3$Employee_Status)
##
## Welch Two Sample t-test
##
## data: hr3$number_project by hr3$Employee_Status
## t = 2.1663, df = 4236.5, p-value = 0.03034
## alternative hypothesis: true difference in means between group Left and group Stayed is not equal to 0
## 95 percent confidence interval:
## 0.006540119 0.131136535
## sample estimates:
## mean in group Left mean in group Stayed
## 3.855503 3.786664
# Technical interpretation: The small p-value (0.03) indicates a significant difference in the mean number of projects; employees who left averaged slightly more projects (3.86 vs 3.79).
# Non-technical interpretation: Employees who left generally worked on more projects than those who stayed.
Plot 3:
# Plot 3: Employee Status vs Number of Projects
# Box plot with one box per employee status, coloured by status.
hr3 %>%
  plot_ly(
    type = "box",
    x = ~Employee_Status,
    y = ~number_project,
    color = ~Employee_Status,
    colors = c("purple", "blue")
  ) %>%
  layout(
    title = "Employees who left generally worked on more projects"
  )
Question 4
# Label each employee "Stayed" when `left` is 0 and "Left" otherwise.
hr4 <- hr %>%
  mutate(Employee_Status = ifelse(left == 0, "Stayed", "Left"))
# Welch two-sample t-test of satisfaction level by employee status.
# Both sides of the formula come from hr4, so this section does not depend on
# hr1 from Question 1 having been created first.
t.test(hr4$satisfaction_level ~ hr4$Employee_Status)
##
## Welch Two Sample t-test
##
## data: hr4$satisfaction_level by hr4$Employee_Status
## t = -46.636, df = 5167, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group Left and group Stayed is not equal to 0
## 95 percent confidence interval:
## -0.2362417 -0.2171815
## sample estimates:
## mean in group Left mean in group Stayed
## 0.4400980 0.6668096
# Technical interpretation: The p-value indicates there’s a significant difference in satisfaction levels.
# Non-technical Interpretation: Employees who left tend to have lower satisfaction levels than those who stayed.
Plot 4:
# Plot 4: Employee Status vs Satisfaction Level
# Box plot with one box per employee status, coloured by status.
hr4 %>%
  plot_ly(
    type = "box",
    x = ~Employee_Status,
    y = ~satisfaction_level,
    color = ~Employee_Status,
    colors = c("green", "magenta")
  ) %>%
  layout(
    title = "Employees who left tend to have lower satisfaction levels"
  )