Load necessary libraries and datasets

library(readr)
library(dplyr)
library(plotly)

# Download the HR CSV straight from GitHub and keep it as `hr`, the base
# table every question below derives its working copy from.
hr <- read_csv(
  file = 'https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv'
)

Question 1

# Add a readable group label derived from the `left` flag:
# 0 -> 'Stayed', any other value -> 'Left'.
hr1 <- mutate(
  hr,
  Employee_Status = ifelse(left != 0, 'Left', 'Stayed')
)

# Two-sample t-test (Welch by default): do average monthly hours differ
# between the two status groups? The column name is spelled as in the data.
t.test(hr1$average_montly_hours ~ hr1$Employee_Status)
## 
##  Welch Two Sample t-test
## 
## data:  hr1$average_montly_hours by hr1$Employee_Status
## t = 7.5323, df = 4875.1, p-value = 5.907e-14
## alternative hypothesis: true difference in means between group Left and group Stayed is not equal to 0
## 95 percent confidence interval:
##   6.183384 10.534631
## sample estimates:
##   mean in group Left mean in group Stayed 
##             207.4192             199.0602
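# Technical interpretation: The very small p-value indicates a significant difference in mean monthly hours; with 95% confidence, employees who left worked between about 6 and 10.5 more hours per month than those who stayed.
# Non-technical interpretation: Employees who left worked more hours on average than those who stayed, by at least about 3%.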

Plot 1:

# Plot 1: Employee Status vs Average Monthly Hours
# One box per status group, coloured by group; the title states the finding.
hr1 %>%
  plot_ly(
    type = "box",
    x = ~Employee_Status,
    y = ~average_montly_hours,
    color = ~Employee_Status,
    colors = c('maroon','turquoise')
  ) %>%
  layout(title = 'Employees that left on average, work more hours, at least 6 hours more')

Question 2

# Add a readable group label derived from the `left` flag:
# 0 -> 'Stayed', any other value -> 'Left'.
hr2 <- hr %>%
  mutate(Employee_Status = ifelse(left == 0, 'Stayed', 'Left'))

# Two-sample t-test (Welch by default) of last evaluation score by status.
# Both sides of the formula are taken from hr2 so the response and the
# grouping vector are row-aligned by construction and this chunk does not
# depend on hr1 existing.
t.test(hr2$last_evaluation ~ hr2$Employee_Status)
## 
##  Welch Two Sample t-test
## 
## data:  hr2$last_evaluation by hr2$Employee_Status
## t = 0.72534, df = 5154.9, p-value = 0.4683
## alternative hypothesis: true difference in means between group Left and group Stayed is not equal to 0
## 95 percent confidence interval:
##  -0.004493874  0.009772224
## sample estimates:
##   mean in group Left mean in group Stayed 
##            0.7181126            0.7154734
# Technical Interpretation: The high p-value (0.47) tells us that the difference in mean evaluation scores is not statistically significant.
# Non-technical Interpretation: Employees who left had evaluation scores that were not meaningfully different from those of employees who stayed.

Plot 2:

# Plot 2: Employee Status vs Last Evaluation
# One box per status group, coloured by group. The title states the t-test
# finding (no significant difference) instead of being left blank, so the
# chart is self-explanatory like the other plots in this analysis.
plot_ly(hr2,
        x = ~Employee_Status,
        y = ~last_evaluation,
        type = "box",
        color = ~Employee_Status,
        colors = c('gold','grey')) %>%
  layout(title = 'Employees who left and those who stayed had similar evaluation scores')

Question 3

# Add a readable group label derived from the `left` flag:
# 0 -> 'Stayed', any other value -> 'Left'.
hr3 <- hr %>%
  mutate(Employee_Status = ifelse(left == 0, 'Stayed', 'Left'))

# Two-sample t-test (Welch by default) of number of projects by status.
# Both sides of the formula are taken from hr3 so the response and the
# grouping vector are row-aligned by construction and this chunk does not
# depend on hr1 existing.
t.test(hr3$number_project ~ hr3$Employee_Status)
## 
##  Welch Two Sample t-test
## 
## data:  hr3$number_project by hr3$Employee_Status
## t = 2.1663, df = 4236.5, p-value = 0.03034
## alternative hypothesis: true difference in means between group Left and group Stayed is not equal to 0
## 95 percent confidence interval:
##  0.006540119 0.131136535
## sample estimates:
##   mean in group Left mean in group Stayed 
##             3.855503             3.786664
# Technical interpretation: The small p-value (0.03 < 0.05) indicates a statistically significant difference in the number of projects by employee status; employees who left averaged slightly more projects than those who stayed.

# Non-technical interpretation: Employees who left generally worked on more projects than those who stayed.

Plot 3:

# Plot 3: Employee Status vs Number of Projects
# One box per status group, coloured by group; the title states the finding.
hr3 %>%
  plot_ly(
    type = "box",
    x = ~Employee_Status,
    y = ~number_project,
    color = ~Employee_Status,
    colors = c('purple', 'blue')
  ) %>%
  layout(title = 'Employees who left generally worked on more projects')

Question 4

# Add a readable group label derived from the `left` flag:
# 0 -> 'Stayed', any other value -> 'Left'.
hr4 <- hr %>%
  mutate(Employee_Status = ifelse(left == 0, 'Stayed', 'Left'))

# Two-sample t-test (Welch by default) of satisfaction level by status.
# Both sides of the formula are taken from hr4 so the response and the
# grouping vector are row-aligned by construction and this chunk does not
# depend on hr1 existing.
t.test(hr4$satisfaction_level ~ hr4$Employee_Status)
## 
##  Welch Two Sample t-test
## 
## data:  hr4$satisfaction_level by hr4$Employee_Status
## t = -46.636, df = 5167, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group Left and group Stayed is not equal to 0
## 95 percent confidence interval:
##  -0.2362417 -0.2171815
## sample estimates:
##   mean in group Left mean in group Stayed 
##            0.4400980            0.6668096
# Technical interpretation: The p-value indicates there’s a significant difference in satisfaction levels.
# Non-technical Interpretation: Employees who left tend to have lower satisfaction levels than those who stayed.

Plot 4:

# Plot 4: Employee Status vs Satisfaction Level
# One box per status group, coloured by group; the title states the finding.
hr4 %>%
  plot_ly(
    type = "box",
    x = ~Employee_Status,
    y = ~satisfaction_level,
    color = ~Employee_Status,
    colors = c('green', 'magenta')
  ) %>%
  layout(title = 'Employees who left tend to have lower satisfaction levels')