library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ plotly::filter() masks dplyr::filter(), stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Download HR_comma_sep.csv from GitHub and parse it with readr.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Add a readable attrition label derived from the `left` column:
# rows where `left` is 0 become "Stayed", every other row becomes "Left".
hr1 <- mutate(
  hr,
  Employee_status = ifelse(left != 0, "Left", "Stayed")
)

# Two-sample t-test of average monthly hours between the two status groups.
t.test(hr1$average_montly_hours ~ hr1$Employee_status)
## 
##  Welch Two Sample t-test
## 
## data:  hr1$average_montly_hours by hr1$Employee_status
## t = 7.5323, df = 4875.1, p-value = 5.907e-14
## alternative hypothesis: true difference in means between group Left and group Stayed is not equal to 0
## 95 percent confidence interval:
##   6.183384 10.534631
## sample estimates:
##   mean in group Left mean in group Stayed 
##             207.4192             199.0602

The p-value is very small (5.9e-14), meaning there is a statistically significant difference in average monthly hours worked between employees who left and employees who stayed.

The difference in average monthly hours between employees who left and employees who stayed is significant: with 95% confidence, employees who left worked at least 6.2 (and up to 10.5) more hours per month.

Employees that left worked more.

# Box plot of average monthly hours, one box per Employee_status group.
# The title states the t-test conclusion; axis titles are set explicitly so
# this plot is labelled the same way as the other box plots in this report.
plot_ly(
  hr1,
  x = ~Employee_status,
  y = ~average_montly_hours,
  type = "box",
  color = ~Employee_status,
  colors = c("green", "blue")
) %>%
  layout(
    title = "Employees who Left, on Average, Work More Hours than Employees who Stayed",
    xaxis = list(title = "Employee Status"),
    yaxis = list(title = "Average Monthly Hours")
  )

# Two-sample t-test of satisfaction level, grouped by the `left` column
# (0 = stayed, 1 = left, per the Employee_status mapping above).
t.test(hr1$satisfaction_level ~ hr1$left)
## 
##  Welch Two Sample t-test
## 
## data:  hr1$satisfaction_level by hr1$left
## t = 46.636, df = 5167, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  0.2171815 0.2362417
## sample estimates:
## mean in group 0 mean in group 1 
##       0.6668096       0.4400980

The p-value is small, meaning there is a significant difference in the satisfaction level of employees who left vs employees that stayed

On average, employees who stayed scored about 0.23 higher in satisfaction level than employees who left (95% confidence interval: 0.22 to 0.24).

Employees that stayed are more satisfied

# Box plot of satisfaction level, one box per value of `left`
# (the x-axis title spells out the coding: 0 = Stayed, 1 = Left).
hr %>%
  plot_ly(
    x = ~factor(left),
    y = ~satisfaction_level,
    color = ~factor(left),
    colors = c("lightblue", "lightcoral"),
    type = "box"
  ) %>%
  layout(
    title = "Employees who Left vs. Stayed have Different Satisfaction Levels",
    xaxis = list(title = "Attrition Status (0 = Stayed, 1 = Left)"),
    yaxis = list(title = "Satisfaction Level")
  )

# Two-sample t-test of time spent at the company, grouped by `left`.
t.test(hr1$time_spend_company ~ hr1$left)
## 
##  Welch Two Sample t-test
## 
## data:  hr1$time_spend_company by hr1$left
## t = -22.631, df = 9625.6, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -0.5394767 -0.4534706
## sample estimates:
## mean in group 0 mean in group 1 
##        3.380032        3.876505

The p-value is small, meaning there is a significant difference in the amount of years spent at the company for employees that stayed vs left

On average, employees who left had been at the company about 0.50 years longer than employees who stayed; with 95% confidence the difference is at least 0.45 years.

Employees that left the company worked there for longer

# Box plot of time spent at the company, one box per value of `left`
# (the x-axis title spells out the coding: 0 = Stayed, 1 = Left).
# The title matches the t-test on time_spend_company: the group that left
# has the higher mean (3.88 vs 3.38), so it is the leavers who have more tenure.
plot_ly(
  hr,
  x = ~factor(left),
  y = ~time_spend_company,
  type = "box",
  color = ~factor(left),
  colors = c("orange", "red")
) %>%
  layout(
    title = "Employees who Left Tend to Have More Time Spent at the Company",
    xaxis = list(title = "Attrition Status (0 = Stayed, 1 = Left)"),
    yaxis = list(title = "Time Spent at Company (Years)")
  )

# Two-sample t-test of last evaluation score, grouped by `left`.
t.test(hr1$last_evaluation ~ hr1$left)
## 
##  Welch Two Sample t-test
## 
## data:  hr1$last_evaluation by hr1$left
## t = -0.72534, df = 5154.9, p-value = 0.4683
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -0.009772224  0.004493874
## sample estimates:
## mean in group 0 mean in group 1 
##       0.7154734       0.7181126

The p-value is large (0.4683), meaning there is no statistically significant difference in average last evaluation score between employees who stayed and employees who left.

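The group means differ by only about 0.003 (0.715 for employees who stayed vs. 0.718 for employees who left), and the 95% confidence interval (-0.0098 to 0.0045) includes 0.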

Employees that left had essentially the same last evaluation scores as employees that stayed.

# Box plot of last evaluation score, one box per value of `left`
# (the x-axis title spells out the coding: 0 = Stayed, 1 = Left).
# The title reflects the t-test on last_evaluation, which found no
# significant difference between the groups (p = 0.4683; means 0.715 vs 0.718).
plot_ly(
  hr,
  x = ~factor(left),
  y = ~last_evaluation,
  type = "box",
  color = ~factor(left),
  colors = c("lightgreen", "yellow")
) %>%
  layout(
    title = "Employees who Left vs. Stayed have Similar Average Last Evaluation Scores",
    xaxis = list(title = "Attrition Status (0 = Stayed, 1 = Left)"),
    yaxis = list(title = "Last Evaluation Score")
  )