library(readr)
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the HR dataset straight from its raw GitHub URL into `hr`.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Recode the 0/1 `left` column as a labelled factor (0 = "Stayed", 1 = "Left")
# so test output and plots show readable group names.
hr[["left"]] <- factor(
  hr[["left"]],
  levels = c(0, 1),
  labels = c("Stayed", "Left")
)

**1. Satisfaction Level vs Left**

# Welch two-sample t-test: does mean satisfaction level differ between the
# "Stayed" and "Left" groups? The extra arguments spell out the defaults.
t.test(
  hr$satisfaction_level ~ hr$left,
  alternative = "two.sided",
  mu = 0,
  var.equal = FALSE,
  conf.level = 0.95
)
## 
##  Welch Two Sample t-test
## 
## data:  hr$satisfaction_level by hr$left
## t = 46.636, df = 5167, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group Stayed and group Left is not equal to 0
## 95 percent confidence interval:
##  0.2171815 0.2362417
## sample estimates:
## mean in group Stayed   mean in group Left 
##            0.6668096            0.4400980

The p-value is very small (< 2.2e-16), so the difference in mean satisfaction level between employees who stayed and employees who left is statistically significant.

The mean satisfaction level differs significantly between employees who stayed and employees who left; the difference is about 0.23 (0.67 vs. 0.44).

Employees who stayed had a higher satisfaction level.

# Box plot of satisfaction level, one box per level of `left`.
plot_ly(
  data = hr,
  x = ~left,
  y = ~satisfaction_level,
  type = "box"
)

2. Average Monthly Hours vs Left

# Welch two-sample t-test: do mean average monthly hours differ between the
# "Stayed" and "Left" groups? The extra arguments spell out the defaults.
t.test(
  hr$average_montly_hours ~ hr$left,
  alternative = "two.sided",
  mu = 0,
  var.equal = FALSE,
  conf.level = 0.95
)
## 
##  Welch Two Sample t-test
## 
## data:  hr$average_montly_hours by hr$left
## t = -7.5323, df = 4875.1, p-value = 5.907e-14
## alternative hypothesis: true difference in means between group Stayed and group Left is not equal to 0
## 95 percent confidence interval:
##  -10.534631  -6.183384
## sample estimates:
## mean in group Stayed   mean in group Left 
##             199.0602             207.4192

The p-value is very small (5.9e-14), so the difference in mean average monthly hours between employees who stayed and employees who left is statistically significant.

The mean average monthly hours differ significantly between employees who stayed and employees who left; the difference is about 8.4 hours (199.1 vs. 207.4).

Employees who stayed worked fewer average monthly hours.

# Box plot of average monthly hours, one box per level of `left`.
plot_ly(
  data = hr,
  x = ~left,
  y = ~average_montly_hours,
  type = "box"
)

3. Last Evaluation vs Left

# Welch two-sample t-test: does the mean last-evaluation score differ between
# the "Stayed" and "Left" groups? The extra arguments spell out the defaults.
t.test(
  hr$last_evaluation ~ hr$left,
  alternative = "two.sided",
  mu = 0,
  var.equal = FALSE,
  conf.level = 0.95
)
## 
##  Welch Two Sample t-test
## 
## data:  hr$last_evaluation by hr$left
## t = -0.72534, df = 5154.9, p-value = 0.4683
## alternative hypothesis: true difference in means between group Stayed and group Left is not equal to 0
## 95 percent confidence interval:
##  -0.009772224  0.004493874
## sample estimates:
## mean in group Stayed   mean in group Left 
##            0.7154734            0.7181126

The p-value is high (0.47), so the difference in mean last evaluation between employees who stayed and employees who left is not statistically significant.

The mean last evaluation does not differ significantly between employees who stayed and employees who left; the difference is only about 0.003 (0.715 vs. 0.718).

On average, last evaluation scores showed little association with whether an employee stayed or left.

# Box plot of last-evaluation score, one box per level of `left`.
plot_ly(
  data = hr,
  x = ~left,
  y = ~last_evaluation,
  type = "box"
)

4. Time Spent at Company vs Left

# Welch two-sample t-test: does mean time spent at the company differ between
# the "Stayed" and "Left" groups? The extra arguments spell out the defaults.
t.test(
  hr$time_spend_company ~ hr$left,
  alternative = "two.sided",
  mu = 0,
  var.equal = FALSE,
  conf.level = 0.95
)
## 
##  Welch Two Sample t-test
## 
## data:  hr$time_spend_company by hr$left
## t = -22.631, df = 9625.6, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group Stayed and group Left is not equal to 0
## 95 percent confidence interval:
##  -0.5394767 -0.4534706
## sample estimates:
## mean in group Stayed   mean in group Left 
##             3.380032             3.876505

The p-value is very small (< 2.2e-16), so the difference in mean time spent at the company between employees who stayed and employees who left is statistically significant.

The mean time spent at the company differs significantly between employees who stayed and employees who left; the difference is about 0.5 (3.38 vs. 3.88).

Employees who stayed had, on average, spent less time at the company.

# Box plot of time spent at the company, one box per level of `left`.
plot_ly(
  data = hr,
  x = ~left,
  y = ~time_spend_company,
  type = "box"
)