library(readr)
# Read the comma-separated HR dataset straight from its GitHub URL into a
# tibble; every analysis below works from this `hr` object.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows to sanity-check the column names and types.
head(hr, n = 6)
## # A tibble: 6 × 10
##   satisfaction_level last_evaluation number_project average_montly_hours
##                <dbl>           <dbl>          <dbl>                <dbl>
## 1               0.38            0.53              2                  157
## 2               0.8             0.86              5                  262
## 3               0.11            0.88              7                  272
## 4               0.72            0.87              5                  223
## 5               0.37            0.52              2                  159
## 6               0.41            0.5               2                  153
## # ℹ 6 more variables: time_spend_company <dbl>, Work_accident <dbl>,
## #   left <dbl>, promotion_last_5years <dbl>, Department <chr>, salary <chr>

1.

# Welch two-sample t-test (the t.test() default, no equal-variance assumption)
# comparing mean satisfaction_level between the left == 0 and left == 1 groups.
t_test_satisfaction <- t.test(
  satisfaction_level ~ left,
  data = hr
)
print(t_test_satisfaction)
## 
##  Welch Two Sample t-test
## 
## data:  satisfaction_level by left
## t = 46.636, df = 5167, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  0.2171815 0.2362417
## sample estimates:
## mean in group 0 mean in group 1 
##       0.6668096       0.4400980

p-value significance and technical terms: The p-value (< 2.2e-16) is less than 0.01, so we reject the null hypothesis of equal means: there is a statistically significant difference in mean satisfaction level between employees who left the company and those who stayed.

The mean satisfaction level for employees who left (0.44) is lower than that for employees who stayed (0.67), and the medians show the same pattern (0.41 vs. 0.69), so employees who left the company were less satisfied than those who stayed.

non-technical terms: Employees who left were less satisfied

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Add a readable status label: "Stayed" when left == 0, "Left" otherwise.
plot_data <- mutate(
  hr,
  Left_Status = factor(ifelse(left == 0, "Stayed", "Left"))
)

# Box plot of satisfaction level, one box per status group; the title states
# the plain-language takeaway of the t-test above.
plot_data %>%
  plot_ly(x = ~Left_Status, y = ~satisfaction_level, type = "box") %>%
  layout(
    title = "Employees who left were less satisfied",
    xaxis = list(title = "Employee Status"),
    yaxis = list(title = "Satisfaction Level")
  )

2.

# Welch two-sample t-test (the t.test() default, no equal-variance assumption)
# comparing mean last_evaluation between the left == 0 and left == 1 groups.
t_test_evaluation <- t.test(
  last_evaluation ~ left,
  data = hr
)
print(t_test_evaluation)
## 
##  Welch Two Sample t-test
## 
## data:  last_evaluation by left
## t = -0.72534, df = 5154.9, p-value = 0.4683
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -0.009772224  0.004493874
## sample estimates:
## mean in group 0 mean in group 1 
##       0.7154734       0.7181126

p-value significance and technical terms: The p-value (0.4683) is greater than 0.01, so we fail to reject the null hypothesis: there is no statistically significant difference in the mean last evaluation score between employees who left the company and those who stayed.

The mean last evaluation scores of the two groups (0.715 for employees who stayed, 0.718 for employees who left) are not statistically different.

library(dplyr)
library(plotly)
# Add a readable status label: "Stayed" when left == 0, "Left" otherwise.
plot_data <- hr %>%
  mutate(Left_Status = as.factor(ifelse(left == 0, "Stayed", "Left")))

# Box plot of last evaluation score, one box per status group.
# The title replaces the leftover "XXX" placeholder with the plain-language
# takeaway of the t-test above (group means 0.715 vs 0.718, p = 0.47), in line
# with the titles of the other plots in this report.
plot_ly(plot_data,
        x = ~Left_Status,
        y = ~last_evaluation,
        type = "box") %>%
  layout(
    title = "Average evaluation scores were similar for employees who left and stayed",
    xaxis = list(title = "Employee Status"),
    yaxis = list(title = "Last Evaluation Score")
  )

3.

# Welch two-sample t-test (the t.test() default, no equal-variance assumption)
# comparing mean average_montly_hours between the left == 0 and left == 1
# groups. The column name is misspelled ("montly") in the dataset itself.
t_test_hours <- t.test(
  average_montly_hours ~ left,
  data = hr
)
print(t_test_hours)
## 
##  Welch Two Sample t-test
## 
## data:  average_montly_hours by left
## t = -7.5323, df = 4875.1, p-value = 5.907e-14
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -10.534631  -6.183384
## sample estimates:
## mean in group 0 mean in group 1 
##        199.0602        207.4192

p-value significance and technical terms: The p-value (5.907e-14) is less than 0.01, so we reject the null hypothesis of equal means: there is a statistically significant difference in the average monthly hours worked between employees who left the company and those who stayed.

The mean average monthly hours for employees who left (207.4) is higher than that for employees who stayed (199.1), and the medians show the same pattern (224 vs. 198), so employees who left the company worked more hours in a month on average than those who stayed.

non-technical terms: Employees who left worked more hours

library(dplyr)
library(plotly)
# Add a readable status label: "Stayed" when left == 0, "Left" otherwise.
plot_data <- mutate(
  hr,
  Left_Status = factor(ifelse(left == 0, "Stayed", "Left"))
)

# Box plot of average monthly hours, one box per status group; the title
# states the plain-language takeaway of the t-test above.
plot_data %>%
  plot_ly(x = ~Left_Status, y = ~average_montly_hours, type = "box") %>%
  layout(
    title = "Employees who left worked more hours",
    xaxis = list(title = "Employee Status"),
    yaxis = list(title = "Average Monthly Hours")
  )

4.

# Welch two-sample t-test (the t.test() default, no equal-variance assumption)
# comparing mean time_spend_company between the left == 0 and left == 1 groups.
t_test_time <- t.test(
  time_spend_company ~ left,
  data = hr
)
print(t_test_time)
## 
##  Welch Two Sample t-test
## 
## data:  time_spend_company by left
## t = -22.631, df = 9625.6, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -0.5394767 -0.4534706
## sample estimates:
## mean in group 0 mean in group 1 
##        3.380032        3.876505

p-value significance and technical terms: The p-value (< 2.2e-16) is less than 0.01, so we reject the null hypothesis of equal means: there is a statistically significant difference in the time spent at the company between employees who left the company and those who stayed.

The mean time spent at the company for employees who left (3.88) is higher than that for employees who stayed (3.38), and the medians show the same pattern (4 vs. 3), so employees who left spent more time at the company than those who stayed.

non-technical terms: Employees who left spent more time at the company

library(dplyr)
library(plotly)
# Add a readable status label: "Stayed" when left == 0, "Left" otherwise.
plot_data <- mutate(
  hr,
  Left_Status = factor(ifelse(left == 0, "Stayed", "Left"))
)

# Box plot of time spent at the company, one box per status group; the title
# states the plain-language takeaway of the t-test above.
plot_data %>%
  plot_ly(x = ~Left_Status, y = ~time_spend_company, type = "box") %>%
  layout(
    title = "Employees who left spent more time at the company",
    xaxis = list(title = "Employee Status"),
    yaxis = list(title = "Time Spent at Company")
  )