Load Dataset

library(readr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ purrr     1.1.0
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
# Load the HR data set (HR_comma_sep.csv) directly from its GitHub URL.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Test 1

# Chi-squared test of independence between having a work accident and leaving.
# `correct = TRUE` is the default: Yates' continuity correction for 2x2 tables.
chisq.test(x = hr$Work_accident, y = hr$left, correct = TRUE)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  hr$Work_accident and hr$left
## X-squared = 357.56, df = 1, p-value < 2.2e-16

The p-value is extremely small (< 2.2e-16), so we reject the null hypothesis of independence: an association this strong would be very unlikely to arise by chance alone.

There is a dependence between having a work accident and employees leaving the company

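Employees who have had a work accident are less likely to leave the company: about 8% of them left (169 of 2,169), compared with about 27% of employees with no accident (3,402 of 12,830).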

#Graph 1

# Cross-tabulate counts: rows are Work_accident (0/1), columns are left (0/1).
table(hr[["Work_accident"]], hr[["left"]])
##    
##        0    1
##   0 9428 3402
##   1 2000  169
# Share of employees who stayed / left within each work-accident group.
prop_workacc <- hr %>%
  group_by(Work_accident) %>%
  summarise(
    stayed = sum(left == 0) / n(),
    left_prop = sum(left == 1) / n()
  ) %>%
  # Replace the 0/1 flag with readable axis labels.
  mutate(
    Work_accident = if_else(Work_accident == 1, "Had accident", "No accident")
  )

# Stacked bars: one bar per accident group, split into stayed vs. left.
plot_ly(prop_workacc) %>%
  add_bars(x = ~Work_accident, y = ~stayed, name = "Stayed",
           marker = list(color = "#1f77b4")) %>%
  add_bars(x = ~Work_accident, y = ~left_prop, name = "Left",
           marker = list(color = "#ff7f0e")) %>%
  layout(
    barmode = "stack",
    xaxis = list(title = "Work accident"),
    yaxis = list(title = "Proportion of employees", tickformat = ",.0%"),
    # In this data 169 of 2169 employees with an accident left (about 8%)
    # versus 3402 of 12830 without one (about 27%), hence "less likely".
    title = "Employees who have a work accident are less likely to leave the company"
  )

Test 2

# Contingency table: promotion in the last 5 years (rows) vs. left (columns).
tab2 <- table(hr[["promotion_last_5years"]], hr[["left"]])
# 2x2 table, so the default Yates' continuity correction is applied.
chisq.test(x = tab2, correct = TRUE)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  tab2
## X-squared = 56.262, df = 1, p-value = 6.344e-14

The p-value (6.344e-14) is far below 0.01, so we reject the null hypothesis.

This means promotion history and leaving the company are statistically dependent.

Employees who have not been promoted in the last 5 years are more likely to leave the company.

Graph 2

# Fraction of employees who stayed / left, per promotion group.
prop_promo <- hr %>%
  group_by(promotion_last_5years) %>%
  summarise(stayed = sum(left == 0) / n(), left_prop = sum(left == 1) / n())

# Swap the 0/1 promotion flag for readable axis labels.
prop_promo <- prop_promo %>%
  mutate(
    promotion_last_5years = if_else(
      promotion_last_5years == 1, "Promoted", "No Promotion"
    )
  )

# Stacked bar chart; both traces inherit the x mapping set in plot_ly().
plot_ly(data = prop_promo, x = ~promotion_last_5years) %>%
  add_bars(
    y = ~stayed,
    name = "Stayed",
    marker = list(color = "#1f77b4")
  ) %>%
  add_bars(
    y = ~left_prop,
    name = "Left",
    marker = list(color = "#ff7f0e")
  ) %>%
  layout(
    title = "Employees without recent promotions are more likely to leave the company",
    xaxis = list(title = "Promotion Status"),
    yaxis = list(title = "Proportion of Employees", tickformat = ",.0%"),
    barmode = "stack"
  )

Test 3

# Chi-squared test of independence between salary level and leaving.
# The table is larger than 2x2, so no continuity correction is applied.
chisq.test(x = table(hr$salary, hr$left))
## 
##  Pearson's Chi-squared test
## 
## data:  table(hr$salary, hr$left)
## X-squared = 381.23, df = 2, p-value < 2.2e-16

The p-value is far below 0.01, so we reject the null hypothesis.

This means salary level and leaving the company are statistically dependent.

Employees with lower salaries are more likely to leave the company.

Graph 3

# Fraction of employees who stayed / left, per salary band.
prop_salary <- hr %>%
  group_by(salary) %>%
  summarise(stayed = sum(left == 0) / n(), left_prop = sum(left == 1) / n())

# Turn salary into a factor with explicit low -> medium -> high levels and
# capitalised labels.
prop_salary <- prop_salary %>%
  mutate(
    salary = factor(
      salary,
      levels = c("low", "medium", "high"),
      labels = c("Low", "Medium", "High")
    )
  )

# Stacked bar chart; both traces inherit the x mapping set in plot_ly().
plot_ly(data = prop_salary, x = ~salary) %>%
  add_bars(
    y = ~stayed,
    name = "Stayed",
    marker = list(color = "#1f77b4")
  ) %>%
  add_bars(
    y = ~left_prop,
    name = "Left",
    marker = list(color = "#ff7f0e")
  ) %>%
  layout(
    title = "Employees with lower salaries are more likely to leave the company",
    xaxis = list(title = "Salary Level"),
    yaxis = list(title = "Proportion of Employees", tickformat = ",.0%"),
    barmode = "stack"
  )

Test 4

# Contingency table: department (rows) vs. left (columns).
tab4 <- table(hr[["Department"]], hr[["left"]])
# Chi-squared test of independence between department and leaving.
chisq.test(x = tab4)
## 
##  Pearson's Chi-squared test
## 
## data:  tab4
## X-squared = 86.825, df = 9, p-value = 7.042e-15

The p-value is far below 0.01, so we reject the null hypothesis.

This means department and leaving the company are statistically dependent.

Employee turnover rates differ across departments.

#Graph 4

# Fraction of employees who stayed / left, per department.
prop_dept <- hr %>%
  group_by(Department) %>%
  summarise(stayed = sum(left == 0) / n(), left_prop = sum(left == 1) / n())

# Stacked bar chart; both traces inherit the x mapping set in plot_ly().
plot_ly(data = prop_dept, x = ~Department) %>%
  add_bars(
    y = ~stayed,
    name = "Stayed",
    marker = list(color = "#1f77b4")
  ) %>%
  add_bars(
    y = ~left_prop,
    name = "Left",
    marker = list(color = "#ff7f0e")
  ) %>%
  layout(
    title = "Employee turnover rates differ across departments",
    xaxis = list(title = "Department"),
    yaxis = list(title = "Proportion of Employees", tickformat = ",.0%"),
    barmode = "stack"
  )