library(readr)
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the HR CSV from GitHub into `hr`.
hr <- read_csv(
  "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Turn ‘left’ into a factor

# Recode the numeric 0/1 `left` column as a labelled factor
# (0 -> "Stayed", 1 -> "Left"), then inspect the result.
hr[["left"]] <- factor(
  hr[["left"]],
  levels = c(0, 1),
  labels = c("Stayed", "Left")
)
str(hr[["left"]])
##  Factor w/ 2 levels "Stayed","Left": 2 2 2 2 2 2 2 2 2 2 ...
levels(hr[["left"]])
## [1] "Stayed" "Left"

1. Left vs Work Accident

# Recode the numeric 0/1 `Work_accident` column as a labelled factor
# (0 -> "No Accident", 1 -> "Accident"), then inspect the result.
hr[["Work_accident"]] <- factor(
  hr[["Work_accident"]],
  levels = c(0, 1),
  labels = c("No Accident", "Accident")
)
str(hr[["Work_accident"]])
##  Factor w/ 2 levels "No Accident",..: 1 1 1 1 1 1 1 1 1 1 ...
levels(hr[["Work_accident"]])
## [1] "No Accident" "Accident"
# Contingency table: rows are `left`, columns are `Work_accident`.
table_left_accident <- table(hr[["left"]], hr[["Work_accident"]])
table_left_accident
##         
##          No Accident Accident
##   Stayed        9428     2000
##   Left          3402      169
# Chi-squared test of independence on the 2x2 table.
chisq_test_result1 <- chisq.test(x = table_left_accident)
print(chisq_test_result1)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  table_left_accident
## X-squared = 357.56, df = 1, p-value < 2.2e-16
  1. The p-value is very small, so a difference this large would be very unlikely to occur by random chance alone if work accidents and leaving the company were unrelated.

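  2. There is a connection between having a work accident and leaving the company: employees who had a work accident were less likely to leave (about 8% left, compared with about 27% of those with no accident).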

# Proportion bars: share of stayed/left employees within each accident group.
ggplot(data = hr, mapping = aes(x = Work_accident, fill = left)) +
  geom_bar(position = "fill") +
  labs(
    x = "Accidents",
    y = "Proportion",
    title = "Employee Leave Based on Accidents"
  ) +
  theme_minimal()

2. Left vs Department

# Contingency table: rows are `left`, columns are `Department`.
table_left_department <- table(hr[["left"]], hr[["Department"]])
table_left_department
##         
##          accounting   hr   IT management marketing product_mng RandD sales
##   Stayed        563  524  954        539       655         704   666  3126
##   Left          204  215  273         91       203         198   121  1014
##         
##          support technical
##   Stayed    1674      2023
##   Left       555       697
# Chi-squared test of independence between leaving and department.
chisq_test_result2 <- chisq.test(x = table_left_department)
print(chisq_test_result2)
## 
##  Pearson's Chi-squared test
## 
## data:  table_left_department
## X-squared = 86.825, df = 9, p-value = 7.042e-15
  1. The p-value is very small, so differences this large between departments would be very unlikely to occur by random chance alone if department and leaving the company were unrelated.

  2. There is a connection between department and leaving the company: the share of employees who left ranges from about 14% in management to about 29% in hr.

# Proportion bars: share of stayed/left employees within each department.
ggplot(data = hr, mapping = aes(x = Department, fill = left)) +
  geom_bar(position = "fill") +
  labs(
    x = "Department",
    y = "Proportion",
    title = "Employee Leave Based on Department"
  ) +
  theme_minimal()

3. Left vs Salary

# Contingency table: rows are `left`, columns are `salary`.
table_left_salary <- table(hr[["left"]], hr[["salary"]])
table_left_salary
##         
##          high  low medium
##   Stayed 1155 5144   5129
##   Left     82 2172   1317
# Chi-squared test of independence between leaving and salary band.
chisq_test_result3 <- chisq.test(x = table_left_salary)
chisq_test_result3
## 
##  Pearson's Chi-squared test
## 
## data:  table_left_salary
## X-squared = 381.23, df = 2, p-value < 2.2e-16
  1. The p-value is very small, so differences this large between salary levels would be very unlikely to occur by random chance alone if salary and leaving the company were unrelated.

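  2. There is a connection between salary and leaving the company: about 30% of low-salary employees left, compared with about 20% of medium-salary and about 7% of high-salary employees.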

# Proportion bars: share of stayed/left employees within each salary band.
# `salary` is a character column, so ggplot2 would order the bars
# alphabetically (high, low, medium). Ordering the levels explicitly inside
# aes() shows the bands low -> medium -> high without modifying `hr`.
ggplot(
  hr,
  aes(
    x = factor(salary, levels = c("low", "medium", "high")),
    fill = left
  )
) +
  geom_bar(position = "fill") +
  labs(title = "Employee Leave Based on Salary",
       y = "Proportion",
       x = "Salary") +
  theme_minimal()

4. Left vs Number of Projects

# Contingency table: rows are `left`, columns are `number_project`.
table_left_numproject <- table(hr[["left"]], hr[["number_project"]])
table_left_numproject
##         
##             2    3    4    5    6    7
##   Stayed  821 3983 3956 2149  519    0
##   Left   1567   72  409  612  655  256
# Chi-squared test of independence between leaving and project count.
chisq_test_result4 <- chisq.test(x = table_left_numproject)
chisq_test_result4
## 
##  Pearson's Chi-squared test
## 
## data:  table_left_numproject
## X-squared = 5373.6, df = 5, p-value < 2.2e-16
  1. The p-value is very small, so differences this large between project counts would be very unlikely to occur by random chance alone if the number of projects and leaving the company were unrelated.

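  2. There is a connection between number of projects and leaving the company: about 66% of employees with 2 projects left and all employees with 7 projects left, while only about 2% of those with 3 projects left.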

# Proportion bars: share of stayed/left employees for each project count.
# `number_project` is numeric, which puts it on a continuous x axis whose
# tick marks are chosen by ggplot2's default breaks. Mapping it as a factor
# treats each project count as its own category, so every bar gets a label.
ggplot(hr, aes(x = factor(number_project), fill = left)) +
  geom_bar(position = "fill") +
  labs(title = "Employee Leave Based on Number of Projects",
       y = "Proportion",
       x = "Number of Projects") +
  theme_minimal()