library(readr)
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the HR dataset (CSV hosted on GitHub) into `hr`.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Test 1

# Chi-squared test of independence between department and attrition (`left`).
# The contingency table is built explicitly so this call has the same form as
# the other chi-squared tests in this report; the test statistic and p-value
# are the same as for the two-vector form.
q1 <- chisq.test(table(hr$Department, hr$left))

P-value interpretation: The low p-value tells us there is a relationship between department and employee attrition.

Non-technical interpretation: The department an employee works in influences the chance of them leaving the company.

# Count employees for every Department x left combination, then express each
# count as a share of its department's total.
# `.groups = "drop_last"` keeps the Department grouping explicitly (and silences
# the summarise() grouping message) so that sum(count) is taken per department;
# the result is ungrouped afterwards so no grouping leaks into later code.
prop_q1 <- hr %>%
  group_by(Department, left) %>%
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(prop = count / sum(count)) %>%
  ungroup()

# Stacked bar chart of the stayed/left split within each department.
# geom_col() draws bars from the precomputed `prop` values.
# NOTE(review): the "Stayed"/"Left" labels assume `left` is coded 0 = stayed,
# 1 = left -- confirm against the dataset.
ggplot(prop_q1, aes(x = Department, y = prop, fill = as.factor(left))) +
  geom_col(position = "fill") +
  labs(title = "The department a employee works in influences the chance of them leaving the company.",
       x = "Department", y = "Proportion") +
  scale_fill_manual(name = "Left", labels = c("Stayed", "Left"), values = c("blue", "red")) +
  theme_minimal()

Test 2

# Chi-squared test of independence on the salary x left contingency table.
q2 <- chisq.test(
  x = table(hr$salary, hr$left)
)

P-value interpretation: The low p-value tells us there is a relationship between salary level and employee attrition.

Non-technical interpretation: The salary an employee receives influences the chance of them leaving the company.

# Count employees for every salary x left combination, then express each count
# as a share of its salary level's total.
# `.groups = "drop_last"` keeps the salary grouping explicitly (and silences the
# summarise() grouping message) so that sum(count) is taken per salary level;
# the result is ungrouped afterwards so no grouping leaks into later code.
prop_q2 <- hr %>%
  group_by(salary, left) %>%
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(prop = count / sum(count)) %>%
  ungroup()

# Stacked bar chart of the stayed/left split within each salary level.
# geom_col() draws bars from the precomputed `prop` values.
# NOTE(review): the "Stayed"/"Left" labels assume `left` is coded 0 = stayed,
# 1 = left -- confirm against the dataset.
ggplot(prop_q2, aes(x = salary, y = prop, fill = as.factor(left))) +
  geom_col(position = "fill") +
  labs(title = "The salary a employee recieves influences the chance of them leaving the company.",
       x = "Salary Level", y = "Proportion") +
  scale_fill_manual(name = "Left", labels = c("Stayed", "Left"), values = c("blue", "red")) +
  theme_minimal()

Test 3

# Chi-squared test of independence on the promotion_last_5years x left
# contingency table.
q3 <- chisq.test(
  x = table(hr$promotion_last_5years, hr$left)
)

P-value interpretation: The low p-value tells us there is a relation between promotion history and employee attrition.

Non-technical interpretation: If an employee receives a promotion, it influences the chance of them leaving the company.

# Count employees for every promotion_last_5years x left combination, then
# express each count as a share of its promotion group's total.
# `.groups = "drop_last"` keeps the promotion grouping explicitly (and silences
# the summarise() grouping message) so that sum(count) is taken per promotion
# group; the result is ungrouped afterwards so no grouping leaks into later code.
prop_q3 <- hr %>%
  group_by(promotion_last_5years, left) %>%
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(prop = count / sum(count)) %>%
  ungroup()

# Stacked bar chart of the stayed/left split within each promotion group.
# promotion_last_5years is read as a double, so it is converted to a factor to
# get one discrete bar per value instead of a continuous x axis.
# NOTE(review): the "Stayed"/"Left" labels assume `left` is coded 0 = stayed,
# 1 = left -- confirm against the dataset.
ggplot(prop_q3, aes(x = as.factor(promotion_last_5years), y = prop, fill = as.factor(left))) +
  geom_col(position = "fill") +
  labs(title = "If an employee recieves a promotion it influences the chance of them leaving the company.",
       x = "Promotion History", y = "Proportion") +
  scale_fill_manual(name = "Left", labels = c("Stayed", "Left"), values = c("blue", "red")) +
  theme_minimal()

Test 4

# Chi-squared test of independence on the Work_accident x left contingency
# table.
q4 <- chisq.test(
  x = table(hr$Work_accident, hr$left)
)

P-value interpretation: The low p-value tells us there is a relation between work accident history and employee attrition.

Non-technical interpretation: If an employee experiences a work accident it influences the chance of them leaving the company.

# Count employees for every Work_accident x left combination, then express each
# count as a share of its work-accident group's total.
# `.groups = "drop_last"` keeps the Work_accident grouping explicitly (and
# silences the summarise() grouping message) so that sum(count) is taken per
# work-accident group; the result is ungrouped afterwards so no grouping leaks
# into later code.
prop_q4 <- hr %>%
  group_by(Work_accident, left) %>%
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(prop = count / sum(count)) %>%
  ungroup()

# Stacked bar chart of the stayed/left split within each work-accident group.
# Work_accident is read as a double, so it is converted to a factor to get one
# discrete bar per value instead of a continuous x axis.
# NOTE(review): the "Stayed"/"Left" labels assume `left` is coded 0 = stayed,
# 1 = left -- confirm against the dataset.
ggplot(prop_q4, aes(x = as.factor(Work_accident), y = prop, fill = as.factor(left))) +
  geom_col(position = "fill") +
  labs(title = "If an employee experiences a work accident it influences the chance of them leaving the company.",
       x = "Work Accident", y = "Proportion") +
  scale_fill_manual(name = "Left", labels = c("Stayed", "Left"), values = c("blue", "red")) +
  theme_minimal()