library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Load the HR attrition dataset straight from its raw GitHub URL.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Chi-square Test 1: left vs salary

# Contingency table of attrition (`left`, rows) by salary (columns), followed
# by a chi-squared test of independence on that table.
table_salary <- table(hr[["left"]], hr[["salary"]])
chisq_salary <- chisq.test(x = table_salary)
print(chisq_salary)
## 
##  Pearson's Chi-squared test
## 
## data:  table_salary
## X-squared = 381.23, df = 2, p-value < 2.2e-16
# Column-wise proportions: within each salary band, the share of employees in
# each `left` group, so every band's stacked bar sums to 1.
# (dplyr and plotly are already attached at the top of the script, so the
# repeated library() calls are not needed here.)
salary_proportions <- as.data.frame(prop.table(table_salary, margin = 2))
colnames(salary_proportions) <- c("Left", "Salary", "Proportion")

# Stacked bar chart: one bar per salary band, split by `left`.
# `Left` is already a factor after as.data.frame() on a table, so no
# as.factor() conversion is required.
plot_ly(
  salary_proportions,
  x = ~Salary,
  y = ~Proportion,
  type = "bar",
  color = ~Left,
  # Explicit two-colour palette. Without `colors`, plotly calls
  # RColorBrewer::brewer.pal(N, "Set2") with N = 2, which warns
  # "minimal value for n is 3".
  colors = c("#66C2A5", "#8DA0CB")
) %>%
  layout(
    barmode = "stack",
    title = "Employees with lower salaries are more likely to leave the company",
    xaxis = list(
      title = "Salary",
      # Show the bands in their natural order instead of alphabetically.
      # NOTE(review): assumes the salary labels are "low", "medium", "high";
      # confirm against the data.
      categoryorder = "array",
      categoryarray = c("low", "medium", "high")
    ),
    yaxis = list(title = "Proportion")
  )

p-value interpretation:

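The p-value (< 2.2e-16) is far below the conventional 0.05 threshold: if attrition and salary were independent, a difference this large would almost never arise by chance, so the result is statistically significant.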

chi-square test interpretation:

We reject the null hypothesis of independence: employee attrition is associated with salary level.

non-technical interpretation:

Employees with lower salaries are more likely to leave the company.

Chi-square Test 2: left vs promotion_last_5years

# Contingency table of attrition (`left`, rows) by promotion in the last five
# years (columns), followed by a chi-squared test of independence.
table_promotion <- table(hr[["left"]], hr[["promotion_last_5years"]])
chisq_promotion <- chisq.test(x = table_promotion)
print(chisq_promotion)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  table_promotion
## X-squared = 56.262, df = 1, p-value = 6.344e-14
# Column-wise proportions: within each promotion group, the share of employees
# in each `left` group, so every stacked bar sums to 1.
# (dplyr and plotly are already attached at the top of the script, so the
# repeated library() calls are not needed here.)
promotion_proportions <- as.data.frame(
  prop.table(table_promotion, margin = 2)
)
colnames(promotion_proportions) <- c("Left", "Promotion", "Proportion")

# Stacked bar chart: one bar per promotion group, split by `left`.
# `Left` is already a factor after as.data.frame() on a table, so no
# as.factor() conversion is required.
plot_ly(
  promotion_proportions,
  x = ~Promotion,
  y = ~Proportion,
  type = "bar",
  color = ~Left,
  # Explicit two-colour palette. Without `colors`, plotly calls
  # RColorBrewer::brewer.pal(N, "Set2") with N = 2, which warns
  # "minimal value for n is 3".
  colors = c("#66C2A5", "#8DA0CB")
) %>%
  layout(
    barmode = "stack",
    title = "Employees who were not promoted in the last 5 years are more likely to leave",
    xaxis = list(title = "Promotion Last 5 Years"),
    yaxis = list(title = "Proportion")
  )

p-value interpretation:

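The p-value (6.344e-14) is far below the conventional 0.05 threshold: if attrition and recent promotion were independent, a difference this large would almost never arise by chance, so the result is statistically significant.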

chi-square test interpretation:

We reject the null hypothesis of independence: employee attrition is associated with whether an employee was promoted in the last 5 years.

non-technical interpretation:

Employees who were not promoted in the last 5 years are more likely to leave.

Chi-square Test 3: left vs department

# Contingency table of attrition (`left`, rows) by department (columns),
# followed by a chi-squared test of independence.
table_department <- table(hr[["left"]], hr[["Department"]])
chisq_department <- chisq.test(x = table_department)
print(chisq_department)
## 
##  Pearson's Chi-squared test
## 
## data:  table_department
## X-squared = 86.825, df = 9, p-value = 7.042e-15
# Column-wise proportions: within each department, the share of employees in
# each `left` group, so every stacked bar sums to 1.
# (dplyr and plotly are already attached at the top of the script, so the
# repeated library() calls are not needed here.)
department_proportions <- as.data.frame(
  prop.table(table_department, margin = 2)
)
colnames(department_proportions) <- c("Left", "Department", "Proportion")
# `Left` is already a factor after as.data.frame() on a table, so the former
# as.factor() reassignment was a no-op and has been removed.

# Stacked bar chart: one bar per department, split by `left`.
plot_ly(
  department_proportions,
  x = ~Department,
  y = ~Proportion,
  type = "bar",
  color = ~Left,
  # Explicit two-colour palette. Without `colors`, plotly calls
  # RColorBrewer::brewer.pal(N, "Set2") with N = 2, which warns
  # "minimal value for n is 3".
  colors = c("#66C2A5", "#8DA0CB")
) %>%
  layout(
    barmode = "stack",
    title = "Attrition rates vary significantly across departments",
    xaxis = list(title = "Department"),
    yaxis = list(title = "Proportion")
  )

p-value interpretation:

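The p-value (7.042e-15) is far below the conventional 0.05 threshold: if attrition and department were independent, differences this large would almost never arise by chance, so the result is statistically significant.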

chi-square test interpretation:

We reject the null hypothesis of independence: employee attrition is associated with department.

non-technical interpretation:

Attrition rates vary significantly across departments.

Chi-square Test 4: left vs work_accident

# Contingency table of attrition (`left`, rows) by work-accident flag
# (columns), followed by a chi-squared test of independence.
table_accident <- table(hr[["left"]], hr[["Work_accident"]])
chisq_accident <- chisq.test(x = table_accident)
print(chisq_accident)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  table_accident
## X-squared = 357.56, df = 1, p-value < 2.2e-16
# Column-wise proportions: within each work-accident group, the share of
# employees in each `left` group, so every stacked bar sums to 1.
# (dplyr and plotly are already attached at the top of the script, so the
# repeated library() calls are not needed here.)
accident_proportions <- as.data.frame(prop.table(table_accident, margin = 2))
# Syntactic column name (no space) so the plot formula needs no backticks;
# the axis label below still reads "Work Accident".
colnames(accident_proportions) <- c("Left", "Work_Accident", "Proportion")

# Stacked bar chart: one bar per work-accident group, split by `left`.
# `Left` is already a factor after as.data.frame() on a table, so no
# as.factor() conversion is required.
plot_ly(
  accident_proportions,
  x = ~Work_Accident,
  y = ~Proportion,
  type = "bar",
  color = ~Left,
  # Explicit two-colour palette. Without `colors`, plotly calls
  # RColorBrewer::brewer.pal(N, "Set2") with N = 2, which warns
  # "minimal value for n is 3".
  colors = c("#66C2A5", "#8DA0CB")
) %>%
  layout(
    barmode = "stack",
    title = "Employees who have experienced a workplace accident are less likely to leave the company",
    xaxis = list(title = "Work Accident"),
    yaxis = list(title = "Proportion")
  )

p-value interpretation:

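The p-value (< 2.2e-16) is far below the conventional 0.05 threshold: if attrition and workplace accidents were independent, a difference this large would almost never arise by chance, so the result is statistically significant.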

chi-square test interpretation:

We reject the null hypothesis of independence: employee attrition is associated with having had a workplace accident.

non-technical interpretation:

Employees who have experienced a workplace accident are less likely to leave the company.