library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Read the HR dataset (HR_comma_sep.csv) from GitHub into the tibble `hr`.
# Column types are guessed by readr; the columns used below are `left`,
# `salary`, `Department` and `Work_accident`.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Chi-square Test 1: left vs salary
# Contingency table of attrition by salary level:
# rows are the values of `left`, columns are the values of `salary`.
table_salary <- table(hr[["left"]], hr[["salary"]])

# Pearson chi-square test of independence on that table; the bare name on
# the last line prints the test result.
chisq_salary <- chisq.test(x = table_salary)
chisq_salary
##
## Pearson's Chi-squared test
##
## data: table_salary
## X-squared = 381.23, df = 2, p-value < 2.2e-16
# Column-wise proportions: within each salary level, the share of employees
# for each value of `left`. as.data.frame() on a table returns long format
# with the two classifying variables already stored as factors.
salary_proportions <- as.data.frame(prop.table(table_salary, margin = 2))
colnames(salary_proportions) <- c("Left", "Salary", "Proportion")

# Stacked bar chart: one bar per salary level, split by `Left`.
# `colors` is given explicitly because `Left` has only two levels; without
# it plotly asks RColorBrewer::brewer.pal() for a 2-colour "Set2" palette,
# which warns that the minimal value for n is 3.
plot_ly(
  salary_proportions,
  x = ~Salary,
  y = ~Proportion,
  type = "bar",
  color = ~Left,
  colors = c("#66C2A5", "#8DA0CB")
) %>%
  layout(
    barmode = "stack",
    title = "Employees with lower salaries are more likely to leave the company",
    xaxis = list(title = "Salary"),
    yaxis = list(title = "Proportion")
  )
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
p-value interpretation:
The p-value (< 2.2e-16) is far below the conventional 0.05 significance
level, so the observed association is very unlikely to be due to random
chance alone and is statistically significant.
chi-square test interpretation:
We reject the null hypothesis of independence: employee attrition depends on salary level.
non-technical interpretation:
Employees with lower salaries are more likely to leave the
company.
Chi-square Test 3: left vs department
# Contingency table of attrition by department:
# rows are the values of `left`, columns are the values of `Department`.
table_department <- table(hr[["left"]], hr[["Department"]])

# Pearson chi-square test of independence on that table; the bare name on
# the last line prints the test result.
chisq_department <- chisq.test(x = table_department)
chisq_department
##
## Pearson's Chi-squared test
##
## data: table_department
## X-squared = 86.825, df = 9, p-value = 7.042e-15
# Column-wise proportions: within each department, the share of employees
# for each value of `left`. as.data.frame() on a table returns long format
# with the two classifying variables already stored as factors, so no
# separate as.factor() conversion of `Left` is needed.
department_proportions <- as.data.frame(
  prop.table(table_department, margin = 2)
)
colnames(department_proportions) <- c("Left", "Department", "Proportion")

# Stacked bar chart: one bar per department, split by `Left`.
# `colors` is given explicitly because `Left` has only two levels; without
# it plotly asks RColorBrewer::brewer.pal() for a 2-colour "Set2" palette,
# which warns that the minimal value for n is 3.
plot_ly(
  department_proportions,
  x = ~Department,
  y = ~Proportion,
  type = "bar",
  color = ~Left,
  colors = c("#66C2A5", "#8DA0CB")
) %>%
  layout(
    barmode = "stack",
    title = "Attrition rates vary significantly across departments",
    xaxis = list(title = "Department"),
    yaxis = list(title = "Proportion")
  )
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
p-value interpretation:
The p-value (about 7.0e-15) is far below the conventional 0.05 significance
level, so the observed association is very unlikely to be due to random
chance alone and is statistically significant.
chi-square test interpretation:
We reject the null hypothesis of independence: employee attrition depends
on department.
non-technical interpretation:
Attrition rates vary significantly across departments.
Chi-square Test 4: left vs work_accident
# Contingency table of attrition by work-accident flag:
# rows are the values of `left`, columns are the values of `Work_accident`.
table_accident <- table(hr[["left"]], hr[["Work_accident"]])

# Chi-square test of independence on that table; the bare name on the last
# line prints the test result.
chisq_accident <- chisq.test(x = table_accident)
chisq_accident
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table_accident
## X-squared = 357.56, df = 1, p-value < 2.2e-16
# Column-wise proportions: within each work-accident group, the share of
# employees for each value of `left`. as.data.frame() on a table returns
# long format with the two classifying variables already stored as factors.
accident_proportions <- as.data.frame(prop.table(table_accident, margin = 2))
# "Work Accident" contains a space, so it must be back-quoted in formulas.
colnames(accident_proportions) <- c("Left", "Work Accident", "Proportion")

# Stacked bar chart: one bar per work-accident group, split by `Left`.
# `colors` is given explicitly because `Left` has only two levels; without
# it plotly asks RColorBrewer::brewer.pal() for a 2-colour "Set2" palette,
# which warns that the minimal value for n is 3.
plot_ly(
  accident_proportions,
  x = ~`Work Accident`,
  y = ~Proportion,
  type = "bar",
  color = ~Left,
  colors = c("#66C2A5", "#8DA0CB")
) %>%
  layout(
    barmode = "stack",
    title = "Employees who have experienced a workplace accident are less likely to leave the company",
    xaxis = list(title = "Work Accident"),
    yaxis = list(title = "Proportion")
  )
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
p-value interpretation:
The p-value (< 2.2e-16) is far below the conventional 0.05 significance
level, so the observed association is very unlikely to be due to random
chance alone and is statistically significant.
chi-square test interpretation:
We reject the null hypothesis of independence: employee attrition depends
on whether the employee has had a workplace accident.
non-technical interpretation:
Employees who have experienced a workplace accident are less likely
to leave the company.