library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Load the HR dataset from GitHub into `hr` (the recorded output reports
# 14,999 rows and 10 columns). `show_col_types = FALSE` suppresses readr's
# column-specification message on every run.
hr <- read_csv(
  "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv",
  show_col_types = FALSE
)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Pearson's chi-squared test of independence on the `left` x `salary`
# contingency table: checks whether attrition is associated with salary level.
chisq.test(table(hr$left, hr$salary))
##
## Pearson's Chi-squared test
##
## data: table(hr$left, hr$salary)
## X-squared = 381.23, df = 2, p-value < 2.2e-16
# Stacked bar chart: within each salary level, the share of employees who
# stayed vs. left.
hr %>%
  group_by(salary, left) %>%
  # "drop_last" keeps the result grouped by salary only, so the proportions
  # below sum to 1 within each salary level; stating it explicitly also
  # silences the "summarise() has grouped output" message.
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(
    prop = count / sum(count),
    # A single labelled factor drives both the colour and the legend text.
    status = factor(
      ifelse(left == 1, "Left", "Stayed"),
      levels = c("Stayed", "Left")
    )
  ) %>%
  ungroup() %>%
  plot_ly(
    x = ~salary,
    y = ~prop,
    color = ~status,
    # Explicit two-colour palette (Set2 colours): the default palette lookup
    # warns "minimal value for n is 3" when there are only two levels.
    colors = c("#66C2A5", "#8DA0CB"),
    type = "bar"
  ) %>%
  layout(
    barmode = "stack",
    title = "Employees with low salaries are more likely to leave the company",
    yaxis = list(title = "Proportion", tickformat = ",.0%"),
    xaxis = list(
      title = "Salary Level",
      # Show salary levels in their natural order rather than alphabetically.
      # Assumes the column holds "low"/"medium"/"high" -- TODO confirm.
      categoryorder = "array",
      categoryarray = c("low", "medium", "high")
    )
  )
## `summarise()` has grouped output by 'salary'. You can override using the
## `.groups` argument.
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
# Technical Interpretation
# The p-value (< 2.2e-16) is far below 0.05, so we reject the null hypothesis
# that attrition is independent of salary level: the proportion of employees
# who left differs significantly across salary levels.
# Non-Technical Interpretation
# Employees with low salaries are more likely to leave the company.
# Chi-squared test of independence between attrition (`left`) and
# `promotion_last_5years`. The table is 2 x 2, so chisq.test() applies Yates'
# continuity correction by default (as the recorded output shows).
chisq.test(table(hr$left, hr$promotion_last_5years))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table(hr$left, hr$promotion_last_5years)
## X-squared = 56.262, df = 1, p-value = 6.344e-14
# Stacked bar chart: share of employees who stayed vs. left, split by whether
# they were promoted in the last 5 years.
hr %>%
  group_by(promotion_last_5years, left) %>%
  # "drop_last" keeps the result grouped by promotion status only, so the
  # proportions below sum to 1 within each promotion group; stating it
  # explicitly also silences the "summarise() has grouped output" message.
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(
    prop = count / sum(count),
    # A single labelled factor drives both the colour and the legend text.
    status = factor(
      ifelse(left == 1, "Left", "Stayed"),
      levels = c("Stayed", "Left")
    )
  ) %>%
  ungroup() %>%
  plot_ly(
    x = ~factor(promotion_last_5years),
    y = ~prop,
    color = ~status,
    # Explicit two-colour palette (Set2 colours): the default palette lookup
    # warns "minimal value for n is 3" when there are only two levels.
    colors = c("#66C2A5", "#8DA0CB"),
    type = "bar"
  ) %>%
  layout(
    barmode = "stack",
    title = "Employees who haven't been promoted in 5 years are more likely to leave",
    yaxis = list(title = "Proportion", tickformat = ",.0%"),
    xaxis = list(title = "Promotion in Last 5 Years (0 = No, 1 = Yes)")
  )
## `summarise()` has grouped output by 'promotion_last_5years'. You can override
## using the `.groups` argument.
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
# Technical Interpretation
# The small p-value (6.344e-14) shows that attrition is significantly
# associated with whether an employee received a promotion in the last 5 years.
# Non-Technical Interpretation
# Employees who have not been promoted in the last 5 years are more likely to
# leave.
# Chi-squared test of independence between attrition (`left`) and
# `Work_accident`. The table is 2 x 2, so chisq.test() applies Yates'
# continuity correction by default (as the recorded output shows).
chisq.test(table(hr$left, hr$Work_accident))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table(hr$left, hr$Work_accident)
## X-squared = 357.56, df = 1, p-value < 2.2e-16
# Stacked bar chart: share of employees who stayed vs. left, split by whether
# they have had a work accident.
hr %>%
  group_by(Work_accident, left) %>%
  # "drop_last" keeps the result grouped by accident status only, so the
  # proportions below sum to 1 within each accident group; stating it
  # explicitly also silences the "summarise() has grouped output" message.
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(
    prop = count / sum(count),
    # A single labelled factor drives both the colour and the legend text.
    status = factor(
      ifelse(left == 1, "Left", "Stayed"),
      levels = c("Stayed", "Left")
    )
  ) %>%
  ungroup() %>%
  plot_ly(
    x = ~factor(Work_accident),
    y = ~prop,
    color = ~status,
    # Explicit two-colour palette (Set2 colours): the default palette lookup
    # warns "minimal value for n is 3" when there are only two levels.
    colors = c("#66C2A5", "#8DA0CB"),
    type = "bar"
  ) %>%
  layout(
    barmode = "stack",
    title = "Employees without a work accident are more likely to leave",
    yaxis = list(title = "Proportion", tickformat = ",.0%"),
    xaxis = list(title = "Work Accident (0 = No, 1 = Yes)")
  )
## `summarise()` has grouped output by 'Work_accident'. You can override using the
## `.groups` argument.
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
# Technical Interpretation
# The significant p-value (< 2.2e-16) indicates that attrition is associated
# with whether an employee has had a work accident: the proportion who left
# differs between the two groups.
# Non-Technical Interpretation
# Employees who never had a work accident are more likely to leave the company.
# Pearson's chi-squared test of independence between attrition (`left`) and
# `number_project`. table() treats each distinct project count as its own
# category, so this tests for any association, not for a linear trend.
chisq.test(table(hr$left, hr$number_project))
##
## Pearson's Chi-squared test
##
## data: table(hr$left, hr$number_project)
## X-squared = 5373.6, df = 5, p-value < 2.2e-16
# Stacked bar chart: share of employees who stayed vs. left for each project
# count.
hr %>%
  group_by(number_project, left) %>%
  # "drop_last" keeps the result grouped by project count only, so the
  # proportions below sum to 1 within each project count; stating it
  # explicitly also silences the "summarise() has grouped output" message.
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(
    prop = count / sum(count),
    # A single labelled factor drives both the colour and the legend text.
    status = factor(
      ifelse(left == 1, "Left", "Stayed"),
      levels = c("Stayed", "Left")
    )
  ) %>%
  ungroup() %>%
  plot_ly(
    x = ~factor(number_project),
    y = ~prop,
    color = ~status,
    # Explicit two-colour palette (Set2 colours): the default palette lookup
    # warns "minimal value for n is 3" when there are only two levels.
    colors = c("#66C2A5", "#8DA0CB"),
    type = "bar"
  ) %>%
  layout(
    barmode = "stack",
    # NOTE(review): the title asserts a monotonic trend; confirm against the
    # plotted proportions for the lowest project counts before publishing.
    title = "Employees with more projects are more likely to leave",
    yaxis = list(title = "Proportion", tickformat = ",.0%"),
    xaxis = list(title = "Number of Projects")
  )
## `summarise()` has grouped output by 'number_project'. You can override using
## the `.groups` argument.
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
# Technical Interpretation
# The test yields a very small p-value (< 2.2e-16), so we reject the null
# hypothesis of independence: there is a statistically significant association
# between the number of projects and whether an employee left the company. The
# test alone does not show the direction of that association; the direction is
# read from the chart.
# Non-Technical Interpretation
# Employees who worked on more projects are more likely to leave the company.
# This suggests that workload (represented by project count) might be a factor
# in employee dissatisfaction or burnout. If attrition is higher among
# employees with six or more projects, that is a red flag that overloading
# staff could be driving turnover.