library(readr)
library(dplyr)
library(plotly)
# Load the HR attrition dataset directly from its GitHub-hosted CSV.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
# Add a readable text label for 'left' (1 -> "Left", anything else -> "Stayed").
# Note: this is a character column, not a factor.
hr[["left_label"]] <- ifelse(hr[["left"]] == 1, "Left", "Stayed")
# Preview the first rows to confirm the new column is present.
head(hr)
## # A tibble: 6 × 11
## satisfaction_level last_evaluation number_project average_montly_hours
## <dbl> <dbl> <dbl> <dbl>
## 1 0.38 0.53 2 157
## 2 0.8 0.86 5 262
## 3 0.11 0.88 7 272
## 4 0.72 0.87 5 223
## 5 0.37 0.52 2 159
## 6 0.41 0.5 2 153
## # ℹ 7 more variables: time_spend_company <dbl>, Work_accident <dbl>,
## # left <dbl>, promotion_last_5years <dbl>, Department <chr>, salary <chr>,
## # left_label <chr>
# Pearson chi-squared test of independence: department vs. attrition ('left').
chisq.test(x = hr$Department, y = hr$left)
##
## Pearson's Chi-squared test
##
## data: hr$Department and hr$left
## X-squared = 86.825, df = 9, p-value = 7.042e-15
The p-value is extremely small (well below 0.05): if department and attrition were truly independent, the probability of observing an association at least this strong by chance alone would be essentially zero. We therefore reject the null hypothesis that department and employee attrition are independent.
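The test shows only that attrition rates differ across departments overall, not which departments differ. The proportions plotted below suggest that employees in certain departments — particularly HR and accounting — are more likely to leave the company than those in technical or management roles.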
# Share of employees per department who stayed (left == 0) and who left
# (left == 1), each taken over the department's head count.
prop_dept <- hr %>%
  group_by(Department) %>%
  summarise(
    Stayed = sum(left == 0) / length(left),
    Left = sum(left == 1) / length(left)
  )
# Stacked bar chart of the stayed/left shares for each department.
# The x mapping is declared once in plot_ly() and inherited by both bar traces.
plot_ly(data = prop_dept, x = ~Department) %>%
  add_bars(y = ~Stayed, name = "Stayed", marker = list(color = "#2ecc71")) %>%
  add_bars(y = ~Left, name = "Left", marker = list(color = "#e74c3c")) %>%
  layout(
    title = "Employees in HR and Accounting are more likely to leave the company",
    barmode = "stack",
    xaxis = list(title = "Department", tickangle = -30),
    # Show the proportion axis as whole-number percentages.
    yaxis = list(title = "Proportion", tickformat = ",.0%")
  )
# Pearson chi-squared test of independence: salary level vs. attrition ('left').
chisq.test(x = hr$salary, y = hr$left)
##
## Pearson's Chi-squared test
##
## data: hr$salary and hr$left
## X-squared = 381.23, df = 2, p-value < 2.2e-16
The p-value is extremely small (far below the 0.05 significance threshold), indicating strong statistical evidence that salary level and employee attrition are not independent. We reject the null hypothesis.
Employees with low salaries are much more likely to leave the company, while high-salary employees rarely leave.
# Share of employees per salary level who stayed (left == 0) and who left
# (left == 1). Salary is converted to a factor with explicit levels so the
# groups come out in low -> medium -> high order rather than alphabetically.
prop_salary <- hr %>%
  group_by(salary = factor(salary, levels = c("low", "medium", "high"))) %>%
  summarise(
    Stayed = sum(left == 0) / length(left),
    Left = sum(left == 1) / length(left)
  )
# Stacked bar chart of the stayed/left shares for each salary level.
# The x mapping is declared once in plot_ly() and inherited by both bar traces.
plot_ly(data = prop_salary, x = ~salary) %>%
  add_bars(y = ~Stayed, name = "Stayed", marker = list(color = "#2ecc71")) %>%
  add_bars(y = ~Left, name = "Left", marker = list(color = "#e74c3c")) %>%
  layout(
    title = "Low-salary employees are much more likely to leave the company",
    barmode = "stack",
    xaxis = list(title = "Salary Level"),
    # Show the proportion axis as whole-number percentages.
    yaxis = list(title = "Proportion", tickformat = ",.0%")
  )
# Chi-squared test of independence: work accident (0/1) vs. attrition ('left').
# Both variables are binary, so this is a 2x2 table.
chisq.test(x = hr$Work_accident, y = hr$left)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: hr$Work_accident and hr$left
## X-squared = 357.56, df = 1, p-value < 2.2e-16
The p-value is extremely small (well below 0.05), indicating a statistically significant relationship between whether an employee experienced a work accident and whether they left the company. We reject the null hypothesis of independence.
Employees who have had a workplace accident are actually less likely to leave the company — perhaps because they feel more supported or are receiving compensation and benefits.
# Share of employees who stayed (left == 0) and who left (left == 1), split by
# whether they had a work accident. The 0/1 flag is relabelled as text so the
# chart axis is self-explanatory.
prop_accident <- hr %>%
  group_by(
    Work_accident = ifelse(Work_accident == 1, "Had Accident", "No Accident")
  ) %>%
  summarise(
    Stayed = sum(left == 0) / length(left),
    Left = sum(left == 1) / length(left)
  )
# Stacked bar chart of the stayed/left shares by work-accident status.
# The x mapping is declared once in plot_ly() and inherited by both bar traces.
plot_ly(data = prop_accident, x = ~Work_accident) %>%
  add_bars(y = ~Stayed, name = "Stayed", marker = list(color = "#2ecc71")) %>%
  add_bars(y = ~Left, name = "Left", marker = list(color = "#e74c3c")) %>%
  layout(
    title = "Employees who had a workplace accident are less likely to leave the company",
    barmode = "stack",
    xaxis = list(title = "Work Accident Status"),
    # Show the proportion axis as whole-number percentages.
    yaxis = list(title = "Proportion", tickformat = ",.0%")
  )
# Chi-squared test of independence: promoted in the last 5 years (0/1) vs.
# attrition ('left'). Both variables are binary, so this is a 2x2 table.
chisq.test(x = hr$promotion_last_5years, y = hr$left)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: hr$promotion_last_5years and hr$left
## X-squared = 56.262, df = 1, p-value = 6.344e-14
The p-value is extremely small (far below 0.05), indicating a highly statistically significant association between receiving a promotion in the last 5 years and employee attrition. We reject the null hypothesis that these variables are independent.
Employees who were NOT promoted in the last 5 years are far more likely to leave the company. This is consistent with lack of career growth being an important driver of employee turnover, although the test establishes an association rather than a causal effect.
# Share of employees who stayed (left == 0) and who left (left == 1), split by
# promotion status. The 0/1 flag is relabelled as text so the chart axis is
# self-explanatory.
prop_promo <- hr %>%
  group_by(
    promotion_last_5years = ifelse(
      promotion_last_5years == 1, "Promoted", "Not Promoted"
    )
  ) %>%
  summarise(
    Stayed = sum(left == 0) / length(left),
    Left = sum(left == 1) / length(left)
  )
# Stacked bar chart of the stayed/left shares by promotion status.
# The x mapping is declared once in plot_ly() and inherited by both bar traces.
plot_ly(data = prop_promo, x = ~promotion_last_5years) %>%
  add_bars(y = ~Stayed, name = "Stayed", marker = list(color = "#2ecc71")) %>%
  add_bars(y = ~Left, name = "Left", marker = list(color = "#e74c3c")) %>%
  layout(
    title = "Employees not promoted in the last 5 years are more likely to leave",
    barmode = "stack",
    xaxis = list(title = "Promotion Status (Last 5 Years)"),
    # Show the proportion axis as whole-number percentages.
    yaxis = list(title = "Proportion", tickformat = ",.0%")
  )
All four chi-square tests produced extremely low p-values, indicating statistically significant relationships between each tested variable and employee attrition. Key takeaways: