library(readr)
library(dplyr)
library(plotly)
# Load the HR dataset (CSV hosted on GitHub) into a tibble.
hr <- read_csv(
  "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)

# Question 1: is leaving associated with department?
# Turn the `left` flag into a readable label: 0 -> "Stayed", otherwise "Left".
dept <- mutate(
  hr,
  Employee_Status = ifelse(left != 0, "Left", "Stayed")
)

# Contingency table: employee status (rows) by department (columns).
dept_table <- table(
  dept$Employee_Status,
  dept$Department
)

# Chi-squared test of independence on the contingency table.
chi1 <- chisq.test(dept_table)
print(chi1)
##
## Pearson's Chi-squared test
##
## data: dept_table
## X-squared = 86.825, df = 9, p-value = 7.042e-15
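# Technical Interpretation: The p-value from the chi-squared test is very small (7.042e-15), so the association between department and employee status is statistically significant.
# Non-technical Interpretation: Whether an employee left is related to their department; some departments lose a larger share of their employees than others.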
# Plot 1: Employee Status vs Department
# One bar per department, split by employee status. position = "fill"
# normalises every bar to a height of 1, so the y-axis is the share of
# employees in each status within that department.
ggplot(data = dept, mapping = aes(fill = Employee_Status, x = Department)) +
  geom_bar(position = "fill") +
  labs(
    x = "Department",
    y = "Proportion",
    title = "Department distribution between those who stayed and left vary significantly"
  ) +
  theme_minimal()
# Question 2: is leaving associated with salary level?
# Turn the `left` flag into a readable label: 0 -> "Stayed", otherwise "Left".
slry <- mutate(
  hr,
  Employee_Status = ifelse(left != 0, "Left", "Stayed")
)

# Contingency table: employee status (rows) by salary level (columns).
salary_table <- table(
  slry$Employee_Status,
  slry$salary
)

# Chi-squared test of independence on the contingency table.
chi2 <- chisq.test(salary_table)
print(chi2)
##
## Pearson's Chi-squared test
##
## data: salary_table
## X-squared = 381.23, df = 2, p-value < 2.2e-16
# Technical Interpretation: The p-value is significant (< 2.2e-16), indicating that salary level differs by Employee_Status.
# Non-technical Interpretation: Employees with lower salary levels are more likely to leave.
# Plot 2: Employee Status vs Salary
# One bar per salary level, split by employee status. position = "fill"
# normalises every bar to a height of 1, so the y-axis is the share of
# employees in each status within that salary level.
ggplot(
  slry,
  aes(
    # A character column is ordered alphabetically on a discrete axis
    # (high, low, medium). List the ordinal levels first so the axis reads
    # low -> high; union() keeps any other value present in the data after
    # them instead of turning it into NA.
    # NOTE(review): assumes salary is coded "low"/"medium"/"high" -- confirm.
    x = factor(salary, levels = union(c("low", "medium", "high"), salary)),
    fill = Employee_Status
  )
) +
  geom_bar(position = "fill") +
  labs(
    title = "Salary levels had a significant effect on an employee's likelihood of leaving",
    y = "Proportion", x = "Salary"
  ) +
  # Same theme as Plot 1 so the figures look consistent.
  theme_minimal()
# Question 3: is leaving associated with having been promoted?
# Turn the `left` flag into a readable label: 0 -> "Stayed", otherwise "Left".
promo <- mutate(
  hr,
  Employee_Status = ifelse(left != 0, "Left", "Stayed")
)

# Contingency table: employee status (rows) by promotion flag (columns).
promo_table <- table(
  promo$Employee_Status,
  promo$promotion_last_5years
)

# Chi-squared test of independence; the printed result reports Yates'
# continuity correction for this table.
chi3 <- chisq.test(promo_table)
print(chi3)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: promo_table
## X-squared = 56.262, df = 1, p-value = 6.344e-14
# Technical Interpretation: The p-value (6.344e-14) is significant, showing that promotions are associated with employee retention.
# Non-technical Interpretation: A lack of promotions could lead to lower employee retention and higher attrition.
# Plot 3: Employee Status vs Promotions
# One bar per promotion flag value, split by employee status.
# position = "fill" normalises every bar to a height of 1, so the y-axis is
# the share of employees in each status within that group.
ggplot(
  promo,
  aes(x = as.factor(promotion_last_5years), fill = Employee_Status)
) +
  geom_bar(position = "fill") +
  # Show readable tick labels instead of the raw 0/1 codes. Labels are matched
  # by name, so any value other than "0"/"1" keeps its own label.
  # NOTE(review): assumes 1 means "promoted" and 0 "not promoted" -- confirm.
  scale_x_discrete(labels = c("0" = "No", "1" = "Yes")) +
  labs(
    title = "Promotions linked to retention",
    y = "Proportion", x = "Promoted in Last 5 Years"
  ) +
  # Same theme as Plot 1 so the figures look consistent.
  theme_minimal()
# Question 4: is leaving associated with having had a work accident?
# Turn the `left` flag into a readable label: 0 -> "Stayed", otherwise "Left".
acidnt <- mutate(
  hr,
  Employee_Status = ifelse(left != 0, "Left", "Stayed")
)

# Contingency table: employee status (rows) by work-accident flag (columns).
accident_table <- table(
  acidnt$Employee_Status,
  acidnt$Work_accident
)

# Chi-squared test of independence; the printed result reports Yates'
# continuity correction for this table.
chi4 <- chisq.test(accident_table)
print(chi4)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: accident_table
## X-squared = 357.56, df = 1, p-value < 2.2e-16
# Technical Interpretation: The significant p-value (< 2.2e-16) indicates that work accidents are associated with leaving.
# Non-technical Interpretation: Employees involved in work accidents tend to have a different, slightly higher retention rate than those who were not.
# Plot 4: Employee Status vs Work Accidents
# One bar per work-accident flag value, split by employee status.
# position = "fill" normalises every bar to a height of 1, so the y-axis is
# the share of employees in each status within that group.
ggplot(
  acidnt,
  aes(x = as.factor(Work_accident), fill = Employee_Status)
) +
  geom_bar(position = "fill") +
  # Show readable tick labels instead of the raw 0/1 codes. Labels are matched
  # by name, so any value other than "0"/"1" keeps its own label.
  # NOTE(review): assumes 1 means "had an accident" and 0 "none" -- confirm.
  scale_x_discrete(labels = c("0" = "No", "1" = "Yes")) +
  labs(
    title = "Work accidents and employee retention",
    y = "Proportion", x = "Involved in Work Accident"
  ) +
  # Same theme as Plot 1 so the figures look consistent.
  theme_minimal()