# Load necessary libraries and datasets

library(readr)
library(dplyr)
library(plotly)

# Read the HR CSV from its raw GitHub URL into `hr`.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)

# Question 1

# Add a readable status label: "Stayed" when left == 0, "Left" otherwise.
dept <- mutate(hr, Employee_Status = ifelse(left != 0, "Left", "Stayed"))

# Contingency table of status (rows) by department (columns), followed by
# a chi-squared test on that table.
dept_table <- table(dept[["Employee_Status"]], dept[["Department"]])
chi1 <- chisq.test(dept_table)
print(chi1)
## 
##  Pearson's Chi-squared test
## 
## data:  dept_table
## X-squared = 86.825, df = 9, p-value = 7.042e-15
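# Technical Interpretation: The chi-squared test p-value (7.042e-15) is far below 0.05, so we reject the null hypothesis that employee status is independent of department.
# Non-technical Interpretation: Whether an employee leaves is related to the department they work in; some departments lose a larger share of their staff than others.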

# Plot 1: Employee Status vs Department

# Plot 1: share of employees who stayed vs. left within each department.
# position = "fill" scales every bar to 1, so departments of different
# sizes can be compared directly.
# The title states what the chart shows (stayed/left share per department).
ggplot(dept, aes(x = Department, fill = Employee_Status)) +
  geom_bar(position = "fill") +
  labs(
    title = "Share of employees who left differs significantly by department",
    x = "Department",
    y = "Proportion"
  ) +
  theme_minimal()

# Question 2

# Derive the Stayed/Left label from the `left` flag (0 means stayed).
slry <- mutate(
  hr,
  Employee_Status = ifelse(left == 0, "Stayed", "Left")
)

# Status-by-salary contingency table and the chi-squared test on it.
salary_table <- table(slry[["Employee_Status"]], slry[["salary"]])
chi2 <- chisq.test(salary_table)
print(chi2)
## 
##  Pearson's Chi-squared test
## 
## data:  salary_table
## X-squared = 381.23, df = 2, p-value < 2.2e-16
# Technical Interpretation: The p-value (< 2.2e-16) is significant, indicating that the distribution of salary levels differs by Employee_Status.
# Non-technical Interpretation: Employees with lower salary levels are more likely to leave.

# Plot 2: Employee Status vs Salary

# Plot 2: share of employees who stayed vs. left at each salary level.
# salary is ordinal, but as a plain character column its bars would be
# ordered alphabetically (high, low, medium); explicit levels put them in
# ascending order.
# Assumes salary only takes the values "low", "medium" and "high"; any other
# value would become NA here -- TODO confirm against the data.
ggplot(
  slry,
  aes(
    x = factor(salary, levels = c("low", "medium", "high")),
    fill = Employee_Status
  )
) +
  geom_bar(position = "fill") +
  labs(title = "Salary levels had a significant effect on an employee's likelihood of leaving",
       y = "Proportion", x = "Salary")

# Question 3

# Tag every row as "Left" unless left == 0, in which case "Stayed".
promo <- mutate(hr, Employee_Status = ifelse(left != 0, "Left", "Stayed"))

# Cross-tabulate status against the promotion flag and test the table
# with chisq.test().
promo_table <- table(
  promo[["Employee_Status"]],
  promo[["promotion_last_5years"]]
)
chi3 <- chisq.test(promo_table)
print(chi3)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  promo_table
## X-squared = 56.262, df = 1, p-value = 6.344e-14
# Technical Interpretation: The p-value (6.344e-14) is significant, indicating an association between promotion in the last 5 years and employee retention.
# Non-technical Interpretation: Lack of promotions could lead to less employee retention and higher attrition.

# Plot 3: Employee Status vs Promotions

# Plot 3: stayed/left proportions for each value of promotion_last_5years.
# The flag is converted to a factor so it is drawn as discrete bars.
ggplot(
  data = promo,
  mapping = aes(
    x = as.factor(promotion_last_5years),
    fill = Employee_Status
  )
) +
  geom_bar(position = position_fill()) +
  ggtitle("Promotions linked to retention") +
  xlab("Promoted in Last 5 Years") +
  ylab("Proportion")

# Question 4

# Employee_Status is "Stayed" for left == 0 and "Left" for everything else.
acidnt <- mutate(
  hr,
  Employee_Status = ifelse(left == 0, "Stayed", "Left")
)

# Status-by-accident contingency table, tested with chisq.test().
accident_table <- table(
  acidnt[["Employee_Status"]],
  acidnt[["Work_accident"]]
)
chi4 <- chisq.test(accident_table)
print(chi4)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  accident_table
## X-squared = 357.56, df = 1, p-value < 2.2e-16
# Technical Interpretation: The significant p-value (< 2.2e-16) indicates that work accidents are associated with leaving.
# Non-technical Interpretation: Employees involved in accidents tend to have different, slightly higher retention rates.

# Plot 4: Employee Status vs Work Accidents

# Plot 4: Employee Status vs Work Accidents
# Bars are normalised to 1 so the stayed/left split can be compared across
# the values of Work_accident.
ggplot(
  data = acidnt,
  mapping = aes(x = as.factor(Work_accident), fill = Employee_Status)
) +
  geom_bar(position = position_fill()) +
  labs(
    x = "Involved in Work Accident",
    y = "Proportion",
    title = "Work accidents and employee retention"
  )