library(readr)
## Warning: package 'readr' was built under R version 4.5.2
library(plotly)
## Warning: package 'plotly' was built under R version 4.5.2
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.5.2
# Load data
# Reads the HR attrition CSV straight from GitHub into a tibble named `hr`.
hr <- read_csv('https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv')
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Make variables factors with clear labels
# The 0/1 indicator columns are recoded so tables and chart legends show
# readable labels instead of raw codes.
hr$left <- factor(hr$left, levels = c(0, 1), labels = c("Stayed", "Left"))
hr$Work_accident <- factor(hr$Work_accident,
                           levels = c(0, 1),
                           labels = c("No accident", "Accident"))
hr$promotion_last_5years <- factor(hr$promotion_last_5years,
                                   levels = c(0, 1),
                                   labels = c("No promotion", "Promoted"))
hr$Department <- as.factor(hr$Department)

# Salary is an ordinal band. Left as character it is sorted alphabetically
# (high, low, medium) in tables and on chart axes, so the level order is fixed
# explicitly. The chi-square statistic does not depend on column order.
salary_levels <- c("low", "medium", "high")
# factor() would silently turn an unexpected label into NA; stop instead.
# Values that are already missing are tolerated, as they were before.
stopifnot(all(hr$salary %in% c(salary_levels, NA)))
hr$salary <- factor(hr$salary, levels = salary_levels)

# CHI-SQUARE TEST 1: Salary vs Leaving

# Chi-square test of independence on the left-by-salary contingency table.
test1 <- chisq.test(table(hr$left, hr$salary))
print(test1)
## 
##  Pearson's Chi-squared test
## 
## data:  table(hr$left, hr$salary)
## X-squared = 381.23, df = 2, p-value < 2.2e-16

# Technical write-up: a banner line, then the statistic, degrees of freedom
# and p-value read directly from the components of the test object.
cat("\nChi-square Test 1: Salary vs Leaving (Technical Interpretation)\n")
## 
## Chi-square Test 1: Salary vs Leaving (Technical Interpretation)
with(
  test1,
  cat(
    "Chi-square =", round(statistic, 3),
    "with df =", parameter,
    "and p-value =", p.value, "\n"
  )
)
## Chi-square = 381.225 with df = 2 and p-value = 1.652087e-83
# Both lines of the conclusion are written by one call; sep = "" keeps the
# second line from being prefixed with a space.
cat(
  "Because the p-value is less than .05, there is a statistically significant association\n",
  "between salary level and whether an employee leaves the company.\n\n",
  sep = ""
)
## Because the p-value is less than .05, there is a statistically significant association
## between salary level and whether an employee leaves the company.
# Plain-language takeaway for Test 1; reused below as the chart title.
nontech1 <- "Employees with low salaries are more likely to leave the company."

# Stacked bar chart: for each salary level, the share of employees who stayed
# vs. left. Shares are computed within a salary level, so each bar sums to 1.
plot1 <- hr %>%
  count(salary, left) %>%
  group_by(salary) %>%
  mutate(Proportion = n / sum(n)) %>%
  # The grouping is only needed for the within-salary shares; drop it so a
  # plain (ungrouped) tibble is handed to plot_ly().
  ungroup() %>%
  plot_ly(
    x = ~salary,
    y = ~Proportion,
    color = ~left,   # one bar segment per Stayed/Left outcome
    type = "bar"
  ) %>%
  layout(
    barmode = "stack",
    title = nontech1,
    xaxis = list(title = "Salary Level"),
    # Proportions are 0-1; show them as whole percentages.
    yaxis = list(title = "Proportion", tickformat = ",.0%")
  )

# Print the widget so it renders in the output.
plot1

# CHI-SQUARE TEST 2: Department vs Leaving

# Chi-square test of independence on the left-by-department contingency table.
test2 <- chisq.test(table(hr$left, hr$Department))
print(test2)
## 
##  Pearson's Chi-squared test
## 
## data:  table(hr$left, hr$Department)
## X-squared = 86.825, df = 9, p-value = 7.042e-15

# Technical write-up: a banner line, then the statistic, degrees of freedom
# and p-value read directly from the components of the test object.
cat("\nChi-square Test 2: Department vs Leaving (Technical Interpretation)\n")
## 
## Chi-square Test 2: Department vs Leaving (Technical Interpretation)
with(
  test2,
  cat(
    "Chi-square =", round(statistic, 3),
    "with df =", parameter,
    "and p-value =", p.value, "\n"
  )
)
## Chi-square = 86.825 with df = 9 and p-value = 7.04213e-15
# Both lines of the conclusion are written by one call; sep = "" keeps the
# second line from being prefixed with a space.
cat(
  "Because the p-value is less than .05, there is a statistically significant association\n",
  "between department and whether an employee leaves the company.\n\n",
  sep = ""
)
## Because the p-value is less than .05, there is a statistically significant association
## between department and whether an employee leaves the company.
# Plain-language takeaway for Test 2; reused below as the chart title.
nontech2 <- "Turnover is higher in some departments than in others."

# Stacked bar chart: for each department, the share of employees who stayed
# vs. left. Shares are computed within a department, so each bar sums to 1.
plot2 <- hr %>%
  count(Department, left) %>%
  group_by(Department) %>%
  mutate(Proportion = n / sum(n)) %>%
  # The grouping is only needed for the within-department shares; drop it so
  # a plain (ungrouped) tibble is handed to plot_ly().
  ungroup() %>%
  plot_ly(
    x = ~Department,
    y = ~Proportion,
    color = ~left,   # one bar segment per Stayed/Left outcome
    type = "bar"
  ) %>%
  layout(
    barmode = "stack",
    title = nontech2,
    xaxis = list(title = "Department"),
    # Proportions are 0-1; show them as whole percentages.
    yaxis = list(title = "Proportion", tickformat = ",.0%")
  )

# Print the widget so it renders in the output.
plot2

# CHI-SQUARE TEST 3: Work Accident vs Leaving

# Chi-square test of independence on the left-by-work-accident table.
# The printed method line below shows Yates' continuity correction was applied.
test3 <- chisq.test(table(hr$left, hr$Work_accident))
print(test3)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  table(hr$left, hr$Work_accident)
## X-squared = 357.56, df = 1, p-value < 2.2e-16

# Technical write-up: a banner line, then the statistic, degrees of freedom
# and p-value read directly from the components of the test object.
cat("\nChi-square Test 3: Work Accident vs Leaving (Technical Interpretation)\n")
## 
## Chi-square Test 3: Work Accident vs Leaving (Technical Interpretation)
with(
  test3,
  cat(
    "Chi-square =", round(statistic, 3),
    "with df =", parameter,
    "and p-value =", p.value, "\n"
  )
)
## Chi-square = 357.562 with df = 1 and p-value = 9.55824e-80
# Both lines of the conclusion are written by one call; sep = "" keeps the
# second line from being prefixed with a space.
cat(
  "Because the p-value is less than .05, there is a statistically significant association\n",
  "between having a work accident and whether an employee leaves the company.\n\n",
  sep = ""
)
## Because the p-value is less than .05, there is a statistically significant association
## between having a work accident and whether an employee leaves the company.
# Plain-language takeaway for Test 3; reused below as the chart title.
nontech3 <- "Employees who had a work accident are less likely to leave the company."

# Stacked bar chart: for each work-accident status, the share of employees who
# stayed vs. left. Shares are computed within a status, so each bar sums to 1.
plot3 <- hr %>%
  count(Work_accident, left) %>%
  group_by(Work_accident) %>%
  mutate(Proportion = n / sum(n)) %>%
  # The grouping is only needed for the within-status shares; drop it so a
  # plain (ungrouped) tibble is handed to plot_ly().
  ungroup() %>%
  plot_ly(
    x = ~Work_accident,
    y = ~Proportion,
    color = ~left,   # one bar segment per Stayed/Left outcome
    type = "bar"
  ) %>%
  layout(
    barmode = "stack",
    title = nontech3,
    xaxis = list(title = "Work Accident"),
    # Proportions are 0-1; show them as whole percentages.
    yaxis = list(title = "Proportion", tickformat = ",.0%")
  )

# Print the widget so it renders in the output.
plot3

# CHI-SQUARE TEST 4: Promotion (last 5 years) vs Leaving

# Chi-square test of independence on the left-by-promotion table.
# The printed method line below shows Yates' continuity correction was applied.
test4 <- chisq.test(table(hr$left, hr$promotion_last_5years))
print(test4)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  table(hr$left, hr$promotion_last_5years)
## X-squared = 56.262, df = 1, p-value = 6.344e-14

# Technical write-up: a banner line, then the statistic, degrees of freedom
# and p-value read directly from the components of the test object.
cat("\nChi-square Test 4: Promotion vs Leaving (Technical Interpretation)\n")
## 
## Chi-square Test 4: Promotion vs Leaving (Technical Interpretation)
with(
  test4,
  cat(
    "Chi-square =", round(statistic, 3),
    "with df =", parameter,
    "and p-value =", p.value, "\n"
  )
)
## Chi-square = 56.262 with df = 1 and p-value = 6.344155e-14
# Both lines of the conclusion are written by one call; sep = "" keeps the
# second line from being prefixed with a space.
cat(
  "Because the p-value is less than .05, there is a statistically significant association\n",
  "between being promoted in the last 5 years and whether an employee leaves the company.\n\n",
  sep = ""
)
## Because the p-value is less than .05, there is a statistically significant association
## between being promoted in the last 5 years and whether an employee leaves the company.
# Plain-language takeaway for Test 4; reused below as the chart title.
nontech4 <- "Employees who were promoted in the last 5 years are much less likely to leave."

# Stacked bar chart: for each promotion status, the share of employees who
# stayed vs. left. Shares are computed within a status, so each bar sums to 1.
plot4 <- hr %>%
  count(promotion_last_5years, left) %>%
  group_by(promotion_last_5years) %>%
  mutate(Proportion = n / sum(n)) %>%
  # The grouping is only needed for the within-status shares; drop it so a
  # plain (ungrouped) tibble is handed to plot_ly().
  ungroup() %>%
  plot_ly(
    x = ~promotion_last_5years,
    y = ~Proportion,
    color = ~left,   # one bar segment per Stayed/Left outcome
    type = "bar"
  ) %>%
  layout(
    barmode = "stack",
    title = nontech4,
    xaxis = list(title = "Promotion in Last 5 Years"),
    # Proportions are 0-1; show them as whole percentages.
    yaxis = list(title = "Proportion", tickformat = ",.0%")
  )

# Print the widget so it renders in the output.
plot4