# Cross-tabulate salary level against the `left` indicator, then run a
# chi-square test of independence on the resulting contingency table.
salary_left_table <- table(hr[["salary"]], hr[["left"]])
chi_test_salary <- chisq.test(x = salary_left_table)
# head() returns the first six components of the test result as a list
# (statistic, parameter, p.value, method, data.name, observed).
head(chi_test_salary, n = 6L)
## $statistic
## X-squared
## 381.225
##
## $parameter
## df
## 2
##
## $p.value
## [1] 1.652087e-83
##
## $method
## [1] "Pearson's Chi-squared test"
##
## $data.name
## [1] "salary_left_table"
##
## $observed
##
## 0 1
## high 1155 82
## low 5144 2172
## medium 5129 1317
The chi-square test resulted in a p-value of 1.652087e-83, which is far below the significance threshold of 0.05. This indicates a statistically significant association between salary level and whether employees left the company; in other words, attrition is not independent of salary level.
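Employees with different salary levels leave the company at different rates. Those with lower salaries are much more likely to leave than those with higher salaries: about 30% of low-salary employees left, compared with about 20% of medium-salary and 7% of high-salary employees.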
# Count employees per (salary, left) combination and convert the counts to
# within-salary proportions.
# `.groups = "drop_last"` states the grouping explicitly: the result stays
# grouped by `salary` only, so `sum(count)` in mutate() is the total for that
# salary level and `prop` sums to 1 within each level. Setting it also
# silences the summarise() grouping message.
# Note: the returned tibble is still grouped by `salary`.
salary_proportions <- hr %>%
  group_by(salary, left) %>%
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(prop = count / sum(count))
# Stacked bar chart of the share of employees who stayed vs. left at each
# salary level; every segment is labelled with its percentage.
plot_ly(
  salary_proportions,
  x = ~salary,
  y = ~prop,
  # Pin the level order so 0 maps to "Stayed" and 1 to "Left" instead of
  # relying on the implicit sort order of the values.
  color = ~factor(left, levels = c(0, 1), labels = c("Stayed", "Left")),
  # An explicit two-colour palette avoids the RColorBrewer "minimal value for
  # n is 3" warning that the default Set2 palette raises for two levels.
  # These are the first and third Set2 colours.
  # NOTE(review): chosen to stay close to the previous default look; adjust
  # if a different palette is preferred.
  colors = c("#66C2A5", "#8DA0CB"),
  type = "bar",
  text = ~paste0(round(prop * 100, 1), "%"),
  textposition = "auto"
) %>%
  layout(
    barmode = "stack",
    xaxis = list(title = "Salary Level"),
    yaxis = list(title = "Proportion", tickformat = ".0%"),
    title = "Employees with lower salaries are more likely to leave the company",
    legend = list(title = list(text = "Employee Status"))
  )
# Cross-tabulate the promotion flag against the `left` indicator, then run a
# chi-square test of independence on the resulting 2x2 table.
promotion_left_table <- table(hr[["promotion_last_5years"]], hr[["left"]])
chi_test_promotion <- chisq.test(x = promotion_left_table)
# head() returns the first six components of the test result as a list
# (statistic, parameter, p.value, method, data.name, observed).
head(chi_test_promotion, n = 6L)
## $statistic
## X-squared
## 56.26163
##
## $parameter
## df
## 1
##
## $p.value
## [1] 6.344155e-14
##
## $method
## [1] "Pearson's Chi-squared test with Yates' continuity correction"
##
## $data.name
## [1] "promotion_left_table"
##
## $observed
##
## 0 1
## 0 11128 3552
## 1 300 19
The chi-square test produced a p-value of 6.344155e-14, which is far below the significance level of 0.05. This indicates a statistically significant relationship between being promoted in the last five years and whether employees left the company.
Employees who were not promoted in the last five years are much more likely to leave the company than those who were promoted (about 24% versus 6%). This suggests that a lack of promotion may contribute to employee attrition, although the test establishes an association rather than a cause.
# Count employees per (promotion_last_5years, left) combination and convert
# the counts to proportions within each promotion group.
# `.groups = "drop_last"` states the grouping explicitly: the result stays
# grouped by `promotion_last_5years` only, so `sum(count)` in mutate() is the
# total for that promotion group and `prop` sums to 1 within each group.
# Setting it also silences the summarise() grouping message.
# Note: the returned tibble is still grouped by `promotion_last_5years`.
promotion_proportions <- hr %>%
  group_by(promotion_last_5years, left) %>%
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(prop = count / sum(count))
# Stacked bar chart of the share of employees who stayed vs. left, split by
# whether they were promoted in the last five years; every segment is
# labelled with its percentage.
plot_ly(
  promotion_proportions,
  # Pin the level order so 0 maps to "Not Promoted" and 1 to "Promoted"
  # instead of relying on the implicit sort order of the values.
  x = ~factor(
    promotion_last_5years,
    levels = c(0, 1),
    labels = c("Not Promoted", "Promoted")
  ),
  y = ~prop,
  # Same explicit ordering for the outcome: 0 -> "Stayed", 1 -> "Left".
  color = ~factor(left, levels = c(0, 1), labels = c("Stayed", "Left")),
  # An explicit two-colour palette avoids the RColorBrewer "minimal value for
  # n is 3" warning that the default Set2 palette raises for two levels.
  # These are the first and third Set2 colours.
  # NOTE(review): chosen to stay close to the previous default look; adjust
  # if a different palette is preferred.
  colors = c("#66C2A5", "#8DA0CB"),
  type = "bar",
  text = ~paste0(round(prop * 100, 1), "%"),
  textposition = "auto"
) %>%
  layout(
    barmode = "stack",
    xaxis = list(title = "Promotion in Last 5 Years"),
    yaxis = list(title = "Proportion", tickformat = ".0%"),
    title = "Employees without promotions are more likely to leave the company",
    legend = list(title = list(text = "Employee Status"))
  )
# Cross-tabulate department against the `left` indicator, then run a
# chi-square test of independence on the resulting contingency table.
department_left_table <- table(hr[["Department"]], hr[["left"]])
chi_test_department <- chisq.test(x = department_left_table)
# head() returns the first six components of the test result as a list
# (statistic, parameter, p.value, method, data.name, observed).
head(chi_test_department, n = 6L)
## $statistic
## X-squared
## 86.82547
##
## $parameter
## df
## 9
##
## $p.value
## [1] 7.04213e-15
##
## $method
## [1] "Pearson's Chi-squared test"
##
## $data.name
## [1] "department_left_table"
##
## $observed
##
## 0 1
## accounting 563 204
## hr 524 215
## IT 954 273
## management 539 91
## marketing 655 203
## product_mng 704 198
## RandD 666 121
## sales 3126 1014
## support 1674 555
## technical 2023 697
The chi-square test produced a p-value of 7.04213e-15, which is far below the significance threshold of 0.05. This indicates a statistically significant relationship between an employee’s department and whether they left the company.
# Count employees per (Department, left) combination and convert the counts
# to within-department proportions.
# `.groups = "drop_last"` states the grouping explicitly: the result stays
# grouped by `Department` only, so `sum(count)` in mutate() is the total for
# that department and `prop` sums to 1 within each department. Setting it
# also silences the summarise() grouping message.
# Note: the returned tibble is still grouped by `Department`.
department_proportions <- hr %>%
  group_by(Department, left) %>%
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(prop = count / sum(count))
# Stacked bar chart of the share of employees who stayed vs. left in each
# department; every segment is labelled with its percentage.
plot_ly(
  department_proportions,
  x = ~Department,
  y = ~prop,
  # Pin the level order so 0 maps to "Stayed" and 1 to "Left" instead of
  # relying on the implicit sort order of the values.
  color = ~factor(left, levels = c(0, 1), labels = c("Stayed", "Left")),
  # An explicit two-colour palette avoids the RColorBrewer "minimal value for
  # n is 3" warning that the default Set2 palette raises for two levels.
  # These are the first and third Set2 colours.
  # NOTE(review): chosen to stay close to the previous default look; adjust
  # if a different palette is preferred.
  colors = c("#66C2A5", "#8DA0CB"),
  type = "bar",
  text = ~paste0(round(prop * 100, 1), "%"),
  textposition = "auto"
) %>%
  layout(
    barmode = "stack",
    xaxis = list(title = "Department"),
    yaxis = list(title = "Proportion", tickformat = ".0%"),
    title = "Employees in some departments are more likely to leave the company",
    legend = list(title = list(text = "Employee Status"))
  )
# Cross-tabulate the work-accident flag against the `left` indicator, then
# run a chi-square test of independence on the resulting 2x2 table.
accident_left_table <- table(hr[["Work_accident"]], hr[["left"]])
chi_test_accident <- chisq.test(x = accident_left_table)
# head() returns the first six components of the test result as a list
# (statistic, parameter, p.value, method, data.name, observed).
head(chi_test_accident, n = 6L)
## $statistic
## X-squared
## 357.5624
##
## $parameter
## df
## 1
##
## $p.value
## [1] 9.55824e-80
##
## $method
## [1] "Pearson's Chi-squared test with Yates' continuity correction"
##
## $data.name
## [1] "accident_left_table"
##
## $observed
##
## 0 1
## 0 9428 3402
## 1 2000 169
The chi-square test produced a p-value of 9.55824e-80, which is far below the significance threshold of 0.05. This indicates a statistically significant relationship between having a work accident and whether an employee left the company.
Employees who experienced work accidents are less likely to leave the company than those who did not (about 8% versus 27%). This suggests that workplace accidents may not directly contribute to employee attrition.
# Count employees per (Work_accident, left) combination and convert the
# counts to proportions within each accident group.
# `.groups = "drop_last"` states the grouping explicitly: the result stays
# grouped by `Work_accident` only, so `sum(count)` in mutate() is the total
# for that accident group and `prop` sums to 1 within each group. Setting it
# also silences the summarise() grouping message.
# Note: the returned tibble is still grouped by `Work_accident`.
accident_proportions <- hr %>%
  group_by(Work_accident, left) %>%
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(prop = count / sum(count))
# Stacked bar chart of the share of employees who stayed vs. left, split by
# whether they had a work accident; every segment is labelled with its
# percentage.
plot_ly(
  accident_proportions,
  # Pin the level order so 0 maps to "No Accident" and 1 to "Had Accident"
  # instead of relying on the implicit sort order of the values.
  x = ~factor(
    Work_accident,
    levels = c(0, 1),
    labels = c("No Accident", "Had Accident")
  ),
  y = ~prop,
  # Same explicit ordering for the outcome: 0 -> "Stayed", 1 -> "Left".
  color = ~factor(left, levels = c(0, 1), labels = c("Stayed", "Left")),
  # An explicit two-colour palette avoids the RColorBrewer "minimal value for
  # n is 3" warning that the default Set2 palette raises for two levels.
  # These are the first and third Set2 colours.
  # NOTE(review): chosen to stay close to the previous default look; adjust
  # if a different palette is preferred.
  colors = c("#66C2A5", "#8DA0CB"),
  type = "bar",
  text = ~paste0(round(prop * 100, 1), "%"),
  textposition = "auto"
) %>%
  layout(
    barmode = "stack",
    xaxis = list(title = "Work Accident"),
    yaxis = list(title = "Proportion", tickformat = ".0%"),
    title = "Employees who had work accidents are less likely to leave the company",
    legend = list(title = list(text = "Employee Status"))
  )