1. Chi-Square Test: Left vs. Salary

# Cross-tabulate salary level against the `left` flag and request
# Pearson's chi-squared test of independence (chisq = TRUE).
CrossTable(
  hr$salary,
  hr$left,
  chisq = TRUE
)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  14999 
## 
##  
##              | hr$left 
##    hr$salary |         0 |         1 | Row Total | 
## -------------|-----------|-----------|-----------|
##         high |      1155 |        82 |      1237 | 
##              |    47.915 |   153.339 |           | 
##              |     0.934 |     0.066 |     0.082 | 
##              |     0.101 |     0.023 |           | 
##              |     0.077 |     0.005 |           | 
## -------------|-----------|-----------|-----------|
##          low |      5144 |      2172 |      7316 | 
##              |    33.200 |   106.247 |           | 
##              |     0.703 |     0.297 |     0.488 | 
##              |     0.450 |     0.608 |           | 
##              |     0.343 |     0.145 |           | 
## -------------|-----------|-----------|-----------|
##       medium |      5129 |      1317 |      6446 | 
##              |     9.648 |    30.876 |           | 
##              |     0.796 |     0.204 |     0.430 | 
##              |     0.449 |     0.369 |           | 
##              |     0.342 |     0.088 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |     11428 |      3571 |     14999 | 
##              |     0.762 |     0.238 |           | 
## -------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  381.225     d.f. =  2     p =  1.652087e-83 
## 
## 
## 

Technical interpretation: The p-value (≈ 1.65e-83) is far smaller than alpha (0.001), so we reject H0 (that salary and leaving are independent). Salary and leaving the company are dependent (associated).

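Non-technical interpretation: Employees with low salaries are much more likely to leave the company. About 29.7% of low-salary employees left, compared with 20.4% of medium-salary and 6.6% of high-salary employees, so low-salary employees are about 4.5 times as likely to leave as high-salary employees.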

# Recode salary as a factor with an explicit low -> medium -> high level
# order, which controls the order of the groups (and bars) below.
df <- mutate(
  hr,
  salary = factor(salary, levels = c("low", "medium", "high"))
)

# Percentage of employees with left == 1 within each salary level.
summary_df <- df %>%
  group_by(salary) %>%
  summarise(pct_left = 100 * mean(left == 1))

# Bar chart: one bar per salary level, height = percentage who left.
summary_df %>%
  plot_ly(x = ~salary, y = ~pct_left, type = "bar") %>%
  layout(
    title = "Employees with low salaries are much more likely to leave the company",
    xaxis = list(title = "Salary Level"),
    yaxis = list(title = "Percentage Left (%)")
  )

2. Chi-Square Test: Left vs. Department

# Cross-tabulate department against the `left` flag and request
# Pearson's chi-squared test of independence (chisq = TRUE).
CrossTable(
  hr$Department,
  hr$left,
  chisq = TRUE
)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  14999 
## 
##  
##               | hr$left 
## hr$Department |         0 |         1 | Row Total | 
## --------------|-----------|-----------|-----------|
##    accounting |       563 |       204 |       767 | 
##               |     0.783 |     2.506 |           | 
##               |     0.734 |     0.266 |     0.051 | 
##               |     0.049 |     0.057 |           | 
##               |     0.038 |     0.014 |           | 
## --------------|-----------|-----------|-----------|
##            hr |       524 |       215 |       739 | 
##               |     2.709 |     8.670 |           | 
##               |     0.709 |     0.291 |     0.049 | 
##               |     0.046 |     0.060 |           | 
##               |     0.035 |     0.014 |           | 
## --------------|-----------|-----------|-----------|
##            IT |       954 |       273 |      1227 | 
##               |     0.391 |     1.252 |           | 
##               |     0.778 |     0.222 |     0.082 | 
##               |     0.083 |     0.076 |           | 
##               |     0.064 |     0.018 |           | 
## --------------|-----------|-----------|-----------|
##    management |       539 |        91 |       630 | 
##               |     7.250 |    23.202 |           | 
##               |     0.856 |     0.144 |     0.042 | 
##               |     0.047 |     0.025 |           | 
##               |     0.036 |     0.006 |           | 
## --------------|-----------|-----------|-----------|
##     marketing |       655 |       203 |       858 | 
##               |     0.002 |     0.008 |           | 
##               |     0.763 |     0.237 |     0.057 | 
##               |     0.057 |     0.057 |           | 
##               |     0.044 |     0.014 |           | 
## --------------|-----------|-----------|-----------|
##   product_mng |       704 |       198 |       902 | 
##               |     0.408 |     1.307 |           | 
##               |     0.780 |     0.220 |     0.060 | 
##               |     0.062 |     0.055 |           | 
##               |     0.047 |     0.013 |           | 
## --------------|-----------|-----------|-----------|
##         RandD |       666 |       121 |       787 | 
##               |     7.346 |    23.510 |           | 
##               |     0.846 |     0.154 |     0.052 | 
##               |     0.058 |     0.034 |           | 
##               |     0.044 |     0.008 |           | 
## --------------|-----------|-----------|-----------|
##         sales |      3126 |      1014 |      4140 | 
##               |     0.255 |     0.815 |           | 
##               |     0.755 |     0.245 |     0.276 | 
##               |     0.274 |     0.284 |           | 
##               |     0.208 |     0.068 |           | 
## --------------|-----------|-----------|-----------|
##       support |      1674 |       555 |      2229 | 
##               |     0.348 |     1.114 |           | 
##               |     0.751 |     0.249 |     0.149 | 
##               |     0.146 |     0.155 |           | 
##               |     0.112 |     0.037 |           | 
## --------------|-----------|-----------|-----------|
##     technical |      2023 |       697 |      2720 | 
##               |     1.178 |     3.771 |           | 
##               |     0.744 |     0.256 |     0.181 | 
##               |     0.177 |     0.195 |           | 
##               |     0.135 |     0.046 |           | 
## --------------|-----------|-----------|-----------|
##  Column Total |     11428 |      3571 |     14999 | 
##               |     0.762 |     0.238 |           | 
## --------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  86.82547     d.f. =  9     p =  7.04213e-15 
## 
## 
## 

Technical interpretation: The p-value (≈ 7.04e-15) is far below alpha (0.001), so we reject H0 (that department and leaving are independent). Department and leaving the company are statistically associated.

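Non-technical interpretation: Employees from some departments leave much more frequently than others. HR (29.1%), accounting (26.6%), and technical (25.6%) have the highest share of leavers, while management (14.4%) and R&D (15.4%) have the lowest, roughly half the rate of the highest departments.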

# Percentage of employees with left == 1 within each department.
dept_df <- hr %>%
  group_by(Department) %>%
  summarise(pct_left = 100 * mean(left == 1))

# Bar chart: one bar per department, height = percentage who left.
dept_df %>%
  plot_ly(x = ~Department, y = ~pct_left, type = "bar") %>%
  layout(
    title = "Employees from some departments leave much more often than others",
    xaxis = list(title = "Department"),
    yaxis = list(title = "Percentage Who Left (%)")
  )

3. Chi-Square Test: Left vs. Promotion in Last 5 Years

# Cross-tabulate the promotion indicator against the `left` flag and
# request the chi-squared test (chisq = TRUE); for this 2x2 table the
# output also reports the Yates continuity-corrected statistic.
CrossTable(
  hr$promotion_last_5years,
  hr$left,
  chisq = TRUE
)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  14999 
## 
##  
##                          | hr$left 
## hr$promotion_last_5years |         0 |         1 | Row Total | 
## -------------------------|-----------|-----------|-----------|
##                        0 |     11128 |      3552 |     14680 | 
##                          |     0.290 |     0.928 |           | 
##                          |     0.758 |     0.242 |     0.979 | 
##                          |     0.974 |     0.995 |           | 
##                          |     0.742 |     0.237 |           | 
## -------------------------|-----------|-----------|-----------|
##                        1 |       300 |        19 |       319 | 
##                          |    13.343 |    42.702 |           | 
##                          |     0.940 |     0.060 |     0.021 | 
##                          |     0.026 |     0.005 |           | 
##                          |     0.020 |     0.001 |           | 
## -------------------------|-----------|-----------|-----------|
##             Column Total |     11428 |      3571 |     14999 | 
##                          |     0.762 |     0.238 |           | 
## -------------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  57.26273     d.f. =  1     p =  3.813123e-14 
## 
## Pearson's Chi-squared test with Yates' continuity correction 
## ------------------------------------------------------------
## Chi^2 =  56.26163     d.f. =  1     p =  6.344155e-14 
## 
## 

Technical interpretation: The p-value is far below 0.001, so we reject the H0. Receiving a promotion and leaving the company are associated.

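Non-technical interpretation: Employees who have not received a promotion in the last five years are more likely to leave the company: 24.2% of them left, compared with 6.0% of promoted employees (about four times as likely).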

# Percentage of employees with left == 1, split by whether they were
# promoted in the last five years (0 = no, 1 = yes).
promo_df <- hr %>%
  group_by(promotion_last_5years) %>%
  summarise(
    pct_left = mean(left == 1) * 100
  )

# Bar plot
plot_ly(
  data = promo_df,
  x = ~promotion_last_5years,
  y = ~pct_left,
  type = "bar"
) %>%
  layout(
    title = "Employees without promotions are more likely to leave the company",
    xaxis = list(
      title = "Promotion in Last 5 Years (0 = No, 1 = Yes)",
      # The indicator is a 0/1 code, not a quantity: force a categorical
      # axis so only the ticks "0" and "1" are drawn instead of a
      # continuous scale with fractional tick marks.
      type = "category"
    ),
    yaxis = list(title = "Percentage Who Left (%)")
  )

4. Chi-Square Test: Left vs. Work Accident

# Cross-tabulate the work-accident indicator against the `left` flag and
# request the chi-squared test (chisq = TRUE); for this 2x2 table the
# output also reports the Yates continuity-corrected statistic.
CrossTable(
  hr$Work_accident,
  hr$left,
  chisq = TRUE
)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  14999 
## 
##  
##                  | hr$left 
## hr$Work_accident |         0 |         1 | Row Total | 
## -----------------|-----------|-----------|-----------|
##                0 |      9428 |      3402 |     12830 | 
##                  |    12.346 |    39.510 |           | 
##                  |     0.735 |     0.265 |     0.855 | 
##                  |     0.825 |     0.953 |           | 
##                  |     0.629 |     0.227 |           | 
## -----------------|-----------|-----------|-----------|
##                1 |      2000 |       169 |      2169 | 
##                  |    73.029 |   233.709 |           | 
##                  |     0.922 |     0.078 |     0.145 | 
##                  |     0.175 |     0.047 |           | 
##                  |     0.133 |     0.011 |           | 
## -----------------|-----------|-----------|-----------|
##     Column Total |     11428 |      3571 |     14999 | 
##                  |     0.762 |     0.238 |           | 
## -----------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  358.5938     d.f. =  1     p =  5.698673e-80 
## 
## Pearson's Chi-squared test with Yates' continuity correction 
## ------------------------------------------------------------
## Chi^2 =  357.5624     d.f. =  1     p =  9.55824e-80 
## 
## 

Technical interpretation: The p-value (≈ 5.70e-80) is far below alpha (0.001), so we reject H0 (that work accidents and leaving are independent). Work accidents and leaving the company are statistically associated.

Non-technical interpretation: Employees who have had a work accident are LESS likely to leave the company: only 7.8% of them left, compared with 26.5% of employees who had no accident.

# Percentage of employees with left == 1, split by whether they had a
# work accident (0 = no, 1 = yes).
acc_df <- hr %>%
  group_by(Work_accident) %>%
  summarise(
    pct_left = mean(left == 1) * 100
  )

# Bar plot
plot_ly(
  data = acc_df,
  x = ~Work_accident,
  y = ~pct_left,
  type = "bar"
) %>%
  layout(
    title = "Employees who experienced a work accident are less likely to leave",
    xaxis = list(
      title = "Work Accident (0 = No, 1 = Yes)",
      # The indicator is a 0/1 code, not a quantity: force a categorical
      # axis so only the ticks "0" and "1" are drawn instead of a
      # continuous scale with fractional tick marks.
      type = "category"
    ),
    yaxis = list(title = "Percentage Who Left (%)")
  )