library(readr)
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the HR dataset from its raw GitHub URL into `hr`.
# read_csv() guesses the column types and reports them when it loads the file.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Perform four (4) chi-square tests using any appropriate variables (categorical) by the variable left. Note that the variable left describes whether the employee left the company (left = 1), or not (left = 0).

For each of the four chi square tests:

Perform the chi-square test (.5 point) Choose any two appropriate variables from the data and perform the chi-square test, displaying the results.

# Chi-square test 1: left (0 = stayed, 1 = left) vs. satisfaction_level.
# NOTE(review): satisfaction_level is read as a double, so the contingency
# table gets one column per distinct value (hence df = 91 in the result) --
# confirm that treating it as categorical is intended, or bin it first.
table1 <- table(hr[["left"]], hr[["satisfaction_level"]])
chi1 <- chisq.test(x = table1)
print(chi1)
## 
##  Pearson's Chi-squared test
## 
## data:  table1
## X-squared = 7937.7, df = 91, p-value < 2.2e-16
# Chi-square test 2: left (0 = stayed, 1 = left) vs. salary level.
# Cross-tabulate the two columns, then test the table for independence.
table2 <- table(hr[["left"]], hr[["salary"]])
chi2 <- chisq.test(x = table2)
print(x = chi2)
## 
##  Pearson's Chi-squared test
## 
## data:  table2
## X-squared = 381.23, df = 2, p-value < 2.2e-16
# Chi-square test 3: left (0 = stayed, 1 = left) vs. promotion_last_5years.
# The table is 2 x 2, so chisq.test() applies Yates' continuity correction
# by default.
table3 <- table(hr[["left"]], hr[["promotion_last_5years"]])
chi3 <- chisq.test(x = table3)
print(chi3)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  table3
## X-squared = 56.262, df = 1, p-value = 6.344e-14
# Chi-square test 4: left (0 = stayed, 1 = left) vs. Work_accident.
# The table is 2 x 2, so chisq.test() applies Yates' continuity correction
# by default.
table4 <- table(hr[["left"]], hr[["Work_accident"]])
chi4 <- chisq.test(x = table4)
print(chi4)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  table4
## X-squared = 357.56, df = 1, p-value < 2.2e-16

Interpret the results in technical terms (.5 point) For each chi-square test, explain what the test’s p-value means (significance).

p-value < 0.05: There is a statistically significant association between satisfaction level and whether employees leave the company.

p-value < 0.05: There is a statistically significant association between salary level and employee turnover.

p-value < 0.05: There is a statistically significant relationship between recent promotions and employee turnover.

p-value < 0.05: There is a statistically significant association between workplace accidents and employee turnover. Reject the null hypothesis.

Interpret the results in non-technical terms (1 point) For each chi-square test, what do the results mean in non-technical terms?

How satisfied employees are with their job is linked to how likely they are to leave the company.

Employees with lower salaries might leave more frequently due to financial dissatisfaction.

Employees without recent promotions might feel undervalued and leave, or promoted employees might face new stressors leading to turnover.

Employees who experience workplace accidents might leave more often, potentially due to dissatisfaction or health concerns.

Create a plot that helps visualize the chi-square test (.5 point) For each chi-square test, create a graph to help visualize the difference between means, if any. The title must be the non-technical interpretation.

# Side-by-side bar counts of employees at each satisfaction level,
# filled by whether they stayed (0) or left (1).
ggplot(data = hr) +
  geom_bar(
    mapping = aes(x = satisfaction_level, fill = as.factor(left)),
    position = "dodge"
  ) +
  theme_minimal() +
  # Tilt the x-axis tick labels so they do not run into each other.
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Employee Turnover by Satisfaction Level") +
  xlab("Satisfaction_Level") +
  ylab("Number of Employees") +
  labs(fill = "Left (0 = Stayed, 1 = Left)")

# Side-by-side bar counts of employees in each salary level,
# filled by whether they stayed (0) or left (1).
ggplot(data = hr) +
  geom_bar(
    mapping = aes(x = salary, fill = as.factor(left)),
    position = "dodge"
  ) +
  theme_minimal() +
  ggtitle("Employee Turnover by Salary Level") +
  xlab("Salary Level") +
  ylab("Number of Employees") +
  labs(fill = "Left (0 = Stayed, 1 = Left)")

# Side-by-side bar counts of employees by promotion status, filled by whether
# they stayed (0) or left (1).
# promotion_last_5years is a 0/1 numeric column, so it is wrapped in factor()
# to get a discrete x-axis with exactly two categories rather than a
# continuous axis with fractional tick marks between 0 and 1.
ggplot(
  hr,
  aes(x = factor(promotion_last_5years), fill = as.factor(left))
) +
  geom_bar(position = "dodge") +
  labs(
    title = "Employee Turnover and Promotions in Last 5 Years",
    x = "Promotion in Last 5 Years (0 = No, 1 = Yes)",
    y = "Number of Employees",
    fill = "Left (0 = Stayed, 1 = Left)"
  ) +
  theme_minimal()

# Side-by-side bar counts of employees by work-accident status, filled by
# whether they stayed (0) or left (1).
# Work_accident is a 0/1 numeric column, so it is wrapped in factor() to get a
# discrete x-axis with exactly two categories rather than a continuous axis
# with fractional tick marks between 0 and 1.
ggplot(hr, aes(x = factor(Work_accident), fill = as.factor(left))) +
  geom_bar(position = "dodge") +
  labs(
    title = "Employee Turnover and Workplace Accidents",
    x = "Work Accident (0 = No, 1 = Yes)",
    y = "Number of Employees",
    fill = "Left (0 = Stayed, 1 = Left)"
  ) +
  theme_minimal()