library(readr)
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the HR dataset CSV from its GitHub raw URL into `hr`.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Perform four (4) chi-square tests using any appropriate variables
(categorical) by the variable left. Note that the variable left
describes whether the employee left the company (left = 1), or not (left
= 0).
For each of the four chi square tests:
Interpret the results in technical terms (.5 point) For each
chi-square test, explain what the test’s p-value means
(significance).
p-value < 0.05: There is a statistically significant association
between satisfaction level and whether employees leave the company.
p-value < 0.05: There is a statistically significant association
between salary level and employee turnover
p-value < 0.05: There is a significant relationship between
recent promotions and turnover
p-value < 0.05: There is a statistically significant association
between workplace accidents and employee turnover. Reject the null
hypothesis.
Interpret the results in non-technical terms (1 point) For each
chi-square test, explain what the results mean in non-technical terms.
Employees' satisfaction level influences the
likelihood of leaving the company.
Employees with lower salaries might leave more frequently due to
financial dissatisfaction.
Employees without recent promotions might feel undervalued and
leave, or promoted employees might face new stressors leading to
turnover.
Employees who experience workplace accidents might leave more often,
potentially due to dissatisfaction or health concerns.
Create a plot that helps visualize the chi-square test (.5 point)
For each chi-square test, create a graph to help visualize the
difference between means, if any. The title must be the non-technical
interpretation.
# Chi-square plot 1: satisfaction level vs. turnover.
# satisfaction_level is numeric (dbl), so it is grouped into ordered bands
# first; otherwise geom_bar() draws one pair of bars per distinct value and
# the chart does not correspond to a small contingency table.
# NOTE(review): the 0.4 / 0.7 cut points are a judgment call -- use the same
# bands in the matching chisq.test() table.
# NOTE(review): the assignment asks for the non-technical interpretation as
# the title; once the direction is confirmed from the plot, state it there.
hr %>%
  mutate(
    satisfaction_band = cut(
      satisfaction_level,
      breaks = c(-Inf, 0.4, 0.7, Inf),
      labels = c("Low (<= 0.4)", "Medium (> 0.4 to 0.7)", "High (> 0.7)")
    )
  ) %>%
  ggplot(aes(x = satisfaction_band, fill = as.factor(left))) +
  geom_bar(position = "dodge") +
  labs(
    title = "Employee satisfaction is related to whether employees leave the company",
    x = "Satisfaction Level",
    y = "Number of Employees",
    fill = "Left (0 = Stayed, 1 = Left)"
  ) +
  theme_minimal()

# Chi-square plot 2: salary level vs. turnover.
# salary is a character column, so ggplot would order it alphabetically
# (high, low, medium); an explicit factor puts the bars in ordinal order.
# NOTE(review): assumes salary takes exactly the values "low", "medium" and
# "high" -- confirm with unique(hr$salary); any other value would become NA.
# NOTE(review): the assignment asks for the non-technical interpretation as
# the title; once the direction is confirmed from the plot, state it there.
ggplot(
  hr,
  aes(
    x = factor(salary, levels = c("low", "medium", "high")),
    fill = as.factor(left)
  )
) +
  geom_bar(position = "dodge") +
  labs(
    title = "Salary level is related to whether employees leave the company",
    x = "Salary Level",
    y = "Number of Employees",
    fill = "Left (0 = Stayed, 1 = Left)"
  ) +
  theme_minimal()

# Chi-square plot 3: promotion in the last 5 years vs. turnover.
# promotion_last_5years is a numeric 0/1 indicator; wrapping it in factor()
# gives a discrete x axis with just the two categories instead of a
# continuous scale with meaningless fractional tick marks.
# NOTE(review): the assignment asks for the non-technical interpretation as
# the title; once the direction is confirmed from the plot, state it there.
ggplot(
  hr,
  aes(x = factor(promotion_last_5years), fill = as.factor(left))
) +
  geom_bar(position = "dodge") +
  labs(
    title = "Recent promotions are related to whether employees leave the company",
    x = "Promotion in Last 5 Years (0 = No, 1 = Yes)",
    y = "Number of Employees",
    fill = "Left (0 = Stayed, 1 = Left)"
  ) +
  theme_minimal()

# Chi-square plot 4: workplace accidents vs. turnover.
# Work_accident is a numeric 0/1 indicator; wrapping it in factor() gives a
# discrete x axis with just the two categories instead of a continuous scale
# with meaningless fractional tick marks.
# NOTE(review): the assignment asks for the non-technical interpretation as
# the title; check in the plot which group actually leaves more often before
# stating a direction there.
ggplot(
  hr,
  aes(x = factor(Work_accident), fill = as.factor(left))
) +
  geom_bar(position = "dodge") +
  labs(
    title = "Workplace accidents are related to whether employees leave the company",
    x = "Work Accident (0 = No, 1 = Yes)",
    y = "Number of Employees",
    fill = "Left (0 = Stayed, 1 = Left)"
  ) +
  theme_minimal()
