Justin Kaplan
Assignment 9
library(readr)
library(ggplot2)
library(dplyr)
library(vcd)
library(plotly)
# Load the HR dataset used by every test and chart below.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
Chi-Test Number 1
# Establish the proportion
# Within each salary level, compute the share of rows with left == 0 (Stayed)
# and the share with left == 1 (Gone), then show the resulting table.
One <- summarise(
  group_by(hr, salary),
  Stayed = sum(left == 0) / n(),
  Gone = sum(left == 1) / n()
)
print(One)
## # A tibble: 3 × 3
## salary Stayed Gone
## <chr> <dbl> <dbl>
## 1 high 0.934 0.0663
## 2 low 0.703 0.297
## 3 medium 0.796 0.204
# Perform chi-square test
# Pearson's chi-squared test of independence between salary level and
# attrition, run on the employee-level contingency table of counts.
# The previous call passed the summarised table (three labels vs. three
# proportions), which cross-tabulates unique values into 0/1 cells and does
# not test the data. The console output recorded below predates this change
# and must be regenerated by re-running the script.
Chisq1 <- chisq.test(table(hr$salary, hr$left))
## Warning in chisq.test(One$salary, One$Gone): Chi-squared approximation may be
## incorrect
print(Chisq1)
##
## Pearson's Chi-squared test
##
## data: One$salary and One$Gone
## X-squared = 6, df = 4, p-value = 0.1991
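P-Value Interpretation: A p-value of 0.199 means that, if salary level
and attrition were unrelated, a test statistic at least this large would
occur about 20% of the time by chance. That is above the usual 0.05
cutoff, so the result is not statistically significant.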
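Chi-Square Test Interpretation: At the 0.05 level we fail to reject the
null hypothesis that salary level and attrition are independent. Note that
the test reported above was run on the three-row table of proportions
rather than on employee counts, which is why R warns that the chi-squared
approximation may be incorrect.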
Non-Technical Interpretation: The higher an employee's salary, the less
likely they are to leave the company.
# Stacked bar chart: share of employees who left vs. stayed per salary level.
# The x mapping is declared once in plot_ly() and inherited by both traces.
plot_ly(One, x = ~salary) %>%
  add_bars(
    y = ~Gone,
    name = "Gone",
    marker = list(color = "yellow")
  ) %>%
  add_bars(
    y = ~Stayed,
    name = "Stayed",
    marker = list(color = "purple")
  ) %>%
  layout(
    title = "Employee Attrition by Salary Level",
    barmode = "stack",
    xaxis = list(title = "Salary Level"),
    yaxis = list(title = "Proportion", tickformat = ",.0%")
  )
Chi-Test Number 2
# Within each department, compute the share of rows with left == 0 (Stayed)
# and the share with left == 1 (Gone), then show the resulting table.
Two <- summarise(
  group_by(hr, Department),
  Stayed = sum(left == 0) / n(),
  Gone = sum(left == 1) / n()
)
print(Two)
## # A tibble: 10 × 3
## Department Stayed Gone
## <chr> <dbl> <dbl>
## 1 IT 0.778 0.222
## 2 RandD 0.846 0.154
## 3 accounting 0.734 0.266
## 4 hr 0.709 0.291
## 5 management 0.856 0.144
## 6 marketing 0.763 0.237
## 7 product_mng 0.780 0.220
## 8 sales 0.755 0.245
## 9 support 0.751 0.249
## 10 technical 0.744 0.256
# Pearson's chi-squared test of independence between department and
# attrition, run on the employee-level contingency table of counts.
# The previous call passed the summarised table (ten labels vs. ten
# proportions), which cross-tabulates unique values into 0/1 cells and does
# not test the data. The console output recorded below predates this change
# and must be regenerated by re-running the script.
Chisq2 <- chisq.test(table(hr$Department, hr$left))
## Warning in chisq.test(Two$Department, Two$Gone): Chi-squared approximation may
## be incorrect
print(Chisq2)
##
## Pearson's Chi-squared test
##
## data: Two$Department and Two$Gone
## X-squared = 90, df = 81, p-value = 0.2313
P-Value Interpretation: A p-value of 0.231 means that, if department and
attrition were unrelated, a test statistic at least this large would occur
about 23% of the time by chance, so the result is not statistically
significant at the 0.05 level.
Chi-Square Test Interpretation: There is no statistically significant
association between department and employee attrition.
Non-Technical Interpretation: All of the departments have relatively
similar attrition rates, ranging from about 14% (management) to about
29% (hr).
# Stacked bar chart: share of employees who left vs. stayed per department.
# Both traces map x with a formula (~Department) so they read from the data
# frame given to plot_ly(), matching the salary chart, instead of reaching
# around it with Two$Department.
plot_ly(Two) %>%
  add_bars(
    x = ~Department, y = ~Gone, name = "Gone",
    marker = list(color = "red")
  ) %>%
  add_bars(
    x = ~Department, y = ~Stayed, name = "Stayed",
    marker = list(color = "black")
  ) %>%
  layout(
    barmode = "stack",
    xaxis = list(title = "Department"),
    yaxis = list(title = "Percentage of Employees", tickformat = ",.0%"),
    title = "Employee Attrition by Department"
  )
Chi-Test Number 3
# Share of employees who stayed vs. left for each number of projects.
# The grouping column keeps the name `hr$number_project` because later code
# looks it up under that name; it is now built from the piped data instead
# of reaching back to the global `hr` inside group_by().
Three <- hr %>%
  group_by(`hr$number_project` = number_project) %>%
  summarise(
    Stayed = sum(left == 0) / n(),
    Gone = sum(left == 1) / n()
  )
# Pearson's chi-squared test of independence between number of projects and
# attrition, run on the employee-level contingency table of counts. The
# previous call cross-tabulated the summarised values (0/1 cells), which does
# not test the data; the console output recorded below predates this change
# and must be regenerated by re-running the script.
Chisq3 <- chisq.test(table(hr$number_project, hr$left))
## Warning in chisq.test(Three$`hr$number_project`, Three$Stayed): Chi-squared
## approximation may be incorrect
print(Chisq3)
##
## Pearson's Chi-squared test
##
## data: Three$`hr$number_project` and Three$Stayed
## X-squared = 30, df = 25, p-value = 0.2243
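P-Value Interpretation: A p-value of 0.224 means that, if number of
projects and attrition were unrelated, a test statistic at least this large
would occur about 22% of the time by chance, so the result is not
statistically significant at the 0.05 level.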
Chi-Square Test Interpretation: With p = 0.224 (above 0.05), this test
does not provide statistically significant evidence of an association
between number of projects and attrition.
Non-Technical Interpretation: Employees who work on more projects are
more likely to leave, and having too little work can also cause somebody
to leave.
# Stacked bar chart: share of employees who left vs. stayed per number of
# projects. The axis and chart titles previously said "Department" (copied
# from the department chart); they now describe the variable that is plotted.
plot_ly(Three) %>%
  add_bars(
    x = ~`hr$number_project`, y = ~Gone, name = "Gone",
    marker = list(color = "blue")
  ) %>%
  add_bars(
    x = ~`hr$number_project`, y = ~Stayed, name = "Stayed",
    marker = list(color = "green")
  ) %>%
  layout(
    barmode = "stack",
    xaxis = list(title = "Number of Projects"),
    yaxis = list(title = "Percentage of Employees", tickformat = ",.0%"),
    title = "Employee Attrition by Number of Projects"
  )
Chi-Test Number 4
# Share of employees who stayed vs. left for each value of
# time_spend_company. The grouping column keeps the name
# `hr$time_spend_company` because later code looks it up under that name; it
# is now built from the piped data instead of reaching back to the global
# `hr` inside group_by().
Four <- hr %>%
  group_by(`hr$time_spend_company` = time_spend_company) %>%
  summarise(
    Stayed = sum(left == 0) / n(),
    Gone = sum(left == 1) / n()
  )
# Pearson's chi-squared test of independence between time spent with the
# company and attrition, run on the employee-level contingency table of
# counts. The previous call cross-tabulated the summarised values (0/1
# cells), which does not test the data; the console output recorded below
# predates this change and must be regenerated by re-running the script.
Chisq4 <- chisq.test(table(hr$time_spend_company, hr$left))
## Warning in chisq.test(Four$`hr$time_spend_company`, Four$Stayed): Chi-squared
## approximation may be incorrect
print(Chisq4)
##
## Pearson's Chi-squared test
##
## data: Four$`hr$time_spend_company` and Four$Stayed
## X-squared = 40, df = 35, p-value = 0.2578
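P-Value Interpretation: A p-value of 0.258 means that, if time spent with
the company and attrition were unrelated, a test statistic at least this
large would occur about 26% of the time by chance, so the result is not
statistically significant at the 0.05 level.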
Chi-Square Test Interpretation: With p = 0.258 (above 0.05), this test
does not provide statistically significant evidence of an association
between time spent with the company and attrition.
Non-Technical Interpretation: Employees are most likely to leave the
company between 4 and 6 years after joining.
# Stacked bar chart: share of employees who left vs. stayed per number of
# years spent with the company. The chart title previously said
# "by Department" (copied from the department chart); it now matches the
# x-axis variable.
plot_ly(Four) %>%
  add_bars(
    x = ~`hr$time_spend_company`, y = ~Gone, name = "Gone",
    marker = list(color = "cyan")
  ) %>%
  add_bars(
    x = ~`hr$time_spend_company`, y = ~Stayed, name = "Stayed",
    marker = list(color = "pink")
  ) %>%
  layout(
    barmode = "stack",
    xaxis = list(title = "Years Spent With Company"),
    yaxis = list(title = "Percentage of Employees", tickformat = ",.0%"),
    title = "Employee Attrition by Years Spent With Company"
  )