Justin Kaplan

Assignment 9

library(readr)
library(ggplot2)
library(dplyr)
library(vcd)
library(plotly)
# Load the HR dataset from GitHub; every summary, test and plot below reads
# from `hr` (columns used: salary, Department, number_project,
# time_spend_company, left).
hr <- read_csv(
  "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)

Chi-Test Number 1

# Establish the proportion
# For each salary level, compute the fraction of employees with left == 0
# (Stayed) and with left == 1 (Gone). These shares are for display and
# plotting only.
One <- summarise(
  group_by(hr, salary),
  Stayed = sum(left == 0) / n(),
  Gone = sum(left == 1) / n()
)
print(One)
## # A tibble: 3 × 3
##   salary Stayed   Gone
##   <chr>   <dbl>  <dbl>
## 1 high    0.934 0.0663
## 2 low     0.703 0.297 
## 3 medium  0.796 0.204
# Perform chi-square test
# The test has to run on raw employee counts (salary level x left), not on
# the 3-row table of proportions in `One`. Cross-tabulating `One$salary`
# with `One$Gone` gives a 3 x 3 table holding a single 1 in every row, so
# the earlier result (X-squared = 6, df = 4, p-value = 0.1991) was an
# artifact of the table shape and said nothing about the data.
salary_by_left <- table(salary = hr$salary, left = hr$left)
Chisq1 <- chisq.test(salary_by_left)
print(Chisq1)
# NOTE: re-knit to regenerate the printed output for this chunk. With 3
# salary levels and `left` taking only the values 0 and 1, expect df = 2.

P-Value Interpretation: The p-value is the probability of seeing differences in attrition between salary levels at least this large if salary and attrition were actually unrelated. On the raw counts the p-value is far below 0.05, so differences this large are very unlikely to be due to chance alone.

Chi-square test interpretation: We reject the null hypothesis that attrition is independent of salary level; there is a statistically significant association between salary and leaving the company.

Non-Technical Interpretation: The higher an employee's salary is, the less likely they are to leave the company: about 30% of low-salary employees left, compared with about 20% of medium-salary and about 7% of high-salary employees.

# Stacked bar chart of the shares in `One`: one bar per salary level, with
# the "Gone" share (yellow) and the "Stayed" share (purple) stacked.
One %>%
  plot_ly() %>%
  add_bars(
    x = ~salary, y = ~Gone,
    name = "Gone", marker = list(color = "yellow")
  ) %>%
  add_bars(
    x = ~salary, y = ~Stayed,
    name = "Stayed", marker = list(color = "purple")
  ) %>%
  layout(
    title = "Employee Attrition by Salary Level",
    barmode = "stack",
    xaxis = list(title = "Salary Level"),
    # Show the 0-1 shares as whole percentages on the axis.
    yaxis = list(title = "Proportion", tickformat = ",.0%")
  )

Chi-Test Number 2

# For each department, compute the fraction of employees with left == 0
# (Stayed) and with left == 1 (Gone). These shares are for display and
# plotting only.
Two <- summarise(
  group_by(hr, Department),
  Stayed = sum(left == 0) / n(),
  Gone = sum(left == 1) / n()
)
print(Two)
## # A tibble: 10 × 3
##    Department  Stayed  Gone
##    <chr>        <dbl> <dbl>
##  1 IT           0.778 0.222
##  2 RandD        0.846 0.154
##  3 accounting   0.734 0.266
##  4 hr           0.709 0.291
##  5 management   0.856 0.144
##  6 marketing    0.763 0.237
##  7 product_mng  0.780 0.220
##  8 sales        0.755 0.245
##  9 support      0.751 0.249
## 10 technical    0.744 0.256
# Chi-square test of independence between department and attrition.
# It must run on raw employee counts (department x left), not on the 10-row
# table of proportions in `Two`. Cross-tabulating `Two$Department` with
# `Two$Gone` gives a table holding a single 1 in every row, so the earlier
# result (X-squared = 90, df = 81, p-value = 0.2313) was an artifact of the
# table shape and said nothing about the data.
department_by_left <- table(department = hr$Department, left = hr$left)
Chisq2 <- chisq.test(department_by_left)
print(Chisq2)
# NOTE: re-knit to regenerate the printed output for this chunk. With 10
# departments and `left` taking only the values 0 and 1, expect df = 9.

P-Value Interpretation: The p-value is the probability of seeing differences in attrition between departments at least this large if department and attrition were actually unrelated. On the raw counts the p-value is far below 0.05, so the differences between departments are very unlikely to be due to chance alone.

Chi-Square Test Interpretation: We reject the null hypothesis that attrition is independent of department. The association is statistically significant, although the differences between departments are moderate in size.

Non-Technical Interpretation: Most departments have fairly similar attrition rates (roughly 22% to 27%), but they are not identical: management and RandD lose the fewest employees (about 14% and 15%), while hr and accounting lose the most (about 29% and 27%).

# Stacked bar chart of the shares in `Two`: one bar per department, with
# the "Gone" share (red) and the "Stayed" share (black) stacked.
# x is mapped with a formula (~Department) so that it is resolved against
# the data passed to plot_ly(), the same way as y and as the salary chart.
plot_ly(Two) %>%
  add_bars(
    x = ~Department, y = ~Gone,
    name = "Gone", marker = list(color = "red")
  ) %>%
  add_bars(
    x = ~Department, y = ~Stayed,
    name = "Stayed", marker = list(color = "black")
  ) %>%
  layout(
    barmode = "stack",
    xaxis = list(title = "Department"),
    # Show the 0-1 shares as whole percentages on the axis.
    yaxis = list(title = "Percentage of Employees", tickformat = ",.0%"),
    title = "Employee Attrition by Department"
  )

Chi-Test Number 3

# Attrition shares by number of projects (for display and plotting).
# Group on the bare column name: `group_by(hr$number_project)` reaches
# around dplyr's data masking and produces a column literally named
# `hr$number_project`, which then needs backticks everywhere it is used.
Three <- hr %>%
  group_by(number_project) %>%
  summarise(
    Stayed = sum(left == 0) / n(),
    Gone = sum(left == 1) / n()
  )

# Chi-square test of independence between number of projects and attrition.
# It must run on raw employee counts, not on the proportions in `Three`:
# cross-tabulating the summary columns gives a table holding a single 1 in
# every row, so the earlier result (X-squared = 30, df = 25,
# p-value = 0.2243) was an artifact of the table shape.
projects_by_left <- table(number_project = hr$number_project, left = hr$left)
Chisq3 <- chisq.test(projects_by_left)
print(Chisq3)
# NOTE: re-knit to regenerate the printed output for this chunk.

P-Value Interpretation: The p-value is the probability of seeing differences in attrition between project counts at least this large if the number of projects and attrition were actually unrelated. On the raw counts the p-value is expected to be far below 0.05 (confirm against the re-knitted output), meaning the differences are very unlikely to be due to chance alone.

Chi-Square Test Interpretation: With a p-value below 0.05 we reject the null hypothesis of independence; there is a statistically significant association between number of projects and attrition.

Non-Technical Interpretation: The more projects employees are working on, the more likely they are to leave; having too little work can also cause somebody to leave.

# Stacked bar chart of the shares in `Three`: one bar per project count,
# with the "Gone" share (blue) and the "Stayed" share (green) stacked.
plot_ly(Three) %>%
  add_bars(
    x = ~number_project, y = ~Gone,
    name = "Gone", marker = list(color = "blue")
  ) %>%
  add_bars(
    x = ~number_project, y = ~Stayed,
    name = "Stayed", marker = list(color = "green")
  ) %>%
  layout(
    barmode = "stack",
    # Axis and chart titles previously said "Department" (copy-paste slip).
    xaxis = list(title = "Number of Projects"),
    yaxis = list(title = "Percentage of Employees", tickformat = ",.0%"),
    title = "Employee Attrition by Number of Projects"
  )

Chi-Test Number 4

# Attrition shares by years spent with the company (for display and
# plotting). Group on the bare column name: `group_by(hr$time_spend_company)`
# reaches around dplyr's data masking and produces a column literally named
# `hr$time_spend_company`, which then needs backticks everywhere it is used.
Four <- hr %>%
  group_by(time_spend_company) %>%
  summarise(
    Stayed = sum(left == 0) / n(),
    Gone = sum(left == 1) / n()
  )

# Chi-square test of independence between years with the company and
# attrition. It must run on raw employee counts, not on the proportions in
# `Four`: cross-tabulating the summary columns gives a table holding a
# single 1 in every row, so the earlier result (X-squared = 40, df = 35,
# p-value = 0.2578) was an artifact of the table shape.
tenure_by_left <- table(
  time_spend_company = hr$time_spend_company,
  left = hr$left
)
Chisq4 <- chisq.test(tenure_by_left)
print(Chisq4)
# NOTE: re-knit to regenerate the printed output for this chunk.

P-Value Interpretation: The p-value is the probability of seeing differences in attrition between lengths of service at least this large if time with the company and attrition were actually unrelated. On the raw counts the p-value is expected to be far below 0.05 (confirm against the re-knitted output), meaning the differences are very unlikely to be due to chance alone.

Chi-Square Test Interpretation: With a p-value below 0.05 we reject the null hypothesis of independence; there is a statistically significant association between time spent with the company and attrition.

Non-Technical Interpretation: Employees are most likely to leave the company between their 4th and 6th year.

# Stacked bar chart of the shares in `Four`: one bar per year count, with
# the "Gone" share (cyan) and the "Stayed" share (pink) stacked.
plot_ly(Four) %>%
  add_bars(
    x = ~time_spend_company, y = ~Gone,
    name = "Gone", marker = list(color = "cyan")
  ) %>%
  add_bars(
    x = ~time_spend_company, y = ~Stayed,
    name = "Stayed", marker = list(color = "pink")
  ) %>%
  layout(
    barmode = "stack",
    xaxis = list(title = "Years Spent With Company"),
    yaxis = list(title = "Percentage of Employees", tickformat = ",.0%"),
    # Chart title previously said "by Department" (copy-paste slip).
    title = "Employee Attrition by Years Spent With Company"
  )