Starter Code:

library(readr)
# Download HR_comma_sep.csv from GitHub and read it into a tibble named `hr`.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Strongly penalise scientific notation so very small p-values print in
# fixed (decimal) notation.
options(scipen = 999)

NOTE: Left = 1, Stayed = 0

Chi-Test 1: work accident vs left

# Chi-squared test of independence between Work_accident and left.
chisq.test(x = hr$Work_accident, y = hr$left)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  hr$Work_accident and hr$left
## X-squared = 357.56, df = 1, p-value < 0.00000000000000022

P-value interpretation:

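The p-value is extremely small (below 2.2e-16). If work accidents and leaving were unrelated, results at least this extreme would almost never occur, so we reject the hypothesis that the two are independent.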

Chi-Test Interpretation:

There is a dependence between work accidents and employment status.

Non-Technical Interpretation:

Not as many employees with accidents leave as you’d think.

Plot:

# For each employment status (left = 0 or 1), compute the share of employees
# whose Work_accident flag is 1 and the share whose flag is 0.
q1 <- summarise(
  group_by(hr, left),
  had_accident = sum(Work_accident == 1) / n(),
  no_accident = sum(Work_accident == 0) / n()
)

# Stacked bar chart of the work-accident shares in q1, one bar per employment
# status. x is declared once in plot_ly() and inherited by both bar traces.
plot_ly(q1, x = ~left) %>%
  add_bars(
    y = ~had_accident,
    name = "Had Work Accident",
    marker = list(color = "purple")
  ) %>%
  add_bars(
    y = ~no_accident,
    name = "No Work Accident",
    marker = list(color = "grey")
  ) %>%
  layout(
    title = "Not as many employees with accidents leave as you'd think",
    barmode = "stack",
    xaxis = list(
      title = "Left the Company (1 = Yes, 0 = No)",
      # Show readable labels at the numeric positions 0 and 1.
      tickvals = c(0, 1),
      ticktext = c("Stayed", "Left")
    ),
    yaxis = list(
      title = "Proportion",
      # Display proportions as whole-number percentages.
      tickformat = ",.0%"
    )
  )

Chi-Test 2: promotion last 5 years vs left

# Chi-squared test of independence between promotion_last_5years and left.
chisq.test(x = hr$promotion_last_5years, y = hr$left)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  hr$promotion_last_5years and hr$left
## X-squared = 56.262, df = 1, p-value = 0.00000000000006344

P-value interpretation:

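The p-value is extremely small (about 6.3e-14). If promotions and leaving were unrelated, results at least this extreme would almost never occur, so we reject the hypothesis that the two are independent.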

Chi-Test Interpretation:

There is a dependence between the promotions within 5 years and employment status.

Non-Technical Interpretation:

Employees who’ve been promoted are less likely to leave.

Plot:

# For each employment status (left = 0 or 1), compute the share of employees
# whose promotion_last_5years flag is 1 and the share whose flag is 0.
q2 <- summarise(
  group_by(hr, left),
  had_promotion = sum(promotion_last_5years == 1) / n(),
  no_promotion = sum(promotion_last_5years == 0) / n()
)

# Stacked bar chart of the promotion shares in q2, one bar per employment
# status. x is declared once in plot_ly() and inherited by both bar traces.
plot_ly(q2, x = ~left) %>%
  add_bars(
    y = ~had_promotion,
    name = "Had Promotion",
    marker = list(color = "blue")
  ) %>%
  add_bars(
    y = ~no_promotion,
    name = "No Promotion",
    marker = list(color = "red")
  ) %>%
  layout(
    title = "Employees who've been promoted are less likely to leave.",
    barmode = "stack",
    xaxis = list(
      title = "Left the Company (1 = Yes, 0 = No)",
      # Show readable labels at the numeric positions 0 and 1.
      tickvals = c(0, 1),
      ticktext = c("Stayed", "Left")
    ),
    yaxis = list(
      title = "Proportion",
      # Display proportions as whole-number percentages.
      tickformat = ",.0%"
    )
  )

Chi-Test 3: salary vs left

# Chi-squared test of independence between salary and left.
chisq.test(x = hr$salary, y = hr$left)
## 
##  Pearson's Chi-squared test
## 
## data:  hr$salary and hr$left
## X-squared = 381.23, df = 2, p-value < 0.00000000000000022

P-value interpretation:

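The p-value is extremely small (below 2.2e-16). If salary level and leaving were unrelated, results at least this extreme would almost never occur, so we reject the hypothesis that the two are independent.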

Chi-Test Interpretation:

There is a dependence between the range of salary and employment status.

Non-Technical Interpretation:

Employees who made more money were more inclined to stay.

Plot:

# For each employment status (left = 0 or 1), compute the share of employees
# in each salary band ("low", "medium", "high").
q3 <- summarise(
  group_by(hr, left),
  low_salary = sum(salary == "low") / n(),
  medium_salary = sum(salary == "medium") / n(),
  high_salary = sum(salary == "high") / n()
)

# Stacked bar chart of the salary-band shares in q3, one bar per employment
# status. x is declared once in plot_ly() and inherited by every bar trace.
plot_ly(q3, x = ~left) %>%
  add_bars(
    y = ~low_salary,
    name = "Low Salary",
    marker = list(color = "green")
  ) %>%
  add_bars(
    y = ~medium_salary,
    name = "Medium Salary",
    marker = list(color = "blue")
  ) %>%
  add_bars(
    y = ~high_salary,
    name = "High Salary",
    marker = list(color = "yellow")
  ) %>%
  layout(
    title = "Employees who made more money were more inclined to stay.",
    barmode = "stack",
    xaxis = list(
      title = "Left the Company (1 = Yes, 0 = No)",
      # Show readable labels at the numeric positions 0 and 1.
      tickvals = c(0, 1),
      ticktext = c("Stayed", "Left")
    ),
    yaxis = list(
      title = "Proportion",
      # Display proportions as whole-number percentages.
      tickformat = ",.0%"
    )
  )

Chi-Test 4: department vs left

# Chi-squared test of independence between Department and left.
chisq.test(x = hr$Department, y = hr$left)
## 
##  Pearson's Chi-squared test
## 
## data:  hr$Department and hr$left
## X-squared = 86.825, df = 9, p-value = 0.000000000000007042
# List the distinct department labels, in order of first appearance.
unique(hr[["Department"]])
##  [1] "sales"       "accounting"  "hr"          "technical"   "support"    
##  [6] "management"  "IT"          "product_mng" "marketing"   "RandD"

P-value interpretation:

The p-value is extremely small (about 7.0e-15), which means there is a statistically significant association between an employee's department and whether they left or stayed.

Chi-Test Interpretation:

There is a dependence between the departments and employment status.

Non-Technical Interpretation:

Turnover is not the same in every department: some departments lose a larger share of their employees than others.

Plot:

# For each employment status (left = 0 or 1), compute the share of employees
# belonging to each of the ten department labels found in hr$Department.
q4 <- summarise(
  group_by(hr, left),
  sales = sum(Department == "sales") / n(),
  accounting = sum(Department == "accounting") / n(),
  hr = sum(Department == "hr") / n(),
  technical = sum(Department == "technical") / n(),
  support = sum(Department == "support") / n(),
  management = sum(Department == "management") / n(),
  IT = sum(Department == "IT") / n(),
  product_mng = sum(Department == "product_mng") / n(),
  marketing = sum(Department == "marketing") / n(),
  RandD = sum(Department == "RandD") / n()
)

# Stacked bar chart of the department shares in q4, one bar per employment
# status (0 = stayed, 1 = left), with one coloured segment per department.
#
# The chart title states the finding of the chi-squared test on Department vs.
# left, which reported a significant dependence (p ~ 7e-15). The previous
# title, "No difference between department and leaving.", contradicted that
# result.
plot_ly(q4) %>%
  add_bars(x = ~left, y = ~sales, name = "Sales",
           marker = list(color = "blue")) %>%
  add_bars(x = ~left, y = ~accounting, name = "Accounting",
           marker = list(color = "orange")) %>%
  add_bars(x = ~left, y = ~hr, name = "HR",
           marker = list(color = "green")) %>%
  add_bars(x = ~left, y = ~technical, name = "Technical",
           marker = list(color = "red")) %>%
  add_bars(x = ~left, y = ~support, name = "Support",
           marker = list(color = "magenta")) %>%
  add_bars(x = ~left, y = ~management, name = "Management",
           marker = list(color = "brown")) %>%
  add_bars(x = ~left, y = ~IT, name = "IT",
           marker = list(color = "pink")) %>%
  add_bars(x = ~left, y = ~product_mng, name = "Product Management",
           marker = list(color = "gray")) %>%
  add_bars(x = ~left, y = ~marketing, name = "Marketing",
           marker = list(color = "yellow")) %>%
  add_bars(x = ~left, y = ~RandD, name = "R&D",
           marker = list(color = "cyan")) %>%
  layout(
    barmode = "stack",
    xaxis = list(
      title = "Left the Company (1 = Yes, 0 = No)",
      # Show readable labels at the numeric positions 0 and 1.
      tickvals = c(0, 1),
      ticktext = c("Stayed", "Left")
    ),
    yaxis = list(
      title = "Proportion",
      # Display proportions as whole-number percentages.
      tickformat = ",.0%"
    ),
    title = "Department mix differs between employees who stayed and those who left."
  )