Assignment 9

library(readr)
library(plotly)

## Loading required package: ggplot2

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Download the HR dataset from GitHub and read it into a tibble named `hr`.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)

## Rows: 14999 Columns: 10

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Test 1

# Chi-squared test of independence between being promoted in the last
# 5 years and leaving the company.
chisq.test(x = hr$promotion_last_5years, y = hr$left)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  hr$promotion_last_5years and hr$left
## X-squared = 56.262, df = 1, p-value = 6.344e-14

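p-value interpretation: The p-value (6.3e-14) is far below 0.05, so if promotion and leaving were independent, an association this strong would almost never appear by chance; the result is statistically significant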

chi-square interpretation: There is a dependency between getting a promotion in the last 5 years and leaving the company

non-technical interpretation: Employees that did not get a promotion are 4 times more likely to leave

# Proportion of employees who stayed vs. left within each promotion group.
# NOTE: `stayed` has to be computed before `left`, because the second
# summary expression replaces the original `left` column with a proportion.
prop_data <- hr %>%
  group_by(promotion_last_5years = as.factor(promotion_last_5years)) %>%
  summarise(
    stayed = sum(left == 0) / n(),
    left = sum(left == 1) / n()
  )

# Stacked bar chart of the stayed/left proportions per promotion group.
# The x mapping is declared once in plot_ly() and inherited by both traces.
plot_ly(prop_data, x = ~promotion_last_5years) %>%
  add_bars(y = ~stayed, name = "stayed", marker = list(color = "#1f77b4")) %>%
  add_bars(y = ~left, name = "Left", marker = list(color = "#ff7f0e")) %>%
  layout(
    title = "Employees that did not get a promotion are 4 times more likely to leave",
    barmode = "stack",
    xaxis = list(title = "Promotion in the last 5 years"),
    yaxis = list(title = "Proportion", tickformat = ",.0%")
  )

Test 2

# Chi-squared test of independence between having had a work accident
# and leaving the company.
chisq.test(x = hr$Work_accident, y = hr$left)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  hr$Work_accident and hr$left
## X-squared = 357.56, df = 1, p-value < 2.2e-16

p-value interpretation: The p-value is very small (< 2.2e-16), so this test is statistically significant; there is an association between the two variables

chi-square interpretation: There is a dependency between having a work accident and leaving the company

non-technical interpretation: Employees that had a work accident are more likely to stay

# Proportion of employees who stayed vs. left, split by work-accident status.
# NOTE: `stayed` has to be computed before `left`, because the second
# summary expression replaces the original `left` column with a proportion.
prop_data <- hr %>%
  group_by(Work_accident = as.factor(Work_accident)) %>%
  summarise(
    stayed = sum(left == 0) / n(),
    left = sum(left == 1) / n()
  )

# Stacked bar chart of the stayed/left proportions per work-accident group.
plot_ly(prop_data) %>%
  add_bars(x = ~Work_accident, y = ~stayed, name = "stayed", 
           marker = list(color = "#ebd409")) %>%
  add_bars(x = ~Work_accident, y = ~left, name = "Left", 
           marker = list(color = "#039605")) %>%
  layout(
    barmode = "stack",
    xaxis = list(title = "Work Accident"),
    yaxis = list(title = "Proportion", tickformat = ",.0%"),
    # Fixed: the headline previously said "more likely to stay", which
    # contradicted the written interpretation (employees WITH an accident are
    # the ones more likely to stay). The no-accident group is the one that
    # leaves more often.
    title = "Employees that did not have a work accident are 3 times more likely to leave"
  )

Test 3

# Chi-squared test of independence between salary level and leaving
# the company.
chisq.test(x = hr$salary, y = hr$left)

## 
##  Pearson's Chi-squared test
## 
## data:  hr$salary and hr$left
## X-squared = 381.23, df = 2, p-value < 2.2e-16

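p-value interpretation: The p-value is very small (< 2.2e-16), so if salary and leaving were independent, an association this strong would almost never appear by chance; the result is statistically significant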

chi-square interpretation: There is a dependency between salary level and whether employees left

non-technical interpretation: Employees that have a low salary are 4 times more likely to leave than those with a high salary

# Proportion of employees who stayed vs. left within each salary level.
#
# Salary is ordinal, so the factor levels are set explicitly to
# low -> medium -> high instead of the alphabetical order that as.factor()
# produces (high, low, medium). union() appends any value outside those three
# as extra levels, so unexpected categories are kept rather than turned
# into NA.
prop_data <- hr %>%
  mutate(
    salary = factor(salary, levels = union(c("low", "medium", "high"), salary))
  ) %>%
  group_by(salary) %>%
  summarise(
    # `stayed` must come first: the next expression overwrites `left`.
    stayed = sum(left == 0) / n(),
    left = sum(left == 1) / n()
  )

# Stacked bar chart of the stayed/left proportions per salary level.
plot_ly(prop_data) %>%
  add_bars(x = ~salary, y = ~stayed, name = "stayed", 
           marker = list(color = "#940396")) %>%
  add_bars(x = ~salary, y = ~left, name = "Left", 
           marker = list(color = "#00d6f7")) %>%
  layout(
    barmode = "stack",
    xaxis = list(title = "Salary"),
    yaxis = list(title = "Proportion", tickformat = ",.0%"),
    # plotly does not treat a raw newline inside the string as a line break
    # (and the source indentation ends up in the title text), so the
    # two-line title is written with an explicit <br>.
    # Also fixes the typo "likey".
    title = paste0(
      "Employees that have a low salary are 4 times more<br>",
      "likely to leave than high salary"
    )
  )

Test 4

# Chi-squared test of independence between department and leaving
# the company.
chisq.test(x = hr$Department, y = hr$left)

## 
##  Pearson's Chi-squared test
## 
## data:  hr$Department and hr$left
## X-squared = 86.825, df = 9, p-value = 7.042e-15

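p-value interpretation: The p-value (7.0e-15) is far below 0.05, so if department and leaving were independent, an association this strong would almost never appear by chance; the result is statistically significant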

chi-square interpretation: There is a dependency between department and leaving the company

non-technical interpretation: Employees that are within the management and RandD departments are more likely to stay with the company over time, while those in the HR department are most likely to leave.

# Proportion of employees who stayed vs. left within each department.
# NOTE: `stayed` has to be computed before `left`, because the second
# summary expression replaces the original `left` column with a proportion.
prop_data <- hr %>%
  group_by(Department = as.factor(Department)) %>%
  summarise(
    stayed = sum(left == 0) / n(),
    left = sum(left == 1) / n()
  )

# Stacked bar chart of the stayed/left proportions per department.
plot_ly(prop_data) %>%
  add_bars(x = ~Department, y = ~stayed, name = "stayed", 
           marker = list(color = "#b50300")) %>%
  add_bars(x = ~Department, y = ~left, name = "Left", 
           marker = list(color = "#fa2fd8")) %>%
  layout(
    barmode = "stack",
    xaxis = list(title = "Department"),
    yaxis = list(title = "Proportion", tickformat = ",.0%"),
    # plotly does not treat a raw newline inside the string as a line break
    # (and the source indentation ends up in the title text), so the
    # two-line title is written with an explicit <br>.
    title = paste0(
      "Employees in Management and RandD are more likely<br>",
      "to stay, while those in the HR are most likely to leave"
    )
  )

Assignment 9

Billy Streek

2024-11-19

Test 1

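p-value interpretation: The p-value (6.3e-14) is far below 0.05, so if promotion and leaving were independent, an association this strong would almost never appear by chance; the result is statistically significant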

chi-square interpretation: There is a dependency between getting a promotion in the last 5 years and leaving the company

non-technical interpretation: Employees that did not get a promotion are 4 times more likely to leave

Test 2

p-value interpretation: The p-value is very small (< 2.2e-16), so this test is statistically significant; there is an association between the two variables

chi-square interpretation: There is a dependency between having a work accident and leaving the company

non-technical interpretation: Employees that had a work accident are more likely to stay

Test 3

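p-value interpretation: The p-value is very small (< 2.2e-16), so if salary and leaving were independent, an association this strong would almost never appear by chance; the result is statistically significant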

chi-square interpretation: There is a dependency between salary level and whether employees left

non-technical interpretation: Employees that have a low salary are 4 times more likely to leave than those with a high salary

Test 4

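p-value interpretation: The p-value (7.0e-15) is far below 0.05, so if department and leaving were independent, an association this strong would almost never appear by chance; the result is statistically significant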

chi-square interpretation: There is a dependency between department and leaving the company

non-technical interpretation: Employees that are within the management and RandD departments are more likely to stay with the company over time, while those in the HR department are most likely to leave.