library(readr)

# Load the HR attrition dataset straight from GitHub. read_csv() guesses the
# column types; the summary it prints is reproduced in the output below.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
### Chi test 1

# Test whether attrition (`left`) is independent of department.
chi_test <- chisq.test(hr$Department, hr$left)
print(chi_test)
## 
##  Pearson's Chi-squared test
## 
## data:  hr$Department and hr$left
## X-squared = 86.825, df = 9, p-value = 7.042e-15

# Row-wise proportions (margin = 1): within each department, the share of
# employees at each value of `left`, so every department's bars sum to 1.
prop_data <- as.data.frame(
  prop.table(table(hr$Department, hr$left), margin = 1)
)
colnames(prop_data) <- c("Department", "Left", "Proportion")

# as.data.frame() on a table already returns `Left` as a factor, so it can be
# mapped to colour directly. The legend title states what the colours encode;
# without it the legend shows only the bare values of `left`.
plot <- plot_ly(
  prop_data,
  x = ~Department,
  y = ~Proportion,
  color = ~Left,
  type = "bar",
  colors = c("blue", "orange")
) %>%
  layout(
    title = "Department and employee attrition",
    xaxis = list(title = "Department"),
    yaxis = list(title = "Proportion"),
    legend = list(title = list(text = "Left")),
    barmode = "stack"
  )

plot
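# Technical Interpretation:
# The p-value is the probability of seeing an association at least this strong if Department and left were truly independent.
# A small p-value (< 0.05) is evidence of a statistically significant association between the two variables.
# Here p = 7.042e-15 (X-squared = 86.825, df = 9), so the hypothesis of independence is rejected.
# With about 15,000 rows even weak associations are significant: Cramer's V = sqrt(86.825 / 14999) is roughly 0.08
# (if no rows were dropped for missing values), i.e. a weak association in absolute terms.

# Non-Technical Interpretation:
# Because the p-value is very small, the results suggest that an employee's department is related to whether they leave.
# Had the p-value not been small, we could only say the data show no clear evidence of a link, not that department has no effect.
# Either way, the test shows an association, not that department causes attrition.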
### Chi test 2

# Test whether attrition (`left`) is independent of salary band.
chi_test <- chisq.test(hr$salary, hr$left)
print(chi_test)
## 
##  Pearson's Chi-squared test
## 
## data:  hr$salary and hr$left
## X-squared = 381.23, df = 2, p-value < 2.2e-16

# Salary is ordinal, but table() orders character values alphabetically, which
# scrambles the bands on the x-axis. Put the known bands in their natural order
# first; any other value is appended (sorted) rather than silently dropped.
# NOTE(review): assumes the bands are spelled "low"/"medium"/"high" - confirm.
salary_order <- c("low", "medium", "high")
salary_levels <- c(
  intersect(salary_order, hr$salary),
  sort(setdiff(unique(hr$salary), salary_order))
)

# Row-wise proportions (margin = 1): within each salary band, the share of
# employees at each value of `left`, so every band's bars sum to 1.
prop_data <- as.data.frame(
  prop.table(
    table(factor(hr$salary, levels = salary_levels), hr$left),
    margin = 1
  )
)
colnames(prop_data) <- c("Salary", "Left", "Proportion")

# `Left` is already a factor after as.data.frame() on a table, so it is mapped
# to colour directly. categoryorder/categoryarray pin the x-axis to the ordinal
# band order, and the legend title states what the colours encode.
plot <- plot_ly(
  prop_data,
  x = ~Salary,
  y = ~Proportion,
  color = ~Left,
  type = "bar",
  colors = c("blue", "orange")
) %>%
  layout(
    title = "Salary level and employee attrition",
    xaxis = list(
      title = "Salary Level",
      categoryorder = "array",
      categoryarray = salary_levels
    ),
    yaxis = list(title = "Proportion"),
    legend = list(title = list(text = "Left")),
    barmode = "stack"
  )

plot
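# Technical Interpretation:
# The p-value is the probability of seeing an association at least this strong if salary and left were truly independent.
# A small p-value (< 0.05) is evidence of a statistically significant association between the two variables.
# Here p < 2.2e-16 (X-squared = 381.23, df = 2), so the hypothesis of independence is rejected.
# As an effect size, Cramer's V = sqrt(381.23 / 14999) is roughly 0.16 (if no rows were dropped for missing values),
# i.e. a modest association despite the extremely small p-value.

# Non-Technical Interpretation:
# Because the p-value is very small, the results suggest that an employee's salary level is related to whether they leave.
# Had the p-value not been small, we could only say the data show no clear evidence of a link, not that salary has no effect.
# Either way, the test shows an association, not that salary level causes attrition.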
### Chi test 3

# Test whether attrition (`left`) is independent of promotion history.
# Both variables have two levels, so chisq.test() applies Yates' continuity
# correction (its default for 2 x 2 tables), as the printed header shows.
chi_test <- chisq.test(hr$promotion_last_5years, hr$left)
print(chi_test)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  hr$promotion_last_5years and hr$left
## X-squared = 56.262, df = 1, p-value = 6.344e-14

# Row-wise proportions (margin = 1): within each promotion group, the share of
# employees at each value of `left`, so every group's bars sum to 1.
prop_data <- as.data.frame(
  prop.table(table(hr$promotion_last_5years, hr$left), margin = 1)
)
colnames(prop_data) <- c("Promotion_Last_5_Years", "Left", "Proportion")

# as.data.frame() on a table already returns `Left` as a factor, so it can be
# mapped to colour directly. The legend title states what the colours encode;
# without it the legend shows only the bare values of `left`.
plot <- plot_ly(
  prop_data,
  x = ~Promotion_Last_5_Years,
  y = ~Proportion,
  color = ~Left,
  type = "bar",
  colors = c("blue", "orange")
) %>%
  layout(
    title = "Promotion history and employee attrition",
    xaxis = list(title = "Promotion in Last 5 Years"),
    yaxis = list(title = "Proportion"),
    legend = list(title = list(text = "Left")),
    barmode = "stack"
  )

plot
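# Technical Interpretation:
# The p-value is the probability of seeing an association at least this strong if promotion history and left were truly independent.
# A small p-value (< 0.05) is evidence of a statistically significant association between the two variables.
# Here p = 6.344e-14 (X-squared = 56.262, df = 1, with Yates' continuity correction), so the hypothesis of independence is rejected.
# As an effect size, Cramer's V = sqrt(56.262 / 14999) is roughly 0.06 (if no rows were dropped for missing values),
# i.e. a weak association in absolute terms.

# Non-Technical Interpretation:
# Because the p-value is very small, the results suggest that whether an employee was promoted in the last 5 years is related to whether they leave.
# Had the p-value not been small, we could only say the data show no clear evidence of a link, not that promotion history has no effect.
# Either way, the test shows an association, not that promotion history causes attrition.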
### Chi test 4

# Test whether attrition (`left`) is independent of work accident history.
# Both variables have two levels, so chisq.test() applies Yates' continuity
# correction (its default for 2 x 2 tables), as the printed header shows.
chi_test <- chisq.test(hr$Work_accident, hr$left)
print(chi_test)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  hr$Work_accident and hr$left
## X-squared = 357.56, df = 1, p-value < 2.2e-16

# Row-wise proportions (margin = 1): within each work accident group, the share
# of employees at each value of `left`, so every group's bars sum to 1.
prop_data <- as.data.frame(
  prop.table(table(hr$Work_accident, hr$left), margin = 1)
)
colnames(prop_data) <- c("Work_Accident", "Left", "Proportion")

# as.data.frame() on a table already returns `Left` as a factor, so it can be
# mapped to colour directly. The legend title states what the colours encode;
# without it the legend shows only the bare values of `left`.
plot <- plot_ly(
  prop_data,
  x = ~Work_Accident,
  y = ~Proportion,
  color = ~Left,
  type = "bar",
  colors = c("blue", "orange")
) %>%
  layout(
    title = "Work accident history and employee attrition",
    xaxis = list(title = "Work Accident"),
    yaxis = list(title = "Proportion"),
    legend = list(title = list(text = "Left")),
    barmode = "stack"
  )

plot
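# Technical Interpretation:
# The p-value is the probability of seeing an association at least this strong if work accident history and left were truly independent.
# A small p-value (< 0.05) is evidence of a statistically significant association between the two variables.
# Here p < 2.2e-16 (X-squared = 357.56, df = 1, with Yates' continuity correction), so the hypothesis of independence is rejected.
# As an effect size, Cramer's V = sqrt(357.56 / 14999) is roughly 0.15 (if no rows were dropped for missing values),
# i.e. a modest association despite the extremely small p-value.

# Non-Technical Interpretation:
# Because the p-value is very small, the results suggest that whether an employee experienced a work accident is related to whether they leave.
# Had the p-value not been small, we could only say the data show no clear evidence of a link, not that work accident history has no effect.
# Either way, the test shows an association, not that work accidents cause or prevent attrition.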