library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Read the HR CSV from GitHub into `hr`; readr guesses the column types.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# T-Test 1: Satisfaction by Attrition Status ----

# Welch two-sample t-test of satisfaction_level, grouped by the `left` flag.
# The formula is built from `hr$...` columns so the printed "data:" label
# keeps the fully qualified column names.
satisfaction_by_left <- hr$satisfaction_level ~ hr$left
t_test_result1 <- t.test(satisfaction_by_left)
print(t_test_result1)
## 
##  Welch Two Sample t-test
## 
## data:  hr$satisfaction_level by hr$left
## t = 46.636, df = 5167, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  0.2171815 0.2362417
## sample estimates:
## mean in group 0 mean in group 1 
##       0.6668096       0.4400980
# Label each row "Left" or "Stayed" from the 0/1 `left` column so the box
# plot groups are readable.
plot_data <- mutate(
  hr,
  Left = factor(ifelse(left == 1, "Left", "Stayed"))
)

# Box plot of satisfaction level for each attrition group.
plot_data %>%
  plot_ly(x = ~Left, y = ~satisfaction_level, type = "box") %>%
  layout(title = "Employees who left were generally less satisfied")
# Technical Interpretation: The p-value (< 2.2e-16) is extremely small (well below 0.05), indicating a statistically significant difference in mean satisfaction levels between employees who left and those who stayed.

#Non-Technical Interpretation: Employees who left the company were generally less satisfied than those who stayed. This finding suggests that dissatisfaction might be a factor influencing employees to leave.

# T-Test 2: Last Evaluation Score by Attrition ----

# Welch two-sample t-test of last_evaluation, grouped by the `left` flag.
# The formula is built from `hr$...` columns so the printed "data:" label
# keeps the fully qualified column names.
evaluation_by_left <- hr$last_evaluation ~ hr$left
t_test_result2 <- t.test(evaluation_by_left)
print(t_test_result2)
## 
##  Welch Two Sample t-test
## 
## data:  hr$last_evaluation by hr$left
## t = -0.72534, df = 5154.9, p-value = 0.4683
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -0.009772224  0.004493874
## sample estimates:
## mean in group 0 mean in group 1 
##       0.7154734       0.7181126
# Label each row "Left" or "Stayed" from the 0/1 `left` column so the box
# plot groups are readable.
plot_data <- mutate(
  hr,
  Left = factor(ifelse(left == 1, "Left", "Stayed"))
)

# Box plot of the last evaluation score for each attrition group.
plot_data %>%
  plot_ly(x = ~Left, y = ~last_evaluation, type = "box") %>%
  layout(title = "Difference in last evaluation scores between employees who left and stayed")
# Technical Interpretation: The p-value of 0.4683 is above the standard significance level of 0.05, indicating that there is no statistically significant difference in mean last evaluation scores between employees who left and those who stayed.

# Non-Technical Interpretation: On average, the evaluation scores of employees who left are similar to those of employees who stayed, so the average evaluation score alone does not appear to distinguish employees who leave from those who stay.

# T-Test 3: Average Monthly Hours by Attrition ----

# Welch two-sample t-test of average_montly_hours, grouped by the `left` flag.
# The formula is built from `hr$...` columns so the printed "data:" label
# keeps the fully qualified column names.
hours_by_left <- hr$average_montly_hours ~ hr$left
t_test_result3 <- t.test(hours_by_left)
print(t_test_result3)
## 
##  Welch Two Sample t-test
## 
## data:  hr$average_montly_hours by hr$left
## t = -7.5323, df = 4875.1, p-value = 5.907e-14
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -10.534631  -6.183384
## sample estimates:
## mean in group 0 mean in group 1 
##        199.0602        207.4192
# Label each row "Left" or "Stayed" from the 0/1 `left` column so the box
# plot groups are readable.
plot_data <- mutate(
  hr,
  Left = factor(ifelse(left == 1, "Left", "Stayed"))
)

# Box plot of average monthly hours for each attrition group.
plot_data %>%
  plot_ly(x = ~Left, y = ~average_montly_hours, type = "box") %>%
  layout(title = "Average monthly hours differ between employees who left and stayed")
#Technical Interpretation: The p-value of 5.907e-14 is extremely small (well below 0.05), indicating a statistically significant difference in average monthly hours between employees who left and those who stayed.

#Non-Technical Interpretation: Employees who left the company tended to work more hours each month than those who stayed, which might indicate that overworking could lead to higher turnover.

# T-Test 4: Number of Projects by Attrition ----

# Welch two-sample t-test of number_project, grouped by the `left` flag.
# The formula is built from `hr$...` columns so the printed "data:" label
# keeps the fully qualified column names.
projects_by_left <- hr$number_project ~ hr$left
t_test_result4 <- t.test(projects_by_left)
print(t_test_result4)
## 
##  Welch Two Sample t-test
## 
## data:  hr$number_project by hr$left
## t = -2.1663, df = 4236.5, p-value = 0.03034
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -0.131136535 -0.006540119
## sample estimates:
## mean in group 0 mean in group 1 
##        3.786664        3.855503
# Label each row "Left" or "Stayed" from the 0/1 `left` column so the box
# plot groups are readable.
plot_data <- mutate(
  hr,
  Left = factor(ifelse(left == 1, "Left", "Stayed"))
)

# Box plot of the number of projects for each attrition group.
plot_data %>%
  plot_ly(x = ~Left, y = ~number_project, type = "box") %>%
  layout(title = "Difference in number of projects between employees who left and stayed")
# Technical Interpretation: The p-value of 0.03034 is below the significance level of 0.05, indicating a statistically significant difference in the mean number of projects between employees who left and those who stayed, although the estimated difference in means is small (95% CI roughly 0.007 to 0.131 projects).

# Non-Technical Interpretation: Employees who left the company handled slightly more projects on average than those who stayed (about 3.86 vs. 3.79), which might suggest that a heavier workload could contribute to turnover, though the difference is small.