library(readr)
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the HR dataset (comma-separated) directly from GitHub into a tibble.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# 1. Satisfaction level vs. attrition ----
# Two-sample t-test (Welch, per the printed output) comparing mean
# satisfaction_level between the two groups of `left` (0 and 1).
# The result is stored, then auto-printed by the bare name on the next line.
t_test_satisfaction <- t.test(hr$satisfaction_level ~ hr$left)
t_test_satisfaction
##
## Welch Two Sample t-test
##
## data: hr$satisfaction_level by hr$left
## t = 46.636, df = 5167, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## 0.2171815 0.2362417
## sample estimates:
## mean in group 0 mean in group 1
## 0.6668096 0.4400980
1. Since the p-value is extremely low (< 2.2e-16), the difference in mean satisfaction levels between employees who left and those who stayed is statistically significant. 2. Employees who left had a lower mean satisfaction level (0.44) than those who stayed (0.67), so employees with lower satisfaction levels were more likely to leave the company.
# Box plot of satisfaction level, split by whether the employee left.
# `left` is recoded to readable labels (1 -> "Left", otherwise "Stayed") so the
# x-axis shows group names instead of 0/1.
plot_data <- hr %>%
  mutate(Attrition = factor(if_else(left == 1, "Left", "Stayed")))

plot_ly(plot_data, x = ~Attrition, y = ~satisfaction_level, type = "box") %>%
  layout(
    title = "Employees with lower satisfaction levels are more likely to leave",
    # Explicit axis titles so the chart does not display raw column names.
    xaxis = list(title = "Attrition"),
    yaxis = list(title = "Satisfaction level")
  )
# 2. Average monthly hours vs. attrition ----
# Two-sample t-test (Welch, per the printed output) comparing mean
# average_montly_hours between the two groups of `left` (0 and 1).
# Note: "montly" is the column's actual spelling in the dataset.
t_test_hours <- t.test(hr$average_montly_hours ~ hr$left)
t_test_hours
##
## Welch Two Sample t-test
##
## data: hr$average_montly_hours by hr$left
## t = -7.5323, df = 4875.1, p-value = 5.907e-14
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## -10.534631 -6.183384
## sample estimates:
## mean in group 0 mean in group 1
## 199.0602 207.4192
1. Since the p-value (5.907e-14) is much lower than 0.05, there is a statistically significant difference in average monthly hours between employees who left and those who stayed.
2. Employees who left worked more hours per month on average (207.4) than those who stayed (199.1), so employees with higher average monthly hours were more likely to leave the company.
# Box plot of average monthly hours, split by whether the employee left.
# `left` is recoded to readable labels (1 -> "Left", otherwise "Stayed") so the
# x-axis shows group names instead of 0/1.
plot_data <- hr %>%
  mutate(Attrition = factor(if_else(left == 1, "Left", "Stayed")))

plot_ly(plot_data, x = ~Attrition, y = ~average_montly_hours, type = "box") %>%
  layout(
    title = "Employees with higher average monthly hours are more likely to leave",
    # Explicit axis titles: the default y label would be the misspelled
    # column name "average_montly_hours".
    xaxis = list(title = "Attrition"),
    yaxis = list(title = "Average monthly hours")
  )
# 3. Last evaluation score vs. attrition ----
# Two-sample t-test (Welch, per the printed output) comparing mean
# last_evaluation between the two groups of `left` (0 and 1).
t_test_evaluation <- t.test(hr$last_evaluation ~ hr$left)
t_test_evaluation
##
## Welch Two Sample t-test
##
## data: hr$last_evaluation by hr$left
## t = -0.72534, df = 5154.9, p-value = 0.4683
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## -0.009772224 0.004493874
## sample estimates:
## mean in group 0 mean in group 1
## 0.7154734 0.7181126
1. The p-value (0.4683) is greater than 0.05, meaning there is no statistically significant difference in last evaluation scores between employees who left and those who stayed.
2. The results show that evaluation scores do not appear to be a factor in whether an employee stays or leaves.
# Box plot of last evaluation score, split by whether the employee left.
# `left` is recoded to readable labels (1 -> "Left", otherwise "Stayed") so the
# x-axis shows group names instead of 0/1.
plot_data <- hr %>%
  mutate(Attrition = factor(if_else(left == 1, "Left", "Stayed")))

plot_ly(plot_data, x = ~Attrition, y = ~last_evaluation, type = "box") %>%
  layout(
    title = "Evaluation scores do not seem to impact employee attrition",
    # Explicit axis titles so the chart does not display raw column names.
    xaxis = list(title = "Attrition"),
    yaxis = list(title = "Last evaluation score")
  )
# 4. Time spent at the company vs. attrition ----
# Two-sample t-test (Welch, per the printed output) comparing mean
# time_spend_company between the two groups of `left` (0 and 1).
t_test_time_spent <- t.test(hr$time_spend_company ~ hr$left)
t_test_time_spent
##
## Welch Two Sample t-test
##
## data: hr$time_spend_company by hr$left
## t = -22.631, df = 9625.6, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## -0.5394767 -0.4534706
## sample estimates:
## mean in group 0 mean in group 1
## 3.380032 3.876505
1. Since the p-value is extremely low (< 2.2e-16), we conclude that the difference in time spent at the company between employees who left and those who stayed is statistically significant.
2. The results show that employees who have been at the company for more years are more likely to leave.
# Box plot of time spent at the company, split by whether the employee left.
# `left` is recoded to readable labels (1 -> "Left", otherwise "Stayed") so the
# x-axis shows group names instead of 0/1.
plot_data <- hr %>%
  mutate(Attrition = factor(if_else(left == 1, "Left", "Stayed")))

plot_ly(plot_data, x = ~Attrition, y = ~time_spend_company, type = "box") %>%
  layout(
    title = "Employees with longer tenure are more likely to leave",
    # Explicit axis titles so the chart does not display raw column names.
    xaxis = list(title = "Attrition"),
    yaxis = list(title = "Time spent at company (years)")
  )