library(readr)
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the HR dataset (comma-separated) straight from its GitHub URL.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Welch two-sample t-test: compare mean `last_evaluation` between the two
# `left` groups (0 and 1).
t.test(hr$last_evaluation ~ hr$left)
##
## Welch Two Sample t-test
##
## data: hr$last_evaluation by hr$left
## t = -0.72534, df = 5154.9, p-value = 0.4683
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## -0.009772224 0.004493874
## sample estimates:
## mean in group 0 mean in group 1
## 0.7154734 0.7181126
The p-value (0.4683) is not small, so the difference in mean last evaluation between employees who left and those who stayed is not statistically significant.
Last evaluation is not a deciding factor in whether employees decide to leave.
# Relabel the 0/1 `left` flag so the x-axis shows readable group names.
plot_data <- mutate(
  hr,
  left = factor(if_else(left == 0, "Not Left", "Left"))
)

# Box plot of last evaluation for each employee-status group.
plot_data %>%
  plot_ly(x = ~left, y = ~last_evaluation, type = "box") %>%
  layout(
    title = "Last evaluation is not a deciding factor in whether employees decide to leave",
    xaxis = list(title = "Employee Status"),
    yaxis = list(title = "Last Evaluation")
  )
# Welch two-sample t-test: compare mean `average_montly_hours` between the two
# `left` groups (0 and 1).
t.test(hr$average_montly_hours ~ hr$left)
##
## Welch Two Sample t-test
##
## data: hr$average_montly_hours by hr$left
## t = -7.5323, df = 4875.1, p-value = 5.907e-14
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## -10.534631 -6.183384
## sample estimates:
## mean in group 0 mean in group 1
## 199.0602 207.4192
The p-value (5.907e-14) is very small, so the difference in mean average monthly hours between employees who left and those who stayed is statistically significant.
Those who left the company worked more hours per month on average (about 207.4) than those who stayed (about 199.1).
# Relabel the 0/1 `left` flag so the x-axis shows readable group names.
plot_data <- mutate(
  hr,
  left = factor(if_else(left == 0, "Not Left", "Left"))
)

# Box plot of average monthly hours for each employee-status group.
plot_data %>%
  plot_ly(x = ~left, y = ~average_montly_hours, type = "box") %>%
  layout(
    title = "Difference in monthly hours of those who left and those who stayed",
    xaxis = list(title = "Employee Status"),
    yaxis = list(title = "Average Monthly Hours")
  )
# Welch two-sample t-test: compare mean `satisfaction_level` between the two
# `left` groups (0 and 1).
t.test(hr$satisfaction_level ~ hr$left)
##
## Welch Two Sample t-test
##
## data: hr$satisfaction_level by hr$left
## t = 46.636, df = 5167, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## 0.2171815 0.2362417
## sample estimates:
## mean in group 0 mean in group 1
## 0.6668096 0.4400980
The p-value (< 2.2e-16) is very small, so the difference in mean satisfaction level between employees who left and those who stayed is statistically significant.
Those who left the company had a lower mean job satisfaction level (about 0.44) than those who stayed (about 0.67).
# Relabel the 0/1 `left` flag so the x-axis shows readable group names.
plot_data <- mutate(
  hr,
  left = factor(if_else(left == 0, "Not Left", "Left"))
)

# Box plot of satisfaction level for each employee-status group.
plot_data %>%
  plot_ly(x = ~left, y = ~satisfaction_level, type = "box") %>%
  layout(
    title = "Difference in satisfaction levels of those who left and those who stayed",
    xaxis = list(title = "Employee Status"),
    yaxis = list(title = "Satisfaction Level")
  )
# Welch two-sample t-test: compare mean `time_spend_company` between the two
# `left` groups (0 and 1).
t.test(hr$time_spend_company ~ hr$left)
##
## Welch Two Sample t-test
##
## data: hr$time_spend_company by hr$left
## t = -22.631, df = 9625.6, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## -0.5394767 -0.4534706
## sample estimates:
## mean in group 0 mean in group 1
## 3.380032 3.876505
The p-value (< 2.2e-16) is very small, so the difference in mean time spent at the company between employees who left and those who stayed is statistically significant.
Employees who left the company had, on average, a longer length of employment (about 3.88) than those who stayed (about 3.38).
# Relabel the 0/1 `left` flag so the x-axis shows readable group names.
plot_data <- mutate(
  hr,
  left = factor(if_else(left == 0, "Not Left", "Left"))
)

# Box plot of time spent at the company for each employee-status group.
plot_data %>%
  plot_ly(x = ~left, y = ~time_spend_company, type = "box") %>%
  layout(
    title = "Difference in time spent at company of those who left and those who stayed",
    xaxis = list(title = "Employee Status"),
    yaxis = list(title = "Time Spent at Company")
  )