library(readr)
# Read the HR dataset from its raw GitHub URL into a tibble.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Box plot of fuel economy (mpg) by transmission type for the built-in mtcars
# data. `am` is recoded so that 0 becomes "Automatic" and any other value
# becomes "Manual".
plot_data <- mtcars %>%
  mutate(Transmision = as.factor(ifelse(am == 0, "Automatic", "Manual")))

# The column keeps its original (misspelled) name so any existing reference to
# plot_data$Transmision still works; the axis title is set explicitly so the
# figure itself shows the correct spelling.
plot_ly(plot_data,
        x = ~Transmision,
        y = ~mpg,
        type = "box") %>%
  layout(xaxis = list(title = "Transmission"))
# Copy of `hr` with a readable status label derived from the `left` flag:
# 1 -> "Left", any other value -> "Active".
hr1 <- mutate(
  hr,
  employee_status = ifelse(left == 1, "Left", "Active")
)
#1a.
# Welch two-sample t-test of average monthly hours, grouped by employee status.
t.test(hr1$average_montly_hours ~ hr1$employee_status)
##
## Welch Two Sample t-test
##
## data: hr1$average_montly_hours by hr1$employee_status
## t = -7.5323, df = 4875.1, p-value = 5.907e-14
## alternative hypothesis: true difference in means between group Active and group Left is not equal to 0
## 95 percent confidence interval:
## -10.534631 -6.183384
## sample estimates:
## mean in group Active mean in group Left
## 199.0602 207.4192
#1b.
#There is a statistically significant difference between the group means
#(p < 0.001): employees who left worked about 8 more hours per month on
#average (95% CI: 6.2 to 10.5 hours).
#1c. #On average, employees who left worked more hours per month, at least 3% more
#1d.
# Box plot of average monthly hours for each employee status, one colour per
# group, with the y-axis fixed to 0-350.
hr1 %>%
  plot_ly(
    x = ~employee_status,
    y = ~average_montly_hours,
    color = ~employee_status,
    colors = c("#29a21a", "blue"),
    type = "box"
  ) %>%
  layout(
    title = "employees that left on average, work more hours, at least 3% more",
    xaxis = list(title = "employee status"),
    yaxis = list(title = "Average Monthly Hours", range = c(0, 350))
  )
# Convert the grouping columns of `hr` to factors so they are treated as
# discrete categories in the plots below. `hr1` was created earlier and is not
# affected by these conversions.
hr[["left"]] <- as.factor(hr[["left"]])
hr[["promotion_last_5years"]] <- as.factor(hr[["promotion_last_5years"]])
hr[["Department"]] <- as.factor(hr[["Department"]])
#2a.
# Welch two-sample t-test of average monthly hours, grouped by whether the
# employee was promoted in the last five years (0 = not promoted, 1 = promoted).
t_test1 <- t.test(average_montly_hours ~ promotion_last_5years, data = hr1)
#2b.
# The p-value in this t-test is the probability of observing a difference in
# average monthly hours at least as large as the one in the sample, between
# employees who were promoted and those who were not, if the two groups
# actually had the same mean.
#2c.
# If the p-value is below 0.05: promoted employees work a significantly
# different number of hours on average each month compared to those who
# weren't promoted. Otherwise there is no evidence of a difference.
# NOTE(review): confirm this wording against t_test1$p.value; the plot title
# below is chosen from the actual p-value.
#2d.
# Box plot of average monthly hours by promotion status. The title reflects
# the test outcome at the 5% significance level.
plot_ly(hr,
        x = ~promotion_last_5years,
        y = ~average_montly_hours,
        type = 'box',
        color = ~promotion_last_5years,
        colors = c('#D46A6A', '#4DAF7C') # Using random colors
) %>%
  layout(
    # Plain if/else: the condition is a single value, so ifelse() is not needed.
    title = if (t_test1$p.value < 0.05) {
      'Promoted employees work a different number of hours per month'
    } else {
      'No difference in hours worked between promoted and non-promoted employees'
    },
    yaxis = list(title = 'Average Monthly Hours', range = c(0, 350)),
    xaxis = list(title = 'Promotion Status (0 = Not Promoted, 1 = Promoted)')
  )
#3a.
# Welch two-sample t-test of satisfaction level, grouped by whether the
# employee left (0 = stayed, 1 = left).
t_test2 <- t.test(satisfaction_level ~ left, data = hr1)
#3b.
# The p-value in this t-test is the probability of observing a difference in
# satisfaction levels at least as large as the one in the sample, between
# employees who left and those who stayed, if the two groups actually had the
# same mean.
#3c.
# If the p-value is below 0.05: employees who left the company had a
# significantly different level of satisfaction compared to those who stayed.
# This suggests that satisfaction levels may impact whether an employee
# decides to leave.
# NOTE(review): confirm this wording against t_test2$p.value; the plot title
# below is chosen from the actual p-value.
#3d.
# Box plot of satisfaction level by whether the employee left. The title
# reflects the test outcome at the 5% significance level.
plot_ly(hr,
        x = ~left,
        y = ~satisfaction_level,
        type = 'box',
        color = ~left,
        colors = c('#F1C40F', '#2980B9') # Using random colors
) %>%
  layout(
    # Plain if/else: the condition is a single value, so ifelse() is not needed.
    title = if (t_test2$p.value < 0.05) {
      'Employees who left had different satisfaction levels than those who stayed'
    } else {
      'No difference in satisfaction levels between employees who left and those who stayed'
    },
    yaxis = list(title = 'Satisfaction Level', range = c(0, 1)),
    xaxis = list(title = 'Employee Status (0 = Stayed, 1 = Left)')
  )
#4a.
# Keep only the two departments being compared, so that the t-test and the
# plot below are built from exactly the same rows.
sales_tech <- hr1 %>%
  filter(Department %in% c("sales", "technical"))
# Welch two-sample t-test of last evaluation score, sales vs technical.
t_test3 <- t.test(last_evaluation ~ Department, data = sales_tech)
#4b.
# The p-value from this t-test is the probability of observing a difference in
# last evaluation scores at least as large as the one in the sample, between
# the sales and technical departments, if the two departments actually had the
# same mean.
#4c.
# If the p-value is below 0.05: employees in the sales department have
# different evaluation scores on average than those in the technical
# department. This suggests that department may influence evaluation outcomes.
# NOTE(review): confirm this wording against t_test3$p.value; the plot title
# below is chosen from the actual p-value.
#4d.
# Box plot of last evaluation score for the sales and technical departments
# only, matching the title and the two-colour palette.
plot_ly(sales_tech,
        x = ~Department,
        y = ~last_evaluation,
        type = 'box',
        color = ~Department,
        colors = c('#FF5733', '#1F618D') # Using random colors
) %>%
  layout(
    # Plain if/else: the condition is a single value, so ifelse() is not needed.
    title = if (t_test3$p.value < 0.05) {
      'Evaluation scores differ between sales and technical departments'
    } else {
      'No difference in evaluation scores between sales and technical departments'
    },
    yaxis = list(title = 'Last Evaluation Score', range = c(0, 1)),
    xaxis = list(title = 'Department')
  )