library(readr)
library(plotly)
library(dplyr)
# Read the HR dataset directly from its public GitHub location.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
First Correlation: Satisfaction level and last evaluation
Part 1
# Pearson correlation test between satisfaction level and last evaluation.
cor.test(x = hr$satisfaction_level, y = hr$last_evaluation)
##
## Pearson's product-moment correlation
##
## data: hr$satisfaction_level and hr$last_evaluation
## t = 12.933, df = 14997, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.08916727 0.12082195
## sample estimates:
## cor
## 0.1050212
Part 2
The p-value is very small (< 2.2e-16), meaning the correlation between
satisfaction level and last evaluation is statistically significant.
The correlation is positive and small (0.105).
Part 3
Employees who have a higher performance evaluation tend to have a
slightly higher satisfaction level.
Part 4
# Mean last-evaluation score for each satisfaction level; levels are rounded
# to two decimals before grouping.
avg1 <- hr %>%
  mutate(satisfaction_bin = round(satisfaction_level, 2)) %>%
  group_by(satisfaction_bin) %>%
  summarise(mean_eval = mean(last_evaluation))

# Linear fit through the per-level averages; its fitted values are drawn as
# the trendline below.
fit1 <- lm(mean_eval ~ satisfaction_bin, data = avg1)

# Green markers show the averages, the red line shows the fitted trend.
plot_ly(data = avg1, x = ~satisfaction_bin) %>%
  add_markers(
    y = ~mean_eval,
    marker = list(size = 5, color = "forestgreen"),
    name = "Averages"
  ) %>%
  add_lines(
    y = fitted(fit1),
    line = list(color = "red", width = 1),
    name = "Trendline"
  ) %>%
  layout(
    title = "Happier employees tend to perform slightly better",
    # x holds the satisfaction levels themselves; only y is an average.
    xaxis = list(title = "Satisfaction Level"),
    yaxis = list(title = "Average Evaluation Score")
  )
Second Correlation: Satisfaction Level and average monthly
hours
Part 1
# Pearson correlation test between satisfaction level and average monthly
# hours (the dataset spells the column "average_montly_hours").
cor.test(x = hr$satisfaction_level, y = hr$average_montly_hours)
##
## Pearson's product-moment correlation
##
## data: hr$satisfaction_level and hr$average_montly_hours
## t = -2.4556, df = 14997, p-value = 0.01408
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.036040356 -0.004045605
## sample estimates:
## cor
## -0.02004811
Part 2
The p-value is small (0.014), so the correlation is statistically
significant at the 5% level. The correlation is negative and very weak (-0.02).
Part 3
Employees who work longer hours tend to have a slightly lower satisfaction
level. People may not want to work more hours and may get burnt out.
Part 4
# Mean monthly hours for each satisfaction level; levels are rounded to two
# decimals before grouping.
avg2 <- hr %>%
  mutate(satisfaction_bin = round(satisfaction_level, 2)) %>%
  group_by(satisfaction_bin) %>%
  summarise(mean_hours = mean(average_montly_hours))

# Linear fit through the per-level averages; its fitted values are drawn as
# the trendline below.
fit2 <- lm(mean_hours ~ satisfaction_bin, data = avg2)

# Green markers show the averages, the red line shows the fitted trend.
plot_ly(data = avg2, x = ~satisfaction_bin) %>%
  add_markers(
    y = ~mean_hours,
    marker = list(size = 5, color = "forestgreen"),
    name = "Averages"
  ) %>%
  add_lines(
    y = fitted(fit2),
    line = list(color = "red", width = 1),
    name = "Trendline"
  ) %>%
  layout(
    title = "Employees who work longer hours are slightly less satisfied",
    # x holds the satisfaction levels themselves; only y is an average.
    xaxis = list(title = "Satisfaction Level"),
    yaxis = list(title = "Average Monthly Hours")
  )
Third Correlation: Last evaluation and average monthly hours
Part 1
# Pearson correlation test between last evaluation and average monthly hours.
cor.test(x = hr$last_evaluation, y = hr$average_montly_hours)
##
## Pearson's product-moment correlation
##
## data: hr$last_evaluation and hr$average_montly_hours
## t = 44.237, df = 14997, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3255078 0.3538218
## sample estimates:
## cor
## 0.3397418
Part 2
The p-value is very small (< 2.2e-16), so the correlation is statistically
significant. The correlation is positive and moderately small (0.34).
Part 3
Those who work more hours tend to have a higher evaluation
score.
Part 4
# Mean monthly hours for each evaluation score; scores are rounded to two
# decimals before grouping.
avg3 <- hr %>%
  mutate(eval_bin = round(last_evaluation, 2)) %>%
  group_by(eval_bin) %>%
  summarise(mean_hours = mean(average_montly_hours))

# Linear fit through the per-score averages; its fitted values are drawn as
# the trendline below.
fit3 <- lm(mean_hours ~ eval_bin, data = avg3)

# Green markers show the averages, the red line shows the fitted trend.
plot_ly(data = avg3, x = ~eval_bin) %>%
  add_markers(
    y = ~mean_hours,
    marker = list(size = 5, color = "forestgreen"),
    name = "Averages"
  ) %>%
  add_lines(
    y = fitted(fit3),
    line = list(color = "red", width = 1),
    name = "Trendline"
  ) %>%
  layout(
    title = "Higher-performing employees work more hours on average",
    # x holds the evaluation scores themselves; only y is an average.
    xaxis = list(title = "Evaluation Score"),
    yaxis = list(title = "Average Monthly Hours")
  )
Fourth Correlation: Time spent in the company and number of
projects
Part 1
# Pearson correlation test between years at the company and number of projects.
cor.test(x = hr$time_spend_company, y = hr$number_project)
##
## Pearson's product-moment correlation
##
## data: hr$time_spend_company and hr$number_project
## t = 24.579, df = 14997, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1813532 0.2121217
## sample estimates:
## cor
## 0.1967859
Part 2
The p-value is very small (< 2.2e-16), so the correlation is statistically
significant. The correlation is positive and small (0.197).
Part 3
Employees who have been with the company longer tend to get more
projects.
Part 4
# Mean number of projects for each value of time spent at the company.
avg4 <- hr %>%
  group_by(time_spend_company) %>%
  summarise(mean_projects = mean(number_project))

# Linear fit through the per-tenure averages; its fitted values are drawn as
# the trendline below.
fit4 <- lm(mean_projects ~ time_spend_company, data = avg4)

# Both traces share the same data and x mapping, so they are set once in
# plot_ly() and inherited by the marker and line layers.
plot_ly(data = avg4, x = ~time_spend_company) %>%
  add_markers(
    y = ~mean_projects,
    marker = list(size = 5, color = "forestgreen"),
    name = "Averages"
  ) %>%
  add_lines(
    y = fitted(fit4),
    line = list(color = "red", width = 1),
    name = "Trendline"
  ) %>%
  layout(
    title = "Employees with longer tenure handle more projects",
    xaxis = list(title = "Years at Company"),
    yaxis = list(title = "Average Number of Projects")
  )