library(readr)
library(plotly)
library(dplyr)

# Read the HR dataset CSV directly from its GitHub URL into `hr`.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)

First Correlation: Satisfaction level and last evaluation

Part 1

# Pearson correlation test: satisfaction level vs. last evaluation score.
cor.test(
  x = hr$satisfaction_level,
  y = hr$last_evaluation,
  method = "pearson"
)
## 
##  Pearson's product-moment correlation
## 
## data:  hr$satisfaction_level and hr$last_evaluation
## t = 12.933, df = 14997, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.08916727 0.12082195
## sample estimates:
##       cor 
## 0.1050212

Part 2

The p-value is very small (< 2.2e-16), meaning the correlation between satisfaction level and last evaluation is statistically significant.
The correlation is positive but weak (r = 0.105).

Part 3

Employees with a higher performance evaluation tend to report a slightly higher satisfaction level.

Part 4

# Mean evaluation score for each satisfaction level (rounded to 2 decimals).
avg1 <- hr %>%
  mutate(satisfaction_bin = round(satisfaction_level, 2)) %>%
  group_by(satisfaction_bin) %>%
  summarise(mean_eval = mean(last_evaluation))

# Straight-line trend fitted to the per-level means, not to the raw rows.
fit1 <- lm(mean_eval ~ satisfaction_bin, data = avg1)

# Scatter of the per-level means with the fitted trendline overlaid.
plot_ly() %>%
  add_markers(
    data = avg1,
    x = ~satisfaction_bin,
    y = ~mean_eval,
    marker = list(size = 5, color = "forestgreen"),
    name = "Averages"
  ) %>%
  add_lines(
    x = avg1$satisfaction_bin,
    y = fitted(fit1),
    line = list(color = "red", width = 1),
    name = "Trendline"
  ) %>%
  layout(
    title = "Happier employees tend to perform slightly better",
    # x is the satisfaction level itself (the grouping variable); only the
    # y values are averages, so the x-axis title must not say "Average".
    xaxis = list(title = "Satisfaction Level"),
    yaxis = list(title = "Average Evaluation Score")
  )

Second Correlation: Satisfaction Level and average monthly hours

Part 1

# Pearson correlation test: satisfaction level vs. average monthly hours.
cor.test(
  x = hr$satisfaction_level,
  y = hr$average_montly_hours,
  method = "pearson"
)
## 
##  Pearson's product-moment correlation
## 
## data:  hr$satisfaction_level and hr$average_montly_hours
## t = -2.4556, df = 14997, p-value = 0.01408
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.036040356 -0.004045605
## sample estimates:
##         cor 
## -0.02004811

Part 2

The p-value is small (0.014), so the correlation is statistically significant at the 5% level. The correlation is negative and very weak (r = -0.02).

Part 3

Employees who work longer hours tend to report a slightly lower satisfaction level. One possible explanation is that people do not want to work more and get burnt out.

Part 4

# Mean monthly hours for each satisfaction level (rounded to 2 decimals).
avg2 <- hr %>%
  mutate(satisfaction_bin = round(satisfaction_level, 2)) %>%
  group_by(satisfaction_bin) %>%
  summarise(mean_hours = mean(average_montly_hours))

# Straight-line trend fitted to the per-level means, not to the raw rows.
fit2 <- lm(mean_hours ~ satisfaction_bin, data = avg2)

# Scatter of the per-level means with the fitted trendline overlaid.
plot_ly() %>%
  add_markers(
    data = avg2,
    x = ~satisfaction_bin,
    y = ~mean_hours,
    marker = list(size = 5, color = "forestgreen"),
    name = "Averages"
  ) %>%
  add_lines(
    x = avg2$satisfaction_bin,
    y = fitted(fit2),
    line = list(color = "red", width = 1),
    name = "Trendline"
  ) %>%
  layout(
    title = "Employees who work longer hours are slightly less satisfied",
    # x is the satisfaction level itself (the grouping variable); only the
    # y values are averages, so the x-axis title must not say "Average".
    xaxis = list(title = "Satisfaction Level"),
    yaxis = list(title = "Average Monthly Hours")
  )

Third Correlation: Last evaluation and average monthly hours

Part 1

# Pearson correlation test: last evaluation score vs. average monthly hours.
cor.test(
  x = hr$last_evaluation,
  y = hr$average_montly_hours,
  method = "pearson"
)
## 
##  Pearson's product-moment correlation
## 
## data:  hr$last_evaluation and hr$average_montly_hours
## t = 44.237, df = 14997, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3255078 0.3538218
## sample estimates:
##       cor 
## 0.3397418

Part 2

The p-value is very small (< 2.2e-16), so the correlation is statistically significant. The correlation is positive and moderate (r = 0.34).

Part 3

Those who work more hours tend to have a higher evaluation score.

Part 4

# Mean monthly hours for each evaluation score (rounded to 2 decimals).
avg3 <- hr %>%
  mutate(eval_bin = round(last_evaluation, 2)) %>%
  group_by(eval_bin) %>%
  summarise(mean_hours = mean(average_montly_hours))

# Straight-line trend fitted to the per-score means, not to the raw rows.
fit3 <- lm(mean_hours ~ eval_bin, data = avg3)

# Scatter of the per-score means with the fitted trendline overlaid.
plot_ly() %>%
  add_markers(
    data = avg3,
    x = ~eval_bin,
    y = ~mean_hours,
    marker = list(size = 5, color = "forestgreen"),
    name = "Averages"
  ) %>%
  add_lines(
    x = avg3$eval_bin,
    y = fitted(fit3),
    line = list(color = "red", width = 1),
    name = "Trendline"
  ) %>%
  layout(
    title = "Higher-performing employees work more hours on average",
    # x is the evaluation score itself (the grouping variable); only the
    # y values are averages, so the x-axis title must not say "Average".
    xaxis = list(title = "Evaluation Score"),
    yaxis = list(title = "Average Monthly Hours")
  )

Fourth Correlation: Time spent in the company and number of projects

Part 1

# Pearson correlation test: time spent at the company vs. number of projects.
cor.test(
  x = hr$time_spend_company,
  y = hr$number_project,
  method = "pearson"
)
## 
##  Pearson's product-moment correlation
## 
## data:  hr$time_spend_company and hr$number_project
## t = 24.579, df = 14997, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1813532 0.2121217
## sample estimates:
##       cor 
## 0.1967859

Part 2

The p-value is very small (< 2.2e-16), so the correlation is statistically significant. The correlation is positive but weak (r = 0.197).

Part 3

Employees who have been with the company longer tend to work on more projects.

Part 4

# Mean number of projects for each value of time spent at the company.
avg4 <- hr %>%
  group_by(time_spend_company) %>%
  summarise(mean_projects = mean(number_project))

# Straight-line trend fitted to the per-tenure means, not to the raw rows.
fit4 <- lm(formula = mean_projects ~ time_spend_company, data = avg4)

# Scatter of the per-tenure means with the fitted trendline overlaid.
plot_ly() %>%
  add_markers(
    data = avg4,
    x = ~time_spend_company,
    y = ~mean_projects,
    marker = list(size = 5, color = "forestgreen"),
    name = "Averages"
  ) %>%
  add_lines(
    x = avg4$time_spend_company,
    y = fitted(fit4),
    line = list(color = "red", width = 1),
    name = "Trendline"
  ) %>%
  layout(
    title = "Employees with longer tenure handle more projects",
    xaxis = list(title = "Years at Company"),
    yaxis = list(title = "Average Number of Projects")
  )