library(readr)

# Load the HR dataset from GitHub; read_csv() guesses the column types
# and returns the parsed table, which is kept in `hr` for the analysis below.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Run a Pearson correlation test between satisfaction_level and last_evaluation

# Two-sided Pearson correlation test between the two score columns.
# The method, alternative and confidence level are cor.test()'s defaults,
# written out here so the reader does not have to look them up.
cor_test_result <- cor.test(
  x = hr$satisfaction_level,
  y = hr$last_evaluation,
  alternative = "two.sided",
  method = "pearson",
  conf.level = 0.95
)

Display the test results

# Explicit print() so the test summary is shown even when the script is
# run non-interactively (e.g. via source()).
print(x = cor_test_result)
## 
##  Pearson's product-moment correlation
## 
## data:  hr$satisfaction_level and hr$last_evaluation
## t = 12.933, df = 14997, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.08916727 0.12082195
## sample estimates:
##       cor 
## 0.1050212

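The p-value (< 2.2e-16) indicates that the correlation is statistically significant: a correlation of this size would be very unlikely to arise by chance if the two variables were truly unrelated. The correlation coefficient (r ≈ 0.105, 95% confidence interval 0.089 to 0.121) nevertheless indicates only a weak positive relationship between satisfaction and evaluation scores; with nearly 15,000 observations, even a weak correlation easily reaches statistical significance. In simpler terms, employees who are more satisfied tend to receive slightly better evaluations, but satisfaction accounts for only about 1% of the variation in evaluation scores (r² ≈ 0.011), which suggests that other factors are also affecting the evaluation results. Note that the test describes an association only; it does not show that satisfaction causes better evaluations, or the reverse.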

library(ggplot2)

# Fix the RNG seed so the same rows are drawn on every run.
set.seed(123)

# Draw 100 row indices without replacement and keep only those rows;
# change `size` to plot a different number of points.
sampled_hr <- hr[sample.int(n = nrow(hr), size = 100), ]

# Scatter plot of the sampled employees with a straight-line (lm) fit
# drawn on top; the confidence band is switched off.
ggplot(
  data = sampled_hr,
  mapping = aes(x = satisfaction_level, y = last_evaluation)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(
    title = "Relationship Between Satisfaction Level and Last Evaluation (Sampled)",
    x = "Satisfaction Level",
    y = "Last Evaluation"
  )
## `geom_smooth()` using formula = 'y ~ x'