library(readr)
library(ggplot2)
# Download the HR comma-separated dataset from GitHub and parse it with readr.
hr <- read_csv(
  file = "https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv"
)
## Rows: 14999 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Department, salary
## dbl (8): satisfaction_level, last_evaluation, number_project, average_montly...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Correlation 1: Satisfaction level vs Last evaluation

# Pearson correlation test: satisfaction level vs. last evaluation score.
# The `hr$...` expressions are passed as-is because cor.test() echoes them
# in the "data:" line of its printed summary.
cor_test1 <- cor.test(
  x = hr$satisfaction_level,
  y = hr$last_evaluation,
  method = "pearson"
)
print(cor_test1)
## 
##  Pearson's product-moment correlation
## 
## data:  hr$satisfaction_level and hr$last_evaluation
## t = 12.933, df = 14997, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.08916727 0.12082195
## sample estimates:
##       cor 
## 0.1050212

P-value interpretation: The p-value is very small (< 2.2e-16), so the correlation between satisfaction level and last evaluation is statistically significant.

Correlation estimate interpretation: The correlation is positive but small (0.105), indicating a weak positive relationship between satisfaction level and last evaluation score.

Non-technical interpretation: Employees who report higher satisfaction levels generally have slightly higher evaluation scores, but the effect is modest.

# Scatter plot of last evaluation against satisfaction level with a fitted
# straight line (no confidence band).
ggplot(hr, aes(x = satisfaction_level, y = last_evaluation)) +
  # Semi-transparent points: with ~15k rows, opaque markers overplot heavily.
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  # Title states the takeaway without overstating a weak (r ~ 0.1) correlation.
  labs(title = "Higher Satisfaction, Slightly Higher Evaluation Scores",
       x = "Satisfaction Level",
       y = "Last Evaluation") +
  theme_minimal()  # Same theme as the other correlation plots in this report
## `geom_smooth()` using formula = 'y ~ x'

Correlation 2: Time Spent at Company vs Satisfaction Level

# Pearson correlation test: time spent at company vs. satisfaction level.
# Stored as cor_test2 to match the "Correlation 2" section; the previous name
# (cor_test3) was reused, and therefore overwritten, by the next section.
cor_test2 <- cor.test(hr$time_spend_company, hr$satisfaction_level)
print(cor_test2)
## 
##  Pearson's product-moment correlation
## 
## data:  hr$time_spend_company and hr$satisfaction_level
## t = -12.416, df = 14997, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.11668153 -0.08499948
## sample estimates:
##        cor 
## -0.1008661

P-value interpretation: The p-value is extremely small (< 2.2e-16), so the correlation between time spent at the company and satisfaction level is statistically significant.

Correlation estimate interpretation: The correlation is negative and small (-0.1009), indicating a weak inverse relationship. This suggests that as time spent at the company increases, satisfaction level tends to decrease slightly.

Non-technical interpretation: Employees who have been with the company longer tend to report slightly lower satisfaction levels, although the effect is weak.

# Satisfaction level plotted against years at the company, with a red
# least-squares line (no confidence band) on a minimal theme.
ggplot(
  data = hr,
  mapping = aes(x = time_spend_company, y = satisfaction_level)
) +
  theme_minimal() +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE, colour = "red") +
  ggtitle("Longer Time at Company, Lower Satisfaction Level") +
  xlab("Time Spent at Company (Years)") +
  ylab("Satisfaction Level")
## `geom_smooth()` using formula = 'y ~ x'

Correlation 3: Time Spent at Company vs Last Evaluation

# Pearson correlation test: time spent at company vs. last evaluation score.
# The `hr$...` expressions are passed as-is because cor.test() echoes them
# in the "data:" line of its printed summary.
cor_test3 <- cor.test(
  x = hr$time_spend_company,
  y = hr$last_evaluation,
  method = "pearson"
)
print(cor_test3)
## 
##  Pearson's product-moment correlation
## 
## data:  hr$time_spend_company and hr$last_evaluation
## t = 16.256, df = 14997, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1158309 0.1472844
## sample estimates:
##       cor 
## 0.1315907

P-value interpretation: The very small p-value (less than 2.2e-16) indicates a statistically significant correlation between time spent at the company and last evaluation. This is strong evidence that the correlation is not zero, although it says nothing about how strong the relationship is.

Correlation estimate interpretation: The correlation coefficient of 0.132 indicates a weak positive relationship, suggesting that employees with longer tenure tend to receive slightly higher evaluation scores.

Non-technical interpretation: Employees who have spent more time at the company tend to receive somewhat higher evaluations, although the connection is not very strong.

# Last evaluation score plotted against years at the company, with a blue
# least-squares line (no confidence band) on a minimal theme.
ggplot(
  data = hr,
  mapping = aes(x = time_spend_company, y = last_evaluation)
) +
  theme_minimal() +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE, colour = "blue") +
  ggtitle("Employees with Longer Tenure Tend to Have Slightly Higher Evaluations") +
  xlab("Time Spent at Company (Years)") +
  ylab("Last Evaluation Score")
## `geom_smooth()` using formula = 'y ~ x'

Correlation 4: Time Spent at Company vs Number of Projects

# Pearson correlation test: time spent at company vs. number of projects.
# Stored as cor_test4 to match the "Correlation 4" section (was cor_test5,
# which skipped a number in the sequence).
cor_test4 <- cor.test(hr$time_spend_company, hr$number_project)
print(cor_test4)
## 
##  Pearson's product-moment correlation
## 
## data:  hr$time_spend_company and hr$number_project
## t = 24.579, df = 14997, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1813532 0.2121217
## sample estimates:
##       cor 
## 0.1967859

P-value interpretation: The extremely small p-value (less than 2.2e-16) indicates a statistically significant correlation between time spent at the company and the number of projects. This is strong evidence that the correlation is not zero, although it says nothing about how strong the relationship is.

Correlation estimate interpretation: The correlation coefficient of 0.197 indicates a weak to moderate positive relationship, suggesting that employees with longer tenure tend to work on slightly more projects.

Non-technical interpretation: Employees who have been with the company longer tend to work on slightly more projects, although the connection is weak.

# Number of projects plotted against years at the company, with a blue
# least-squares line (no confidence band) on a minimal theme.
ggplot(
  data = hr,
  mapping = aes(x = time_spend_company, y = number_project)
) +
  theme_minimal() +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE, colour = "blue") +
  ggtitle("Employees with Longer Tenure Tend to Work on More Projects") +
  xlab("Time Spent at Company (Years)") +
  ylab("Number of Projects")
## `geom_smooth()` using formula = 'y ~ x'