Load Data & Libraries

library(readr)
library(ggplot2)

# Read the HR dataset directly from its public GitHub location.
hr <- read_csv("https://raw.githubusercontent.com/aiplanethub/Datasets/refs/heads/master/HR_comma_sep.csv")

# Preview the first six rows. `hr` is a tibble, so it prints with its own
# header and column types; `row.names` is a print.data.frame() argument that
# has no effect on tibble printing, so it is not passed here.
print(head(hr))

## # A tibble: 6 × 10
##   satisfaction_level last_evaluation number_project average_montly_hours
##                <dbl>           <dbl>          <dbl>                <dbl>
## 1               0.38            0.53              2                  157
## 2               0.8             0.86              5                  262
## 3               0.11            0.88              7                  272
## 4               0.72            0.87              5                  223
## 5               0.37            0.52              2                  159
## 6               0.41            0.5               2                  153
## # ℹ 6 more variables: time_spend_company <dbl>, Work_accident <dbl>,
## #   left <dbl>, promotion_last_5years <dbl>, Department <chr>, salary <chr>

Correlation 1: Satisfaction Level vs. Last Evaluation

# Two-sided Pearson correlation test: satisfaction level vs. last evaluation.
cor_test1 <- cor.test(
  x = hr$satisfaction_level,
  y = hr$last_evaluation,
  method = "pearson"
)
print(cor_test1)

## 
##  Pearson's product-moment correlation
## 
## data:  hr$satisfaction_level and hr$last_evaluation
## t = 12.933, df = 14997, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.08916727 0.12082195
## sample estimates:
##       cor 
## 0.1050212

Technical Interpretation:

Correlation coefficient: 0.1050212
P-value: 4.7043116 × 10^{-38}
Since the p-value is far below 0.05, the correlation is statistically significant; with r ≈ 0.11, however, the relationship is weak.

Non-Technical Interpretation:

Employees who are more satisfied tend to receive slightly better performance evaluations, but the link is weak.

Visualization

# Scatter plot of satisfaction level against last evaluation score, with a
# fitted straight-line trend and no confidence band.
ggplot(hr, aes(x = last_evaluation, y = satisfaction_level)) +
  geom_point(alpha = 0.6) +
  # `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0). The explicit
  # formula equals the default for method = "lm" and silences its message.
  geom_smooth(
    method = "lm", formula = y ~ x, se = FALSE,
    color = "red", linewidth = 1.2
  ) +
  theme_minimal() +
  labs(
    title = "Higher Evaluations May Not Mean Higher Satisfaction",
    x = "Last Evaluation Score",
    y = "Satisfaction Level"
  )

Correlation 2: Average Monthly Hours vs. Time Spent at the Company

# Two-sided Pearson correlation test: average monthly hours vs. years at the
# company.
cor_test2 <- cor.test(
  x = hr$average_montly_hours,
  y = hr$time_spend_company,
  method = "pearson"
)
print(cor_test2)

## 
##  Pearson's product-moment correlation
## 
## data:  hr$average_montly_hours and hr$time_spend_company
## t = 15.774, df = 14997, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1119801 0.1434654
## sample estimates:
##       cor 
## 0.1277549

Technical Interpretation:

Correlation coefficient: 0.1277549
P-value: 1.306156 × 10^{-55}

Non-Technical Interpretation:

Employees who have been at the company longer tend to work slightly more hours per month, though the relationship is weak.

Visualization

# Scatter plot of average monthly hours against years at the company, with a
# fitted straight-line trend and no confidence band.
ggplot(hr, aes(x = time_spend_company, y = average_montly_hours)) +
  geom_point(alpha = 0.6) +
  # `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0). The explicit
  # formula equals the default for method = "lm" and silences its message.
  geom_smooth(
    method = "lm", formula = y ~ x, se = FALSE,
    color = "blue", linewidth = 1.2
  ) +
  theme_minimal() +
  labs(
    title = "Longer Tenure, More Monthly Hours?",
    x = "Years at Company",
    y = "Average Monthly Hours"
  )

Correlation 3: Number of Projects vs. Last Evaluation

# Two-sided Pearson correlation test: number of projects vs. last evaluation.
cor_test3 <- cor.test(
  x = hr$number_project,
  y = hr$last_evaluation,
  method = "pearson"
)
print(cor_test3)

## 
##  Pearson's product-moment correlation
## 
## data:  hr$number_project and hr$last_evaluation
## t = 45.656, df = 14997, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3352028 0.3633053
## sample estimates:
##       cor 
## 0.3493326

Technical Interpretation:

Correlation coefficient: 0.3493326
P-value: < 2.2 × 10^{-16} (the computed value underflows to 0 in floating point)

Non-Technical Interpretation:

Employees with more projects might receive higher performance evaluations.

Visualization

# Scatter plot of last evaluation score against number of projects, with a
# fitted straight-line trend and no confidence band.
ggplot(hr, aes(x = number_project, y = last_evaluation)) +
  geom_point(alpha = 0.6) +
  # `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0). The explicit
  # formula equals the default for method = "lm" and silences its message.
  geom_smooth(
    method = "lm", formula = y ~ x, se = FALSE,
    color = "green", linewidth = 1.2
  ) +
  theme_minimal() +
  labs(
    title = "More Projects, Higher Evaluation?",
    x = "Number of Projects",
    y = "Last Evaluation Score"
  )

Correlation 4: Satisfaction Level vs. Average Monthly Hours

# Two-sided Pearson correlation test: satisfaction level vs. average monthly
# hours.
cor_test4 <- cor.test(
  x = hr$satisfaction_level,
  y = hr$average_montly_hours,
  method = "pearson"
)
print(cor_test4)

## 
##  Pearson's product-moment correlation
## 
## data:  hr$satisfaction_level and hr$average_montly_hours
## t = -2.4556, df = 14997, p-value = 0.01408
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.036040356 -0.004045605
## sample estimates:
##         cor 
## -0.02004811

Technical Interpretation:

Correlation coefficient: -0.0200481
P-value: 0.014075

Non-Technical Interpretation:

Employees who work more hours per month might feel very slightly less satisfied with their job, but the relationship is so weak (r ≈ -0.02) that it has little practical meaning.

Visualization

# Scatter plot of satisfaction level against average monthly hours, with a
# fitted straight-line trend and no confidence band.
ggplot(hr, aes(x = average_montly_hours, y = satisfaction_level)) +
  geom_point(alpha = 0.6) +
  # `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0). The explicit
  # formula equals the default for method = "lm" and silences its message.
  geom_smooth(
    method = "lm", formula = y ~ x, se = FALSE,
    color = "purple", linewidth = 1.2
  ) +
  theme_minimal() +
  labs(
    title = "More Work Hours, Lower Satisfaction?",
    x = "Average Monthly Hours",
    y = "Satisfaction Level"
  )

Assignment 7: Correlations - Employee Attrition Analysis

Amelie Stufflebeam

2025-03-20

Load Data & Libraries

Correlation 1: Satisfaction Level vs. Last Evaluation

Technical Interpretation:

Non-Technical Interpretation:

Visualization

Correlation 2: Average Monthly Hours vs. Time Spent at the Company

Technical Interpretation:

Non-Technical Interpretation:

Visualization

Correlation 3: Number of Projects vs. Last Evaluation

Technical Interpretation:

Non-Technical Interpretation:

Visualization

Correlation 4: Satisfaction Level vs. Average Monthly Hours

Technical Interpretation:

Non-Technical Interpretation:

Visualization