Data Preparation
# Simulate a 200-row employee table; every column is drawn at random.
employee_data <- tibble(
  employee_id = seq_len(200),
  # Department label, sampled with replacement from five names
  department = sample(
    c("Sales", "Marketing", "IT", "HR", "Finance"),
    size = 200,
    replace = TRUE
  ),
  # 200 distinct calendar days (sampling without replacement)
  hire_date = sample(
    seq(as.Date("2010/01/01"), as.Date("2023/12/31"), by = "day"),
    size = 200
  ),
  # Uniform on [60, 100], left unrounded
  performance_score = runif(200, min = 60, max = 100),
  # Uniform draws rounded to whole numbers
  salary = round(runif(200, min = 40000, max = 120000)),
  age = round(runif(200, min = 22, max = 60)),
  years_experience = round(runif(200, min = 1, max = 20)),
  # TRUE (employee left) is drawn with probability 0.2
  left_company = sample(
    c(TRUE, FALSE),
    size = 200,
    replace = TRUE,
    prob = c(0.2, 0.8)
  )
)
Descriptive Statistics
# Per-department aggregates, ordered from highest to lowest mean performance.
employee_summary <- employee_data %>%
  group_by(department) %>%
  summarise(
    avg_performance = mean(performance_score),
    avg_salary = mean(salary),
    # Percentage of employees in the department who did not leave
    retention_rate = 100 * mean(!left_company),
    # Row count per department
    total_employees = n()
  ) %>%
  arrange(desc(avg_performance))
# Render the department summary as a styled HTML table.
employee_summary %>%
  kable(
    format = "html",
    caption = "Department Performance Overview"
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed")
  )
Department Performance Overview
| department | avg_performance | avg_salary | retention_rate | total_employees |
|---|---|---|---|---|
| Sales | 80.38183 | 83703.80 | 80.43478 | 46 |
| HR | 79.56821 | 78968.09 | 91.17647 | 34 |
| Finance | 79.36061 | 75825.72 | 69.76744 | 43 |
| Marketing | 78.97990 | 82973.00 | 82.50000 | 40 |
| IT | 77.33365 | 80541.57 | 75.67568 | 37 |
Retention Prediction Model
# Fit a binomial GLM (logistic regression) with left_company as the response
# and performance score, salary, experience and age as predictors.
retention_model <- glm(
  formula = left_company ~ performance_score + salary + years_experience + age,
  family = binomial(),
  data = employee_data
)

# Print the fitted model's coefficient table and deviance statistics.
summary(retention_model)
##
## Call:
## glm(formula = left_company ~ performance_score + salary + years_experience +
## age, family = binomial(), data = employee_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.562e+00 1.583e+00 -0.987 0.324
## performance_score 2.870e-04 1.551e-02 0.019 0.985
## salary 6.027e-06 8.064e-06 0.747 0.455
## years_experience 9.752e-03 3.399e-02 0.287 0.774
## age -1.025e-02 1.584e-02 -0.647 0.518
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 202.90 on 199 degrees of freedom
## Residual deviance: 201.83 on 195 degrees of freedom
## AIC: 211.83
##
## Number of Fisher Scoring iterations: 4
Key Insights
- Average performance differs only slightly across departments (about 77.3 to 80.4); no significance test was run on these differences
- Performance score shows no evidence of an association with turnover in the logistic model (p = 0.985)
- Salary and years of experience are not statistically significant predictors of turnover probability (p = 0.455 and p = 0.774)
Conclusion
This analysis provides insights into employee performance and
potential factors influencing retention.
# Save the per-department summary table to a CSV file (relative path).
write_csv(
  employee_summary,
  "department_performance_summary.csv"
)