data <- read_csv("WA_Fn-UseC_-HR-Employee-Attrition.csv")
## Rows: 1470 Columns: 35
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): Attrition, BusinessTravel, Department, EducationField, Gender, Job...
## dbl (26): Age, DailyRate, DistanceFromHome, Education, EmployeeCount, Employ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
dat1 <- data %>% select(EmployeeNumber, Department, JobRole, PerformanceRating, Attrition)
head(dat1)
## # A tibble: 6 × 5
## EmployeeNumber Department JobRole Performa…¹ Attri…²
## <dbl> <chr> <chr> <dbl> <chr>
## 1 1 Sales Sales Executive 3 Yes
## 2 2 Research & Development Research Scientist 4 No
## 3 4 Research & Development Laboratory Technician 3 Yes
## 4 5 Research & Development Research Scientist 3 No
## 5 7 Research & Development Laboratory Technician 3 No
## 6 8 Research & Development Laboratory Technician 3 No
## # … with abbreviated variable names ¹PerformanceRating, ²Attrition
Compute the number of attrition and percentage of attrition by Department:
dat2 <- dat1 %>% group_by(Department, Attrition) %>% summarise(count= n()) %>% ungroup() %>%
group_by(Department) %>% mutate(percentage= count/sum(count)) %>% ungroup() %>% filter(Attrition=="Yes")
## `summarise()` has grouped output by 'Department'. You can override using the
## `.groups` argument.
dat2
## # A tibble: 3 × 4
## Department Attrition count percentage
## <chr> <chr> <int> <dbl>
## 1 Human Resources Yes 12 0.190
## 2 Research & Development Yes 133 0.138
## 3 Sales Yes 92 0.206
Compute the number of attrition and percentage of attrition by JobRole in each Department:
dat3 <- dat1 %>% group_by(Department, JobRole, Attrition) %>% summarise(count= n()) %>% ungroup() %>%
group_by(Department, JobRole) %>% mutate(percentage= count/sum(count)) %>% ungroup() %>% filter(Attrition=="Yes")
## `summarise()` has grouped output by 'Department', 'JobRole'. You can override
## using the `.groups` argument.
dat3
## # A tibble: 10 × 5
## Department JobRole Attrition count percentage
## <chr> <chr> <chr> <int> <dbl>
## 1 Human Resources Human Resources Yes 12 0.231
## 2 Research & Development Healthcare Representative Yes 9 0.0687
## 3 Research & Development Laboratory Technician Yes 62 0.239
## 4 Research & Development Manager Yes 3 0.0556
## 5 Research & Development Manufacturing Director Yes 10 0.0690
## 6 Research & Development Research Director Yes 2 0.025
## 7 Research & Development Research Scientist Yes 47 0.161
## 8 Sales Manager Yes 2 0.0541
## 9 Sales Sales Executive Yes 57 0.175
## 10 Sales Sales Representative Yes 33 0.398
calculate_attrition_cost <- function(
# Employee
n = 1,
salary = 80000,
# Direct Costs
separation_cost = 500,
vacancy_cost = 10000,
acquisition_cost = 4900,
placement_cost = 3500,
# Productivity Costs
net_revenue_per_employee = 250000,
workdays_per_year = 240,
workdays_position_open = 40,
workdays_onboarding = 60,
onboarding_efficiency = 0.50
) {
# Direct Costs
direct_cost <- sum(separation_cost, vacancy_cost, acquisition_cost, placement_cost)
# Lost Productivity Costs
productivity_cost <- net_revenue_per_employee / workdays_per_year *
(workdays_position_open + workdays_onboarding * onboarding_efficiency)
# Savings of Salary & Benefits (Cost Reduction)
salary_benefit_reduction <- salary / workdays_per_year * workdays_position_open
# Estimated Turnover Per Employee
cost_per_employee <- direct_cost + productivity_cost - salary_benefit_reduction
# Total Cost of Employee Turnover
total_cost <- n * cost_per_employee
return(total_cost)
}
calculate_attrition_cost(n=10, salary=100)
## [1] 918000