Input Data

data <- read_csv("WA_Fn-UseC_-HR-Employee-Attrition.csv")
## Rows: 1470 Columns: 35
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (9): Attrition, BusinessTravel, Department, EducationField, Gender, Job...
## dbl (26): Age, DailyRate, DistanceFromHome, Education, EmployeeCount, Employ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
dat1 <- data %>% select(EmployeeNumber, Department, JobRole, PerformanceRating, Attrition)
head(dat1)
## # A tibble: 6 × 5
##   EmployeeNumber Department             JobRole               Performa…¹ Attri…²
##            <dbl> <chr>                  <chr>                      <dbl> <chr>  
## 1              1 Sales                  Sales Executive                3 Yes    
## 2              2 Research & Development Research Scientist             4 No     
## 3              4 Research & Development Laboratory Technician          3 Yes    
## 4              5 Research & Development Research Scientist             3 No     
## 5              7 Research & Development Laboratory Technician          3 No     
## 6              8 Research & Development Laboratory Technician          3 No     
## # … with abbreviated variable names ¹​PerformanceRating, ²​Attrition

Question 2.1

Compute the number of attrition and percentage of attrition by Department:

dat2 <- dat1 %>% group_by(Department, Attrition) %>% summarise(count= n()) %>% ungroup() %>%
  group_by(Department) %>% mutate(percentage= count/sum(count)) %>% ungroup() %>% filter(Attrition=="Yes")
## `summarise()` has grouped output by 'Department'. You can override using the
## `.groups` argument.
dat2
## # A tibble: 3 × 4
##   Department             Attrition count percentage
##   <chr>                  <chr>     <int>      <dbl>
## 1 Human Resources        Yes          12      0.190
## 2 Research & Development Yes         133      0.138
## 3 Sales                  Yes          92      0.206

Question 2.2

Compute the number of attrition and percentage of attrition by JobRole in each Department:

dat3 <- dat1 %>% group_by(Department, JobRole, Attrition) %>% summarise(count= n()) %>% ungroup() %>%
  group_by(Department, JobRole) %>% mutate(percentage= count/sum(count)) %>% ungroup() %>% filter(Attrition=="Yes")
## `summarise()` has grouped output by 'Department', 'JobRole'. You can override
## using the `.groups` argument.
dat3
## # A tibble: 10 × 5
##    Department             JobRole                   Attrition count percentage
##    <chr>                  <chr>                     <chr>     <int>      <dbl>
##  1 Human Resources        Human Resources           Yes          12     0.231 
##  2 Research & Development Healthcare Representative Yes           9     0.0687
##  3 Research & Development Laboratory Technician     Yes          62     0.239 
##  4 Research & Development Manager                   Yes           3     0.0556
##  5 Research & Development Manufacturing Director    Yes          10     0.0690
##  6 Research & Development Research Director         Yes           2     0.025 
##  7 Research & Development Research Scientist        Yes          47     0.161 
##  8 Sales                  Manager                   Yes           2     0.0541
##  9 Sales                  Sales Executive           Yes          57     0.175 
## 10 Sales                  Sales Representative      Yes          33     0.398

HW 2

calculate_attrition_cost <- function(
    
  # Employee
  n                    = 1,
  salary               = 80000,
  
  # Direct Costs
  separation_cost      = 500,
  vacancy_cost         = 10000,
  acquisition_cost     = 4900,
  placement_cost       = 3500,
  
  # Productivity Costs
  net_revenue_per_employee = 250000,
  workdays_per_year        = 240,
  workdays_position_open   = 40,
  workdays_onboarding      = 60,
  onboarding_efficiency    = 0.50
  
) {
  
  # Direct Costs
  direct_cost <- sum(separation_cost, vacancy_cost, acquisition_cost, placement_cost)
  
  # Lost Productivity Costs
  productivity_cost <- net_revenue_per_employee / workdays_per_year * 
    (workdays_position_open + workdays_onboarding * onboarding_efficiency) 
  
  # Savings of Salary & Benefits (Cost Reduction)
  salary_benefit_reduction <- salary / workdays_per_year * workdays_position_open
  
  # Estimated Turnover Per Employee
  cost_per_employee <- direct_cost + productivity_cost - salary_benefit_reduction
  
  # Total Cost of Employee Turnover
  total_cost <- n * cost_per_employee
  
  return(total_cost)
}
calculate_attrition_cost(n=10, salary=100)
## [1] 918000