ATTRITION HOMEWORK 2

1. Import dataset

library(readxl)
# Read the workbook into a tibble, then preview its first six rows.
data <- read_excel(path = "~/RStudio/Ribm.txt.xlsx")
head(data, n = 6)
## # A tibble: 6 × 35
##     Age Attrition BusinessTravel DailyRate Department DistanceFromHome Education
##   <dbl> <chr>     <chr>              <dbl> <chr>                 <dbl>     <dbl>
## 1    41 Yes       Travel_Rarely       1102 Sales                     1         2
## 2    49 No        Travel_Freque…       279 Research …                8         1
## 3    37 Yes       Travel_Rarely       1373 Research …                2         2
## 4    33 No        Travel_Freque…      1392 Research …                3         4
## 5    27 No        Travel_Rarely        591 Research …                2         1
## 6    32 No        Travel_Freque…      1005 Research …                2         2
## # … with 28 more variables: EducationField <chr>, EmployeeCount <dbl>,
## #   EmployeeNumber <dbl>, EnvironmentSatisfaction <dbl>, Gender <chr>,
## #   HourlyRate <dbl>, JobInvolvement <dbl>, JobLevel <dbl>, JobRole <chr>,
## #   JobSatisfaction <dbl>, MaritalStatus <chr>, MonthlyIncome <dbl>,
## #   MonthlyRate <dbl>, NumCompaniesWorked <dbl>, Over18 <chr>, OverTime <chr>,
## #   PercentSalaryHike <dbl>, PerformanceRating <dbl>,
## #   RelationshipSatisfaction <dbl>, StandardHours <dbl>, …

2. Analysis of attrition

a. By Department

library(dplyr)
# Narrow the data to the employee number, department, job role,
# performance rating and attrition columns.
dept <- select(
  data,
  EmployeeNumber, Department, JobRole, PerformanceRating, Attrition
)

# Attrition counts per department, plus each row's share of that
# department's total. count() replaces the group_by() + summarise(n()) +
# ungroup() sequence and avoids summarise()'s `.groups` message.
dept %>%
  count(Department, Attrition) %>%
  # Group by department only so that sum(n) is the department total; the
  # printed result therefore stays grouped by Department.
  group_by(Department) %>%
  mutate(pct = n / sum(n))
## # A tibble: 6 × 4
## # Groups:   Department [3]
##   Department             Attrition     n   pct
##   <chr>                  <chr>     <int> <dbl>
## 1 Human Resources        No           51 0.810
## 2 Human Resources        Yes          12 0.190
## 3 Research & Development No          828 0.862
## 4 Research & Development Yes         133 0.138
## 5 Sales                  No          354 0.794
## 6 Sales                  Yes          92 0.206

b. By Job Role in each Department

# Attrition counts per job role within each department, plus each row's
# share of that department/job-role total. count() replaces the
# group_by() + summarise(n()) + ungroup() sequence and avoids summarise()'s
# `.groups` message.
dept %>%
  count(Department, JobRole, Attrition) %>%
  # Group by department and job role so that sum(n) is the total for that
  # pair; the printed result stays grouped by Department and JobRole.
  group_by(Department, JobRole) %>%
  mutate(pct = n / sum(n))
## # A tibble: 21 × 5
## # Groups:   Department, JobRole [11]
##    Department             JobRole                   Attrition     n    pct
##    <chr>                  <chr>                     <chr>     <int>  <dbl>
##  1 Human Resources        Human Resources           No           40 0.769 
##  2 Human Resources        Human Resources           Yes          12 0.231 
##  3 Human Resources        Manager                   No           11 1     
##  4 Research & Development Healthcare Representative No          122 0.931 
##  5 Research & Development Healthcare Representative Yes           9 0.0687
##  6 Research & Development Laboratory Technician     No          197 0.761 
##  7 Research & Development Laboratory Technician     Yes          62 0.239 
##  8 Research & Development Manager                   No           51 0.944 
##  9 Research & Development Manager                   Yes           3 0.0556
## 10 Research & Development Manufacturing Director    No          135 0.931 
## # … with 11 more rows

3. Calculate the attrition cost

# NOTE(review): calculate_attrition_cost() is not defined or loaded in the
# visible part of this document; presumably it is sourced beforehand.
# Called here with n = 1 and salary = 80000.
calculate_attrition_cost(n = 1, salary = 80000)
## [1] 78483.33