1. Import dataset
library(readxl)
# Read the attrition workbook from the user's RStudio folder into a tibble.
data <- read_excel(path = "~/RStudio/Ribm.txt.xlsx")

# Preview the first six rows to confirm the columns were parsed as expected.
head(data, n = 6L)
## # A tibble: 6 × 35
## Age Attrition BusinessTravel DailyRate Department DistanceFromHome Education
## <dbl> <chr> <chr> <dbl> <chr> <dbl> <dbl>
## 1 41 Yes Travel_Rarely 1102 Sales 1 2
## 2 49 No Travel_Freque… 279 Research … 8 1
## 3 37 Yes Travel_Rarely 1373 Research … 2 2
## 4 33 No Travel_Freque… 1392 Research … 3 4
## 5 27 No Travel_Rarely 591 Research … 2 1
## 6 32 No Travel_Freque… 1005 Research … 2 2
## # … with 28 more variables: EducationField <chr>, EmployeeCount <dbl>,
## # EmployeeNumber <dbl>, EnvironmentSatisfaction <dbl>, Gender <chr>,
## # HourlyRate <dbl>, JobInvolvement <dbl>, JobLevel <dbl>, JobRole <chr>,
## # JobSatisfaction <dbl>, MaritalStatus <chr>, MonthlyIncome <dbl>,
## # MonthlyRate <dbl>, NumCompaniesWorked <dbl>, Over18 <chr>, OverTime <chr>,
## # PercentSalaryHike <dbl>, PerformanceRating <dbl>,
## # RelationshipSatisfaction <dbl>, StandardHours <dbl>, …
2. Analysis of attrition
a. By Department
library(dplyr)
# Narrow the data to the employee id, department, job role, performance
# rating and attrition columns.
dept <- data %>%
  select(EmployeeNumber, Department, JobRole, PerformanceRating, Attrition)

# Attrition share within each department: tally rows per
# (Department, Attrition) pair, then divide each tally by its department total.
# count() returns an ungrouped tibble, so summarise()'s implicit
# "drop last group" behaviour (and its `.groups` message) is avoided and no
# ungroup()/re-group round trip is needed before computing the share.
# The printed result remains grouped by Department.
dept %>%
  count(Department, Attrition) %>%
  group_by(Department) %>%
  mutate(pct = n / sum(n))
## # A tibble: 6 × 4
## # Groups: Department [3]
## Department Attrition n pct
## <chr> <chr> <int> <dbl>
## 1 Human Resources No 51 0.810
## 2 Human Resources Yes 12 0.190
## 3 Research & Development No 828 0.862
## 4 Research & Development Yes 133 0.138
## 5 Sales No 354 0.794
## 6 Sales Yes 92 0.206
b. By Job Role in each Department
# Attrition share within each job role of each department: tally rows per
# (Department, JobRole, Attrition) triple, then divide each tally by the total
# for its (Department, JobRole) pair.
# count() returns an ungrouped tibble, so summarise()'s implicit
# "drop last group" behaviour (and its `.groups` message) is avoided and no
# ungroup()/re-group round trip is needed before computing the share.
# The printed result remains grouped by Department and JobRole.
dept %>%
  count(Department, JobRole, Attrition) %>%
  group_by(Department, JobRole) %>%
  mutate(pct = n / sum(n))
## # A tibble: 21 × 5
## # Groups: Department, JobRole [11]
## Department JobRole Attrition n pct
## <chr> <chr> <chr> <int> <dbl>
## 1 Human Resources Human Resources No 40 0.769
## 2 Human Resources Human Resources Yes 12 0.231
## 3 Human Resources Manager No 11 1
## 4 Research & Development Healthcare Representative No 122 0.931
## 5 Research & Development Healthcare Representative Yes 9 0.0687
## 6 Research & Development Laboratory Technician No 197 0.761
## 7 Research & Development Laboratory Technician Yes 62 0.239
## 8 Research & Development Manager No 51 0.944
## 9 Research & Development Manager Yes 3 0.0556
## 10 Research & Development Manufacturing Director No 135 0.931
## # … with 11 more rows