Homework 1
Installing some key packages
library(pacman)
p_load(readxl, writexl)
library(readxl)
Importing datasheet
# Read the Excel workbook at this relative path into a tibble.
telcodata <- read_excel(path = "saran/telcodata.xlsx")
# Preview the first six rows to check that the import worked.
head(telcodata, n = 6L)
## # A tibble: 6 × 35
## Age Attrit…¹ Busin…² Daily…³ Depar…⁴ Dista…⁵ Educa…⁶ Educa…⁷ Emplo…⁸ Emplo…⁹
## <dbl> <chr> <chr> <dbl> <chr> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 41 Yes Travel… 1102 Sales 1 2 Life S… 1 1
## 2 49 No Travel… 279 Resear… 8 1 Life S… 1 2
## 3 37 Yes Travel… 1373 Resear… 2 2 Other 1 4
## 4 33 No Travel… 1392 Resear… 3 4 Life S… 1 5
## 5 27 No Travel… 591 Resear… 2 1 Medical 1 7
## 6 32 No Travel… 1005 Resear… 2 2 Life S… 1 8
## # … with 25 more variables: EnvironmentSatisfaction <dbl>, Gender <chr>,
## # HourlyRate <dbl>, JobInvolvement <dbl>, JobLevel <dbl>, JobRole <chr>,
## # JobSatisfaction <dbl>, MaritalStatus <chr>, MonthlyIncome <dbl>,
## # MonthlyRate <dbl>, NumCompaniesWorked <dbl>, Over18 <chr>, OverTime <chr>,
## # PercentSalaryHike <dbl>, PerformanceRating <dbl>,
## # RelationshipSatisfaction <dbl>, StandardHours <dbl>,
## # StockOptionLevel <dbl>, TotalWorkingYears <dbl>, …
A tibble: 6 × 35
Age Attrition BusinessTravel DailyRate Department DistanceFromHome Education EducationField EmployeeCount EmployeeNumber
<dbl> <chr> <chr> <dbl> <chr> <dbl> <dbl> <chr> <dbl> <dbl>
1 41 Yes Travel_Rarely 1102 Sales 1 2 Life Sciences 1 1
2 49 No Travel_Frequently 279 Research & Develop… 8 1 Life Sciences 1 2
3 37 Yes Travel_Rarely 1373 Research & Develop… 2 2 Other 1 4
4 33 No Travel_Frequently 1392 Research & Develop… 3 4 Life Sciences 1 5
5 27 No Travel_Rarely 591 Research & Develop… 2 1 Medical 1 7
6 32 No Travel_Frequently 1005 Research & Develop… 2 2 Life Sciences 1 8
# … with 25 more variables: EnvironmentSatisfaction <dbl>, Gender <chr>, HourlyRate <dbl>, JobInvolvement <dbl>, JobLevel <dbl>,
# JobRole <chr>, JobSatisfaction <dbl>, MaritalStatus <chr>, MonthlyIncome <dbl>, MonthlyRate <dbl>, NumCompaniesWorked <dbl>,
# Over18 <chr>, OverTime <chr>, PercentSalaryHike <dbl>, PerformanceRating <dbl>, RelationshipSatisfaction <dbl>,
# StandardHours <dbl>, StockOptionLevel <dbl>, TotalWorkingYears <dbl>, TrainingTimesLastYear <dbl>, WorkLifeBalance <dbl>,
# YearsAtCompany <dbl>, YearsInCurrentRole <dbl>, YearsSinceLastPromotion <dbl>, YearsWithCurrManager <dbl>
Selection
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Restrict the data to five columns: the employee identifier, department,
# job role, performance rating and attrition flag.
# The selection is assigned directly; copying all of `telcodata` into
# `telcodata.2` first would be overwritten at once and never read.
telcodata.2 <- telcodata %>%
  select(EmployeeNumber, Department, JobRole, PerformanceRating, Attrition)
# Preview the first rows of the reduced table.
head(telcodata.2)
## # A tibble: 6 × 5
## EmployeeNumber Department JobRole Performa…¹ Attri…²
## <dbl> <chr> <chr> <dbl> <chr>
## 1 1 Sales Sales Executive 3 Yes
## 2 2 Research & Development Research Scientist 4 No
## 3 4 Research & Development Laboratory Technician 3 Yes
## 4 5 Research & Development Research Scientist 3 No
## 5 7 Research & Development Laboratory Technician 3 No
## 6 8 Research & Development Laboratory Technician 3 No
## # … with abbreviated variable names ¹PerformanceRating, ²Attrition
# A tibble: 6 × 5
EmployeeNumber Department JobRole PerformanceRating Attrition
<dbl> <chr> <chr> <dbl> <chr>
1 1 Sales Sales Executive 3 Yes
2 2 Research & Development Research Scientist 4 No
3 4 Research & Development Laboratory Technician 3 Yes
4 5 Research & Development Research Scientist 3 No
5 7 Research & Development Laboratory Technician 3 No
6 8 Research & Development Laboratory Technician 3 No
Question 2.1
# Share of each department's rows that have Attrition == "Yes".
# Steps: count rows per Department/Attrition pair, divide each count by the
# department total, then keep only the "Yes" rows.
# Note: `percentage` is a proportion between 0 and 1, not scaled to 100.
telcodata.3 <- telcodata.2 %>%
  group_by(Department, Attrition) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  group_by(Department) %>%
  mutate(percentage = count / sum(count)) %>%
  ungroup() %>%
  filter(Attrition == "Yes")
## `summarise()` has grouped output by 'Department'. You can override using the
## `.groups` argument.
# Print the per-department summary (at most six rows).
head(telcodata.3, n = 6L)
## # A tibble: 3 × 4
## Department Attrition count percentage
## <chr> <chr> <int> <dbl>
## 1 Human Resources Yes 12 0.245
## 2 Research & Development Yes 111 0.133
## 3 Sales Yes 78 0.211
# A tibble: 3 × 4
Department Attrition count percentage
<chr> <chr> <int> <dbl>
1 Human Resources Yes 12 0.245
2 Research & Development Yes 111 0.133
3 Sales Yes 78 0.211
Question 2.2
# Share of rows with Attrition == "Yes" within each Department/JobRole pair.
# Steps: count rows per Department/JobRole/Attrition combination, divide each
# count by the total for its Department/JobRole pair, then keep only "Yes".
# Note: `percentage` is a proportion between 0 and 1, not scaled to 100.
telcodata.4 <- telcodata.2 %>%
  group_by(Department, JobRole, Attrition) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  group_by(Department, JobRole) %>%
  mutate(percentage = count / sum(count)) %>%
  ungroup() %>%
  filter(Attrition == "Yes")
## `summarise()` has grouped output by 'Department', 'JobRole'. You can override
## using the `.groups` argument.
# Print the first ten rows of the department/job-role summary.
# head() takes the row count through `n`; an extra positional value after it
# is absorbed by `...` and has no effect, so only `n` is passed here.
# NOTE(review): if a column limit was intended, select the columns explicitly.
head(telcodata.4, n = 10)
## # A tibble: 10 × 5
## Department JobRole Attrition count percentage
## <chr> <chr> <chr> <int> <dbl>
## 1 Human Resources Human Resources Yes 12 0.308
## 2 Research & Development Healthcare Representative Yes 8 0.0762
## 3 Research & Development Laboratory Technician Yes 49 0.219
## 4 Research & Development Manager Yes 2 0.0417
## 5 Research & Development Manufacturing Director Yes 7 0.0569
## 6 Research & Development Research Director Yes 2 0.0274
## 7 Research & Development Research Scientist Yes 43 0.166
## 8 Sales Manager Yes 2 0.0645
## 9 Sales Sales Executive Yes 50 0.183
## 10 Sales Sales Representative Yes 26 0.4
# A tibble: 10 × 5
Department JobRole Attrition count percentage
<chr> <chr> <chr> <int> <dbl>
1 Human Resources Human Resources Yes 12 0.308
2 Research & Development Healthcare Representative Yes 8 0.0762
3 Research & Development Laboratory Technician Yes 49 0.219
4 Research & Development Manager Yes 2 0.0417
5 Research & Development Manufacturing Director Yes 7 0.0569
6 Research & Development Research Director Yes 2 0.0274
7 Research & Development Research Scientist Yes 43 0.166
8 Sales Manager Yes 2 0.0645
9 Sales Sales Executive Yes 50 0.183
10 Sales Sales Representative Yes 26 0.4