Homework 1

Installing some key packages

library(pacman)
p_load(readxl, writexl)
library(readxl)

Importing the data sheet

# Read the Excel workbook (path is relative to the working directory) into a
# tibble, then preview its first rows to check the import.
telcodata <- readxl::read_excel(path = "saran/telcodata.xlsx")
head(telcodata)
## # A tibble: 6 × 35
##     Age Attrit…¹ Busin…² Daily…³ Depar…⁴ Dista…⁵ Educa…⁶ Educa…⁷ Emplo…⁸ Emplo…⁹
##   <dbl> <chr>    <chr>     <dbl> <chr>     <dbl>   <dbl> <chr>     <dbl>   <dbl>
## 1    41 Yes      Travel…    1102 Sales         1       2 Life S…       1       1
## 2    49 No       Travel…     279 Resear…       8       1 Life S…       1       2
## 3    37 Yes      Travel…    1373 Resear…       2       2 Other         1       4
## 4    33 No       Travel…    1392 Resear…       3       4 Life S…       1       5
## 5    27 No       Travel…     591 Resear…       2       1 Medical       1       7
## 6    32 No       Travel…    1005 Resear…       2       2 Life S…       1       8
## # … with 25 more variables: EnvironmentSatisfaction <dbl>, Gender <chr>,
## #   HourlyRate <dbl>, JobInvolvement <dbl>, JobLevel <dbl>, JobRole <chr>,
## #   JobSatisfaction <dbl>, MaritalStatus <chr>, MonthlyIncome <dbl>,
## #   MonthlyRate <dbl>, NumCompaniesWorked <dbl>, Over18 <chr>, OverTime <chr>,
## #   PercentSalaryHike <dbl>, PerformanceRating <dbl>,
## #   RelationshipSatisfaction <dbl>, StandardHours <dbl>,
## #   StockOptionLevel <dbl>, TotalWorkingYears <dbl>, …
A tibble: 6 × 35
    Age Attrition BusinessTravel    DailyRate Department          DistanceFromHome Education EducationField EmployeeCount EmployeeNumber
  <dbl> <chr>     <chr>                 <dbl> <chr>                          <dbl>     <dbl> <chr>                  <dbl>          <dbl>
1    41 Yes       Travel_Rarely          1102 Sales                              1         2 Life Sciences              1              1
2    49 No        Travel_Frequently       279 Research & Develop…                8         1 Life Sciences              1              2
3    37 Yes       Travel_Rarely          1373 Research & Develop…                2         2 Other                      1              4
4    33 No        Travel_Frequently      1392 Research & Develop…                3         4 Life Sciences              1              5
5    27 No        Travel_Rarely           591 Research & Develop…                2         1 Medical                    1              7
6    32 No        Travel_Frequently      1005 Research & Develop…                2         2 Life Sciences              1              8
# … with 25 more variables: EnvironmentSatisfaction <dbl>, Gender <chr>, HourlyRate <dbl>, JobInvolvement <dbl>, JobLevel <dbl>,
#   JobRole <chr>, JobSatisfaction <dbl>, MaritalStatus <chr>, MonthlyIncome <dbl>, MonthlyRate <dbl>, NumCompaniesWorked <dbl>,
#   Over18 <chr>, OverTime <chr>, PercentSalaryHike <dbl>, PerformanceRating <dbl>, RelationshipSatisfaction <dbl>,
#   StandardHours <dbl>, StockOptionLevel <dbl>, TotalWorkingYears <dbl>, TrainingTimesLastYear <dbl>, WorkLifeBalance <dbl>,
#   YearsAtCompany <dbl>, YearsInCurrentRole <dbl>, YearsSinceLastPromotion <dbl>, YearsWithCurrManager <dbl>     

Selection

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Build a narrower table that keeps only five columns of `telcodata`:
# EmployeeNumber, Department, JobRole, PerformanceRating and Attrition.
# The result is assigned once; no intermediate full copy is needed.
telcodata.2 <- telcodata %>%
  select(EmployeeNumber, Department, JobRole, PerformanceRating, Attrition)
head(telcodata.2)
## # A tibble: 6 × 5
##   EmployeeNumber Department             JobRole               Performa…¹ Attri…²
##            <dbl> <chr>                  <chr>                      <dbl> <chr>  
## 1              1 Sales                  Sales Executive                3 Yes    
## 2              2 Research & Development Research Scientist             4 No     
## 3              4 Research & Development Laboratory Technician          3 Yes    
## 4              5 Research & Development Research Scientist             3 No     
## 5              7 Research & Development Laboratory Technician          3 No     
## 6              8 Research & Development Laboratory Technician          3 No     
## # … with abbreviated variable names ¹​PerformanceRating, ²​Attrition
# A tibble: 6 × 5
  EmployeeNumber Department             JobRole               PerformanceRating Attrition
           <dbl> <chr>                  <chr>                             <dbl> <chr>    
1              1 Sales                  Sales Executive                       3 Yes      
2              2 Research & Development Research Scientist                    4 No       
3              4 Research & Development Laboratory Technician                 3 Yes      
4              5 Research & Development Research Scientist                    3 No       
5              7 Research & Development Laboratory Technician                 3 No       
6              8 Research & Development Laboratory Technician                 3 No       

Question 2.1

# Share of each Attrition value within every department.
#   1. Count rows for each Department/Attrition pair.
#   2. Within each Department, divide the count by the department total.
#      `percentage` is therefore a proportion between 0 and 1 (not x100).
#   3. Keep only the rows where Attrition is "Yes".
# `.groups = "drop"` makes summarise() return ungrouped data explicitly
# instead of relying on its default regrouping behaviour.
telcodata.3 <- telcodata.2 %>%
  group_by(Department, Attrition) %>%
  summarise(count = n(), .groups = "drop") %>%
  group_by(Department) %>%
  mutate(percentage = count / sum(count)) %>%
  ungroup() %>%
  filter(Attrition == "Yes")
## `summarise()` has grouped output by 'Department'. You can override using the
## `.groups` argument.
# Preview the per-department attrition summary.
telcodata.3 %>% head()
## # A tibble: 3 × 4
##   Department             Attrition count percentage
##   <chr>                  <chr>     <int>      <dbl>
## 1 Human Resources        Yes          12      0.245
## 2 Research & Development Yes         111      0.133
## 3 Sales                  Yes          78      0.211
# A tibble: 3 × 4
  Department             Attrition count percentage
  <chr>                  <chr>     <int>      <dbl>
1 Human Resources        Yes          12      0.245
2 Research & Development Yes         111      0.133
3 Sales                  Yes          78      0.211
 

Question 2.2

# Share of each Attrition value within every Department/JobRole combination.
#   1. Count rows for each Department/JobRole/Attrition triple.
#   2. Within each Department/JobRole pair, divide the count by the pair total.
#      `percentage` is therefore a proportion between 0 and 1 (not x100).
#   3. Keep only the rows where Attrition is "Yes".
# `.groups = "drop"` makes summarise() return ungrouped data explicitly
# instead of relying on its default regrouping behaviour.
telcodata.4 <- telcodata.2 %>%
  group_by(Department, JobRole, Attrition) %>%
  summarise(count = n(), .groups = "drop") %>%
  group_by(Department, JobRole) %>%
  mutate(percentage = count / sum(count)) %>%
  ungroup() %>%
  filter(Attrition == "Yes")
## `summarise()` has grouped output by 'Department', 'JobRole'. You can override
## using the `.groups` argument.
# Preview the first ten rows of the per-role summary. Only the row count `n`
# is passed: any further positional argument to head() falls into `...` and
# is silently ignored, so it would have no effect on the output.
head(telcodata.4, n = 10L)
## # A tibble: 10 × 5
##    Department             JobRole                   Attrition count percentage
##    <chr>                  <chr>                     <chr>     <int>      <dbl>
##  1 Human Resources        Human Resources           Yes          12     0.308 
##  2 Research & Development Healthcare Representative Yes           8     0.0762
##  3 Research & Development Laboratory Technician     Yes          49     0.219 
##  4 Research & Development Manager                   Yes           2     0.0417
##  5 Research & Development Manufacturing Director    Yes           7     0.0569
##  6 Research & Development Research Director         Yes           2     0.0274
##  7 Research & Development Research Scientist        Yes          43     0.166 
##  8 Sales                  Manager                   Yes           2     0.0645
##  9 Sales                  Sales Executive           Yes          50     0.183 
## 10 Sales                  Sales Representative      Yes          26     0.4
# A tibble: 10 × 5
   Department             JobRole                   Attrition count percentage
   <chr>                  <chr>                     <chr>     <int>      <dbl>
 1 Human Resources        Human Resources           Yes          12     0.308 
 2 Research & Development Healthcare Representative Yes           8     0.0762
 3 Research & Development Laboratory Technician     Yes          49     0.219 
 4 Research & Development Manager                   Yes           2     0.0417
 5 Research & Development Manufacturing Director    Yes           7     0.0569
 6 Research & Development Research Director         Yes           2     0.0274
 7 Research & Development Research Scientist        Yes          43     0.166 
 8 Sales                  Manager                   Yes           2     0.0645
 9 Sales                  Sales Executive           Yes          50     0.183 
10 Sales                  Sales Representative      Yes          26     0.4