#pkgs00 <- c("devtools", "RCurl")
#install.packages(pkgs00)

#pkg01 <- c(
         #"tidyverse", #For data science
         #"tidyquant", #Financial time series
         #"lime",      #Explaining black-box models
         #"glue",      #Pasting text
         #"cowplot",   #Handle multiple ggplot
         #"readxl",    #Read excel file
         #"writexl"   #Write excel file
#)
#install.packages(pkg01)
#install.packages("pacman")

library(readxl)
library(pacman)
p_load(tidyverse, tidyquant, lime, glue, cowplot, readxl, writexl)

# Make sure a "data" folder exists under the current working directory.
# install.packages("fs")
library(fs)
dir_create(path = "data")

# Import the raw training data from IBMdataset.csv as a base data.frame.
# NOTE(review): the file is a CSV read with read.csv(), so readxl is not
# actually needed for this step; the library() call is kept for compatibility.
#install.packages("readxl")
library(readxl)
path_train <- "~/Application_ML_2023/data/IBMdataset.csv"
# Keep text columns as character on every R version: the read.csv() default
# for stringsAsFactors only became FALSE in R 4.0.
train_raw_tbl <- read.csv(path_train, stringsAsFactors = FALSE)
class(train_raw_tbl)
## [1] "data.frame"
library("magrittr")
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
library("dplyr")
# Narrow the raw data to the employee id, department, job role, performance
# rating and attrition columns used by the summaries below.
dept_job_role_tbl <- select(
  train_raw_tbl,
  EmployeeNumber,
  Department,
  JobRole,
  PerformanceRating,
  Attrition
)

# Overall attrition: row count per Attrition value and its share of all rows.
dept_job_role_tbl %>%
  group_by(Attrition) %>%
  summarise(n = n(), .groups = "drop") %>%
  mutate(pct = n / sum(n))
## # A tibble: 2 × 3
##   Attrition     n   pct
##   <chr>     <int> <dbl>
## 1 No         1233 0.839
## 2 Yes         237 0.161
# Attrition by department: row count per (Department, Attrition) pair and the
# share of each Attrition value within its department.
dept_job_role_tbl %>%
  group_by(Department, Attrition) %>%
  # "drop_last" leaves the result grouped by Department, so pct below is
  # computed per department; stating it explicitly also silences the
  # "summarise() has grouped output" message.
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(pct = n / sum(n))
## # A tibble: 6 × 4
## # Groups:   Department [3]
##   Department             Attrition     n   pct
##   <chr>                  <chr>     <int> <dbl>
## 1 Human Resources        No           51 0.810
## 2 Human Resources        Yes          12 0.190
## 3 Research & Development No          828 0.862
## 4 Research & Development Yes         133 0.138
## 5 Sales                  No          354 0.794
## 6 Sales                  Yes          92 0.206
# Attrition by job role: row count per (Department, JobRole, Attrition)
# combination and the share of each Attrition value within its
# department/job-role pair.
dept_job_role_tbl %>%
  group_by(Department, JobRole, Attrition) %>%
  # "drop_last" leaves the result grouped by Department and JobRole, so pct
  # below is computed per job role; stating it explicitly also silences the
  # "summarise() has grouped output" message.
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(pct = n / sum(n)) %>%
  # Print every row instead of a hard-coded count, so nothing is truncated if
  # the number of combinations changes.
  print(n = Inf)
## # A tibble: 21 × 5
## # Groups:   Department, JobRole [11]
##    Department             JobRole                   Attrition     n    pct
##    <chr>                  <chr>                     <chr>     <int>  <dbl>
##  1 Human Resources        Human Resources           No           40 0.769 
##  2 Human Resources        Human Resources           Yes          12 0.231 
##  3 Human Resources        Manager                   No           11 1     
##  4 Research & Development Healthcare Representative No          122 0.931 
##  5 Research & Development Healthcare Representative Yes           9 0.0687
##  6 Research & Development Laboratory Technician     No          197 0.761 
##  7 Research & Development Laboratory Technician     Yes          62 0.239 
##  8 Research & Development Manager                   No           51 0.944 
##  9 Research & Development Manager                   Yes           3 0.0556
## 10 Research & Development Manufacturing Director    No          135 0.931 
## 11 Research & Development Manufacturing Director    Yes          10 0.0690
## 12 Research & Development Research Director         No           78 0.975 
## 13 Research & Development Research Director         Yes           2 0.025 
## 14 Research & Development Research Scientist        No          245 0.839 
## 15 Research & Development Research Scientist        Yes          47 0.161 
## 16 Sales                  Manager                   No           35 0.946 
## 17 Sales                  Manager                   Yes           2 0.0541
## 18 Sales                  Sales Executive           No          269 0.825 
## 19 Sales                  Sales Executive           Yes          57 0.175 
## 20 Sales                  Sales Representative      No           50 0.602 
## 21 Sales                  Sales Representative      Yes          33 0.398

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.

When you click the **Knit** button, a document will be generated that includes both content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.