pkgs00 <- c("devtools", "RCurl")
install.packages(pkgs00)
## Installing packages into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
pkg01 <- c(
  "tidyverse", #For data science 
  "tidyquant", #Financial time series
  "lime",      #Explaining black-box mode
  "glue",      #Pasting text
  "fs",        #File system
  "cowplot",   #Handle multiple ggplot
  "readxl",    #read excel file
  "writexl"    #write excel file
)
install.packages(pkg01)
## Installing packages into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
install.packages("pacman")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
install.packages("h2o")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
install.packages("xlsx")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
library(h2o)
## 
## ----------------------------------------------------------------------
## 
## Your next step is to start H2O:
##     > h2o.init()
## 
## For H2O package documentation, ask for help:
##     > ??h2o
## 
## After starting H2O, you can use the Web UI at http://localhost:54321
## For more information visit https://docs.h2o.ai
## 
## ----------------------------------------------------------------------
## 
## Attaching package: 'h2o'
## The following objects are masked from 'package:stats':
## 
##     cor, sd, var
## The following objects are masked from 'package:base':
## 
##     &&, %*%, %in%, ||, apply, as.factor, as.numeric, colnames,
##     colnames<-, ifelse, is.character, is.factor, is.numeric, log,
##     log10, log1p, log2, round, signif, trunc
h2o.no_progress()  # turn off h2o progress bars
h2o.init()         # launch h2o
## 
## H2O is not running yet, starting it now...
## 
## Note:  In case of errors look at the following log files:
##     /tmp/Rtmplzv4GN/file160664606b7/h2o_r1737463_started_from_r.out
##     /tmp/Rtmplzv4GN/file1601a95d5ab/h2o_r1737463_started_from_r.err
## 
## 
## Starting H2O JVM and connecting: ... Connection successful!
## 
## R is connected to the H2O cluster: 
##     H2O cluster uptime:         2 seconds 412 milliseconds 
##     H2O cluster timezone:       UTC 
##     H2O data parsing timezone:  UTC 
##     H2O cluster version:        3.40.0.1 
##     H2O cluster version age:    1 month and 14 days 
##     H2O cluster name:           H2O_started_from_R_r1737463_otf517 
##     H2O cluster total nodes:    1 
##     H2O cluster total memory:   0.24 GB 
##     H2O cluster total cores:    1 
##     H2O cluster allowed cores:  1 
##     H2O cluster healthy:        TRUE 
##     H2O Connection ip:          localhost 
##     H2O Connection port:        54321 
##     H2O Connection proxy:       NA 
##     H2O Internal Security:      FALSE 
##     R Version:                  R version 4.2.3 (2023-03-15)
library(readxl)
library(pacman)
p_load(tidyverse, tidyquant, lime, glue, cowplot, ggplot2,fs,readxl, writexl)
library(xlsx)

library(readr)
library(writexl)

data <- read_csv("telco_train.xlsx")
## Multiple files in zip: reading 'xl/worksheets/sheet1.xml'
## Rows: 1 Columns: 1
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(readxl)

# Create data directory
fs::dir_create("data")

# Import telco_train.xlsx data
path_train <- read_excel("telco_train.xlsx")

library("magrittr")
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
library("dplyr")
# Analysis of attrition by department
dept_job_role_tbl <- path_train %>% 
                     select(EmployeeNumber, Department, JobRole, 
                            PerformanceRating, Attrition)
    
dept_job_role_tbl %>% group_by(Attrition) %>%
                      summarise(n = n()) %>%
                      ungroup() %>%
                      mutate(pct = n /sum(n))
## # A tibble: 2 × 3
##   Attrition     n   pct
##   <chr>     <int> <dbl>
## 1 No         1049 0.839
## 2 Yes         201 0.161
# department attrition
dept_job_role_tbl %>% group_by(Department, Attrition) %>%
                      summarise(n = n()) %>%
                      ungroup() %>%
                      #group_by(Department) %>%
                      mutate(pct = n /sum(n))
## `summarise()` has grouped output by 'Department'. You can override using the
## `.groups` argument.
## # A tibble: 6 × 4
##   Department             Attrition     n    pct
##   <chr>                  <chr>     <int>  <dbl>
## 1 Human Resources        No           37 0.0296
## 2 Human Resources        Yes          12 0.0096
## 3 Research & Development No          721 0.577 
## 4 Research & Development Yes         111 0.0888
## 5 Sales                  No          291 0.233 
## 6 Sales                  Yes          78 0.0624
# JobRole attrition
dept_job_role_tbl %>% group_by(Department, JobRole, Attrition) %>%
                      summarise(n = n()) %>%
                      ungroup() %>%
                      group_by(Department, JobRole) %>%
                      mutate(pct = n /sum(n)) %>%
                      ungroup() %>%
                      filter(Attrition %in% c("Yes"))
## `summarise()` has grouped output by 'Department', 'JobRole'. You can override
## using the `.groups` argument.
## # A tibble: 10 × 5
##    Department             JobRole                   Attrition     n    pct
##    <chr>                  <chr>                     <chr>     <int>  <dbl>
##  1 Human Resources        Human Resources           Yes          12 0.308 
##  2 Research & Development Healthcare Representative Yes           8 0.0762
##  3 Research & Development Laboratory Technician     Yes          49 0.219 
##  4 Research & Development Manager                   Yes           2 0.0417
##  5 Research & Development Manufacturing Director    Yes           7 0.0569
##  6 Research & Development Research Director         Yes           2 0.0274
##  7 Research & Development Research Scientist        Yes          43 0.166 
##  8 Sales                  Manager                   Yes           2 0.0645
##  9 Sales                  Sales Executive           Yes          50 0.183 
## 10 Sales                  Sales Representative      Yes          26 0.4
calculate_attrition_cost <- function(
  # Employee
  n                    = 1,
  salary               = 80000,
  
  # Direct Costs
  separation_cost      = 500,
  vacancy_cost         = 10000,
  acquisition_cost     = 4900,
  placement_cost       = 3500,
  
  # Productivity Costs
  net_revenue_per_employee = 250000,
  workdays_per_year        = 240,
  workdays_position_open   = 40,
  workdays_onboarding      = 60,
  onboarding_efficiency    = 0.50
  
) {
  
  # Direct Costs
  direct_cost <- sum(separation_cost, vacancy_cost, acquisition_cost, placement_cost)
  
  # Lost Productivity Costs
  productivity_cost <- net_revenue_per_employee / workdays_per_year * 
    (workdays_position_open + workdays_onboarding * onboarding_efficiency) 
  
  # Savings of Salary & Benefits (Cost Reduction)
  salary_benefit_reduction <- salary / workdays_per_year * workdays_position_open
  
  # Estimated Turnover Per Employee
  cost_per_employee <- direct_cost + productivity_cost - salary_benefit_reduction
  
  # Total Cost of Employee Turnover
  total_cost <- n * cost_per_employee
  
  return(total_cost)
  
}