hw1_SP

# Some key packages 
pkgs00 <- c("devtools", "RCurl")
install.packages(pkgs00)

## Installing packages into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)

# 
pkg01 <- c(
         "tidyverse", #For data science 
         "tidyquant", #Financial time series
         "lime",      #Explaining black-box mode
         "glue",      #Pasting text
         "fs",        #File system
         "cowplot",   #Handle multiple ggplot
         "readxl",    #read excel file
         "writexl"    #write excel file
)
install.packages(pkg01)

## Installing packages into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)

install.packages("pacman")

## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)

# pacman::p_install(h2o)
install.packages("h2o")

## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)

library(h2o)

## 
## ----------------------------------------------------------------------
## 
## Your next step is to start H2O:
##     > h2o.init()
## 
## For H2O package documentation, ask for help:
##     > ??h2o
## 
## After starting H2O, you can use the Web UI at http://localhost:54321
## For more information visit https://docs.h2o.ai
## 
## ----------------------------------------------------------------------

## 
## Attaching package: 'h2o'

## The following objects are masked from 'package:stats':
## 
##     cor, sd, var

## The following objects are masked from 'package:base':
## 
##     &&, %*%, %in%, ||, apply, as.factor, as.numeric, colnames,
##     colnames<-, ifelse, is.character, is.factor, is.numeric, log,
##     log10, log1p, log2, round, signif, trunc

# h2o set-up 
h2o.no_progress()  # turn off h2o progress bars
h2o.init()         # launch h2o

##  Connection successful!
## 
## R is connected to the H2O cluster: 
##     H2O cluster uptime:         9 minutes 52 seconds 
##     H2O cluster timezone:       UTC 
##     H2O data parsing timezone:  UTC 
##     H2O cluster version:        3.40.0.4 
##     H2O cluster version age:    1 month and 9 days 
##     H2O cluster name:           H2O_started_from_R_r1753109_jos546 
##     H2O cluster total nodes:    1 
##     H2O cluster total memory:   0.19 GB 
##     H2O cluster total cores:    1 
##     H2O cluster allowed cores:  1 
##     H2O cluster healthy:        TRUE 
##     H2O Connection ip:          localhost 
##     H2O Connection port:        54321 
##     H2O Connection proxy:       NA 
##     H2O Internal Security:      FALSE 
##     R Version:                  R version 4.3.0 (2023-04-21)

library(readxl)
library(pacman)
p_load(tidyverse, tidyquant, lime, glue, cowplot, ggplot2,fs,readxl, writexl)

# Create data directory
fs::dir_create("data")

# Import telco_train.xlsx data
path_train <- "telco_train(1).xlsx"
train_raw_tbl <- read_excel(path_train)
class(train_raw_tbl)

## [1] "tbl_df"     "tbl"        "data.frame"

# Analysis of attrition by department
dept_job_role_tbl <- train_raw_tbl %>% 
                     select(EmployeeNumber, Department, JobRole, 
                            PerformanceRating, Attrition)
    
dept_job_role_tbl %>% group_by(Attrition) %>%
                      summarise(n = n()) %>%
                      ungroup() %>%
                      mutate(pct = n /sum(n))

## # A tibble: 2 × 3
##   Attrition     n   pct
##   <chr>     <int> <dbl>
## 1 No         1049 0.839
## 2 Yes         201 0.161

# department attrition
dept_job_role_tbl %>% group_by(Department, Attrition) %>%
                      summarise(n = n()) %>%
                      ungroup() %>%
                      #group_by(Department) %>%
                      mutate(pct = n /sum(n))

## `summarise()` has grouped output by 'Department'. You can override using the
## `.groups` argument.

## # A tibble: 6 × 4
##   Department             Attrition     n    pct
##   <chr>                  <chr>     <int>  <dbl>
## 1 Human Resources        No           37 0.0296
## 2 Human Resources        Yes          12 0.0096
## 3 Research & Development No          721 0.577 
## 4 Research & Development Yes         111 0.0888
## 5 Sales                  No          291 0.233 
## 6 Sales                  Yes          78 0.0624

# JobRole attrition
dept_job_role_tbl %>% group_by(Department, JobRole, Attrition) %>%
                      summarise(n = n()) %>%
                      ungroup() %>%
                      group_by(Department, JobRole) %>%
                      mutate(pct = n /sum(n)) %>%
                      ungroup() %>%
                      filter(Attrition %in% c("Yes"))

## `summarise()` has grouped output by 'Department', 'JobRole'. You can override
## using the `.groups` argument.

## # A tibble: 10 × 5
##    Department             JobRole                   Attrition     n    pct
##    <chr>                  <chr>                     <chr>     <int>  <dbl>
##  1 Human Resources        Human Resources           Yes          12 0.308 
##  2 Research & Development Healthcare Representative Yes           8 0.0762
##  3 Research & Development Laboratory Technician     Yes          49 0.219 
##  4 Research & Development Manager                   Yes           2 0.0417
##  5 Research & Development Manufacturing Director    Yes           7 0.0569
##  6 Research & Development Research Director         Yes           2 0.0274
##  7 Research & Development Research Scientist        Yes          43 0.166 
##  8 Sales                  Manager                   Yes           2 0.0645
##  9 Sales                  Sales Executive           Yes          50 0.183 
## 10 Sales                  Sales Representative      Yes          26 0.4

calculate_attrition_cost <- function(
    
  # Employee
  n                    = 1,
  salary               = 80000,
  
  # Direct Costs
  separation_cost      = 500,
  vacancy_cost         = 10000,
  acquisition_cost     = 4900,
  placement_cost       = 3500,
  
  # Productivity Costs
  net_revenue_per_employee = 250000,
  workdays_per_year        = 240,
  workdays_position_open   = 40,
  workdays_onboarding      = 60,
  onboarding_efficiency    = 0.50
  
) {
  
  # Direct Costs
  direct_cost <- sum(separation_cost, vacancy_cost, acquisition_cost, placement_cost)
  
  # Lost Productivity Costs
  productivity_cost <- net_revenue_per_employee / workdays_per_year * 
    (workdays_position_open + workdays_onboarding * onboarding_efficiency) 
  
  # Savings of Salary & Benefits (Cost Reduction)
  salary_benefit_reduction <- salary / workdays_per_year * workdays_position_open
  
  # Estimated Turnover Per Employee
  cost_per_employee <- direct_cost + productivity_cost - salary_benefit_reduction
  
  # Total Cost of Employee Turnover
  total_cost <- n * cost_per_employee
  
  return(total_cost)
}
calculate_attrition_cost(n=10, salary=100)

## [1] 918000

hw1_SP

Narantuya

2023-06-01