This RMarkdown-file documents present all the data cleaning and transformation process in R for the case study.


Installing packages and loading libraries

#Loading libraries
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.8
## ✓ tidyr   1.2.0     ✓ stringr 1.4.0
## ✓ readr   2.1.2     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(here)
## here() starts at /Users/florence/Downloads
library(skimr)
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(readr)
library(tibble)
library(ggpubr)
library(corrplot)
## corrplot 0.92 loaded


Importing and inspecting dataset

employee_survey_data <- read_csv(file="~/Downloads/HR Analytics Case Study/employee_survey_data.csv")
## Rows: 4410 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (4): EmployeeID, EnvironmentSatisfaction, JobSatisfaction, WorkLifeBalance
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(employee_survey_data)
## # A tibble: 6 × 4
##   EmployeeID EnvironmentSatisfaction JobSatisfaction WorkLifeBalance
##        <dbl>                   <dbl>           <dbl>           <dbl>
## 1          1                       3               4               2
## 2          2                       3               2               4
## 3          3                       2               2               1
## 4          4                       4               4               3
## 5          5                       4               1               3
## 6          6                       3               2               2
colnames(employee_survey_data)
## [1] "EmployeeID"              "EnvironmentSatisfaction"
## [3] "JobSatisfaction"         "WorkLifeBalance"
general_data <- read_csv(file = "~/Downloads/HR Analytics Case Study/general_data.csv")
## Rows: 4410 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (8): Attrition, BusinessTravel, Department, EducationField, Gender, Job...
## dbl (16): Age, DistanceFromHome, Education, EmployeeCount, EmployeeID, JobLe...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(general_data)
## # A tibble: 6 × 24
##     Age Attrition BusinessTravel    Department        DistanceFromHome Education
##   <dbl> <chr>     <chr>             <chr>                        <dbl>     <dbl>
## 1    51 No        Travel_Rarely     Sales                            6         2
## 2    31 Yes       Travel_Frequently Research & Devel…               10         1
## 3    32 No        Travel_Frequently Research & Devel…               17         4
## 4    38 No        Non-Travel        Research & Devel…                2         5
## 5    32 No        Travel_Rarely     Research & Devel…               10         1
## 6    46 No        Travel_Rarely     Research & Devel…                8         3
## # … with 18 more variables: EducationField <chr>, EmployeeCount <dbl>,
## #   EmployeeID <dbl>, Gender <chr>, JobLevel <dbl>, JobRole <chr>,
## #   MaritalStatus <chr>, MonthlyIncome <dbl>, NumCompaniesWorked <dbl>,
## #   Over18 <chr>, PercentSalaryHike <dbl>, StandardHours <dbl>,
## #   StockOptionLevel <dbl>, TotalWorkingYears <dbl>,
## #   TrainingTimesLastYear <dbl>, YearsAtCompany <dbl>,
## #   YearsSinceLastPromotion <dbl>, YearsWithCurrManager <dbl>
colnames(general_data)
##  [1] "Age"                     "Attrition"              
##  [3] "BusinessTravel"          "Department"             
##  [5] "DistanceFromHome"        "Education"              
##  [7] "EducationField"          "EmployeeCount"          
##  [9] "EmployeeID"              "Gender"                 
## [11] "JobLevel"                "JobRole"                
## [13] "MaritalStatus"           "MonthlyIncome"          
## [15] "NumCompaniesWorked"      "Over18"                 
## [17] "PercentSalaryHike"       "StandardHours"          
## [19] "StockOptionLevel"        "TotalWorkingYears"      
## [21] "TrainingTimesLastYear"   "YearsAtCompany"         
## [23] "YearsSinceLastPromotion" "YearsWithCurrManager"
manager_survey_data <- read_csv(file = "~/Downloads/HR Analytics Case Study/manager_survey_data.csv")
## Rows: 4410 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): EmployeeID, JobInvolvement, PerformanceRating
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(manager_survey_data)
## # A tibble: 6 × 3
##   EmployeeID JobInvolvement PerformanceRating
##        <dbl>          <dbl>             <dbl>
## 1          1              3                 3
## 2          2              2                 4
## 3          3              3                 3
## 4          4              2                 3
## 5          5              3                 3
## 6          6              3                 3
colnames(manager_survey_data)
## [1] "EmployeeID"        "JobInvolvement"    "PerformanceRating"
in_time <-read_csv(file = "~/Downloads/HR Analytics Case Study/in_time.csv")
## New names:
## * `` -> ...1
## Rows: 4410 Columns: 262── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl    (1): ...1
## lgl   (12): 2015-01-01, 2015-01-14, 2015-01-26, 2015-03-05, 2015-05-01, 2015...
## dttm (249): 2015-01-02, 2015-01-05, 2015-01-06, 2015-01-07, 2015-01-08, 2015...
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(in_time)
## # A tibble: 6 × 262
##    ...1 `2015-01-01` `2015-01-02`        `2015-01-05`        `2015-01-06`       
##   <dbl> <lgl>        <dttm>              <dttm>              <dttm>             
## 1     1 NA           2015-01-02 09:43:45 2015-01-05 10:08:48 2015-01-06 09:54:26
## 2     2 NA           2015-01-02 10:15:44 2015-01-05 10:21:05 NA                 
## 3     3 NA           2015-01-02 10:17:41 2015-01-05 09:50:50 2015-01-06 10:14:13
## 4     4 NA           2015-01-02 10:05:06 2015-01-05 09:56:32 2015-01-06 10:11:07
## 5     5 NA           2015-01-02 10:28:17 2015-01-05 09:49:58 2015-01-06 09:45:28
## 6     6 NA           2015-01-02 09:43:08 2015-01-05 10:14:00 2015-01-06 10:08:42
## # … with 257 more variables: `2015-01-07` <dttm>, `2015-01-08` <dttm>,
## #   `2015-01-09` <dttm>, `2015-01-12` <dttm>, `2015-01-13` <dttm>,
## #   `2015-01-14` <lgl>, `2015-01-15` <dttm>, `2015-01-16` <dttm>,
## #   `2015-01-19` <dttm>, `2015-01-20` <dttm>, `2015-01-21` <dttm>,
## #   `2015-01-22` <dttm>, `2015-01-23` <dttm>, `2015-01-26` <lgl>,
## #   `2015-01-27` <dttm>, `2015-01-28` <dttm>, `2015-01-29` <dttm>,
## #   `2015-01-30` <dttm>, `2015-02-02` <dttm>, `2015-02-03` <dttm>, …
colnames(in_time)
##   [1] "...1"       "2015-01-01" "2015-01-02" "2015-01-05" "2015-01-06"
##   [6] "2015-01-07" "2015-01-08" "2015-01-09" "2015-01-12" "2015-01-13"
##  [11] "2015-01-14" "2015-01-15" "2015-01-16" "2015-01-19" "2015-01-20"
##  [16] "2015-01-21" "2015-01-22" "2015-01-23" "2015-01-26" "2015-01-27"
##  [21] "2015-01-28" "2015-01-29" "2015-01-30" "2015-02-02" "2015-02-03"
##  [26] "2015-02-04" "2015-02-05" "2015-02-06" "2015-02-09" "2015-02-10"
##  [31] "2015-02-11" "2015-02-12" "2015-02-13" "2015-02-16" "2015-02-17"
##  [36] "2015-02-18" "2015-02-19" "2015-02-20" "2015-02-23" "2015-02-24"
##  [41] "2015-02-25" "2015-02-26" "2015-02-27" "2015-03-02" "2015-03-03"
##  [46] "2015-03-04" "2015-03-05" "2015-03-06" "2015-03-09" "2015-03-10"
##  [51] "2015-03-11" "2015-03-12" "2015-03-13" "2015-03-16" "2015-03-17"
##  [56] "2015-03-18" "2015-03-19" "2015-03-20" "2015-03-23" "2015-03-24"
##  [61] "2015-03-25" "2015-03-26" "2015-03-27" "2015-03-30" "2015-03-31"
##  [66] "2015-04-01" "2015-04-02" "2015-04-03" "2015-04-06" "2015-04-07"
##  [71] "2015-04-08" "2015-04-09" "2015-04-10" "2015-04-13" "2015-04-14"
##  [76] "2015-04-15" "2015-04-16" "2015-04-17" "2015-04-20" "2015-04-21"
##  [81] "2015-04-22" "2015-04-23" "2015-04-24" "2015-04-27" "2015-04-28"
##  [86] "2015-04-29" "2015-04-30" "2015-05-01" "2015-05-04" "2015-05-05"
##  [91] "2015-05-06" "2015-05-07" "2015-05-08" "2015-05-11" "2015-05-12"
##  [96] "2015-05-13" "2015-05-14" "2015-05-15" "2015-05-18" "2015-05-19"
## [101] "2015-05-20" "2015-05-21" "2015-05-22" "2015-05-25" "2015-05-26"
## [106] "2015-05-27" "2015-05-28" "2015-05-29" "2015-06-01" "2015-06-02"
## [111] "2015-06-03" "2015-06-04" "2015-06-05" "2015-06-08" "2015-06-09"
## [116] "2015-06-10" "2015-06-11" "2015-06-12" "2015-06-15" "2015-06-16"
## [121] "2015-06-17" "2015-06-18" "2015-06-19" "2015-06-22" "2015-06-23"
## [126] "2015-06-24" "2015-06-25" "2015-06-26" "2015-06-29" "2015-06-30"
## [131] "2015-07-01" "2015-07-02" "2015-07-03" "2015-07-06" "2015-07-07"
## [136] "2015-07-08" "2015-07-09" "2015-07-10" "2015-07-13" "2015-07-14"
## [141] "2015-07-15" "2015-07-16" "2015-07-17" "2015-07-20" "2015-07-21"
## [146] "2015-07-22" "2015-07-23" "2015-07-24" "2015-07-27" "2015-07-28"
## [151] "2015-07-29" "2015-07-30" "2015-07-31" "2015-08-03" "2015-08-04"
## [156] "2015-08-05" "2015-08-06" "2015-08-07" "2015-08-10" "2015-08-11"
## [161] "2015-08-12" "2015-08-13" "2015-08-14" "2015-08-17" "2015-08-18"
## [166] "2015-08-19" "2015-08-20" "2015-08-21" "2015-08-24" "2015-08-25"
## [171] "2015-08-26" "2015-08-27" "2015-08-28" "2015-08-31" "2015-09-01"
## [176] "2015-09-02" "2015-09-03" "2015-09-04" "2015-09-07" "2015-09-08"
## [181] "2015-09-09" "2015-09-10" "2015-09-11" "2015-09-14" "2015-09-15"
## [186] "2015-09-16" "2015-09-17" "2015-09-18" "2015-09-21" "2015-09-22"
## [191] "2015-09-23" "2015-09-24" "2015-09-25" "2015-09-28" "2015-09-29"
## [196] "2015-09-30" "2015-10-01" "2015-10-02" "2015-10-05" "2015-10-06"
## [201] "2015-10-07" "2015-10-08" "2015-10-09" "2015-10-12" "2015-10-13"
## [206] "2015-10-14" "2015-10-15" "2015-10-16" "2015-10-19" "2015-10-20"
## [211] "2015-10-21" "2015-10-22" "2015-10-23" "2015-10-26" "2015-10-27"
## [216] "2015-10-28" "2015-10-29" "2015-10-30" "2015-11-02" "2015-11-03"
## [221] "2015-11-04" "2015-11-05" "2015-11-06" "2015-11-09" "2015-11-10"
## [226] "2015-11-11" "2015-11-12" "2015-11-13" "2015-11-16" "2015-11-17"
## [231] "2015-11-18" "2015-11-19" "2015-11-20" "2015-11-23" "2015-11-24"
## [236] "2015-11-25" "2015-11-26" "2015-11-27" "2015-11-30" "2015-12-01"
## [241] "2015-12-02" "2015-12-03" "2015-12-04" "2015-12-07" "2015-12-08"
## [246] "2015-12-09" "2015-12-10" "2015-12-11" "2015-12-14" "2015-12-15"
## [251] "2015-12-16" "2015-12-17" "2015-12-18" "2015-12-21" "2015-12-22"
## [256] "2015-12-23" "2015-12-24" "2015-12-25" "2015-12-28" "2015-12-29"
## [261] "2015-12-30" "2015-12-31"
out_time <- read_csv(file = "~/Downloads/HR Analytics Case Study/out_time.csv")
## New names:
## * `` -> ...1
## Rows: 4410 Columns: 262── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl    (1): ...1
## lgl   (12): 2015-01-01, 2015-01-14, 2015-01-26, 2015-03-05, 2015-05-01, 2015...
## dttm (249): 2015-01-02, 2015-01-05, 2015-01-06, 2015-01-07, 2015-01-08, 2015...
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(out_time)
## # A tibble: 6 × 262
##    ...1 `2015-01-01` `2015-01-02`        `2015-01-05`        `2015-01-06`       
##   <dbl> <lgl>        <dttm>              <dttm>              <dttm>             
## 1     1 NA           2015-01-02 16:56:15 2015-01-05 17:20:11 2015-01-06 17:19:05
## 2     2 NA           2015-01-02 18:22:17 2015-01-05 17:48:22 NA                 
## 3     3 NA           2015-01-02 16:59:14 2015-01-05 17:06:46 2015-01-06 16:38:32
## 4     4 NA           2015-01-02 17:25:24 2015-01-05 17:14:03 2015-01-06 17:07:42
## 5     5 NA           2015-01-02 18:31:37 2015-01-05 17:49:15 2015-01-06 17:26:25
## 6     6 NA           2015-01-02 20:29:54 2015-01-05 20:57:19 2015-01-06 21:06:31
## # … with 257 more variables: `2015-01-07` <dttm>, `2015-01-08` <dttm>,
## #   `2015-01-09` <dttm>, `2015-01-12` <dttm>, `2015-01-13` <dttm>,
## #   `2015-01-14` <lgl>, `2015-01-15` <dttm>, `2015-01-16` <dttm>,
## #   `2015-01-19` <dttm>, `2015-01-20` <dttm>, `2015-01-21` <dttm>,
## #   `2015-01-22` <dttm>, `2015-01-23` <dttm>, `2015-01-26` <lgl>,
## #   `2015-01-27` <dttm>, `2015-01-28` <dttm>, `2015-01-29` <dttm>,
## #   `2015-01-30` <dttm>, `2015-02-02` <dttm>, `2015-02-03` <dttm>, …
colnames(out_time)
##   [1] "...1"       "2015-01-01" "2015-01-02" "2015-01-05" "2015-01-06"
##   [6] "2015-01-07" "2015-01-08" "2015-01-09" "2015-01-12" "2015-01-13"
##  [11] "2015-01-14" "2015-01-15" "2015-01-16" "2015-01-19" "2015-01-20"
##  [16] "2015-01-21" "2015-01-22" "2015-01-23" "2015-01-26" "2015-01-27"
##  [21] "2015-01-28" "2015-01-29" "2015-01-30" "2015-02-02" "2015-02-03"
##  [26] "2015-02-04" "2015-02-05" "2015-02-06" "2015-02-09" "2015-02-10"
##  [31] "2015-02-11" "2015-02-12" "2015-02-13" "2015-02-16" "2015-02-17"
##  [36] "2015-02-18" "2015-02-19" "2015-02-20" "2015-02-23" "2015-02-24"
##  [41] "2015-02-25" "2015-02-26" "2015-02-27" "2015-03-02" "2015-03-03"
##  [46] "2015-03-04" "2015-03-05" "2015-03-06" "2015-03-09" "2015-03-10"
##  [51] "2015-03-11" "2015-03-12" "2015-03-13" "2015-03-16" "2015-03-17"
##  [56] "2015-03-18" "2015-03-19" "2015-03-20" "2015-03-23" "2015-03-24"
##  [61] "2015-03-25" "2015-03-26" "2015-03-27" "2015-03-30" "2015-03-31"
##  [66] "2015-04-01" "2015-04-02" "2015-04-03" "2015-04-06" "2015-04-07"
##  [71] "2015-04-08" "2015-04-09" "2015-04-10" "2015-04-13" "2015-04-14"
##  [76] "2015-04-15" "2015-04-16" "2015-04-17" "2015-04-20" "2015-04-21"
##  [81] "2015-04-22" "2015-04-23" "2015-04-24" "2015-04-27" "2015-04-28"
##  [86] "2015-04-29" "2015-04-30" "2015-05-01" "2015-05-04" "2015-05-05"
##  [91] "2015-05-06" "2015-05-07" "2015-05-08" "2015-05-11" "2015-05-12"
##  [96] "2015-05-13" "2015-05-14" "2015-05-15" "2015-05-18" "2015-05-19"
## [101] "2015-05-20" "2015-05-21" "2015-05-22" "2015-05-25" "2015-05-26"
## [106] "2015-05-27" "2015-05-28" "2015-05-29" "2015-06-01" "2015-06-02"
## [111] "2015-06-03" "2015-06-04" "2015-06-05" "2015-06-08" "2015-06-09"
## [116] "2015-06-10" "2015-06-11" "2015-06-12" "2015-06-15" "2015-06-16"
## [121] "2015-06-17" "2015-06-18" "2015-06-19" "2015-06-22" "2015-06-23"
## [126] "2015-06-24" "2015-06-25" "2015-06-26" "2015-06-29" "2015-06-30"
## [131] "2015-07-01" "2015-07-02" "2015-07-03" "2015-07-06" "2015-07-07"
## [136] "2015-07-08" "2015-07-09" "2015-07-10" "2015-07-13" "2015-07-14"
## [141] "2015-07-15" "2015-07-16" "2015-07-17" "2015-07-20" "2015-07-21"
## [146] "2015-07-22" "2015-07-23" "2015-07-24" "2015-07-27" "2015-07-28"
## [151] "2015-07-29" "2015-07-30" "2015-07-31" "2015-08-03" "2015-08-04"
## [156] "2015-08-05" "2015-08-06" "2015-08-07" "2015-08-10" "2015-08-11"
## [161] "2015-08-12" "2015-08-13" "2015-08-14" "2015-08-17" "2015-08-18"
## [166] "2015-08-19" "2015-08-20" "2015-08-21" "2015-08-24" "2015-08-25"
## [171] "2015-08-26" "2015-08-27" "2015-08-28" "2015-08-31" "2015-09-01"
## [176] "2015-09-02" "2015-09-03" "2015-09-04" "2015-09-07" "2015-09-08"
## [181] "2015-09-09" "2015-09-10" "2015-09-11" "2015-09-14" "2015-09-15"
## [186] "2015-09-16" "2015-09-17" "2015-09-18" "2015-09-21" "2015-09-22"
## [191] "2015-09-23" "2015-09-24" "2015-09-25" "2015-09-28" "2015-09-29"
## [196] "2015-09-30" "2015-10-01" "2015-10-02" "2015-10-05" "2015-10-06"
## [201] "2015-10-07" "2015-10-08" "2015-10-09" "2015-10-12" "2015-10-13"
## [206] "2015-10-14" "2015-10-15" "2015-10-16" "2015-10-19" "2015-10-20"
## [211] "2015-10-21" "2015-10-22" "2015-10-23" "2015-10-26" "2015-10-27"
## [216] "2015-10-28" "2015-10-29" "2015-10-30" "2015-11-02" "2015-11-03"
## [221] "2015-11-04" "2015-11-05" "2015-11-06" "2015-11-09" "2015-11-10"
## [226] "2015-11-11" "2015-11-12" "2015-11-13" "2015-11-16" "2015-11-17"
## [231] "2015-11-18" "2015-11-19" "2015-11-20" "2015-11-23" "2015-11-24"
## [236] "2015-11-25" "2015-11-26" "2015-11-27" "2015-11-30" "2015-12-01"
## [241] "2015-12-02" "2015-12-03" "2015-12-04" "2015-12-07" "2015-12-08"
## [246] "2015-12-09" "2015-12-10" "2015-12-11" "2015-12-14" "2015-12-15"
## [251] "2015-12-16" "2015-12-17" "2015-12-18" "2015-12-21" "2015-12-22"
## [256] "2015-12-23" "2015-12-24" "2015-12-25" "2015-12-28" "2015-12-29"
## [261] "2015-12-30" "2015-12-31"


Verify the value of each columns

unique(employee_survey_data$EmployeeID)
unique(general_data$EmployeeID)
unique(manager_survey_data$EmployeeID)


Checking for duplicates

sum(duplicated(employee_survey_data))
## [1] 0
sum(duplicated(general_data))
## [1] 0
sum(duplicated(in_time))
## [1] 0
sum(duplicated(manager_survey_data))
## [1] 0
sum(duplicated(out_time))
## [1] 0


Consistency of data and time format

In in_time and out_time, I will format data-time value as time only.

in_time_1 <- in_time %>%
mutate("2015-01-02" = strftime("2015-01-02", format = "%I:%M:%S %p", tz=Sys.timezone()))
head(in_time_1 )
## # A tibble: 6 × 262
##    ...1 `2015-01-01` `2015-01-02` `2015-01-05`        `2015-01-06`       
##   <dbl> <lgl>        <chr>        <dttm>              <dttm>             
## 1     1 NA           12:00:00 am  2015-01-05 10:08:48 2015-01-06 09:54:26
## 2     2 NA           12:00:00 am  2015-01-05 10:21:05 NA                 
## 3     3 NA           12:00:00 am  2015-01-05 09:50:50 2015-01-06 10:14:13
## 4     4 NA           12:00:00 am  2015-01-05 09:56:32 2015-01-06 10:11:07
## 5     5 NA           12:00:00 am  2015-01-05 09:49:58 2015-01-06 09:45:28
## 6     6 NA           12:00:00 am  2015-01-05 10:14:00 2015-01-06 10:08:42
## # … with 257 more variables: `2015-01-07` <dttm>, `2015-01-08` <dttm>,
## #   `2015-01-09` <dttm>, `2015-01-12` <dttm>, `2015-01-13` <dttm>,
## #   `2015-01-14` <lgl>, `2015-01-15` <dttm>, `2015-01-16` <dttm>,
## #   `2015-01-19` <dttm>, `2015-01-20` <dttm>, `2015-01-21` <dttm>,
## #   `2015-01-22` <dttm>, `2015-01-23` <dttm>, `2015-01-26` <lgl>,
## #   `2015-01-27` <dttm>, `2015-01-28` <dttm>, `2015-01-29` <dttm>,
## #   `2015-01-30` <dttm>, `2015-02-02` <dttm>, `2015-02-03` <dttm>, …
col_num<-ncol(in_time)
for (i in 2:col_num){
      a <- colnames(in_time[i])
      b <- colnames(out_time[i])
      temp<-as.numeric(difftime(in_time[[a]], out_time[[b]], units ="hours"))
      in_time[ , ncol(in_time) + 1] <- temp
    
}


Merging dataset

I will merge general_data employee_survey_data manager_survey_data into one consolidated dataframe.

Since merge only merge two tables once a time, I will first combine general data and employee_survey_data. Then I combine with manger_survey_data.

HR_data <- merge (general_data, employee_survey_data, by=c("EmployeeID"))
head(HR_data)
##   EmployeeID Age Attrition    BusinessTravel             Department
## 1          1  51        No     Travel_Rarely                  Sales
## 2          2  31       Yes Travel_Frequently Research & Development
## 3          3  32        No Travel_Frequently Research & Development
## 4          4  38        No        Non-Travel Research & Development
## 5          5  32        No     Travel_Rarely Research & Development
## 6          6  46        No     Travel_Rarely Research & Development
##   DistanceFromHome Education EducationField EmployeeCount Gender JobLevel
## 1                6         2  Life Sciences             1 Female        1
## 2               10         1  Life Sciences             1 Female        1
## 3               17         4          Other             1   Male        4
## 4                2         5  Life Sciences             1   Male        3
## 5               10         1        Medical             1   Male        1
## 6                8         3  Life Sciences             1 Female        4
##                     JobRole MaritalStatus MonthlyIncome NumCompaniesWorked
## 1 Healthcare Representative       Married        131160                  1
## 2        Research Scientist        Single         41890                  0
## 3           Sales Executive       Married        193280                  1
## 4           Human Resources       Married         83210                  3
## 5           Sales Executive        Single         23420                  4
## 6         Research Director       Married         40710                  3
##   Over18 PercentSalaryHike StandardHours StockOptionLevel TotalWorkingYears
## 1      Y                11             8                0                 1
## 2      Y                23             8                1                 6
## 3      Y                15             8                3                 5
## 4      Y                11             8                3                13
## 5      Y                12             8                2                 9
## 6      Y                13             8                0                28
##   TrainingTimesLastYear YearsAtCompany YearsSinceLastPromotion
## 1                     6              1                       0
## 2                     3              5                       1
## 3                     2              5                       0
## 4                     5              8                       7
## 5                     2              6                       0
## 6                     5              7                       7
##   YearsWithCurrManager EnvironmentSatisfaction JobSatisfaction WorkLifeBalance
## 1                    0                       3               4               2
## 2                    4                       3               2               4
## 3                    3                       2               2               1
## 4                    5                       4               4               3
## 5                    4                       4               1               3
## 6                    7                       3               2               2
write_csv(HR_data,"HR_data.csv")


Now, I will combine with manager_survey_data.

HR_data_combined <- merge (HR_data, manager_survey_data, by=c("EmployeeID"))
head(HR_data_combined)
##   EmployeeID Age Attrition    BusinessTravel             Department
## 1          1  51        No     Travel_Rarely                  Sales
## 2          2  31       Yes Travel_Frequently Research & Development
## 3          3  32        No Travel_Frequently Research & Development
## 4          4  38        No        Non-Travel Research & Development
## 5          5  32        No     Travel_Rarely Research & Development
## 6          6  46        No     Travel_Rarely Research & Development
##   DistanceFromHome Education EducationField EmployeeCount Gender JobLevel
## 1                6         2  Life Sciences             1 Female        1
## 2               10         1  Life Sciences             1 Female        1
## 3               17         4          Other             1   Male        4
## 4                2         5  Life Sciences             1   Male        3
## 5               10         1        Medical             1   Male        1
## 6                8         3  Life Sciences             1 Female        4
##                     JobRole MaritalStatus MonthlyIncome NumCompaniesWorked
## 1 Healthcare Representative       Married        131160                  1
## 2        Research Scientist        Single         41890                  0
## 3           Sales Executive       Married        193280                  1
## 4           Human Resources       Married         83210                  3
## 5           Sales Executive        Single         23420                  4
## 6         Research Director       Married         40710                  3
##   Over18 PercentSalaryHike StandardHours StockOptionLevel TotalWorkingYears
## 1      Y                11             8                0                 1
## 2      Y                23             8                1                 6
## 3      Y                15             8                3                 5
## 4      Y                11             8                3                13
## 5      Y                12             8                2                 9
## 6      Y                13             8                0                28
##   TrainingTimesLastYear YearsAtCompany YearsSinceLastPromotion
## 1                     6              1                       0
## 2                     3              5                       1
## 3                     2              5                       0
## 4                     5              8                       7
## 5                     2              6                       0
## 6                     5              7                       7
##   YearsWithCurrManager EnvironmentSatisfaction JobSatisfaction WorkLifeBalance
## 1                    0                       3               4               2
## 2                    4                       3               2               4
## 3                    3                       2               2               1
## 4                    5                       4               4               3
## 5                    4                       4               1               3
## 6                    7                       3               2               2
##   JobInvolvement PerformanceRating
## 1              3                 3
## 2              2                 4
## 3              3                 3
## 4              2                 3
## 5              3                 3
## 6              3                 3


Replace numeric value to what it represents as follows:

HR_data_combined$Education[HR_data_combined$Education == "1"] <- "Below College"
HR_data_combined$Education[HR_data_combined$Education == "2"] <- "College"
HR_data_combined$Education[HR_data_combined$Education == "3"] <- "Bachelor"
HR_data_combined$Education[HR_data_combined$Education == "4"] <- "Master"
HR_data_combined$Education[HR_data_combined$Education == "5"] <- "Doctor"


HR_data_combined$EnvironmentSatisfaction[HR_data_combined$EnvironmentSatisfaction == "1"] <- "Low"
HR_data_combined$EnvironmentSatisfaction[HR_data_combined$EnvironmentSatisfaction == "2"] <- "Medium"
HR_data_combined$EnvironmentSatisfaction[HR_data_combined$EnvironmentSatisfaction == "3"] <- "High"
HR_data_combined$EnvironmentSatisfaction[HR_data_combined$EnvironmentSatisfaction == "4"] <- "Very High"


HR_data_combined$JobSatisfaction[HR_data_combined$JobSatisfaction == "1"] <- "Low"
HR_data_combined$JobSatisfaction[HR_data_combined$JobSatisfaction == "2"] <- "Medium"
HR_data_combined$JobSatisfaction[HR_data_combined$JobSatisfaction == "3"] <- "High"
HR_data_combined$JobSatisfaction[HR_data_combined$JobSatisfaction == "4"] <- "Very High"


HR_data_combined$WorkLifeBalance[HR_data_combined$WorkLifeBalance == "1"] <- "Bad"
HR_data_combined$WorkLifeBalance[HR_data_combined$WorkLifeBalance == "2"] <- "Good"
HR_data_combined$WorkLifeBalance[HR_data_combined$WorkLifeBalance == "3"] <- "Better"
HR_data_combined$WorkLifeBalance[HR_data_combined$WorkLifeBalance == "4"] <- "Best"


HR_data_combined$JobInvolvement[HR_data_combined$JobInvolvement == "1"] <- "Low"
HR_data_combined$JobInvolvement[HR_data_combined$JobInvolvement == "2"] <- "Medium"
HR_data_combined$JobInvolvement[HR_data_combined$JobInvolvement == "3"] <- "High"
HR_data_combined$JobInvolvement[HR_data_combined$JobInvolvement == "4"] <- "Very High"


HR_data_combined$PerformanceRating[HR_data_combined$PerformanceRating == "1"] <- "Low"
HR_data_combined$PerformanceRating[HR_data_combined$PerformanceRating == "2"] <- "Good"
HR_data_combined$PerformanceRating[HR_data_combined$PerformanceRating == "3"] <- "Excellent"
HR_data_combined$PerformanceRating[HR_data_combined$PerformanceRating == "4"] <- "Outstanding"

head(HR_data_combined)
##   EmployeeID Age Attrition    BusinessTravel             Department
## 1          1  51        No     Travel_Rarely                  Sales
## 2          2  31       Yes Travel_Frequently Research & Development
## 3          3  32        No Travel_Frequently Research & Development
## 4          4  38        No        Non-Travel Research & Development
## 5          5  32        No     Travel_Rarely Research & Development
## 6          6  46        No     Travel_Rarely Research & Development
##   DistanceFromHome     Education EducationField EmployeeCount Gender JobLevel
## 1                6       College  Life Sciences             1 Female        1
## 2               10 Below College  Life Sciences             1 Female        1
## 3               17        Master          Other             1   Male        4
## 4                2        Doctor  Life Sciences             1   Male        3
## 5               10 Below College        Medical             1   Male        1
## 6                8      Bachelor  Life Sciences             1 Female        4
##                     JobRole MaritalStatus MonthlyIncome NumCompaniesWorked
## 1 Healthcare Representative       Married        131160                  1
## 2        Research Scientist        Single         41890                  0
## 3           Sales Executive       Married        193280                  1
## 4           Human Resources       Married         83210                  3
## 5           Sales Executive        Single         23420                  4
## 6         Research Director       Married         40710                  3
##   Over18 PercentSalaryHike StandardHours StockOptionLevel TotalWorkingYears
## 1      Y                11             8                0                 1
## 2      Y                23             8                1                 6
## 3      Y                15             8                3                 5
## 4      Y                11             8                3                13
## 5      Y                12             8                2                 9
## 6      Y                13             8                0                28
##   TrainingTimesLastYear YearsAtCompany YearsSinceLastPromotion
## 1                     6              1                       0
## 2                     3              5                       1
## 3                     2              5                       0
## 4                     5              8                       7
## 5                     2              6                       0
## 6                     5              7                       7
##   YearsWithCurrManager EnvironmentSatisfaction JobSatisfaction WorkLifeBalance
## 1                    0                    High       Very High            Good
## 2                    4                    High          Medium            Best
## 3                    3                  Medium          Medium             Bad
## 4                    5               Very High       Very High          Better
## 5                    4               Very High             Low          Better
## 6                    7                    High          Medium            Good
##   JobInvolvement PerformanceRating
## 1           High         Excellent
## 2         Medium       Outstanding
## 3           High         Excellent
## 4         Medium         Excellent
## 5           High         Excellent
## 6           High         Excellent
write_csv(HR_data_combined, "HR_data_combined.csv")


Classify employees into different categories


Classify employee by Monthly Income

  • Low income: less then $ 52,200
  • Middle income: $52,200 - 156,600
  • Upper income: more than $156,600
monthlyincome_attrition <- read_csv(file = "~/Downloads/HR Analytics Case Study/monthlyincome_attrition.csv")
## Rows: 4362 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Attrition
## dbl (2): MonthlyIncome, EmployeeID
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Add another column that contains income type

income_type <- monthlyincome_attrition %>%
mutate(income_type = case_when(
        MonthlyIncome <= 52200  ~ "Low Income",
        MonthlyIncome >= 52200 & MonthlyIncome <= 156600 ~ "Middle Income",
        MonthlyIncome > 156600 ~"Upper Income"
       ))
head(income_type) 
## # A tibble: 6 × 4
##   MonthlyIncome Attrition EmployeeID income_type  
##           <dbl> <chr>          <dbl> <chr>        
## 1        131160 No                 1 Middle Income
## 2         41890 Yes                2 Low Income   
## 3        193280 No                 3 Upper Income 
## 4         83210 No                 4 Middle Income
## 5         23420 No                 5 Low Income   
## 6         40710 No                 6 Low Income
write_csv(income_type, "income_type.csv")


Classify employee by Distance From Home

Now we will classify Distance From Home by three groups.

  • Short: > 1 and <7
  • Middle: >=7 and < 10
  • Long: >=10
distancefromhome_attrition <- read_csv(file = "~/Downloads/HR Analytics Case Study/distancefromhome_attrition.csv")
## Rows: 4362 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Attrition
## dbl (2): DistanceFromHome, EmployeeID
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
  max(distancefromhome_attrition$DistanceFromHome)
## [1] 29
  min(distancefromhome_attrition$DistanceFromHome)
## [1] 1
  median(distancefromhome_attrition$DistanceFromHome)
## [1] 7
  mean(distancefromhome_attrition$DistanceFromHome)
## [1] 9.202201
distance_type <- distancefromhome_attrition %>%
  mutate(distance_type = case_when(
    DistanceFromHome > 1 & DistanceFromHome < 7 ~"Short",
    DistanceFromHome >= 7 & DistanceFromHome < 10 ~"Middle",
    DistanceFromHome >=10 ~"Long"
  ))
head(distance_type)
## # A tibble: 6 × 4
##   DistanceFromHome Attrition EmployeeID distance_type
##              <dbl> <chr>          <dbl> <chr>        
## 1                6 No                 1 Short        
## 2               10 Yes                2 Long         
## 3               17 No                 3 Long         
## 4                2 No                 4 Short        
## 5               10 No                 5 Long         
## 6                8 No                 6 Middle
write_csv(distance_type, "distance_type.csv")


Classify employee by Total Working Years

  • Junior: work >0 and <=10
  • Senior: work >=11 and <20
  • Expert: work >=20
totalworkingyears_attrition <- read_csv(file = "~/Downloads/HR Analytics Case Study/totalworkingyears_attrition.csv")
## Rows: 4362 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Attrition
## dbl (2): TotalWorkingYears, EmployeeID
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
min(totalworkingyears_attrition$TotalWorkingYears)
## [1] 0
max(totalworkingyears_attrition$TotalWorkingYears)
## [1] 40
mean(totalworkingyears_attrition$TotalWorkingYears)
## [1] 11.28496
median(totalworkingyears_attrition$TotalWorkingYears)
## [1] 10
workyears_type <- totalworkingyears_attrition %>%
  mutate(workyears_type = case_when(
        TotalWorkingYears >0 & TotalWorkingYears <=10 ~'Juniro',
        TotalWorkingYears >=11 & TotalWorkingYears <20 ~'Senior',
        TotalWorkingYears >=20 ~'Expert'
  ))
head(workyears_type)
## # A tibble: 6 × 4
##   TotalWorkingYears Attrition EmployeeID workyears_type
##               <dbl> <chr>          <dbl> <chr>         
## 1                 1 No                 1 Juniro        
## 2                 6 Yes                2 Juniro        
## 3                 5 No                 3 Juniro        
## 4                13 No                 4 Senior        
## 5                 9 No                 5 Juniro        
## 6                28 No                 6 Expert
write_csv(workyears_type,"workyears_type.csv")


Classify employee by Years At Company

  • Newbie: years at company >0 and <=5
  • Stable Employee: years at company >5 and <=8
  • Loyal Employee: years at company >8
yearsatcompany<-read_csv(file = "~/Downloads/HR Analytics Case Study/yearsatcompany_attrition.csv")
## Rows: 4362 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Attrition
## dbl (2): YearsAtCompany, EmployeeID
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
min(yearsatcompany$YearsAtCompany)
## [1] 0
max(yearsatcompany$YearsAtCompany)
## [1] 40
mean(yearsatcompany$YearsAtCompany)
## [1] 7.008941
median(yearsatcompany$YearsAtCompany)
## [1] 5
loyalty_type <- yearsatcompany %>%
  mutate(loyalty_type = case_when(
         YearsAtCompany >0 & YearsAtCompany <=5 ~'Newbie',
         YearsAtCompany >5 & YearsAtCompany <=8 ~'Stable Employee',
         YearsAtCompany >8 ~'Loyal Employee'
  ))
head(loyalty_type)
## # A tibble: 6 × 4
##   YearsAtCompany Attrition EmployeeID loyalty_type   
##            <dbl> <chr>          <dbl> <chr>          
## 1              1 No                 1 Newbie         
## 2              5 Yes                2 Newbie         
## 3              5 No                 3 Newbie         
## 4              8 No                 4 Stable Employee
## 5              6 No                 5 Stable Employee
## 6              7 No                 6 Stable Employee
write_csv(loyalty_type, "loyal_type.csv")


Classify employee by Age group

  • Young Adults: 17-30
  • Middle-Age Adults: 31-45
  • Old-Aged Adults: Above 45
age_attrition <- read_csv(file = "~/Downloads/HR Analytics Case Study/Age_Attrition.csv")
## Rows: 82 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Attrition
## dbl (2): Age, COUNT(EmployeeID)
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
age_group <- age_attrition %>%
 mutate(age_group = case_when(
         Age >=17 & Age <=30 ~"Young Adults",
         Age >=31 & Age <=45 ~"Middle-Age Adults",
         Age >45 ~"Old-Aged Adults "
  ))
head(age_group)
## # A tibble: 6 × 4
##     Age Attrition `COUNT(EmployeeID)` age_group          
##   <dbl> <chr>                   <dbl> <chr>              
## 1    51 No                         51 "Old-Aged Adults " 
## 2    31 Yes                        54 "Middle-Age Adults"
## 3    32 No                        149 "Middle-Age Adults"
## 4    38 No                        167 "Middle-Age Adults"
## 5    46 No                         87 "Old-Aged Adults " 
## 6    28 Yes                        40 "Young Adults"
write_csv(age_group, "age_group.csv")

Correlation test

HR_data <- read_csv(file = '~/Downloads/HR_analytics_csv/CT_JobSatisfactiomployee_Attrition.csv')
## Rows: 8 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Attrition
## dbl (2): JobSatisfaction, COUNT
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

For categorical values with multiple values, I use fastDummies to sort out.

HR_data_new <- fastDummies::dummy_cols(HR_data_combined)
head(HR_data_new)
##   EmployeeID Age Attrition    BusinessTravel             Department
## 1          1  51        No     Travel_Rarely                  Sales
## 2          2  31       Yes Travel_Frequently Research & Development
## 3          3  32        No Travel_Frequently Research & Development
## 4          4  38        No        Non-Travel Research & Development
## 5          5  32        No     Travel_Rarely Research & Development
## 6          6  46        No     Travel_Rarely Research & Development
##   DistanceFromHome     Education EducationField EmployeeCount Gender JobLevel
## 1                6       College  Life Sciences             1 Female        1
## 2               10 Below College  Life Sciences             1 Female        1
## 3               17        Master          Other             1   Male        4
## 4                2        Doctor  Life Sciences             1   Male        3
## 5               10 Below College        Medical             1   Male        1
## 6                8      Bachelor  Life Sciences             1 Female        4
##                     JobRole MaritalStatus MonthlyIncome NumCompaniesWorked
## 1 Healthcare Representative       Married        131160                  1
## 2        Research Scientist        Single         41890                  0
## 3           Sales Executive       Married        193280                  1
## 4           Human Resources       Married         83210                  3
## 5           Sales Executive        Single         23420                  4
## 6         Research Director       Married         40710                  3
##   Over18 PercentSalaryHike StandardHours StockOptionLevel TotalWorkingYears
## 1      Y                11             8                0                 1
## 2      Y                23             8                1                 6
## 3      Y                15             8                3                 5
## 4      Y                11             8                3                13
## 5      Y                12             8                2                 9
## 6      Y                13             8                0                28
##   TrainingTimesLastYear YearsAtCompany YearsSinceLastPromotion
## 1                     6              1                       0
## 2                     3              5                       1
## 3                     2              5                       0
## 4                     5              8                       7
## 5                     2              6                       0
## 6                     5              7                       7
##   YearsWithCurrManager EnvironmentSatisfaction JobSatisfaction WorkLifeBalance
## 1                    0                    High       Very High            Good
## 2                    4                    High          Medium            Best
## 3                    3                  Medium          Medium             Bad
## 4                    5               Very High       Very High          Better
## 5                    4               Very High             Low          Better
## 6                    7                    High          Medium            Good
##   JobInvolvement PerformanceRating Attrition_No Attrition_Yes
## 1           High         Excellent            1             0
## 2         Medium       Outstanding            0             1
## 3           High         Excellent            1             0
## 4         Medium         Excellent            1             0
## 5           High         Excellent            1             0
## 6           High         Excellent            1             0
##   BusinessTravel_Non-Travel BusinessTravel_Travel_Frequently
## 1                         0                                0
## 2                         0                                1
## 3                         0                                1
## 4                         1                                0
## 5                         0                                0
## 6                         0                                0
##   BusinessTravel_Travel_Rarely Department_Human Resources
## 1                            1                          0
## 2                            0                          0
## 3                            0                          0
## 4                            0                          0
## 5                            1                          0
## 6                            1                          0
##   Department_Research & Development Department_Sales Education_Bachelor
## 1                                 0                1                  0
## 2                                 1                0                  0
## 3                                 1                0                  0
## 4                                 1                0                  0
## 5                                 1                0                  0
## 6                                 1                0                  1
##   Education_Below College Education_College Education_Doctor Education_Master
## 1                       0                 1                0                0
## 2                       1                 0                0                0
## 3                       0                 0                0                1
## 4                       0                 0                1                0
## 5                       1                 0                0                0
## 6                       0                 0                0                0
##   EducationField_Human Resources EducationField_Life Sciences
## 1                              0                            1
## 2                              0                            1
## 3                              0                            0
## 4                              0                            1
## 5                              0                            0
## 6                              0                            1
##   EducationField_Marketing EducationField_Medical EducationField_Other
## 1                        0                      0                    0
## 2                        0                      0                    0
## 3                        0                      0                    1
## 4                        0                      0                    0
## 5                        0                      1                    0
## 6                        0                      0                    0
##   EducationField_Technical Degree Gender_Female Gender_Male
## 1                               0             1           0
## 2                               0             1           0
## 3                               0             0           1
## 4                               0             0           1
## 5                               0             0           1
## 6                               0             1           0
##   JobRole_Healthcare Representative JobRole_Human Resources
## 1                                 1                       0
## 2                                 0                       0
## 3                                 0                       0
## 4                                 0                       1
## 5                                 0                       0
## 6                                 0                       0
##   JobRole_Laboratory Technician JobRole_Manager JobRole_Manufacturing Director
## 1                             0               0                              0
## 2                             0               0                              0
## 3                             0               0                              0
## 4                             0               0                              0
## 5                             0               0                              0
## 6                             0               0                              0
##   JobRole_Research Director JobRole_Research Scientist JobRole_Sales Executive
## 1                         0                          0                       0
## 2                         0                          1                       0
## 3                         0                          0                       1
## 4                         0                          0                       0
## 5                         0                          0                       1
## 6                         1                          0                       0
##   JobRole_Sales Representative MaritalStatus_Divorced MaritalStatus_Married
## 1                            0                      0                     1
## 2                            0                      0                     0
## 3                            0                      0                     1
## 4                            0                      0                     1
## 5                            0                      0                     0
## 6                            0                      0                     1
##   MaritalStatus_Single Over18_Y EnvironmentSatisfaction_High
## 1                    0        1                            1
## 2                    1        1                            1
## 3                    0        1                            0
## 4                    0        1                            0
## 5                    1        1                            0
## 6                    0        1                            1
##   EnvironmentSatisfaction_Low EnvironmentSatisfaction_Medium
## 1                           0                              0
## 2                           0                              0
## 3                           0                              1
## 4                           0                              0
## 5                           0                              0
## 6                           0                              0
##   EnvironmentSatisfaction_Very High EnvironmentSatisfaction_NA
## 1                                 0                          0
## 2                                 0                          0
## 3                                 0                          0
## 4                                 1                          0
## 5                                 1                          0
## 6                                 0                          0
##   JobSatisfaction_High JobSatisfaction_Low JobSatisfaction_Medium
## 1                    0                   0                      0
## 2                    0                   0                      1
## 3                    0                   0                      1
## 4                    0                   0                      0
## 5                    0                   1                      0
## 6                    0                   0                      1
##   JobSatisfaction_Very High JobSatisfaction_NA WorkLifeBalance_Bad
## 1                         1                  0                   0
## 2                         0                  0                   0
## 3                         0                  0                   1
## 4                         1                  0                   0
## 5                         0                  0                   0
## 6                         0                  0                   0
##   WorkLifeBalance_Best WorkLifeBalance_Better WorkLifeBalance_Good
## 1                    0                      0                    1
## 2                    1                      0                    0
## 3                    0                      0                    0
## 4                    0                      1                    0
## 5                    0                      1                    0
## 6                    0                      0                    1
##   WorkLifeBalance_NA JobInvolvement_High JobInvolvement_Low
## 1                  0                   1                  0
## 2                  0                   0                  0
## 3                  0                   1                  0
## 4                  0                   0                  0
## 5                  0                   1                  0
## 6                  0                   1                  0
##   JobInvolvement_Medium JobInvolvement_Very High PerformanceRating_Excellent
## 1                     0                        0                           1
## 2                     1                        0                           0
## 3                     0                        0                           1
## 4                     1                        0                           1
## 5                     0                        0                           1
## 6                     0                        0                           1
##   PerformanceRating_Outstanding
## 1                             0
## 2                             1
## 3                             0
## 4                             0
## 5                             0
## 6                             0
HR_data_new$Attrition[HR_data_new$Attrition == "Yes"] <- 1
HR_data_new$Attrition[HR_data_new$Attrition == "No"] <- 0
HR_data_new$Attrition <- as.numeric(HR_data_new$Attrition)
head(HR_data_new)
##   EmployeeID Age Attrition    BusinessTravel             Department
## 1          1  51         0     Travel_Rarely                  Sales
## 2          2  31         1 Travel_Frequently Research & Development
## 3          3  32         0 Travel_Frequently Research & Development
## 4          4  38         0        Non-Travel Research & Development
## 5          5  32         0     Travel_Rarely Research & Development
## 6          6  46         0     Travel_Rarely Research & Development
##   DistanceFromHome     Education EducationField EmployeeCount Gender JobLevel
## 1                6       College  Life Sciences             1 Female        1
## 2               10 Below College  Life Sciences             1 Female        1
## 3               17        Master          Other             1   Male        4
## 4                2        Doctor  Life Sciences             1   Male        3
## 5               10 Below College        Medical             1   Male        1
## 6                8      Bachelor  Life Sciences             1 Female        4
##                     JobRole MaritalStatus MonthlyIncome NumCompaniesWorked
## 1 Healthcare Representative       Married        131160                  1
## 2        Research Scientist        Single         41890                  0
## 3           Sales Executive       Married        193280                  1
## 4           Human Resources       Married         83210                  3
## 5           Sales Executive        Single         23420                  4
## 6         Research Director       Married         40710                  3
##   Over18 PercentSalaryHike StandardHours StockOptionLevel TotalWorkingYears
## 1      Y                11             8                0                 1
## 2      Y                23             8                1                 6
## 3      Y                15             8                3                 5
## 4      Y                11             8                3                13
## 5      Y                12             8                2                 9
## 6      Y                13             8                0                28
##   TrainingTimesLastYear YearsAtCompany YearsSinceLastPromotion
## 1                     6              1                       0
## 2                     3              5                       1
## 3                     2              5                       0
## 4                     5              8                       7
## 5                     2              6                       0
## 6                     5              7                       7
##   YearsWithCurrManager EnvironmentSatisfaction JobSatisfaction WorkLifeBalance
## 1                    0                    High       Very High            Good
## 2                    4                    High          Medium            Best
## 3                    3                  Medium          Medium             Bad
## 4                    5               Very High       Very High          Better
## 5                    4               Very High             Low          Better
## 6                    7                    High          Medium            Good
##   JobInvolvement PerformanceRating Attrition_No Attrition_Yes
## 1           High         Excellent            1             0
## 2         Medium       Outstanding            0             1
## 3           High         Excellent            1             0
## 4         Medium         Excellent            1             0
## 5           High         Excellent            1             0
## 6           High         Excellent            1             0
##   BusinessTravel_Non-Travel BusinessTravel_Travel_Frequently
## 1                         0                                0
## 2                         0                                1
## 3                         0                                1
## 4                         1                                0
## 5                         0                                0
## 6                         0                                0
##   BusinessTravel_Travel_Rarely Department_Human Resources
## 1                            1                          0
## 2                            0                          0
## 3                            0                          0
## 4                            0                          0
## 5                            1                          0
## 6                            1                          0
##   Department_Research & Development Department_Sales Education_Bachelor
## 1                                 0                1                  0
## 2                                 1                0                  0
## 3                                 1                0                  0
## 4                                 1                0                  0
## 5                                 1                0                  0
## 6                                 1                0                  1
##   Education_Below College Education_College Education_Doctor Education_Master
## 1                       0                 1                0                0
## 2                       1                 0                0                0
## 3                       0                 0                0                1
## 4                       0                 0                1                0
## 5                       1                 0                0                0
## 6                       0                 0                0                0
##   EducationField_Human Resources EducationField_Life Sciences
## 1                              0                            1
## 2                              0                            1
## 3                              0                            0
## 4                              0                            1
## 5                              0                            0
## 6                              0                            1
##   EducationField_Marketing EducationField_Medical EducationField_Other
## 1                        0                      0                    0
## 2                        0                      0                    0
## 3                        0                      0                    1
## 4                        0                      0                    0
## 5                        0                      1                    0
## 6                        0                      0                    0
##   EducationField_Technical Degree Gender_Female Gender_Male
## 1                               0             1           0
## 2                               0             1           0
## 3                               0             0           1
## 4                               0             0           1
## 5                               0             0           1
## 6                               0             1           0
##   JobRole_Healthcare Representative JobRole_Human Resources
## 1                                 1                       0
## 2                                 0                       0
## 3                                 0                       0
## 4                                 0                       1
## 5                                 0                       0
## 6                                 0                       0
##   JobRole_Laboratory Technician JobRole_Manager JobRole_Manufacturing Director
## 1                             0               0                              0
## 2                             0               0                              0
## 3                             0               0                              0
## 4                             0               0                              0
## 5                             0               0                              0
## 6                             0               0                              0
##   JobRole_Research Director JobRole_Research Scientist JobRole_Sales Executive
## 1                         0                          0                       0
## 2                         0                          1                       0
## 3                         0                          0                       1
## 4                         0                          0                       0
## 5                         0                          0                       1
## 6                         1                          0                       0
##   JobRole_Sales Representative MaritalStatus_Divorced MaritalStatus_Married
## 1                            0                      0                     1
## 2                            0                      0                     0
## 3                            0                      0                     1
## 4                            0                      0                     1
## 5                            0                      0                     0
## 6                            0                      0                     1
##   MaritalStatus_Single Over18_Y EnvironmentSatisfaction_High
## 1                    0        1                            1
## 2                    1        1                            1
## 3                    0        1                            0
## 4                    0        1                            0
## 5                    1        1                            0
## 6                    0        1                            1
##   EnvironmentSatisfaction_Low EnvironmentSatisfaction_Medium
## 1                           0                              0
## 2                           0                              0
## 3                           0                              1
## 4                           0                              0
## 5                           0                              0
## 6                           0                              0
##   EnvironmentSatisfaction_Very High EnvironmentSatisfaction_NA
## 1                                 0                          0
## 2                                 0                          0
## 3                                 0                          0
## 4                                 1                          0
## 5                                 1                          0
## 6                                 0                          0
##   JobSatisfaction_High JobSatisfaction_Low JobSatisfaction_Medium
## 1                    0                   0                      0
## 2                    0                   0                      1
## 3                    0                   0                      1
## 4                    0                   0                      0
## 5                    0                   1                      0
## 6                    0                   0                      1
##   JobSatisfaction_Very High JobSatisfaction_NA WorkLifeBalance_Bad
## 1                         1                  0                   0
## 2                         0                  0                   0
## 3                         0                  0                   1
## 4                         1                  0                   0
## 5                         0                  0                   0
## 6                         0                  0                   0
##   WorkLifeBalance_Best WorkLifeBalance_Better WorkLifeBalance_Good
## 1                    0                      0                    1
## 2                    1                      0                    0
## 3                    0                      0                    0
## 4                    0                      1                    0
## 5                    0                      1                    0
## 6                    0                      0                    1
##   WorkLifeBalance_NA JobInvolvement_High JobInvolvement_Low
## 1                  0                   1                  0
## 2                  0                   0                  0
## 3                  0                   1                  0
## 4                  0                   0                  0
## 5                  0                   1                  0
## 6                  0                   1                  0
##   JobInvolvement_Medium JobInvolvement_Very High PerformanceRating_Excellent
## 1                     0                        0                           1
## 2                     1                        0                           0
## 3                     0                        0                           1
## 4                     1                        0                           1
## 5                     0                        0                           1
## 6                     0                        0                           1
##   PerformanceRating_Outstanding
## 1                             0
## 2                             1
## 3                             0
## 4                             0
## 5                             0
## 6                             0
Mat_data = subset(HR_data_new, select = -c(EmployeeID, BusinessTravel, Department, Education, EducationField, Gender, JobRole ,MaritalStatus, EnvironmentSatisfaction, JobSatisfaction, WorkLifeBalance, JobInvolvement, PerformanceRating, Over18_Y,Over18, EmployeeCount, StandardHours, Attrition_Yes, Attrition_No))
Mat_data[is.na(Mat_data)] <- 0
head(Mat_data)
##   Age Attrition DistanceFromHome JobLevel MonthlyIncome NumCompaniesWorked
## 1  51         0                6        1        131160                  1
## 2  31         1               10        1         41890                  0
## 3  32         0               17        4        193280                  1
## 4  38         0                2        3         83210                  3
## 5  32         0               10        1         23420                  4
## 6  46         0                8        4         40710                  3
##   PercentSalaryHike StockOptionLevel TotalWorkingYears TrainingTimesLastYear
## 1                11                0                 1                     6
## 2                23                1                 6                     3
## 3                15                3                 5                     2
## 4                11                3                13                     5
## 5                12                2                 9                     2
## 6                13                0                28                     5
##   YearsAtCompany YearsSinceLastPromotion YearsWithCurrManager
## 1              1                       0                    0
## 2              5                       1                    4
## 3              5                       0                    3
## 4              8                       7                    5
## 5              6                       0                    4
## 6              7                       7                    7
##   BusinessTravel_Non-Travel BusinessTravel_Travel_Frequently
## 1                         0                                0
## 2                         0                                1
## 3                         0                                1
## 4                         1                                0
## 5                         0                                0
## 6                         0                                0
##   BusinessTravel_Travel_Rarely Department_Human Resources
## 1                            1                          0
## 2                            0                          0
## 3                            0                          0
## 4                            0                          0
## 5                            1                          0
## 6                            1                          0
##   Department_Research & Development Department_Sales Education_Bachelor
## 1                                 0                1                  0
## 2                                 1                0                  0
## 3                                 1                0                  0
## 4                                 1                0                  0
## 5                                 1                0                  0
## 6                                 1                0                  1
##   Education_Below College Education_College Education_Doctor Education_Master
## 1                       0                 1                0                0
## 2                       1                 0                0                0
## 3                       0                 0                0                1
## 4                       0                 0                1                0
## 5                       1                 0                0                0
## 6                       0                 0                0                0
##   EducationField_Human Resources EducationField_Life Sciences
## 1                              0                            1
## 2                              0                            1
## 3                              0                            0
## 4                              0                            1
## 5                              0                            0
## 6                              0                            1
##   EducationField_Marketing EducationField_Medical EducationField_Other
## 1                        0                      0                    0
## 2                        0                      0                    0
## 3                        0                      0                    1
## 4                        0                      0                    0
## 5                        0                      1                    0
## 6                        0                      0                    0
##   EducationField_Technical Degree Gender_Female Gender_Male
## 1                               0             1           0
## 2                               0             1           0
## 3                               0             0           1
## 4                               0             0           1
## 5                               0             0           1
## 6                               0             1           0
##   JobRole_Healthcare Representative JobRole_Human Resources
## 1                                 1                       0
## 2                                 0                       0
## 3                                 0                       0
## 4                                 0                       1
## 5                                 0                       0
## 6                                 0                       0
##   JobRole_Laboratory Technician JobRole_Manager JobRole_Manufacturing Director
## 1                             0               0                              0
## 2                             0               0                              0
## 3                             0               0                              0
## 4                             0               0                              0
## 5                             0               0                              0
## 6                             0               0                              0
##   JobRole_Research Director JobRole_Research Scientist JobRole_Sales Executive
## 1                         0                          0                       0
## 2                         0                          1                       0
## 3                         0                          0                       1
## 4                         0                          0                       0
## 5                         0                          0                       1
## 6                         1                          0                       0
##   JobRole_Sales Representative MaritalStatus_Divorced MaritalStatus_Married
## 1                            0                      0                     1
## 2                            0                      0                     0
## 3                            0                      0                     1
## 4                            0                      0                     1
## 5                            0                      0                     0
## 6                            0                      0                     1
##   MaritalStatus_Single EnvironmentSatisfaction_High EnvironmentSatisfaction_Low
## 1                    0                            1                           0
## 2                    1                            1                           0
## 3                    0                            0                           0
## 4                    0                            0                           0
## 5                    1                            0                           0
## 6                    0                            1                           0
##   EnvironmentSatisfaction_Medium EnvironmentSatisfaction_Very High
## 1                              0                                 0
## 2                              0                                 0
## 3                              1                                 0
## 4                              0                                 1
## 5                              0                                 1
## 6                              0                                 0
##   EnvironmentSatisfaction_NA JobSatisfaction_High JobSatisfaction_Low
## 1                          0                    0                   0
## 2                          0                    0                   0
## 3                          0                    0                   0
## 4                          0                    0                   0
## 5                          0                    0                   1
## 6                          0                    0                   0
##   JobSatisfaction_Medium JobSatisfaction_Very High JobSatisfaction_NA
## 1                      0                         1                  0
## 2                      1                         0                  0
## 3                      1                         0                  0
## 4                      0                         1                  0
## 5                      0                         0                  0
## 6                      1                         0                  0
##   WorkLifeBalance_Bad WorkLifeBalance_Best WorkLifeBalance_Better
## 1                   0                    0                      0
## 2                   0                    1                      0
## 3                   1                    0                      0
## 4                   0                    0                      1
## 5                   0                    0                      1
## 6                   0                    0                      0
##   WorkLifeBalance_Good WorkLifeBalance_NA JobInvolvement_High
## 1                    1                  0                   1
## 2                    0                  0                   0
## 3                    0                  0                   1
## 4                    0                  0                   0
## 5                    0                  0                   1
## 6                    1                  0                   1
##   JobInvolvement_Low JobInvolvement_Medium JobInvolvement_Very High
## 1                  0                     0                        0
## 2                  0                     1                        0
## 3                  0                     0                        0
## 4                  0                     1                        0
## 5                  0                     0                        0
## 6                  0                     0                        0
##   PerformanceRating_Excellent PerformanceRating_Outstanding
## 1                           1                             0
## 2                           0                             1
## 3                           1                             0
## 4                           1                             0
## 5                           1                             0
## 6                           1                             0
res <- round(cor(Mat_data), digits = 2)
head(res)
##                      Age Attrition DistanceFromHome JobLevel MonthlyIncome
## Age                 1.00     -0.16             0.01     0.00         -0.04
## Attrition          -0.16      1.00            -0.01    -0.01         -0.03
## DistanceFromHome    0.01     -0.01             1.00    -0.04         -0.02
## JobLevel            0.00     -0.01            -0.04     1.00          0.05
## MonthlyIncome      -0.04     -0.03            -0.02     0.05          1.00
## NumCompaniesWorked  0.30      0.04            -0.01    -0.01         -0.02
##                    NumCompaniesWorked PercentSalaryHike StockOptionLevel
## Age                              0.30             -0.03            -0.03
## Attrition                        0.04              0.03            -0.01
## DistanceFromHome                -0.01              0.04             0.01
## JobLevel                        -0.01              0.01             0.00
## MonthlyIncome                   -0.02              0.00             0.03
## NumCompaniesWorked               1.00              0.03             0.02
##                    TotalWorkingYears TrainingTimesLastYear YearsAtCompany
## Age                             0.68                 -0.03           0.31
## Attrition                      -0.17                 -0.05          -0.13
## DistanceFromHome                0.01                 -0.01           0.03
## JobLevel                       -0.04                 -0.03          -0.06
## MonthlyIncome                  -0.03                  0.05           0.00
## NumCompaniesWorked              0.24                 -0.03          -0.12
##                    YearsSinceLastPromotion YearsWithCurrManager
## Age                                   0.22                 0.20
## Attrition                            -0.03                -0.16
## DistanceFromHome                      0.00                 0.02
## JobLevel                             -0.06                -0.06
## MonthlyIncome                         0.07                 0.02
## NumCompaniesWorked                   -0.04                -0.11
##                    BusinessTravel_Non-Travel BusinessTravel_Travel_Frequently
## Age                                    -0.01                            -0.02
## Attrition                              -0.07                             0.12
## DistanceFromHome                       -0.03                             0.00
## JobLevel                               -0.04                            -0.01
## MonthlyIncome                           0.06                            -0.03
## NumCompaniesWorked                      0.00                            -0.04
##                    BusinessTravel_Travel_Rarely Department_Human Resources
## Age                                        0.03                      -0.01
## Attrition                                 -0.05                       0.08
## DistanceFromHome                           0.02                      -0.02
## JobLevel                                   0.04                       0.04
## MonthlyIncome                             -0.01                      -0.03
## NumCompaniesWorked                         0.03                      -0.06
##                    Department_Research & Development Department_Sales
## Age                                             0.02            -0.02
## Attrition                                      -0.02            -0.02
## DistanceFromHome                                0.01             0.00
## JobLevel                                       -0.04             0.02
## MonthlyIncome                                   0.06            -0.05
## NumCompaniesWorked                              0.05            -0.03
##                    Education_Bachelor Education_Below College Education_College
## Age                             -0.03                    0.04              0.00
## Attrition                       -0.01                   -0.01              0.04
## DistanceFromHome                -0.02                    0.05             -0.03
## JobLevel                         0.03                   -0.06             -0.01
## MonthlyIncome                    0.04                   -0.02              0.00
## NumCompaniesWorked               0.01                    0.01              0.00
##                    Education_Doctor Education_Master
## Age                           -0.05             0.02
## Attrition                     -0.01            -0.01
## DistanceFromHome               0.00             0.02
## JobLevel                       0.00             0.01
## MonthlyIncome                  0.00            -0.03
## NumCompaniesWorked             0.01            -0.03
##                    EducationField_Human Resources EducationField_Life Sciences
## Age                                         -0.05                         0.00
## Attrition                                    0.09                         0.01
## DistanceFromHome                             0.03                         0.01
## JobLevel                                     0.01                         0.01
## MonthlyIncome                               -0.01                         0.01
## NumCompaniesWorked                          -0.04                         0.01
##                    EducationField_Marketing EducationField_Medical
## Age                                   -0.03                   0.01
## Attrition                              0.00                   0.00
## DistanceFromHome                       0.01                  -0.01
## JobLevel                               0.04                  -0.03
## MonthlyIncome                         -0.04                   0.03
## NumCompaniesWorked                     0.01                  -0.01
##                    EducationField_Other EducationField_Technical Degree
## Age                                0.00                            0.05
## Attrition                         -0.03                           -0.04
## DistanceFromHome                  -0.01                           -0.01
## JobLevel                          -0.01                            0.00
## MonthlyIncome                      0.00                           -0.01
## NumCompaniesWorked                 0.00                            0.01
##                    Gender_Female Gender_Male JobRole_Healthcare Representative
## Age                         0.04       -0.04                             -0.05
## Attrition                  -0.02        0.02                             -0.01
## DistanceFromHome            0.04       -0.04                             -0.02
## JobLevel                    0.03       -0.03                              0.01
## MonthlyIncome              -0.01        0.01                             -0.03
## NumCompaniesWorked          0.07       -0.07                              0.01
##                    JobRole_Human Resources JobRole_Laboratory Technician
## Age                                   0.00                          0.03
## Attrition                            -0.01                          0.00
## DistanceFromHome                      0.00                          0.05
## JobLevel                              0.03                         -0.03
## MonthlyIncome                        -0.03                          0.01
## NumCompaniesWorked                    0.03                          0.01
##                    JobRole_Manager JobRole_Manufacturing Director
## Age                           0.01                           0.00
## Attrition                    -0.02                          -0.05
## DistanceFromHome             -0.04                           0.00
## JobLevel                      0.03                           0.00
## MonthlyIncome                -0.01                           0.03
## NumCompaniesWorked           -0.01                           0.00
##                    JobRole_Research Director JobRole_Research Scientist
## Age                                    -0.02                       0.01
## Attrition                               0.05                       0.03
## DistanceFromHome                        0.00                       0.00
## JobLevel                                0.03                       0.02
## MonthlyIncome                           0.00                       0.00
## NumCompaniesWorked                      0.01                      -0.02
##                    JobRole_Sales Executive JobRole_Sales Representative
## Age                                   0.01                        -0.01
## Attrition                             0.01                        -0.01
## DistanceFromHome                     -0.01                         0.01
## JobLevel                             -0.02                        -0.02
## MonthlyIncome                         0.00                         0.00
## NumCompaniesWorked                   -0.02                         0.01
##                    MaritalStatus_Divorced MaritalStatus_Married
## Age                                  0.03                  0.08
## Attrition                           -0.09                 -0.09
## DistanceFromHome                     0.00                  0.04
## JobLevel                             0.01                  0.01
## MonthlyIncome                        0.03                  0.02
## NumCompaniesWorked                   0.04                 -0.01
##                    MaritalStatus_Single EnvironmentSatisfaction_High
## Age                               -0.12                        -0.01
## Attrition                          0.18                        -0.04
## DistanceFromHome                  -0.04                         0.01
## JobLevel                          -0.02                        -0.03
## MonthlyIncome                     -0.05                        -0.03
## NumCompaniesWorked                -0.02                        -0.03
##                    EnvironmentSatisfaction_Low EnvironmentSatisfaction_Medium
## Age                                       0.01                          -0.02
## Attrition                                 0.12                          -0.02
## DistanceFromHome                         -0.01                          -0.02
## JobLevel                                  0.02                           0.02
## MonthlyIncome                            -0.01                           0.05
## NumCompaniesWorked                        0.01                          -0.02
##                    EnvironmentSatisfaction_Very High EnvironmentSatisfaction_NA
## Age                                             0.02                      -0.02
## Attrition                                      -0.05                       0.01
## DistanceFromHome                                0.02                       0.00
## JobLevel                                       -0.01                      -0.01
## MonthlyIncome                                   0.00                       0.01
## NumCompaniesWorked                              0.03                       0.01
##                    JobSatisfaction_High JobSatisfaction_Low
## Age                                0.01                0.00
## Attrition                          0.01                0.09
## DistanceFromHome                   0.02                0.00
## JobLevel                          -0.03                0.03
## MonthlyIncome                      0.05                0.01
## NumCompaniesWorked                -0.01                0.03
##                    JobSatisfaction_Medium JobSatisfaction_Very High
## Age                                  0.00                     -0.01
## Attrition                            0.00                     -0.09
## DistanceFromHome                     0.00                     -0.02
## JobLevel                             0.00                      0.01
## MonthlyIncome                       -0.05                     -0.02
## NumCompaniesWorked                   0.03                     -0.05
##                    JobSatisfaction_NA WorkLifeBalance_Bad WorkLifeBalance_Best
## Age                              0.00                0.01                -0.01
## Attrition                       -0.02                0.10                 0.02
## DistanceFromHome                -0.01                0.03                -0.02
## JobLevel                        -0.02                0.01                -0.03
## MonthlyIncome                   -0.02                0.03                 0.02
## NumCompaniesWorked               0.01                0.04                 0.04
##                    WorkLifeBalance_Better WorkLifeBalance_Good
## Age                                 -0.01                 0.02
## Attrition                           -0.06                 0.01
## DistanceFromHome                     0.05                -0.06
## JobLevel                             0.00                 0.01
## MonthlyIncome                       -0.01                -0.02
## NumCompaniesWorked                  -0.04                 0.00
##                    WorkLifeBalance_NA JobInvolvement_High JobInvolvement_Low
## Age                              0.02               -0.02               0.00
## Attrition                       -0.01               -0.03               0.04
## DistanceFromHome                 0.01               -0.04               0.01
## JobLevel                        -0.01                0.01              -0.01
## MonthlyIncome                    0.00                0.03              -0.03
## NumCompaniesWorked              -0.01               -0.02              -0.01
##                    JobInvolvement_Medium JobInvolvement_Very High
## Age                                -0.01                     0.04
## Attrition                           0.00                     0.02
## DistanceFromHome                    0.02                     0.03
## JobLevel                            0.01                    -0.03
## MonthlyIncome                      -0.01                    -0.01
## NumCompaniesWorked                 -0.01                     0.05
##                    PerformanceRating_Excellent PerformanceRating_Outstanding
## Age                                       0.03                         -0.03
## Attrition                                -0.02                          0.02
## DistanceFromHome                         -0.04                          0.04
## JobLevel                                  0.00                          0.00
## MonthlyIncome                            -0.02                          0.02
## NumCompaniesWorked                       -0.02                          0.02


Melt the correlation Matrix

melted_res <- reshape2::melt(res, value.name = "Correlation")
head(melted_res)
##                 Var1 Var2 Correlation
## 1                Age  Age        1.00
## 2          Attrition  Age       -0.16
## 3   DistanceFromHome  Age        0.01
## 4           JobLevel  Age        0.00
## 5      MonthlyIncome  Age       -0.04
## 6 NumCompaniesWorked  Age        0.30
write_csv(melted_res,"melted_res.csv")


Extract the Correlation of Attrition from Matrix.

attrition_ext <- melted_res[melted_res$Var2=="Attrition", c("Var1", "Var2", "Correlation")]
head(attrition_ext)
##                  Var1      Var2 Correlation
## 66                Age Attrition       -0.16
## 67          Attrition Attrition        1.00
## 68   DistanceFromHome Attrition       -0.01
## 69           JobLevel Attrition       -0.01
## 70      MonthlyIncome Attrition       -0.03
## 71 NumCompaniesWorked Attrition        0.04
write.csv(attrition_ext, "attrition_ext.csv")


Visualize the Matrix of Attrition above in a Bar chart by descending order.

ggplot(attrition_ext, aes(reorder(Var1, -Correlation), Correlation))+geom_bar(stat = "identity", fill="#99CCFF")+
  theme_bw()+theme(axis.text.x = element_text(angle=90, size=8, vjust=0.5, hjust=1))+
  scale_y_continuous(name="Correlation", limits = c(-0.2,1)) + geom_hline(yintercept = 0)