Loading the required libraries for the analysis
## Warning: package 'randomForest' was built under R version 4.0.3
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
## Warning: package 'tidyverse' was built under R version 4.0.3
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.5 v dplyr 1.0.3
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.0.3
## Warning: package 'tibble' was built under R version 4.0.3
## Warning: package 'tidyr' was built under R version 4.0.3
## Warning: package 'readr' was built under R version 4.0.3
## Warning: package 'purrr' was built under R version 4.0.3
## Warning: package 'dplyr' was built under R version 4.0.3
## Warning: package 'stringr' was built under R version 4.0.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::combine() masks randomForest::combine()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x ggplot2::margin() masks randomForest::margin()
## Warning: package 'dbplyr' was built under R version 4.0.3
##
## Attaching package: 'dbplyr'
## The following objects are masked from 'package:dplyr':
##
## ident, sql
library(readr)
library(devtools)
## Warning: package 'devtools' was built under R version 4.0.3
## Loading required package: usethis
## Warning: package 'usethis' was built under R version 4.0.3
#install_github("ramnathv/htmlwidgets")
#install_github("smartinsightsfromdata/rpivotTable")
library(rpivotTable)
library(easyalluvial)
library(parcats)
## Warning: package 'parcats' was built under R version 4.0.3
#install.packages("ROSE")
library(ROSE)
## Warning: package 'ROSE' was built under R version 4.0.3
## Loaded ROSE 0.0-3
#devtools::install_github("MI2DataLab/randomForestExplainer")
library(randomForestExplainer)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
Reading the HR data and looking at the summary statistics
# Location of the HR employee attrition CSV. Defaults to the original local
# path; set the HR_ATTRITION_CSV environment variable to point at the file on
# another machine instead of editing the script.
hr_csv_path <- Sys.getenv(
  "HR_ATTRITION_CSV",
  unset = "C:/Users/Nikhil/Desktop/WA_Fn-UseC_-HR-Employee-Attrition.csv"
)
# Read the CSV into a tibble; readr guesses the column types.
WA_Fn_UseC_HR_Employee_Attrition <- read_csv(hr_csv_path)
##
## -- Column specification --------------------------------------------------------
## cols(
## .default = col_double(),
## Attrition = col_character(),
## BusinessTravel = col_character(),
## Department = col_character(),
## EducationField = col_character(),
## Gender = col_character(),
## JobRole = col_character(),
## MaritalStatus = col_character(),
## Over18 = col_character(),
## OverTime = col_character()
## )
## i Use `spec()` for the full column specifications.
# Open the data viewer only in an interactive session; View() needs a
# viewer/GUI and serves no purpose when the script is knitted or run in batch.
if (interactive()) {
  View(WA_Fn_UseC_HR_Employee_Attrition)
}
# Shorter working name used by the rest of the analysis.
dataHR <- WA_Fn_UseC_HR_Employee_Attrition
# Print each column's name, type and first few values.
str(dataHR)
## tibble [1,470 x 35] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Age : num [1:1470] 41 49 37 33 27 32 59 30 38 36 ...
## $ Attrition : chr [1:1470] "Yes" "No" "Yes" "No" ...
## $ BusinessTravel : chr [1:1470] "Travel_Rarely" "Travel_Frequently" "Travel_Rarely" "Travel_Frequently" ...
## $ DailyRate : num [1:1470] 1102 279 1373 1392 591 ...
## $ Department : chr [1:1470] "Sales" "Research & Development" "Research & Development" "Research & Development" ...
## $ DistanceFromHome : num [1:1470] 1 8 2 3 2 2 3 24 23 27 ...
## $ Education : num [1:1470] 2 1 2 4 1 2 3 1 3 3 ...
## $ EducationField : chr [1:1470] "Life Sciences" "Life Sciences" "Other" "Life Sciences" ...
## $ EmployeeCount : num [1:1470] 1 1 1 1 1 1 1 1 1 1 ...
## $ EmployeeNumber : num [1:1470] 1 2 4 5 7 8 10 11 12 13 ...
## $ EnvironmentSatisfaction : num [1:1470] 2 3 4 4 1 4 3 4 4 3 ...
## $ Gender : chr [1:1470] "Female" "Male" "Male" "Female" ...
## $ HourlyRate : num [1:1470] 94 61 92 56 40 79 81 67 44 94 ...
## $ JobInvolvement : num [1:1470] 3 2 2 3 3 3 4 3 2 3 ...
## $ JobLevel : num [1:1470] 2 2 1 1 1 1 1 1 3 2 ...
## $ JobRole : chr [1:1470] "Sales Executive" "Research Scientist" "Laboratory Technician" "Research Scientist" ...
## $ JobSatisfaction : num [1:1470] 4 2 3 3 2 4 1 3 3 3 ...
## $ MaritalStatus : chr [1:1470] "Single" "Married" "Single" "Married" ...
## $ MonthlyIncome : num [1:1470] 5993 5130 2090 2909 3468 ...
## $ MonthlyRate : num [1:1470] 19479 24907 2396 23159 16632 ...
## $ NumCompaniesWorked : num [1:1470] 8 1 6 1 9 0 4 1 0 6 ...
## $ Over18 : chr [1:1470] "Y" "Y" "Y" "Y" ...
## $ OverTime : chr [1:1470] "Yes" "No" "Yes" "Yes" ...
## $ PercentSalaryHike : num [1:1470] 11 23 15 11 12 13 20 22 21 13 ...
## $ PerformanceRating : num [1:1470] 3 4 3 3 3 3 4 4 4 3 ...
## $ RelationshipSatisfaction: num [1:1470] 1 4 2 3 4 3 1 2 2 2 ...
## $ StandardHours : num [1:1470] 80 80 80 80 80 80 80 80 80 80 ...
## $ StockOptionLevel : num [1:1470] 0 1 0 0 1 0 3 1 0 2 ...
## $ TotalWorkingYears : num [1:1470] 8 10 7 8 6 8 12 1 10 17 ...
## $ TrainingTimesLastYear : num [1:1470] 0 3 3 3 3 2 3 2 2 3 ...
## $ WorkLifeBalance : num [1:1470] 1 3 3 3 3 2 2 3 3 2 ...
## $ YearsAtCompany : num [1:1470] 6 10 0 8 2 7 1 1 9 7 ...
## $ YearsInCurrentRole : num [1:1470] 4 7 0 7 2 7 0 0 7 7 ...
## $ YearsSinceLastPromotion : num [1:1470] 0 1 0 3 2 3 0 0 1 7 ...
## $ YearsWithCurrManager : num [1:1470] 5 7 0 0 2 6 0 0 8 7 ...
## - attr(*, "spec")=
## .. cols(
## .. Age = col_double(),
## .. Attrition = col_character(),
## .. BusinessTravel = col_character(),
## .. DailyRate = col_double(),
## .. Department = col_character(),
## .. DistanceFromHome = col_double(),
## .. Education = col_double(),
## .. EducationField = col_character(),
## .. EmployeeCount = col_double(),
## .. EmployeeNumber = col_double(),
## .. EnvironmentSatisfaction = col_double(),
## .. Gender = col_character(),
## .. HourlyRate = col_double(),
## .. JobInvolvement = col_double(),
## .. JobLevel = col_double(),
## .. JobRole = col_character(),
## .. JobSatisfaction = col_double(),
## .. MaritalStatus = col_character(),
## .. MonthlyIncome = col_double(),
## .. MonthlyRate = col_double(),
## .. NumCompaniesWorked = col_double(),
## .. Over18 = col_character(),
## .. OverTime = col_character(),
## .. PercentSalaryHike = col_double(),
## .. PerformanceRating = col_double(),
## .. RelationshipSatisfaction = col_double(),
## .. StandardHours = col_double(),
## .. StockOptionLevel = col_double(),
## .. TotalWorkingYears = col_double(),
## .. TrainingTimesLastYear = col_double(),
## .. WorkLifeBalance = col_double(),
## .. YearsAtCompany = col_double(),
## .. YearsInCurrentRole = col_double(),
## .. YearsSinceLastPromotion = col_double(),
## .. YearsWithCurrManager = col_double()
## .. )
## Age Attrition BusinessTravel DailyRate
## Min. :18.00 Length:1470 Length:1470 Min. : 102.0
## 1st Qu.:30.00 Class :character Class :character 1st Qu.: 465.0
## Median :36.00 Mode :character Mode :character Median : 802.0
## Mean :36.92 Mean : 802.5
## 3rd Qu.:43.00 3rd Qu.:1157.0
## Max. :60.00 Max. :1499.0
## Department DistanceFromHome Education EducationField
## Length:1470 Min. : 1.000 Min. :1.000 Length:1470
## Class :character 1st Qu.: 2.000 1st Qu.:2.000 Class :character
## Mode :character Median : 7.000 Median :3.000 Mode :character
## Mean : 9.193 Mean :2.913
## 3rd Qu.:14.000 3rd Qu.:4.000
## Max. :29.000 Max. :5.000
## EmployeeCount EmployeeNumber EnvironmentSatisfaction Gender
## Min. :1 Min. : 1.0 Min. :1.000 Length:1470
## 1st Qu.:1 1st Qu.: 491.2 1st Qu.:2.000 Class :character
## Median :1 Median :1020.5 Median :3.000 Mode :character
## Mean :1 Mean :1024.9 Mean :2.722
## 3rd Qu.:1 3rd Qu.:1555.8 3rd Qu.:4.000
## Max. :1 Max. :2068.0 Max. :4.000
## HourlyRate JobInvolvement JobLevel JobRole
## Min. : 30.00 Min. :1.00 Min. :1.000 Length:1470
## 1st Qu.: 48.00 1st Qu.:2.00 1st Qu.:1.000 Class :character
## Median : 66.00 Median :3.00 Median :2.000 Mode :character
## Mean : 65.89 Mean :2.73 Mean :2.064
## 3rd Qu.: 83.75 3rd Qu.:3.00 3rd Qu.:3.000
## Max. :100.00 Max. :4.00 Max. :5.000
## JobSatisfaction MaritalStatus MonthlyIncome MonthlyRate
## Min. :1.000 Length:1470 Min. : 1009 Min. : 2094
## 1st Qu.:2.000 Class :character 1st Qu.: 2911 1st Qu.: 8047
## Median :3.000 Mode :character Median : 4919 Median :14236
## Mean :2.729 Mean : 6503 Mean :14313
## 3rd Qu.:4.000 3rd Qu.: 8379 3rd Qu.:20462
## Max. :4.000 Max. :19999 Max. :26999
## NumCompaniesWorked Over18 OverTime PercentSalaryHike
## Min. :0.000 Length:1470 Length:1470 Min. :11.00
## 1st Qu.:1.000 Class :character Class :character 1st Qu.:12.00
## Median :2.000 Mode :character Mode :character Median :14.00
## Mean :2.693 Mean :15.21
## 3rd Qu.:4.000 3rd Qu.:18.00
## Max. :9.000 Max. :25.00
## PerformanceRating RelationshipSatisfaction StandardHours StockOptionLevel
## Min. :3.000 Min. :1.000 Min. :80 Min. :0.0000
## 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:80 1st Qu.:0.0000
## Median :3.000 Median :3.000 Median :80 Median :1.0000
## Mean :3.154 Mean :2.712 Mean :80 Mean :0.7939
## 3rd Qu.:3.000 3rd Qu.:4.000 3rd Qu.:80 3rd Qu.:1.0000
## Max. :4.000 Max. :4.000 Max. :80 Max. :3.0000
## TotalWorkingYears TrainingTimesLastYear WorkLifeBalance YearsAtCompany
## Min. : 0.00 Min. :0.000 Min. :1.000 Min. : 0.000
## 1st Qu.: 6.00 1st Qu.:2.000 1st Qu.:2.000 1st Qu.: 3.000
## Median :10.00 Median :3.000 Median :3.000 Median : 5.000
## Mean :11.28 Mean :2.799 Mean :2.761 Mean : 7.008
## 3rd Qu.:15.00 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.: 9.000
## Max. :40.00 Max. :6.000 Max. :4.000 Max. :40.000
## YearsInCurrentRole YearsSinceLastPromotion YearsWithCurrManager
## Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 2.000 1st Qu.: 0.000 1st Qu.: 2.000
## Median : 3.000 Median : 1.000 Median : 3.000
## Mean : 4.229 Mean : 2.188 Mean : 4.123
## 3rd Qu.: 7.000 3rd Qu.: 3.000 3rd Qu.: 7.000
## Max. :18.000 Max. :15.000 Max. :17.000
## [1] 1470 35
| 41 |
Yes |
Travel_Rarely |
1102 |
Sales |
1 |
2 |
Life Sciences |
1 |
1 |
2 |
Female |
94 |
3 |
2 |
Sales Executive |
4 |
Single |
5993 |
19479 |
8 |
Y |
Yes |
11 |
3 |
1 |
80 |
0 |
8 |
0 |
1 |
6 |
4 |
0 |
5 |
| 49 |
No |
Travel_Frequently |
279 |
Research & Development |
8 |
1 |
Life Sciences |
1 |
2 |
3 |
Male |
61 |
2 |
2 |
Research Scientist |
2 |
Married |
5130 |
24907 |
1 |
Y |
No |
23 |
4 |
4 |
80 |
1 |
10 |
3 |
3 |
10 |
7 |
1 |
7 |
| 37 |
Yes |
Travel_Rarely |
1373 |
Research & Development |
2 |
2 |
Other |
1 |
4 |
4 |
Male |
92 |
2 |
1 |
Laboratory Technician |
3 |
Single |
2090 |
2396 |
6 |
Y |
Yes |
15 |
3 |
2 |
80 |
0 |
7 |
3 |
3 |
0 |
0 |
0 |
0 |
| 33 |
No |
Travel_Frequently |
1392 |
Research & Development |
3 |
4 |
Life Sciences |
1 |
5 |
4 |
Female |
56 |
3 |
1 |
Research Scientist |
3 |
Married |
2909 |
23159 |
1 |
Y |
Yes |
11 |
3 |
3 |
80 |
0 |
8 |
3 |
3 |
8 |
7 |
3 |
0 |
| 27 |
No |
Travel_Rarely |
591 |
Research & Development |
2 |
1 |
Medical |
1 |
7 |
1 |
Male |
40 |
3 |
1 |
Laboratory Technician |
2 |
Married |
3468 |
16632 |
9 |
Y |
No |
12 |
3 |
4 |
80 |
1 |
6 |
3 |
3 |
2 |
2 |
2 |
2 |
| 32 |
No |
Travel_Frequently |
1005 |
Research & Development |
2 |
2 |
Life Sciences |
1 |
8 |
4 |
Male |
79 |
3 |
1 |
Laboratory Technician |
4 |
Single |
3068 |
11864 |
0 |
Y |
No |
13 |
3 |
3 |
80 |
0 |
8 |
2 |
2 |
7 |
7 |
3 |
6 |
Converting the columns to appropriate data types
# Rename the column "ï..Age" to "Age" if it is present (presumably an encoding
# artifact in the header of the first column); otherwise names are unchanged.
col_names <- names(dataHR)
col_names[col_names == "ï..Age"] <- "Age"
names(dataHR) <- col_names

# Columns read as numeric codes that the analysis treats as categorical.
coded_columns <- c(
  "Education", "EnvironmentSatisfaction", "JobInvolvement", "JobLevel",
  "JobSatisfaction", "StockOptionLevel", "PerformanceRating",
  "RelationshipSatisfaction", "WorkLifeBalance"
)
# Convert each of them to a factor in place.
for (column in coded_columns) {
  dataHR[[column]] <- as.factor(dataHR[[column]])
}
Alluvial Plots
# Attach parcats with library() so a missing package stops the script here;
# require() would only return FALSE and let the failure surface later.
suppressPackageStartupMessages(library(parcats))
# Build a wide-format alluvial plot from dataHR, limited to 5 variables.
p <- alluvial_wide(dataHR, max_variables = 5)
# Render the alluvial plot with parcats, adding marginal histograms;
# data_input is the same data frame the plot was built from.
parcats(p, marginal_histograms = TRUE, data_input = dataHR)
Class imbalance treatment for Attrition and dividing the dataset into train and test sets
library(ROSE)
# Fix the RNG so the train/test split is reproducible.
set.seed(18)
# Sample 80% of the row indices for training; the rest form the test set.
index <- sample(nrow(dataHR), nrow(dataHR)*0.8)
data_train <- dataHR[index, ]
data_test <- dataHR[-index,]
# Oversample the minority Attrition class of the training set. With
# method = "over", N is the total size of the returned sample, so a balanced
# result needs N = 2 * (majority-class count). Derive it from the data: the
# previous hard-coded 996 * 2 did not match the training set and produced
# unequal classes.
n_majority <- max(table(data_train$Attrition))
train_balanced <- ovun.sample(
  Attrition ~ .,
  data = data_train, method = "over", N = 2 * n_majority, seed = 1
)$data
# Class counts after oversampling.
# NOTE: the output recorded below was produced with N = 996 * 2; re-run to
# refresh it.
table(train_balanced$Attrition)
##
## No Yes
## 977 1015
Building the random forest model
# Fix the RNG so the fitted forest is reproducible.
set.seed(2017)
# Fit the classifier on the oversampled training set. It was previously fitted
# on data_train, which left train_balanced unused, so the class-imbalance
# treatment had no effect on the model.
# localImp = TRUE asks randomForest to compute casewise importance as well.
# NOTE(review): OOB error on oversampled data is likely optimistic because
# duplicated rows can be both in-bag and out-of-bag; confirm performance on
# data_test. The model summary recorded below predates this change; re-run to
# refresh it.
forest <- randomForest(as.factor(Attrition) ~ ., data = train_balanced, localImp = TRUE)
# Print the model summary (OOB error and confusion matrix).
forest
##
## Call:
## randomForest(formula = as.factor(Attrition) ~ ., data = data_train, localImp = TRUE)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 5
##
## OOB estimate of error rate: 15.14%
## Confusion matrix:
## No Yes class.error
## No 962 15 0.01535312
## Yes 163 36 0.81909548
## Identify the minimal depth distribution of the variables in the forest
# The result is stored because the plots that follow take it as input.
min_depth_frame <- min_depth_distribution(forest)
# Show the first 10 rows of the result.
head(min_depth_frame, n = 10)
| 1 |
Age |
4 |
| 1 |
BusinessTravel |
10 |
| 1 |
DailyRate |
2 |
| 1 |
Department |
3 |
| 1 |
DistanceFromHome |
6 |
| 1 |
Education |
4 |
| 1 |
EducationField |
4 |
| 1 |
EmployeeNumber |
3 |
| 1 |
EnvironmentSatisfaction |
4 |
| 1 |
Gender |
7 |
plotting the minimum depth distribution of random forest
# Minimal depth distribution plot with the function's default settings.
plot_min_depth_distribution(min_depth_frame)

# The same plot with mean_sample = "relevant_trees", repeated for
# k = 15, 10, 7 and 5 (k presumably caps the number of variables shown —
# confirm against the randomForestExplainer documentation).
plot_min_depth_distribution(min_depth_frame, mean_sample = "relevant_trees", k = 15)

plot_min_depth_distribution(min_depth_frame, mean_sample = "relevant_trees", k = 10)

plot_min_depth_distribution(min_depth_frame, mean_sample = "relevant_trees", k = 7)

plot_min_depth_distribution(min_depth_frame, mean_sample = "relevant_trees", k = 5)

Variable importance from Random Forest
# Compute the per-variable importance measures for the fitted forest; the
# result is reused by the importance plots and important_variables() below.
importance_frame <- measure_importance(forest)
# Print the full table (one row per variable).
importance_frame
| Age |
3.096000 |
3654 |
0.0048660 |
18.015176 |
500 |
32 |
0.0000000 |
| BusinessTravel |
6.276096 |
1100 |
0.0007821 |
4.128006 |
449 |
1 |
1.0000000 |
| DailyRate |
3.822000 |
3774 |
-0.0004191 |
16.459973 |
500 |
6 |
0.0000000 |
| Department |
6.103872 |
1025 |
0.0008948 |
4.278470 |
443 |
2 |
1.0000000 |
| DistanceFromHome |
3.960000 |
3277 |
0.0005757 |
14.064970 |
500 |
3 |
0.0000000 |
| Education |
5.107184 |
1998 |
0.0002596 |
7.541793 |
496 |
2 |
0.9030326 |
| EducationField |
5.119776 |
1853 |
0.0000779 |
7.213499 |
494 |
4 |
0.9999980 |
| EmployeeCount |
NA |
0 |
0.0000000 |
0.000000 |
0 |
0 |
1.0000000 |
| EmployeeNumber |
3.934000 |
3603 |
-0.0007056 |
15.008320 |
500 |
3 |
0.0000000 |
| EnvironmentSatisfaction |
3.697888 |
2403 |
0.0027532 |
11.880843 |
497 |
9 |
0.0000000 |
| Gender |
7.270864 |
880 |
0.0000350 |
2.780954 |
416 |
0 |
1.0000000 |
| HourlyRate |
4.197296 |
3428 |
-0.0004743 |
13.784202 |
499 |
1 |
0.0000000 |
| JobInvolvement |
4.331664 |
1706 |
0.0012502 |
8.168434 |
491 |
9 |
1.0000000 |
| JobLevel |
3.967552 |
1420 |
0.0048050 |
9.320446 |
488 |
49 |
1.0000000 |
| JobRole |
4.148592 |
2248 |
0.0019138 |
9.381890 |
498 |
19 |
0.0000111 |
| JobSatisfaction |
3.912592 |
2333 |
0.0025473 |
11.001585 |
498 |
6 |
0.0000000 |
| MaritalStatus |
4.849584 |
1335 |
0.0027792 |
6.826378 |
471 |
17 |
1.0000000 |
| MonthlyIncome |
2.844000 |
4067 |
0.0079190 |
22.961365 |
500 |
47 |
0.0000000 |
| MonthlyRate |
4.237296 |
3450 |
-0.0003348 |
13.697943 |
499 |
5 |
0.0000000 |
| NumCompaniesWorked |
4.579888 |
2397 |
0.0010880 |
9.984568 |
497 |
3 |
0.0000000 |
| Over18 |
NA |
0 |
0.0000000 |
0.000000 |
0 |
0 |
1.0000000 |
| OverTime |
2.748592 |
1613 |
0.0152744 |
19.109253 |
498 |
63 |
1.0000000 |
| PercentSalaryHike |
4.390000 |
2802 |
0.0003409 |
10.902798 |
500 |
0 |
0.0000000 |
| PerformanceRating |
8.820736 |
313 |
0.0001822 |
1.100411 |
234 |
0 |
1.0000000 |
| RelationshipSatisfaction |
4.655888 |
2160 |
-0.0002907 |
8.282852 |
497 |
0 |
0.0103003 |
| StandardHours |
NA |
0 |
0.0000000 |
0.000000 |
0 |
0 |
1.0000000 |
| StockOptionLevel |
3.473776 |
1654 |
0.0054037 |
10.334163 |
494 |
35 |
1.0000000 |
| TotalWorkingYears |
3.154592 |
2791 |
0.0061197 |
16.602159 |
498 |
54 |
0.0000000 |
| TrainingTimesLastYear |
4.709184 |
2172 |
-0.0002434 |
8.109115 |
496 |
2 |
0.0049488 |
| WorkLifeBalance |
4.410480 |
1836 |
0.0016001 |
8.471253 |
495 |
9 |
0.9999997 |
| YearsAtCompany |
3.516000 |
2601 |
0.0046960 |
13.845431 |
500 |
56 |
0.0000000 |
| YearsInCurrentRole |
4.469776 |
2045 |
0.0033190 |
9.107631 |
494 |
26 |
0.5952213 |
| YearsSinceLastPromotion |
4.987184 |
1998 |
0.0007326 |
8.107687 |
496 |
1 |
0.9030326 |
| YearsWithCurrManager |
4.375664 |
1948 |
0.0028666 |
9.137336 |
491 |
36 |
0.9925221 |
Variable ranking plot and importance plot
# Multi-way importance plot built from importance_frame, using the
# "no_of_nodes" measure for point size.
plot_multi_way_importance(importance_frame, size_measure = "no_of_nodes")

# Pairwise comparison of the importance measures (drawn via GGally, as the
# recorded warnings below indicate).
plot_importance_ggpairs(importance_frame)
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 3 rows containing missing values
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing non-finite values (stat_density).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 3 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 3 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 3 rows containing missing values
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_point).

# Compare the variable rankings produced by the different importance measures
# (presumably ranks rather than raw values — confirm against the
# randomForestExplainer documentation).
plot_importance_rankings(importance_frame)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

# Select the top 5 variables according to the "mean_min_depth" and
# "no_of_trees" measures, then print the selection.
vars <- important_variables(
  importance_frame,
  k = 5,
  measures = c("mean_min_depth", "no_of_trees")
)
vars
## [1] "MonthlyIncome" "Age" "OverTime"
## [4] "TotalWorkingYears" "YearsAtCompany"
Interaction Analysis for Random Forest
Interaction plot
Interaction plots for features in the attrition analysis
# Plot the forest's predictions over a grid of two predictors at a time,
# using data_test as the reference data, for several predictor pairs.
plot_predict_interaction(forest, data_test, "DailyRate", "MonthlyIncome")

plot_predict_interaction(forest, data_test, "Age", "MonthlyIncome")

plot_predict_interaction(forest, data_test, "Age", "DistanceFromHome")

# This pair was previously plotted twice in a row (copy-paste duplicate);
# it is now drawn once.
plot_predict_interaction(forest, data_test, "Age", "NumCompaniesWorked")

plot_predict_interaction(forest, data_test, "HourlyRate", "YearsInCurrentRole")

Interactive Pivot Table visualization
# Interactive pivot table of dataHR: Education in rows, Attrition in columns,
# aggregating JobLevel and rendering the result as a treemap.
# - `cols` is spelled out; the call previously passed `col`, which only worked
#   through partial argument matching.
# - The aggregator name no longer has a trailing space, so it matches the
#   "Count Unique Values" aggregator instead of an unknown name.
rpivotTable(
  dataHR,
  rows = "Education", cols = "Attrition",
  aggregatorName = "Count Unique Values", vals = "JobLevel",
  rendererName = "Treemap"
)