Structure of the data frame
# Print a compact summary of HR.df: its class, its dimensions, and for each
# column the type and the first few values (the recorded output follows).
str(HR.df)
## 'data.frame': 8995 obs. of 17 variables:
## $ CandidateRef : int 2110407 2112635 2112838 2115021 2115125 2117167 2119124 2127572 2138169 2143362 ...
## $ DOJExtended : chr "Yes" "No" "No" "No" ...
## $ DurationToAcceptOffer : int 14 18 3 26 1 17 37 16 1 6 ...
## $ NoticePeriod : int 30 30 45 30 120 30 30 0 30 30 ...
## $ OfferedBand : chr "E2" "E2" "E2" "E2" ...
## $ PercentHikeExpectedInCTC: num -20.8 50 42.8 42.8 42.6 ...
## $ PercentHikeOfferedInCTC : num 13.2 320 42.8 42.8 42.6 ...
## $ PercentDifferenceCTC : num 42.9 180 0 0 0 ...
## $ JoiningBonus : chr "No" "No" "No" "No" ...
## $ CandidateRelocateActual : chr "No" "No" "No" "No" ...
## $ Gender : chr "Female" "Male" "Male" "Male" ...
## $ CandidateSource : chr "Agency" "Employee Referral" "Agency" "Employee Referral" ...
## $ RexInYrs : int 7 8 4 4 6 2 7 8 3 3 ...
## $ LOB : chr "ERS" "INFRA" "INFRA" "INFRA" ...
## $ Location : chr "Noida" "Chennai" "Noida" "Noida" ...
## $ Age : int 34 34 27 34 34 34 32 34 26 34 ...
## $ Status : chr "Joined" "Joined" "Joined" "Joined" ...
Training (80%) and Testing (20%) Data
library(caret)

# Split HR.df into training and testing sets ----
# The seed is fixed so the random partition is the same on every run.
set.seed(2341)

# Row indices for the training set: 80% of the rows, drawn using Status as the
# outcome. list = FALSE so the result can be used directly as a row index.
trainIndex <- createDataPartition(
  y = HR.df[["Status"]],
  p = 0.8,
  list = FALSE
)

# Training set: the sampled rows.
trainHR.df <- HR.df[trainIndex, ]
dim(trainHR.df)
## [1] 7197 17

# Testing set: every row that was not sampled for training.
testHR.df <- HR.df[-trainIndex, ]
dim(testHR.df)
## [1] 1798 17