Load HR Data

# Bring the objects stored in HRData.rda into the workspace.
load("HRData.rda")
# Keep the loaded object under a shorter working name, plus a second copy.
hr_copy <- hr <- HR.df
# Drop the original binding now that both copies exist.
rm(HR.df)

Number of rows and columns in dataframe

# Row count followed by column count.
c(nrow(hr), ncol(hr))
## [1] 8995   16

Column names of the dataframe

# Names of all columns in the data.
names(hr)
##  [1] "DOJExtended"              "DurationToAcceptOffer"   
##  [3] "NoticePeriod"             "OfferedBand"             
##  [5] "PercentHikeExpectedInCTC" "PercentHikeOfferedInCTC" 
##  [7] "PercentDifferenceCTC"     "JoiningBonus"            
##  [9] "CandidateRelocateActual"  "Gender"                  
## [11] "CandidateSource"          "RexInYrs"                
## [13] "LOB"                      "Location"                
## [15] "Age"                      "Status"

Descriptive statistics of the dataframe

# Keep only the first five summary columns of the describe() table.
describe(hr)[, c("vars", "n", "mean", "sd", "median")]
##                          vars    n  mean    sd median
## DOJExtended*                1 8995  1.47  0.50      1
## DurationToAcceptOffer       2 8995 21.43 25.81     10
## NoticePeriod                3 8995 39.29 22.22     30
## OfferedBand*                4 8995  2.39  0.63      2
## PercentHikeExpectedInCTC    5 8995 43.86 29.79     40
## PercentHikeOfferedInCTC     6 8995 40.66 36.06     36
## PercentDifferenceCTC        7 8995 -1.57 19.61      0
## JoiningBonus*               8 8995  1.05  0.21      1
## CandidateRelocateActual*    9 8995  1.14  0.35      1
## Gender*                    10 8995  1.83  0.38      2
## CandidateSource*           11 8995  1.89  0.67      2
## RexInYrs                   12 8995  4.24  2.55      4
## LOB*                       13 8995  5.18  2.38      5
## Location*                  14 8995  4.94  3.00      3
## Age                        15 8995 29.91  4.10     29
## Status*                    16 8995  1.19  0.39      1

Discrete Data Distribution

Percentage of candidates joined/not-joined

# Share of candidates in each Status category, as a percentage (2 decimals).
round(100 * prop.table(table(hr[["Status"]])), digits = 2)
## 
##    Joined NotJoined 
##      81.3      18.7

Bar chart of candidates joined/not-joined

# Number of candidates in each Status category.
# geom_bar() counts rows per category by default; geom_histogram(stat = "count")
# drew the same bars but warned about its unused binning parameters
# (binwidth, bins, pad).
hr %>%
  ggplot(aes(x = Status, fill = Status)) +
  geom_bar()

Percentage of the candidates Joined / Did Not Join the Company, Split by DOJ extended

# Row-wise percentages: Status split within each DOJExtended value.
round(
  100 * prop.table(table(hr[["DOJExtended"]], hr[["Status"]]), margin = 1),
  digits = 2
)
##      
##       Joined NotJoined
##   No   81.08     18.92
##   Yes  81.55     18.45

Bar Chart for Candidates Who Did Not Join, Split by DOJ Extended

# Number of NotJoined candidates for each DOJExtended value.
# geom_bar() counts rows per category by default, so it replaces
# geom_histogram(stat = "count") and its "unknown parameters" warning.
hr %>%
  filter(Status == "NotJoined") %>%
  ggplot(aes(x = DOJExtended, fill = DOJExtended)) +
  geom_bar()

Percentage of Candidates Who (Joined / Did Not Join), split by Notice Period

# Row-wise percentages: Status split within each NoticePeriod value.
round(
  100 * prop.table(table(hr[["NoticePeriod"]], hr[["Status"]]), margin = 1),
  digits = 2
)
##      
##       Joined NotJoined
##   0    93.44      6.56
##   30   85.17     14.83
##   45   75.48     24.52
##   60   73.22     26.78
##   75   68.18     31.82
##   90   66.19     33.81
##   120  52.38     47.62

Bar Chart for % of Candidates, Who Did Not Join, Split by Notice Period

Look into it

# Percentage of candidates who did not join, for each notice period.
# Columns are read through `hr` explicitly rather than via attach(hr), which
# would leave the data on the search path for everything that follows.
# NOTE(review): if code outside this section relies on bare column names made
# visible by attach(hr), switch it to hr$<column> as well.
tab3 <- round(
  prop.table(table(hr$NoticePeriod, hr$Status), 1) * 100,
  2
)[, "NotJoined"]

# tab3 is a plain named vector, so barplot() labels the bars from its names.
# The former `legend = rownames(tab3)` was always NULL (a vector has no
# rownames) and `beside = TRUE` has no effect on a vector, so both are dropped.
bp <- barplot(
  tab3,
  main = "% of the Candidates, who did not join, split by Notice Period",
  xlab = "NoticePeriod",
  ylab = "Percentage(%)",
  col = "skyblue"
)
# Print each percentage just above the baseline of its bar.
text(bp, 0, round(tab3, 1), cex = 1, pos = 3)

Percentage of the Candidates Who (Joined / Did Not Join), split by Joining Bonus

# Row-wise percentages: Status split within each JoiningBonus value.
round(
  100 * prop.table(table(hr[["JoiningBonus"]], hr[["Status"]]), margin = 1),
  digits = 2
)
##      
##       Joined NotJoined
##   No   81.34     18.66
##   Yes  80.58     19.42

Bar Chart for # of Candidates, Who Did Not Join, Split by Joining Bonus

# Number of NotJoined candidates for each JoiningBonus value.
# geom_bar() counts rows per category by default, so it replaces
# geom_histogram(stat = "count") and its "unknown parameters" warning.
hr %>%
  filter(Status == "NotJoined") %>%
  ggplot(aes(x = JoiningBonus, fill = JoiningBonus)) +
  geom_bar()

Percentage of the Candidates Who (Joined / Did Not Join), Split by Gender

# Row-wise percentages: Status split within each Gender value.
round(
  100 * prop.table(table(hr[["Gender"]], hr[["Status"]]), margin = 1),
  digits = 2
)
##         
##          Joined NotJoined
##   Female  82.40     17.60
##   Male    81.07     18.93

Bar Chart for # of the Candidates Who Did Not join, Split by Gender

# Number of NotJoined candidates for each Gender value.
# geom_bar() counts rows per category by default, so it replaces
# geom_histogram(stat = "count") and its "unknown parameters" warning.
hr %>%
  filter(Status == "NotJoined") %>%
  ggplot(aes(x = Gender, fill = Gender)) +
  geom_bar()

Percentage of the Candidates Who (Joined / Did Not Join), Split by Candidate Source

# Row-wise percentages: Status split within each CandidateSource value.
round(
  100 * prop.table(table(hr[["CandidateSource"]], hr[["Status"]]), margin = 1),
  digits = 2
)
##                    
##                     Joined NotJoined
##   Agency             75.82     24.18
##   Direct             82.00     18.00
##   Employee Referral  88.00     12.00

Bar Chart for # of Candidates Who Did Not Join the Company, Split by Candidate Source

# Number of NotJoined candidates for each CandidateSource value.
# geom_bar() counts rows per category by default, so it replaces
# geom_histogram(stat = "count") and its "unknown parameters" warning.
hr %>%
  filter(Status == "NotJoined") %>%
  ggplot(aes(x = CandidateSource, fill = CandidateSource)) +
  geom_bar()

Percentage of the Candidates Who (Joined / Did Not Join), Split by Offered Band

# Row-wise percentages: Status split within each OfferedBand value.
round(
  100 * prop.table(table(hr[["OfferedBand"]], hr[["Status"]]), margin = 1),
  digits = 2
)
##     
##      Joined NotJoined
##   E0  76.30     23.70
##   E1  81.30     18.70
##   E2  80.97     19.03
##   E3  85.15     14.85

Bar Chart for # of Candidates Who Did Not Join, Split by Offered Band

# Number of NotJoined candidates for each OfferedBand value.
# geom_bar() counts rows per category by default, so it replaces
# geom_histogram(stat = "count") and its "unknown parameters" warning.
hr %>%
  filter(Status == "NotJoined") %>%
  ggplot(aes(x = OfferedBand, fill = OfferedBand)) +
  geom_bar()

Percentage of the Candidates Who (Joined / Did Not join), Split by Line of Business (LOB)

# Row-wise percentages: Status split within each LOB value.
round(
  100 * prop.table(table(hr[["LOB"]], hr[["Status"]]), margin = 1),
  digits = 2
)
##             
##              Joined NotJoined
##   AXON        77.46     22.54
##   BFSI        75.86     24.14
##   CSMP        81.52     18.48
##   EAS         73.41     26.59
##   ERS         78.11     21.89
##   ETS         83.07     16.93
##   Healthcare  82.26     17.74
##   INFRA       87.79     12.21
##   MMS        100.00      0.00

Bar Chart for # of the Candidates Who Did Not Join, Split by Line of Business (LOB)

# Number of NotJoined candidates for each LOB value.
# geom_bar() counts rows per category by default, so it replaces
# geom_histogram(stat = "count") and its "unknown parameters" warning.
hr %>%
  filter(Status == "NotJoined") %>%
  ggplot(aes(x = LOB, fill = LOB)) +
  geom_bar()

Continuous Data Distribution

Average Age of the Candidates (Joined / Did Not join)

# Mean Age within each Status group.
hr %>%
  group_by(Status) %>%
  summarise(AverageAge = mean(Age))
## # A tibble: 2 x 2
##   Status    AverageAge
##   <fct>          <dbl>
## 1 Joined          30.0
## 2 NotJoined       29.5

Mean Plot for the Age, Split by Status

# Plot the mean Age of each Status group and print the mean values on the plot.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
plotmeans(Age ~ Status, data = hr, mean.labels = TRUE)

Boxplot of Age, Split by Status (Joined / Did Not Join)

# Distribution of Age for each Status group, one filled box per group.
ggplot(hr, aes(x = Status, y = Age, fill = Status)) +
  geom_boxplot()

Average Notice Period of the Candidates, Split by Status (Joined / Did Not join)

# Mean NoticePeriod within each Status group.
hr %>%
  group_by(Status) %>%
  summarise(AverageNotice = mean(NoticePeriod))
## # A tibble: 2 x 2
##   Status    AverageNotice
##   <fct>             <dbl>
## 1 Joined             37.2
## 2 NotJoined          48.2

Mean Plot for Notice Period, Split by Status (Joined / Did Not join)

## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped

Boxplot for Notice Period of the Candidates, Split by Status (Joined / Did Not join)

# Distribution of NoticePeriod for each Status group, one filled box per group.
ggplot(hr, aes(x = Status, y = NoticePeriod, fill = Status)) +
  geom_boxplot()

Average (Relevant Years of Experience) of the candidates (Joined / Not joined)

# Mean RexInYrs within each Status group.
hr %>%
  group_by(Status) %>%
  summarise(AverageExperience = mean(RexInYrs))
## # A tibble: 2 x 2
##   Status    AverageExperience
##   <fct>                 <dbl>
## 1 Joined                 4.19
## 2 NotJoined              4.44

Mean plot for the above

# Plot the mean RexInYrs of each Status group and print the mean values.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
plotmeans(RexInYrs ~ Status, data = hr, mean.labels = TRUE)

Boxplot of Relevant Years of Experience of the candidates (Joined / Not joined)

# Distribution of RexInYrs for each Status group, one filled box per group.
ggplot(hr, aes(x = Status, y = RexInYrs, fill = Status)) +
  geom_boxplot()

Average of DurationToAcceptOffer (Number of days taken by the candidate to accept the offer) of candidates (Joined / Not joined)

# Mean DurationToAcceptOffer within each Status group.
hr %>%
  group_by(Status) %>%
  summarise(Duration = mean(DurationToAcceptOffer))
## # A tibble: 2 x 2
##   Status    Duration
##   <fct>        <dbl>
## 1 Joined        20.6
## 2 NotJoined     25.0

Mean plot for the above

# Plot the mean DurationToAcceptOffer of each Status group and print the means.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
plotmeans(DurationToAcceptOffer ~ Status, data = hr, mean.labels = TRUE)

Average (Age, Relevant Years of Experience and Number of days taken by the candidate to accept the offer) of candidates (Joined / Not joined) by Gender (Male / Female)

# Mean Age, RexInYrs and DurationToAcceptOffer for every Status/Gender pair.
hr %>%
  group_by(Status, Gender) %>%
  summarise(
    Age = mean(Age),
    Experience = mean(RexInYrs),
    DaysTaken = mean(DurationToAcceptOffer)
  )
## # A tibble: 4 x 5
## # Groups:   Status [2]
##   Status    Gender   Age Experience DaysTaken
##   <fct>     <fct>  <dbl>      <dbl>     <dbl>
## 1 Joined    Female  29.1       3.45      19.4
## 2 Joined    Male    30.2       4.35      20.9
## 3 NotJoined Female  28.0       3.61      24.1
## 4 NotJoined Male    29.8       4.60      25.1

CORRELATION

Correlation Matrix for the Continuous Variables

# Pearson correlation matrix of six numeric columns.
# cor() uses a single method: given the full vector
# c("pearson", "kendall", "spearman") it falls back to the first entry, so
# "pearson" is named explicitly to match what is actually computed.
# Columns are picked by name (formerly positions 2, 3, 6, 7, 12, 15) so the
# result does not depend on column order.
cor(
  hr[, c(
    "DurationToAcceptOffer", "NoticePeriod", "PercentHikeOfferedInCTC",
    "PercentDifferenceCTC", "RexInYrs", "Age"
  )],
  method = "pearson"
)
##                         DurationToAcceptOffer NoticePeriod
## DurationToAcceptOffer             1.000000000  0.363760039
## NoticePeriod                      0.363760039  1.000000000
## PercentHikeOfferedInCTC           0.007307491 -0.013457512
## PercentDifferenceCTC             -0.009395037 -0.015066608
## RexInYrs                          0.113951452  0.181786080
## Age                               0.020131119 -0.004967358
##                         PercentHikeOfferedInCTC PercentDifferenceCTC
## DurationToAcceptOffer               0.007307491         -0.009395037
## NoticePeriod                       -0.013457512         -0.015066608
## PercentHikeOfferedInCTC             1.000000000          0.598778760
## PercentDifferenceCTC                0.598778760          1.000000000
## RexInYrs                           -0.110243489          0.081547098
## Age                                -0.080010484          0.037955053
##                           RexInYrs          Age
## DurationToAcceptOffer    0.1139515  0.020131119
## NoticePeriod             0.1817861 -0.004967358
## PercentHikeOfferedInCTC -0.1102435 -0.080010484
## PercentDifferenceCTC     0.0815471  0.037955053
## RexInYrs                 1.0000000  0.568227965
## Age                      0.5682280  1.000000000

Plotting correlation matrix

# Correlation chart for the same six columns used in the correlation matrix
# (positions 2, 3, 6, 7, 12 and 15 of hr), selected here by name.
chart.Correlation(
  hr[, c(
    "DurationToAcceptOffer", "NoticePeriod", "PercentHikeOfferedInCTC",
    "PercentDifferenceCTC", "RexInYrs", "Age"
  )]
)

Scatter Plots

Scatter Plot of Experience and Duration to Accept Offer by Status (Joined / Not Joined)

# RexInYrs (x) against DurationToAcceptOffer (y), points coloured by Status.
ggplot(hr, aes(x = RexInYrs, y = DurationToAcceptOffer, color = Status)) +
  geom_point()

Scatter Plot of Experience and Notice Period by Status (Joined / Not Joined)

# RexInYrs (x) against NoticePeriod (y), points coloured by Status.
ggplot(hr, aes(x = RexInYrs, y = NoticePeriod, color = Status)) +
  geom_point()

Scatter Plot of Experience and Percent Hike (CTC) Expected by Candidate by Status (Joined / Not Joined)

# RexInYrs (x) against PercentHikeExpectedInCTC (y), points coloured by Status.
ggplot(hr, aes(x = RexInYrs, y = PercentHikeExpectedInCTC, color = Status)) +
  geom_point()

Scatter Plot of Experience and Percent Hike (CTC) Offered to Candidate by Status (Joined / Not Joined)

# RexInYrs (x) against PercentHikeOfferedInCTC (y), points coloured by Status.
ggplot(hr, aes(x = RexInYrs, y = PercentHikeOfferedInCTC, color = Status)) +
  geom_point()