## [1] "CandidateRef" "DOJExtended"
## [3] "DurationToAcceptOffer" "NoticePeriod"
## [5] "OfferedBand" "PercentHikeExpectedInCTC"
## [7] "PercentHikeOfferedInCTC" "PercentDifferenceCTC"
## [9] "JoiningBonus" "CandidateRelocateActual"
## [11] "Gender" "CandidateSource"
## [13] "RexInYrs" "LOB"
## [15] "Location" "Age"
## [17] "Status"
## vars n mean sd median
## CandidateRef 1 8995 2843647.38 486344.77 2807482
## DOJExtended* 2 8995 1.47 0.50 1
## DurationToAcceptOffer 3 8995 21.43 25.81 10
## NoticePeriod 4 8995 39.29 22.22 30
## OfferedBand* 5 8995 2.39 0.63 2
## PercentHikeExpectedInCTC 6 8995 43.86 29.79 40
## PercentHikeOfferedInCTC 7 8995 40.66 36.06 36
## PercentDifferenceCTC 8 8995 -1.57 19.61 0
## JoiningBonus* 9 8995 1.05 0.21 1
## CandidateRelocateActual* 10 8995 1.14 0.35 1
## Gender* 11 8995 1.88 0.33 2
## CandidateSource* 12 8995 1.89 0.67 2
## RexInYrs 13 8995 4.24 2.55 4
## LOB* 14 8995 5.18 2.38 5
## Location* 15 8995 4.94 3.00 3
## Age 16 8995 29.91 4.10 29
## Status* 17 8995 1.19 0.39 1
# Percentage of candidates in each Status level (Joined / NotJoined),
# rounded to two decimals.
# NOTE(review): `Status` is used as a bare name, so it is presumably made
# available earlier in the script (e.g. via attach) -- confirm.
tab <- round(prop.table(table(Status)) * 100, 2)
tab
## Status
## Joined NotJoined
## 81.3 18.7

# Bar plot of the percentages. `bp` stores the value returned by barplot(),
# which is reused below as the x positions of the value labels.
bp <- barplot(tab,
  xlab = "Status", ylab = "Percentage (%)",
  main = "% of the Candidates Who Joined / Did Not Join the Company",
  col = c("lightblue", "red"),
  # Full argument name: the previous `legend =` only reached `legend.text`
  # through partial argument matching.
  legend.text = rownames(tab),
  beside = TRUE
)
# Write each percentage (one decimal) above y = 0, i.e. at the foot of its bar.
text(bp, 0, round(tab, 1), cex = 2, pos = 3)

# % of candidates who joined / did not join the company, split by DOJ extended
# Row-wise percentages: within each DOJExtended level, the share of each Status.
tab2 <- round(100 * prop.table(table(DOJExtended, Status), margin = 1), 2)
tab2
## Status
## DOJExtended Joined NotJoined
## No 81.08 18.92
## Yes 81.55 18.45

# % of Candidates who Did Not Join, split by DOJ Extended
# (second column of tab2, which the printout above shows to be NotJoined)
tab <- tab2[, 2]

# Bar plot, with each value written at the foot of its bar
bp <- barplot(
  height = tab,
  main = "% of Candidates, Who Did Not Join, Split by DOJ Extended",
  xlab = "DOJ Extended",
  ylab = "Percentage (%)",
  col = "lightblue",
  beside = TRUE
)
text(x = bp, y = 0, labels = round(tab, 1), pos = 3, cex = 2)

# % of Candidates, who Joined / Did Not Join, split by Notice Period
# Row-wise percentages: within each NoticePeriod value, the share of each Status.
tab3 <- round(100 * prop.table(table(NoticePeriod, Status), margin = 1), 2)
tab3
## Status
## NoticePeriod Joined NotJoined
## 0 93.44 6.56
## 30 85.17 14.83
## 45 75.48 24.52
## 60 73.22 26.78
## 75 68.18 31.82
## 90 66.19 33.81
## 120 52.38 47.62

# % of Candidates, who Did Not Join, split by Notice Period
# (second column of tab3, which the printout above shows to be NotJoined)
tab <- tab3[, 2]

# Bar plot, with each value written at the foot of its bar
bp <- barplot(
  height = tab,
  main = "% of Candidates, Who Did Not Join, Split by Notice Period",
  xlab = "NoticePeriod",
  ylab = "Percentage (%)",
  col = "lightblue",
  beside = TRUE
)
text(x = bp, y = 0, labels = round(tab, 1), pos = 3, cex = 1.7)

# % of candidates who joined / did not join the company, split by Joining Bonus
# Row-wise percentages: within each JoiningBonus level, the share of each Status.
tab4 <- round(100 * prop.table(table(JoiningBonus, Status), margin = 1), 2)
tab4
## Status
## JoiningBonus Joined NotJoined
## No 81.34 18.66
## Yes 80.58 19.42

# % of Candidates, Who Did Not Join, Split by Joining Bonus
# (second column of tab4, which the printout above shows to be NotJoined)
tab <- tab4[, 2]

# Bar plot with explicit bar width and x-axis limits, with each value
# written at the foot of its bar
bp <- barplot(
  height = tab,
  main = "% of Candidates, Who Did Not Join, Split by Joining Bonus",
  xlab = "Joining Bonus",
  ylab = "Percentage (%)",
  col = "lightblue",
  width = 0.3,
  xlim = c(0, 1),
  beside = TRUE
)
text(x = bp, y = 0, labels = round(tab, 1), pos = 3, cex = 2)

# Percentage of the Candidates Who (Joined / Did Not Join), split by Gender
# Row-wise percentages: within each Gender level, the share of each Status.
tab5 <- round(100 * prop.table(table(Gender, Status), margin = 1), 2)
tab5
## Status
## Gender Joined NotJoined
## Female 74.86 25.14
## Male 82.18 17.82

# % of the Candidates who Did Not Join, split by Gender
# (second column of tab5, which the printout above shows to be NotJoined)
tab <- tab5[, 2]

# Bar plot with explicit bar width and x-axis limits, with each value
# written at the foot of its bar
bp <- barplot(
  height = tab,
  main = " % of the Candidates, Who Did Not join, Split by Gender",
  xlab = "Gender",
  ylab = "Percentage (%)",
  col = "lightblue",
  width = 0.3,
  xlim = c(0, 1),
  beside = TRUE
)
text(x = bp, y = 0, labels = round(tab, 1), pos = 3, cex = 2)

# Percentage of the candidates who (Joined / Did Not Join), split by Candidate Source
# Row-wise percentages: within each CandidateSource level, the share of each
# Status.
tab6 <- round(100 * prop.table(table(CandidateSource, Status), margin = 1), 2)
tab6
## Status
## CandidateSource Joined NotJoined
## Agency 75.82 24.18
## Direct 82.00 18.00
## Employee Referral 88.00 12.00

# % of Candidates Who Did Not Join the Company, Split by Candidate Source
# (second column of tab6, which the printout above shows to be NotJoined)
tab <- tab6[, 2]

# Bar plot, with each value written at the foot of its bar
bp <- barplot(
  height = tab,
  main = "% of Candidates, Who Did Not Join the Company, Split by Candidate Source",
  xlab = "Candidate Source",
  ylab = "Percentage (%)",
  col = "lightblue",
  beside = TRUE
)
text(x = bp, y = 0, labels = round(tab, 1), pos = 3, cex = 2)

# Percentage of the Candidates Who (Joined / Did Not Join), Split by Offered Band
# Row-wise percentages: within each OfferedBand level, the share of each Status.
tab7 <- round(100 * prop.table(table(OfferedBand, Status), margin = 1), 2)
tab7
## Status
## OfferedBand Joined NotJoined
## E0 76.30 23.70
## E1 81.30 18.70
## E2 80.97 19.03
## E3 85.15 14.85

# % of Candidates Who Did Not Join, Split by Offered Band
# (second column of tab7, which the printout above shows to be NotJoined)
tab <- tab7[, 2]

# Bar plot, with each value written at the foot of its bar
bp <- barplot(
  height = tab,
  main = "% of Candidates, Who Did Not Join, Split by Offered Band",
  xlab = "Offered Band",
  ylab = "Percentage (%)",
  col = "lightblue",
  beside = TRUE
)
text(x = bp, y = 0, labels = round(tab, 1), pos = 3, cex = 2)

# Percentage of the candidates (Joined / Did Not Join), Split by Line of Business (LOB)
# Row-wise percentages: within each LOB level, the share of each Status.
tab8 <- round(100 * prop.table(table(LOB, Status), margin = 1), 2)
tab8
## Status
## LOB Joined NotJoined
## AXON 77.46 22.54
## BFSI 75.86 24.14
## CSMP 81.52 18.48
## EAS 73.41 26.59
## ERS 78.11 21.89
## ETS 83.07 16.93
## Healthcare 82.26 17.74
## INFRA 87.79 12.21
## MMS 100.00 0.00

# % of the Candidates Who Did Not Join, Split by Line of Business (LOB)
# (second column of tab8, which the printout above shows to be NotJoined)
tab <- tab8[, 2]

# Bar plot with vertical axis labels (las = 3), with each value written at
# the foot of its bar
bp <- barplot(
  height = tab,
  main = "% of the Candidates, Who Did Not join, Split by Line of Business (LOB)",
  xlab = "LOB",
  ylab = "Percentage (%)",
  col = "lightblue",
  las = 3,
  beside = TRUE
)
text(x = bp, y = 0, labels = round(tab, 1), pos = 3, cex = 1.7)

# average age of candidates by status
library(data.table)

# Mean Age per Status level, rounded to two decimals (printed, not stored).
dt <- data.table(HR.df)
dt[, .(AverageAgeofCandidates = round(mean(Age), 2)), by = .(Status)]
## Status AverageAgeofCandidates
## 1: Joined 30.00
## 2: NotJoined 29.52

# plotting mean age split by Status, with the means printed as labels and
# the per-group n labels switched off
library(gplots)
plotmeans(
  Age ~ Status,
  data = HR.df,
  ylab = "Age(in Years)",
  mean.labels = TRUE,
  n.label = FALSE,
  digits = 2,
  col = "red",
  lwd = 1.5
)

# boxplot of age by status
boxplot(
  Age ~ Status,
  data = HR.df,
  main = "Boxplot of Age by Status",
  xlab = "Status",
  ylab = "Age (in Years)",
  col = c("lightblue", "red")
)

# average Notice Period of candidates by status
library(data.table)

# Mean NoticePeriod per Status level, rounded to two decimals (printed, not
# stored).
dt <- data.table(HR.df)
dt[, .(AverageNoticePeriod = round(mean(NoticePeriod), 2)), by = .(Status)]
## Status AverageNoticePeriod
## 1: Joined 37.24
## 2: NotJoined 48.19

# plotting average notice period by status; the recorded run emitted the
# zero-length-arrow warnings kept below
plotmeans(
  NoticePeriod ~ Status,
  data = HR.df,
  ylab = "Notice Period",
  mean.labels = TRUE,
  n.label = FALSE,
  digits = 2,
  col = "red",
  lwd = 1.5
)
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, : zero-
## length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, : zero-
## length arrow is of indeterminate angle and so skipped

# boxplot of Notice Period by status
boxplot(
  NoticePeriod ~ Status,
  data = HR.df,
  main = "Boxplot of Notice Period, Split by Status",
  xlab = "Status",
  ylab = "Notice Period",
  col = c("lightblue", "red")
)

# average relevant experience (RexInYrs) by status
library(data.table)

# Mean RexInYrs per Status level, rounded to two decimals (printed, not stored).
dt <- data.table(HR.df)
dt[, .(YearsOfExperience = round(mean(RexInYrs), 2)), by = .(Status)]
## Status YearsOfExperience
## 1: Joined 4.19
## 2: NotJoined 4.44

# boxplot of relevant years of experience by status
boxplot(
  RexInYrs ~ Status,
  data = HR.df,
  main = "Boxplot of Relevant Years of Experience by Status",
  xlab = "Status",
  ylab = "Relevant Years of Experience",
  col = c("lightblue", "red")
)

# average of DurationToAcceptOffer by status
# Mean DurationToAcceptOffer per Status level, rounded to two decimals
# (printed, not stored).
dt <- data.table(HR.df)
dt[,
  .(DurationToAcceptOffer = round(mean(DurationToAcceptOffer), 2)),
  by = .(Status)
]
## Status DurationToAcceptOffer
## 1: Joined 20.62
## 2: NotJoined 24.96

# plotting mean of DurationToAcceptOffer by status
plotmeans(
  DurationToAcceptOffer ~ Status,
  data = HR.df,
  ylab = "Duration To Accept Offer",
  mean.labels = TRUE,
  n.label = FALSE,
  digits = 2,
  col = "red",
  lwd = 1.5
)

# average age, relevant experience, number of days to accept offer and
# notice period by status and gender
# Group means (rounded to two decimals) of Age, RexInYrs,
# DurationToAcceptOffer and NoticePeriod for every Status x Gender
# combination, with the rows ordered by Status and then Gender.
dt <- data.table(HR.df)
dt[,
  .(
    AgeofCandidates = round(mean(Age), 2),
    YearsOfExperience = round(mean(RexInYrs), 2),
    DurationtoAcceptOffer = round(mean(DurationToAcceptOffer), 2),
    NoticePeriod = round(mean(NoticePeriod), 2)
  ),
  by = .(Status, Gender)
][order(Status, Gender)]
## Status Gender AgeofCandidates YearsOfExperience DurationtoAcceptOffer
## 1: Joined Female 29.93 3.56 20.75
## 2: Joined Male 30.01 4.27 20.61
## 3: NotJoined Female 28.02 3.61 24.10
## 4: NotJoined Male 29.81 4.60 25.12
## NoticePeriod
## 1: 36.03
## 2: 37.40
## 3: 46.65
## 4: 48.49
# average age, relevant experience, number of days to accept offer and
# notice period by status and DOJExtended (means rounded to two decimals,
# rows ordered by Status and then DOJExtended)
dt <- data.table(HR.df)
dt[,
  .(
    AgeofCandidates = round(mean(Age), 2),
    YearsOfExperience = round(mean(RexInYrs), 2),
    DurationtoAcceptOffer = round(mean(DurationToAcceptOffer), 2),
    NoticePeriod = round(mean(NoticePeriod), 2)
  ),
  by = .(Status, DOJExtended)
][order(Status, DOJExtended)]
## Status DOJExtended AgeofCandidates YearsOfExperience
## 1: Joined No 29.77 3.96
## 2: Joined Yes 30.27 4.46
## 3: NotJoined No 29.38 4.25
## 4: NotJoined Yes 29.67 4.66
## DurationtoAcceptOffer NoticePeriod
## 1: 12.02 33.86
## 2: 30.36 41.08
## 3: 18.11 47.20
## 4: 32.95 49.35
library(dplyr)

# creating subset of continuous variables
SubsetHR <- select(
  HR.df,
  DurationToAcceptOffer,
  NoticePeriod,
  RexInYrs,
  Age,
  PercentHikeExpectedInCTC,
  PercentHikeOfferedInCTC,
  PercentDifferenceCTC
)

# Pairwise correlations of the subset. rcorr() is given a matrix, and the
# printed result below shows the correlations, n and the P values.
library("Hmisc")
cm2 <- rcorr(as.matrix(SubsetHR))
cm2
## DurationToAcceptOffer NoticePeriod RexInYrs Age
## DurationToAcceptOffer 1.00 0.36 0.11 0.02
## NoticePeriod 0.36 1.00 0.18 0.00
## RexInYrs 0.11 0.18 1.00 0.57
## Age 0.02 0.00 0.57 1.00
## PercentHikeExpectedInCTC 0.02 -0.01 -0.22 -0.14
## PercentHikeOfferedInCTC 0.01 -0.01 -0.11 -0.08
## PercentDifferenceCTC -0.01 -0.02 0.08 0.04
## PercentHikeExpectedInCTC PercentHikeOfferedInCTC
## DurationToAcceptOffer 0.02 0.01
## NoticePeriod -0.01 -0.01
## RexInYrs -0.22 -0.11
## Age -0.14 -0.08
## PercentHikeExpectedInCTC 1.00 0.67
## PercentHikeOfferedInCTC 0.67 1.00
## PercentDifferenceCTC -0.16 0.60
## PercentDifferenceCTC
## DurationToAcceptOffer -0.01
## NoticePeriod -0.02
## RexInYrs 0.08
## Age 0.04
## PercentHikeExpectedInCTC -0.16
## PercentHikeOfferedInCTC 0.60
## PercentDifferenceCTC 1.00
##
## n= 8995
##
##
## P
## DurationToAcceptOffer NoticePeriod RexInYrs Age
## DurationToAcceptOffer 0.0000 0.0000 0.0562
## NoticePeriod 0.0000 0.0000 0.6376
## RexInYrs 0.0000 0.0000 0.0000
## Age 0.0562 0.6376 0.0000
## PercentHikeExpectedInCTC 0.0743 0.6178 0.0000 0.0000
## PercentHikeOfferedInCTC 0.4883 0.2019 0.0000 0.0000
## PercentDifferenceCTC 0.3730 0.1531 0.0000 0.0003
## PercentHikeExpectedInCTC PercentHikeOfferedInCTC
## DurationToAcceptOffer 0.0743 0.4883
## NoticePeriod 0.6178 0.2019
## RexInYrs 0.0000 0.0000
## Age 0.0000 0.0000
## PercentHikeExpectedInCTC 0.0000
## PercentHikeOfferedInCTC 0.0000
## PercentDifferenceCTC 0.0000 0.0000
## PercentDifferenceCTC
## DurationToAcceptOffer 0.3730
## NoticePeriod 0.1531
## RexInYrs 0.0000
## Age 0.0003
## PercentHikeExpectedInCTC 0.0000
## PercentHikeOfferedInCTC 0.0000
## PercentDifferenceCTC
# plotting correlation matrix of the continuous-variable subset, with
# histograms enabled and solid points (pch = 19)
library("PerformanceAnalytics")
chart.Correlation(
  SubsetHR,
  histogram = TRUE,
  pch = 19
)

# scatterplot of experience and duration to accept offer
library(lattice)

# Scatterplot of DurationToAcceptOffer against RexInYrs, with points grouped
# by Status and an automatic legend.
# `groups` is spelled out in full: the previous `group =` only reached the
# `groups` argument through partial argument matching.
xyplot(DurationToAcceptOffer ~ RexInYrs,
  data = HR.df,
  groups = Status,
  xlab = "Relevant Experience (in Years)",
  ylab = "Duration to Accept Offer (in Days)",
  main = "Scatterplot of Experience and Duration to Accept Offer",
  auto.key = TRUE
)

# scatterplot of experience and notice period
# Scatterplot of NoticePeriod against RexInYrs, with points grouped by Status
# and an automatic legend.
# `groups` is spelled out in full: the previous `group =` only reached the
# `groups` argument through partial argument matching.
xyplot(NoticePeriod ~ RexInYrs,
  data = HR.df,
  groups = Status,
  xlab = "Relevant Experience (in Years)",
  ylab = "Notice Period (in Days)",
  main = "Scatterplot of Experience and Notice Period",
  auto.key = TRUE
)

# scatterplot of experience and percent hike expected by candidate
# Scatterplot of PercentHikeExpectedInCTC against RexInYrs, with points
# grouped by Status and an automatic legend.
# `groups` is spelled out in full: the previous `group =` only reached the
# `groups` argument through partial argument matching. The axis label and
# title also had "Expected" misspelled; both are corrected here.
xyplot(PercentHikeExpectedInCTC ~ RexInYrs,
  data = HR.df,
  groups = Status,
  xlab = "Relevant Experience (in Years)",
  ylab = "Percent Hike (CTC) Expected by Candidate",
  main = "Scatterplot of Experience and Percent Hike (CTC) Expected by Candidate",
  auto.key = TRUE
)

# scatterplot of experience and percent hike offered by company
# Scatterplot of PercentHikeOfferedInCTC against RexInYrs, with points
# grouped by Status and an automatic legend.
# `groups` is spelled out in full: the previous `group =` only reached the
# `groups` argument through partial argument matching.
xyplot(PercentHikeOfferedInCTC ~ RexInYrs,
  data = HR.df,
  groups = Status,
  xlab = "Relevant Experience (in Years)",
  ylab = "Percent Hike (CTC) Offered by Company",
  main = "Scatterplot of Experience and Percent Hike (CTC) Offered by Company",
  auto.key = TRUE
)