Discrete data distribution
Percentage of candidates joined/not-joined
# Overall share (%) of each Status level, rounded to two decimals.
round(100 * prop.table(table(hr$Status)), digits = 2)
##
## Joined NotJoined
## 81.3 18.7
Bar chart of candidates joined/not-joined
# Number of candidates per Status. geom_bar() counts rows per category by
# default; geom_histogram(stat = "count") only works by overriding the binning
# stat and warns that binwidth/bins/pad are ignored.
hr %>%
  ggplot(aes(x = Status, fill = Status)) +
  geom_bar()
## Warning: Ignoring unknown parameters: binwidth, bins, pad

Percentage of the candidates Joined / Did Not Join the Company, Split by DOJ extended
# Row-wise percentages: Status split within each DOJExtended level.
round(
  100 * prop.table(table(hr$DOJExtended, hr$Status), margin = 1),
  digits = 2
)
##
## Joined NotJoined
## No 81.08 18.92
## Yes 81.55 18.45
Bar Chart for Candidates Who Did Not Join, Split by DOJ Extended
# Number of candidates who did not join, per DOJExtended level.
# geom_bar() is the count-based bar geom; geom_histogram(stat = "count")
# warns that its binning parameters are ignored.
hr %>%
  filter(Status == "NotJoined") %>%
  ggplot(aes(x = DOJExtended, fill = DOJExtended)) +
  geom_bar()
## Warning: Ignoring unknown parameters: binwidth, bins, pad

Percentage of Candidates Who (Joined / Did Not Join), split by Notice Period
# Row-wise percentages: Status split within each NoticePeriod value.
round(
  100 * prop.table(table(hr$NoticePeriod, hr$Status), margin = 1),
  digits = 2
)
##
## Joined NotJoined
## 0 93.44 6.56
## 30 85.17 14.83
## 45 75.48 24.52
## 60 73.22 26.78
## 75 68.18 31.82
## 90 66.19 33.81
## 120 52.38 47.62
Bar Chart for % of Candidates, Who Did Not Join, Split by Notice Period
Look into it
# Share (%) of candidates who did not join, per notice period.
# Columns are read from `hr` explicitly rather than via attach(hr), which
# would leave the data frame on the search path for the rest of the session.
# NOTE(review): confirm nothing later in the document relies on attach(hr).
tab3 <- round(
  prop.table(table(hr$NoticePeriod, hr$Status), margin = 1) * 100,
  2
)[, "NotJoined"]

# tab3 is a named vector, so rownames(tab3) was NULL and no legend was drawn;
# `beside` has no effect on a vector. Both arguments are dropped.
bp <- barplot(
  tab3,
  main = "% of the Candidates, who did not join, split by Notice Period",
  xlab = "NoticePeriod",
  ylab = "Percentage(%)",
  col = "skyblue"
)

# Write each bar's value (one decimal) just above the baseline of the bar.
text(bp, 0, round(tab3, 1), cex = 1, pos = 3)

Percentage of the Candidates Who (Joined / Did Not Join), split by Joining Bonus
# Row-wise percentages: Status split within each JoiningBonus level.
round(
  100 * prop.table(table(hr$JoiningBonus, hr$Status), margin = 1),
  digits = 2
)
##
## Joined NotJoined
## No 81.34 18.66
## Yes 80.58 19.42
Bar Chart for # of Candidates, Who Did Not Join, Split by Joining Bonus
# Number of candidates who did not join, per JoiningBonus level.
# geom_bar() is the count-based bar geom; geom_histogram(stat = "count")
# warns that its binning parameters are ignored.
hr %>%
  filter(Status == "NotJoined") %>%
  ggplot(aes(x = JoiningBonus, fill = JoiningBonus)) +
  geom_bar()
## Warning: Ignoring unknown parameters: binwidth, bins, pad

Percentage of the Candidates Who (Joined / Did Not Join), Split by Gender
# Row-wise percentages: Status split within each Gender level.
round(
  100 * prop.table(table(hr$Gender, hr$Status), margin = 1),
  digits = 2
)
##
## Joined NotJoined
## Female 82.40 17.60
## Male 81.07 18.93
Bar Chart for # of the Candidates Who Did Not join, Split by Gender
# Number of candidates who did not join, per Gender.
# geom_bar() is the count-based bar geom; geom_histogram(stat = "count")
# warns that its binning parameters are ignored.
hr %>%
  filter(Status == "NotJoined") %>%
  ggplot(aes(x = Gender, fill = Gender)) +
  geom_bar()
## Warning: Ignoring unknown parameters: binwidth, bins, pad

Percentage of the Candidates Who (Joined / Did Not Join), Split by Candidate Source
# Row-wise percentages: Status split within each CandidateSource level.
round(
  100 * prop.table(table(hr$CandidateSource, hr$Status), margin = 1),
  digits = 2
)
##
## Joined NotJoined
## Agency 75.82 24.18
## Direct 82.00 18.00
## Employee Referral 88.00 12.00
Bar Chart for # of Candidates Who Did Not Join the Company, Split by Candidate Source
# Number of candidates who did not join, per CandidateSource.
# geom_bar() is the count-based bar geom; geom_histogram(stat = "count")
# warns that its binning parameters are ignored.
hr %>%
  filter(Status == "NotJoined") %>%
  ggplot(aes(x = CandidateSource, fill = CandidateSource)) +
  geom_bar()
## Warning: Ignoring unknown parameters: binwidth, bins, pad

Percentage of the Candidates Who (Joined / Did Not Join), Split by Offered Band
# Row-wise percentages: Status split within each OfferedBand level.
round(
  100 * prop.table(table(hr$OfferedBand, hr$Status), margin = 1),
  digits = 2
)
##
## Joined NotJoined
## E0 76.30 23.70
## E1 81.30 18.70
## E2 80.97 19.03
## E3 85.15 14.85
Bar Chart for # of Candidates Who Did Not Join, Split by Offered Band
# Number of candidates who did not join, per OfferedBand.
# geom_bar() is the count-based bar geom; geom_histogram(stat = "count")
# warns that its binning parameters are ignored.
hr %>%
  filter(Status == "NotJoined") %>%
  ggplot(aes(x = OfferedBand, fill = OfferedBand)) +
  geom_bar()
## Warning: Ignoring unknown parameters: binwidth, bins, pad

Percentage of the Candidates Who (Joined / Did Not join), Split by Line of Business (LOB)
# Row-wise percentages: Status split within each line of business (LOB).
round(
  100 * prop.table(table(hr$LOB, hr$Status), margin = 1),
  digits = 2
)
##
## Joined NotJoined
## AXON 77.46 22.54
## BFSI 75.86 24.14
## CSMP 81.52 18.48
## EAS 73.41 26.59
## ERS 78.11 21.89
## ETS 83.07 16.93
## Healthcare 82.26 17.74
## INFRA 87.79 12.21
## MMS 100.00 0.00
Bar Chart for # of the Candidates Who Did Not Join, Split by Line of Business (LOB)
# Number of candidates who did not join, per line of business (LOB).
# geom_bar() is the count-based bar geom; geom_histogram(stat = "count")
# warns that its binning parameters are ignored.
hr %>%
  filter(Status == "NotJoined") %>%
  ggplot(aes(x = LOB, fill = LOB)) +
  geom_bar()
## Warning: Ignoring unknown parameters: binwidth, bins, pad

Continuous Data Distribution
Average Age of the Candidates (Joined / Did Not join)
# Mean Age per Status group.
hr %>%
  group_by(Status) %>%
  summarise(AverageAge = mean(Age))
## # A tibble: 2 x 2
## Status AverageAge
## <fct> <dbl>
## 1 Joined 30.0
## 2 NotJoined 29.5
Mean Plot for the Age, Split by Status
# Mean plot of Age by Status with the mean values labelled.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
plotmeans(Age ~ Status, data = hr, mean.labels = TRUE)

Boxplot of Age, Split by Status (Joined / Did Not Join)
# Distribution of Age within each Status group.
ggplot(data = hr, mapping = aes(x = Status, y = Age, fill = Status)) +
  geom_boxplot()

Average Notice Period of the Candidates, Split by Status (Joined / Did Not join)
# Mean NoticePeriod per Status group.
hr %>%
  group_by(Status) %>%
  summarise(AverageNotice = mean(NoticePeriod))
## # A tibble: 2 x 2
## Status AverageNotice
## <fct> <dbl>
## 1 Joined 37.2
## 2 NotJoined 48.2
Mean Plot for Notice Period, Split by Status (Joined / Did Not join)
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped

Boxplot for Notice Period of the Candidates, Split by Status (Joined / Did Not join)
# Distribution of NoticePeriod within each Status group.
ggplot(data = hr, mapping = aes(x = Status, y = NoticePeriod, fill = Status)) +
  geom_boxplot()

Average (Relevant Years of Experience) of the candidates (Joined / Not joined)
# Mean RexInYrs (relevant years of experience) per Status group.
hr %>%
  group_by(Status) %>%
  summarise(AverageExperience = mean(RexInYrs))
## # A tibble: 2 x 2
## Status AverageExperience
## <fct> <dbl>
## 1 Joined 4.19
## 2 NotJoined 4.44
Mean plot for the above
# Mean plot of RexInYrs by Status with the mean values labelled.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
plotmeans(RexInYrs ~ Status, data = hr, mean.labels = TRUE)

Boxplot of Relevant Years of Experience of the candidates (Joined / Not joined)
# Distribution of RexInYrs within each Status group.
ggplot(data = hr, mapping = aes(x = Status, y = RexInYrs, fill = Status)) +
  geom_boxplot()

Average of DurationToAcceptOffer (Number of days taken by the candidate to accept the offer) of candidates (Joined / Not joined)
# Mean DurationToAcceptOffer per Status group.
hr %>%
  group_by(Status) %>%
  summarise(Duration = mean(DurationToAcceptOffer))
## # A tibble: 2 x 2
## Status Duration
## <fct> <dbl>
## 1 Joined 20.6
## 2 NotJoined 25.0
Meanplot for the above
# Mean plot of DurationToAcceptOffer by Status with the mean values labelled.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
plotmeans(DurationToAcceptOffer ~ Status, data = hr, mean.labels = TRUE)

Average (Age, Relevant Years of Experience and Number of days taken by the candidate to accept the offer) of candidates (Joined / Not joined) by Gender (Male / Female)
# Mean age, experience and days taken to accept the offer for every
# Status x Gender combination.
hr %>%
  group_by(Status, Gender) %>%
  summarise(
    Age = mean(Age),
    Experience = mean(RexInYrs),
    DaysTaken = mean(DurationToAcceptOffer)
  )
## # A tibble: 4 x 5
## # Groups: Status [2]
## Status Gender Age Experience DaysTaken
## <fct> <fct> <dbl> <dbl> <dbl>
## 1 Joined Female 29.1 3.45 19.4
## 2 Joined Male 30.2 4.35 20.9
## 3 NotJoined Female 28.0 3.61 24.1
## 4 NotJoined Male 29.8 4.60 25.1
CORRELATION
Correlation Matrix for all the Continuous Variables
# Pearson correlation matrix of the continuous variables.
# Columns are selected by name (the names are those of positions
# 2, 3, 6, 7, 12, 15 as shown in the printed matrix), so the call keeps
# working if the column order of `hr` changes.
# The original three-element `method` vector resolved to its first entry,
# "pearson"; that is now stated explicitly.
continuous_vars <- c(
  "DurationToAcceptOffer",
  "NoticePeriod",
  "PercentHikeOfferedInCTC",
  "PercentDifferenceCTC",
  "RexInYrs",
  "Age"
)
cor(hr[, continuous_vars], method = "pearson")
## DurationToAcceptOffer NoticePeriod
## DurationToAcceptOffer 1.000000000 0.363760039
## NoticePeriod 0.363760039 1.000000000
## PercentHikeOfferedInCTC 0.007307491 -0.013457512
## PercentDifferenceCTC -0.009395037 -0.015066608
## RexInYrs 0.113951452 0.181786080
## Age 0.020131119 -0.004967358
## PercentHikeOfferedInCTC PercentDifferenceCTC
## DurationToAcceptOffer 0.007307491 -0.009395037
## NoticePeriod -0.013457512 -0.015066608
## PercentHikeOfferedInCTC 1.000000000 0.598778760
## PercentDifferenceCTC 0.598778760 1.000000000
## RexInYrs -0.110243489 0.081547098
## Age -0.080010484 0.037955053
## RexInYrs Age
## DurationToAcceptOffer 0.1139515 0.020131119
## NoticePeriod 0.1817861 -0.004967358
## PercentHikeOfferedInCTC -0.1102435 -0.080010484
## PercentDifferenceCTC 0.0815471 0.037955053
## RexInYrs 1.0000000 0.568227965
## Age 0.5682280 1.000000000
Plotting correlation matrix
# Correlation chart of the continuous variables, selected by name instead of
# by position (2, 3, 6, 7, 12, 15) so a change in column order cannot silently
# pick different columns.
chart.Correlation(
  hr[, c(
    "DurationToAcceptOffer",
    "NoticePeriod",
    "PercentHikeOfferedInCTC",
    "PercentDifferenceCTC",
    "RexInYrs",
    "Age"
  )]
)

Scatter Plots
Scatter Plot of Experience and Duration to Accept Offer by Status (Joined / Not Joined)
# RexInYrs against DurationToAcceptOffer, points coloured by Status.
ggplot(
  data = hr,
  mapping = aes(x = RexInYrs, y = DurationToAcceptOffer, color = Status)
) +
  geom_point()

Scatter Plot of Experience and Notice Period by Status (Joined / Not Joined)
# RexInYrs against NoticePeriod, points coloured by Status.
ggplot(
  data = hr,
  mapping = aes(x = RexInYrs, y = NoticePeriod, color = Status)
) +
  geom_point()

Scatter Plot of Experience and Percent Hike (CTC) Expected by Candidate by Status (Joined / Not Joined)
# RexInYrs against PercentHikeExpectedInCTC, points coloured by Status.
ggplot(
  data = hr,
  mapping = aes(x = RexInYrs, y = PercentHikeExpectedInCTC, color = Status)
) +
  geom_point()

Scatter Plot of Experience and Percent Hike (CTC) Offered to Candidate by Status (Joined / Not Joined)
# RexInYrs against PercentHikeOfferedInCTC, points coloured by Status.
ggplot(
  data = hr,
  mapping = aes(x = RexInYrs, y = PercentHikeOfferedInCTC, color = Status)
) +
  geom_point()
