Part 1: Read the data
# Load the objects saved in "HRData.rda" into the current workspace.
# The code that follows expects this to provide a data frame named "HR.df"
# (NOTE(review): confirm the .rda file actually contains HR.df).
load("HRData.rda")
Part 2: Column names
# List the variable (column) names of the HR data frame
names(HR.df)
## [1] "DOJExtended" "DurationToAcceptOffer"
## [3] "NoticePeriod" "OfferedBand"
## [5] "PercentHikeExpectedInCTC" "PercentHikeOfferedInCTC"
## [7] "PercentDifferenceCTC" "JoiningBonus"
## [9] "CandidateRelocateActual" "Gender"
## [11] "CandidateSource" "RexInYrs"
## [13] "LOB" "Location"
## [15] "Age" "Status"
Part 3: Data Dimensions
# Report the size of the data frame as (number of rows, number of columns)
c(nrow(HR.df), ncol(HR.df))
## [1] 8995 16
Part 4: Descriptive Statistics of the dataframe
# Summarise every column with describe() from psych and keep only the
# count, mean, standard deviation and median columns (selected by name).
library(psych)
describe(HR.df)[, c("n", "mean", "sd", "median")]
## n mean sd median
## DOJExtended* 8995 1.47 0.50 1
## DurationToAcceptOffer 8995 21.43 25.81 10
## NoticePeriod 8995 39.29 22.22 30
## OfferedBand* 8995 2.39 0.63 2
## PercentHikeExpectedInCTC 8995 43.86 29.79 40
## PercentHikeOfferedInCTC 8995 40.66 36.06 36
## PercentDifferenceCTC 8995 -1.57 19.61 0
## JoiningBonus* 8995 1.05 0.21 1
## CandidateRelocateActual* 8995 1.14 0.35 1
## Gender* 8995 1.83 0.38 2
## CandidateSource* 8995 1.89 0.67 2
## RexInYrs 8995 4.24 2.55 4
## LOB* 8995 5.18 2.38 5
## Location* 8995 4.94 3.00 3
## Age 8995 29.91 4.10 29
## Status* 8995 1.19 0.39 1
Part 5: Percentage of the candidates (Joined / Not joined)
# Put the columns of HR.df on the search path; later sections refer to
# columns such as Status by their bare names.
attach(HR.df)
# Count candidates per Status level, then express each count as a
# percentage of all candidates, rounded to one decimal place.
tab <- table(Status)
PercentProportion <- round(tab / sum(tab) * 100, 1)
print(PercentProportion)
## Status
## Joined NotJoined
## 81.3 18.7
Part 6: Bar Chart for % of the Candidates Who Joined / Did Not Join the Company
# Bar chart of the percentage of candidates per Status level, one colour per
# bar, with a legend naming each bar. The legend labels are passed through
# the fully spelled-out `legend.text` argument rather than relying on
# partial matching of `legend`.
bar <- barplot(
  PercentProportion,
  col = c("blue", "red"),
  xlab = "Status",
  ylab = "Percentage (%)",
  main = "% Of Candidates who joined / Not joined the company",
  legend.text = names(PercentProportion)
)
# barplot() returns the bar midpoints; use them to print each percentage
# just above the baseline of its bar.
text(bar, 0, PercentProportion, cex = 1, pos = 3)

Part 7: Percentage of the candidates Joined / Did Not Join the Company, Split by DOJ extended
# Cross-tabulate DOJExtended against Status and convert each row to
# percentages (every row sums to 100), rounded to one decimal place.
tab <- with(HR.df, table(DOJExtended, Status))
PercentProportion <- round(100 * prop.table(tab, margin = 1), 1)
print(PercentProportion)
## Status
## DOJExtended Joined NotJoined
## No 81.1 18.9
## Yes 81.6 18.4
Part 8: Bar Chart for % of Candidates Who Did Not Join, Split by DOJ Extended
# Bar chart of the second column of the percentage table (NotJoined) for
# each DOJExtended level.
# No legend is requested: `temp` is a plain named vector, so the previous
# `legend = rownames(temp)` evaluated to NULL and never drew anything; the
# bars are already labelled by the names of `temp`.
temp <- PercentProportion[, 2]
bar <- barplot(
  temp,
  xlab = "DOJExtended",
  ylab = "Percentage (%)",
  main = "Bar chart for % Of Candidates, Who did not join,split by DOJExtended"
)
# Print each percentage just above the baseline of its bar.
text(bar, 0, temp, cex = 1, pos = 3)

Part 9: Percentage of Candidates Who (Joined / Did Not Join), split by Notice Period
# Cross-tabulate NoticePeriod against Status and convert each row to
# percentages (every row sums to 100), rounded to one decimal place.
tab <- with(HR.df, table(NoticePeriod, Status))
PercentProportion <- round(100 * prop.table(tab, margin = 1), 1)
print(PercentProportion)
## Status
## NoticePeriod Joined NotJoined
## 0 93.4 6.6
## 30 85.2 14.8
## 45 75.5 24.5
## 60 73.2 26.8
## 75 68.2 31.8
## 90 66.2 33.8
## 120 52.4 47.6
Part 10: Bar Chart for % of Candidates, Who Did Not Join, Split by Notice Period
# Bar chart of the second column of the percentage table (NotJoined) for
# each NoticePeriod value.
# No legend is requested: `temp` is a plain named vector, so the previous
# `legend = rownames(temp)` evaluated to NULL and never drew anything; the
# bars are already labelled by the names of `temp`.
temp <- PercentProportion[, 2]
bar <- barplot(
  temp,
  xlab = "NoticePeriod",
  ylab = "Percentage (%)",
  main = "Bar chart for % Of Candidates, Who did not join,split by Notice Period"
)
# Print each percentage just above the baseline of its bar.
text(bar, 0, temp, cex = 1, pos = 3)

Part 11: Percentage of the Candidates Who (Joined / Did Not Join), split by Joining Bonus
# Cross-tabulate JoiningBonus against Status and convert each row to
# percentages (every row sums to 100), rounded to one decimal place.
tab <- with(HR.df, table(JoiningBonus, Status))
PercentProportion <- round(100 * prop.table(tab, margin = 1), 1)
print(PercentProportion)
## Status
## JoiningBonus Joined NotJoined
## No 81.3 18.7
## Yes 80.6 19.4
Part 12: Bar Chart for % of Candidates, Who Did Not Join, Split by Joining Bonus
# Bar chart of the second column of the percentage table (NotJoined) for
# each JoiningBonus level.
# No legend is requested: `temp` is a plain named vector, so the previous
# `legend = rownames(temp)` evaluated to NULL and never drew anything; the
# bars are already labelled by the names of `temp`.
temp <- PercentProportion[, 2]
bar <- barplot(
  temp,
  xlab = "JoiningBonus",
  ylab = "Percentage (%)",
  main = "Bar chart for % Of Candidates, Who did not join,split by JoiningBonus"
)
# Print each percentage just above the baseline of its bar.
text(bar, 0, temp, cex = 1, pos = 3)

Part 13: Percentage of the Candidates Who (Joined / Did Not Join), Split by Gender
# Cross-tabulate Gender against Status and convert each row to
# percentages (every row sums to 100), rounded to one decimal place.
tab <- with(HR.df, table(Gender, Status))
PercentProportion <- round(100 * prop.table(tab, margin = 1), 1)
print(PercentProportion)
## Status
## Gender Joined NotJoined
## Female 82.4 17.6
## Male 81.1 18.9
Part 14: Bar Chart for % of the Candidates Who Did Not join, Split by Gender
# Bar chart of the second column of the percentage table (NotJoined) for
# each Gender level.
# The x-axis label now names the variable actually plotted (it previously
# read "NoticePeriod"). No legend is requested: `temp` is a plain named
# vector, so `legend = rownames(temp)` evaluated to NULL and drew nothing.
temp <- PercentProportion[, 2]
bar <- barplot(
  temp,
  xlab = "Gender",
  ylab = "Percentage (%)",
  main = "Bar chart for % Of Candidates, Who did not join,split by Gender"
)
# Print each percentage just above the baseline of its bar.
text(bar, 0, temp, cex = 1, pos = 3)

Part 15: Percentage of the Candidates Who (Joined / Did Not Join), Split by Candidate Source
# Cross-tabulate CandidateSource against Status and convert each row to
# percentages (every row sums to 100), rounded to one decimal place.
tab <- with(HR.df, table(CandidateSource, Status))
PercentProportion <- round(100 * prop.table(tab, margin = 1), 1)
print(PercentProportion)
## Status
## CandidateSource Joined NotJoined
## Agency 75.8 24.2
## Direct 82.0 18.0
## Employee Referral 88.0 12.0
Part 16: Bar Chart for % of Candidates, Who Did Not Join, Split by Candidate Source
# Bar chart of the second column of the percentage table (NotJoined) for
# each CandidateSource level.
# The x-axis label now names the variable actually plotted (it previously
# read "NoticePeriod"). No legend is requested: `temp` is a plain named
# vector, so `legend = rownames(temp)` evaluated to NULL and drew nothing.
temp <- PercentProportion[, 2]
bar <- barplot(
  temp,
  xlab = "CandidateSource",
  ylab = "Percentage (%)",
  main = "Bar Chart for % of Candidates Who Did Not Join the Company, Split by Candidate Source"
)
# Print each percentage just above the baseline of its bar.
text(bar, 0, temp, cex = 1, pos = 3)

Part 17: Percentage of the Candidates Who (Joined / Did Not Join), Split by Offered Band
# Cross-tabulate OfferedBand against Status and convert each row to
# percentages (every row sums to 100), rounded to one decimal place.
tab <- with(HR.df, table(OfferedBand, Status))
PercentProportion <- round(100 * prop.table(tab, margin = 1), 1)
print(PercentProportion)
## Status
## OfferedBand Joined NotJoined
## E0 76.3 23.7
## E1 81.3 18.7
## E2 81.0 19.0
## E3 85.1 14.9
Part 18: Bar Chart for % Candidates Who Did Not Join, Split by Offered Band
# Bar chart of the second column of the percentage table (NotJoined) for
# each OfferedBand level.
# The x-axis label now names the variable actually plotted (it previously
# read "NoticePeriod") and the title no longer repeats itself. No legend is
# requested: `temp` is a plain named vector, so `legend = rownames(temp)`
# evaluated to NULL and drew nothing.
temp <- PercentProportion[, 2]
bar <- barplot(
  temp,
  xlab = "OfferedBand",
  ylab = "Percentage (%)",
  main = "Bar Chart for % of Candidates Who Did Not Join the Company, Split by Offered Band"
)
# Print each percentage just above the baseline of its bar.
text(bar, 0, temp, cex = 1, pos = 3)

Part 19: Percentage of the Candidates Who (Joined / Did Not join), Split by Line of Business (LOB)
# Cross-tabulate LOB against Status and convert each row to
# percentages (every row sums to 100), rounded to one decimal place.
tab <- with(HR.df, table(LOB, Status))
PercentProportion <- round(100 * prop.table(tab, margin = 1), 1)
print(PercentProportion)
## Status
## LOB Joined NotJoined
## AXON 77.5 22.5
## BFSI 75.9 24.1
## CSMP 81.5 18.5
## EAS 73.4 26.6
## ERS 78.1 21.9
## ETS 83.1 16.9
## Healthcare 82.3 17.7
## INFRA 87.8 12.2
## MMS 100.0 0.0
Part 20: Bar Chart for % Candidates Who Did Not Join, Split by Line of Business (LOB)
# Bar chart of the second column of the percentage table (NotJoined) for
# each LOB level.
# The x-axis label now names the variable actually plotted (it previously
# read "NoticePeriod") and the title no longer repeats itself. No legend is
# requested: `temp` is a plain named vector, so `legend = rownames(temp)`
# evaluated to NULL and drew nothing.
temp <- PercentProportion[, 2]
bar <- barplot(
  temp,
  xlab = "LOB",
  ylab = "Percentage (%)",
  main = "Bar Chart for % of Candidates Who Did Not Join the Company, Split by LOB"
)
# Print each percentage just above the baseline of its bar.
text(bar, 0, temp, cex = 1, pos = 3)

Part 21: Average Age of the Candidates (Joined / Did Not join)
# Mean Age within each Status group
aggregate(x = HR.df$Age, by = list(HR.df$Status), FUN = mean)
## Group.1 x
## 1 Joined 30.00410
## 2 NotJoined 29.51784
Part 22: Mean Plot for the Age, Split by Status
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
# Plot the mean Age of each Status group with gplots::plotmeans(), asking
# for the mean values to be labelled on the plot. TRUE is spelled out
# because the shorthand `T` is an ordinary variable that can be reassigned.
plotmeans(Age ~ Status, data = HR.df, mean.labels = TRUE)

Part 23: Boxplot of Age, Split by Status (Joined / Did Not Join)
# Box plot of Age for each Status group, one fill colour per group.
# `mean.labels` is a plotmeans() option, not a boxplot() argument, and was
# silently ignored here, so it is no longer passed.
boxplot(Age ~ Status, data = HR.df, col = c("blue", "red"))

Part 24: Average Notice Period of the Candidates, Split by Status (Joined / Did Not join)
# Mean NoticePeriod within each Status group
aggregate(x = HR.df$NoticePeriod, by = list(HR.df$Status), FUN = mean)
## Group.1 x
## 1 Joined 37.24463
## 2 NotJoined 48.19263
Part 25: Mean Plot for Notice Period, Split by Status (Joined / Did Not join)
# Plot the mean NoticePeriod of each Status group with gplots::plotmeans(),
# asking for the mean values to be labelled on the plot. TRUE is spelled out
# because the shorthand `T` is an ordinary variable that can be reassigned.
plotmeans(NoticePeriod ~ Status, data = HR.df, mean.labels = TRUE)
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped

Part 26: Boxplot for Notice Period of the Candidates, Split by Status (Joined / Did Not join)
# Box plot of NoticePeriod for each Status group, one fill colour per group.
# `mean.labels` is a plotmeans() option, not a boxplot() argument, and was
# silently ignored here, so it is no longer passed.
boxplot(NoticePeriod ~ Status, data = HR.df, col = c("blue", "red"))

Part 27: Average (Relevant Years of Experience) of the candidates (Joined / Not joined)
# Mean RexInYrs (relevant years of experience) within each Status group
aggregate(x = HR.df$RexInYrs, by = list(HR.df$Status), FUN = mean)
## Group.1 x
## 1 Joined 4.192944
## 2 NotJoined 4.439358
Part 28: Mean Plot for Relevant Years of Experience, Split by Status (Joined / Not joined)
# Plot the mean RexInYrs of each Status group with gplots::plotmeans(),
# asking for the mean values to be labelled on the plot. TRUE is spelled out
# because the shorthand `T` is an ordinary variable that can be reassigned.
plotmeans(RexInYrs ~ Status, data = HR.df, mean.labels = TRUE)

Part 29: Boxplot of Relevant Years of Experience of the candidates (Joined / Not joined)
# Box plot of RexInYrs for each Status group, one fill colour per group.
# `mean.labels` is a plotmeans() option, not a boxplot() argument, and was
# silently ignored here, so it is no longer passed.
boxplot(RexInYrs ~ Status, data = HR.df, col = c("blue", "red"))

Part 30: Average of DurationToAcceptOffer (Number of days taken by the candidate to accept the offer) of candidates (Joined / Not joined)
# Mean DurationToAcceptOffer within each Status group
aggregate(x = HR.df$DurationToAcceptOffer, by = list(HR.df$Status), FUN = mean)
## Group.1 x
## 1 Joined 20.62437
## 2 NotJoined 24.95660
Part 31: Mean Plot for DurationToAcceptOffer, Split by Status (Joined / Not joined)
# Plot the mean DurationToAcceptOffer of each Status group with
# gplots::plotmeans(), asking for the mean values to be labelled on the plot.
# TRUE is spelled out because the shorthand `T` is an ordinary variable that
# can be reassigned.
plotmeans(DurationToAcceptOffer ~ Status, data = HR.df, mean.labels = TRUE)

Part 32: Boxplot of DurationToAcceptOffer (Number of days taken by the candidate to accept the offer) of the candidates (Joined / Not joined)
# Box plot of DurationToAcceptOffer for each Status group, one fill colour
# per group. `mean.labels` is a plotmeans() option, not a boxplot() argument,
# and was silently ignored here, so it is no longer passed.
boxplot(DurationToAcceptOffer ~ Status, data = HR.df, col = c("blue", "red"))

Part 33: Average (Age, Relevant Years of Experience, Number of days taken by the candidate to accept the offer, and Notice Period) of candidates (Joined / Not joined) by Gender (Male / Female)
# Mean of the four numeric columns within every Status x Gender combination
aggregate(
  x = HR.df[, c("Age", "RexInYrs", "DurationToAcceptOffer", "NoticePeriod")],
  by = list(HR.df$Status, HR.df$Gender),
  FUN = mean
)
## Group.1 Group.2 Age RexInYrs DurationToAcceptOffer NoticePeriod
## 1 Joined Female 29.08529 3.449922 19.43192 35.48122
## 2 NotJoined Female 28.02198 3.608059 24.10256 46.64835
## 3 Joined Male 30.19867 4.350290 20.87688 37.61806
## 4 NotJoined Male 29.80767 4.600426 25.12207 48.49184
Part 34: Correlation Matrix for all the Continuous Variables
# Correlation matrix of the continuous variables, selected by column name
# (these are columns 2, 3, 5, 6, 7, 12 and 15 of HR.df).
cor(HR.df[, c(
  "DurationToAcceptOffer", "NoticePeriod",
  "PercentHikeExpectedInCTC", "PercentHikeOfferedInCTC",
  "PercentDifferenceCTC", "RexInYrs", "Age"
)])
## DurationToAcceptOffer NoticePeriod
## DurationToAcceptOffer 1.000000000 0.363760039
## NoticePeriod 0.363760039 1.000000000
## PercentHikeExpectedInCTC 0.018820560 -0.005262408
## PercentHikeOfferedInCTC 0.007307491 -0.013457512
## PercentDifferenceCTC -0.009395037 -0.015066608
## RexInYrs 0.113951452 0.181786080
## Age 0.020131119 -0.004967358
## PercentHikeExpectedInCTC PercentHikeOfferedInCTC
## DurationToAcceptOffer 0.018820560 0.007307491
## NoticePeriod -0.005262408 -0.013457512
## PercentHikeExpectedInCTC 1.000000000 0.668594399
## PercentHikeOfferedInCTC 0.668594399 1.000000000
## PercentDifferenceCTC -0.161500548 0.598778760
## RexInYrs -0.217390695 -0.110243489
## Age -0.137705377 -0.080010484
## PercentDifferenceCTC RexInYrs Age
## DurationToAcceptOffer -0.009395037 0.1139515 0.020131119
## NoticePeriod -0.015066608 0.1817861 -0.004967358
## PercentHikeExpectedInCTC -0.161500548 -0.2173907 -0.137705377
## PercentHikeOfferedInCTC 0.598778760 -0.1102435 -0.080010484
## PercentDifferenceCTC 1.000000000 0.0815471 0.037955053
## RexInYrs 0.081547098 1.0000000 0.568227965
## Age 0.037955053 0.5682280 1.000000000
Part 35: Plotting Correlation Matrix
#install.packages("PerformanceAnalytics")
library(PerformanceAnalytics)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:gplots':
##
## textplot
## The following object is masked from 'package:graphics':
##
## legend
# Correlation chart (with histograms, solid points) of the continuous
# variables, selected by column name (columns 2, 3, 5, 6, 7, 12 and 15).
chart.Correlation(
  HR.df[, c(
    "DurationToAcceptOffer", "NoticePeriod",
    "PercentHikeExpectedInCTC", "PercentHikeOfferedInCTC",
    "PercentDifferenceCTC", "RexInYrs", "Age"
  )],
  histogram = TRUE,
  pch = 19
)

Part 36: Scatter Plot of Experience and Duration to Accept Offer by Status (Joined / Not Joined)
# Scatter plot of RexInYrs against DurationToAcceptOffer. Point colours are
# picked by indexing the palette with Status; the legend lists the Status
# levels in the same colour order.
with(HR.df, {
  plot(
    RexInYrs, DurationToAcceptOffer,
    col = c("red", "blue")[Status],
    main = "Scatter Plot of Experience and Duration to Accept Offer by Status"
  )
  legend(x = "topright", legend = levels(Status), col = c("red", "blue"), pch = 1)
})

Part 37: Scatter Plot of Experience and Notice Period by Status (Joined / Not Joined)
# Scatter plot of RexInYrs against NoticePeriod. Point colours are picked by
# indexing the palette with Status; the legend lists the Status levels in
# the same colour order.
with(HR.df, {
  plot(
    RexInYrs, NoticePeriod,
    col = c("red", "blue")[Status],
    main = "Scatter Plot of Experience and Notice Period by Status"
  )
  legend(x = "topright", legend = levels(Status), col = c("red", "blue"), pch = 1)
})

Part 38: Scatter Plot of Experience and Percent Hike (CTC) Expected by Candidate by Status (Joined / Not Joined)
# Scatter plot of RexInYrs against PercentHikeExpectedInCTC. Point colours
# are picked by indexing the palette with Status; the legend lists the
# Status levels in the same colour order.
with(HR.df, {
  plot(
    RexInYrs, PercentHikeExpectedInCTC,
    col = c("red", "blue")[Status],
    main = "Scatter Plot of Experience and Percent Hike (CTC) Expected by Candidate by Status "
  )
  legend(x = "topright", legend = levels(Status), col = c("red", "blue"), pch = 1)
})

Part 39: Scatter Plot of Experience and Percent Hike (CTC) Offered to Candidate by Status (Joined / Not Joined)
# Scatter plot of RexInYrs against PercentHikeOfferedInCTC. Point colours
# are picked by indexing the palette with Status; the legend lists the
# Status levels in the same colour order.
# The title now says the hike is offered *to* the candidate (it previously
# read "Offered by Candidate") and no longer ends in a stray space.
plot(
  RexInYrs, PercentHikeOfferedInCTC,
  col = c("red", "blue")[Status],
  main = "Scatter Plot of Experience and Percent Hike (CTC) Offered to Candidate by Status"
)
legend(x = "topright", legend = levels(Status), col = c("red", "blue"), pch = 1)
