Part 1: Read the data

# Load the R objects saved in "HRData.rda" into the workspace. The rest of the
# analysis works on the data frame "HR.df" (presumably restored by this call).
load("HRData.rda")

Part 2: Column names

# List the variable (column) names of HR.df
names(HR.df)
##  [1] "DOJExtended"              "DurationToAcceptOffer"   
##  [3] "NoticePeriod"             "OfferedBand"             
##  [5] "PercentHikeExpectedInCTC" "PercentHikeOfferedInCTC" 
##  [7] "PercentDifferenceCTC"     "JoiningBonus"            
##  [9] "CandidateRelocateActual"  "Gender"                  
## [11] "CandidateSource"          "RexInYrs"                
## [13] "LOB"                      "Location"                
## [15] "Age"                      "Status"

Part 3: Data Dimensions

# Number of rows followed by number of columns in HR.df
c(nrow(HR.df), ncol(HR.df))
## [1] 8995   16

Part 4: Descriptive Statistics of the dataframe

library(psych)
# Summary statistics per variable; keep only the count, mean, standard
# deviation and median columns of the describe() table (selected by name).
describe(HR.df)[, c("n", "mean", "sd", "median")]
##                             n  mean    sd median
## DOJExtended*             8995  1.47  0.50      1
## DurationToAcceptOffer    8995 21.43 25.81     10
## NoticePeriod             8995 39.29 22.22     30
## OfferedBand*             8995  2.39  0.63      2
## PercentHikeExpectedInCTC 8995 43.86 29.79     40
## PercentHikeOfferedInCTC  8995 40.66 36.06     36
## PercentDifferenceCTC     8995 -1.57 19.61      0
## JoiningBonus*            8995  1.05  0.21      1
## CandidateRelocateActual* 8995  1.14  0.35      1
## Gender*                  8995  1.83  0.38      2
## CandidateSource*         8995  1.89  0.67      2
## RexInYrs                 8995  4.24  2.55      4
## LOB*                     8995  5.18  2.38      5
## Location*                8995  4.94  3.00      3
## Age                      8995 29.91  4.10     29
## Status*                  8995  1.19  0.39      1

Part 5: Percentage of the candidates (Joined / Not joined)

# attach() puts the HR.df columns on the search path; later steps refer to
# columns such as Status and NoticePeriod by bare name, so it is kept here.
attach(HR.df)
# Frequency of each Status level, converted to a percentage (1 decimal place)
tab <- with(HR.df, table(Status))
PercentProportion <- round(100 * prop.table(tab), digits = 1)
PercentProportion
## Status
##    Joined NotJoined 
##      81.3      18.7

Part 6: Bar Chart for % of the Candidates Who Joined / Did Not Join the Company

# Bar chart of the Status percentages; barplot() returns the bar midpoints,
# which are reused to position the value labels.
bar <- barplot(
  PercentProportion,
  main = "% Of Candidates who joined / Not joined the company",
  xlab = "Status",
  ylab = "Percentage (%)",
  col = c("blue", "red"),
  legend.text = rownames(PercentProportion)
)
# Print each percentage just above the baseline (y = 0) of its bar
text(x = bar, y = 0, labels = PercentProportion, pos = 3, cex = 1)

Part 7: Percentage of the candidates Joined / Did Not Join the Company, Split by DOJ extended

# Cross-tabulate DOJExtended against Status and express each row as
# percentages (margin = 1 normalises within each DOJExtended level).
tab <- with(HR.df, table(DOJExtended, Status))
PercentProportion <- round(100 * prop.table(tab, margin = 1), digits = 1)
PercentProportion
##            Status
## DOJExtended Joined NotJoined
##         No    81.1      18.9
##         Yes   81.6      18.4

Part 8: Bar Chart for % of Candidates Who Did Not Join, Split by DOJ Extended

# Bar chart of the NotJoined percentage (column 2 of the row-percentage
# table) for each DOJExtended level.
temp <- PercentProportion[, 2]
# The former `legend = rownames(temp)` argument was removed: temp is a plain
# named vector, so rownames(temp) is NULL and no legend was ever drawn. The
# bar names already label each category.
bar <- barplot(
  temp,
  main = "Bar chart for % Of Candidates, Who did not join,split by  DOJExtended",
  xlab = "DOJExtended",
  ylab = "Percentage (%)"
)
# Print each percentage just above the baseline (y = 0) of its bar
text(x = bar, y = 0, labels = temp, pos = 3, cex = 1)

Part 9: Percentage of Candidates Who (Joined / Did Not Join), split by Notice Period

# Cross-tabulate NoticePeriod against Status and express each row as
# percentages (margin = 1 normalises within each NoticePeriod value).
tab <- with(HR.df, table(NoticePeriod, Status))
PercentProportion <- round(100 * prop.table(tab, margin = 1), digits = 1)
PercentProportion
##             Status
## NoticePeriod Joined NotJoined
##          0     93.4       6.6
##          30    85.2      14.8
##          45    75.5      24.5
##          60    73.2      26.8
##          75    68.2      31.8
##          90    66.2      33.8
##          120   52.4      47.6

Part 10: Bar Chart for % of Candidates, Who Did Not Join, Split by Notice Period

# Bar chart of the NotJoined percentage (column 2 of the row-percentage
# table) for each NoticePeriod value.
temp <- PercentProportion[, 2]
# The former `legend = rownames(temp)` argument was removed: temp is a plain
# named vector, so rownames(temp) is NULL and no legend was ever drawn.
bar <- barplot(
  temp,
  main = "Bar chart for % Of Candidates, Who did not join,split by Notice Period",
  xlab = "NoticePeriod",
  ylab = "Percentage (%)"
)
# Print each percentage just above the baseline (y = 0) of its bar
text(x = bar, y = 0, labels = temp, pos = 3, cex = 1)

Part 11: Percentage of the Candidates Who (Joined / Did Not Join), split by Joining Bonus

# Cross-tabulate JoiningBonus against Status and express each row as
# percentages (margin = 1 normalises within each JoiningBonus level).
tab <- with(HR.df, table(JoiningBonus, Status))
PercentProportion <- round(100 * prop.table(tab, margin = 1), digits = 1)
PercentProportion
##             Status
## JoiningBonus Joined NotJoined
##          No    81.3      18.7
##          Yes   80.6      19.4

Part 12: Bar Chart for % of Candidates, Who Did Not Join, Split by Joining Bonus

# Bar chart of the NotJoined percentage (column 2 of the row-percentage
# table) for each JoiningBonus level.
temp <- PercentProportion[, 2]
# The former `legend = rownames(temp)` argument was removed: temp is a plain
# named vector, so rownames(temp) is NULL and no legend was ever drawn.
bar <- barplot(
  temp,
  main = "Bar chart for % Of Candidates, Who did not join,split by JoiningBonus",
  xlab = "JoiningBonus",
  ylab = "Percentage (%)"
)
# Print each percentage just above the baseline (y = 0) of its bar
text(x = bar, y = 0, labels = temp, pos = 3, cex = 1)

Part 13: Percentage of the Candidates Who (Joined / Did Not Join), Split by Gender

# Cross-tabulate Gender against Status and express each row as percentages
# (margin = 1 normalises within each Gender level).
tab <- with(HR.df, table(Gender, Status))
PercentProportion <- round(100 * prop.table(tab, margin = 1), digits = 1)
PercentProportion
##         Status
## Gender   Joined NotJoined
##   Female   82.4      17.6
##   Male     81.1      18.9

Part 14: Bar Chart for % of the Candidates Who Did Not join, Split by Gender

# Bar chart of the NotJoined percentage (column 2 of the row-percentage
# table) for each Gender level.
temp <- PercentProportion[, 2]
# Fixes: the x-axis label was "NoticePeriod" although the bars are Gender
# levels; the `legend = rownames(temp)` argument was NULL for a plain named
# vector (no legend was ever drawn) and has been removed.
bar <- barplot(
  temp,
  main = "Bar chart for % Of Candidates, Who did not join,split by Gender",
  xlab = "Gender",
  ylab = "Percentage (%)"
)
# Print each percentage just above the baseline (y = 0) of its bar
text(x = bar, y = 0, labels = temp, pos = 3, cex = 1)

Part 15: Percentage of the Candidates Who (Joined / Did Not Join), Split by Candidate Source

# Cross-tabulate CandidateSource against Status and express each row as
# percentages (margin = 1 normalises within each CandidateSource level).
tab <- with(HR.df, table(CandidateSource, Status))
PercentProportion <- round(100 * prop.table(tab, margin = 1), digits = 1)
PercentProportion
##                    Status
## CandidateSource     Joined NotJoined
##   Agency              75.8      24.2
##   Direct              82.0      18.0
##   Employee Referral   88.0      12.0

Part 16: Bar Chart for % of Candidates, Who Did Not Join, Split by Candidate Source

# Bar chart of the NotJoined percentage (column 2 of the row-percentage
# table) for each CandidateSource level.
temp <- PercentProportion[, 2]
# Fixes: the x-axis label was "NoticePeriod" although the bars are
# CandidateSource levels; the `legend = rownames(temp)` argument was NULL for
# a plain named vector (no legend was ever drawn) and has been removed.
bar <- barplot(
  temp,
  main = "Bar Chart for % of Candidates Who Did Not Join the Company, Split by Candidate Source",
  xlab = "CandidateSource",
  ylab = "Percentage (%)"
)
# Print each percentage just above the baseline (y = 0) of its bar
text(x = bar, y = 0, labels = temp, pos = 3, cex = 1)

Part 17: Percentage of the Candidates Who (Joined / Did Not Join), Split by Offered Band

# Cross-tabulate OfferedBand against Status and express each row as
# percentages (margin = 1 normalises within each OfferedBand level).
tab <- with(HR.df, table(OfferedBand, Status))
PercentProportion <- round(100 * prop.table(tab, margin = 1), digits = 1)
PercentProportion
##            Status
## OfferedBand Joined NotJoined
##          E0   76.3      23.7
##          E1   81.3      18.7
##          E2   81.0      19.0
##          E3   85.1      14.9

Part 18: Bar Chart for % Candidates Who Did Not Join, Split by Offered Band

# Bar chart of the NotJoined percentage (column 2 of the row-percentage
# table) for each OfferedBand level.
temp <- PercentProportion[, 2]
# Fixes: the x-axis label was "NoticePeriod" although the bars are
# OfferedBand levels; the title repeated "Bar Chart for % Candidates Who Did
# Not Join, Split by" twice; the `legend = rownames(temp)` argument was NULL
# for a plain named vector (no legend was ever drawn) and has been removed.
bar <- barplot(
  temp,
  main = "Bar Chart for % of Candidates Who Did Not Join the Company, Split by Offered Band",
  xlab = "OfferedBand",
  ylab = "Percentage (%)"
)
# Print each percentage just above the baseline (y = 0) of its bar
text(x = bar, y = 0, labels = temp, pos = 3, cex = 1)

Part 19: Percentage of the Candidates Who (Joined / Did Not join), Split by Line of Business (LOB)

# Cross-tabulate LOB against Status and express each row as percentages
# (margin = 1 normalises within each LOB level).
tab <- with(HR.df, table(LOB, Status))
PercentProportion <- round(100 * prop.table(tab, margin = 1), digits = 1)
PercentProportion
##             Status
## LOB          Joined NotJoined
##   AXON         77.5      22.5
##   BFSI         75.9      24.1
##   CSMP         81.5      18.5
##   EAS          73.4      26.6
##   ERS          78.1      21.9
##   ETS          83.1      16.9
##   Healthcare   82.3      17.7
##   INFRA        87.8      12.2
##   MMS         100.0       0.0

Part 20: Bar Chart for % Candidates Who Did Not Join, Split by Line of Business (LOB)

# Bar chart of the NotJoined percentage (column 2 of the row-percentage
# table) for each LOB level.
temp <- PercentProportion[, 2]
# Fixes: the x-axis label was "NoticePeriod" although the bars are LOB
# levels; the title repeated "Bar Chart for % Candidates Who Did Not Join,
# Split by" twice; the `legend = rownames(temp)` argument was NULL for a
# plain named vector (no legend was ever drawn) and has been removed.
bar <- barplot(
  temp,
  main = "Bar Chart for % of Candidates Who Did Not Join the Company, Split by LOB",
  xlab = "LOB",
  ylab = "Percentage (%)"
)
# Print each percentage just above the baseline (y = 0) of its bar
text(x = bar, y = 0, labels = temp, pos = 3, cex = 1)

Part 21: Average Age of the Candidates (Joined / Did Not join)

# Mean Age within each Status group
with(HR.df, aggregate(x = Age, by = list(Status), FUN = mean))
##     Group.1        x
## 1    Joined 30.00410
## 2 NotJoined 29.51784

Part 22: Mean Plot for the Age, Split by Status

library(gplots)
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
# Plot the mean Age of each Status group, printing the mean values
plotmeans(Age ~ Status, data = HR.df, mean.labels = TRUE)

Part 23: Boxplot of Age, Split by Status (Joined / Did Not Join)

# Boxplot of Age for each Status group (one fill colour per group).
# `mean.labels` was removed: it is a gplots::plotmeans() option, not a
# boxplot() parameter.
boxplot(Age ~ Status, data = HR.df, col = c("blue", "red"))

Part 24: Average Notice Period of the Candidates, Split by Status (Joined / Did Not join)

# Mean NoticePeriod within each Status group
with(HR.df, aggregate(x = NoticePeriod, by = list(Status), FUN = mean))
##     Group.1        x
## 1    Joined 37.24463
## 2 NotJoined 48.19263

Part 25: Mean Plot for Notice Period, Split by Status (Joined / Did Not join)

# Plot the mean NoticePeriod of each Status group, printing the mean values
plotmeans(NoticePeriod ~ Status, data = HR.df, mean.labels = TRUE)
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped

Part 26: Boxplot for Notice Period of the Candidates, Split by Status (Joined / Did Not join)

# Boxplot of NoticePeriod for each Status group (one fill colour per group).
# `mean.labels` was removed: it is a gplots::plotmeans() option, not a
# boxplot() parameter.
boxplot(NoticePeriod ~ Status, data = HR.df, col = c("blue", "red"))

Part 27: Average (Relevant Years of Experience) of the candidates (Joined / Not joined)

# Mean RexInYrs (relevant years of experience) within each Status group
with(HR.df, aggregate(x = RexInYrs, by = list(Status), FUN = mean))
##     Group.1        x
## 1    Joined 4.192944
## 2 NotJoined 4.439358

Part 28: Mean Plot for the above

# Plot the mean RexInYrs of each Status group, printing the mean values
plotmeans(RexInYrs ~ Status, data = HR.df, mean.labels = TRUE)

Part 29: Boxplot of Relevant Years of Experience of the candidates (Joined / Not joined)

# Boxplot of RexInYrs for each Status group (one fill colour per group).
# `mean.labels` was removed: it is a gplots::plotmeans() option, not a
# boxplot() parameter.
boxplot(RexInYrs ~ Status, data = HR.df, col = c("blue", "red"))

Part 30: Average of DurationToAcceptOffer (Number of days taken by the candidate to accept the offer) of candidates (Joined / Not joined)

# Mean DurationToAcceptOffer within each Status group
with(HR.df, aggregate(x = DurationToAcceptOffer, by = list(Status), FUN = mean))
##     Group.1        x
## 1    Joined 20.62437
## 2 NotJoined 24.95660

Part 31: Mean Plot for the above

# Plot the mean DurationToAcceptOffer of each Status group, printing the
# mean values
plotmeans(DurationToAcceptOffer ~ Status, data = HR.df, mean.labels = TRUE)

Part 32: Boxplot of DurationToAcceptOffer (Number of days taken by the candidate to accept the offer) of the candidates (Joined / Not joined)

# Boxplot of DurationToAcceptOffer for each Status group (one fill colour
# per group). `mean.labels` was removed: it is a gplots::plotmeans() option,
# not a boxplot() parameter.
boxplot(DurationToAcceptOffer ~ Status, data = HR.df, col = c("blue", "red"))

Part 33: Average (Age, Relevant Years of Experience, Number of days taken by the candidate to accept the offer, and Notice Period) of candidates (Joined / Not joined) by Gender (Male / Female)

# Mean of four numeric variables for every Status x Gender combination
aggregate(
  HR.df[, c("Age", "RexInYrs", "DurationToAcceptOffer", "NoticePeriod")],
  by = with(HR.df, list(Status, Gender)),
  FUN = mean
)
##     Group.1 Group.2      Age RexInYrs DurationToAcceptOffer NoticePeriod
## 1    Joined  Female 29.08529 3.449922              19.43192     35.48122
## 2 NotJoined  Female 28.02198 3.608059              24.10256     46.64835
## 3    Joined    Male 30.19867 4.350290              20.87688     37.61806
## 4 NotJoined    Male 29.80767 4.600426              25.12207     48.49184

Part 34: Correlation Matrix for all the Continuous Variables

# Correlation matrix (cor() defaults) of the continuous variables. Columns
# are selected by name rather than by position (formerly c(2:3,5:7,12,15)) so
# the call is readable and does not depend on the column order of HR.df.
cor(HR.df[, c(
  "DurationToAcceptOffer", "NoticePeriod", "PercentHikeExpectedInCTC",
  "PercentHikeOfferedInCTC", "PercentDifferenceCTC", "RexInYrs", "Age"
)])
##                          DurationToAcceptOffer NoticePeriod
## DurationToAcceptOffer              1.000000000  0.363760039
## NoticePeriod                       0.363760039  1.000000000
## PercentHikeExpectedInCTC           0.018820560 -0.005262408
## PercentHikeOfferedInCTC            0.007307491 -0.013457512
## PercentDifferenceCTC              -0.009395037 -0.015066608
## RexInYrs                           0.113951452  0.181786080
## Age                                0.020131119 -0.004967358
##                          PercentHikeExpectedInCTC PercentHikeOfferedInCTC
## DurationToAcceptOffer                 0.018820560             0.007307491
## NoticePeriod                         -0.005262408            -0.013457512
## PercentHikeExpectedInCTC              1.000000000             0.668594399
## PercentHikeOfferedInCTC               0.668594399             1.000000000
## PercentDifferenceCTC                 -0.161500548             0.598778760
## RexInYrs                             -0.217390695            -0.110243489
## Age                                  -0.137705377            -0.080010484
##                          PercentDifferenceCTC   RexInYrs          Age
## DurationToAcceptOffer            -0.009395037  0.1139515  0.020131119
## NoticePeriod                     -0.015066608  0.1817861 -0.004967358
## PercentHikeExpectedInCTC         -0.161500548 -0.2173907 -0.137705377
## PercentHikeOfferedInCTC           0.598778760 -0.1102435 -0.080010484
## PercentDifferenceCTC              1.000000000  0.0815471  0.037955053
## RexInYrs                          0.081547098  1.0000000  0.568227965
## Age                               0.037955053  0.5682280  1.000000000

Part 35: Plotting Correlation Matrix

#install.packages("PerformanceAnalytics")

library(PerformanceAnalytics)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:gplots':
## 
##     textplot
## The following object is masked from 'package:graphics':
## 
##     legend
# Correlation chart of the continuous variables, with histograms enabled and
# solid points (pch = 19). Columns are selected by name rather than by
# position (formerly c(2:3,5:7,12,15)) so the call does not depend on the
# column order of HR.df.
chart.Correlation(
  HR.df[, c(
    "DurationToAcceptOffer", "NoticePeriod", "PercentHikeExpectedInCTC",
    "PercentHikeOfferedInCTC", "PercentDifferenceCTC", "RexInYrs", "Age"
  )],
  histogram = TRUE,
  pch = 19
)

Part 36: Scatter Plot of Experience and Duration to Accept Offer by Status (Joined / Not Joined)

# Scatter plot of DurationToAcceptOffer against RexInYrs; each point is
# coloured by its Status level (first level red, second level blue).
with(HR.df, plot(
  RexInYrs, DurationToAcceptOffer,
  col = c("red", "blue")[Status],
  main = "Scatter Plot of Experience and Duration to Accept Offer by Status"
))
# Legend entries follow the level order used for the point colours
legend("topright", legend = levels(HR.df$Status), col = c("red", "blue"), pch = 1)

Part 37: Scatter Plot of Experience and Notice Period by Status (Joined / Not Joined)

# Scatter plot of NoticePeriod against RexInYrs; each point is coloured by
# its Status level (first level red, second level blue).
with(HR.df, plot(
  RexInYrs, NoticePeriod,
  col = c("red", "blue")[Status],
  main = "Scatter Plot of Experience and Notice Period by Status"
))
# Legend entries follow the level order used for the point colours
legend("topright", legend = levels(HR.df$Status), col = c("red", "blue"), pch = 1)

Part 38: Scatter Plot of Experience and Percent Hike (CTC) Expected by Candidate by Status (Joined / Not Joined)

# Scatter plot of PercentHikeExpectedInCTC against RexInYrs; each point is
# coloured by its Status level (first level red, second level blue).
with(HR.df, plot(
  RexInYrs, PercentHikeExpectedInCTC,
  col = c("red", "blue")[Status],
  main = "Scatter Plot of Experience and Percent Hike (CTC) Expected by Candidate by Status "
))
# Legend entries follow the level order used for the point colours
legend("topright", legend = levels(HR.df$Status), col = c("red", "blue"), pch = 1)

Part 39: Scatter Plot of Experience and Percent Hike (CTC) Offered to Candidate by Status (Joined / Not Joined)

# Scatter plot of PercentHikeOfferedInCTC against RexInYrs; each point is
# coloured by its Status level (first level red, second level blue).
# Title fix: the hike is offered *to* the candidate, not *by* the candidate.
with(HR.df, plot(
  RexInYrs, PercentHikeOfferedInCTC,
  col = c("red", "blue")[Status],
  main = "Scatter Plot of Experience and Percent Hike (CTC) Offered to Candidate by Status"
))
# Legend entries follow the level order used for the point colours
legend("topright", legend = levels(HR.df$Status), col = c("red", "blue"), pch = 1)