# Load the HR employee-attrition CSV into att.df and inspect its structure.
# NOTE(review): the working directory is a machine-specific absolute path;
# it must be adjusted before running on another machine.
setwd("C:/Users/CJ With HP/Desktop/IIM Lucknow/Datasets")
# stringsAsFactors = TRUE keeps the text columns as factors, matching the
# str() output recorded below; since R 4.0.0 read.csv() would otherwise
# return them as character vectors.
att.df <- read.csv("WA_Fn-UseC_-HR-Employee-Attrition.csv",
                   stringsAsFactors = TRUE)
# Force the first column's name to "Age".
# NOTE(review): presumably the header is read with a mangled first name
# (e.g. a byte-order mark) - confirm against the raw file.
names(att.df)[1] <- "Age"
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(att.df)
}
# Later statements refer to columns by bare name, so the data frame is kept
# on the search path.
attach(att.df)
str(att.df)
## 'data.frame': 1470 obs. of 35 variables:
## $ Age : int 41 49 37 33 27 32 59 30 38 36 ...
## $ Attrition : Factor w/ 2 levels "No","Yes": 2 1 2 1 1 1 1 1 1 1 ...
## $ BusinessTravel : Factor w/ 3 levels "Non-Travel","Travel_Frequently",..: 3 2 3 2 3 2 3 3 2 3 ...
## $ DailyRate : int 1102 279 1373 1392 591 1005 1324 1358 216 1299 ...
## $ Department : Factor w/ 3 levels "Human Resources",..: 3 2 2 2 2 2 2 2 2 2 ...
## $ DistanceFromHome : int 1 8 2 3 2 2 3 24 23 27 ...
## $ Education : int 2 1 2 4 1 2 3 1 3 3 ...
## $ EducationField : Factor w/ 6 levels "Human Resources",..: 2 2 5 2 4 2 4 2 2 4 ...
## $ EmployeeCount : int 1 1 1 1 1 1 1 1 1 1 ...
## $ EmployeeNumber : int 1 2 4 5 7 8 10 11 12 13 ...
## $ EnvironmentSatisfaction : int 2 3 4 4 1 4 3 4 4 3 ...
## $ Gender : Factor w/ 2 levels "Female","Male": 1 2 2 1 2 2 1 2 2 2 ...
## $ HourlyRate : int 94 61 92 56 40 79 81 67 44 94 ...
## $ JobInvolvement : int 3 2 2 3 3 3 4 3 2 3 ...
## $ JobLevel : int 2 2 1 1 1 1 1 1 3 2 ...
## $ JobRole : Factor w/ 9 levels "Healthcare Representative",..: 8 7 3 7 3 3 3 3 5 1 ...
## $ JobSatisfaction : int 4 2 3 3 2 4 1 3 3 3 ...
## $ MaritalStatus : Factor w/ 3 levels "Divorced","Married",..: 3 2 3 2 2 3 2 1 3 2 ...
## $ MonthlyIncome : int 5993 5130 2090 2909 3468 3068 2670 2693 9526 5237 ...
## $ MonthlyRate : int 19479 24907 2396 23159 16632 11864 9964 13335 8787 16577 ...
## $ NumCompaniesWorked : int 8 1 6 1 9 0 4 1 0 6 ...
## $ Over18 : Factor w/ 1 level "Y": 1 1 1 1 1 1 1 1 1 1 ...
## $ OverTime : Factor w/ 2 levels "No","Yes": 2 1 2 2 1 1 2 1 1 1 ...
## $ PercentSalaryHike : int 11 23 15 11 12 13 20 22 21 13 ...
## $ PerformanceRating : int 3 4 3 3 3 3 4 4 4 3 ...
## $ RelationshipSatisfaction: int 1 4 2 3 4 3 1 2 2 2 ...
## $ StandardHours : int 80 80 80 80 80 80 80 80 80 80 ...
## $ StockOptionLevel : int 0 1 0 0 1 0 3 1 0 2 ...
## $ TotalWorkingYears : int 8 10 7 8 6 8 12 1 10 17 ...
## $ TrainingTimesLastYear : int 0 3 3 3 3 2 3 2 2 3 ...
## $ WorkLifeBalance : int 1 3 3 3 3 2 2 3 3 2 ...
## $ YearsAtCompany : int 6 10 0 8 2 7 1 1 9 7 ...
## $ YearsInCurrentRole : int 4 7 0 7 2 7 0 0 7 7 ...
## $ YearsSinceLastPromotion : int 0 1 0 3 2 3 0 0 1 7 ...
## $ YearsWithCurrManager : int 5 7 0 0 2 6 0 0 8 7 ...
library(psych)
# Per-column descriptive statistics (n, mean, sd, median, ..., se) for att.df.
psych::describe(x = att.df)
## vars n mean sd median trimmed
## Age 1 1470 36.92 9.14 36.0 36.47
## Attrition* 2 1470 1.16 0.37 1.0 1.08
## BusinessTravel* 3 1470 2.61 0.67 3.0 2.76
## DailyRate 4 1470 802.49 403.51 802.0 803.83
## Department* 5 1470 2.26 0.53 2.0 2.25
## DistanceFromHome 6 1470 9.19 8.11 7.0 8.08
## Education 7 1470 2.91 1.02 3.0 2.98
## EducationField* 8 1470 3.25 1.33 3.0 3.10
## EmployeeCount 9 1470 1.00 0.00 1.0 1.00
## EmployeeNumber 10 1470 1024.87 602.02 1020.5 1023.40
## EnvironmentSatisfaction 11 1470 2.72 1.09 3.0 2.78
## Gender* 12 1470 1.60 0.49 2.0 1.62
## HourlyRate 13 1470 65.89 20.33 66.0 66.02
## JobInvolvement 14 1470 2.73 0.71 3.0 2.74
## JobLevel 15 1470 2.06 1.11 2.0 1.90
## JobRole* 16 1470 5.46 2.46 6.0 5.61
## JobSatisfaction 17 1470 2.73 1.10 3.0 2.79
## MaritalStatus* 18 1470 2.10 0.73 2.0 2.12
## MonthlyIncome 19 1470 6502.93 4707.96 4919.0 5667.24
## MonthlyRate 20 1470 14313.10 7117.79 14235.5 14286.48
## NumCompaniesWorked 21 1470 2.69 2.50 2.0 2.36
## Over18* 22 1470 1.00 0.00 1.0 1.00
## OverTime* 23 1470 1.28 0.45 1.0 1.23
## PercentSalaryHike 24 1470 15.21 3.66 14.0 14.80
## PerformanceRating 25 1470 3.15 0.36 3.0 3.07
## RelationshipSatisfaction 26 1470 2.71 1.08 3.0 2.77
## StandardHours 27 1470 80.00 0.00 80.0 80.00
## StockOptionLevel 28 1470 0.79 0.85 1.0 0.67
## TotalWorkingYears 29 1470 11.28 7.78 10.0 10.37
## TrainingTimesLastYear 30 1470 2.80 1.29 3.0 2.72
## WorkLifeBalance 31 1470 2.76 0.71 3.0 2.77
## YearsAtCompany 32 1470 7.01 6.13 5.0 5.99
## YearsInCurrentRole 33 1470 4.23 3.62 3.0 3.85
## YearsSinceLastPromotion 34 1470 2.19 3.22 1.0 1.48
## YearsWithCurrManager 35 1470 4.12 3.57 3.0 3.77
## mad min max range skew kurtosis se
## Age 8.90 18 60 42 0.41 -0.41 0.24
## Attrition* 0.00 1 2 1 1.84 1.39 0.01
## BusinessTravel* 0.00 1 3 2 -1.44 0.69 0.02
## DailyRate 510.01 102 1499 1397 0.00 -1.21 10.52
## Department* 0.00 1 3 2 0.17 -0.40 0.01
## DistanceFromHome 7.41 1 29 28 0.96 -0.23 0.21
## Education 1.48 1 5 4 -0.29 -0.56 0.03
## EducationField* 1.48 1 6 5 0.55 -0.69 0.03
## EmployeeCount 0.00 1 1 0 NaN NaN 0.00
## EmployeeNumber 790.97 1 2068 2067 0.02 -1.23 15.70
## EnvironmentSatisfaction 1.48 1 4 3 -0.32 -1.20 0.03
## Gender* 0.00 1 2 1 -0.41 -1.83 0.01
## HourlyRate 26.69 30 100 70 -0.03 -1.20 0.53
## JobInvolvement 0.00 1 4 3 -0.50 0.26 0.02
## JobLevel 1.48 1 5 4 1.02 0.39 0.03
## JobRole* 2.97 1 9 8 -0.36 -1.20 0.06
## JobSatisfaction 1.48 1 4 3 -0.33 -1.22 0.03
## MaritalStatus* 1.48 1 3 2 -0.15 -1.12 0.02
## MonthlyIncome 3260.24 1009 19999 18990 1.37 0.99 122.79
## MonthlyRate 9201.76 2094 26999 24905 0.02 -1.22 185.65
## NumCompaniesWorked 1.48 0 9 9 1.02 0.00 0.07
## Over18* 0.00 1 1 0 NaN NaN 0.00
## OverTime* 0.00 1 2 1 0.96 -1.07 0.01
## PercentSalaryHike 2.97 11 25 14 0.82 -0.31 0.10
## PerformanceRating 0.00 3 4 1 1.92 1.68 0.01
## RelationshipSatisfaction 1.48 1 4 3 -0.30 -1.19 0.03
## StandardHours 0.00 80 80 0 NaN NaN 0.00
## StockOptionLevel 1.48 0 3 3 0.97 0.35 0.02
## TotalWorkingYears 5.93 0 40 40 1.11 0.91 0.20
## TrainingTimesLastYear 1.48 0 6 6 0.55 0.48 0.03
## WorkLifeBalance 0.00 1 4 3 -0.55 0.41 0.02
## YearsAtCompany 4.45 0 40 40 1.76 3.91 0.16
## YearsInCurrentRole 4.45 0 18 18 0.92 0.47 0.09
## YearsSinceLastPromotion 1.48 0 15 15 1.98 3.59 0.08
## YearsWithCurrManager 4.45 0 17 17 0.83 0.16 0.09
# Count of employees per Attrition level, read from att.df explicitly
# instead of through the attach()ed search path.
with(att.df, table(Attrition))
## Attrition
## No Yes
## 1233 237
# Count of employees per BusinessTravel level, read from att.df explicitly
# instead of through the attach()ed search path.
with(att.df, table(BusinessTravel))
## BusinessTravel
## Non-Travel Travel_Frequently Travel_Rarely
## 150 277 1043
# Count of employees per Department, read from att.df explicitly
# instead of through the attach()ed search path.
with(att.df, table(Department))
## Department
## Human Resources Research & Development Sales
## 63 961 446
# Count of employees per EducationField, read from att.df explicitly
# instead of through the attach()ed search path.
with(att.df, table(EducationField))
## EducationField
## Human Resources Life Sciences Marketing Medical
## 27 606 159 464
## Other Technical Degree
## 82 132
# Count of employees per Education code, read from att.df explicitly
# instead of through the attach()ed search path.
with(att.df, table(Education))
## Education
## 1 2 3 4 5
## 170 282 572 398 48
# Count of employees per EnvironmentSatisfaction score, read from att.df
# explicitly instead of through the attach()ed search path.
with(att.df, table(EnvironmentSatisfaction))
## EnvironmentSatisfaction
## 1 2 3 4
## 284 287 453 446
# Count of employees per Gender, read from att.df explicitly
# instead of through the attach()ed search path.
with(att.df, table(Gender))
## Gender
## Female Male
## 588 882
# Count of employees per JobInvolvement score, read from att.df explicitly
# instead of through the attach()ed search path.
with(att.df, table(JobInvolvement))
## JobInvolvement
## 1 2 3 4
## 83 375 868 144
# Count of employees per JobRole, read from att.df explicitly
# instead of through the attach()ed search path.
with(att.df, table(JobRole))
## JobRole
## Healthcare Representative Human Resources
## 131 52
## Laboratory Technician Manager
## 259 102
## Manufacturing Director Research Director
## 145 80
## Research Scientist Sales Executive
## 292 326
## Sales Representative
## 83
# Count of employees per JobLevel, read from att.df explicitly
# instead of through the attach()ed search path.
with(att.df, table(JobLevel))
## JobLevel
## 1 2 3 4 5
## 543 534 218 106 69
# Count of employees per JobSatisfaction score. The column is taken from
# att.df explicitly: a bare JobSatisfaction lookup is masked once the vcd
# package (which has an object of the same name) is attached.
with(att.df, table(JobSatisfaction))
## JobSatisfaction
## 1 2 3 4
## 289 280 442 459
# Count of employees per MaritalStatus, read from att.df explicitly
# instead of through the attach()ed search path.
with(att.df, table(MaritalStatus))
## MaritalStatus
## Divorced Married Single
## 327 673 470
# Count of employees per OverTime level, read from att.df explicitly
# instead of through the attach()ed search path.
with(att.df, table(OverTime))
## OverTime
## No Yes
## 1054 416
# Attrition split (row percentages, 2 decimals) within each PerformanceRating.
mytable <- xtabs(~ PerformanceRating + Attrition, data = att.df)
round(100 * prop.table(mytable, margin = 1), digits = 2)
## Attrition
## PerformanceRating No Yes
## 3 83.92 16.08
## 4 83.63 16.37
# Attrition split (row percentages, 2 decimals) within each OverTime level.
mytable <- xtabs(~ OverTime + Attrition, data = att.df)
round(100 * prop.table(mytable, margin = 1), digits = 2)
## Attrition
## OverTime No Yes
## No 89.56 10.44
## Yes 69.47 30.53
# Attrition split (row percentages, 2 decimals) within each WorkLifeBalance
# score.
mytable <- xtabs(~ WorkLifeBalance + Attrition, data = att.df)
round(100 * prop.table(mytable, margin = 1), digits = 2)
## Attrition
## WorkLifeBalance No Yes
## 1 68.75 31.25
## 2 83.14 16.86
## 3 85.78 14.22
## 4 82.35 17.65
# Attrition split (row percentages, 2 decimals) within each JobRole.
mytable <- xtabs(~ JobRole + Attrition, data = att.df)
round(100 * prop.table(mytable, margin = 1), digits = 2)
## Attrition
## JobRole No Yes
## Healthcare Representative 93.13 6.87
## Human Resources 76.92 23.08
## Laboratory Technician 76.06 23.94
## Manager 95.10 4.90
## Manufacturing Director 93.10 6.90
## Research Director 97.50 2.50
## Research Scientist 83.90 16.10
## Sales Executive 82.52 17.48
## Sales Representative 60.24 39.76
# Attrition split (row percentages, 2 decimals) for each value of
# NumCompaniesWorked.
mytable <- xtabs(~ NumCompaniesWorked + Attrition, data = att.df)
round(100 * prop.table(mytable, margin = 1), digits = 2)
## Attrition
## NumCompaniesWorked No Yes
## 0 88.32 11.68
## 1 81.19 18.81
## 2 89.04 10.96
## 3 89.94 10.06
## 4 87.77 12.23
## 5 74.60 25.40
## 6 77.14 22.86
## 7 77.03 22.97
## 8 87.76 12.24
## 9 76.92 23.08
# Attrition split (row percentages, 2 decimals) within each MaritalStatus.
mytable <- xtabs(~ MaritalStatus + Attrition, data = att.df)
round(100 * prop.table(mytable, margin = 1), digits = 2)
## Attrition
## MaritalStatus No Yes
## Divorced 89.91 10.09
## Married 87.52 12.48
## Single 74.47 25.53
# Attrition split (row percentages, 2 decimals) within each Gender.
mytable <- xtabs(~ Gender + Attrition, data = att.df)
round(100 * prop.table(mytable, margin = 1), digits = 2)
## Attrition
## Gender No Yes
## Female 85.20 14.80
## Male 82.99 17.01
# Attrition split (row percentages, 2 decimals) within each
# EnvironmentSatisfaction score.
mytable <- xtabs(~ EnvironmentSatisfaction + Attrition, data = att.df)
round(100 * prop.table(mytable, margin = 1), digits = 2)
## Attrition
## EnvironmentSatisfaction No Yes
## 1 74.65 25.35
## 2 85.02 14.98
## 3 86.31 13.69
## 4 86.55 13.45
# Attrition split (row percentages, 2 decimals) within each BusinessTravel
# level.
mytable <- xtabs(~ BusinessTravel + Attrition, data = att.df)
round(100 * prop.table(mytable, margin = 1), digits = 2)
## Attrition
## BusinessTravel No Yes
## Non-Travel 92.00 8.00
## Travel_Frequently 75.09 24.91
## Travel_Rarely 85.04 14.96
# Attrition split (row percentages, 2 decimals) within each EducationField.
mytable <- xtabs(~ EducationField + Attrition, data = att.df)
round(100 * prop.table(mytable, margin = 1), digits = 2)
## Attrition
## EducationField No Yes
## Human Resources 74.07 25.93
## Life Sciences 85.31 14.69
## Marketing 77.99 22.01
## Medical 86.42 13.58
## Other 86.59 13.41
## Technical Degree 75.76 24.24
# Attrition split (row percentages, 2 decimals) within each JobSatisfaction
# score.
mytable <- xtabs(~ JobSatisfaction + Attrition, data = att.df)
round(100 * prop.table(mytable, margin = 1), digits = 2)
## Attrition
## JobSatisfaction No Yes
## 1 77.16 22.84
## 2 83.57 16.43
## 3 83.48 16.52
## 4 88.67 11.33
# Attrition split (row percentages, 2 decimals) within each Education code.
mytable <- xtabs(~ Education + Attrition, data = att.df)
round(100 * prop.table(mytable, margin = 1), digits = 2)
## Attrition
## Education No Yes
## 1 81.76 18.24
## 2 84.40 15.60
## 3 82.69 17.31
## 4 85.43 14.57
## 5 89.58 10.42
# Draw a horizontal box plot of one att.df column, split by Attrition.
#   column: name of the att.df column to plot (character scalar).
#   label:  x-axis label; defaults to the column name.
# Columns are read from att.df directly, so the plot does not depend on the
# attach()ed search path. Returns boxplot()'s value invisibly.
attrition_boxplot <- function(column, label = column) {
  invisible(boxplot(
    att.df[[column]] ~ att.df$Attrition,
    main = "Boxplot",
    xlab = label,
    ylab = "Attrition(Yes/No)",
    horizontal = TRUE,
    col = c("pink", "lightblue")
  ))
}

attrition_boxplot("Age")
attrition_boxplot("DistanceFromHome")
attrition_boxplot("MonthlyIncome")
# The axis label for this column is kept exactly as originally written.
attrition_boxplot("YearsWithCurrManager", label = "YearsWithCurManager")
attrition_boxplot("MonthlyRate")
attrition_boxplot("DailyRate")
attrition_boxplot("HourlyRate")
# Density histogram of Age. freq = FALSE plots densities rather than counts,
# so the y-axis is labelled accordingly.
hist(att.df$Age, xlab = "age", ylab = "density", breaks = 20,
     main = "Age variability in the company", col = "lightblue",
     freq = FALSE)
# Count histograms of four numeric columns. Each column is read from att.df
# directly rather than through the attach()ed search path; titles and axis
# labels are passed explicitly, so the drawn plots are unchanged.
hist(att.df$MonthlyIncome, xlab = "MonthlyIncome", ylab = "count",
     breaks = 20, main = "MonthlyIncome", col = "lightblue",
     ylim = c(0, 400))
hist(att.df$YearsAtCompany, xlab = "YearsAtCompany", ylab = "count",
     breaks = 20, main = "YearsAtcompany", col = "lightblue",
     ylim = c(0, 400))
hist(att.df$YearsWithCurrManager, xlab = "YearswithCurManager",
     ylab = "count", breaks = 20, main = "YearsWithCurManager",
     col = "lightblue", ylim = c(0, 400))
hist(att.df$PercentSalaryHike, xlab = "PercentSalaryHike", ylab = "count",
     breaks = 20, main = "PercentSalaryHike", col = "lightblue")
library(lattice)
# Conditioned lattice histograms. data = att.df makes every formula variable
# resolve inside the data frame instead of the attach()ed search path.
# Attrition, one panel per JobRole.
histogram(~ Attrition | JobRole, data = att.df)
# Attrition, one panel per Department, laid out as 4 columns by 1 row.
histogram(~ Attrition | Department, data = att.df,
          layout = c(4, 1), col = c("lightblue", "pink"))
# PercentSalaryHike and Education, one panel per Attrition level.
histogram(~ PercentSalaryHike | Attrition, data = att.df)
histogram(~ Education | Attrition, data = att.df)
# Pairwise correlations (rounded to 2 decimals) between the numeric columns
# listed below. Columns are selected by name rather than by position so the
# selection survives any reordering of att.df; the names are the ones shown
# in the recorded correlation output.
cor.cols <- c(
  "Age", "DailyRate", "DistanceFromHome", "Education",
  "EnvironmentSatisfaction", "HourlyRate", "JobInvolvement", "JobLevel",
  "JobSatisfaction", "MonthlyIncome", "MonthlyRate", "NumCompaniesWorked",
  "PercentSalaryHike", "PerformanceRating", "RelationshipSatisfaction",
  "TotalWorkingYears", "TrainingTimesLastYear", "WorkLifeBalance",
  "YearsAtCompany", "YearsInCurrentRole", "YearsSinceLastPromotion",
  "YearsWithCurrManager"
)
round(cor(att.df[, cor.cols]), 2)
## Age DailyRate DistanceFromHome Education
## Age 1.00 0.01 0.00 0.21
## DailyRate 0.01 1.00 0.00 -0.02
## DistanceFromHome 0.00 0.00 1.00 0.02
## Education 0.21 -0.02 0.02 1.00
## EnvironmentSatisfaction 0.01 0.02 -0.02 -0.03
## HourlyRate 0.02 0.02 0.03 0.02
## JobInvolvement 0.03 0.05 0.01 0.04
## JobLevel 0.51 0.00 0.01 0.10
## JobSatisfaction 0.00 0.03 0.00 -0.01
## MonthlyIncome 0.50 0.01 -0.02 0.09
## MonthlyRate 0.03 -0.03 0.03 -0.03
## NumCompaniesWorked 0.30 0.04 -0.03 0.13
## PercentSalaryHike 0.00 0.02 0.04 -0.01
## PerformanceRating 0.00 0.00 0.03 -0.02
## RelationshipSatisfaction 0.05 0.01 0.01 -0.01
## TotalWorkingYears 0.68 0.01 0.00 0.15
## TrainingTimesLastYear -0.02 0.00 -0.04 -0.03
## WorkLifeBalance -0.02 -0.04 -0.03 0.01
## YearsAtCompany 0.31 -0.03 0.01 0.07
## YearsInCurrentRole 0.21 0.01 0.02 0.06
## YearsSinceLastPromotion 0.22 -0.03 0.01 0.05
## YearsWithCurrManager 0.20 -0.03 0.01 0.07
## EnvironmentSatisfaction HourlyRate JobInvolvement
## Age 0.01 0.02 0.03
## DailyRate 0.02 0.02 0.05
## DistanceFromHome -0.02 0.03 0.01
## Education -0.03 0.02 0.04
## EnvironmentSatisfaction 1.00 -0.05 -0.01
## HourlyRate -0.05 1.00 0.04
## JobInvolvement -0.01 0.04 1.00
## JobLevel 0.00 -0.03 -0.01
## JobSatisfaction -0.01 -0.07 -0.02
## MonthlyIncome -0.01 -0.02 -0.02
## MonthlyRate 0.04 -0.02 -0.02
## NumCompaniesWorked 0.01 0.02 0.02
## PercentSalaryHike -0.03 -0.01 -0.02
## PerformanceRating -0.03 0.00 -0.03
## RelationshipSatisfaction 0.01 0.00 0.03
## TotalWorkingYears 0.00 0.00 -0.01
## TrainingTimesLastYear -0.02 -0.01 -0.02
## WorkLifeBalance 0.03 0.00 -0.01
## YearsAtCompany 0.00 -0.02 -0.02
## YearsInCurrentRole 0.02 -0.02 0.01
## YearsSinceLastPromotion 0.02 -0.03 -0.02
## YearsWithCurrManager 0.00 -0.02 0.03
## JobLevel JobSatisfaction MonthlyIncome
## Age 0.51 0.00 0.50
## DailyRate 0.00 0.03 0.01
## DistanceFromHome 0.01 0.00 -0.02
## Education 0.10 -0.01 0.09
## EnvironmentSatisfaction 0.00 -0.01 -0.01
## HourlyRate -0.03 -0.07 -0.02
## JobInvolvement -0.01 -0.02 -0.02
## JobLevel 1.00 0.00 0.95
## JobSatisfaction 0.00 1.00 -0.01
## MonthlyIncome 0.95 -0.01 1.00
## MonthlyRate 0.04 0.00 0.03
## NumCompaniesWorked 0.14 -0.06 0.15
## PercentSalaryHike -0.03 0.02 -0.03
## PerformanceRating -0.02 0.00 -0.02
## RelationshipSatisfaction 0.02 -0.01 0.03
## TotalWorkingYears 0.78 -0.02 0.77
## TrainingTimesLastYear -0.02 -0.01 -0.02
## WorkLifeBalance 0.04 -0.02 0.03
## YearsAtCompany 0.53 0.00 0.51
## YearsInCurrentRole 0.39 0.00 0.36
## YearsSinceLastPromotion 0.35 -0.02 0.34
## YearsWithCurrManager 0.38 -0.03 0.34
## MonthlyRate NumCompaniesWorked PercentSalaryHike
## Age 0.03 0.30 0.00
## DailyRate -0.03 0.04 0.02
## DistanceFromHome 0.03 -0.03 0.04
## Education -0.03 0.13 -0.01
## EnvironmentSatisfaction 0.04 0.01 -0.03
## HourlyRate -0.02 0.02 -0.01
## JobInvolvement -0.02 0.02 -0.02
## JobLevel 0.04 0.14 -0.03
## JobSatisfaction 0.00 -0.06 0.02
## MonthlyIncome 0.03 0.15 -0.03
## MonthlyRate 1.00 0.02 -0.01
## NumCompaniesWorked 0.02 1.00 -0.01
## PercentSalaryHike -0.01 -0.01 1.00
## PerformanceRating -0.01 -0.01 0.77
## RelationshipSatisfaction 0.00 0.05 -0.04
## TotalWorkingYears 0.03 0.24 -0.02
## TrainingTimesLastYear 0.00 -0.07 -0.01
## WorkLifeBalance 0.01 -0.01 0.00
## YearsAtCompany -0.02 -0.12 -0.04
## YearsInCurrentRole -0.01 -0.09 0.00
## YearsSinceLastPromotion 0.00 -0.04 -0.02
## YearsWithCurrManager -0.04 -0.11 -0.01
## PerformanceRating RelationshipSatisfaction
## Age 0.00 0.05
## DailyRate 0.00 0.01
## DistanceFromHome 0.03 0.01
## Education -0.02 -0.01
## EnvironmentSatisfaction -0.03 0.01
## HourlyRate 0.00 0.00
## JobInvolvement -0.03 0.03
## JobLevel -0.02 0.02
## JobSatisfaction 0.00 -0.01
## MonthlyIncome -0.02 0.03
## MonthlyRate -0.01 0.00
## NumCompaniesWorked -0.01 0.05
## PercentSalaryHike 0.77 -0.04
## PerformanceRating 1.00 -0.03
## RelationshipSatisfaction -0.03 1.00
## TotalWorkingYears 0.01 0.02
## TrainingTimesLastYear -0.02 0.00
## WorkLifeBalance 0.00 0.02
## YearsAtCompany 0.00 0.02
## YearsInCurrentRole 0.03 -0.02
## YearsSinceLastPromotion 0.02 0.03
## YearsWithCurrManager 0.02 0.00
## TotalWorkingYears TrainingTimesLastYear
## Age 0.68 -0.02
## DailyRate 0.01 0.00
## DistanceFromHome 0.00 -0.04
## Education 0.15 -0.03
## EnvironmentSatisfaction 0.00 -0.02
## HourlyRate 0.00 -0.01
## JobInvolvement -0.01 -0.02
## JobLevel 0.78 -0.02
## JobSatisfaction -0.02 -0.01
## MonthlyIncome 0.77 -0.02
## MonthlyRate 0.03 0.00
## NumCompaniesWorked 0.24 -0.07
## PercentSalaryHike -0.02 -0.01
## PerformanceRating 0.01 -0.02
## RelationshipSatisfaction 0.02 0.00
## TotalWorkingYears 1.00 -0.04
## TrainingTimesLastYear -0.04 1.00
## WorkLifeBalance 0.00 0.03
## YearsAtCompany 0.63 0.00
## YearsInCurrentRole 0.46 -0.01
## YearsSinceLastPromotion 0.40 0.00
## YearsWithCurrManager 0.46 0.00
## WorkLifeBalance YearsAtCompany YearsInCurrentRole
## Age -0.02 0.31 0.21
## DailyRate -0.04 -0.03 0.01
## DistanceFromHome -0.03 0.01 0.02
## Education 0.01 0.07 0.06
## EnvironmentSatisfaction 0.03 0.00 0.02
## HourlyRate 0.00 -0.02 -0.02
## JobInvolvement -0.01 -0.02 0.01
## JobLevel 0.04 0.53 0.39
## JobSatisfaction -0.02 0.00 0.00
## MonthlyIncome 0.03 0.51 0.36
## MonthlyRate 0.01 -0.02 -0.01
## NumCompaniesWorked -0.01 -0.12 -0.09
## PercentSalaryHike 0.00 -0.04 0.00
## PerformanceRating 0.00 0.00 0.03
## RelationshipSatisfaction 0.02 0.02 -0.02
## TotalWorkingYears 0.00 0.63 0.46
## TrainingTimesLastYear 0.03 0.00 -0.01
## WorkLifeBalance 1.00 0.01 0.05
## YearsAtCompany 0.01 1.00 0.76
## YearsInCurrentRole 0.05 0.76 1.00
## YearsSinceLastPromotion 0.01 0.62 0.55
## YearsWithCurrManager 0.00 0.77 0.71
## YearsSinceLastPromotion YearsWithCurrManager
## Age 0.22 0.20
## DailyRate -0.03 -0.03
## DistanceFromHome 0.01 0.01
## Education 0.05 0.07
## EnvironmentSatisfaction 0.02 0.00
## HourlyRate -0.03 -0.02
## JobInvolvement -0.02 0.03
## JobLevel 0.35 0.38
## JobSatisfaction -0.02 -0.03
## MonthlyIncome 0.34 0.34
## MonthlyRate 0.00 -0.04
## NumCompaniesWorked -0.04 -0.11
## PercentSalaryHike -0.02 -0.01
## PerformanceRating 0.02 0.02
## RelationshipSatisfaction 0.03 0.00
## TotalWorkingYears 0.40 0.46
## TrainingTimesLastYear 0.00 0.00
## WorkLifeBalance 0.01 0.00
## YearsAtCompany 0.62 0.77
## YearsInCurrentRole 0.55 0.71
## YearsSinceLastPromotion 1.00 0.51
## YearsWithCurrManager 0.51 1.00
library(corrplot)
## corrplot 0.84 loaded
# Ellipse-style plot of the correlation matrix of the numeric columns listed
# below. Columns are selected by name rather than by position so the
# selection survives any reordering of att.df.
corrplot.cols <- c(
  "Age", "DailyRate", "DistanceFromHome", "Education",
  "EnvironmentSatisfaction", "HourlyRate", "JobInvolvement", "JobLevel",
  "JobSatisfaction", "MonthlyIncome", "MonthlyRate", "NumCompaniesWorked",
  "PercentSalaryHike", "PerformanceRating", "RelationshipSatisfaction",
  "TotalWorkingYears", "TrainingTimesLastYear", "WorkLifeBalance",
  "YearsAtCompany", "YearsInCurrentRole", "YearsSinceLastPromotion",
  "YearsWithCurrManager"
)
corrplot(
  corr = cor(att.df[, corrplot.cols], use = "complete.obs"),
  method = "ellipse"
)
# H1. Check whether the mean monthly income differs significantly between employees who leave and those who don't.
# Two-sample t-test (equal variances assumed) comparing log(MonthlyIncome)
# between the two Attrition groups. Both columns are taken from att.df
# rather than from the attach()ed search path.
log.trans.Income <- log(att.df$MonthlyIncome)
t.test(log.trans.Income ~ Attrition, data = att.df, var.equal = TRUE)
##
## Two Sample t-test
##
## data: log.trans.Income by Attrition
## t = 7.7481, df = 1468, p-value = 1.73e-14
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.2673770 0.4486555
## sample estimates:
## mean in group No mean in group Yes
## 8.610236 8.252220
# H2. Check whether the mean monthly rate differs significantly between employees who leave and those who don't.
# Two-sample t-test (equal variances assumed) comparing log(MonthlyRate)
# between the two Attrition groups. Both columns are taken from att.df
# rather than from the attach()ed search path.
log.trans.Rate <- log(att.df$MonthlyRate)
t.test(log.trans.Rate ~ Attrition, data = att.df, var.equal = TRUE)
##
## Two Sample t-test
##
## data: log.trans.Rate by Attrition
## t = -0.47592, df = 1468, p-value = 0.6342
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.10954526 0.06676779
## sample estimates:
## mean in group No mean in group Yes
## 9.398882 9.420271
# H3. Check whether the mean daily rate differs significantly between employees who leave and those who don't.
# Two-sample t-test (equal variances assumed) comparing log(DailyRate)
# between the two Attrition groups. Both columns are taken from att.df
# rather than from the attach()ed search path.
log.trans.DailyRate <- log(att.df$DailyRate)
t.test(log.trans.DailyRate ~ Attrition, data = att.df, var.equal = TRUE)
##
## Two Sample t-test
##
## data: log.trans.DailyRate by Attrition
## t = 1.9013, df = 1468, p-value = 0.05746
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.002824123 0.180830501
## sample estimates:
## mean in group No mean in group Yes
## 6.525604 6.436601
# Attrition split (row percentages, 2 decimals) within each BusinessTravel
# level, then Pearson's chi-squared test on the same contingency table.
mytable <- xtabs(~ BusinessTravel + Attrition, data = att.df)
round(100 * prop.table(mytable, margin = 1), digits = 2)
## Attrition
## BusinessTravel No Yes
## Non-Travel 92.00 8.00
## Travel_Frequently 75.09 24.91
## Travel_Rarely 85.04 14.96
chisq.test(x = mytable)
##
## Pearson's Chi-squared test
##
## data: mytable
## X-squared = 24.182, df = 2, p-value = 5.609e-06
# Attrition split (row percentages, 2 decimals) within each JobSatisfaction
# score, then Pearson's chi-squared test on the same contingency table.
mytable <- xtabs(~ JobSatisfaction + Attrition, data = att.df)
round(100 * prop.table(mytable, margin = 1), digits = 2)
## Attrition
## JobSatisfaction No Yes
## 1 77.16 22.84
## 2 83.57 16.43
## 3 83.48 16.52
## 4 88.67 11.33
chisq.test(x = mytable)
##
## Pearson's Chi-squared test
##
## data: mytable
## X-squared = 17.505, df = 3, p-value = 0.0005563
library(vcd)
## Loading required package: grid
##
## Attaching package: 'vcd'
## The following object is masked from 'att.df':
##
## JobSatisfaction
# Fisher's exact test on the contingency table currently held in mytable,
# followed by vcd's association statistics for the same table.
fisher.test(x = mytable)
##
## Fisher's Exact Test for Count Data
##
## data: mytable
## p-value = 0.0005767
## alternative hypothesis: two.sided
assocstats(x = mytable)
## X^2 df P(> X^2)
## Likelihood Ratio 17.356 3 0.00059691
## Pearson 17.505 3 0.00055630
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.108
## Cramer's V : 0.109