# Load the HR employee-attrition data set from the local dataset folder.
# NOTE(review): the absolute path below is machine-specific; adjust it (or
# switch to a project-relative path) before running this on another machine.
setwd("C:/Users/lenovo/Desktop/Dataset")
# stringsAsFactors = TRUE is spelled out because R >= 4.0 no longer converts
# text columns to factors by default, and the summaries that follow (level
# counts, starred factor columns in describe()) rely on factor columns.
# NOTE(review): the first column is read as "ï..Age" (see the output further
# down), presumably because the file starts with a UTF-8 byte-order mark;
# later code refers to that exact name, so the name is left unchanged here.
employees.df <- read.csv(
  "WA_Fn-UseC_-HR-Employee-Attrition.csv",
  stringsAsFactors = TRUE
)
# Open the spreadsheet viewer only in interactive sessions; it serves no
# purpose (and may fail) when the script is run in batch mode or knitted.
if (interactive()) {
  View(employees.df)
}
# Report the number of rows and columns.
dim(employees.df)
## [1] 1470 35
The above output shows that the data set contains 1470 rows and 35 columns.
Summary and Description of the Data set.
library(psych)
## Warning: package 'psych' was built under R version 3.3.3
# Descriptive statistics from psych::describe() for every column; only the
# first five statistics (vars, n, mean, sd, median) are displayed.
describe(employees.df)[, seq_len(5)]
## vars n mean sd median
## ï..Age 1 1470 36.92 9.14 36.0
## Attrition* 2 1470 1.16 0.37 1.0
## BusinessTravel* 3 1470 2.61 0.67 3.0
## DailyRate 4 1470 802.49 403.51 802.0
## Department* 5 1470 2.26 0.53 2.0
## DistanceFromHome 6 1470 9.19 8.11 7.0
## Education 7 1470 2.91 1.02 3.0
## EducationField* 8 1470 3.25 1.33 3.0
## EmployeeCount 9 1470 1.00 0.00 1.0
## EmployeeNumber 10 1470 1024.87 602.02 1020.5
## EnvironmentSatisfaction 11 1470 2.72 1.09 3.0
## Gender* 12 1470 1.60 0.49 2.0
## HourlyRate 13 1470 65.89 20.33 66.0
## JobInvolvement 14 1470 2.73 0.71 3.0
## JobLevel 15 1470 2.06 1.11 2.0
## JobRole* 16 1470 5.46 2.46 6.0
## JobSatisfaction 17 1470 2.73 1.10 3.0
## MaritalStatus* 18 1470 2.10 0.73 2.0
## MonthlyIncome 19 1470 6502.93 4707.96 4919.0
## MonthlyRate 20 1470 14313.10 7117.79 14235.5
## NumCompaniesWorked 21 1470 2.69 2.50 2.0
## Over18* 22 1470 1.00 0.00 1.0
## OverTime* 23 1470 1.28 0.45 1.0
## PercentSalaryHike 24 1470 15.21 3.66 14.0
## PerformanceRating 25 1470 3.15 0.36 3.0
## RelationshipSatisfaction 26 1470 2.71 1.08 3.0
## StandardHours 27 1470 80.00 0.00 80.0
## StockOptionLevel 28 1470 0.79 0.85 1.0
## TotalWorkingYears 29 1470 11.28 7.78 10.0
## TrainingTimesLastYear 30 1470 2.80 1.29 3.0
## WorkLifeBalance 31 1470 2.76 0.71 3.0
## YearsAtCompany 32 1470 7.01 6.13 5.0
## YearsInCurrentRole 33 1470 4.23 3.62 3.0
## YearsSinceLastPromotion 34 1470 2.19 3.22 1.0
## YearsWithCurrManager 35 1470 4.12 3.57 3.0
# Per-column summary: min, quartiles, mean and max for numeric columns, and
# level counts for factor columns.
summary(employees.df)
## ï..Age Attrition BusinessTravel DailyRate
## Min. :18.00 No :1233 Non-Travel : 150 Min. : 102.0
## 1st Qu.:30.00 Yes: 237 Travel_Frequently: 277 1st Qu.: 465.0
## Median :36.00 Travel_Rarely :1043 Median : 802.0
## Mean :36.92 Mean : 802.5
## 3rd Qu.:43.00 3rd Qu.:1157.0
## Max. :60.00 Max. :1499.0
##
## Department DistanceFromHome Education
## Human Resources : 63 Min. : 1.000 Min. :1.000
## Research & Development:961 1st Qu.: 2.000 1st Qu.:2.000
## Sales :446 Median : 7.000 Median :3.000
## Mean : 9.193 Mean :2.913
## 3rd Qu.:14.000 3rd Qu.:4.000
## Max. :29.000 Max. :5.000
##
## EducationField EmployeeCount EmployeeNumber
## Human Resources : 27 Min. :1 Min. : 1.0
## Life Sciences :606 1st Qu.:1 1st Qu.: 491.2
## Marketing :159 Median :1 Median :1020.5
## Medical :464 Mean :1 Mean :1024.9
## Other : 82 3rd Qu.:1 3rd Qu.:1555.8
## Technical Degree:132 Max. :1 Max. :2068.0
##
## EnvironmentSatisfaction Gender HourlyRate JobInvolvement
## Min. :1.000 Female:588 Min. : 30.00 Min. :1.00
## 1st Qu.:2.000 Male :882 1st Qu.: 48.00 1st Qu.:2.00
## Median :3.000 Median : 66.00 Median :3.00
## Mean :2.722 Mean : 65.89 Mean :2.73
## 3rd Qu.:4.000 3rd Qu.: 83.75 3rd Qu.:3.00
## Max. :4.000 Max. :100.00 Max. :4.00
##
## JobLevel JobRole JobSatisfaction
## Min. :1.000 Sales Executive :326 Min. :1.000
## 1st Qu.:1.000 Research Scientist :292 1st Qu.:2.000
## Median :2.000 Laboratory Technician :259 Median :3.000
## Mean :2.064 Manufacturing Director :145 Mean :2.729
## 3rd Qu.:3.000 Healthcare Representative:131 3rd Qu.:4.000
## Max. :5.000 Manager :102 Max. :4.000
## (Other) :215
## MaritalStatus MonthlyIncome MonthlyRate NumCompaniesWorked
## Divorced:327 Min. : 1009 Min. : 2094 Min. :0.000
## Married :673 1st Qu.: 2911 1st Qu.: 8047 1st Qu.:1.000
## Single :470 Median : 4919 Median :14236 Median :2.000
## Mean : 6503 Mean :14313 Mean :2.693
## 3rd Qu.: 8379 3rd Qu.:20462 3rd Qu.:4.000
## Max. :19999 Max. :26999 Max. :9.000
##
## Over18 OverTime PercentSalaryHike PerformanceRating
## Y:1470 No :1054 Min. :11.00 Min. :3.000
## Yes: 416 1st Qu.:12.00 1st Qu.:3.000
## Median :14.00 Median :3.000
## Mean :15.21 Mean :3.154
## 3rd Qu.:18.00 3rd Qu.:3.000
## Max. :25.00 Max. :4.000
##
## RelationshipSatisfaction StandardHours StockOptionLevel TotalWorkingYears
## Min. :1.000 Min. :80 Min. :0.0000 Min. : 0.00
## 1st Qu.:2.000 1st Qu.:80 1st Qu.:0.0000 1st Qu.: 6.00
## Median :3.000 Median :80 Median :1.0000 Median :10.00
## Mean :2.712 Mean :80 Mean :0.7939 Mean :11.28
## 3rd Qu.:4.000 3rd Qu.:80 3rd Qu.:1.0000 3rd Qu.:15.00
## Max. :4.000 Max. :80 Max. :3.0000 Max. :40.00
##
## TrainingTimesLastYear WorkLifeBalance YearsAtCompany YearsInCurrentRole
## Min. :0.000 Min. :1.000 Min. : 0.000 Min. : 0.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.: 3.000 1st Qu.: 2.000
## Median :3.000 Median :3.000 Median : 5.000 Median : 3.000
## Mean :2.799 Mean :2.761 Mean : 7.008 Mean : 4.229
## 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.: 9.000 3rd Qu.: 7.000
## Max. :6.000 Max. :4.000 Max. :40.000 Max. :18.000
##
## YearsSinceLastPromotion YearsWithCurrManager
## Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 2.000
## Median : 1.000 Median : 3.000
## Mean : 2.188 Mean : 4.123
## 3rd Qu.: 3.000 3rd Qu.: 7.000
## Max. :15.000 Max. :17.000
##
One-way contingency tables
# One-way frequency tables for the categorical and ordinal columns.
# Each call is wrapped in with() instead of attach()-ing the data frame, so no
# column names are placed on the search path where they could mask (or be
# masked by) other objects. Because table() still receives a bare column
# symbol, the printed tables keep the variable-name header.
with(employees.df, table(Attrition))
## Attrition
## No Yes
## 1233 237
with(employees.df, table(BusinessTravel))
## BusinessTravel
## Non-Travel Travel_Frequently Travel_Rarely
## 150 277 1043
with(employees.df, table(Department))
## Department
## Human Resources Research & Development Sales
## 63 961 446
with(employees.df, table(Education))
## Education
## 1 2 3 4 5
## 170 282 572 398 48
with(employees.df, table(EducationField))
## EducationField
## Human Resources Life Sciences Marketing Medical
## 27 606 159 464
## Other Technical Degree
## 82 132
with(employees.df, table(EnvironmentSatisfaction))
## EnvironmentSatisfaction
## 1 2 3 4
## 284 287 453 446
with(employees.df, table(Gender))
## Gender
## Female Male
## 588 882
with(employees.df, table(JobInvolvement))
## JobInvolvement
## 1 2 3 4
## 83 375 868 144
with(employees.df, table(JobLevel))
## JobLevel
## 1 2 3 4 5
## 543 534 218 106 69
with(employees.df, table(JobSatisfaction))
## JobSatisfaction
## 1 2 3 4
## 289 280 442 459
with(employees.df, table(MaritalStatus))
## MaritalStatus
## Divorced Married Single
## 327 673 470
with(employees.df, table(OverTime))
## OverTime
## No Yes
## 1054 416
with(employees.df, table(PerformanceRating))
## PerformanceRating
## 3 4
## 1244 226
with(employees.df, table(RelationshipSatisfaction))
## RelationshipSatisfaction
## 1 2 3 4
## 276 303 459 432
with(employees.df, table(StockOptionLevel))
## StockOptionLevel
## 0 1 2 3
## 631 596 158 85
with(employees.df, table(WorkLifeBalance))
## WorkLifeBalance
## 1 2 3 4
## 80 344 893 153
Two-way contingency tables
# Employee counts for each Education level by EducationField.
xtabs(formula = ~ Education + EducationField, data = employees.df)
## EducationField
## Education Human Resources Life Sciences Marketing Medical Other
## 1 2 67 14 63 5
## 2 2 116 24 99 19
## 3 16 233 59 183 24
## 4 5 173 52 104 33
## 5 2 17 10 15 1
## EducationField
## Education Technical Degree
## 1 19
## 2 22
## 3 57
## 4 31
## 5 3
# Employee counts for each Department by EducationField.
xtabs(formula = ~ Department + EducationField, data = employees.df)
## EducationField
## Department Human Resources Life Sciences Marketing Medical
## Human Resources 27 16 0 13
## Research & Development 0 440 0 363
## Sales 0 150 159 88
## EducationField
## Department Other Technical Degree
## Human Resources 3 4
## Research & Development 64 94
## Sales 15 34
# Employee counts for each JobLevel by JobInvolvement score.
xtabs(formula = ~ JobLevel + JobInvolvement, data = employees.df)
## JobInvolvement
## JobLevel 1 2 3 4
## 1 30 137 318 58
## 2 35 128 317 54
## 3 10 66 128 14
## 4 3 27 62 14
## 5 5 17 43 4
# Employee counts for each WorkLifeBalance score by EnvironmentSatisfaction.
xtabs(formula = ~ WorkLifeBalance + EnvironmentSatisfaction, data = employees.df)
## EnvironmentSatisfaction
## WorkLifeBalance 1 2 3 4
## 1 20 13 22 25
## 2 67 78 100 99
## 3 167 171 278 277
## 4 30 25 53 45
# Employee counts for each OverTime status by PerformanceRating.
xtabs(formula = ~ OverTime + PerformanceRating, data = employees.df)
## PerformanceRating
## OverTime 3 4
## No 893 161
## Yes 351 65
# Mean daily rate, years at the company and monthly income for each gender.
aggregate(
  cbind(DailyRate, YearsAtCompany, MonthlyIncome) ~ Gender,
  data = employees.df,
  FUN = mean
)
## Gender DailyRate YearsAtCompany MonthlyIncome
## 1 Female 808.2738 7.231293 6686.566
## 2 Male 798.6270 6.859410 6380.508
library(car)
## Warning: package 'car' was built under R version 3.3.3
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Exploratory plots of pay and commute distance against tenure and
# satisfaction scores. The misspelt "enviornment" in the last plot's labels
# has been corrected to "environment".

# Scatterplot (car::scatterplot) of monthly income against years at the company.
scatterplot(
  MonthlyIncome ~ YearsAtCompany,
  data = employees.df,
  main = "Distribution of monthly income with work experience",
  ylab = "Monthly Income",
  xlab = "Years at company"
)

# The same relationship as box plots, one box per YearsAtCompany value.
boxplot(
  MonthlyIncome ~ YearsAtCompany,
  data = employees.df,
  main = "Distribution of monthly income with work experience",
  ylab = "Monthly Income",
  xlab = "Years at company"
)

# Monthly income for each job-satisfaction score.
boxplot(
  MonthlyIncome ~ JobSatisfaction,
  data = employees.df,
  main = "Distribution of monthly income with job satisfaction",
  ylab = "Monthly Income",
  xlab = "Job Satisfaction"
)

# NOTE(review): this call is identical to the previous one and draws the same
# figure a second time; confirm whether a different variable was intended.
boxplot(
  MonthlyIncome ~ JobSatisfaction,
  data = employees.df,
  main = "Distribution of monthly income with job satisfaction",
  ylab = "Monthly Income",
  xlab = "Job Satisfaction"
)

# Percent salary hike for each job-satisfaction score.
boxplot(
  PercentSalaryHike ~ JobSatisfaction,
  data = employees.df,
  main = "Distribution of percent salary hike with job satisfaction",
  ylab = "Percent Salary Hike",
  xlab = "Job Satisfaction"
)

# Distance from home for each environment-satisfaction score.
boxplot(
  DistanceFromHome ~ EnvironmentSatisfaction,
  data = employees.df,
  main = "Distribution of distance from home with environment satisfaction",
  ylab = "Distance from home",
  xlab = "Environment Satisfaction"
)

library(corrplot)
## Warning: package 'corrplot' was built under R version 3.3.3
## corrplot 0.84 loaded
# List the column names of the data frame.
names(employees.df)
## [1] "ï..Age" "Attrition"
## [3] "BusinessTravel" "DailyRate"
## [5] "Department" "DistanceFromHome"
## [7] "Education" "EducationField"
## [9] "EmployeeCount" "EmployeeNumber"
## [11] "EnvironmentSatisfaction" "Gender"
## [13] "HourlyRate" "JobInvolvement"
## [15] "JobLevel" "JobRole"
## [17] "JobSatisfaction" "MaritalStatus"
## [19] "MonthlyIncome" "MonthlyRate"
## [21] "NumCompaniesWorked" "Over18"
## [23] "OverTime" "PercentSalaryHike"
## [25] "PerformanceRating" "RelationshipSatisfaction"
## [27] "StandardHours" "StockOptionLevel"
## [29] "TotalWorkingYears" "TrainingTimesLastYear"
## [31] "WorkLifeBalance" "YearsAtCompany"
## [33] "YearsInCurrentRole" "YearsSinceLastPromotion"
## [35] "YearsWithCurrManager"
# Numeric columns used for the correlation matrix and the scatterplot matrices.
# The Age column is selected by position (it is the first column) rather than
# by its literal name: the name produced when reading the CSV ("ï..Age" in the
# colnames() output) contains encoding artefacts and can differ between
# platforms, so hard-coding it makes the script fragile.
# NOTE(review): EmployeeCount and StandardHours are constant (sd = 0 in the
# describe() output); they are what make cor() warn and return NA entries.
dataColumns <- employees.df[, c(
  names(employees.df)[1],
  "DailyRate",
  "DistanceFromHome",
  "Education",
  "EmployeeCount",
  "EnvironmentSatisfaction",
  "HourlyRate",
  "JobInvolvement",
  "JobLevel",
  "JobSatisfaction",
  "MonthlyIncome",
  "MonthlyRate",
  "NumCompaniesWorked",
  "PercentSalaryHike",
  "PerformanceRating",
  "RelationshipSatisfaction",
  "StandardHours",
  "StockOptionLevel",
  "TotalWorkingYears",
  "TrainingTimesLastYear",
  "WorkLifeBalance",
  "YearsAtCompany",
  "YearsInCurrentRole",
  "YearsSinceLastPromotion",
  "YearsWithCurrManager"
)]
Correlation matrix
# Correlation matrix (cor() default method) of the selected numeric columns.
res <- cor(dataColumns)
## Warning in cor(dataColumns): the standard deviation is zero
# Display the matrix rounded to two decimal places.
round(res, digits = 2)
## ï..Age DailyRate DistanceFromHome Education
## ï..Age 1.00 0.01 0.00 0.21
## DailyRate 0.01 1.00 0.00 -0.02
## DistanceFromHome 0.00 0.00 1.00 0.02
## Education 0.21 -0.02 0.02 1.00
## EmployeeCount NA NA NA NA
## EnvironmentSatisfaction 0.01 0.02 -0.02 -0.03
## HourlyRate 0.02 0.02 0.03 0.02
## JobInvolvement 0.03 0.05 0.01 0.04
## JobLevel 0.51 0.00 0.01 0.10
## JobSatisfaction 0.00 0.03 0.00 -0.01
## MonthlyIncome 0.50 0.01 -0.02 0.09
## MonthlyRate 0.03 -0.03 0.03 -0.03
## NumCompaniesWorked 0.30 0.04 -0.03 0.13
## PercentSalaryHike 0.00 0.02 0.04 -0.01
## PerformanceRating 0.00 0.00 0.03 -0.02
## RelationshipSatisfaction 0.05 0.01 0.01 -0.01
## StandardHours NA NA NA NA
## StockOptionLevel 0.04 0.04 0.04 0.02
## TotalWorkingYears 0.68 0.01 0.00 0.15
## TrainingTimesLastYear -0.02 0.00 -0.04 -0.03
## WorkLifeBalance -0.02 -0.04 -0.03 0.01
## YearsAtCompany 0.31 -0.03 0.01 0.07
## YearsInCurrentRole 0.21 0.01 0.02 0.06
## YearsSinceLastPromotion 0.22 -0.03 0.01 0.05
## YearsWithCurrManager 0.20 -0.03 0.01 0.07
## EmployeeCount EnvironmentSatisfaction HourlyRate
## ï..Age NA 0.01 0.02
## DailyRate NA 0.02 0.02
## DistanceFromHome NA -0.02 0.03
## Education NA -0.03 0.02
## EmployeeCount 1 NA NA
## EnvironmentSatisfaction NA 1.00 -0.05
## HourlyRate NA -0.05 1.00
## JobInvolvement NA -0.01 0.04
## JobLevel NA 0.00 -0.03
## JobSatisfaction NA -0.01 -0.07
## MonthlyIncome NA -0.01 -0.02
## MonthlyRate NA 0.04 -0.02
## NumCompaniesWorked NA 0.01 0.02
## PercentSalaryHike NA -0.03 -0.01
## PerformanceRating NA -0.03 0.00
## RelationshipSatisfaction NA 0.01 0.00
## StandardHours NA NA NA
## StockOptionLevel NA 0.00 0.05
## TotalWorkingYears NA 0.00 0.00
## TrainingTimesLastYear NA -0.02 -0.01
## WorkLifeBalance NA 0.03 0.00
## YearsAtCompany NA 0.00 -0.02
## YearsInCurrentRole NA 0.02 -0.02
## YearsSinceLastPromotion NA 0.02 -0.03
## YearsWithCurrManager NA 0.00 -0.02
## JobInvolvement JobLevel JobSatisfaction
## ï..Age 0.03 0.51 0.00
## DailyRate 0.05 0.00 0.03
## DistanceFromHome 0.01 0.01 0.00
## Education 0.04 0.10 -0.01
## EmployeeCount NA NA NA
## EnvironmentSatisfaction -0.01 0.00 -0.01
## HourlyRate 0.04 -0.03 -0.07
## JobInvolvement 1.00 -0.01 -0.02
## JobLevel -0.01 1.00 0.00
## JobSatisfaction -0.02 0.00 1.00
## MonthlyIncome -0.02 0.95 -0.01
## MonthlyRate -0.02 0.04 0.00
## NumCompaniesWorked 0.02 0.14 -0.06
## PercentSalaryHike -0.02 -0.03 0.02
## PerformanceRating -0.03 -0.02 0.00
## RelationshipSatisfaction 0.03 0.02 -0.01
## StandardHours NA NA NA
## StockOptionLevel 0.02 0.01 0.01
## TotalWorkingYears -0.01 0.78 -0.02
## TrainingTimesLastYear -0.02 -0.02 -0.01
## WorkLifeBalance -0.01 0.04 -0.02
## YearsAtCompany -0.02 0.53 0.00
## YearsInCurrentRole 0.01 0.39 0.00
## YearsSinceLastPromotion -0.02 0.35 -0.02
## YearsWithCurrManager 0.03 0.38 -0.03
## MonthlyIncome MonthlyRate NumCompaniesWorked
## ï..Age 0.50 0.03 0.30
## DailyRate 0.01 -0.03 0.04
## DistanceFromHome -0.02 0.03 -0.03
## Education 0.09 -0.03 0.13
## EmployeeCount NA NA NA
## EnvironmentSatisfaction -0.01 0.04 0.01
## HourlyRate -0.02 -0.02 0.02
## JobInvolvement -0.02 -0.02 0.02
## JobLevel 0.95 0.04 0.14
## JobSatisfaction -0.01 0.00 -0.06
## MonthlyIncome 1.00 0.03 0.15
## MonthlyRate 0.03 1.00 0.02
## NumCompaniesWorked 0.15 0.02 1.00
## PercentSalaryHike -0.03 -0.01 -0.01
## PerformanceRating -0.02 -0.01 -0.01
## RelationshipSatisfaction 0.03 0.00 0.05
## StandardHours NA NA NA
## StockOptionLevel 0.01 -0.03 0.03
## TotalWorkingYears 0.77 0.03 0.24
## TrainingTimesLastYear -0.02 0.00 -0.07
## WorkLifeBalance 0.03 0.01 -0.01
## YearsAtCompany 0.51 -0.02 -0.12
## YearsInCurrentRole 0.36 -0.01 -0.09
## YearsSinceLastPromotion 0.34 0.00 -0.04
## YearsWithCurrManager 0.34 -0.04 -0.11
## PercentSalaryHike PerformanceRating
## ï..Age 0.00 0.00
## DailyRate 0.02 0.00
## DistanceFromHome 0.04 0.03
## Education -0.01 -0.02
## EmployeeCount NA NA
## EnvironmentSatisfaction -0.03 -0.03
## HourlyRate -0.01 0.00
## JobInvolvement -0.02 -0.03
## JobLevel -0.03 -0.02
## JobSatisfaction 0.02 0.00
## MonthlyIncome -0.03 -0.02
## MonthlyRate -0.01 -0.01
## NumCompaniesWorked -0.01 -0.01
## PercentSalaryHike 1.00 0.77
## PerformanceRating 0.77 1.00
## RelationshipSatisfaction -0.04 -0.03
## StandardHours NA NA
## StockOptionLevel 0.01 0.00
## TotalWorkingYears -0.02 0.01
## TrainingTimesLastYear -0.01 -0.02
## WorkLifeBalance 0.00 0.00
## YearsAtCompany -0.04 0.00
## YearsInCurrentRole 0.00 0.03
## YearsSinceLastPromotion -0.02 0.02
## YearsWithCurrManager -0.01 0.02
## RelationshipSatisfaction StandardHours
## ï..Age 0.05 NA
## DailyRate 0.01 NA
## DistanceFromHome 0.01 NA
## Education -0.01 NA
## EmployeeCount NA NA
## EnvironmentSatisfaction 0.01 NA
## HourlyRate 0.00 NA
## JobInvolvement 0.03 NA
## JobLevel 0.02 NA
## JobSatisfaction -0.01 NA
## MonthlyIncome 0.03 NA
## MonthlyRate 0.00 NA
## NumCompaniesWorked 0.05 NA
## PercentSalaryHike -0.04 NA
## PerformanceRating -0.03 NA
## RelationshipSatisfaction 1.00 NA
## StandardHours NA 1
## StockOptionLevel -0.05 NA
## TotalWorkingYears 0.02 NA
## TrainingTimesLastYear 0.00 NA
## WorkLifeBalance 0.02 NA
## YearsAtCompany 0.02 NA
## YearsInCurrentRole -0.02 NA
## YearsSinceLastPromotion 0.03 NA
## YearsWithCurrManager 0.00 NA
## StockOptionLevel TotalWorkingYears
## ï..Age 0.04 0.68
## DailyRate 0.04 0.01
## DistanceFromHome 0.04 0.00
## Education 0.02 0.15
## EmployeeCount NA NA
## EnvironmentSatisfaction 0.00 0.00
## HourlyRate 0.05 0.00
## JobInvolvement 0.02 -0.01
## JobLevel 0.01 0.78
## JobSatisfaction 0.01 -0.02
## MonthlyIncome 0.01 0.77
## MonthlyRate -0.03 0.03
## NumCompaniesWorked 0.03 0.24
## PercentSalaryHike 0.01 -0.02
## PerformanceRating 0.00 0.01
## RelationshipSatisfaction -0.05 0.02
## StandardHours NA NA
## StockOptionLevel 1.00 0.01
## TotalWorkingYears 0.01 1.00
## TrainingTimesLastYear 0.01 -0.04
## WorkLifeBalance 0.00 0.00
## YearsAtCompany 0.02 0.63
## YearsInCurrentRole 0.05 0.46
## YearsSinceLastPromotion 0.01 0.40
## YearsWithCurrManager 0.02 0.46
## TrainingTimesLastYear WorkLifeBalance
## ï..Age -0.02 -0.02
## DailyRate 0.00 -0.04
## DistanceFromHome -0.04 -0.03
## Education -0.03 0.01
## EmployeeCount NA NA
## EnvironmentSatisfaction -0.02 0.03
## HourlyRate -0.01 0.00
## JobInvolvement -0.02 -0.01
## JobLevel -0.02 0.04
## JobSatisfaction -0.01 -0.02
## MonthlyIncome -0.02 0.03
## MonthlyRate 0.00 0.01
## NumCompaniesWorked -0.07 -0.01
## PercentSalaryHike -0.01 0.00
## PerformanceRating -0.02 0.00
## RelationshipSatisfaction 0.00 0.02
## StandardHours NA NA
## StockOptionLevel 0.01 0.00
## TotalWorkingYears -0.04 0.00
## TrainingTimesLastYear 1.00 0.03
## WorkLifeBalance 0.03 1.00
## YearsAtCompany 0.00 0.01
## YearsInCurrentRole -0.01 0.05
## YearsSinceLastPromotion 0.00 0.01
## YearsWithCurrManager 0.00 0.00
## YearsAtCompany YearsInCurrentRole
## ï..Age 0.31 0.21
## DailyRate -0.03 0.01
## DistanceFromHome 0.01 0.02
## Education 0.07 0.06
## EmployeeCount NA NA
## EnvironmentSatisfaction 0.00 0.02
## HourlyRate -0.02 -0.02
## JobInvolvement -0.02 0.01
## JobLevel 0.53 0.39
## JobSatisfaction 0.00 0.00
## MonthlyIncome 0.51 0.36
## MonthlyRate -0.02 -0.01
## NumCompaniesWorked -0.12 -0.09
## PercentSalaryHike -0.04 0.00
## PerformanceRating 0.00 0.03
## RelationshipSatisfaction 0.02 -0.02
## StandardHours NA NA
## StockOptionLevel 0.02 0.05
## TotalWorkingYears 0.63 0.46
## TrainingTimesLastYear 0.00 -0.01
## WorkLifeBalance 0.01 0.05
## YearsAtCompany 1.00 0.76
## YearsInCurrentRole 0.76 1.00
## YearsSinceLastPromotion 0.62 0.55
## YearsWithCurrManager 0.77 0.71
## YearsSinceLastPromotion YearsWithCurrManager
## ï..Age 0.22 0.20
## DailyRate -0.03 -0.03
## DistanceFromHome 0.01 0.01
## Education 0.05 0.07
## EmployeeCount NA NA
## EnvironmentSatisfaction 0.02 0.00
## HourlyRate -0.03 -0.02
## JobInvolvement -0.02 0.03
## JobLevel 0.35 0.38
## JobSatisfaction -0.02 -0.03
## MonthlyIncome 0.34 0.34
## MonthlyRate 0.00 -0.04
## NumCompaniesWorked -0.04 -0.11
## PercentSalaryHike -0.02 -0.01
## PerformanceRating 0.02 0.02
## RelationshipSatisfaction 0.03 0.00
## StandardHours NA NA
## StockOptionLevel 0.01 0.02
## TotalWorkingYears 0.40 0.46
## TrainingTimesLastYear 0.00 0.00
## WorkLifeBalance 0.01 0.00
## YearsAtCompany 0.62 0.77
## YearsInCurrentRole 0.55 0.71
## YearsSinceLastPromotion 1.00 0.51
## YearsWithCurrManager 0.51 1.00
# Compute the correlation matrix again under the name N.
# NOTE(review): this repeats the cor() call whose result is already stored in
# `res`, and N is not used anywhere in the code shown here.
N <- cor(dataColumns)
## Warning in cor(dataColumns): the standard deviation is zero

# Scatterplot matrices for the first 18 selected columns, six columns per plot.
for (column_block in list(1:6, 7:12, 13:18)) {
  pairs(dataColumns[column_block])
}

library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
# Side-by-side bar chart of employee counts per business-travel category,
# with one bar per attrition value.
ggplot(employees.df, aes(x = BusinessTravel, fill = Attrition)) +
  geom_bar(position = position_dodge()) +
  labs(
    x = "Travel Frequency",
    y = "Count",
    title = "Attrition Vs Business Travel"
  )

# Chi-squared test of independence between business travel and attrition.
table_travel <- table(
  employees.df[["BusinessTravel"]],
  employees.df[["Attrition"]]
)
chisq.test(table_travel)
##
## Pearson's Chi-squared test
##
## data: table_travel
## X-squared = 24.182, df = 2, p-value = 5.609e-06
# Welch two-sample t-test: does mean distance from home differ by attrition?
t.test(DistanceFromHome ~ Attrition, data = employees.df, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: DistanceFromHome by Attrition
## t = -2.8882, df = 322.72, p-value = 0.004137
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.8870025 -0.5475146
## sample estimates:
## mean in group No mean in group Yes
## 8.915653 10.632911
# Welch two-sample t-test: does mean job level differ by attrition?
t.test(JobLevel ~ Attrition, data = employees.df, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: JobLevel by Attrition
## t = 7.3859, df = 376.25, p-value = 9.845e-13
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.3733861 0.6443231
## sample estimates:
## mean in group No mean in group Yes
## 2.145985 1.637131
# Chi-squared test of independence between job role and attrition.
table_role <- table(
  employees.df[["JobRole"]],
  employees.df[["Attrition"]]
)
chisq.test(table_role)
##
## Pearson's Chi-squared test
##
## data: table_role
## X-squared = 86.19, df = 8, p-value = 2.752e-15
# Chi-squared test of independence between job satisfaction and attrition.
table_job_sat <- table(
  employees.df[["JobSatisfaction"]],
  employees.df[["Attrition"]]
)
chisq.test(table_job_sat)
##
## Pearson's Chi-squared test
##
## data: table_job_sat
## X-squared = 17.505, df = 3, p-value = 0.0005563
So we can conclude that Attrition depends on: Business Travel, Distance From Home, Job Level, Job Role, and Job Satisfaction (each test above gives p < 0.05).