# Load the HR employee attrition CSV and take a first look at it.
# NOTE(review): the hard-coded setwd() ties the script to one machine; it is
# kept because code elsewhere in the file may rely on the working directory.
setwd("C:/Users/lenovo/Desktop/Dataset")
# stringsAsFactors = TRUE keeps text columns as factors on R >= 4.0 as well,
# which the level counts printed by summary() further down depend on.
# NOTE(review): the first header is read as "ï..Age" (byte-order mark);
# fileEncoding = "UTF-8-BOM" would clean it up, but would also rename a column
# that later code refers to by that exact name.
employees.df <- read.csv(
  "WA_Fn-UseC_-HR-Employee-Attrition.csv",
  stringsAsFactors = TRUE
)
# View() needs a GUI data viewer, so only open it in interactive sessions.
if (interactive()) {
  View(employees.df)
}
# Number of rows and columns.
dim(employees.df)
## [1] 1470   35

The above output shows that the data set contains 1470 rows and 35 columns.

Summary and description of the data set.

library(psych)
## Warning: package 'psych' was built under R version 3.3.3
# Descriptive statistics per column, restricted to the first five summary
# columns (vars, n, mean, sd, median).
describe(employees.df)[, 1:5]
##                          vars    n     mean      sd  median
## ï..Age                      1 1470    36.92    9.14    36.0
## Attrition*                  2 1470     1.16    0.37     1.0
## BusinessTravel*             3 1470     2.61    0.67     3.0
## DailyRate                   4 1470   802.49  403.51   802.0
## Department*                 5 1470     2.26    0.53     2.0
## DistanceFromHome            6 1470     9.19    8.11     7.0
## Education                   7 1470     2.91    1.02     3.0
## EducationField*             8 1470     3.25    1.33     3.0
## EmployeeCount               9 1470     1.00    0.00     1.0
## EmployeeNumber             10 1470  1024.87  602.02  1020.5
## EnvironmentSatisfaction    11 1470     2.72    1.09     3.0
## Gender*                    12 1470     1.60    0.49     2.0
## HourlyRate                 13 1470    65.89   20.33    66.0
## JobInvolvement             14 1470     2.73    0.71     3.0
## JobLevel                   15 1470     2.06    1.11     2.0
## JobRole*                   16 1470     5.46    2.46     6.0
## JobSatisfaction            17 1470     2.73    1.10     3.0
## MaritalStatus*             18 1470     2.10    0.73     2.0
## MonthlyIncome              19 1470  6502.93 4707.96  4919.0
## MonthlyRate                20 1470 14313.10 7117.79 14235.5
## NumCompaniesWorked         21 1470     2.69    2.50     2.0
## Over18*                    22 1470     1.00    0.00     1.0
## OverTime*                  23 1470     1.28    0.45     1.0
## PercentSalaryHike          24 1470    15.21    3.66    14.0
## PerformanceRating          25 1470     3.15    0.36     3.0
## RelationshipSatisfaction   26 1470     2.71    1.08     3.0
## StandardHours              27 1470    80.00    0.00    80.0
## StockOptionLevel           28 1470     0.79    0.85     1.0
## TotalWorkingYears          29 1470    11.28    7.78    10.0
## TrainingTimesLastYear      30 1470     2.80    1.29     3.0
## WorkLifeBalance            31 1470     2.76    0.71     3.0
## YearsAtCompany             32 1470     7.01    6.13     5.0
## YearsInCurrentRole         33 1470     4.23    3.62     3.0
## YearsSinceLastPromotion    34 1470     2.19    3.22     1.0
## YearsWithCurrManager       35 1470     4.12    3.57     3.0
# Column-wise overview: quartiles and mean for numeric columns, level counts
# for factor columns.
summary(object = employees.df)
##      ï..Age      Attrition            BusinessTravel   DailyRate     
##  Min.   :18.00   No :1233   Non-Travel       : 150   Min.   : 102.0  
##  1st Qu.:30.00   Yes: 237   Travel_Frequently: 277   1st Qu.: 465.0  
##  Median :36.00              Travel_Rarely    :1043   Median : 802.0  
##  Mean   :36.92                                       Mean   : 802.5  
##  3rd Qu.:43.00                                       3rd Qu.:1157.0  
##  Max.   :60.00                                       Max.   :1499.0  
##                                                                      
##                   Department  DistanceFromHome   Education    
##  Human Resources       : 63   Min.   : 1.000   Min.   :1.000  
##  Research & Development:961   1st Qu.: 2.000   1st Qu.:2.000  
##  Sales                 :446   Median : 7.000   Median :3.000  
##                               Mean   : 9.193   Mean   :2.913  
##                               3rd Qu.:14.000   3rd Qu.:4.000  
##                               Max.   :29.000   Max.   :5.000  
##                                                               
##           EducationField EmployeeCount EmployeeNumber  
##  Human Resources : 27    Min.   :1     Min.   :   1.0  
##  Life Sciences   :606    1st Qu.:1     1st Qu.: 491.2  
##  Marketing       :159    Median :1     Median :1020.5  
##  Medical         :464    Mean   :1     Mean   :1024.9  
##  Other           : 82    3rd Qu.:1     3rd Qu.:1555.8  
##  Technical Degree:132    Max.   :1     Max.   :2068.0  
##                                                        
##  EnvironmentSatisfaction    Gender      HourlyRate     JobInvolvement
##  Min.   :1.000           Female:588   Min.   : 30.00   Min.   :1.00  
##  1st Qu.:2.000           Male  :882   1st Qu.: 48.00   1st Qu.:2.00  
##  Median :3.000                        Median : 66.00   Median :3.00  
##  Mean   :2.722                        Mean   : 65.89   Mean   :2.73  
##  3rd Qu.:4.000                        3rd Qu.: 83.75   3rd Qu.:3.00  
##  Max.   :4.000                        Max.   :100.00   Max.   :4.00  
##                                                                      
##     JobLevel                          JobRole    JobSatisfaction
##  Min.   :1.000   Sales Executive          :326   Min.   :1.000  
##  1st Qu.:1.000   Research Scientist       :292   1st Qu.:2.000  
##  Median :2.000   Laboratory Technician    :259   Median :3.000  
##  Mean   :2.064   Manufacturing Director   :145   Mean   :2.729  
##  3rd Qu.:3.000   Healthcare Representative:131   3rd Qu.:4.000  
##  Max.   :5.000   Manager                  :102   Max.   :4.000  
##                  (Other)                  :215                  
##   MaritalStatus MonthlyIncome    MonthlyRate    NumCompaniesWorked
##  Divorced:327   Min.   : 1009   Min.   : 2094   Min.   :0.000     
##  Married :673   1st Qu.: 2911   1st Qu.: 8047   1st Qu.:1.000     
##  Single  :470   Median : 4919   Median :14236   Median :2.000     
##                 Mean   : 6503   Mean   :14313   Mean   :2.693     
##                 3rd Qu.: 8379   3rd Qu.:20462   3rd Qu.:4.000     
##                 Max.   :19999   Max.   :26999   Max.   :9.000     
##                                                                   
##  Over18   OverTime   PercentSalaryHike PerformanceRating
##  Y:1470   No :1054   Min.   :11.00     Min.   :3.000    
##           Yes: 416   1st Qu.:12.00     1st Qu.:3.000    
##                      Median :14.00     Median :3.000    
##                      Mean   :15.21     Mean   :3.154    
##                      3rd Qu.:18.00     3rd Qu.:3.000    
##                      Max.   :25.00     Max.   :4.000    
##                                                         
##  RelationshipSatisfaction StandardHours StockOptionLevel TotalWorkingYears
##  Min.   :1.000            Min.   :80    Min.   :0.0000   Min.   : 0.00    
##  1st Qu.:2.000            1st Qu.:80    1st Qu.:0.0000   1st Qu.: 6.00    
##  Median :3.000            Median :80    Median :1.0000   Median :10.00    
##  Mean   :2.712            Mean   :80    Mean   :0.7939   Mean   :11.28    
##  3rd Qu.:4.000            3rd Qu.:80    3rd Qu.:1.0000   3rd Qu.:15.00    
##  Max.   :4.000            Max.   :80    Max.   :3.0000   Max.   :40.00    
##                                                                           
##  TrainingTimesLastYear WorkLifeBalance YearsAtCompany   YearsInCurrentRole
##  Min.   :0.000         Min.   :1.000   Min.   : 0.000   Min.   : 0.000    
##  1st Qu.:2.000         1st Qu.:2.000   1st Qu.: 3.000   1st Qu.: 2.000    
##  Median :3.000         Median :3.000   Median : 5.000   Median : 3.000    
##  Mean   :2.799         Mean   :2.761   Mean   : 7.008   Mean   : 4.229    
##  3rd Qu.:3.000         3rd Qu.:3.000   3rd Qu.: 9.000   3rd Qu.: 7.000    
##  Max.   :6.000         Max.   :4.000   Max.   :40.000   Max.   :18.000    
##                                                                           
##  YearsSinceLastPromotion YearsWithCurrManager
##  Min.   : 0.000          Min.   : 0.000      
##  1st Qu.: 0.000          1st Qu.: 2.000      
##  Median : 1.000          Median : 3.000      
##  Mean   : 2.188          Mean   : 4.123      
##  3rd Qu.: 3.000          3rd Qu.: 7.000      
##  Max.   :15.000          Max.   :17.000      
## 

One-way contingency tables

# attach() is kept only because other code in the file may still rely on the
# bare column names it exposes.
# NOTE(review): prefer removing it once nothing depends on it.
attach(employees.df)
# Frequency of each Attrition level. The column is referenced explicitly so the
# call cannot pick up a same-named object masking the attached copy; `dnn`
# keeps the printed header unchanged.
table(employees.df$Attrition, dnn = "Attrition")
## Attrition
##   No  Yes 
## 1233  237
# Frequency of each BusinessTravel level; explicit column reference instead of
# the attach()ed name, `dnn` keeps the printed header.
table(employees.df$BusinessTravel, dnn = "BusinessTravel")
## BusinessTravel
##        Non-Travel Travel_Frequently     Travel_Rarely 
##               150               277              1043
# Frequency of each Department level; explicit column reference instead of the
# attach()ed name, `dnn` keeps the printed header.
table(employees.df$Department, dnn = "Department")
## Department
##        Human Resources Research & Development                  Sales 
##                     63                    961                    446
# Frequency of each Education value; explicit column reference instead of the
# attach()ed name, `dnn` keeps the printed header.
table(employees.df$Education, dnn = "Education")
## Education
##   1   2   3   4   5 
## 170 282 572 398  48
# Frequency of each EducationField level; explicit column reference instead of
# the attach()ed name, `dnn` keeps the printed header.
table(employees.df$EducationField, dnn = "EducationField")
## EducationField
##  Human Resources    Life Sciences        Marketing          Medical 
##               27              606              159              464 
##            Other Technical Degree 
##               82              132
# Frequency of each EnvironmentSatisfaction value; explicit column reference
# instead of the attach()ed name, `dnn` keeps the printed header.
table(employees.df$EnvironmentSatisfaction, dnn = "EnvironmentSatisfaction")
## EnvironmentSatisfaction
##   1   2   3   4 
## 284 287 453 446
# Frequency of each Gender level; explicit column reference instead of the
# attach()ed name, `dnn` keeps the printed header.
table(employees.df$Gender, dnn = "Gender")
## Gender
## Female   Male 
##    588    882
# Frequency of each JobInvolvement value; explicit column reference instead of
# the attach()ed name, `dnn` keeps the printed header.
table(employees.df$JobInvolvement, dnn = "JobInvolvement")
## JobInvolvement
##   1   2   3   4 
##  83 375 868 144
# Frequency of each JobLevel value; explicit column reference instead of the
# attach()ed name, `dnn` keeps the printed header.
table(employees.df$JobLevel, dnn = "JobLevel")
## JobLevel
##   1   2   3   4   5 
## 543 534 218 106  69
# Frequency of each JobSatisfaction value; explicit column reference instead of
# the attach()ed name, `dnn` keeps the printed header.
table(employees.df$JobSatisfaction, dnn = "JobSatisfaction")
## JobSatisfaction
##   1   2   3   4 
## 289 280 442 459
# Frequency of each MaritalStatus level; explicit column reference instead of
# the attach()ed name, `dnn` keeps the printed header.
table(employees.df$MaritalStatus, dnn = "MaritalStatus")
## MaritalStatus
## Divorced  Married   Single 
##      327      673      470
# Frequency of each OverTime level; explicit column reference instead of the
# attach()ed name, `dnn` keeps the printed header.
table(employees.df$OverTime, dnn = "OverTime")
## OverTime
##   No  Yes 
## 1054  416
# Frequency of each PerformanceRating value; explicit column reference instead
# of the attach()ed name, `dnn` keeps the printed header.
table(employees.df$PerformanceRating, dnn = "PerformanceRating")
## PerformanceRating
##    3    4 
## 1244  226
# Frequency of each RelationshipSatisfaction value; explicit column reference
# instead of the attach()ed name, `dnn` keeps the printed header.
table(employees.df$RelationshipSatisfaction, dnn = "RelationshipSatisfaction")
## RelationshipSatisfaction
##   1   2   3   4 
## 276 303 459 432
# Frequency of each StockOptionLevel value; explicit column reference instead
# of the attach()ed name, `dnn` keeps the printed header.
table(employees.df$StockOptionLevel, dnn = "StockOptionLevel")
## StockOptionLevel
##   0   1   2   3 
## 631 596 158  85
# Frequency of each WorkLifeBalance value; explicit column reference instead of
# the attach()ed name, `dnn` keeps the printed header.
table(employees.df$WorkLifeBalance, dnn = "WorkLifeBalance")
## WorkLifeBalance
##   1   2   3   4 
##  80 344 893 153

Two-way contingency tables

# Cross-tabulation of Education (rows) against EducationField (columns).
xtabs(
  formula = ~ Education + EducationField,
  data = employees.df
)
##          EducationField
## Education Human Resources Life Sciences Marketing Medical Other
##         1               2            67        14      63     5
##         2               2           116        24      99    19
##         3              16           233        59     183    24
##         4               5           173        52     104    33
##         5               2            17        10      15     1
##          EducationField
## Education Technical Degree
##         1               19
##         2               22
##         3               57
##         4               31
##         5                3
# Cross-tabulation of Department (rows) against EducationField (columns).
xtabs(
  formula = ~ Department + EducationField,
  data = employees.df
)
##                         EducationField
## Department               Human Resources Life Sciences Marketing Medical
##   Human Resources                     27            16         0      13
##   Research & Development               0           440         0     363
##   Sales                                0           150       159      88
##                         EducationField
## Department               Other Technical Degree
##   Human Resources            3                4
##   Research & Development    64               94
##   Sales                     15               34
# Cross-tabulation of JobLevel (rows) against JobInvolvement (columns).
xtabs(
  formula = ~ JobLevel + JobInvolvement,
  data = employees.df
)
##         JobInvolvement
## JobLevel   1   2   3   4
##        1  30 137 318  58
##        2  35 128 317  54
##        3  10  66 128  14
##        4   3  27  62  14
##        5   5  17  43   4
# Cross-tabulation of WorkLifeBalance (rows) against EnvironmentSatisfaction
# (columns).
xtabs(
  formula = ~ WorkLifeBalance + EnvironmentSatisfaction,
  data = employees.df
)
##                EnvironmentSatisfaction
## WorkLifeBalance   1   2   3   4
##               1  20  13  22  25
##               2  67  78 100  99
##               3 167 171 278 277
##               4  30  25  53  45
# Cross-tabulation of OverTime (rows) against PerformanceRating (columns).
xtabs(
  formula = ~ OverTime + PerformanceRating,
  data = employees.df
)
##         PerformanceRating
## OverTime   3   4
##      No  893 161
##      Yes 351  65
# Mean daily rate, years at company and monthly income for each gender.
aggregate(
  cbind(DailyRate, YearsAtCompany, MonthlyIncome) ~ Gender,
  data = employees.df,
  FUN = mean
)
##   Gender DailyRate YearsAtCompany MonthlyIncome
## 1 Female  808.2738       7.231293      6686.566
## 2   Male  798.6270       6.859410      6380.508
library(car)
## Warning: package 'car' was built under R version 3.3.3
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Exploratory plots of monthly income, salary hike and commute distance.

# Monthly income against years at the company, as a scatterplot.
scatterplot(
  MonthlyIncome ~ YearsAtCompany,
  data = employees.df,
  main = "Distribution of monthly income with work experience",
  ylab = "Monthly Income",
  xlab = "Years at company"
)

# The same relationship as one box per value of YearsAtCompany.
boxplot(
  MonthlyIncome ~ YearsAtCompany,
  data = employees.df,
  main = "Distribution of monthly income with work experience",
  ylab = "Monthly Income",
  xlab = "Years at company",
  col = "blue"
)

# Monthly income by job satisfaction level.
# NOTE(review): identical to the next plot apart from the fill color;
# consider dropping one of the two.
boxplot(
  MonthlyIncome ~ JobSatisfaction,
  data = employees.df,
  main = "Distribution of monthly income with job satisfaction",
  ylab = "Monthly Income",
  xlab = "Job Satisfaction"
)

boxplot(
  MonthlyIncome ~ JobSatisfaction,
  data = employees.df,
  main = "Distribution of monthly income with job satisfaction",
  ylab = "Monthly Income",
  xlab = "Job Satisfaction",
  col = "red"
)

# Percent salary hike by job satisfaction level.
boxplot(
  PercentSalaryHike ~ JobSatisfaction,
  data = employees.df,
  main = "Distribution of percent salary hike with job satisfaction",
  ylab = "Percent Salary Hike",
  xlab = "Job Satisfaction",
  col = "blue"
)

# Distance from home by environment satisfaction level.
# The title and axis label previously misspelled "environment".
boxplot(
  DistanceFromHome ~ EnvironmentSatisfaction,
  data = employees.df,
  main = "Distribution of distance from home with environment satisfaction",
  ylab = "Distance from home",
  xlab = "Environment Satisfaction",
  col = "red"
)

library(corrplot)
## Warning: package 'corrplot' was built under R version 3.3.3
## corrplot 0.84 loaded
# List the column names of the data frame.
names(employees.df)
##  [1] "ï..Age"                   "Attrition"               
##  [3] "BusinessTravel"           "DailyRate"               
##  [5] "Department"               "DistanceFromHome"        
##  [7] "Education"                "EducationField"          
##  [9] "EmployeeCount"            "EmployeeNumber"          
## [11] "EnvironmentSatisfaction"  "Gender"                  
## [13] "HourlyRate"               "JobInvolvement"          
## [15] "JobLevel"                 "JobRole"                 
## [17] "JobSatisfaction"          "MaritalStatus"           
## [19] "MonthlyIncome"            "MonthlyRate"             
## [21] "NumCompaniesWorked"       "Over18"                  
## [23] "OverTime"                 "PercentSalaryHike"       
## [25] "PerformanceRating"        "RelationshipSatisfaction"
## [27] "StandardHours"            "StockOptionLevel"        
## [29] "TotalWorkingYears"        "TrainingTimesLastYear"   
## [31] "WorkLifeBalance"          "YearsAtCompany"          
## [33] "YearsInCurrentRole"       "YearsSinceLastPromotion" 
## [35] "YearsWithCurrManager"
# Numeric columns used for the correlation matrix and the pair plots.
# The first column (age) is picked by position rather than by name: its header
# is read as "ï..Age" when the file's byte-order mark is not stripped, so a
# hard-coded name breaks as soon as the file is read with a different encoding
# (presumably it is then plain "Age" - verify).
# NOTE(review): EmployeeCount and StandardHours are constant and yield NA
# correlations; they are kept so positional indexing into dataColumns elsewhere
# stays valid.
dataColumns <- employees.df[, c(
  names(employees.df)[1],
  "DailyRate", "DistanceFromHome", "Education", "EmployeeCount",
  "EnvironmentSatisfaction", "HourlyRate", "JobInvolvement", "JobLevel",
  "JobSatisfaction", "MonthlyIncome", "MonthlyRate", "NumCompaniesWorked",
  "PercentSalaryHike", "PerformanceRating", "RelationshipSatisfaction",
  "StandardHours", "StockOptionLevel", "TotalWorkingYears",
  "TrainingTimesLastYear", "WorkLifeBalance", "YearsAtCompany",
  "YearsInCurrentRole", "YearsSinceLastPromotion", "YearsWithCurrManager"
)]

Correlation matrix

# Pairwise correlations between the selected numeric columns. Constant columns
# have zero standard deviation, which triggers a warning and NA entries.
res <- cor(x = dataColumns)
## Warning in cor(dataColumns): the standard deviation is zero
# Print the correlation matrix rounded to two decimal places.
round(res, digits = 2)
##                          ï..Age DailyRate DistanceFromHome Education
## ï..Age                     1.00      0.01             0.00      0.21
## DailyRate                  0.01      1.00             0.00     -0.02
## DistanceFromHome           0.00      0.00             1.00      0.02
## Education                  0.21     -0.02             0.02      1.00
## EmployeeCount                NA        NA               NA        NA
## EnvironmentSatisfaction    0.01      0.02            -0.02     -0.03
## HourlyRate                 0.02      0.02             0.03      0.02
## JobInvolvement             0.03      0.05             0.01      0.04
## JobLevel                   0.51      0.00             0.01      0.10
## JobSatisfaction            0.00      0.03             0.00     -0.01
## MonthlyIncome              0.50      0.01            -0.02      0.09
## MonthlyRate                0.03     -0.03             0.03     -0.03
## NumCompaniesWorked         0.30      0.04            -0.03      0.13
## PercentSalaryHike          0.00      0.02             0.04     -0.01
## PerformanceRating          0.00      0.00             0.03     -0.02
## RelationshipSatisfaction   0.05      0.01             0.01     -0.01
## StandardHours                NA        NA               NA        NA
## StockOptionLevel           0.04      0.04             0.04      0.02
## TotalWorkingYears          0.68      0.01             0.00      0.15
## TrainingTimesLastYear     -0.02      0.00            -0.04     -0.03
## WorkLifeBalance           -0.02     -0.04            -0.03      0.01
## YearsAtCompany             0.31     -0.03             0.01      0.07
## YearsInCurrentRole         0.21      0.01             0.02      0.06
## YearsSinceLastPromotion    0.22     -0.03             0.01      0.05
## YearsWithCurrManager       0.20     -0.03             0.01      0.07
##                          EmployeeCount EnvironmentSatisfaction HourlyRate
## ï..Age                              NA                    0.01       0.02
## DailyRate                           NA                    0.02       0.02
## DistanceFromHome                    NA                   -0.02       0.03
## Education                           NA                   -0.03       0.02
## EmployeeCount                        1                      NA         NA
## EnvironmentSatisfaction             NA                    1.00      -0.05
## HourlyRate                          NA                   -0.05       1.00
## JobInvolvement                      NA                   -0.01       0.04
## JobLevel                            NA                    0.00      -0.03
## JobSatisfaction                     NA                   -0.01      -0.07
## MonthlyIncome                       NA                   -0.01      -0.02
## MonthlyRate                         NA                    0.04      -0.02
## NumCompaniesWorked                  NA                    0.01       0.02
## PercentSalaryHike                   NA                   -0.03      -0.01
## PerformanceRating                   NA                   -0.03       0.00
## RelationshipSatisfaction            NA                    0.01       0.00
## StandardHours                       NA                      NA         NA
## StockOptionLevel                    NA                    0.00       0.05
## TotalWorkingYears                   NA                    0.00       0.00
## TrainingTimesLastYear               NA                   -0.02      -0.01
## WorkLifeBalance                     NA                    0.03       0.00
## YearsAtCompany                      NA                    0.00      -0.02
## YearsInCurrentRole                  NA                    0.02      -0.02
## YearsSinceLastPromotion             NA                    0.02      -0.03
## YearsWithCurrManager                NA                    0.00      -0.02
##                          JobInvolvement JobLevel JobSatisfaction
## ï..Age                             0.03     0.51            0.00
## DailyRate                          0.05     0.00            0.03
## DistanceFromHome                   0.01     0.01            0.00
## Education                          0.04     0.10           -0.01
## EmployeeCount                        NA       NA              NA
## EnvironmentSatisfaction           -0.01     0.00           -0.01
## HourlyRate                         0.04    -0.03           -0.07
## JobInvolvement                     1.00    -0.01           -0.02
## JobLevel                          -0.01     1.00            0.00
## JobSatisfaction                   -0.02     0.00            1.00
## MonthlyIncome                     -0.02     0.95           -0.01
## MonthlyRate                       -0.02     0.04            0.00
## NumCompaniesWorked                 0.02     0.14           -0.06
## PercentSalaryHike                 -0.02    -0.03            0.02
## PerformanceRating                 -0.03    -0.02            0.00
## RelationshipSatisfaction           0.03     0.02           -0.01
## StandardHours                        NA       NA              NA
## StockOptionLevel                   0.02     0.01            0.01
## TotalWorkingYears                 -0.01     0.78           -0.02
## TrainingTimesLastYear             -0.02    -0.02           -0.01
## WorkLifeBalance                   -0.01     0.04           -0.02
## YearsAtCompany                    -0.02     0.53            0.00
## YearsInCurrentRole                 0.01     0.39            0.00
## YearsSinceLastPromotion           -0.02     0.35           -0.02
## YearsWithCurrManager               0.03     0.38           -0.03
##                          MonthlyIncome MonthlyRate NumCompaniesWorked
## ï..Age                            0.50        0.03               0.30
## DailyRate                         0.01       -0.03               0.04
## DistanceFromHome                 -0.02        0.03              -0.03
## Education                         0.09       -0.03               0.13
## EmployeeCount                       NA          NA                 NA
## EnvironmentSatisfaction          -0.01        0.04               0.01
## HourlyRate                       -0.02       -0.02               0.02
## JobInvolvement                   -0.02       -0.02               0.02
## JobLevel                          0.95        0.04               0.14
## JobSatisfaction                  -0.01        0.00              -0.06
## MonthlyIncome                     1.00        0.03               0.15
## MonthlyRate                       0.03        1.00               0.02
## NumCompaniesWorked                0.15        0.02               1.00
## PercentSalaryHike                -0.03       -0.01              -0.01
## PerformanceRating                -0.02       -0.01              -0.01
## RelationshipSatisfaction          0.03        0.00               0.05
## StandardHours                       NA          NA                 NA
## StockOptionLevel                  0.01       -0.03               0.03
## TotalWorkingYears                 0.77        0.03               0.24
## TrainingTimesLastYear            -0.02        0.00              -0.07
## WorkLifeBalance                   0.03        0.01              -0.01
## YearsAtCompany                    0.51       -0.02              -0.12
## YearsInCurrentRole                0.36       -0.01              -0.09
## YearsSinceLastPromotion           0.34        0.00              -0.04
## YearsWithCurrManager              0.34       -0.04              -0.11
##                          PercentSalaryHike PerformanceRating
## ï..Age                                0.00              0.00
## DailyRate                             0.02              0.00
## DistanceFromHome                      0.04              0.03
## Education                            -0.01             -0.02
## EmployeeCount                           NA                NA
## EnvironmentSatisfaction              -0.03             -0.03
## HourlyRate                           -0.01              0.00
## JobInvolvement                       -0.02             -0.03
## JobLevel                             -0.03             -0.02
## JobSatisfaction                       0.02              0.00
## MonthlyIncome                        -0.03             -0.02
## MonthlyRate                          -0.01             -0.01
## NumCompaniesWorked                   -0.01             -0.01
## PercentSalaryHike                     1.00              0.77
## PerformanceRating                     0.77              1.00
## RelationshipSatisfaction             -0.04             -0.03
## StandardHours                           NA                NA
## StockOptionLevel                      0.01              0.00
## TotalWorkingYears                    -0.02              0.01
## TrainingTimesLastYear                -0.01             -0.02
## WorkLifeBalance                       0.00              0.00
## YearsAtCompany                       -0.04              0.00
## YearsInCurrentRole                    0.00              0.03
## YearsSinceLastPromotion              -0.02              0.02
## YearsWithCurrManager                 -0.01              0.02
##                          RelationshipSatisfaction StandardHours
## ï..Age                                       0.05            NA
## DailyRate                                    0.01            NA
## DistanceFromHome                             0.01            NA
## Education                                   -0.01            NA
## EmployeeCount                                  NA            NA
## EnvironmentSatisfaction                      0.01            NA
## HourlyRate                                   0.00            NA
## JobInvolvement                               0.03            NA
## JobLevel                                     0.02            NA
## JobSatisfaction                             -0.01            NA
## MonthlyIncome                                0.03            NA
## MonthlyRate                                  0.00            NA
## NumCompaniesWorked                           0.05            NA
## PercentSalaryHike                           -0.04            NA
## PerformanceRating                           -0.03            NA
## RelationshipSatisfaction                     1.00            NA
## StandardHours                                  NA             1
## StockOptionLevel                            -0.05            NA
## TotalWorkingYears                            0.02            NA
## TrainingTimesLastYear                        0.00            NA
## WorkLifeBalance                              0.02            NA
## YearsAtCompany                               0.02            NA
## YearsInCurrentRole                          -0.02            NA
## YearsSinceLastPromotion                      0.03            NA
## YearsWithCurrManager                         0.00            NA
##                          StockOptionLevel TotalWorkingYears
## ï..Age                               0.04              0.68
## DailyRate                            0.04              0.01
## DistanceFromHome                     0.04              0.00
## Education                            0.02              0.15
## EmployeeCount                          NA                NA
## EnvironmentSatisfaction              0.00              0.00
## HourlyRate                           0.05              0.00
## JobInvolvement                       0.02             -0.01
## JobLevel                             0.01              0.78
## JobSatisfaction                      0.01             -0.02
## MonthlyIncome                        0.01              0.77
## MonthlyRate                         -0.03              0.03
## NumCompaniesWorked                   0.03              0.24
## PercentSalaryHike                    0.01             -0.02
## PerformanceRating                    0.00              0.01
## RelationshipSatisfaction            -0.05              0.02
## StandardHours                          NA                NA
## StockOptionLevel                     1.00              0.01
## TotalWorkingYears                    0.01              1.00
## TrainingTimesLastYear                0.01             -0.04
## WorkLifeBalance                      0.00              0.00
## YearsAtCompany                       0.02              0.63
## YearsInCurrentRole                   0.05              0.46
## YearsSinceLastPromotion              0.01              0.40
## YearsWithCurrManager                 0.02              0.46
##                          TrainingTimesLastYear WorkLifeBalance
## ï..Age                                   -0.02           -0.02
## DailyRate                                 0.00           -0.04
## DistanceFromHome                         -0.04           -0.03
## Education                                -0.03            0.01
## EmployeeCount                               NA              NA
## EnvironmentSatisfaction                  -0.02            0.03
## HourlyRate                               -0.01            0.00
## JobInvolvement                           -0.02           -0.01
## JobLevel                                 -0.02            0.04
## JobSatisfaction                          -0.01           -0.02
## MonthlyIncome                            -0.02            0.03
## MonthlyRate                               0.00            0.01
## NumCompaniesWorked                       -0.07           -0.01
## PercentSalaryHike                        -0.01            0.00
## PerformanceRating                        -0.02            0.00
## RelationshipSatisfaction                  0.00            0.02
## StandardHours                               NA              NA
## StockOptionLevel                          0.01            0.00
## TotalWorkingYears                        -0.04            0.00
## TrainingTimesLastYear                     1.00            0.03
## WorkLifeBalance                           0.03            1.00
## YearsAtCompany                            0.00            0.01
## YearsInCurrentRole                       -0.01            0.05
## YearsSinceLastPromotion                   0.00            0.01
## YearsWithCurrManager                      0.00            0.00
##                          YearsAtCompany YearsInCurrentRole
## ï..Age                             0.31               0.21
## DailyRate                         -0.03               0.01
## DistanceFromHome                   0.01               0.02
## Education                          0.07               0.06
## EmployeeCount                        NA                 NA
## EnvironmentSatisfaction            0.00               0.02
## HourlyRate                        -0.02              -0.02
## JobInvolvement                    -0.02               0.01
## JobLevel                           0.53               0.39
## JobSatisfaction                    0.00               0.00
## MonthlyIncome                      0.51               0.36
## MonthlyRate                       -0.02              -0.01
## NumCompaniesWorked                -0.12              -0.09
## PercentSalaryHike                 -0.04               0.00
## PerformanceRating                  0.00               0.03
## RelationshipSatisfaction           0.02              -0.02
## StandardHours                        NA                 NA
## StockOptionLevel                   0.02               0.05
## TotalWorkingYears                  0.63               0.46
## TrainingTimesLastYear              0.00              -0.01
## WorkLifeBalance                    0.01               0.05
## YearsAtCompany                     1.00               0.76
## YearsInCurrentRole                 0.76               1.00
## YearsSinceLastPromotion            0.62               0.55
## YearsWithCurrManager               0.77               0.71
##                          YearsSinceLastPromotion YearsWithCurrManager
## ï..Age                                      0.22                 0.20
## DailyRate                                  -0.03                -0.03
## DistanceFromHome                            0.01                 0.01
## Education                                   0.05                 0.07
## EmployeeCount                                 NA                   NA
## EnvironmentSatisfaction                     0.02                 0.00
## HourlyRate                                 -0.03                -0.02
## JobInvolvement                             -0.02                 0.03
## JobLevel                                    0.35                 0.38
## JobSatisfaction                            -0.02                -0.03
## MonthlyIncome                               0.34                 0.34
## MonthlyRate                                 0.00                -0.04
## NumCompaniesWorked                         -0.04                -0.11
## PercentSalaryHike                          -0.02                -0.01
## PerformanceRating                           0.02                 0.02
## RelationshipSatisfaction                    0.03                 0.00
## StandardHours                                 NA                   NA
## StockOptionLevel                            0.01                 0.02
## TotalWorkingYears                           0.40                 0.46
## TrainingTimesLastYear                       0.00                 0.00
## WorkLifeBalance                             0.01                 0.00
## YearsAtCompany                              0.62                 0.77
## YearsInCurrentRole                          0.55                 0.71
## YearsSinceLastPromotion                     1.00                 0.51
## YearsWithCurrManager                        0.51                 1.00
# Correlation matrix of the selected columns. Columns with no variation
# trigger the zero-standard-deviation warning reproduced below.
N <- cor(dataColumns)
## Warning in cor(dataColumns): the standard deviation is zero
# Scatterplot matrices, drawn six columns at a time.
invisible(lapply(
  list(1:6, 7:12, 13:18),
  function(col_idx) pairs(dataColumns[col_idx])
))

# Cross-tabulate BusinessTravel against Attrition and run a chi-squared
# test on the resulting contingency table.
table_travel <- table(
  employees.df[["BusinessTravel"]],
  employees.df[["Attrition"]]
)
chisq.test(table_travel)
## 
##  Pearson's Chi-squared test
## 
## data:  table_travel
## X-squared = 24.182, df = 2, p-value = 5.609e-06
# Welch two-sample t-test: DistanceFromHome compared across Attrition groups.
t.test(DistanceFromHome ~ Attrition, data = employees.df)
## 
##  Welch Two Sample t-test
## 
## data:  DistanceFromHome by Attrition
## t = -2.8882, df = 322.72, p-value = 0.004137
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.8870025 -0.5475146
## sample estimates:
##  mean in group No mean in group Yes 
##          8.915653         10.632911
# Welch two-sample t-test: JobLevel compared across Attrition groups.
t.test(JobLevel ~ Attrition, data = employees.df)
## 
##  Welch Two Sample t-test
## 
## data:  JobLevel by Attrition
## t = 7.3859, df = 376.25, p-value = 9.845e-13
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.3733861 0.6443231
## sample estimates:
##  mean in group No mean in group Yes 
##          2.145985          1.637131
# Chi-squared test on the JobRole x Attrition contingency table.
table_role <- table(
  employees.df[["JobRole"]],
  employees.df[["Attrition"]]
)
chisq.test(table_role)
## 
##  Pearson's Chi-squared test
## 
## data:  table_role
## X-squared = 86.19, df = 8, p-value = 2.752e-15
# Chi-squared test on the JobSatisfaction x Attrition contingency table.
table_job_sat <- table(
  employees.df[["JobSatisfaction"]],
  employees.df[["Attrition"]]
)
chisq.test(table_job_sat)
## 
##  Pearson's Chi-squared test
## 
## data:  table_job_sat
## X-squared = 17.505, df = 3, p-value = 0.0005563

All five tests are significant at the 5% level, so we can conclude that attrition is associated with: Business Travel, Distance From Home, Job Level, Job Role, and Job Satisfaction.

Relation between attrition and other variables:

library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(grid)
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 3.3.3
# a: age density, one panel per Attrition level.
a <- ggplot(employees.df, aes(x = ï..Age, fill = Attrition)) +
  geom_density() +
  facet_grid(~Attrition)
# b: counts per BusinessTravel category, filled by Attrition.
b <- ggplot(employees.df, aes(x = BusinessTravel, fill = Attrition)) +
  geom_bar()
# Show both charts side by side.
grid.arrange(a, b, ncol = 2)

It can be inferred that attrition is concentrated among employees around 30 years of age. Employees who rarely travel leave the most.

# c: distribution of DailyRate, filled by Attrition. DailyRate is numeric with
# a wide spread of values, so it is binned into a histogram; geom_bar() would
# draw one thin bar per distinct value and the chart would be unreadable.
# NOTE: the name `c` is also a base R function; the name is kept unchanged.
c <- ggplot(employees.df, aes(x = DailyRate, fill = Attrition)) +
  geom_histogram(bins = 30)
# d: counts per Department, filled by Attrition.
d <- ggplot(employees.df, aes(x = Department, fill = Attrition)) +
  geom_bar()
# Show both charts side by side.
grid.arrange(c, d, ncol = 2)

Hence attrition was lowest in the HR department.

# e: counts per DistanceFromHome value, filled by Attrition.
e <- ggplot(employees.df, aes(x = DistanceFromHome, fill = Attrition)) +
  geom_bar()
# f: counts per Education value, filled by Attrition.
f <- ggplot(employees.df, aes(x = Education, fill = Attrition)) +
  geom_bar()
# Show both charts side by side.
grid.arrange(e, f, ncol = 2)

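Most of the employees who have left the organization live near the office, although the t-test above shows that, on average, those who left live slightly farther away (10.6 vs. 8.9) than those who stayed.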

# g: counts per EducationField category, filled by Attrition.
g <- ggplot(employees.df, aes(x = EducationField, fill = Attrition)) +
  geom_bar()
# h: counts per EmployeeCount value, filled by Attrition.
h <- ggplot(employees.df, aes(x = EmployeeCount, fill = Attrition)) +
  geom_bar()
# Show both charts side by side.
grid.arrange(g, h, ncol = 2)

# i: distribution of EmployeeNumber, filled by Attrition. The values are spread
# over a wide numeric range, so they are binned; geom_bar() would draw a
# separate sliver for each distinct value.
i <- ggplot(employees.df, aes(x = EmployeeNumber, fill = Attrition)) +
  geom_histogram(bins = 30)
# j: counts per EnvironmentSatisfaction value, filled by Attrition.
j <- ggplot(
  employees.df,
  aes(x = EnvironmentSatisfaction, fill = Attrition)
) +
  geom_bar()
# Show both charts side by side.
grid.arrange(i, j, ncol = 2)

# k: counts per Gender, filled by Attrition.
k <- ggplot(employees.df, aes(x = Gender, fill = Attrition)) +
  geom_bar()
# l: counts per HourlyRate value, filled by Attrition.
l <- ggplot(employees.df, aes(x = HourlyRate, fill = Attrition)) +
  geom_bar()
# Show both charts side by side.
grid.arrange(k, l, ncol = 2)

# m: counts per JobInvolvement value, filled by Attrition.
m <- ggplot(employees.df, aes(x = JobInvolvement, fill = Attrition)) +
  geom_bar()
# n: counts per JobLevel value, filled by Attrition.
n <- ggplot(employees.df, aes(x = JobLevel, fill = Attrition)) +
  geom_bar()
# Show both charts side by side.
grid.arrange(m, n, ncol = 2)

# o: counts per JobRole category, filled by Attrition.
o <- ggplot(employees.df, aes(x = JobRole, fill = Attrition)) +
  geom_bar()
# p: counts per JobSatisfaction value, filled by Attrition.
p <- ggplot(employees.df, aes(x = JobSatisfaction, fill = Attrition)) +
  geom_bar()
# Show both charts side by side.
grid.arrange(o, p, ncol = 2)

# q: counts per MaritalStatus category, filled by Attrition.
q <- ggplot(employees.df, aes(x = MaritalStatus, fill = Attrition)) +
  geom_bar()
# r: MonthlyIncome density curves, filled by Attrition.
r <- ggplot(employees.df, aes(x = MonthlyIncome, fill = Attrition)) +
  geom_density()
# Show both charts side by side.
grid.arrange(q, r, ncol = 2)

# s: distribution of MonthlyRate, filled by Attrition. MonthlyRate is numeric
# with a wide spread of values, so it is binned into a histogram; geom_bar()
# would draw one thin bar per distinct value.
s <- ggplot(employees.df, aes(x = MonthlyRate, fill = Attrition)) +
  geom_histogram(bins = 30)
# t: counts per NumCompaniesWorked value, filled by Attrition.
# NOTE: the name `t` is also a base R function; the name is kept unchanged.
t <- ggplot(employees.df, aes(x = NumCompaniesWorked, fill = Attrition)) +
  geom_bar()
# Show both charts side by side.
grid.arrange(s, t, ncol = 2)

# u: counts per Over18 value, filled by Attrition.
u <- ggplot(employees.df, aes(x = Over18, fill = Attrition)) +
  geom_bar()
# v: counts per OverTime value, filled by Attrition.
v <- ggplot(employees.df, aes(x = OverTime, fill = Attrition)) +
  geom_bar()
# Show both charts side by side.
grid.arrange(u, v, ncol = 2)

# w: counts per PercentSalaryHike value, filled by Attrition.
w <- ggplot(employees.df, aes(x = PercentSalaryHike, fill = Attrition)) +
  geom_bar()
# x: counts per PerformanceRating value, filled by Attrition.
x <- ggplot(employees.df, aes(x = PerformanceRating, fill = Attrition)) +
  geom_bar()
# Show both charts side by side.
grid.arrange(w, x, ncol = 2)

# y: counts per RelationshipSatisfaction value, filled by Attrition.
y <- ggplot(
  employees.df,
  aes(x = RelationshipSatisfaction, fill = Attrition)
) +
  geom_bar()
# z: counts per StandardHours value, filled by Attrition.
z <- ggplot(employees.df, aes(x = StandardHours, fill = Attrition)) +
  geom_bar()
# Display the two charts built in this step (y and z) side by side.
grid.arrange(y, z, ncol = 2)

Employees who receive a lower percent salary hike are the ones who usually leave.

# a1: counts per StockOptionLevel value, filled by Attrition.
a1 <- ggplot(employees.df, aes(x = StockOptionLevel, fill = Attrition)) +
  geom_bar()
# a2: counts per TotalWorkingYears value, filled by Attrition.
a2 <- ggplot(employees.df, aes(x = TotalWorkingYears, fill = Attrition)) +
  geom_bar()
# Show both charts side by side.
grid.arrange(a1, a2, ncol = 2)

# a3: counts per TrainingTimesLastYear value, filled by Attrition.
a3 <- ggplot(
  employees.df,
  aes(x = TrainingTimesLastYear, fill = Attrition)
) +
  geom_bar()
# a4: counts per WorkLifeBalance value, filled by Attrition.
a4 <- ggplot(employees.df, aes(x = WorkLifeBalance, fill = Attrition)) +
  geom_bar()
# Show both charts side by side.
grid.arrange(a3, a4, ncol = 2)

# a5: counts per YearsAtCompany value, filled by Attrition.
a5 <- ggplot(employees.df, aes(x = YearsAtCompany, fill = Attrition)) +
  geom_bar()
# a6: counts per YearsInCurrentRole value, filled by Attrition.
a6 <- ggplot(employees.df, aes(x = YearsInCurrentRole, fill = Attrition)) +
  geom_bar()
# Show both charts side by side.
grid.arrange(a5, a6, ncol = 2)

# a7: counts per YearsSinceLastPromotion value, filled by Attrition.
a7 <- ggplot(
  employees.df,
  aes(x = YearsSinceLastPromotion, fill = Attrition)
) +
  geom_bar()
# a8: counts per YearsWithCurrManager value, filled by Attrition.
a8 <- ggplot(employees.df, aes(x = YearsWithCurrManager, fill = Attrition)) +
  geom_bar()
# Show both charts side by side.
grid.arrange(a7, a8, ncol = 2)

A large proportion of the employees who have spent no time with their current manager leave.