1. Reading the dataset

# Load the HR attrition CSV into `att.df` and expose its columns by name.
# NOTE(review): setwd() to a user-specific path and attach() are both kept
# because the statements further down refer to bare column names; prefer a
# project-relative path and `att.df$col` / `data = att.df` in new code.
setwd("C:/Users/CJ With HP/Desktop/IIM Lucknow/Datasets")
# stringsAsFactors = TRUE is spelled out: R >= 4.0 reads text columns as
# character by default, while the str() output recorded below shows factors.
att.df <- read.csv(
  "WA_Fn-UseC_-HR-Employee-Attrition.csv",
  stringsAsFactors = TRUE
)
# The first column header is overwritten with "Age".
# NOTE(review): presumably the raw header is mangled by a byte-order mark;
# confirm against the CSV.
names(att.df)[1] <- "Age"
# View() opens a GUI data viewer, so only call it in an interactive session.
if (interactive()) {
  View(att.df)
}
attach(att.df)

2. Creating a descriptive analysis for each variable

# Print the structure of att.df: one line per column with its type and the
# first few values (output recorded below).
str(att.df)
## 'data.frame':    1470 obs. of  35 variables:
##  $ Age                     : int  41 49 37 33 27 32 59 30 38 36 ...
##  $ Attrition               : Factor w/ 2 levels "No","Yes": 2 1 2 1 1 1 1 1 1 1 ...
##  $ BusinessTravel          : Factor w/ 3 levels "Non-Travel","Travel_Frequently",..: 3 2 3 2 3 2 3 3 2 3 ...
##  $ DailyRate               : int  1102 279 1373 1392 591 1005 1324 1358 216 1299 ...
##  $ Department              : Factor w/ 3 levels "Human Resources",..: 3 2 2 2 2 2 2 2 2 2 ...
##  $ DistanceFromHome        : int  1 8 2 3 2 2 3 24 23 27 ...
##  $ Education               : int  2 1 2 4 1 2 3 1 3 3 ...
##  $ EducationField          : Factor w/ 6 levels "Human Resources",..: 2 2 5 2 4 2 4 2 2 4 ...
##  $ EmployeeCount           : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ EmployeeNumber          : int  1 2 4 5 7 8 10 11 12 13 ...
##  $ EnvironmentSatisfaction : int  2 3 4 4 1 4 3 4 4 3 ...
##  $ Gender                  : Factor w/ 2 levels "Female","Male": 1 2 2 1 2 2 1 2 2 2 ...
##  $ HourlyRate              : int  94 61 92 56 40 79 81 67 44 94 ...
##  $ JobInvolvement          : int  3 2 2 3 3 3 4 3 2 3 ...
##  $ JobLevel                : int  2 2 1 1 1 1 1 1 3 2 ...
##  $ JobRole                 : Factor w/ 9 levels "Healthcare Representative",..: 8 7 3 7 3 3 3 3 5 1 ...
##  $ JobSatisfaction         : int  4 2 3 3 2 4 1 3 3 3 ...
##  $ MaritalStatus           : Factor w/ 3 levels "Divorced","Married",..: 3 2 3 2 2 3 2 1 3 2 ...
##  $ MonthlyIncome           : int  5993 5130 2090 2909 3468 3068 2670 2693 9526 5237 ...
##  $ MonthlyRate             : int  19479 24907 2396 23159 16632 11864 9964 13335 8787 16577 ...
##  $ NumCompaniesWorked      : int  8 1 6 1 9 0 4 1 0 6 ...
##  $ Over18                  : Factor w/ 1 level "Y": 1 1 1 1 1 1 1 1 1 1 ...
##  $ OverTime                : Factor w/ 2 levels "No","Yes": 2 1 2 2 1 1 2 1 1 1 ...
##  $ PercentSalaryHike       : int  11 23 15 11 12 13 20 22 21 13 ...
##  $ PerformanceRating       : int  3 4 3 3 3 3 4 4 4 3 ...
##  $ RelationshipSatisfaction: int  1 4 2 3 4 3 1 2 2 2 ...
##  $ StandardHours           : int  80 80 80 80 80 80 80 80 80 80 ...
##  $ StockOptionLevel        : int  0 1 0 0 1 0 3 1 0 2 ...
##  $ TotalWorkingYears       : int  8 10 7 8 6 8 12 1 10 17 ...
##  $ TrainingTimesLastYear   : int  0 3 3 3 3 2 3 2 2 3 ...
##  $ WorkLifeBalance         : int  1 3 3 3 3 2 2 3 3 2 ...
##  $ YearsAtCompany          : int  6 10 0 8 2 7 1 1 9 7 ...
##  $ YearsInCurrentRole      : int  4 7 0 7 2 7 0 0 7 7 ...
##  $ YearsSinceLastPromotion : int  0 1 0 3 2 3 0 0 1 7 ...
##  $ YearsWithCurrManager    : int  5 7 0 0 2 6 0 0 8 7 ...
# Summary statistics (n, mean, sd, median, ..., skew, kurtosis, se) for every
# column of att.df. In the output below the factor columns carry a trailing
# `*`; their statistics are computed on the integer level codes, so read
# those rows with care.
library(psych)
describe(att.df)
##                          vars    n     mean      sd  median  trimmed
## Age                         1 1470    36.92    9.14    36.0    36.47
## Attrition*                  2 1470     1.16    0.37     1.0     1.08
## BusinessTravel*             3 1470     2.61    0.67     3.0     2.76
## DailyRate                   4 1470   802.49  403.51   802.0   803.83
## Department*                 5 1470     2.26    0.53     2.0     2.25
## DistanceFromHome            6 1470     9.19    8.11     7.0     8.08
## Education                   7 1470     2.91    1.02     3.0     2.98
## EducationField*             8 1470     3.25    1.33     3.0     3.10
## EmployeeCount               9 1470     1.00    0.00     1.0     1.00
## EmployeeNumber             10 1470  1024.87  602.02  1020.5  1023.40
## EnvironmentSatisfaction    11 1470     2.72    1.09     3.0     2.78
## Gender*                    12 1470     1.60    0.49     2.0     1.62
## HourlyRate                 13 1470    65.89   20.33    66.0    66.02
## JobInvolvement             14 1470     2.73    0.71     3.0     2.74
## JobLevel                   15 1470     2.06    1.11     2.0     1.90
## JobRole*                   16 1470     5.46    2.46     6.0     5.61
## JobSatisfaction            17 1470     2.73    1.10     3.0     2.79
## MaritalStatus*             18 1470     2.10    0.73     2.0     2.12
## MonthlyIncome              19 1470  6502.93 4707.96  4919.0  5667.24
## MonthlyRate                20 1470 14313.10 7117.79 14235.5 14286.48
## NumCompaniesWorked         21 1470     2.69    2.50     2.0     2.36
## Over18*                    22 1470     1.00    0.00     1.0     1.00
## OverTime*                  23 1470     1.28    0.45     1.0     1.23
## PercentSalaryHike          24 1470    15.21    3.66    14.0    14.80
## PerformanceRating          25 1470     3.15    0.36     3.0     3.07
## RelationshipSatisfaction   26 1470     2.71    1.08     3.0     2.77
## StandardHours              27 1470    80.00    0.00    80.0    80.00
## StockOptionLevel           28 1470     0.79    0.85     1.0     0.67
## TotalWorkingYears          29 1470    11.28    7.78    10.0    10.37
## TrainingTimesLastYear      30 1470     2.80    1.29     3.0     2.72
## WorkLifeBalance            31 1470     2.76    0.71     3.0     2.77
## YearsAtCompany             32 1470     7.01    6.13     5.0     5.99
## YearsInCurrentRole         33 1470     4.23    3.62     3.0     3.85
## YearsSinceLastPromotion    34 1470     2.19    3.22     1.0     1.48
## YearsWithCurrManager       35 1470     4.12    3.57     3.0     3.77
##                              mad  min   max range  skew kurtosis     se
## Age                         8.90   18    60    42  0.41    -0.41   0.24
## Attrition*                  0.00    1     2     1  1.84     1.39   0.01
## BusinessTravel*             0.00    1     3     2 -1.44     0.69   0.02
## DailyRate                 510.01  102  1499  1397  0.00    -1.21  10.52
## Department*                 0.00    1     3     2  0.17    -0.40   0.01
## DistanceFromHome            7.41    1    29    28  0.96    -0.23   0.21
## Education                   1.48    1     5     4 -0.29    -0.56   0.03
## EducationField*             1.48    1     6     5  0.55    -0.69   0.03
## EmployeeCount               0.00    1     1     0   NaN      NaN   0.00
## EmployeeNumber            790.97    1  2068  2067  0.02    -1.23  15.70
## EnvironmentSatisfaction     1.48    1     4     3 -0.32    -1.20   0.03
## Gender*                     0.00    1     2     1 -0.41    -1.83   0.01
## HourlyRate                 26.69   30   100    70 -0.03    -1.20   0.53
## JobInvolvement              0.00    1     4     3 -0.50     0.26   0.02
## JobLevel                    1.48    1     5     4  1.02     0.39   0.03
## JobRole*                    2.97    1     9     8 -0.36    -1.20   0.06
## JobSatisfaction             1.48    1     4     3 -0.33    -1.22   0.03
## MaritalStatus*              1.48    1     3     2 -0.15    -1.12   0.02
## MonthlyIncome            3260.24 1009 19999 18990  1.37     0.99 122.79
## MonthlyRate              9201.76 2094 26999 24905  0.02    -1.22 185.65
## NumCompaniesWorked          1.48    0     9     9  1.02     0.00   0.07
## Over18*                     0.00    1     1     0   NaN      NaN   0.00
## OverTime*                   0.00    1     2     1  0.96    -1.07   0.01
## PercentSalaryHike           2.97   11    25    14  0.82    -0.31   0.10
## PerformanceRating           0.00    3     4     1  1.92     1.68   0.01
## RelationshipSatisfaction    1.48    1     4     3 -0.30    -1.19   0.03
## StandardHours               0.00   80    80     0   NaN      NaN   0.00
## StockOptionLevel            1.48    0     3     3  0.97     0.35   0.02
## TotalWorkingYears           5.93    0    40    40  1.11     0.91   0.20
## TrainingTimesLastYear       1.48    0     6     6  0.55     0.48   0.03
## WorkLifeBalance             0.00    1     4     3 -0.55     0.41   0.02
## YearsAtCompany              4.45    0    40    40  1.76     3.91   0.16
## YearsInCurrentRole          4.45    0    18    18  0.92     0.47   0.09
## YearsSinceLastPromotion     1.48    0    15    15  1.98     3.59   0.08
## YearsWithCurrManager        4.45    0    17    17  0.83     0.16   0.09

3. Creating a one way contingency table for all the categorical variables in the dataset

# One-way frequency tables for the categorical / ordinal columns.
# Every column is looked up inside att.df through with() instead of through
# the attach()ed search path: loading vcd later in this script masks
# `JobSatisfaction`, so a bare name is not guaranteed to be the data column.
# with() keeps the variable name as the table header, so the printed output
# is unchanged.
with(att.df, table(Attrition))
## Attrition
##   No  Yes 
## 1233  237
with(att.df, table(BusinessTravel))
## BusinessTravel
##        Non-Travel Travel_Frequently     Travel_Rarely 
##               150               277              1043
with(att.df, table(Department))
## Department
##        Human Resources Research & Development                  Sales 
##                     63                    961                    446
with(att.df, table(EducationField))
## EducationField
##  Human Resources    Life Sciences        Marketing          Medical 
##               27              606              159              464 
##            Other Technical Degree 
##               82              132
with(att.df, table(Education))
## Education
##   1   2   3   4   5 
## 170 282 572 398  48
with(att.df, table(EnvironmentSatisfaction))
## EnvironmentSatisfaction
##   1   2   3   4 
## 284 287 453 446
with(att.df, table(Gender))
## Gender
## Female   Male 
##    588    882
with(att.df, table(JobInvolvement))
## JobInvolvement
##   1   2   3   4 
##  83 375 868 144
with(att.df, table(JobRole))
## JobRole
## Healthcare Representative           Human Resources 
##                       131                        52 
##     Laboratory Technician                   Manager 
##                       259                       102 
##    Manufacturing Director         Research Director 
##                       145                        80 
##        Research Scientist           Sales Executive 
##                       292                       326 
##      Sales Representative 
##                        83
with(att.df, table(JobLevel))
## JobLevel
##   1   2   3   4   5 
## 543 534 218 106  69
with(att.df, table(JobSatisfaction))
## JobSatisfaction
##   1   2   3   4 
## 289 280 442 459
with(att.df, table(MaritalStatus))
## MaritalStatus
## Divorced  Married   Single 
##      327      673      470
with(att.df, table(OverTime))
## OverTime
##   No  Yes 
## 1054  416

4. Creating two way contingency tables (row percentages) of the categorical variables in the dataset against Attrition

# Two-way tables of each predictor against Attrition. For every pair the
# counts are cross-tabulated with xtabs() and then printed as row percentages
# (margin = 1), rounded to two decimals, so each row sums to 100.
# `mytable` is overwritten by every pair; after this section it holds the
# Education x Attrition counts.
mytable <- xtabs(~ PerformanceRating + Attrition, data = att.df)
round(100 * prop.table(mytable, margin = 1), digits = 2)
##                  Attrition
## PerformanceRating    No   Yes
##                 3 83.92 16.08
##                 4 83.63 16.37
mytable <- xtabs(~ OverTime + Attrition, data = att.df)
round(100 * prop.table(mytable, margin = 1), digits = 2)
##         Attrition
## OverTime    No   Yes
##      No  89.56 10.44
##      Yes 69.47 30.53
mytable <- xtabs(~ WorkLifeBalance + Attrition, data = att.df)
round(100 * prop.table(mytable, margin = 1), digits = 2)
##                Attrition
## WorkLifeBalance    No   Yes
##               1 68.75 31.25
##               2 83.14 16.86
##               3 85.78 14.22
##               4 82.35 17.65
mytable <- xtabs(~ JobRole + Attrition, data = att.df)
round(100 * prop.table(mytable, margin = 1), digits = 2)
##                            Attrition
## JobRole                        No   Yes
##   Healthcare Representative 93.13  6.87
##   Human Resources           76.92 23.08
##   Laboratory Technician     76.06 23.94
##   Manager                   95.10  4.90
##   Manufacturing Director    93.10  6.90
##   Research Director         97.50  2.50
##   Research Scientist        83.90 16.10
##   Sales Executive           82.52 17.48
##   Sales Representative      60.24 39.76
mytable <- xtabs(~ NumCompaniesWorked + Attrition, data = att.df)
round(100 * prop.table(mytable, margin = 1), digits = 2)
##                   Attrition
## NumCompaniesWorked    No   Yes
##                  0 88.32 11.68
##                  1 81.19 18.81
##                  2 89.04 10.96
##                  3 89.94 10.06
##                  4 87.77 12.23
##                  5 74.60 25.40
##                  6 77.14 22.86
##                  7 77.03 22.97
##                  8 87.76 12.24
##                  9 76.92 23.08
mytable <- xtabs(~ MaritalStatus + Attrition, data = att.df)
round(100 * prop.table(mytable, margin = 1), digits = 2)
##              Attrition
## MaritalStatus    No   Yes
##      Divorced 89.91 10.09
##      Married  87.52 12.48
##      Single   74.47 25.53
mytable <- xtabs(~ Gender + Attrition, data = att.df)
round(100 * prop.table(mytable, margin = 1), digits = 2)
##         Attrition
## Gender      No   Yes
##   Female 85.20 14.80
##   Male   82.99 17.01
mytable <- xtabs(~ EnvironmentSatisfaction + Attrition, data = att.df)
round(100 * prop.table(mytable, margin = 1), digits = 2)
##                        Attrition
## EnvironmentSatisfaction    No   Yes
##                       1 74.65 25.35
##                       2 85.02 14.98
##                       3 86.31 13.69
##                       4 86.55 13.45
mytable <- xtabs(~ BusinessTravel + Attrition, data = att.df)
round(100 * prop.table(mytable, margin = 1), digits = 2)
##                    Attrition
## BusinessTravel         No   Yes
##   Non-Travel        92.00  8.00
##   Travel_Frequently 75.09 24.91
##   Travel_Rarely     85.04 14.96
mytable <- xtabs(~ EducationField + Attrition, data = att.df)
round(100 * prop.table(mytable, margin = 1), digits = 2)
##                   Attrition
## EducationField        No   Yes
##   Human Resources  74.07 25.93
##   Life Sciences    85.31 14.69
##   Marketing        77.99 22.01
##   Medical          86.42 13.58
##   Other            86.59 13.41
##   Technical Degree 75.76 24.24
mytable <- xtabs(~ JobSatisfaction + Attrition, data = att.df)
round(100 * prop.table(mytable, margin = 1), digits = 2)
##                Attrition
## JobSatisfaction    No   Yes
##               1 77.16 22.84
##               2 83.57 16.43
##               3 83.48 16.52
##               4 88.67 11.33
mytable <- xtabs(~ Education + Attrition, data = att.df)
round(100 * prop.table(mytable, margin = 1), digits = 2)
##          Attrition
## Education    No   Yes
##         1 81.76 18.24
##         2 84.40 15.60
##         3 82.69 17.31
##         4 85.43 14.57
##         5 89.58 10.42

5. Boxplots

# Horizontal boxplots of each numeric variable split by Attrition (No / Yes).
# With horizontal = TRUE the measured variable runs along the x-axis, so
# `xlab` carries the variable name and `ylab` the grouping.
# The formula is built per variable and evaluated against att.df through
# `data =`, so the plots do not depend on attach(). Using the column name
# as the label also fixes the misspelt "YearsWithCurManager" axis title.
boxplot_vars <- c(
  "Age",
  "DistanceFromHome",
  "MonthlyIncome",
  "YearsWithCurrManager",
  "MonthlyRate",
  "DailyRate",
  "HourlyRate"
)
for (v in boxplot_vars) {
  boxplot(
    reformulate("Attrition", response = v),
    data = att.df,
    main = "Boxplot",
    xlab = v,
    ylab = "Attrition(Yes/No)",
    horizontal = TRUE,
    col = c("pink", "lightblue")
  )
}

6. Histograms

# Base-graphics histograms of selected numeric columns, read from att.df
# explicitly so they do not depend on attach().

# freq = FALSE draws densities, so the y-axis is labelled "density"
# (it was labelled "count", which did not match what is plotted).
hist(att.df$Age, xlab = "age", ylab = "density", breaks = 20,
     main = "Age variability in the company", col = "lightblue", freq = FALSE)

# The remaining plots show counts; ylim is fixed at 0-400 on three of them
# so their heights are drawn on a common scale.
hist(att.df$MonthlyIncome, xlab = "MonthlyIncome", ylab = "count", breaks = 20,
     main = "MonthlyIncome", col = "lightblue", ylim = c(0, 400))

# Labels below are spelled exactly like the column names
# (previously "YearsAtcompany" / "YearswithCurManager" / "YearsWithCurManager").
hist(att.df$YearsAtCompany, xlab = "YearsAtCompany", ylab = "count",
     breaks = 20, main = "YearsAtCompany", col = "lightblue",
     ylim = c(0, 400))

hist(att.df$YearsWithCurrManager, xlab = "YearsWithCurrManager",
     ylab = "count", breaks = 20, main = "YearsWithCurrManager",
     col = "lightblue", ylim = c(0, 400))

hist(att.df$PercentSalaryHike, xlab = "PercentSalaryHike", ylab = "count",
     breaks = 20, main = "PercentSalaryHike", col = "lightblue")

library(lattice)
# Lattice histograms conditioned on a second variable (one panel per level).
# `data = att.df` makes each formula resolve against the data frame itself
# rather than the attach()ed search path.
histogram(~ Attrition | JobRole, data = att.df)

# Department has three levels (see the str() output), so the panels are laid
# out 3 x 1; the previous 4 x 1 layout left an empty fourth slot.
histogram(~ Attrition | Department, data = att.df, layout = c(3, 1),
          col = c("lightblue", "pink"))

histogram(~ PercentSalaryHike | Attrition, data = att.df)

histogram(~ Education | Attrition, data = att.df)

7. Correlation Matrix

# Pearson correlation matrix of the numeric columns, rounded to 2 decimals.
# Columns are selected by name instead of by position, so the selection
# survives any reordering of the CSV columns. The names and their order
# match the rows of the matrix printed below.
corr_vars <- c(
  "Age", "DailyRate", "DistanceFromHome", "Education",
  "EnvironmentSatisfaction", "HourlyRate", "JobInvolvement", "JobLevel",
  "JobSatisfaction", "MonthlyIncome", "MonthlyRate", "NumCompaniesWorked",
  "PercentSalaryHike", "PerformanceRating", "RelationshipSatisfaction",
  "TotalWorkingYears", "TrainingTimesLastYear", "WorkLifeBalance",
  "YearsAtCompany", "YearsInCurrentRole", "YearsSinceLastPromotion",
  "YearsWithCurrManager"
)
round(cor(att.df[, corr_vars]), 2)
##                            Age DailyRate DistanceFromHome Education
## Age                       1.00      0.01             0.00      0.21
## DailyRate                 0.01      1.00             0.00     -0.02
## DistanceFromHome          0.00      0.00             1.00      0.02
## Education                 0.21     -0.02             0.02      1.00
## EnvironmentSatisfaction   0.01      0.02            -0.02     -0.03
## HourlyRate                0.02      0.02             0.03      0.02
## JobInvolvement            0.03      0.05             0.01      0.04
## JobLevel                  0.51      0.00             0.01      0.10
## JobSatisfaction           0.00      0.03             0.00     -0.01
## MonthlyIncome             0.50      0.01            -0.02      0.09
## MonthlyRate               0.03     -0.03             0.03     -0.03
## NumCompaniesWorked        0.30      0.04            -0.03      0.13
## PercentSalaryHike         0.00      0.02             0.04     -0.01
## PerformanceRating         0.00      0.00             0.03     -0.02
## RelationshipSatisfaction  0.05      0.01             0.01     -0.01
## TotalWorkingYears         0.68      0.01             0.00      0.15
## TrainingTimesLastYear    -0.02      0.00            -0.04     -0.03
## WorkLifeBalance          -0.02     -0.04            -0.03      0.01
## YearsAtCompany            0.31     -0.03             0.01      0.07
## YearsInCurrentRole        0.21      0.01             0.02      0.06
## YearsSinceLastPromotion   0.22     -0.03             0.01      0.05
## YearsWithCurrManager      0.20     -0.03             0.01      0.07
##                          EnvironmentSatisfaction HourlyRate JobInvolvement
## Age                                         0.01       0.02           0.03
## DailyRate                                   0.02       0.02           0.05
## DistanceFromHome                           -0.02       0.03           0.01
## Education                                  -0.03       0.02           0.04
## EnvironmentSatisfaction                     1.00      -0.05          -0.01
## HourlyRate                                 -0.05       1.00           0.04
## JobInvolvement                             -0.01       0.04           1.00
## JobLevel                                    0.00      -0.03          -0.01
## JobSatisfaction                            -0.01      -0.07          -0.02
## MonthlyIncome                              -0.01      -0.02          -0.02
## MonthlyRate                                 0.04      -0.02          -0.02
## NumCompaniesWorked                          0.01       0.02           0.02
## PercentSalaryHike                          -0.03      -0.01          -0.02
## PerformanceRating                          -0.03       0.00          -0.03
## RelationshipSatisfaction                    0.01       0.00           0.03
## TotalWorkingYears                           0.00       0.00          -0.01
## TrainingTimesLastYear                      -0.02      -0.01          -0.02
## WorkLifeBalance                             0.03       0.00          -0.01
## YearsAtCompany                              0.00      -0.02          -0.02
## YearsInCurrentRole                          0.02      -0.02           0.01
## YearsSinceLastPromotion                     0.02      -0.03          -0.02
## YearsWithCurrManager                        0.00      -0.02           0.03
##                          JobLevel JobSatisfaction MonthlyIncome
## Age                          0.51            0.00          0.50
## DailyRate                    0.00            0.03          0.01
## DistanceFromHome             0.01            0.00         -0.02
## Education                    0.10           -0.01          0.09
## EnvironmentSatisfaction      0.00           -0.01         -0.01
## HourlyRate                  -0.03           -0.07         -0.02
## JobInvolvement              -0.01           -0.02         -0.02
## JobLevel                     1.00            0.00          0.95
## JobSatisfaction              0.00            1.00         -0.01
## MonthlyIncome                0.95           -0.01          1.00
## MonthlyRate                  0.04            0.00          0.03
## NumCompaniesWorked           0.14           -0.06          0.15
## PercentSalaryHike           -0.03            0.02         -0.03
## PerformanceRating           -0.02            0.00         -0.02
## RelationshipSatisfaction     0.02           -0.01          0.03
## TotalWorkingYears            0.78           -0.02          0.77
## TrainingTimesLastYear       -0.02           -0.01         -0.02
## WorkLifeBalance              0.04           -0.02          0.03
## YearsAtCompany               0.53            0.00          0.51
## YearsInCurrentRole           0.39            0.00          0.36
## YearsSinceLastPromotion      0.35           -0.02          0.34
## YearsWithCurrManager         0.38           -0.03          0.34
##                          MonthlyRate NumCompaniesWorked PercentSalaryHike
## Age                             0.03               0.30              0.00
## DailyRate                      -0.03               0.04              0.02
## DistanceFromHome                0.03              -0.03              0.04
## Education                      -0.03               0.13             -0.01
## EnvironmentSatisfaction         0.04               0.01             -0.03
## HourlyRate                     -0.02               0.02             -0.01
## JobInvolvement                 -0.02               0.02             -0.02
## JobLevel                        0.04               0.14             -0.03
## JobSatisfaction                 0.00              -0.06              0.02
## MonthlyIncome                   0.03               0.15             -0.03
## MonthlyRate                     1.00               0.02             -0.01
## NumCompaniesWorked              0.02               1.00             -0.01
## PercentSalaryHike              -0.01              -0.01              1.00
## PerformanceRating              -0.01              -0.01              0.77
## RelationshipSatisfaction        0.00               0.05             -0.04
## TotalWorkingYears               0.03               0.24             -0.02
## TrainingTimesLastYear           0.00              -0.07             -0.01
## WorkLifeBalance                 0.01              -0.01              0.00
## YearsAtCompany                 -0.02              -0.12             -0.04
## YearsInCurrentRole             -0.01              -0.09              0.00
## YearsSinceLastPromotion         0.00              -0.04             -0.02
## YearsWithCurrManager           -0.04              -0.11             -0.01
##                          PerformanceRating RelationshipSatisfaction
## Age                                   0.00                     0.05
## DailyRate                             0.00                     0.01
## DistanceFromHome                      0.03                     0.01
## Education                            -0.02                    -0.01
## EnvironmentSatisfaction              -0.03                     0.01
## HourlyRate                            0.00                     0.00
## JobInvolvement                       -0.03                     0.03
## JobLevel                             -0.02                     0.02
## JobSatisfaction                       0.00                    -0.01
## MonthlyIncome                        -0.02                     0.03
## MonthlyRate                          -0.01                     0.00
## NumCompaniesWorked                   -0.01                     0.05
## PercentSalaryHike                     0.77                    -0.04
## PerformanceRating                     1.00                    -0.03
## RelationshipSatisfaction             -0.03                     1.00
## TotalWorkingYears                     0.01                     0.02
## TrainingTimesLastYear                -0.02                     0.00
## WorkLifeBalance                       0.00                     0.02
## YearsAtCompany                        0.00                     0.02
## YearsInCurrentRole                    0.03                    -0.02
## YearsSinceLastPromotion               0.02                     0.03
## YearsWithCurrManager                  0.02                     0.00
##                          TotalWorkingYears TrainingTimesLastYear
## Age                                   0.68                 -0.02
## DailyRate                             0.01                  0.00
## DistanceFromHome                      0.00                 -0.04
## Education                             0.15                 -0.03
## EnvironmentSatisfaction               0.00                 -0.02
## HourlyRate                            0.00                 -0.01
## JobInvolvement                       -0.01                 -0.02
## JobLevel                              0.78                 -0.02
## JobSatisfaction                      -0.02                 -0.01
## MonthlyIncome                         0.77                 -0.02
## MonthlyRate                           0.03                  0.00
## NumCompaniesWorked                    0.24                 -0.07
## PercentSalaryHike                    -0.02                 -0.01
## PerformanceRating                     0.01                 -0.02
## RelationshipSatisfaction              0.02                  0.00
## TotalWorkingYears                     1.00                 -0.04
## TrainingTimesLastYear                -0.04                  1.00
## WorkLifeBalance                       0.00                  0.03
## YearsAtCompany                        0.63                  0.00
## YearsInCurrentRole                    0.46                 -0.01
## YearsSinceLastPromotion               0.40                  0.00
## YearsWithCurrManager                  0.46                  0.00
##                          WorkLifeBalance YearsAtCompany YearsInCurrentRole
## Age                                -0.02           0.31               0.21
## DailyRate                          -0.04          -0.03               0.01
## DistanceFromHome                   -0.03           0.01               0.02
## Education                           0.01           0.07               0.06
## EnvironmentSatisfaction             0.03           0.00               0.02
## HourlyRate                          0.00          -0.02              -0.02
## JobInvolvement                     -0.01          -0.02               0.01
## JobLevel                            0.04           0.53               0.39
## JobSatisfaction                    -0.02           0.00               0.00
## MonthlyIncome                       0.03           0.51               0.36
## MonthlyRate                         0.01          -0.02              -0.01
## NumCompaniesWorked                 -0.01          -0.12              -0.09
## PercentSalaryHike                   0.00          -0.04               0.00
## PerformanceRating                   0.00           0.00               0.03
## RelationshipSatisfaction            0.02           0.02              -0.02
## TotalWorkingYears                   0.00           0.63               0.46
## TrainingTimesLastYear               0.03           0.00              -0.01
## WorkLifeBalance                     1.00           0.01               0.05
## YearsAtCompany                      0.01           1.00               0.76
## YearsInCurrentRole                  0.05           0.76               1.00
## YearsSinceLastPromotion             0.01           0.62               0.55
## YearsWithCurrManager                0.00           0.77               0.71
##                          YearsSinceLastPromotion YearsWithCurrManager
## Age                                         0.22                 0.20
## DailyRate                                  -0.03                -0.03
## DistanceFromHome                            0.01                 0.01
## Education                                   0.05                 0.07
## EnvironmentSatisfaction                     0.02                 0.00
## HourlyRate                                 -0.03                -0.02
## JobInvolvement                             -0.02                 0.03
## JobLevel                                    0.35                 0.38
## JobSatisfaction                            -0.02                -0.03
## MonthlyIncome                               0.34                 0.34
## MonthlyRate                                 0.00                -0.04
## NumCompaniesWorked                         -0.04                -0.11
## PercentSalaryHike                          -0.02                -0.01
## PerformanceRating                           0.02                 0.02
## RelationshipSatisfaction                    0.03                 0.00
## TotalWorkingYears                           0.40                 0.46
## TrainingTimesLastYear                       0.00                 0.00
## WorkLifeBalance                             0.01                 0.00
## YearsAtCompany                              0.62                 0.77
## YearsInCurrentRole                          0.55                 0.71
## YearsSinceLastPromotion                     1.00                 0.51
## YearsWithCurrManager                        0.51                 1.00

8. Creating a corrgram

library(corrplot)
## corrplot 0.84 loaded
# Ellipse-style plot of the correlation matrix of the numeric columns.
# Columns are chosen by name rather than by position so the plot does not
# silently change if the CSV column order changes.
corr_vars <- c(
  "Age", "DailyRate", "DistanceFromHome", "Education",
  "EnvironmentSatisfaction", "HourlyRate", "JobInvolvement", "JobLevel",
  "JobSatisfaction", "MonthlyIncome", "MonthlyRate", "NumCompaniesWorked",
  "PercentSalaryHike", "PerformanceRating", "RelationshipSatisfaction",
  "TotalWorkingYears", "TrainingTimesLastYear", "WorkLifeBalance",
  "YearsAtCompany", "YearsInCurrentRole", "YearsSinceLastPromotion",
  "YearsWithCurrManager"
)
corrplot(
  corr = cor(att.df[, corr_vars], use = "complete.obs"),
  method = "ellipse"
)

9. Checking hypothesis using t-test

H1. To check whether there is a significant difference in the means of the (log-transformed) monthly income of the employees who leave and those who don’t

# Two-sample t-test of log(MonthlyIncome) between the Attrition groups.
# The column is read from att.df and the formula is evaluated inside with(),
# so neither line depends on attach(); the printed "data:" label is unchanged.
# NOTE(review): var.equal = TRUE requests the pooled-variance test; confirm
# the equal-variance assumption is intended (drop it for Welch's test).
log.trans.Income <- log(att.df$MonthlyIncome)
with(att.df, t.test(log.trans.Income ~ Attrition, var.equal = TRUE))
## 
##  Two Sample t-test
## 
## data:  log.trans.Income by Attrition
## t = 7.7481, df = 1468, p-value = 1.73e-14
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.2673770 0.4486555
## sample estimates:
##  mean in group No mean in group Yes 
##          8.610236          8.252220

H2. To check whether there is a significant difference in the means of the (log-transformed) monthly rate of the employees who leave and those who don’t

# Two-sample t-test of log(MonthlyRate) between the Attrition groups.
# The column is read from att.df and the formula is evaluated inside with(),
# so neither line depends on attach(); the printed "data:" label is unchanged.
# NOTE(review): var.equal = TRUE requests the pooled-variance test; confirm
# the equal-variance assumption is intended (drop it for Welch's test).
log.trans.Rate <- log(att.df$MonthlyRate)
with(att.df, t.test(log.trans.Rate ~ Attrition, var.equal = TRUE))
## 
##  Two Sample t-test
## 
## data:  log.trans.Rate by Attrition
## t = -0.47592, df = 1468, p-value = 0.6342
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.10954526  0.06676779
## sample estimates:
##  mean in group No mean in group Yes 
##          9.398882          9.420271

H3. To check whether there is a significant difference in the means of the (log-transformed) daily rate of the employees who leave and those who don’t

# Two-sample t-test of log(DailyRate) between the Attrition groups.
# The column is read from att.df and the formula is evaluated inside with(),
# so neither line depends on attach(); the printed "data:" label is unchanged.
# NOTE(review): var.equal = TRUE requests the pooled-variance test; confirm
# the equal-variance assumption is intended (drop it for Welch's test).
log.trans.DailyRate <- log(att.df$DailyRate)
with(att.df, t.test(log.trans.DailyRate ~ Attrition, var.equal = TRUE))
## 
##  Two Sample t-test
## 
## data:  log.trans.DailyRate by Attrition
## t = 1.9013, df = 1468, p-value = 0.05746
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.002824123  0.180830501
## sample estimates:
##  mean in group No mean in group Yes 
##          6.525604          6.436601

10. Chi-square test

# Chi-square tests of independence between a predictor and Attrition.
# For each predictor: cross-tabulate the counts, show the row percentages,
# then run Pearson's chi-squared test on the count table.

# BusinessTravel vs Attrition
mytable <- xtabs(~ BusinessTravel + Attrition, data = att.df)
round(100 * prop.table(mytable, margin = 1), digits = 2)
##                    Attrition
## BusinessTravel         No   Yes
##   Non-Travel        92.00  8.00
##   Travel_Frequently 75.09 24.91
##   Travel_Rarely     85.04 14.96
chisq.test(mytable)
## 
##  Pearson's Chi-squared test
## 
## data:  mytable
## X-squared = 24.182, df = 2, p-value = 5.609e-06

# JobSatisfaction vs Attrition; `mytable` keeps this table afterwards.
mytable <- xtabs(~ JobSatisfaction + Attrition, data = att.df)
round(100 * prop.table(mytable, margin = 1), digits = 2)
##                Attrition
## JobSatisfaction    No   Yes
##               1 77.16 22.84
##               2 83.57 16.43
##               3 83.48 16.52
##               4 88.67 11.33
chisq.test(mytable)
## 
##  Pearson's Chi-squared test
## 
## data:  mytable
## X-squared = 17.505, df = 3, p-value = 0.0005563
library(vcd)
## Loading required package: grid
## 
## Attaching package: 'vcd'
## The following object is masked from 'att.df':
## 
##     JobSatisfaction
# Fisher's exact test on the count table currently held in `mytable`
# (the JobSatisfaction x Attrition table built just before the chi-square test).
fisher.test(mytable)
## 
##  Fisher's Exact Test for Count Data
## 
## data:  mytable
## p-value = 0.0005767
## alternative hypothesis: two.sided
# Association statistics for the same `mytable` (vcd): likelihood-ratio and
# Pearson chi-square tests plus the phi coefficient, contingency coefficient
# and Cramer's V, as printed below.
assocstats(mytable)
##                     X^2 df   P(> X^2)
## Likelihood Ratio 17.356  3 0.00059691
## Pearson          17.505  3 0.00055630
## 
## Phi-Coefficient   : NA 
## Contingency Coeff.: 0.108 
## Cramer's V        : 0.109