This analysis is submitted as the Capstone Project work done for the internship pursued under the guidance of Prof. Sameer Mathur for the duration Jan 1, 2018 - Jan 27, 2018.
# Load the employee data set; the CSV is expected in the working directory.
# The file name is passed directly: wrapping a single string in
# paste(..., sep = "") was a no-op.
# stringsAsFactors is set explicitly so the text columns are read as factors
# regardless of the R version (the default changed to FALSE in R 4.0).
monthinc.df <- read.csv(
  "Monthly Income Project_data.csv",
  stringsAsFactors = TRUE
)
# View() opens a spreadsheet-style viewer, so only call it when a user is
# actually sitting at an interactive session.
if (interactive()) {
  View(monthinc.df)
}
# Number of rows and columns.
dim(monthinc.df)
## [1] 700 14
library(psych)
# Summary statistics for every column (describe() comes from psych).
describe(monthinc.df)
# Gender frequencies: tabulate once, print the table, then plot it.
counts_gen <- table(monthinc.df[["Gender"]])
counts_gen
##
## Female Male
## 286 414
barplot(
  counts_gen,
  names.arg = c("Female", "Male"),
  col = c("pink", "navy"),
  main = "Gender Distribution",
  xlab = "Gender",
  width = 1,
  space = 1,
  xlim = c(0, 10),
  ylim = c(0, 450)
)
# Add x-axis tick marks without tick labels.
Axis(side = 1, labels = FALSE)
# Department frequencies: tabulate once, print the table, then plot it.
counts_dep <- table(monthinc.df[["Department"]])
counts_dep
##
## Human Resources Research & Development Sales
## 24 460 216
barplot(
  counts_dep,
  names.arg = c("HR", "R&D", "Sales"),
  col = c("red", "gold", "purple"),
  main = "Department Distribution",
  xlab = "Department",
  width = 1,
  space = 1,
  xlim = c(0, 10),
  ylim = c(0, 500)
)
# Add x-axis tick marks without tick labels.
Axis(side = 1, labels = FALSE)
# Business-travel frequencies: tabulate once, print the table, then plot it.
counts_bustr <- table(monthinc.df$BusinessTravel)
counts_bustr
##
## Non-Travel Travel_Frequently Travel_Rarely
## 64 129 507
# ylim is raised from 500 to 550: the tallest bar (Travel_Rarely, 507
# employees) overshot the previous upper axis limit.
barplot(
  counts_bustr,
  names.arg = c(" Don't Travel", "Frequently", "Rarely"),
  col = c("wheat", "green1", "tomato"),
  main = "Business Travelling Status",
  xlab = "Status",
  width = 1,
  space = 1,
  xlim = c(0, 10),
  ylim = c(0, 550)
)
# Add x-axis tick marks without tick labels.
Axis(side = 1, labels = FALSE)
# Marital-status frequencies: tabulate once, print the table, then plot it.
counts_marstat <- table(monthinc.df[["MaritalStatus"]])
counts_marstat
##
## Divorced Married Single
## 161 312 227
barplot(
  counts_marstat,
  names.arg = c("Divorced", "Married", "Single"),
  col = c("white", "firebrick", "skyblue"),
  main = "Marital Status",
  xlab = "Status",
  width = 1,
  space = 1,
  xlim = c(0, 10),
  ylim = c(0, 500)
)
# Add x-axis tick marks without tick labels.
Axis(side = 1, labels = FALSE)
# Job-role frequencies: tabulate once, print the table, then plot it.
counts_jobr <- table(monthinc.df[["JobRole"]])
counts_jobr
##
## Healthcare Representative Human Resources
## 58 19
## Laboratory Technician Manager
## 123 51
## Manufacturing Director Research Director
## 69 39
## Research Scientist Sales Executive
## 142 155
## Sales Representative
## 44
# The bar labels are suppressed (xaxt = "n"); roles are identified through
# the colour legend instead. The generous ylim leaves head-room for it.
barplot(
  counts_jobr,
  col = c(
    "violet", "ivory", "blue", "green", "yellow",
    "darkorange", "red", "cyan", "magenta"
  ),
  main = "Job Role",
  xlab = "Role",
  xaxt = "n",
  width = 1,
  space = 1,
  xlim = c(0, 20),
  ylim = c(0, 380)
)
# Legend entries are listed in the same order as the bars and fill colours.
legend(
  "topleft",
  legend = c(
    "Health Rep.", "HR", "Lab. Tech.", "Manager", "Manufac. Director",
    "Research Director", "Research Scientist", "Sales Executive",
    "Sales Representative"
  ),
  fill = c(
    "violet", "ivory", "blue", "green", "yellow",
    "darkorange", "red", "cyan", "magenta"
  )
)
# Add x-axis tick marks without tick labels.
Axis(side = 1, labels = FALSE)
# Gender x Department contingency table, printed with row/column totals.
two_way_tab <- xtabs(
  formula = ~ Gender + Department,
  data = monthinc.df
)
addmargins(A = two_way_tab)
## Department
## Gender Human Resources Research & Development Sales Sum
## Female 8 185 93 286
## Male 16 275 123 414
## Sum 24 460 216 700
# JobRole x BusinessTravel contingency table, printed with row/column totals.
two_way_tab <- xtabs(
  formula = ~ JobRole + BusinessTravel,
  data = monthinc.df
)
addmargins(A = two_way_tab)
## BusinessTravel
## JobRole Non-Travel Travel_Frequently Travel_Rarely Sum
## Healthcare Representative 7 12 39 58
## Human Resources 0 2 17 19
## Laboratory Technician 13 26 84 123
## Manager 6 7 38 51
## Manufacturing Director 7 12 50 69
## Research Director 2 5 32 39
## Research Scientist 7 23 112 142
## Sales Executive 21 30 104 155
## Sales Representative 1 12 31 44
## Sum 64 129 507 700
# Gender x MaritalStatus contingency table, printed with row/column totals.
two_way_tab <- xtabs(
  formula = ~ Gender + MaritalStatus,
  data = monthinc.df
)
addmargins(A = two_way_tab)
## MaritalStatus
## Gender Divorced Married Single Sum
## Female 61 122 103 286
## Male 100 190 124 414
## Sum 161 312 227 700
# Department x BusinessTravel contingency table, printed with row/column totals.
two_way_tab <- xtabs(
  formula = ~ Department + BusinessTravel,
  data = monthinc.df
)
addmargins(A = two_way_tab)
## BusinessTravel
## Department Non-Travel Travel_Frequently Travel_Rarely Sum
## Human Resources 2 3 19 24
## Research & Development 39 83 338 460
## Sales 23 43 150 216
## Sum 64 129 507 700
# Department x JobRole x Gender counts, flattened into a single 2-D table
# for printing.
three_way_tab <- xtabs(
  formula = ~ Department + JobRole + Gender,
  data = monthinc.df
)
ftable(x = three_way_tab)
## Gender Female Male
## Department JobRole
## Human Resources Healthcare Representative 0 0
## Human Resources 6 13
## Laboratory Technician 0 0
## Manager 2 3
## Manufacturing Director 0 0
## Research Director 0 0
## Research Scientist 0 0
## Sales Executive 0 0
## Sales Representative 0 0
## Research & Development Healthcare Representative 27 31
## Human Resources 0 0
## Laboratory Technician 41 82
## Manager 12 17
## Manufacturing Director 32 37
## Research Director 15 24
## Research Scientist 58 84
## Sales Executive 0 0
## Sales Representative 0 0
## Sales Healthcare Representative 0 0
## Human Resources 0 0
## Laboratory Technician 0 0
## Manager 11 6
## Manufacturing Director 0 0
## Research Director 0 0
## Research Scientist 0 0
## Sales Executive 63 92
## Sales Representative 19 25
# Horizontal box plots of the three main continuous variables.
boxplot(
  monthinc.df[["Age"]],
  horizontal = TRUE,
  col = "chocolate",
  main = "Box Plot for Age",
  xlab = "Age"
)
boxplot(
  monthinc.df[["TotalWorkingYears"]],
  horizontal = TRUE,
  col = "gold",
  main = "Box Plot for Work Experience",
  xlab = "Years"
)
boxplot(
  monthinc.df[["MonthlyIncome"]],
  horizontal = TRUE,
  col = "red",
  main = "Box Plot for Monthly Income",
  xlab = "Amount"
)
# Frequency of each percentage salary hike, followed by its histogram.
table(monthinc.df[["PercentSalaryHike"]])
##
## 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## 103 95 94 92 47 39 34 45 37 25 25 29 17 12 6
# breaks is only a suggested number of bins; one fill colour is supplied
# per bar, shading from orange to dark red.
hist(
  monthinc.df[["PercentSalaryHike"]],
  breaks = 5,
  col = c(
    "orange", "darkorange", "orangered", "orangered3",
    "red", "red3", "firebrick3", "firebrick"
  ),
  main = "Variation in Percentage hike in the Salary",
  xlab = "Percentage",
  ylab = "Count",
  xlim = c(10, 30),
  ylim = c(0, 270)
)
# Frequency of each tenure value (years at the company), then its histogram.
table(monthinc.df$YearsAtCompany)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 24 25
## 21 87 61 63 50 96 36 42 39 39 53 15 5 12 6 7 5 6 7 2 12 6 7 5 3
## 26 27 29 31 32 33 34 36 37 40
## 2 2 1 3 1 2 1 1 1 1
# breaks = 10 is a suggestion; for a 0-40 range it yields 5-year bins (the
# eight fill colours match those eight bars). By the table above the first
# bin (0-5 years) holds 378 employees, so the former ylim of c(0, 300) cut
# that bar off. The upper limit is raised to 400 so the bar is shown in full.
hist(
  monthinc.df$YearsAtCompany,
  breaks = 10,
  col = c(
    "lightblue", "lightblue3", "royalblue", "blue",
    "royalblue4", "blue3", "dark blue", "navy"
  ),
  main = "No. of years in the company",
  xlab = "Years",
  ylab = "Count",
  xlim = c(0, 40),
  ylim = c(0, 400)
)
# Pairwise Pearson correlations of the numeric variables, rounded to 3 d.p.
# Columns are selected by name (in the order of the printed matrix) rather
# than by position 7:14, so the result does not silently change if columns
# are added, removed or reordered in the data frame.
round(
  cor(
    monthinc.df[, c(
      "Age", "Education", "JobLevel", "NumCompaniesWorked",
      "PercentSalaryHike", "TotalWorkingYears", "YearsAtCompany",
      "MonthlyIncome"
    )]
  ),
  3
)
## Age Education JobLevel NumCompaniesWorked
## Age 1.000 0.196 0.495 0.299
## Education 0.196 1.000 0.136 0.120
## JobLevel 0.495 0.136 1.000 0.121
## NumCompaniesWorked 0.299 0.120 0.121 1.000
## PercentSalaryHike -0.054 -0.030 -0.096 -0.004
## TotalWorkingYears 0.675 0.157 0.780 0.220
## YearsAtCompany 0.304 0.086 0.566 -0.137
## MonthlyIncome 0.484 0.134 0.951 0.123
## PercentSalaryHike TotalWorkingYears YearsAtCompany
## Age -0.054 0.675 0.304
## Education -0.030 0.157 0.086
## JobLevel -0.096 0.780 0.566
## NumCompaniesWorked -0.004 0.220 -0.137
## PercentSalaryHike 1.000 -0.096 -0.085
## TotalWorkingYears -0.096 1.000 0.640
## YearsAtCompany -0.085 0.640 1.000
## MonthlyIncome -0.093 0.763 0.543
## MonthlyIncome
## Age 0.484
## Education 0.134
## JobLevel 0.951
## NumCompaniesWorked 0.123
## PercentSalaryHike -0.093
## TotalWorkingYears 0.763
## YearsAtCompany 0.543
## MonthlyIncome 1.000
library(corrgram)
## Warning: replacing previous import by 'magrittr::%>%' when loading
## 'dendextend'
# Correlogram of the data set: shaded panels below the diagonal, pie panels
# above it; order = TRUE asks corrgram to reorder the variables.
corrgram(
  monthinc.df,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram for different variables"
)
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Scatterplot matrix of Monthly Income against the demographic variables.
# The title strings contain a literal line break, so they wrap onto two lines.
scatterplotMatrix(
  ~ Gender + Department + MonthlyIncome + MaritalStatus + Age,
  data = monthinc.df,
  main = "Variation of Monthly Income with Gender, Department,
Marital Status and Age"
)
# Scatterplot matrix of Monthly Income against the career-related variables.
scatterplotMatrix(
  ~ Education + JobLevel + MonthlyIncome + TotalWorkingYears + YearsAtCompany,
  data = monthinc.df,
  main = "Variation of Monthly Income with Education, Job Level,
Total Work Experience and Years in the Company"
)
Considering the null hypothesis H0: There is no significant difference in the value of both means. H1: There is a significant difference in the value of both means.
# Welch two-sample t-test of mean(MonthlyIncome) against mean(Age).
# NOTE(review): this compares the means of two different variables on
# different scales; it does not test whether Age is associated with
# MonthlyIncome. cor.test() or a regression would do that. Confirm intent.
t.test(
  x = monthinc.df$MonthlyIncome,
  y = monthinc.df$Age
)
##
## Welch Two Sample t-test
##
## data: monthinc.df$MonthlyIncome and monthinc.df$Age
## t = 36.282, df = 699.01, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 6037.062 6727.818
## sample estimates:
## mean of x mean of y
## 6419.27429 36.83429
-> Since p-value < 2.2e-16, so Age is a significant contributor in estimating Monthly Income. Hence we reject the null hypothesis
# Welch two-sample t-test of mean(MonthlyIncome) against mean(Education).
# NOTE(review): this compares the means of two different variables on
# different scales; it does not test whether Education is associated with
# MonthlyIncome. cor.test() or a regression would do that. Confirm intent.
t.test(
  x = monthinc.df$MonthlyIncome,
  y = monthinc.df$Education
)
##
## Welch Two Sample t-test
##
## data: monthinc.df$MonthlyIncome and monthinc.df$Education
## t = 36.475, df = 699, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 6071.017 6761.771
## sample estimates:
## mean of x mean of y
## 6419.274 2.880
-> Since p-value < 2.2e-16, so Education is a significant contributor in estimating Monthly Income. Hence we reject the null hypothesis
# Welch two-sample t-test of mean(MonthlyIncome) against mean(JobLevel).
# NOTE(review): this compares the means of two different variables on
# different scales; it does not test whether JobLevel is associated with
# MonthlyIncome. cor.test() or a regression would do that. Confirm intent.
t.test(
  x = monthinc.df$MonthlyIncome,
  y = monthinc.df$JobLevel
)
##
## Welch Two Sample t-test
##
## data: monthinc.df$MonthlyIncome and monthinc.df$JobLevel
## t = 36.48, df = 699, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 6071.850 6762.604
## sample estimates:
## mean of x mean of y
## 6419.274286 2.047143
-> Since p-value < 2.2e-16, so Job Level is a significant contributor in estimating Monthly Income. Hence we reject the null hypothesis.
# Welch two-sample t-test of mean(MonthlyIncome) against
# mean(NumCompaniesWorked).
# NOTE(review): this compares the means of two different variables on
# different scales; it does not test whether NumCompaniesWorked is associated
# with MonthlyIncome. cor.test() or a regression would do that. Confirm intent.
t.test(
  x = monthinc.df$MonthlyIncome,
  y = monthinc.df$NumCompaniesWorked
)
##
## Welch Two Sample t-test
##
## data: monthinc.df$MonthlyIncome and monthinc.df$NumCompaniesWorked
## t = 36.476, df = 699, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 6071.187 6761.941
## sample estimates:
## mean of x mean of y
## 6419.274 2.710
-> Since p-value < 2.2e-16, so Number of companies worked earlier is a significant contributor in estimating Monthly Income. Hence we reject the null hypothesis.
# Welch two-sample t-test of mean(MonthlyIncome) against
# mean(PercentSalaryHike).
# NOTE(review): this compares the means of two different variables on
# different scales; it does not test whether PercentSalaryHike is associated
# with MonthlyIncome. cor.test() or a regression would do that. Confirm intent.
t.test(
  x = monthinc.df$MonthlyIncome,
  y = monthinc.df$PercentSalaryHike
)
##
## Welch Two Sample t-test
##
## data: monthinc.df$MonthlyIncome and monthinc.df$PercentSalaryHike
## t = 36.405, df = 699, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 6058.619 6749.373
## sample estimates:
## mean of x mean of y
## 6419.27429 15.27857
-> Since p-value < 2.2e-16, so Percentage Hike in salary is a significant contributor in estimating Monthly Income. Hence we reject the null hypothesis.
# Welch two-sample t-test of mean(MonthlyIncome) against
# mean(TotalWorkingYears).
# NOTE(review): this compares the means of two different variables on
# different scales; it does not test whether TotalWorkingYears is associated
# with MonthlyIncome. cor.test() or a regression would do that. Confirm intent.
t.test(
  x = monthinc.df$MonthlyIncome,
  y = monthinc.df$TotalWorkingYears
)
##
## Welch Two Sample t-test
##
## data: monthinc.df$MonthlyIncome and monthinc.df$TotalWorkingYears
## t = 36.428, df = 699, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 6062.698 6753.453
## sample estimates:
## mean of x mean of y
## 6419.27429 11.19857
-> Since p-value < 2.2e-16, so Total Work Experience is a significant contributor in estimating Monthly Income. Hence we reject the null hypothesis.
# Welch two-sample t-test of mean(MonthlyIncome) against mean(YearsAtCompany).
# NOTE(review): this compares the means of two different variables on
# different scales; it does not test whether YearsAtCompany is associated
# with MonthlyIncome. cor.test() or a regression would do that. Confirm intent.
t.test(
  x = monthinc.df$MonthlyIncome,
  y = monthinc.df$YearsAtCompany
)
##
## Welch Two Sample t-test
##
## data: monthinc.df$MonthlyIncome and monthinc.df$YearsAtCompany
## t = 36.452, df = 699, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 6066.918 6757.673
## sample estimates:
## mean of x mean of y
## 6419.274286 6.978571
-> Since p-value < 2.2e-16, so Years at Company is a significant contributor in estimating Monthly Income. Hence we reject the null hypothesis.
# Convert the categorical columns to factors.
# These columns already hold their text labels (the frequency tables earlier
# in the script print "Female"/"Male", "Sales", ... directly), so no recoding
# from numeric codes is required. The previous code assigned labels through
# `monthinc.df$Res == <code>`, i.e. one and the same column was used to
# relabel five unrelated variables; `Res` is not among the variables seen
# anywhere in this analysis.
# NOTE(review): confirm the data set has no `Res` column that was meant to
# be used here.
monthinc.df$Gender <- factor(monthinc.df$Gender)
monthinc.df$Department <- factor(monthinc.df$Department)
monthinc.df$BusinessTravel <- factor(monthinc.df$BusinessTravel)
monthinc.df$MaritalStatus <- factor(monthinc.df$MaritalStatus)
monthinc.df$JobRole <- factor(monthinc.df$JobRole)
# Adding MonthlyIncome_Range variable: five left-closed bins of width 4000,
# [0, 4000), [4000, 8000), ..., [16000, 20000), labelled 1 to 5.
# Incomes outside [0, 20000) would become NA.
monthinc.df$MonthlyIncome_Range <- cut(
  monthinc.df$MonthlyIncome,
  breaks = seq(0, 20000, 4000),
  right = FALSE,
  labels = 1:5
)
# Chi-squared test of independence between income range and Gender.
chisq.test(monthinc.df$MonthlyIncome_Range, monthinc.df$Gender)
##
## Pearson's Chi-squared test
##
## data: monthinc.df$MonthlyIncome_Range and monthinc.df$Gender
## X-squared = 9.9068, df = 4, p-value = 0.04203
-> Since p-value = 0.04203, so Gender is a significant contributor in estimating Monthly Income. Hence we reject the null hypothesis.
# Chi-squared test of independence between income range and Department.
# R warns here that the chi-squared approximation may be incorrect.
chisq.test(
  x = monthinc.df$MonthlyIncome_Range,
  y = monthinc.df$Department
)
## Warning in chisq.test(monthinc.df$MonthlyIncome_Range, monthinc.df
## $Department): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: monthinc.df$MonthlyIncome_Range and monthinc.df$Department
## X-squared = 63.027, df = 8, p-value = 1.183e-10
-> Since p-value = 1.183e-10, so Department is a very significant contributor in estimating Monthly Income. Hence we reject the null hypothesis.
# Chi-squared test of independence between income range and BusinessTravel.
# R warns here that the chi-squared approximation may be incorrect.
chisq.test(
  x = monthinc.df$MonthlyIncome_Range,
  y = monthinc.df$BusinessTravel
)
## Warning in chisq.test(monthinc.df$MonthlyIncome_Range, monthinc.df
## $BusinessTravel): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: monthinc.df$MonthlyIncome_Range and monthinc.df$BusinessTravel
## X-squared = 14.648, df = 8, p-value = 0.06636
-> Since p-value = 0.06636, so Business Travel Status is not a significant contributor in estimating Monthly Income. Hence we fail to reject the null hypothesis.
# Chi-squared test of independence between income range and MaritalStatus.
chisq.test(
  x = monthinc.df$MonthlyIncome_Range,
  y = monthinc.df$MaritalStatus
)
##
## Pearson's Chi-squared test
##
## data: monthinc.df$MonthlyIncome_Range and monthinc.df$MaritalStatus
## X-squared = 8.6358, df = 8, p-value = 0.3739
-> Since p-value = 0.3739, so Marital Status is not a significant contributor in estimating Monthly Income. Hence we fail to reject the null hypothesis.
# Chi-squared test of independence between income range and JobRole.
# R warns here that the chi-squared approximation may be incorrect.
chisq.test(
  x = monthinc.df$MonthlyIncome_Range,
  y = monthinc.df$JobRole
)
## Warning in chisq.test(monthinc.df$MonthlyIncome_Range, monthinc.df
## $JobRole): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: monthinc.df$MonthlyIncome_Range and monthinc.df$JobRole
## X-squared = 1076, df = 32, p-value < 2.2e-16
-> Since p-value < 2.2e-16, so Job Role is a very significant contributor in estimating Monthly Income. Hence we reject the null hypothesis.
# Model 1: linear regression of MonthlyIncome on all twelve candidate
# predictors (five categorical, seven numeric).
regress1 <- lm(
  formula = MonthlyIncome ~ Gender + Department + BusinessTravel +
    MaritalStatus + JobRole + Age + Education + JobLevel +
    NumCompaniesWorked + PercentSalaryHike + TotalWorkingYears +
    YearsAtCompany,
  data = monthinc.df
)
summary(object = regress1)
##
## Call:
## lm(formula = MonthlyIncome ~ Gender + Department + BusinessTravel +
## MaritalStatus + JobRole + Age + Education + JobLevel + NumCompaniesWorked +
## PercentSalaryHike + TotalWorkingYears + YearsAtCompany, data = monthinc.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3650.8 -640.1 39.5 668.0 4086.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -300.8557 668.5264 -0.450 0.65283
## GenderMale 88.5381 84.9566 1.042 0.29771
## DepartmentResearch & Development 38.5753 529.7776 0.073 0.94198
## DepartmentSales -323.8944 559.7732 -0.579 0.56304
## BusinessTravelTravel_Frequently 448.5799 169.7570 2.642 0.00842 **
## BusinessTravelTravel_Rarely 275.4782 147.7588 1.864 0.06270 .
## MaritalStatusMarried 58.2552 106.3658 0.548 0.58409
## MaritalStatusSingle 61.1117 113.6017 0.538 0.59079
## JobRoleHuman Resources -196.3777 615.0775 -0.319 0.74962
## JobRoleLaboratory Technician -903.0242 197.8531 -4.564 5.96e-06 ***
## JobRoleManager 4164.8585 286.3968 14.542 < 2e-16 ***
## JobRoleManufacturing Director -351.4553 195.0757 -1.802 0.07205 .
## JobRoleResearch Director 3807.5247 252.7972 15.062 < 2e-16 ***
## JobRoleResearch Scientist -511.6669 198.0583 -2.583 0.00999 **
## JobRoleSales Executive 112.4187 380.5108 0.295 0.76775
## JobRoleSales Representative -412.8890 414.7542 -0.996 0.31985
## Age 1.0651 6.2762 0.170 0.86529
## Education 4.3941 41.2531 0.107 0.91520
## JobLevel 2771.3820 98.2868 28.197 < 2e-16 ***
## NumCompaniesWorked 4.9890 18.3792 0.271 0.78613
## PercentSalaryHike 4.0238 11.2243 0.358 0.72009
## TotalWorkingYears 35.9719 11.6733 3.082 0.00214 **
## YearsAtCompany -0.1172 9.3219 -0.013 0.98997
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1085 on 677 degrees of freedom
## Multiple R-squared: 0.9473, Adjusted R-squared: 0.9456
## F-statistic: 553.5 on 22 and 677 DF, p-value: < 2.2e-16
# Model 2: the Model 1 formula without Department, MaritalStatus and
# YearsAtCompany.
regress2 <- lm(
  formula = MonthlyIncome ~ Gender + BusinessTravel + JobRole + Age +
    Education + JobLevel + NumCompaniesWorked + PercentSalaryHike +
    TotalWorkingYears,
  data = monthinc.df
)
summary(object = regress2)
##
## Call:
## lm(formula = MonthlyIncome ~ Gender + BusinessTravel + JobRole +
## Age + Education + JobLevel + NumCompaniesWorked + PercentSalaryHike +
## TotalWorkingYears, data = monthinc.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3637.0 -646.7 53.5 660.9 4100.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -217.4699 379.9958 -0.572 0.567310
## GenderMale 91.8727 84.4311 1.088 0.276918
## BusinessTravelTravel_Frequently 450.4878 168.7186 2.670 0.007765 **
## BusinessTravelTravel_Rarely 276.2277 146.3652 1.887 0.059551 .
## JobRoleHuman Resources -240.6726 296.6887 -0.811 0.417536
## JobRoleLaboratory Technician -897.0046 196.6876 -4.561 6.05e-06 ***
## JobRoleManager 4028.3348 249.4139 16.151 < 2e-16 ***
## JobRoleManufacturing Director -356.6867 194.4279 -1.835 0.067009 .
## JobRoleResearch Director 3791.7969 251.3978 15.083 < 2e-16 ***
## JobRoleResearch Scientist -504.6747 196.6720 -2.566 0.010498 *
## JobRoleSales Executive -249.5603 168.6999 -1.479 0.139518
## JobRoleSales Representative -767.9924 242.7296 -3.164 0.001625 **
## Age 1.3406 6.1411 0.218 0.827260
## Education 0.6416 40.8904 0.016 0.987486
## JobLevel 2782.7533 96.8472 28.733 < 2e-16 ***
## NumCompaniesWorked 5.0694 17.2895 0.293 0.769451
## PercentSalaryHike 3.1814 11.1274 0.286 0.775034
## TotalWorkingYears 34.7833 10.2763 3.385 0.000753 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1083 on 682 degrees of freedom
## Multiple R-squared: 0.9472, Adjusted R-squared: 0.9459
## F-statistic: 719.9 on 17 and 682 DF, p-value: < 2.2e-16
# Model 3: reduced model keeping only Gender, BusinessTravel, JobRole,
# JobLevel and TotalWorkingYears.
regress3 <- lm(
  formula = MonthlyIncome ~ Gender + BusinessTravel + JobRole + JobLevel +
    TotalWorkingYears,
  data = monthinc.df
)
summary(object = regress3)
##
## Call:
## lm(formula = MonthlyIncome ~ Gender + BusinessTravel + JobRole +
## JobLevel + TotalWorkingYears, data = monthinc.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3644.3 -645.1 53.5 660.8 4079.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -111.387 268.895 -0.414 0.67883
## GenderMale 89.988 83.764 1.074 0.28307
## BusinessTravelTravel_Frequently 451.610 167.411 2.698 0.00716 **
## BusinessTravelTravel_Rarely 276.331 145.512 1.899 0.05798 .
## JobRoleHuman Resources -237.667 295.366 -0.805 0.42130
## JobRoleLaboratory Technician -900.236 195.818 -4.597 5.10e-06 ***
## JobRoleManager 4022.027 248.321 16.197 < 2e-16 ***
## JobRoleManufacturing Director -356.711 193.798 -1.841 0.06611 .
## JobRoleResearch Director 3791.686 250.255 15.151 < 2e-16 ***
## JobRoleResearch Scientist -509.365 195.868 -2.601 0.00951 **
## JobRoleSales Executive -252.517 168.092 -1.502 0.13349
## JobRoleSales Representative -775.396 241.189 -3.215 0.00137 **
## JobLevel 2779.291 96.307 28.859 < 2e-16 ***
## TotalWorkingYears 36.430 8.802 4.139 3.93e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1080 on 686 degrees of freedom
## Multiple R-squared: 0.9472, Adjusted R-squared: 0.9462
## F-statistic: 946.6 on 13 and 686 DF, p-value: < 2.2e-16
-> Model 3 has Multiple R-squared ~ 0.94 and p-value < 2.2e-16. These values are almost identical to those of Model 1 and Model 2, but Model 3 achieves the same modelling accuracy with fewer independent variables. Hence, the best model that can be considered for estimation is Model 3.
The analysis was carried out to study how the Monthly Income of an employee depends on different variables. The following insights were drawn from it:
Monthly Income of the employee depends very significantly on Job Role and Job Level of the employee.
Monthly Income also has significant dependency on Gender, Business Travel Status and Total Work Experience of the employee.
Monthly Income has weak dependency on Department and Marital Status of the employee.
Monthly Income is almost independent of Age and Education Level of the employee, Number of companies that the employee has served earlier, Percent Salary Hike obtained by employee and Years the employee has served the company.
Job Level contributes highly to Monthly Income implying that a raise of about 2780 is obtained for each increment in the Job Level.
Monthly income is found to be maximum for Manager and Research Director and minimum for Laboratory Technician and Sales Representative among all the other job roles.
Monthly income differs by only about 90 when the employee is male rather than female, suggesting gender equality in the company.
Monthly income of an employee who travels frequently exceeds that of one who travels rarely by about 175.
Monthly income of employee rises by about 36 per unit rise in the Total work experience of the employee.