# Load the job-satisfaction survey data. The file is expected in the current
# working directory.
jobsat.df <- read.csv("Job Satisfaction Project_data.csv")
# View() opens a GUI data viewer, so only call it when a user is at the
# console; this keeps knitted / batch runs from depending on a display.
if (interactive()) {
  View(jobsat.df)
}
# Number of rows and columns.
dim(jobsat.df)
## [1] 598 8
library(psych)
# Per-column descriptive statistics; columns marked with `*` in the output
# are the non-numeric ones (Department, BusinessTravel).
describe(jobsat.df)
## vars n mean sd median trimmed mad min
## EmpID 1 598 1405.44 236.51 1407.5 1403.85 297.26 1001
## Age 2 598 36.91 9.38 36.0 36.43 8.90 18
## Department* 3 598 2.28 0.51 2.0 2.26 0.00 1
## BusinessTravel* 4 598 2.53 0.79 3.0 2.65 0.00 1
## PercentSalaryHike 5 598 15.20 3.72 14.0 14.78 2.97 11
## WorkLifeBalance 6 598 2.76 0.70 3.0 2.78 0.00 1
## YearsInPresentCompany 7 598 1.34 0.67 1.0 1.19 0.00 1
## JobSatisfaction 8 598 2.78 1.10 3.0 2.85 1.48 1
## max range skew kurtosis se
## EmpID 1827 826 0.03 -1.15 9.67
## Age 60 42 0.45 -0.44 0.38
## Department* 3 2 0.30 -0.53 0.02
## BusinessTravel* 3 2 -1.22 -0.31 0.03
## PercentSalaryHike 25 14 0.80 -0.43 0.15
## WorkLifeBalance 4 3 -0.62 0.56 0.03
## YearsInPresentCompany 5 4 2.12 4.40 0.03
## JobSatisfaction 4 3 -0.40 -1.17 0.04
# One-way frequency counts for the two categorical variables.
table(jobsat.df[["Department"]])
##
## HR R&D Sales
## 18 397 183
table(jobsat.df[["BusinessTravel"]])
##
## Frequently On-Travel Rarely
## 114 56 428
# Department x BusinessTravel contingency table, printed with row and
# column totals appended by addmargins().
two_way_tab <- xtabs(
  ~ Department + BusinessTravel,
  data = jobsat.df
)
addmargins(two_way_tab)
## BusinessTravel
## Department Frequently On-Travel Rarely Sum
## HR 3 2 13 18
## R&D 72 33 292 397
## Sales 39 21 123 183
## Sum 114 56 428 598
# Horizontal box plots for the two continuous variables: employee age and
# percentage salary hike.
boxplot(
  jobsat.df$Age,
  horizontal = TRUE,
  col = "chocolate",
  main = "Box Plot for Age",
  xlab = "Age"
)
boxplot(
  jobsat.df$PercentSalaryHike,
  horizontal = TRUE,
  col = "gold",
  main = "Box Plot for percentage hike in the salary",
  xlab = "Percentage"
)
# Frequency of each Work-Life Balance level; the counts are printed and then
# reused for the bar chart.
counts_wlb <- table(jobsat.df$WorkLifeBalance)
print(counts_wlb)
##
## 1 2 3 4
## 33 133 374 58
# Bar chart of the counts. The x label deliberately keeps its embedded line
# break so the scale legend appears on a second line.
barplot(
  counts_wlb,
  width = 1,
  space = 1,
  names.arg = as.character(1:4),
  col = c("yellowgreen", "green", "darkolivegreen", "darkgreen"),
  xlim = c(0, 10),
  ylim = c(0, 450),
  main = "Work Life Balance in the company",
  xlab = "Balance Level
(1=Lowest---4=Highest)"
)
# Frequency of each tenure value (years in the present company); printed and
# then reused for the bar chart.
counts_year <- table(jobsat.df$YearsInPresentCompany)
print(counts_year)
##
## 1 2 3 4 5
## 450 103 36 8 1
# Bar chart of the tenure counts, one shade of blue per year value.
barplot(
  counts_year,
  width = 1,
  space = 1,
  names.arg = as.character(1:5),
  col = c("navy", "blue", "lightblue3", "skyblue1", "lightblue1"),
  xlim = c(0, 10),
  ylim = c(0, 450),
  main = "Number of years the employee is working in the company",
  xlab = "Years"
)
# Frequency of each Job Satisfaction level; printed and then reused for the
# bar chart.
counts_sat <- table(jobsat.df$JobSatisfaction)
print(counts_sat)
##
## 1 2 3 4
## 111 107 183 197
# Bar chart of the satisfaction counts (red shades for the low levels, green
# shades for the high ones). The x label keeps its embedded line break.
barplot(
  counts_sat,
  width = 1,
  space = 1,
  names.arg = as.character(1:4),
  col = c("red", "tomato", "green", "darkgreen"),
  ylim = c(0, 200),
  main = "Job Satisfaction in the company",
  xlab = "Satisfaction Level
(1=Lowest---4=Highest)"
)
# Pearson chi-square test of independence: JobSatisfaction vs. Department.
# NOTE(review): the warning below is usually raised when some expected cell
# counts are small (HR has only 18 employees); consider
# chisq.test(chi1, simulate.p.value = TRUE) or fisher.test() -- confirm
# before relying on the exact p-value.
chi1 <- xtabs(
  ~ JobSatisfaction + Department,
  data = jobsat.df
)
chisq.test(chi1)
## Warning in chisq.test(chi1): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: chi1
## X-squared = 0.53625, df = 6, p-value = 0.9974
-> As per the chi-square test, no significant dependency is found between Job Satisfaction and Department (p-value = 0.9974).
# Pearson chi-square test of independence: JobSatisfaction vs. BusinessTravel.
chi2 <- xtabs(
  ~ JobSatisfaction + BusinessTravel,
  data = jobsat.df
)
chisq.test(chi2)
##
## Pearson's Chi-squared test
##
## data: chi2
## X-squared = 2.5352, df = 6, p-value = 0.8645
-> As per the chi-square test, no significant dependency is found between Job Satisfaction and Business Travel (p-value = 0.8645).
# Pearson chi-square test of independence: JobSatisfaction vs. WorkLifeBalance.
chi3 <- xtabs(
  ~ JobSatisfaction + WorkLifeBalance,
  data = jobsat.df
)
chisq.test(chi3)
##
## Pearson's Chi-squared test
##
## data: chi3
## X-squared = 6.4336, df = 9, p-value = 0.6959
-> As per the chi-square test, no significant dependency is found between Job Satisfaction and Work Life Balance (p-value = 0.6959).
# Pearson chi-square test of independence: JobSatisfaction vs.
# YearsInPresentCompany.
# NOTE(review): the warning below is usually raised when some expected cell
# counts are small (only 8 employees have 4 years and 1 has 5 years);
# consider chisq.test(chi4, simulate.p.value = TRUE) or pooling the sparse
# tenure levels -- confirm before relying on the exact p-value.
chi4 <- xtabs(
  ~ JobSatisfaction + YearsInPresentCompany,
  data = jobsat.df
)
chisq.test(chi4)
## Warning in chisq.test(chi4): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: chi4
## X-squared = 11.013, df = 12, p-value = 0.5278
-> As per the chi-square test, no significant dependency is found between Job Satisfaction and the number of years working in the company (p-value = 0.5278).
# Pearson correlation between employee age and satisfaction level.
with(jobsat.df, cor(Age, JobSatisfaction))
## [1] -0.03550648
# Pairwise correlation matrix for columns 5 to 8 of the data frame (selected
# here by name), rounded to three decimals.
round(
  cor(
    jobsat.df[, c(
      "PercentSalaryHike",
      "WorkLifeBalance",
      "YearsInPresentCompany",
      "JobSatisfaction"
    )]
  ),
  digits = 3
)
## PercentSalaryHike WorkLifeBalance
## PercentSalaryHike 1.000 -0.035
## WorkLifeBalance -0.035 1.000
## YearsInPresentCompany -0.061 0.000
## JobSatisfaction 0.031 -0.068
## YearsInPresentCompany JobSatisfaction
## PercentSalaryHike -0.061 0.031
## WorkLifeBalance 0.000 -0.068
## YearsInPresentCompany 1.000 0.000
## JobSatisfaction 0.000 1.000
library(corrgram)
## Warning: replacing previous import by 'magrittr::%>%' when loading
## 'dendextend'
# Correlogram of the job-satisfaction data: shaded cells below the diagonal,
# pie glyphs above it, variable names on the diagonal.
# The title previously read "MBA starting salary analysis Corrgram", a
# leftover from a different project; it now names this data set.
# NOTE(review): the whole data frame is passed, so the EmpID identifier is
# presumably included in the plot -- confirm whether it should be dropped.
corrgram(
  jobsat.df,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Job Satisfaction analysis Corrgram"
)
# Convert the two categorical predictors into factor variables so that the
# plotting and modelling code below treats them as categorical.
#
# The recoding statements that used to sit here (e.g.
# `jobsat.df$Department[jobsat.df$Res == 0] <- 'HR'`) were removed: the data
# frame has no `Res` column, so each mask was zero-length and the assignments
# changed nothing. Both columns already hold their text labels as read from
# the CSV.
jobsat.df$Department <- factor(jobsat.df$Department)
jobsat.df$BusinessTravel <- factor(jobsat.df$BusinessTravel)
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Scatterplot matrix of JobSatisfaction against age and the two factor
# predictors.
scatterplotMatrix(
  ~ JobSatisfaction + Age + BusinessTravel + Department,
  data = jobsat.df,
  main = "Variation of JobSatisfaction with Age, Business Travel Rate and Department"
)
# Scatterplot matrix of JobSatisfaction against the remaining numeric
# predictors. The title keeps its embedded line break.
scatterplotMatrix(
  ~ JobSatisfaction + PercentSalaryHike + WorkLifeBalance + YearsInPresentCompany,
  data = jobsat.df,
  main = "Variation of JobSatisfaction with Percent Salary Hike, Work-Life Balance Level
and Years in the PresentCompany"
)
# Multiple linear regression of JobSatisfaction on all six candidate
# predictors. The formula is written inline so that the `Call:` section of
# the summary shows the full model.
regress1 <- lm(
  JobSatisfaction ~ Age + Department + BusinessTravel +
    PercentSalaryHike + WorkLifeBalance + YearsInPresentCompany,
  data = jobsat.df
)
# Coefficient table, residual summary and overall fit statistics.
summary(regress1)
##
## Call:
## lm(formula = JobSatisfaction ~ Age + Department + BusinessTravel +
## PercentSalaryHike + WorkLifeBalance + YearsInPresentCompany,
## data = jobsat.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.0308 -0.7939 0.2258 1.1380 1.4699
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.041956 0.448220 6.787 2.8e-11 ***
## Age -0.004789 0.005022 -0.954 0.341
## DepartmentR&D 0.036196 0.265541 0.136 0.892
## DepartmentSales 0.068315 0.272402 0.251 0.802
## BusinessTravelOn-Travel 0.211088 0.180045 1.172 0.242
## BusinessTravelRarely 0.012896 0.116538 0.111 0.912
## PercentSalaryHike 0.008489 0.012197 0.696 0.487
## WorkLifeBalance -0.116013 0.065034 -1.784 0.075 .
## YearsInPresentCompany 0.023767 0.070341 0.338 0.736
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.099 on 589 degrees of freedom
## Multiple R-squared: 0.01025, Adjusted R-squared: -0.003197
## F-statistic: 0.7622 on 8 and 589 DF, p-value: 0.6364
The analysis of the given data was carried out to find the factors that enhance or diminish the Job Satisfaction level of the employees in the company. After carrying out the analysis, it was found that none of the parameters significantly affects the Job Satisfaction level among the employees. The regression model also fails to explain the data, as its Multiple R-squared value is only 0.01 (overall F-test p-value = 0.6364). Hence, either more data needs to be collected to find the desired dependency, or it can be concluded that Job Satisfaction is independent of all the above variables.