# Pin the CRAN mirror so install.packages() does not prompt for one.
options(repos = c(CRAN = "https://cloud.r-project.org"))
# Install readxl only when it is not already available. Installing
# unconditionally re-ran on every execution; the log recorded below is from such
# a run, where replacing the existing readxl.dll failed with "Permission denied"
# and the prior installation had to be restored.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
## Installing package into 'C:/Users/mnava/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)
## package 'readxl' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'readxl'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\mnava\AppData\Local\R\win-library\4.5\00LOCK\readxl\libs\x64\readxl.dll
## to C:\Users\mnava\AppData\Local\R\win-library\4.5\readxl\libs\x64\readxl.dll:
## Permission denied
## Warning: restored 'readxl'
##
## The downloaded binary packages are in
## C:\Users\mnava\AppData\Local\Temp\Rtmp4wrcQ5\downloaded_packages
library(readxl)   # read_xlsx()
# Packages to install once, if they are missing:
#   install.packages("pastecs")
#   install.packages("lm.beta")
library(pastecs)  # stat.desc()
library(lm.beta)  # lm.beta()
# Load the Human Resources data; file.choose() opens an interactive file picker.
Data <- read_xlsx(file.choose())
# Heavily penalise scientific notation so very small values print in fixed form.
options(scipen = 999)

# Dummy coding of `department` (1 = row belongs to that department, 0 = not):
#   sales, support, management
# Rows from any other department are 0 on all three indicators.
Data$SLS_Dummy <- as.numeric(Data$department == "sales")
Data$SUPP_Dummy <- as.numeric(Data$department == "support")
Data$MGMT_Dummy <- as.numeric(Data$department == "management")

# Attach exactly once, and only after the dummy columns exist. Attaching before
# and after the dummy coding left two copies of Data on the search path and
# triggered the "objects are masked from Data (pos = 3)" message.
# NOTE(review): attach() is kept only so that later code referring to columns
# by bare name keeps working; prefer passing `data = Data` to modelling calls
# and removing this line.
attach(Data)
## The following objects are masked from Data (pos = 3):
##
## average_monthly_hours, department, last_evaluation, left,
## number_project, promotion_last_5years, salary, satisfaction_level,
## time_spend_company, Work_accident
# List the column names, including the three new dummy variables.
colnames(Data)
## [1] "satisfaction_level" "last_evaluation" "number_project"
## [4] "average_monthly_hours" "time_spend_company" "Work_accident"
## [7] "left" "promotion_last_5years" "department"
## [10] "salary" "SLS_Dummy" "SUPP_Dummy"
## [13] "MGMT_Dummy"
# Subset of the HR data used for the descriptives and correlations:
# the outcome, the continuous predictors, the binary controls and the
# department dummies.
DataFrame <- Data[c(
  "satisfaction_level",
  "last_evaluation",
  "average_monthly_hours",
  "time_spend_company",
  "promotion_last_5years",
  "Work_accident",
  "SLS_Dummy",
  "SUPP_Dummy",
  "MGMT_Dummy"
)]
# Descriptive statistics for every analysis variable
# (the `mean` and `std.dev` rows give the mean and SD).
pastecs::stat.desc(DataFrame)
## satisfaction_level last_evaluation average_monthly_hours
## nbr.val 14999.000000000 14999.000000000 14999.0000000
## nbr.null 0.000000000 0.000000000 0.0000000
## nbr.na 0.000000000 0.000000000 0.0000000
## min 0.090000000 0.360000000 96.0000000
## max 1.000000000 1.000000000 310.0000000
## range 0.910000000 0.640000000 214.0000000
## sum 9191.890000000 10740.810000000 3015554.0000000
## median 0.640000000 0.720000000 200.0000000
## mean 0.612833522 0.716101740 201.0503367
## SE.mean 0.002030128 0.001397637 0.4077973
## CI.mean.0.95 0.003979300 0.002739538 0.7993325
## var 0.061817201 0.029298864 2494.3131748
## std.dev 0.248630651 0.171169111 49.9430994
## coef.var 0.405706676 0.239029039 0.2484109
## time_spend_company promotion_last_5years Work_accident
## nbr.val 14999.00000000 14999.000000000 14999.000000000
## nbr.null 0.00000000 14680.000000000 12830.000000000
## nbr.na 0.00000000 0.000000000 0.000000000
## min 2.00000000 0.000000000 0.000000000
## max 10.00000000 1.000000000 1.000000000
## range 8.00000000 1.000000000 1.000000000
## sum 52470.00000000 319.000000000 2169.000000000
## median 3.00000000 0.000000000 0.000000000
## mean 3.49823322 0.021268085 0.144609641
## SE.mean 0.01192236 0.001178092 0.002871866
## CI.mean.0.95 0.02336928 0.002309205 0.005629208
## var 2.13199781 0.020817141 0.123705940
## std.dev 1.46013623 0.144281465 0.351718552
## coef.var 0.41739248 6.783942593 2.432192977
## SLS_Dummy SUPP_Dummy MGMT_Dummy
## nbr.val 14999.000000000 14999.000000000 14999.000000000
## nbr.null 10859.000000000 12770.000000000 14369.000000000
## nbr.na 0.000000000 0.000000000 0.000000000
## min 0.000000000 0.000000000 0.000000000
## max 1.000000000 1.000000000 1.000000000
## range 1.000000000 1.000000000 1.000000000
## sum 4140.000000000 2229.000000000 630.000000000
## median 0.000000000 0.000000000 0.000000000
## mean 0.276018401 0.148609907 0.042002800
## SE.mean 0.003650195 0.002904501 0.001637965
## CI.mean.0.95 0.007154829 0.005693176 0.003210611
## var 0.199845567 0.126533439 0.040241248
## std.dev 0.447040901 0.355715390 0.200602213
## coef.var 1.619605429 2.393618277 4.775924752
# Pearson correlation matrix of the analysis variables (defaults spelled out).
cor(DataFrame, use = "everything", method = "pearson")
## satisfaction_level last_evaluation average_monthly_hours
## satisfaction_level 1.000000000 0.105021214 -0.0200481132
## last_evaluation 0.105021214 1.000000000 0.3397417998
## average_monthly_hours -0.020048113 0.339741800 1.0000000000
## time_spend_company -0.100866073 0.131590722 0.1277549104
## promotion_last_5years 0.025605186 -0.008683768 -0.0035444143
## Work_accident 0.058697241 -0.007104289 -0.0101428882
## SLS_Dummy 0.004006732 -0.023030902 -0.0017183402
## SUPP_Dummy 0.009185484 0.017104011 -0.0024440113
## MGMT_Dummy 0.007171935 0.009662233 0.0008338051
## time_spend_company promotion_last_5years Work_accident
## satisfaction_level -0.100866073 0.025605186 0.058697241
## last_evaluation 0.131590722 -0.008683768 -0.007104289
## average_monthly_hours 0.127754910 -0.003544414 -0.010142888
## time_spend_company 1.000000000 0.067432925 0.002120418
## promotion_last_5years 0.067432925 1.000000000 0.039245435
## Work_accident 0.002120418 0.039245435 1.000000000
## SLS_Dummy 0.015149894 0.012353257 -0.004954643
## SUPP_Dummy -0.030111194 -0.035604754 0.012078877
## MGMT_Dummy 0.115436290 0.128086638 0.011241750
## SLS_Dummy SUPP_Dummy MGMT_Dummy
## satisfaction_level 0.004006732 0.009185484 0.0071719352
## last_evaluation -0.023030902 0.017104011 0.0096622330
## average_monthly_hours -0.001718340 -0.002444011 0.0008338051
## time_spend_company 0.015149894 -0.030111194 0.1154362901
## promotion_last_5years 0.012353257 -0.035604754 0.1280866376
## Work_accident -0.004954643 0.012078877 0.0112417500
## SLS_Dummy 1.000000000 -0.257967408 -0.1292892024
## SUPP_Dummy -0.257967408 1.000000000 -0.0874815739
## MGMT_Dummy -0.129289202 -0.087481574 1.0000000000
# First regression: control variables only.
# `data = Data` makes lm() take its columns from the data frame itself instead
# of from whatever attach() happened to leave on the search path.
# The Call printed by summary() will additionally show `data = Data`.
Model1 <- lm(
  satisfaction_level ~ promotion_last_5years + Work_accident +
    SLS_Dummy + SUPP_Dummy + MGMT_Dummy,
  data = Data
)
summary(Model1)
##
## Call:
## lm(formula = satisfaction_level ~ promotion_last_5years + Work_accident +
## SLS_Dummy + SUPP_Dummy + MGMT_Dummy)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.55413 -0.17164 0.02661 0.20228 0.39661
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.603388 0.002899 208.135 < 0.0000000000000002 ***
## promotion_last_5years 0.039544 0.014180 2.789 0.0053 **
## Work_accident 0.040738 0.005766 7.065 0.00000000000168 ***
## SLS_Dummy 0.004328 0.004752 0.911 0.3625
## SUPP_Dummy 0.008252 0.005945 1.388 0.1651
## MGMT_Dummy 0.006970 0.010354 0.673 0.5008
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2482 on 14993 degrees of freedom
## Multiple R-squared: 0.00415, Adjusted R-squared: 0.003818
## F-statistic: 12.5 on 5 and 14993 DF, p-value: 0.00000000000395
# Standardized (beta) coefficients for the controls-only model.
lm.beta(object = Model1)
##
## Call:
## lm(formula = satisfaction_level ~ promotion_last_5years + Work_accident +
## SLS_Dummy + SUPP_Dummy + MGMT_Dummy)
##
## Standardized Coefficients::
## (Intercept) promotion_last_5years Work_accident
## NA 0.022947395 0.057629395
## SLS_Dummy SUPP_Dummy MGMT_Dummy
## 0.007781361 0.011805725 0.005623655
# 95% confidence intervals for the Model1 coefficients.
confint(Model1, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 0.597705308 0.60907020
## promotion_last_5years 0.011749993 0.06733745
## Work_accident 0.029435667 0.05204104
## SLS_Dummy -0.004987441 0.01364296
## SUPP_Dummy -0.003401001 0.01990445
## MGMT_Dummy -0.013324311 0.02726447
# Second regression: the Model1 controls plus the three predictors.
# `data = Data` makes lm() take its columns from the data frame itself instead
# of from whatever attach() happened to leave on the search path.
# The Call printed by summary() will additionally show `data = Data`.
Model2 <- lm(
  satisfaction_level ~ promotion_last_5years + Work_accident +
    SLS_Dummy + SUPP_Dummy + MGMT_Dummy +
    last_evaluation + average_monthly_hours + time_spend_company,
  data = Data
)
summary(Model2)
##
## Call:
## lm(formula = satisfaction_level ~ promotion_last_5years + Work_accident +
## SLS_Dummy + SUPP_Dummy + MGMT_Dummy + last_evaluation + average_monthly_hours +
## time_spend_company)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.58054 -0.16083 0.02458 0.19731 0.55208
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.57890134 0.01091768 53.024 < 0.0000000000000002 ***
## promotion_last_5years 0.05193644 0.01400415 3.709 0.000209 ***
## Work_accident 0.04099504 0.00568668 7.209 0.000000000000591 ***
## SLS_Dummy 0.00734625 0.00468893 1.567 0.117201
## SUPP_Dummy 0.00592030 0.00586354 1.010 0.312665
## MGMT_Dummy 0.02140970 0.01027172 2.084 0.037147 *
## last_evaluation 0.20121418 0.01247499 16.129 < 0.0000000000000002 ***
## average_monthly_hours -0.00025633 0.00004272 -6.001 0.000000002010579 ***
## time_spend_company -0.01985644 0.00139840 -14.199 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2447 on 14990 degrees of freedom
## Multiple R-squared: 0.03177, Adjusted R-squared: 0.03126
## F-statistic: 61.49 on 8 and 14990 DF, p-value: < 0.00000000000000022
# Standardized (beta) coefficients for the controls-plus-predictors model.
lm.beta(object = Model2)
##
## Call:
## lm(formula = satisfaction_level ~ promotion_last_5years + Work_accident +
## SLS_Dummy + SUPP_Dummy + MGMT_Dummy + last_evaluation + average_monthly_hours +
## time_spend_company)
##
## Standardized Coefficients::
## (Intercept) promotion_last_5years Work_accident
## NA 0.03013894 0.05799251
## SLS_Dummy SUPP_Dummy MGMT_Dummy
## 0.01320864 0.00847016 0.01727395
## last_evaluation average_monthly_hours time_spend_company
## 0.13852537 -0.05148929 -0.11661114
# 95% confidence intervals for the Model2 coefficients.
confint(Model2, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 0.557501348 0.600301328
## promotion_last_5years 0.024486597 0.079386280
## Work_accident 0.029848449 0.052141625
## SLS_Dummy -0.001844631 0.016537130
## SUPP_Dummy -0.005572960 0.017413557
## MGMT_Dummy 0.001275872 0.041543519
## last_evaluation 0.176761682 0.225666678
## average_monthly_hours -0.000340058 -0.000172598
## time_spend_company -0.022597464 -0.017115412
# Nested-model comparison: F test of whether the predictors added in Model2
# improve fit over the controls-only Model1.
anova(Model1, Model2, test = "F")
## Analysis of Variance Table
##
## Model 1: satisfaction_level ~ promotion_last_5years + Work_accident +
## SLS_Dummy + SUPP_Dummy + MGMT_Dummy
## Model 2: satisfaction_level ~ promotion_last_5years + Work_accident +
## SLS_Dummy + SUPP_Dummy + MGMT_Dummy + last_evaluation + average_monthly_hours +
## time_spend_company
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 14993 923.29
## 2 14990 897.68 3 25.61 142.55 < 0.00000000000000022 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# R-squared change: variance in satisfaction_level explained by the predictors
# over and above the controls, i.e. R-squared(Model2) - R-squared(Model1).
# Computed from the fitted models instead of hand-typed rounded values, and
# subtracted in the direction that yields the (positive) gain.
summary(Model2)$r.squared - summary(Model1)$r.squared
# Expected to be about 0.0276, given the Multiple R-squared values reported by
# the two summaries (0.03177 and 0.00415). The earlier hard-coded expression
# 0.004-0.031 printed -0.027.
# Assumption checks: draw the default lm diagnostic plots for Model2 on one
# 2 x 2 page.
# par() returns the previous settings, so they can be restored afterwards and
# the 2 x 2 layout does not leak into any later plots.
old_par <- par(mfrow = c(2, 2))
plot(Model2)
par(old_par)
