# Point R at the CRAN cloud mirror so packages can be installed without an
# interactive mirror prompt.
options(repos = c(CRAN = "https://cloud.r-project.org"))
# Load required packages
# Install readxl only when it is missing. Reinstalling it on every run is
# unnecessary and can fail: the install log below shows "Permission denied"
# while replacing readxl.dll, after which the old copy had to be restored.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
## Installing package into 'C:/Users/mnava/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)
## package 'readxl' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'readxl'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\mnava\AppData\Local\R\win-library\4.5\00LOCK\readxl\libs\x64\readxl.dll
## to C:\Users\mnava\AppData\Local\R\win-library\4.5\readxl\libs\x64\readxl.dll:
## Permission denied
## Warning: restored 'readxl'
## 
## The downloaded binary packages are in
##  C:\Users\mnava\AppData\Local\Temp\Rtmp4wrcQ5\downloaded_packages
library(readxl)

# Packages to install (run these once if the packages are not yet installed):
# install.packages("pastecs")
# install.packages("lm.beta")
library("pastecs")
library("lm.beta")
# Read the Human Resources data from an Excel workbook picked interactively.
Data <- read_xlsx(file.choose())
# Data is intentionally NOT attached at this point. attach() puts a copy of
# the data frame on the search path, so a copy attached now would lack the
# dummy columns created further down and would only be masked by the later
# attach(Data) call.
# Print numbers in fixed notation instead of scientific notation.
options(scipen = 999)
# Dummy coding: one 0/1 indicator for each of the departments
#   sales, support, management.
# Rows from any other department are coded 0 on all three indicators.
# The comparison yields a logical vector; as.numeric() turns TRUE/FALSE into
# 1/0. (as.numeric() takes no second value argument, so none is passed.)
Data$SLS_Dummy <- as.numeric(Data$department == "sales")
Data$SUPP_Dummy <- as.numeric(Data$department == "support")
Data$MGMT_Dummy <- as.numeric(Data$department == "management")
# Put the columns, including the new dummies, on the search path. Any call
# further down that does not pass `data =` resolves its variables through
# this attached copy.
attach(Data)
## The following objects are masked from Data (pos = 3):
## 
##     average_monthly_hours, department, last_evaluation, left,
##     number_project, promotion_last_5years, salary, satisfaction_level,
##     time_spend_company, Work_accident
# List the column names to confirm the dummy variables were added.
colnames(Data)
##  [1] "satisfaction_level"    "last_evaluation"       "number_project"       
##  [4] "average_monthly_hours" "time_spend_company"    "Work_accident"        
##  [7] "left"                  "promotion_last_5years" "department"           
## [10] "salary"                "SLS_Dummy"             "SUPP_Dummy"           
## [13] "MGMT_Dummy"
# Analysis subset of the HR data: the outcome, the predictors, the control
# variables and the three department dummies.
DataFrame <- Data[c(
  "satisfaction_level",
  "last_evaluation",
  "average_monthly_hours",
  "time_spend_company",
  "promotion_last_5years",
  "Work_accident",
  "SLS_Dummy",
  "SUPP_Dummy",
  "MGMT_Dummy"
)]
# Descriptive statistics (mean, SD and more) for every analysis variable;
# the correlation matrix is computed separately below.
stat.desc(DataFrame)
##              satisfaction_level last_evaluation average_monthly_hours
## nbr.val         14999.000000000 14999.000000000         14999.0000000
## nbr.null            0.000000000     0.000000000             0.0000000
## nbr.na              0.000000000     0.000000000             0.0000000
## min                 0.090000000     0.360000000            96.0000000
## max                 1.000000000     1.000000000           310.0000000
## range               0.910000000     0.640000000           214.0000000
## sum              9191.890000000 10740.810000000       3015554.0000000
## median              0.640000000     0.720000000           200.0000000
## mean                0.612833522     0.716101740           201.0503367
## SE.mean             0.002030128     0.001397637             0.4077973
## CI.mean.0.95        0.003979300     0.002739538             0.7993325
## var                 0.061817201     0.029298864          2494.3131748
## std.dev             0.248630651     0.171169111            49.9430994
## coef.var            0.405706676     0.239029039             0.2484109
##              time_spend_company promotion_last_5years   Work_accident
## nbr.val          14999.00000000       14999.000000000 14999.000000000
## nbr.null             0.00000000       14680.000000000 12830.000000000
## nbr.na               0.00000000           0.000000000     0.000000000
## min                  2.00000000           0.000000000     0.000000000
## max                 10.00000000           1.000000000     1.000000000
## range                8.00000000           1.000000000     1.000000000
## sum              52470.00000000         319.000000000  2169.000000000
## median               3.00000000           0.000000000     0.000000000
## mean                 3.49823322           0.021268085     0.144609641
## SE.mean              0.01192236           0.001178092     0.002871866
## CI.mean.0.95         0.02336928           0.002309205     0.005629208
## var                  2.13199781           0.020817141     0.123705940
## std.dev              1.46013623           0.144281465     0.351718552
## coef.var             0.41739248           6.783942593     2.432192977
##                    SLS_Dummy      SUPP_Dummy      MGMT_Dummy
## nbr.val      14999.000000000 14999.000000000 14999.000000000
## nbr.null     10859.000000000 12770.000000000 14369.000000000
## nbr.na           0.000000000     0.000000000     0.000000000
## min              0.000000000     0.000000000     0.000000000
## max              1.000000000     1.000000000     1.000000000
## range            1.000000000     1.000000000     1.000000000
## sum           4140.000000000  2229.000000000   630.000000000
## median           0.000000000     0.000000000     0.000000000
## mean             0.276018401     0.148609907     0.042002800
## SE.mean          0.003650195     0.002904501     0.001637965
## CI.mean.0.95     0.007154829     0.005693176     0.003210611
## var              0.199845567     0.126533439     0.040241248
## std.dev          0.447040901     0.355715390     0.200602213
## coef.var         1.619605429     2.393618277     4.775924752
# Pairwise Pearson correlations between all analysis variables.
cor(DataFrame, method = "pearson")
##                       satisfaction_level last_evaluation average_monthly_hours
## satisfaction_level           1.000000000     0.105021214         -0.0200481132
## last_evaluation              0.105021214     1.000000000          0.3397417998
## average_monthly_hours       -0.020048113     0.339741800          1.0000000000
## time_spend_company          -0.100866073     0.131590722          0.1277549104
## promotion_last_5years        0.025605186    -0.008683768         -0.0035444143
## Work_accident                0.058697241    -0.007104289         -0.0101428882
## SLS_Dummy                    0.004006732    -0.023030902         -0.0017183402
## SUPP_Dummy                   0.009185484     0.017104011         -0.0024440113
## MGMT_Dummy                   0.007171935     0.009662233          0.0008338051
##                       time_spend_company promotion_last_5years Work_accident
## satisfaction_level          -0.100866073           0.025605186   0.058697241
## last_evaluation              0.131590722          -0.008683768  -0.007104289
## average_monthly_hours        0.127754910          -0.003544414  -0.010142888
## time_spend_company           1.000000000           0.067432925   0.002120418
## promotion_last_5years        0.067432925           1.000000000   0.039245435
## Work_accident                0.002120418           0.039245435   1.000000000
## SLS_Dummy                    0.015149894           0.012353257  -0.004954643
## SUPP_Dummy                  -0.030111194          -0.035604754   0.012078877
## MGMT_Dummy                   0.115436290           0.128086638   0.011241750
##                          SLS_Dummy   SUPP_Dummy    MGMT_Dummy
## satisfaction_level     0.004006732  0.009185484  0.0071719352
## last_evaluation       -0.023030902  0.017104011  0.0096622330
## average_monthly_hours -0.001718340 -0.002444011  0.0008338051
## time_spend_company     0.015149894 -0.030111194  0.1154362901
## promotion_last_5years  0.012353257 -0.035604754  0.1280866376
## Work_accident         -0.004954643  0.012078877  0.0112417500
## SLS_Dummy              1.000000000 -0.257967408 -0.1292892024
## SUPP_Dummy            -0.257967408  1.000000000 -0.0874815739
## MGMT_Dummy            -0.129289202 -0.087481574  1.0000000000
# First regression: satisfaction_level on the control variables only
# (promotion, work accident and the department dummies).
# `data = Data` makes lm() look the variables up in the data frame itself
# instead of depending on whatever happens to be attached to the search path.
Model1 <- lm(
  satisfaction_level ~ promotion_last_5years + Work_accident +
    SLS_Dummy + SUPP_Dummy + MGMT_Dummy,
  data = Data
)
summary(Model1)
## 
## Call:
## lm(formula = satisfaction_level ~ promotion_last_5years + Work_accident + 
##     SLS_Dummy + SUPP_Dummy + MGMT_Dummy)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.55413 -0.17164  0.02661  0.20228  0.39661 
## 
## Coefficients:
##                       Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)           0.603388   0.002899 208.135 < 0.0000000000000002 ***
## promotion_last_5years 0.039544   0.014180   2.789               0.0053 ** 
## Work_accident         0.040738   0.005766   7.065     0.00000000000168 ***
## SLS_Dummy             0.004328   0.004752   0.911               0.3625    
## SUPP_Dummy            0.008252   0.005945   1.388               0.1651    
## MGMT_Dummy            0.006970   0.010354   0.673               0.5008    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2482 on 14993 degrees of freedom
## Multiple R-squared:  0.00415,    Adjusted R-squared:  0.003818 
## F-statistic:  12.5 on 5 and 14993 DF,  p-value: 0.00000000000395
# Standardized (beta) coefficients for the controls-only model.
lm.beta::lm.beta(Model1)
## 
## Call:
## lm(formula = satisfaction_level ~ promotion_last_5years + Work_accident + 
##     SLS_Dummy + SUPP_Dummy + MGMT_Dummy)
## 
## Standardized Coefficients::
##           (Intercept) promotion_last_5years         Work_accident 
##                    NA           0.022947395           0.057629395 
##             SLS_Dummy            SUPP_Dummy            MGMT_Dummy 
##           0.007781361           0.011805725           0.005623655
# 95% confidence intervals for the controls-only model coefficients.
confint(Model1, level = 0.95)
##                              2.5 %     97.5 %
## (Intercept)            0.597705308 0.60907020
## promotion_last_5years  0.011749993 0.06733745
## Work_accident          0.029435667 0.05204104
## SLS_Dummy             -0.004987441 0.01364296
## SUPP_Dummy            -0.003401001 0.01990445
## MGMT_Dummy            -0.013324311 0.02726447
# Second regression: the control variables plus the predictors
# (last_evaluation, average_monthly_hours, time_spend_company).
# `data = Data` makes lm() look the variables up in the data frame itself
# instead of depending on whatever happens to be attached to the search path.
Model2 <- lm(
  satisfaction_level ~ promotion_last_5years + Work_accident +
    SLS_Dummy + SUPP_Dummy + MGMT_Dummy +
    last_evaluation + average_monthly_hours + time_spend_company,
  data = Data
)
summary(Model2)
## 
## Call:
## lm(formula = satisfaction_level ~ promotion_last_5years + Work_accident + 
##     SLS_Dummy + SUPP_Dummy + MGMT_Dummy + last_evaluation + average_monthly_hours + 
##     time_spend_company)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.58054 -0.16083  0.02458  0.19731  0.55208 
## 
## Coefficients:
##                          Estimate  Std. Error t value             Pr(>|t|)    
## (Intercept)            0.57890134  0.01091768  53.024 < 0.0000000000000002 ***
## promotion_last_5years  0.05193644  0.01400415   3.709             0.000209 ***
## Work_accident          0.04099504  0.00568668   7.209    0.000000000000591 ***
## SLS_Dummy              0.00734625  0.00468893   1.567             0.117201    
## SUPP_Dummy             0.00592030  0.00586354   1.010             0.312665    
## MGMT_Dummy             0.02140970  0.01027172   2.084             0.037147 *  
## last_evaluation        0.20121418  0.01247499  16.129 < 0.0000000000000002 ***
## average_monthly_hours -0.00025633  0.00004272  -6.001    0.000000002010579 ***
## time_spend_company    -0.01985644  0.00139840 -14.199 < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2447 on 14990 degrees of freedom
## Multiple R-squared:  0.03177,    Adjusted R-squared:  0.03126 
## F-statistic: 61.49 on 8 and 14990 DF,  p-value: < 0.00000000000000022
# Standardized (beta) coefficients for the controls-plus-predictors model.
lm.beta::lm.beta(Model2)
## 
## Call:
## lm(formula = satisfaction_level ~ promotion_last_5years + Work_accident + 
##     SLS_Dummy + SUPP_Dummy + MGMT_Dummy + last_evaluation + average_monthly_hours + 
##     time_spend_company)
## 
## Standardized Coefficients::
##           (Intercept) promotion_last_5years         Work_accident 
##                    NA            0.03013894            0.05799251 
##             SLS_Dummy            SUPP_Dummy            MGMT_Dummy 
##            0.01320864            0.00847016            0.01727395 
##       last_evaluation average_monthly_hours    time_spend_company 
##            0.13852537           -0.05148929           -0.11661114
# 95% confidence intervals for the controls-plus-predictors model coefficients.
confint(Model2, level = 0.95)
##                              2.5 %       97.5 %
## (Intercept)            0.557501348  0.600301328
## promotion_last_5years  0.024486597  0.079386280
## Work_accident          0.029848449  0.052141625
## SLS_Dummy             -0.001844631  0.016537130
## SUPP_Dummy            -0.005572960  0.017413557
## MGMT_Dummy             0.001275872  0.041543519
## last_evaluation        0.176761682  0.225666678
## average_monthly_hours -0.000340058 -0.000172598
## time_spend_company    -0.022597464 -0.017115412
# Compare the nested models with an F test: do the added predictors reduce
# the residual sum of squares beyond what the controls alone achieve?
anova(Model1, Model2, test = "F")
## Analysis of Variance Table
## 
## Model 1: satisfaction_level ~ promotion_last_5years + Work_accident + 
##     SLS_Dummy + SUPP_Dummy + MGMT_Dummy
## Model 2: satisfaction_level ~ promotion_last_5years + Work_accident + 
##     SLS_Dummy + SUPP_Dummy + MGMT_Dummy + last_evaluation + average_monthly_hours + 
##     time_spend_company
##   Res.Df    RSS Df Sum of Sq      F                Pr(>F)    
## 1  14993 923.29                                              
## 2  14990 897.68  3     25.61 142.55 < 0.00000000000000022 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# R-squared change: the extra variance explained by the predictors over and
# above the controls, i.e. R-squared of Model2 minus R-squared of Model1.
# The values are taken from the fitted models rather than typed in as rounded
# constants, and the subtraction runs in the direction that gives the gain.
summary(Model2)$r.squared - summary(Model1)$r.squared
# Expected: about 0.0276 (0.03177 - 0.00415, the Multiple R-squared values
# reported by summary() for Model2 and Model1).
# Assumption checks: draw the lm diagnostic plots for Model2 in a 2 x 2 grid.
# par() returns the previous settings, which are kept so they can be restored.
old_par <- par(mfrow = c(2, 2))
plot(Model2)
# Restore the previous layout so later plots are not forced into the grid.
par(old_par)