#' ---
#' title: "HR"
#' author: "Les"
#' date: "2025-10-31"
#' output: html_document
#' ---
#install.packages("pastecs")
#install.packages("lm.beta")
library("pastecs")
library("lm.beta")
#Save the Human Resources Data – Assignment 2 to your computer (make sure it is saved as a CSV file)
# Load the HR data set. The path is specific to the original author's machine;
# point it at your own copy of the CSV before running.
Data <- read.csv(
  "C:\\Users\\lesle\\OneDrive\\Документы\\R\\Human Resources Data .csv",
  header = TRUE
)
# A large scipen makes R prefer fixed notation over scientific notation when
# printing the tables in this script.
options(scipen = 999)
# 0/1 indicator columns for the three departments used in the regressions.
# as.numeric() converts the logical comparison directly (TRUE -> 1, FALSE -> 0);
# it takes no second "value" argument.
Data$IT_Dummy <- as.numeric(Data$department == "IT")
Data$support_Dummy <- as.numeric(Data$department == "support")
Data$technical_Dummy <- as.numeric(Data$department == "technical")
# Attach exactly once, and only after the dummy columns exist, so the search
# path holds a single complete copy of Data and no "objects are masked"
# messages are produced. Kept because model formulas in this script may look
# columns up by bare name.
attach(Data)
# List the available columns, including the three new indicator columns.
names(Data)
## [1] "satisfaction_level" "last_evaluation" "number_project"
## [4] "average_monthly_hours" "time_spend_company" "Work_accident"
## [7] "left" "promotion_last_5years" "department"
## [10] "salary" "IT_Dummy" "support_Dummy"
## [13] "technical_Dummy"
# Keep only the columns used by the descriptive statistics, the correlation
# matrix and the regressions: the outcome, the continuous predictors, the two
# binary controls and the three department indicator columns.
DataFrame <- Data[c(
  "satisfaction_level",
  "last_evaluation",
  "average_monthly_hours",
  "time_spend_company",
  "promotion_last_5years",
  "Work_accident",
  "IT_Dummy",
  "support_Dummy",
  "technical_Dummy"
)]
# Descriptive statistics for every analysis column. The "mean" and "std.dev"
# rows of the printed table are the values to report; the correlation matrix
# is printed separately further down.
pastecs::stat.desc(DataFrame)
## satisfaction_level last_evaluation average_monthly_hours
## nbr.val 14999.000000000 14999.000000000 14999.0000000
## nbr.null 0.000000000 0.000000000 0.0000000
## nbr.na 0.000000000 0.000000000 0.0000000
## min 0.090000000 0.360000000 96.0000000
## max 1.000000000 1.000000000 310.0000000
## range 0.910000000 0.640000000 214.0000000
## sum 9191.890000000 10740.810000000 3015554.0000000
## median 0.640000000 0.720000000 200.0000000
## mean 0.612833522 0.716101740 201.0503367
## SE.mean 0.002030128 0.001397637 0.4077973
## CI.mean.0.95 0.003979300 0.002739538 0.7993325
## var 0.061817201 0.029298864 2494.3131748
## std.dev 0.248630651 0.171169111 49.9430994
## coef.var 0.405706676 0.239029039 0.2484109
## time_spend_company promotion_last_5years Work_accident
## nbr.val 14999.00000000 14999.000000000 14999.000000000
## nbr.null 0.00000000 14680.000000000 12830.000000000
## nbr.na 0.00000000 0.000000000 0.000000000
## min 2.00000000 0.000000000 0.000000000
## max 10.00000000 1.000000000 1.000000000
## range 8.00000000 1.000000000 1.000000000
## sum 52470.00000000 319.000000000 2169.000000000
## median 3.00000000 0.000000000 0.000000000
## mean 3.49823322 0.021268085 0.144609641
## SE.mean 0.01192236 0.001178092 0.002871866
## CI.mean.0.95 0.02336928 0.002309205 0.005629208
## var 2.13199781 0.020817141 0.123705940
## std.dev 1.46013623 0.144281465 0.351718552
## coef.var 0.41739248 6.783942593 2.432192977
## IT_Dummy support_Dummy technical_Dummy
## nbr.val 14999.000000000 14999.000000000 14999.000000000
## nbr.null 13772.000000000 12770.000000000 12279.000000000
## nbr.na 0.000000000 0.000000000 0.000000000
## min 0.000000000 0.000000000 0.000000000
## max 1.000000000 1.000000000 1.000000000
## range 1.000000000 1.000000000 1.000000000
## sum 1227.000000000 2229.000000000 2720.000000000
## median 0.000000000 0.000000000 0.000000000
## mean 0.081805454 0.148609907 0.181345423
## SE.mean 0.002237906 0.002904501 0.003146205
## CI.mean.0.95 0.004386569 0.005693176 0.006166946
## var 0.075118330 0.126533439 0.148469159
## std.dev 0.274077233 0.355715390 0.385316959
## coef.var 3.350354049 2.393618277 2.124768042
# Pairwise Pearson correlations between all analysis columns.
cor(DataFrame, method = "pearson")
## satisfaction_level last_evaluation average_monthly_hours
## satisfaction_level 1.000000000 0.105021214 -0.020048113
## last_evaluation 0.105021214 1.000000000 0.339741800
## average_monthly_hours -0.020048113 0.339741800 1.000000000
## time_spend_company -0.100866073 0.131590722 0.127754910
## promotion_last_5years 0.025605186 -0.008683768 -0.003544414
## Work_accident 0.058697241 -0.007104289 -0.010142888
## IT_Dummy 0.006372913 0.001269404 0.006966685
## support_Dummy 0.009185484 0.017104011 -0.002444011
## technical_Dummy -0.009344987 0.013741906 0.013637583
## time_spend_company promotion_last_5years Work_accident
## satisfaction_level -0.100866073 0.025605186 0.058697241
## last_evaluation 0.131590722 -0.008683768 -0.007104289
## average_monthly_hours 0.127754910 -0.003544414 -0.010142888
## time_spend_company 1.000000000 0.067432925 0.002120418
## promotion_last_5years 0.067432925 1.000000000 0.039245435
## Work_accident 0.002120418 0.039245435 1.000000000
## IT_Dummy -0.006053290 -0.038942048 -0.009293287
## support_Dummy -0.030111194 -0.035604754 0.012078877
## technical_Dummy -0.027991402 -0.035798970 -0.006070240
## IT_Dummy support_Dummy technical_Dummy
## satisfaction_level 0.006372913 0.009185484 -0.009344987
## last_evaluation 0.001269404 0.017104011 0.013741906
## average_monthly_hours 0.006966685 -0.002444011 0.013637583
## time_spend_company -0.006053290 -0.030111194 -0.027991402
## promotion_last_5years -0.038942048 -0.035604754 -0.035798970
## Work_accident -0.009293287 0.012078877 -0.006070240
## IT_Dummy 1.000000000 -0.124704854 -0.140483955
## support_Dummy -0.124704854 1.000000000 -0.196635777
## technical_Dummy -0.140483955 -0.196635777 1.000000000
# Model 1 (control variables only): satisfaction_level regressed on the two
# binary controls and the three department indicator columns. Update the
# department variable names if you selected different departments.
#
# The formula must name each department indicator exactly once. Listing
# technical_Dummy twice while leaving out IT_Dummy is accepted silently by R
# (the repeated term is dropped), which fits the model without IT_Dummy and
# makes the later Model1-vs-Model2 comparison count IT_Dummy as an added
# predictor.
#
# `data = Data` makes the fit independent of whatever is attached to the
# search path.
#
# NOTE: any Model1 output recorded in this file (summary, lm.beta, confint and
# the anova comparison) that shows only technical_Dummy and support_Dummy as
# department terms predates this formula and must be regenerated by re-running.
Model1 <- lm(
  satisfaction_level ~ promotion_last_5years + Work_accident +
    technical_Dummy + support_Dummy + IT_Dummy,
  data = Data
)
summary(Model1)
##
## Call:
## lm(formula = satisfaction_level ~ promotion_last_5years + Work_accident +
## technical_Dummy + support_Dummy + technical_Dummy)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.55677 -0.17163 0.02823 0.20398 0.39823
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.606020 0.002633 230.176 < 0.0000000000000002 ***
## promotion_last_5years 0.040312 0.014077 2.864 0.00419 **
## Work_accident 0.040747 0.005766 7.067 0.00000000000166 ***
## technical_Dummy -0.004245 0.005369 -0.791 0.42911
## support_Dummy 0.005612 0.005816 0.965 0.33461
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2482 on 14994 degrees of freedom
## Multiple R-squared: 0.004118, Adjusted R-squared: 0.003852
## F-statistic: 15.5 on 4 and 14994 DF, p-value: 0.000000000001172
# Standardized (beta) coefficients for the controls-only model.
lm.beta::lm.beta(Model1)
##
## Call:
## lm(formula = satisfaction_level ~ promotion_last_5years + Work_accident +
## technical_Dummy + support_Dummy + technical_Dummy)
##
## Standardized Coefficients::
## (Intercept) promotion_last_5years Work_accident
## NA 0.023393324 0.057642249
## technical_Dummy support_Dummy
## -0.006578939 0.008028489
# 95% confidence intervals for the Model 1 coefficients.
confint(Model1, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 0.600858873 0.611180301
## promotion_last_5years 0.012718894 0.067905423
## Work_accident 0.029445239 0.052049637
## technical_Dummy -0.014768235 0.006277947
## support_Dummy -0.005788037 0.017011214
# Model 2 (controls plus predictors): adds last_evaluation,
# average_monthly_hours and time_spend_company to the control variables.
# Update the department variable names if you selected different departments.
#
# `data = Data` makes the fit independent of whatever is attached to the
# search path. The coefficients are unchanged; the only visible difference is
# that the printed "Call:" now also shows `data = Data`.
Model2 <- lm(
  satisfaction_level ~ promotion_last_5years + Work_accident +
    technical_Dummy + support_Dummy + IT_Dummy +
    last_evaluation + average_monthly_hours + time_spend_company,
  data = Data
)
summary(Model2)
##
## Call:
## lm(formula = satisfaction_level ~ promotion_last_5years + Work_accident +
## technical_Dummy + support_Dummy + IT_Dummy + last_evaluation +
## average_monthly_hours + time_spend_company)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.58500 -0.16081 0.02387 0.19757 0.55300
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.58237039 0.01088606 53.497 < 0.0000000000000002 ***
## promotion_last_5years 0.05521097 0.01393304 3.963 0.000074483291816 ***
## Work_accident 0.04107766 0.00568725 7.223 0.000000000000534 ***
## technical_Dummy -0.00703754 0.00537956 -1.308 0.191
## support_Dummy 0.00162294 0.00581530 0.279 0.780
## IT_Dummy 0.00581091 0.00746823 0.778 0.437
## last_evaluation 0.20113648 0.01247556 16.122 < 0.0000000000000002 ***
## average_monthly_hours -0.00025684 0.00004272 -6.012 0.000000001877311 ***
## time_spend_company -0.01957817 0.00139115 -14.073 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2447 on 14990 degrees of freedom
## Multiple R-squared: 0.03157, Adjusted R-squared: 0.03105
## F-statistic: 61.08 on 8 and 14990 DF, p-value: < 0.00000000000000022
# Standardized (beta) coefficients for the full model.
lm.beta::lm.beta(Model2)
##
## Call:
## lm(formula = satisfaction_level ~ promotion_last_5years + Work_accident +
## technical_Dummy + support_Dummy + IT_Dummy + last_evaluation +
## average_monthly_hours + time_spend_company)
##
## Standardized Coefficients::
## (Intercept) promotion_last_5years Work_accident
## NA 0.032039168 0.058109388
## technical_Dummy support_Dummy IT_Dummy
## -0.010906469 0.002321939 0.006405638
## last_evaluation average_monthly_hours time_spend_company
## 0.138471873 -0.051591181 -0.114976967
# 95% confidence intervals for the Model 2 coefficients.
confint(Model2, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 0.5610323694 0.6037084025
## promotion_last_5years 0.0279005113 0.0825214236
## Work_accident 0.0299299589 0.0522253602
## technical_Dummy -0.0175821363 0.0035070619
## support_Dummy -0.0097757616 0.0130216440
## IT_Dummy -0.0088277402 0.0204495587
## last_evaluation 0.1766828532 0.2255901017
## average_monthly_hours -0.0003405753 -0.0001730952
## time_spend_company -0.0223050008 -0.0168513430
# Nested-model comparison: F test of whether the terms added in Model 2
# reduce the residual sum of squares relative to Model 1.
anova(Model1, Model2, test = "F")
## Analysis of Variance Table
##
## Model 1: satisfaction_level ~ promotion_last_5years + Work_accident +
## technical_Dummy + support_Dummy + technical_Dummy
## Model 2: satisfaction_level ~ promotion_last_5years + Work_accident +
## technical_Dummy + support_Dummy + IT_Dummy + last_evaluation +
## average_monthly_hours + time_spend_company
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 14994 923.32
## 2 14990 897.87 4 25.451 106.23 < 0.00000000000000022 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# R-squared change: there is no built-in function for this, so subtract the
# Model 1 R-squared from the Model 2 R-squared.
summary(Model2)$r.squared - summary(Model1)$r.squared
# Assumptions test: draw the lm diagnostic plots for Model 2 on a single
# 2 x 2 page. par() returns the settings it replaced, so they are saved and
# restored afterwards; otherwise every later plot in the session would stay
# in the 2 x 2 layout.
old_par <- par(mfrow = c(2, 2))
plot(Model2)
par(old_par)