# Load the HR attrition data set from the current working directory.
hrdata <- read.csv("HR-Employee-Attrition.csv")

# Columns used in the correlation analysis. They are listed once so that the
# preview and the correlation matrix below always use the same selection.
cor_cols <- c(
  "Age", "DailyRate", "DistanceFromHome", "Education", "HourlyRate",
  "MonthlyIncome", "MonthlyRate", "NumCompaniesWorked", "TotalWorkingYears",
  "TrainingTimesLastYear"
)

# Preview the selected columns (auto-printed because this is a top-level
# expression).
hrdata[, cor_cols]

# Pairwise correlations between the selected columns, rounded to two decimals
# for display only; cor_matrix itself keeps full precision.
cor_matrix <- cor(hrdata[, cor_cols])
print(round(cor_matrix, 2))
Age DailyRate DistanceFromHome Education HourlyRate MonthlyIncome MonthlyRate NumCompaniesWorked TotalWorkingYears TrainingTimesLastYear
Age 1.00 0.01 0.00 0.21 0.02 0.50 0.03 0.30 0.68 -0.02
DailyRate 0.01 1.00 0.00 -0.02 0.02 0.01 -0.03 0.04 0.01 0.00
DistanceFromHome 0.00 0.00 1.00 0.02 0.03 -0.02 0.03 -0.03 0.00 -0.04
Education 0.21 -0.02 0.02 1.00 0.02 0.09 -0.03 0.13 0.15 -0.03
HourlyRate 0.02 0.02 0.03 0.02 1.00 -0.02 -0.02 0.02 0.00 -0.01
MonthlyIncome 0.50 0.01 -0.02 0.09 -0.02 1.00 0.03 0.15 0.77 -0.02
MonthlyRate 0.03 -0.03 0.03 -0.03 -0.02 0.03 1.00 0.02 0.03 0.00
NumCompaniesWorked 0.30 0.04 -0.03 0.13 0.02 0.15 0.02 1.00 0.24 -0.07
TotalWorkingYears 0.68 0.01 0.00 0.15 0.00 0.77 0.03 0.24 1.00 -0.04
TrainingTimesLastYear -0.02 0.00 -0.04 -0.03 -0.01 -0.02 0.00 -0.07 -0.04 1.00
# Scatterplot matrix of monthly income, age, total working years and education.
pairs(
  ~ MonthlyIncome + Age + TotalWorkingYears + Education,
  data = hrdata,
  main = "Scatterplot Matrix"
)

# Box plot of age, one box per attrition group.
boxplot(
  Age ~ Attrition,
  data = hrdata,
  main = "Who Got Fired",
  xlab = "Attrition",
  ylab = "Age"
)

# Same box plot of age by attrition group, drawn again as the base layer for
# the point overlay below.
boxplot(
  Age ~ Attrition,
  data = hrdata,
  main = "Who Got Fired",
  xlab = "Attrition",
  ylab = "Age"
)

# Add jittered points: add = TRUE draws the individual observations on top of
# the existing box plot instead of starting a new figure.
stripchart(
  Age ~ Attrition,
  data = hrdata,
  vertical = TRUE,
  method = "jitter",
  pch = 21,
  col = "blue",
  bg = "lightblue",
  add = TRUE
)

# Ages of employees whose Attrition is "Yes" versus everyone else.
# The variable names are kept as-is because t.test() echoes them in the
# "data:" line of its printed result.
yes_age <- hrdata$Age[hrdata$Attrition == "Yes"]
no_age <- hrdata$Age[hrdata$Attrition != "Yes"]

# Two-sample t-test comparing the mean age of the two groups.
t.test(x = yes_age, y = no_age)
Welch Two Sample t-test
data: yes_age and no_age
t = -5.828, df = 316.93, p-value = 1.38e-08
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-5.288346 -2.618930
sample estimates:
mean of x mean of y
33.60759 37.56123
# Simple linear regression of monthly income on age.
model1 <- lm(MonthlyIncome ~ Age, data = hrdata)

# Print coefficients, residual summary and fit statistics.
summary(model1)
Call:
lm(formula = MonthlyIncome ~ Age, data = hrdata)
Residuals:
Min 1Q Median 3Q Max
-9990.1 -2592.7 -677.9 1810.5 12540.8
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -2970.67 443.70 -6.695 3.06e-11 ***
Age 256.57 11.67 21.995 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 4084 on 1468 degrees of freedom
Multiple R-squared: 0.2479, Adjusted R-squared: 0.2473
F-statistic: 483.8 on 1 and 1468 DF, p-value: < 2.2e-16
# Linear regression of monthly income on age and total working years.
model2 <- lm(MonthlyIncome ~ Age + TotalWorkingYears, data = hrdata)

# Print coefficients, residual summary and fit statistics.
summary(model2)
Call:
lm(formula = MonthlyIncome ~ Age + TotalWorkingYears, data = hrdata)
Residuals:
Min 1Q Median 3Q Max
-11310.8 -1690.8 -91.4 1428.3 11461.5
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1978.08 352.36 5.614 2.36e-08 ***
Age -26.87 11.63 -2.311 0.021 *
TotalWorkingYears 489.13 13.65 35.824 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2984 on 1467 degrees of freedom
Multiple R-squared: 0.5988, Adjusted R-squared: 0.5983
F-statistic: 1095 on 2 and 1467 DF, p-value: < 2.2e-16
LS0tDQp0aXRsZTogIkhSIEFuYWx5c2lzIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KYGBge3J9DQpocmRhdGEgPC0gcmVhZC5jc3YoIkhSLUVtcGxveWVlLUF0dHJpdGlvbi5jc3YiKQ0KYGBgDQoNCmBgYHtyfQ0KaHJkYXRhWyAsIGMoIkFnZSIsICJEYWlseVJhdGUiLCAiRGlzdGFuY2VGcm9tSG9tZSIsICJFZHVjYXRpb24iLCAiSG91cmx5UmF0ZSIsICJNb250aGx5SW5jb21lIiwgIk1vbnRobHlSYXRlIiwgIk51bUNvbXBhbmllc1dvcmtlZCIsICJUb3RhbFdvcmtpbmdZZWFycyIsICJUcmFpbmluZ1RpbWVzTGFzdFllYXIiKV0NCmBgYA0KDQpgYGB7cn0NCmNvcl9tYXRyaXggPC0gY29yKCBocmRhdGFbICwgYygiQWdlIiwgIkRhaWx5UmF0ZSIsICJEaXN0YW5jZUZyb21Ib21lIiwgIkVkdWNhdGlvbiIsICJIb3VybHlSYXRlIiwgIk1vbnRobHlJbmNvbWUiLCAiTW9udGhseVJhdGUiLCAiTnVtQ29tcGFuaWVzV29ya2VkIiwgIlRvdGFsV29ya2luZ1llYXJzIiwgIlRyYWluaW5nVGltZXNMYXN0WWVhciIpXSApDQoNCnByaW50KHJvdW5kKGNvcl9tYXRyaXgsMikpDQoNCmBgYA0KDQpgYGB7cn0NCnBhaXJzKH5Nb250aGx5SW5jb21lK0FnZStUb3RhbFdvcmtpbmdZZWFycytFZHVjYXRpb24sZGF0YSA9IGhyZGF0YSwgbWFpbiA9ICJTY2F0dGVycGxvdCBNYXRyaXgiKQ0KYGBgDQoNCmBgYHtyfQ0KYm94cGxvdChBZ2V+QXR0cml0aW9uLGRhdGE9aHJkYXRhLCBtYWluPSAiV2hvIEdvdCBGaXJlZCIsIHhsYWI9IkF0dHJpdGlvbiIsIHlsYWI9IkFnZSIpDQpgYGANCg0KYGBge3J9DQpib3hwbG90KEFnZSB+IEF0dHJpdGlvbiwgZGF0YSA9IGhyZGF0YSwgDQogICAgICAgIG1haW4gPSAiV2hvIEdvdCBGaXJlZCIsIHhsYWIgPSAiQXR0cml0aW9uIiwgeWxhYiA9ICJBZ2UiKQ0KDQojIEFkZCBqaXR0ZXJlZCBwb2ludHMNCnN0cmlwY2hhcnQoQWdlIH4gQXR0cml0aW9uLCBkYXRhID0gaHJkYXRhLCANCiAgICAgICAgICAgdmVydGljYWwgPSBUUlVFLCBtZXRob2QgPSAiaml0dGVyIiwgcGNoID0gMjEsIGNvbCA9ICdibHVlJywgYmcgPSAnbGlnaHRibHVlJywgYWRkID0gVFJVRSkNCg0KYGBgDQoNCg0KDQoNCg0KDQoNCg0KYGBge3J9DQp5ZXNfYWdlIDwtIGhyZGF0YVsoaHJkYXRhJEF0dHJpdGlvbiA9PSAiWWVzIiksJ0FnZSddIA0Kbm9fYWdlIDwtIGhyZGF0YVsoaHJkYXRhJEF0dHJpdGlvbiAhPSAiWWVzIiksJ0FnZSddDQp0LnRlc3QoeWVzX2FnZSwgbm9fYWdlKQ0KYGBgDQoNCmBgYHtyfQ0KbW9kZWwxID0gbG0oTW9udGhseUluY29tZSB+IEFnZSwgZGF0YT1ocmRhdGEpIA0Kc3VtbWFyeShtb2RlbDEpDQpgYGANCg0KYGBge3J9DQptb2RlbDIgPSBsbShNb250aGx5SW5jb21lIH4gQWdlICsgVG90YWxXb3JraW5nWWVhcnMsIGRhdGE9aHJkYXRhKSANCnN1bW1hcnkobW9kZWwyKQ0KYGBgDQoNCg0K